Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 17 Mar 2019 16:19:22 +0000 (09:19 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 17 Mar 2019 16:19:22 +0000 (09:19 -0700)
Pull perf fixes from Thomas Gleixner:
 "Three fixes for the fallout from the TSX errata workaround:

   - Prevent memory corruption caused by an unchecked out-of-bounds
     array index.

   - Two trivial fixes to address compiler warnings"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Make dev_attr_allow_tsx_force_abort static
  perf/x86: Fixup typo in stub functions
  perf/x86/intel: Fix memory corruption

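The memory-corruption fix above amounts to validating an array index before it is used. Below is a minimal sketch of that pattern in plain C -- illustrative only; constraint_list and lookup_constraint are hypothetical names, not the actual perf/x86 patch:

    #include <errno.h>
    #include <stddef.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static int constraint_list[8];          /* hypothetical backing array */

    /* Check the index against the array bound instead of trusting the
     * caller -- the pattern behind the "Fix memory corruption" commit. */
    static int lookup_constraint(int idx, int *out)
    {
            if (idx < 0 || (size_t)idx >= ARRAY_SIZE(constraint_list))
                    return -EINVAL;         /* e.g. idx == -1 is rejected */
            *out = constraint_list[idx];
            return 0;
    }
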
434 files changed:
Documentation/ABI/obsolete/sysfs-class-dax [new file with mode: 0644]
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/admin-guide/md.rst
Documentation/arm/kernel_mode_neon.txt
Documentation/devicetree/bindings/display/ssd1307fb.txt
Documentation/filesystems/cifs/TODO
Documentation/filesystems/cifs/cifs.txt
Documentation/filesystems/f2fs.txt
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/halt-polling.txt
Documentation/virtual/kvm/mmu.txt
MAINTAINERS
arch/arm/Kconfig
arch/arm/Kconfig-nommu
arch/arm/Makefile
arch/arm/boot/bootp/Makefile
arch/arm/boot/bootp/init.S
arch/arm/boot/compressed/Makefile
arch/arm/boot/compressed/ll_char_wr.S
arch/arm/boot/dts/imx28-cfa10036.dts
arch/arm/common/mcpm_entry.c
arch/arm/include/asm/arch_gicv3.h
arch/arm/include/asm/assembler.h
arch/arm/include/asm/barrier.h
arch/arm/include/asm/hardware/entry-macro-iomd.S
arch/arm/include/asm/kvm_emulate.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_hyp.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/include/asm/pgtable.h
arch/arm/include/asm/processor.h
arch/arm/include/asm/smp.h
arch/arm/include/asm/smp_twd.h
arch/arm/include/asm/spinlock.h
arch/arm/include/asm/suspend.h
arch/arm/include/asm/uaccess.h
arch/arm/include/asm/v7m.h
arch/arm/include/asm/vfpmacros.h
arch/arm/include/debug/tegra.S
arch/arm/kernel/debug.S
arch/arm/kernel/entry-armv.S
arch/arm/kernel/entry-common.S
arch/arm/kernel/entry-header.S
arch/arm/kernel/entry-v7m.S
arch/arm/kernel/head-nommu.S
arch/arm/kernel/hyp-stub.S
arch/arm/kernel/machine_kexec.c
arch/arm/kernel/patch.c
arch/arm/kernel/sleep.S
arch/arm/kernel/smp.c
arch/arm/kernel/smp_twd.c
arch/arm/kernel/unwind.c
arch/arm/kvm/Makefile
arch/arm/kvm/coproc.c
arch/arm/kvm/hyp/cp15-sr.c
arch/arm/kvm/hyp/hyp-entry.S
arch/arm/kvm/hyp/switch.c
arch/arm/kvm/hyp/tlb.c
arch/arm/kvm/interrupts.S
arch/arm/lib/Makefile
arch/arm/lib/bitops.h
arch/arm/lib/clear_user.S
arch/arm/lib/copy_from_user.S
arch/arm/lib/copy_page.S
arch/arm/lib/copy_template.S
arch/arm/lib/copy_to_user.S
arch/arm/lib/csumpartial.S
arch/arm/lib/csumpartialcopygeneric.S
arch/arm/lib/csumpartialcopyuser.S
arch/arm/lib/div64.S
arch/arm/lib/floppydma.S
arch/arm/lib/io-readsb.S
arch/arm/lib/io-readsl.S
arch/arm/lib/io-readsw-armv3.S
arch/arm/lib/io-readsw-armv4.S
arch/arm/lib/io-writesb.S
arch/arm/lib/io-writesl.S
arch/arm/lib/io-writesw-armv3.S
arch/arm/lib/io-writesw-armv4.S
arch/arm/lib/lib1funcs.S
arch/arm/lib/memcpy.S
arch/arm/lib/memmove.S
arch/arm/lib/memset.S
arch/arm/lib/xor-neon.c
arch/arm/mach-actions/platsmp.c
arch/arm/mach-exynos/headsmp.S
arch/arm/mach-exynos/platsmp.c
arch/arm/mach-ks8695/include/mach/entry-macro.S
arch/arm/mach-omap2/prm_common.c
arch/arm/mach-oxnas/Makefile
arch/arm/mach-oxnas/hotplug.c [deleted file]
arch/arm/mach-oxnas/platsmp.c
arch/arm/mach-prima2/common.h
arch/arm/mach-prima2/headsmp.S
arch/arm/mach-prima2/hotplug.c
arch/arm/mach-prima2/platsmp.c
arch/arm/mach-qcom/platsmp.c
arch/arm/mach-spear/generic.h
arch/arm/mach-spear/headsmp.S
arch/arm/mach-spear/hotplug.c
arch/arm/mach-spear/platsmp.c
arch/arm/mach-tegra/reset-handler.S
arch/arm/mm/cache-v6.S
arch/arm/mm/copypage-v4mc.c
arch/arm/mm/copypage-v4wb.c
arch/arm/mm/copypage-v4wt.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/idmap.c
arch/arm/mm/init.c
arch/arm/mm/pmsa-v8.c
arch/arm/mm/proc-v7m.S
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/sysreg.h
arch/arm64/kvm/Makefile
arch/arm64/kvm/debug.c
arch/arm64/kvm/hyp.S
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/sysreg-sr.c
arch/arm64/kvm/sys_regs.c
arch/ia64/hp/sim/simscsi.c
arch/mips/include/asm/kvm_host.h
arch/powerpc/configs/skiroot_defconfig
arch/powerpc/include/asm/book3s/64/hugetlb.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/uapi/asm/kvm.h
arch/powerpc/kernel/head_fsl_booke.S
arch/powerpc/kernel/traps.c
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_32_mmu.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/kvm/book3s_emulate.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_rtas.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/Makefile
arch/powerpc/platforms/powernv/opal-call.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/s390/include/asm/cio.h
arch/s390/include/asm/irq.h
arch/s390/include/asm/isc.h
arch/s390/include/asm/kvm_host.h
arch/s390/kernel/irq.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/um/drivers/ubd_kern.c
arch/um/drivers/vector_user.c
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_vcpu_regs.h [new file with mode: 0644]
arch/x86/kernel/kvmclock.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/i8254.c
arch/x86/kvm/i8259.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/mmutrace.h
arch/x86/kvm/page_track.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmcs.h
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
block/blk-merge.c
drivers/acpi/nfit/core.c
drivers/acpi/numa.c
drivers/amba/bus.c
drivers/base/memory.c
drivers/block/xen-blkback/xenbus.c
drivers/clocksource/arm_arch_timer.c
drivers/dax/Kconfig
drivers/dax/Makefile
drivers/dax/bus.c [new file with mode: 0644]
drivers/dax/bus.h [new file with mode: 0644]
drivers/dax/dax-private.h
drivers/dax/dax.h [deleted file]
drivers/dax/device-dax.h [deleted file]
drivers/dax/device.c
drivers/dax/kmem.c [new file with mode: 0644]
drivers/dax/pmem.c [deleted file]
drivers/dax/pmem/Makefile [new file with mode: 0644]
drivers/dax/pmem/compat.c [new file with mode: 0644]
drivers/dax/pmem/core.c [new file with mode: 0644]
drivers/dax/pmem/pmem.c [new file with mode: 0644]
drivers/dax/super.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/modules/freesync/freesync.c
drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/smu9_smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_ioc32.c
drivers/gpu/drm/etnaviv/Kconfig
drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
drivers/gpu/drm/etnaviv/etnaviv_dump.c
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
drivers/gpu/drm/etnaviv/etnaviv_sched.c
drivers/gpu/drm/i915/i915_active.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_scheduler.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/selftests/i915_gem_context.c
drivers/gpu/drm/qxl/qxl_drv.c
drivers/gpu/vga/vgaarb.c
drivers/hwtracing/coresight/coresight-etm3x.c
drivers/hwtracing/coresight/coresight-etm4x.c
drivers/hwtracing/coresight/coresight-priv.h
drivers/hwtracing/coresight/coresight-stm.c
drivers/hwtracing/coresight/coresight-tmc.c
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-mt65xx.c
drivers/i2c/busses/i2c-rcar.c
drivers/i2c/busses/i2c-sis630.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/i2c/i2c-core-base.c
drivers/iommu/amd_iommu.c
drivers/lightnvm/pblk-rl.c
drivers/md/raid10.c
drivers/md/raid5-log.h
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/ntb/hw/intel/ntb_hw_gen1.c
drivers/ntb/hw/intel/ntb_hw_gen1.h
drivers/ntb/hw/intel/ntb_hw_gen3.c
drivers/ntb/hw/mscc/ntb_hw_switchtec.c
drivers/ntb/ntb_transport.c
drivers/nvdimm/e820.c
drivers/nvdimm/nd.h
drivers/nvdimm/of_pmem.c
drivers/nvdimm/region_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/tcp.c
drivers/nvme/host/trace.c
drivers/nvme/host/trace.h
drivers/nvme/target/core.c
drivers/nvme/target/fc.c
drivers/nvme/target/io-cmd-bdev.c
drivers/nvme/target/io-cmd-file.c
drivers/s390/cio/chsc.c
drivers/s390/cio/chsc.h
drivers/scsi/aacraid/linit.c
drivers/scsi/hisi_sas/hisi_sas.h
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
drivers/scsi/libiscsi.c
drivers/scsi/libiscsi_tcp.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_dfs.c
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/smartpqi/smartpqi_init.c
drivers/scsi/ufs/ufs-hisi.c
drivers/scsi/ufs/ufshcd-pltfrm.c
drivers/scsi/ufs/ufshcd-pltfrm.h
drivers/scsi/ufs/ufshcd.h
drivers/scsi/virtio_scsi.c
drivers/target/target_core_user.c
drivers/video/fbdev/aty/radeon_pm.c
drivers/video/fbdev/cg14.c
drivers/video/fbdev/cg3.c
drivers/video/fbdev/chipsfb.c
drivers/video/fbdev/core/fb_cmdline.c
drivers/video/fbdev/core/fbcon.c
drivers/video/fbdev/core/fbmem.c
drivers/video/fbdev/core/fbmon.c
drivers/video/fbdev/ffb.c
drivers/video/fbdev/geode/gxfb_core.c
drivers/video/fbdev/geode/lxfb_core.c
drivers/video/fbdev/imsttfb.c
drivers/video/fbdev/mbx/mbxdebugfs.c
drivers/video/fbdev/mbx/mbxfb.c
drivers/video/fbdev/offb.c
drivers/video/fbdev/omap2/omapfb/dss/core.c
drivers/video/fbdev/omap2/omapfb/dss/dss-of.c
drivers/video/fbdev/omap2/omapfb/dss/dss.h
drivers/video/fbdev/omap2/omapfb/dss/hdmi4_core.c
drivers/video/fbdev/ssd1307fb.c
drivers/video/fbdev/via/viafbdev.c
drivers/xen/balloon.c
fs/9p/v9fs_vfs.h
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/9p/vfs_super.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_ioctl.h
fs/cifs/cifsglob.h
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/smb1ops.c
fs/cifs/smb2inode.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smb2proto.h
fs/cifs/smb2status.h
fs/cifs/trace.h
fs/cifs/transport.c
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/trace.c
fs/f2fs/xattr.c
fs/f2fs/xattr.h
fs/nfs/pnfs.c
fs/proc/base.c
fs/sysfs/mount.c
fs/xfs/libxfs/xfs_dir2_leaf.c
fs/xfs/libxfs/xfs_dir2_node.c
include/clocksource/arm_arch_timer.h
include/drm/drm_fb_helper.h
include/kvm/arm_arch_timer.h
include/linux/acpi.h
include/linux/amba/bus.h
include/linux/f2fs_fs.h
include/linux/kvm_host.h
include/linux/libnvdimm.h
include/linux/ntb.h
include/linux/pagemap.h
include/linux/proc_fs.h
include/linux/ring_buffer.h
include/linux/switchtec.h
include/linux/syscalls.h
include/linux/vgaarb.h
include/sound/pcm.h
include/trace/events/f2fs.h
include/trace/events/sunrpc.h
include/uapi/asm-generic/unistd.h
kernel/printk/printk.c
kernel/resource.c
kernel/signal.c
kernel/sys_ni.c
kernel/trace/blktrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_kdb.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_probe.c
kernel/trace/trace_probe.h
kernel/trace/trace_uprobe.c
lib/raid6/Makefile
mm/filemap.c
mm/memory_hotplug.c
net/9p/client.c
net/9p/trans_xen.c
net/sunrpc/clnt.c
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c
sound/hda/hdac_stream.c
sound/pci/hda/hda_tegra.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/usb/usx2y/usb_stream.c
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/dax-dev.c
tools/testing/selftests/Makefile
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c [new file with mode: 0644]
tools/testing/selftests/pidfd/Makefile [new file with mode: 0644]
tools/testing/selftests/pidfd/pidfd_test.c [new file with mode: 0644]
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/hyp/vgic-v3-sr.c
virt/kvm/arm/mmu.c
virt/kvm/arm/trace.h
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/coalesced_mmio.c
virt/kvm/eventfd.c
virt/kvm/irqchip.c
virt/kvm/kvm_main.c
virt/kvm/vfio.c

diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax
new file mode 100644 (file)
index 0000000..2cb9fc5
--- /dev/null
@@ -0,0 +1,22 @@
+What:           /sys/class/dax/
+Date:           May, 2016
+KernelVersion:  v4.7
+Contact:        linux-nvdimm@lists.01.org
+Description:   Device DAX is the device-centric analogue of Filesystem
+               DAX (CONFIG_FS_DAX).  It allows memory ranges to be
+               allocated and mapped without need of an intervening file
+               system.  Device DAX is strict, precise and predictable.
+               Specifically this interface:
+
+               1/ Guarantees fault granularity with respect to a given
+               page size (pte, pmd, or pud) set at configuration time.
+
+               2/ Enforces deterministic behavior by being strict about
+               what fault scenarios are supported.
+
+               The /sys/class/dax/ interface enumerates all the
+               device-dax instances in the system. The ABI is
+               deprecated and will be removed after 2020. It is
+               replaced with the DAX bus interface /sys/bus/dax/ where
+               device-dax instances can be found under
+               /sys/bus/dax/devices/
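For illustration, a small userspace sketch of consuming the replacement ABI; it assumes only the /sys/bus/dax/devices layout described above:

    #include <dirent.h>
    #include <stdio.h>

    /* Enumerate device-dax instances via the DAX bus interface. */
    int main(void)
    {
            struct dirent *de;
            DIR *d = opendir("/sys/bus/dax/devices");

            if (!d)
                    return 1;       /* no device-dax, or pre-bus kernel */
            while ((de = readdir(d)) != NULL)
                    if (de->d_name[0] != '.')
                            printf("%s\n", de->d_name);
            closedir(d);
            return 0;
    }
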
index a7ce331994578451759aebd8a080bc38085d1093..91822ce258317df500271f8d28cf540e30cb19da 100644 (file)
@@ -86,6 +86,13 @@ Description:
                The unit size is one block, now only support configuring in range
                of [1, 512].
 
+What:          /sys/fs/f2fs/<disk>/umount_discard_timeout
+Date:          January 2019
+Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+               Set timeout to issue discard commands during umount.
+               Default: 5 secs
+
 What:          /sys/fs/f2fs/<disk>/max_victim_search
 Date:          January 2014
 Contact:       "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
index 84de718f24a469ee455b6e5e3aab5b6608f3f1dc..3c51084ffd379758cbc2d7fb8212710087d54a7b 100644 (file)
@@ -756,3 +756,6 @@ These currently include:
       The cache mode for raid5. raid5 could include an extra disk for
       caching. The mode can be "write-through" and "write-back". The
       default is "write-through".
+
+  ppl_write_hint
+      NVMe stream ID to be set for each PPL write request.
index 525452726d31e94c43d7c8c7c05c4431b006912a..b9e060c5b61e08c1491c710adc560530b8bae03e 100644 (file)
@@ -6,7 +6,7 @@ TL;DR summary
 * Use only NEON instructions, or VFP instructions that don't rely on support
   code
 * Isolate your NEON code in a separate compilation unit, and compile it with
-  '-mfpu=neon -mfloat-abi=softfp'
+  '-march=armv7-a -mfpu=neon -mfloat-abi=softfp'
 * Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
   NEON code
 * Don't sleep in your NEON code, and be aware that it will be executed with
@@ -87,7 +87,7 @@ instructions appearing in unexpected places if no special care is taken.
 Therefore, the recommended and only supported way of using NEON/VFP in the
 kernel is by adhering to the following rules:
 * isolate the NEON code in a separate compilation unit and compile it with
-  '-mfpu=neon -mfloat-abi=softfp';
+  '-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
 * issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
   into the unit containing the NEON code from a compilation unit which is *not*
   built with the GCC flag '-mfpu=neon' set.
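Putting those rules together, a minimal sketch of a conforming caller; my_neon_memcpy() is a hypothetical routine defined in the separately compiled NEON unit, and the wrapper itself is built without '-mfpu=neon':

    /* caller.c -- compiled WITHOUT -mfpu=neon */
    #include <asm/neon.h>   /* kernel_neon_begin()/kernel_neon_end() */

    /* hypothetical routine, defined in the unit built with
     * '-march=armv7-a -mfpu=neon -mfloat-abi=softfp' */
    void my_neon_memcpy(void *dst, const void *src, int len);

    static void do_copy(void *dst, const void *src, int len)
    {
            kernel_neon_begin();    /* NEON usable; no sleeping from here */
            my_neon_memcpy(dst, src, len);
            kernel_neon_end();
    }
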
index 209d931ef16c4e53e46e8378b09f5cf04b121586..b67f8caa212c8fa8959944cf3a8cd2152ba68f92 100644 (file)
@@ -36,7 +36,6 @@ ssd1307: oled@3c {
         reg = <0x3c>;
         pwms = <&pwm 4 3000>;
         reset-gpios = <&gpio2 7>;
-        reset-active-low;
 };
 
 ssd1306: oled@3c {
@@ -44,7 +43,6 @@ ssd1306: oled@3c {
         reg = <0x3c>;
         pwms = <&pwm 4 3000>;
         reset-gpios = <&gpio2 7>;
-        reset-active-low;
         solomon,com-lrremap;
         solomon,com-invdir;
         solomon,com-offset = <32>;
index 66b3f54aa6dc48bacd70e646092669a2e1a323e8..9267f3fb131f9b5cf0e027ba9bdd9ff51a93d5b8 100644 (file)
@@ -111,7 +111,8 @@ negotiated size) and send larger write sizes to modern servers.
 
 5) Continue to extend the smb3 "buildbot" which does automated xfstesting
 against Windows, Samba and Azure currently - to add additional tests and
-to allow the buildbot to execute the tests faster.
+to allow the buildbot to execute the tests faster. The URL for the
+buildbot is: http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com
 
 6) Address various coverity warnings (most are not bugs per-se, but
 the more warnings are addressed, the easier it is to spot real
index 67756607246e767a9105bc06c00a7b97730abbb1..1be3d21c286ece45d9024dfe9d718d46b5de66e0 100644 (file)
@@ -1,16 +1,21 @@
   This is the client VFS module for the SMB3 NAS protocol as well
-  older dialects such as the Common Internet File System (CIFS)
+  as for older dialects such as the Common Internet File System (CIFS)
   protocol which was the successor to the Server Message Block
   (SMB) protocol, the native file sharing mechanism for most early
   PC operating systems. New and improved versions of CIFS are now
-  called SMB2 and SMB3. These dialects are also supported by the
-  CIFS VFS module. CIFS is fully supported by network
-  file servers such as Windows 2000, 2003, 2008, 2012 and 2016
-  as well by Samba (which provides excellent CIFS
-  server support for Linux and many other operating systems), Apple
-  systems, as well as most Network Attached Storage vendors, so
-  this network filesystem client can mount to a wide variety of
-  servers.
+  called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1)
+  is strongly preferred over using older dialects like CIFS due to
+  security reasons. All modern dialects, including the most recent,
+  SMB3.1.1, are supported by the CIFS VFS module. The SMB3 protocol
+  is implemented and supported by all major file servers
+  such as all modern versions of Windows (including Windows 2016
+  Server), as well as by Samba (which provides excellent
+  CIFS/SMB2/SMB3 server support and tools for Linux and many other
+  operating systems).  Apple systems also support SMB3 well, as
+  do most Network Attached Storage vendors, so this network
+  filesystem client can mount to a wide variety of systems.
+  It also supports mounting to the cloud (for example
+  Microsoft Azure), including the necessary security features.
 
   The intent of this module is to provide the most advanced network
   file system function for SMB3 compliant servers, including advanced
   cluster file systems for fileserving in some Linux to Linux environments,
   not just in Linux to Windows (or Linux to Mac) environments.
 
-  This filesystem has an mount utility (mount.cifs) that can be obtained from
+  This filesystem has a mount utility (mount.cifs) and various user space
+  tools (including smbinfo and setcifsacl) that can be obtained from
 
-      https://ftp.samba.org/pub/linux-cifs/cifs-utils/
+      https://git.samba.org/?p=cifs-utils.git
+  or
+      git://git.samba.org/cifs-utils.git
 
-  It must be installed in the directory with the other mount helpers.
+  mount.cifs should be installed in the directory with the other mount helpers.
 
   For more information on the module see the project wiki page at
 
+      https://wiki.samba.org/index.php/LinuxCIFS
+  and
       https://wiki.samba.org/index.php/LinuxCIFS_utils
index e46c2147ddf8e02083622c0976fb63da061c85ee..f7b5e4ff0de3e1a196cf7d66f4780345e24829b1 100644 (file)
@@ -126,6 +126,8 @@ disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
                        is not aware of cold files such as media files.
 inline_xattr           Enable the inline xattrs feature.
 noinline_xattr         Disable the inline xattrs feature.
+inline_xattr_size=%u   Support configuring inline xattr size; it depends on
+                      the flexible inline xattr feature.
 inline_data            Enable the inline data feature: New created small(<~3.4k)
                        files can be written into inode block.
 inline_dentry          Enable the inline dir feature: data in new created
index 356156f5c52d299b13481ccc516f15388c5797aa..7de9eee73fcd9d533aec2c1bc88d413f6216db73 100644 (file)
@@ -45,6 +45,23 @@ the API.  The only supported use is one virtual machine per process,
 and one vcpu per thread.
 
 
+It is important to note that although VM ioctls may only be issued from
+the process that created the VM, a VM's lifecycle is associated with its
+file descriptor, not its creator (process).  In other words, the VM and
+its resources, *including the associated address space*, are not freed
+until the last reference to the VM's file descriptor has been released.
+For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
+not be freed until both the parent (original) process and its child have
+put their references to the VM's file descriptor.
+
+Because a VM's resources are not freed until the last reference to its
+file descriptor is released, creating additional references to a VM
+via fork(), dup(), etc... without careful consideration is strongly
+discouraged and may have unwanted side effects, e.g. memory allocated
+by and on behalf of the VM's process may not be freed/unaccounted when
+the VM is shut down.
+
+
 3. Extensions
 -------------
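The lifecycle paragraph added above can be demonstrated from userspace. A hedged sketch follows (error handling omitted; shown only to illustrate that the fd reference, not the creating process, pins the VM):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
            int vm  = ioctl(kvm, KVM_CREATE_VM, 0); /* fd reference pins the VM */

            if (fork() == 0) {
                    /* the child inherits vm: the VM now has two references
                     * and is not freed until both have been put */
                    close(vm);
                    _exit(0);
            }
            wait(NULL);
            close(vm);      /* last reference dropped: VM and its memory go away */
            return 0;
    }
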
 
index 4a841831876978cbc5396f8fc88232815cfd786c..4f791b128dd27a0ed9bc4ad79eddc8794bcab2bd 100644 (file)
@@ -53,7 +53,8 @@ the global max polling interval then the polling interval can be increased in
 the hope that next time during the longer polling interval the wake up source
 will be received while the host is polling and the latency benefits will be
 received. The polling interval is grown in the function grow_halt_poll_ns() and
-is multiplied by the module parameter halt_poll_ns_grow.
+is multiplied by the module parameter halt_poll_ns_grow, growing from
+an initial value of halt_poll_ns_grow_start.
 
 In the event that the total block time was greater than the global max polling
 interval then the host will never poll for long enough (limited by the global
@@ -80,22 +81,30 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
 parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
 powerpc kvm-hv case.
 
-Module Parameter    |       Description              |      Default Value
+Module Parameter       |   Description             |        Default Value
 --------------------------------------------------------------------------------
-halt_poll_ns       | The global max polling interval | KVM_HALT_POLL_NS_DEFAULT
-                   | which defines the ceiling value |
-                   | of the polling interval for     | (per arch value)
-                   | each vcpu.                      |
+halt_poll_ns           | The global max polling    | KVM_HALT_POLL_NS_DEFAULT
+                       | interval which defines    |
+                       | the ceiling value of the  |
+                       | polling interval for      | (per arch value)
+                       | each vcpu.                |
 --------------------------------------------------------------------------------
-halt_poll_ns_grow   | The value by which the halt     |        2
-                   | polling interval is multiplied  |
-                   | in the grow_halt_poll_ns()      |
-                   | function.                       |
+halt_poll_ns_grow      | The value by which the    | 2
+                       | halt polling interval is  |
+                       | multiplied in the         |
+                       | grow_halt_poll_ns()       |
+                       | function.                 |
 --------------------------------------------------------------------------------
-halt_poll_ns_shrink | The value by which the halt     |        0
-                   | polling interval is divided in  |
-                   | the shrink_halt_poll_ns()       |
-                   | function.                       |
+halt_poll_ns_grow_start | The initial value to grow | 10000
+                       | to from zero in the       |
+                       | grow_halt_poll_ns()       |
+                       | function.                 |
+--------------------------------------------------------------------------------
+halt_poll_ns_shrink    | The value by which the    | 0
+                       | halt polling interval is  |
+                       | divided in the            |
+                       | shrink_halt_poll_ns()     |
+                       | function.                 |
 --------------------------------------------------------------------------------
 
 These module parameters can be set from the debugfs files in:
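A simplified model of the grow policy, inferred from the table above (a sketch, not the code in virt/kvm/kvm_main.c):

    /* Grow the per-vcpu polling interval: multiply by halt_poll_ns_grow,
     * jumping up to halt_poll_ns_grow_start when starting from zero; the
     * caller still clamps the result to the global halt_poll_ns ceiling. */
    static unsigned int grow_poll_interval(unsigned int val,
                                           unsigned int grow,
                                           unsigned int grow_start)
    {
            if (grow == 0)
                    return val;     /* growing disabled */
            val *= grow;
            if (val < grow_start)
                    val = grow_start;
            return val;
    }
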
index e507a9e0421ed22e630425074e053303f5e990bf..f365102c80f5dd64133cbe60a8a4fd76fc86393d 100644 (file)
@@ -224,10 +224,6 @@ Shadow pages contain the following information:
     A bitmap indicating which sptes in spt point (directly or indirectly) at
     pages that may be unsynchronized.  Used to quickly locate all unsynchronized
     pages reachable from a given page.
-  mmu_valid_gen:
-    Generation number of the page.  It is compared with kvm->arch.mmu_valid_gen
-    during hash table lookup, and used to skip invalidated shadow pages (see
-    "Zapping all pages" below.)
   clear_spte_count:
     Only present on 32-bit hosts, where a 64-bit spte cannot be written
     atomically.  The reader uses this while running out of the MMU lock
@@ -402,27 +398,6 @@ causes its disallow_lpage to be incremented, thus preventing instantiation of
 a large spte.  The frames at the end of an unaligned memory slot have
 artificially inflated ->disallow_lpages so they can never be instantiated.
 
-Zapping all pages (page generation count)
-=========================================
-
-For the large memory guests, walking and zapping all pages is really slow
-(because there are a lot of pages), and also blocks memory accesses of
-all VCPUs because it needs to hold the MMU lock.
-
-To make it be more scalable, kvm maintains a global generation number
-which is stored in kvm->arch.mmu_valid_gen.  Every shadow page stores
-the current global generation-number into sp->mmu_valid_gen when it
-is created.  Pages with a mismatching generation number are "obsolete".
-
-When KVM need zap all shadow pages sptes, it just simply increases the global
-generation-number then reload root shadow pages on all vcpus.  As the VCPUs
-create new shadow page tables, the old pages are not used because of the
-mismatching generation number.
-
-KVM then walks through all pages and zaps obsolete pages.  While the zap
-operation needs to take the MMU lock, the lock can be released periodically
-so that the VCPUs can make progress.
-
 Fast invalidation of MMIO sptes
 ===============================
 
@@ -435,8 +410,7 @@ shadow pages, and is made more scalable with a similar technique.
 MMIO sptes have a few spare bits, which are used to store a
 generation number.  The global generation number is stored in
 kvm_memslots(kvm)->generation, and increased whenever guest memory info
-changes.  This generation number is distinct from the one described in
-the previous section.
+changes.
 
 When KVM finds an MMIO spte, it checks the generation number of the spte.
 If the generation number of the spte does not equal the global generation
@@ -452,13 +426,16 @@ stored into the MMIO spte.  Thus, the MMIO spte might be created based on
 out-of-date information, but with an up-to-date generation number.
 
 To avoid this, the generation number is incremented again after synchronize_srcu
-returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+returns; thus, bit 63 of kvm_memslots(kvm)->generation is set to 1 only during a
 memslot update, while some SRCU readers might be using the old copy.  We do not
 want to use an MMIO sptes created with an odd generation number, and we can do
-this without losing a bit in the MMIO spte.  The low bit of the generation
-is not stored in MMIO spte, and presumed zero when it is extracted out of the
-spte.  If KVM is unlucky and creates an MMIO spte while the low bit is 1,
-the next access to the spte will always be a cache miss.
+this without losing a bit in the MMIO spte.  The "update in-progress" bit of the
+generation is not stored in MMIO spte, and so is implicitly zero when the
+generation is extracted out of the spte.  If KVM is unlucky and creates an MMIO
+spte while an update is in-progress, the next access to the spte will always be
+a cache miss.  For example, a subsequent access during the update window will
+miss due to the in-progress flag diverging, while an access after the update
+window closes will have a higher generation number (as compared to the spte).
 
 
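A toy model of the generation check described above (illustrative only; GEN_IN_PROGRESS stands in for bit 63 of kvm_memslots(kvm)->generation):

    #define GEN_IN_PROGRESS (1ULL << 63)    /* "update in-progress" flag */

    /* The flag is never stored in the spte, so it reads back as zero. */
    static unsigned long long mmio_spte_gen(unsigned long long memslots_gen)
    {
            return memslots_gen & ~GEN_IN_PROGRESS;
    }

    /* Any mismatch is treated as a cache miss: during an update window
     * the live generation carries the flag, so an spte created then can
     * never match; once the window closes the generation has moved on. */
    static int mmio_spte_is_stale(unsigned long long spte_gen,
                                  unsigned long long memslots_gen)
    {
            return spte_gen != memslots_gen;
    }
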
 Further reading
index f8ff9ae52c21085372c31d04a1c7ed4f02cd1494..e17ebf70b5480ecc232ce1f62aedf95a03b5f403 100644 (file)
@@ -5278,7 +5278,7 @@ DRM DRIVERS FOR VIVANTE GPU IP
 M:     Lucas Stach <l.stach@pengutronix.de>
 R:     Russell King <linux+etnaviv@armlinux.org.uk>
 R:     Christian Gmeiner <christian.gmeiner@gmail.com>
-L:     etnaviv@lists.freedesktop.org
+L:     etnaviv@lists.freedesktop.org (moderated for non-subscribers)
 L:     dri-devel@lists.freedesktop.org
 S:     Maintained
 F:     drivers/gpu/drm/etnaviv/
@@ -8461,6 +8461,7 @@ F:        include/linux/kvm*
 F:     include/kvm/iodev.h
 F:     virt/kvm/*
 F:     tools/kvm/
+F:     tools/testing/selftests/kvm/
 
 KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd)
 M:     Joerg Roedel <joro@8bytes.org>
@@ -8470,29 +8471,25 @@ S:      Maintained
 F:     arch/x86/include/asm/svm.h
 F:     arch/x86/kvm/svm.c
 
-KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
+KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64)
 M:     Christoffer Dall <christoffer.dall@arm.com>
 M:     Marc Zyngier <marc.zyngier@arm.com>
+R:     James Morse <james.morse@arm.com>
+R:     Julien Thierry <julien.thierry@arm.com>
+R:     Suzuki K Poulose <suzuki.poulose@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     kvmarm@lists.cs.columbia.edu
 W:     http://systems.cs.columbia.edu/projects/kvm-arm
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
-S:     Supported
+S:     Maintained
 F:     arch/arm/include/uapi/asm/kvm*
 F:     arch/arm/include/asm/kvm*
 F:     arch/arm/kvm/
-F:     virt/kvm/arm/
-F:     include/kvm/arm_*
-
-KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
-M:     Christoffer Dall <christoffer.dall@arm.com>
-M:     Marc Zyngier <marc.zyngier@arm.com>
-L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:     kvmarm@lists.cs.columbia.edu
-S:     Maintained
 F:     arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/include/asm/kvm*
 F:     arch/arm64/kvm/
+F:     virt/kvm/arm/
+F:     include/kvm/arm_*
 
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 M:     James Hogan <jhogan@kernel.org>
index 5085a1eab9fc538adbc7e07f84c7ef89f8195949..054ead960f983a99a9f241ce1427fe0e1cd6cb8a 100644 (file)
@@ -1310,7 +1310,7 @@ config SCHED_SMT
 config HAVE_ARM_SCU
        bool
        help
-         This option enables support for the ARM system coherency unit
+         This option enables support for the ARM snoop control unit
 
 config HAVE_ARM_ARCH_TIMER
        bool "Architected timer support"
@@ -1322,7 +1322,6 @@ config HAVE_ARM_ARCH_TIMER
 
 config HAVE_ARM_TWD
        bool
-       select TIMER_OF if OF
        help
          This option enables support for the ARM timer and watchdog unit
 
index 1168a03c85255fbe7295c0fad7f77fb7d2e9b2af..36c80d3dd93f2fe64f83d21482ba74fe9fe8cf88 100644 (file)
@@ -20,10 +20,12 @@ config DRAM_SIZE
 
 config FLASH_MEM_BASE
        hex 'FLASH Base Address' if SET_MEM_PARAM
+       depends on CPU_ARM740T || CPU_ARM946E || CPU_ARM940T
        default 0x00400000
 
 config FLASH_SIZE
        hex 'FLASH Size' if SET_MEM_PARAM
+       depends on CPU_ARM740T || CPU_ARM946E || CPU_ARM940T
        default 0x00400000
 
 config PROCESSOR_ID
index 00000e91ad652897624e7a39061292999292199c..807a7d06c2a0825bed8f8ea8c1d9d8c7eb5f8be6 100644 (file)
@@ -10,7 +10,7 @@
 #
 # Copyright (C) 1995-2001 by Russell King
 
-LDFLAGS_vmlinux        :=-p --no-undefined -X --pic-veneer
+LDFLAGS_vmlinux        := --no-undefined -X --pic-veneer
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux        += --be8
 KBUILD_LDFLAGS_MODULE  += --be8
index 83e1a076a5d64a095d558d214f541f9407e027fa..981a8d03f064c24f47d7fa69294a5e14a3805fa4 100644 (file)
@@ -8,7 +8,7 @@
 
 GCOV_PROFILE   := n
 
-LDFLAGS_bootp  :=-p --no-undefined -X \
+LDFLAGS_bootp  := --no-undefined -X \
                 --defsym initrd_phys=$(INITRD_PHYS) \
                 --defsym params_phys=$(PARAMS_PHYS) -T
 AFLAGS_initrd.o :=-DINITRD=\"$(INITRD)\"
index 78b508075161fd4967bdbb3821eab162aa9dd5bc..142927e5f485adfe034855af467013ca0bbe8543 100644 (file)
@@ -44,7 +44,7 @@ _start:               add     lr, pc, #-0x8           @ lr = current load addr
  */
                movne   r10, #0                 @ terminator
                movne   r4, #2                  @ Size of this entry (2 words)
-               stmneia r9, {r4, r5, r10}       @ Size, ATAG_CORE, terminator
+               stmiane r9, {r4, r5, r10}       @ Size, ATAG_CORE, terminator
 
 /*
  * find the end of the tag list, and then add an INITRD tag on the end.
index 6114ae6ea4666ddee8e5429b6074c7b5249af229..9219389bbe612799fd473a36fb10261596325548 100644 (file)
@@ -132,8 +132,6 @@ endif
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux += --be8
 endif
-# ?
-LDFLAGS_vmlinux += -p
 # Report unresolved symbol references
 LDFLAGS_vmlinux += --no-undefined
 # Delete all temporary local symbols
index 8517c8606b4a7f1bafabd39556b8aa2390910bc1..b1dcdb9f4030e22b65fa1838a4b68cbeb847fa49 100644 (file)
@@ -75,7 +75,7 @@ Lrow4bpplp:
        tst     r1, #7                          @ avoid using r7 directly after
        str     r7, [r0, -r5]!
        subne   r1, r1, #1
-       ldrneb  r7, [r6, r1]
+       ldrbne  r7, [r6, r1]
        bne     Lrow4bpplp
        ldmfd   sp!, {r4 - r7, pc}
 
@@ -103,7 +103,7 @@ Lrow8bpplp:
        sub     r0, r0, r5                      @ avoid ip
        stmia   r0, {r4, ip}
        subne   r1, r1, #1
-       ldrneb  r7, [r6, r1]
+       ldrbne  r7, [r6, r1]
        bne     Lrow8bpplp
        ldmfd   sp!, {r4 - r7, pc}
 
index d3e3622979c5bf9379dd0f3b9cd5ce55048975f9..de48b5808ef6e97efee62f4e7e2c1a257c770970 100644 (file)
@@ -11,6 +11,7 @@
 
 /dts-v1/;
 #include "imx28.dtsi"
+#include <dt-bindings/gpio/gpio.h>
 
 / {
        model = "Crystalfontz CFA-10036 Board";
@@ -96,7 +97,7 @@
                                        pinctrl-names = "default";
                                        pinctrl-0 = <&ssd1306_cfa10036>;
                                        reg = <0x3c>;
-                                       reset-gpios = <&gpio2 7 0>;
+                                       reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>;
                                        solomon,height = <32>;
                                        solomon,width = <128>;
                                        solomon,page-offset = <0>;
index ad574d20415c219f408bb494a1bec3de6a7fcbac..1b1b82b37ce035f97e17e40be6d81d913ee27e61 100644 (file)
@@ -381,7 +381,7 @@ static int __init nocache_trampoline(unsigned long _arg)
        unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
        phys_reset_t phys_reset;
 
-       mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+       mcpm_set_entry_vector(cpu, cluster, cpu_resume_no_hyp);
        setup_mm_for_reboot();
 
        __mcpm_cpu_going_down(cpu, cluster);
index f6f485f4744e034679f5b4483daa74d98a3907d3..d15b8c99f1b3c994967ac6840e0c223eb92c1e0c 100644 (file)
@@ -55,7 +55,7 @@
 #define ICH_VTR                                __ACCESS_CP15(c12, 4, c11, 1)
 #define ICH_MISR                       __ACCESS_CP15(c12, 4, c11, 2)
 #define ICH_EISR                       __ACCESS_CP15(c12, 4, c11, 3)
-#define ICH_ELSR                       __ACCESS_CP15(c12, 4, c11, 5)
+#define ICH_ELRSR                      __ACCESS_CP15(c12, 4, c11, 5)
 #define ICH_VMCR                       __ACCESS_CP15(c12, 4, c11, 7)
 
 #define __LR0(x)                       __ACCESS_CP15(c12, 4, c12, x)
@@ -152,7 +152,7 @@ CPUIF_MAP(ICH_HCR, ICH_HCR_EL2)
 CPUIF_MAP(ICH_VTR, ICH_VTR_EL2)
 CPUIF_MAP(ICH_MISR, ICH_MISR_EL2)
 CPUIF_MAP(ICH_EISR, ICH_EISR_EL2)
-CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2)
+CPUIF_MAP(ICH_ELRSR, ICH_ELRSR_EL2)
 CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2)
 CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2)
 CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2)
index 28a48e0d4cca04f65b8bc90179c8daf5a3632397..b59921a560da3ea0fb72baa5194a1aced4239c6e 100644 (file)
@@ -376,9 +376,9 @@ THUMB(      orr     \reg , \reg , #PSR_T_BIT        )
        .macro  usraccoff, instr, reg, ptr, inc, off, cond, abort, t=TUSER()
 9999:
        .if     \inc == 1
-       \instr\cond\()b\()\t\().w \reg, [\ptr, #\off]
+       \instr\()b\t\cond\().w \reg, [\ptr, #\off]
        .elseif \inc == 4
-       \instr\cond\()\t\().w \reg, [\ptr, #\off]
+       \instr\t\cond\().w \reg, [\ptr, #\off]
        .else
        .error  "Unsupported inc macro argument"
        .endif
@@ -417,9 +417,9 @@ THUMB(      orr     \reg , \reg , #PSR_T_BIT        )
        .rept   \rept
 9999:
        .if     \inc == 1
-       \instr\cond\()b\()\t \reg, [\ptr], #\inc
+       \instr\()b\t\cond \reg, [\ptr], #\inc
        .elseif \inc == 4
-       \instr\cond\()\t \reg, [\ptr], #\inc
+       \instr\t\cond \reg, [\ptr], #\inc
        .else
        .error  "Unsupported inc macro argument"
        .endif
@@ -460,7 +460,7 @@ THUMB(      orr     \reg , \reg , #PSR_T_BIT        )
        .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req
 #ifndef CONFIG_CPU_USE_DOMAINS
        adds    \tmp, \addr, #\size - 1
-       sbcccs  \tmp, \tmp, \limit
+       sbcscc  \tmp, \tmp, \limit
        bcs     \bad
 #ifdef CONFIG_CPU_SPECTRE
        movcs   \addr, #0
@@ -474,7 +474,7 @@ THUMB(      orr     \reg , \reg , #PSR_T_BIT        )
        sub     \tmp, \limit, #1
        subs    \tmp, \tmp, \addr       @ tmp = limit - 1 - addr
        addhs   \tmp, \tmp, #1          @ if (tmp >= 0) {
-       subhss  \tmp, \tmp, \size       @ tmp = limit - (addr + size) }
+       subshs  \tmp, \tmp, \size       @ tmp = limit - (addr + size) }
        movlo   \addr, #0               @ if (tmp < 0) addr = NULL
        csdb
 #endif
index 69772e742a0acdc16dbf76f2130a8025af41b6c0..83ae97c049d9bd48b474f0127164c71628bf05c0 100644 (file)
@@ -11,6 +11,8 @@
 #define sev()  __asm__ __volatile__ ("sev" : : : "memory")
 #define wfe()  __asm__ __volatile__ ("wfe" : : : "memory")
 #define wfi()  __asm__ __volatile__ ("wfi" : : : "memory")
+#else
+#define wfe()  do { } while (0)
 #endif
 
 #if __LINUX_ARM_ARCH__ >= 7
index 8c215acd9b573232a6818de2cd847047bdc6bb6e..f7692731e514359a6a8fb66cb229444b9cf9fabe 100644 (file)
                ldr     \tmp, =irq_prio_h
                teq     \irqstat, #0
 #ifdef IOMD_BASE
-               ldreqb  \irqstat, [\base, #IOMD_DMAREQ] @ get dma
+               ldrbeq  \irqstat, [\base, #IOMD_DMAREQ] @ get dma
                addeq   \tmp, \tmp, #256                @ irq_prio_h table size
                teqeq   \irqstat, #0
                bne     2406f
 #endif
-               ldreqb  \irqstat, [\base, #IOMD_IRQREQA]        @ get low priority
+               ldrbeq  \irqstat, [\base, #IOMD_IRQREQA]        @ get low priority
                addeq   \tmp, \tmp, #256                @ irq_prio_d table size
                teqeq   \irqstat, #0
 #ifdef IOMD_IRQREQC
-               ldreqb  \irqstat, [\base, #IOMD_IRQREQC]
+               ldrbeq  \irqstat, [\base, #IOMD_IRQREQC]
                addeq   \tmp, \tmp, #256                @ irq_prio_l table size
                teqeq   \irqstat, #0
 #endif
 #ifdef IOMD_IRQREQD
-               ldreqb  \irqstat, [\base, #IOMD_IRQREQD]
+               ldrbeq  \irqstat, [\base, #IOMD_IRQREQD]
                addeq   \tmp, \tmp, #256                @ irq_prio_lc table size
                teqeq   \irqstat, #0
 #endif
-2406:          ldrneb  \irqnr, [\tmp, \irqstat]        @ get IRQ number
+2406:          ldrbne  \irqnr, [\tmp, \irqstat]        @ get IRQ number
                .endm
 
 /*
index 77121b713bef3adbc4b2c77c559dab1886dec96b..8927cae7c96662a4f4bb3187a8beca641cbe6e5a 100644 (file)
@@ -265,6 +265,14 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
        }
 }
 
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vcpu_trap_is_iabt(vcpu))
+               return false;
+
+       return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 {
        return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
index 50e89869178a9725f0bb6c8bb2082bc186fcbab7..770d73257ad936d6dea09f11d05b9639bd00b051 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
+#include <asm/smp_plat.h>
 #include <kvm/arm_arch_timer.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -57,10 +58,13 @@ int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
-struct kvm_arch {
-       /* VTTBR value associated with below pgd and vmid */
-       u64    vttbr;
+struct kvm_vmid {
+       /* The VMID generation used for the virt. memory system */
+       u64    vmid_gen;
+       u32    vmid;
+};
 
+struct kvm_arch {
        /* The last vcpu id that ran on each physical CPU */
        int __percpu *last_vcpu_ran;
 
@@ -70,11 +74,11 @@ struct kvm_arch {
         */
 
        /* The VMID generation used for the virt. memory system */
-       u64    vmid_gen;
-       u32    vmid;
+       struct kvm_vmid vmid;
 
        /* Stage-2 page table */
        pgd_t *pgd;
+       phys_addr_t pgd_phys;
 
        /* Interrupt controller */
        struct vgic_dist        vgic;
@@ -148,6 +152,13 @@ struct kvm_cpu_context {
 
 typedef struct kvm_cpu_context kvm_cpu_context_t;
 
+static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+                                            int cpu)
+{
+       /* The host's MPIDR is immutable, so let's set it up at boot time */
+       cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu);
+}
+
 struct vcpu_reset_state {
        unsigned long   pc;
        unsigned long   r0;
@@ -224,7 +235,35 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-unsigned long kvm_call_hyp(void *hypfn, ...);
+
+unsigned long __kvm_call_hyp(void *hypfn, ...);
+
+/*
+ * The has_vhe() part doesn't get emitted, but is used for type-checking.
+ */
+#define kvm_call_hyp(f, ...)                                           \
+       do {                                                            \
+               if (has_vhe()) {                                        \
+                       f(__VA_ARGS__);                                 \
+               } else {                                                \
+                       __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
+               }                                                       \
+       } while(0)
+
+#define kvm_call_hyp_ret(f, ...)                                       \
+       ({                                                              \
+               typeof(f(__VA_ARGS__)) ret;                             \
+                                                                       \
+               if (has_vhe()) {                                        \
+                       ret = f(__VA_ARGS__);                           \
+               } else {                                                \
+                       ret = __kvm_call_hyp(kvm_ksym_ref(f),           \
+                                            ##__VA_ARGS__);            \
+               }                                                       \
+                                                                       \
+               ret;                                                    \
+       })
+
 void force_vm_exit(const cpumask_t *mask);
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
                              struct kvm_vcpu_events *events);
@@ -275,7 +314,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
         * compliant with the PCS!).
         */
 
-       kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
+       __kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
 static inline void __cpu_init_stage2(void)
index e93a0cac9addc5bf8caf9fa2bcca34223ee1c281..87bcd18df8d58eb83175f54f7220088cbb287691 100644 (file)
@@ -40,6 +40,7 @@
 #define TTBR1          __ACCESS_CP15_64(1, c2)
 #define VTTBR          __ACCESS_CP15_64(6, c2)
 #define PAR            __ACCESS_CP15_64(0, c7)
+#define CNTP_CVAL      __ACCESS_CP15_64(2, c14)
 #define CNTV_CVAL      __ACCESS_CP15_64(3, c14)
 #define CNTVOFF                __ACCESS_CP15_64(4, c14)
 
@@ -85,6 +86,7 @@
 #define TID_PRIV       __ACCESS_CP15(c13, 0, c0, 4)
 #define HTPIDR         __ACCESS_CP15(c13, 4, c0, 2)
 #define CNTKCTL                __ACCESS_CP15(c14, 0, c1, 0)
+#define CNTP_CTL       __ACCESS_CP15(c14, 0, c2, 1)
 #define CNTV_CTL       __ACCESS_CP15(c14, 0, c3, 1)
 #define CNTHCTL                __ACCESS_CP15(c14, 4, c1, 0)
 
@@ -94,6 +96,8 @@
 #define read_sysreg_el0(r)             read_sysreg(r##_el0)
 #define write_sysreg_el0(v, r)         write_sysreg(v, r##_el0)
 
+#define cntp_ctl_el0                   CNTP_CTL
+#define cntp_cval_el0                  CNTP_CVAL
 #define cntv_ctl_el0                   CNTV_CTL
 #define cntv_cval_el0                  CNTV_CVAL
 #define cntvoff_el2                    CNTVOFF
index 3a875fc1b63ca3416c071afa9536262525915025..2de96a180166eb920833b1100159716735f5e206 100644 (file)
@@ -421,9 +421,14 @@ static inline int hyp_map_aux_data(void)
 
 static inline void kvm_set_ipa_limit(void) {}
 
-static inline bool kvm_cpu_has_cnp(void)
+static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
 {
-       return false;
+       struct kvm_vmid *vmid = &kvm->arch.vmid;
+       u64 vmid_field, baddr;
+
+       baddr = kvm->arch.pgd_phys;
+       vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+       return kvm_phys_to_vttbr(baddr) | vmid_field;
 }
 
 #endif /* !__ASSEMBLY__ */
index a757401129f9567cbdebea5249b60e7e9a117e87..48ce1b19069b67d86bb97525cc3bd7e54384f33f 100644 (file)
@@ -125,6 +125,9 @@ extern pgprot_t             pgprot_s2_device;
 #define pgprot_stronglyordered(prot) \
        __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
 
+#define pgprot_device(prot) \
+       __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_DEV_SHARED | L_PTE_SHARED | L_PTE_DIRTY | L_PTE_XN)
+
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
        __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
index 120f4c9bbfde2a3fbade0fa5e611aca635bc6b82..57fe73ea0f7258af4315ea6b7fcecea7566d838e 100644 (file)
@@ -89,7 +89,11 @@ extern void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
 #if __LINUX_ARM_ARCH__ == 6 || defined(CONFIG_ARM_ERRATA_754327)
-#define cpu_relax()                    smp_mb()
+#define cpu_relax()                                            \
+       do {                                                    \
+               smp_mb();                                       \
+               __asm__ __volatile__("nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;");      \
+       } while (0)
 #else
 #define cpu_relax()                    barrier()
 #endif
index 709a55989cb0641f7006f5a1765898aa9fb972c3..451ae684aaf48ff0822bafd30849d9c82cacf6f7 100644 (file)
@@ -67,7 +67,6 @@ struct secondary_data {
        void *stack;
 };
 extern struct secondary_data secondary_data;
-extern volatile int pen_release;
 extern void secondary_startup(void);
 extern void secondary_startup_arm(void);
 
index 312784ee9936ae4fe6da6459d9ca452ada438d07..c729d2113a2457e32a32ca989fa05404c52625e0 100644 (file)
 #define TWD_TIMER_CONTROL_PERIODIC     (1 << 1)
 #define TWD_TIMER_CONTROL_IT_ENABLE    (1 << 2)
 
-#include <linux/ioport.h>
-
-struct twd_local_timer {
-       struct resource res[2];
-};
-
-#define DEFINE_TWD_LOCAL_TIMER(name,base,irq)  \
-struct twd_local_timer name __initdata = {     \
-       .res    = {                             \
-               DEFINE_RES_MEM(base, 0x10),     \
-               DEFINE_RES_IRQ(irq),            \
-       },                                      \
-};
-
-int twd_local_timer_register(struct twd_local_timer *);
-
 #endif
index 099c78fcf62d43cd0a123b4d520d44a5d853a813..8f009e788ad401766b5b9456b6ac9f1ec75e84fe 100644 (file)
@@ -210,11 +210,12 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
        prefetchw(&rw->lock);
        __asm__ __volatile__(
+"      .syntax unified\n"
 "1:    ldrex   %0, [%2]\n"
 "      adds    %0, %0, #1\n"
 "      strexpl %1, %0, [%2]\n"
        WFE("mi")
-"      rsbpls  %0, %1, #0\n"
+"      rsbspl  %0, %1, #0\n"
 "      bmi     1b"
        : "=&r" (tmp), "=&r" (tmp2)
        : "r" (&rw->lock)
index 452bbdcbcc835fc838b4c957f2da3e6dafd2fdc9..506314265c6f1a24e50641dbbea1d41367d322d5 100644 (file)
@@ -10,6 +10,7 @@ struct sleep_save_sp {
 };
 
 extern void cpu_resume(void);
+extern void cpu_resume_no_hyp(void);
 extern void cpu_resume_arm(void);
 extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
index ae5a0df5316e5780121bcc8de5f78b847222e9e7..dff49845eb87628a007776e695fc4103db1a7a58 100644 (file)
@@ -85,7 +85,8 @@ static inline void set_fs(mm_segment_t fs)
 #define __range_ok(addr, size) ({ \
        unsigned long flag, roksum; \
        __chk_user_ptr(addr);   \
-       __asm__("adds %1, %2, %3; sbcccs %1, %1, %0; movcc %0, #0" \
+       __asm__(".syntax unified\n" \
+               "adds %1, %2, %3; sbcscc %1, %1, %0; movcc %0, #0" \
                : "=&r" (flag), "=&r" (roksum) \
                : "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \
                : "cc"); \
index 187ccf6496ad61c222dc6e53102ec8a5b6ccf881..2cb00d15831b93e9e10164134d1f72cdb64c4bb0 100644 (file)
@@ -49,7 +49,7 @@
  * (0 -> msp; 1 -> psp). Bits [1:0] are fixed to 0b01.
  */
 #define EXC_RET_STACK_MASK                     0x00000004
-#define EXC_RET_THREADMODE_PROCESSSTACK                0xfffffffd
+#define EXC_RET_THREADMODE_PROCESSSTACK                (3 << 2)
 
 /* Cache related definitions */
 
index ef5dfedacd8d642bdfe27147e37bdec0a6f055bb..628c336e8e3b20c5918f58d9d8f5139b10723953 100644 (file)
        ldr     \tmp, =elf_hwcap                    @ may not have MVFR regs
        ldr     \tmp, [\tmp, #0]
        tst     \tmp, #HWCAP_VFPD32
-       ldcnel  p11, cr0, [\base],#32*4             @ FLDMIAD \base!, {d16-d31}
+       ldclne  p11, cr0, [\base],#32*4             @ FLDMIAD \base!, {d16-d31}
        addeq   \base, \base, #32*4                 @ step over unused register space
 #else
        VFPFMRX \tmp, MVFR0                         @ Media and VFP Feature Register 0
        and     \tmp, \tmp, #MVFR0_A_SIMD_MASK      @ A_SIMD field
        cmp     \tmp, #2                            @ 32 x 64bit registers?
-       ldceql  p11, cr0, [\base],#32*4             @ FLDMIAD \base!, {d16-d31}
+       ldcleq  p11, cr0, [\base],#32*4             @ FLDMIAD \base!, {d16-d31}
        addne   \base, \base, #32*4                 @ step over unused register space
 #endif
 #endif
        ldr     \tmp, =elf_hwcap                    @ may not have MVFR regs
        ldr     \tmp, [\tmp, #0]
        tst     \tmp, #HWCAP_VFPD32
-       stcnel  p11, cr0, [\base],#32*4             @ FSTMIAD \base!, {d16-d31}
+       stclne  p11, cr0, [\base],#32*4             @ FSTMIAD \base!, {d16-d31}
        addeq   \base, \base, #32*4                 @ step over unused register space
 #else
        VFPFMRX \tmp, MVFR0                         @ Media and VFP Feature Register 0
        and     \tmp, \tmp, #MVFR0_A_SIMD_MASK      @ A_SIMD field
        cmp     \tmp, #2                            @ 32 x 64bit registers?
-       stceql  p11, cr0, [\base],#32*4             @ FSTMIAD \base!, {d16-d31}
+       stcleq  p11, cr0, [\base],#32*4             @ FSTMIAD \base!, {d16-d31}
        addne   \base, \base, #32*4                 @ step over unused register space
 #endif
 #endif
index 3bc80599c02256a8e2f6681a039a98fe67fef82a..4a5a645c76e2f9629c8f888ffeacfa9b3b077591 100644 (file)
 
                .macro  senduart, rd, rx
                cmp     \rx, #0
-               strneb  \rd, [\rx, #UART_TX << UART_SHIFT]
+               strbne  \rd, [\rx, #UART_TX << UART_SHIFT]
 1001:
                .endm
 
index b795dc2408c05a65fb3e28ec2001ebb04eaf50df..b9f94e03d916a9919654ce87284f451e7ee2c488 100644 (file)
@@ -86,7 +86,7 @@ hexbuf_rel:   .long   hexbuf_addr - .
 ENTRY(printascii)
                addruart_current r3, r1, r2
 1:             teq     r0, #0
-               ldrneb  r1, [r0], #1
+               ldrbne  r1, [r0], #1
                teqne   r1, #0
                reteq   lr
 2:             teq     r1, #'\n'
index e85a3af9ddeb5694b793363f8245ba1ad5f99899..ce4aea57130aff81ac1ec01279777a22df6ee815 100644 (file)
@@ -636,7 +636,7 @@ call_fpe:
        @ Test if we need to give access to iWMMXt coprocessors
        ldr     r5, [r10, #TI_FLAGS]
        rsbs    r7, r8, #(1 << 8)               @ CP 0 or 1 only
-       movcss  r7, r5, lsr #(TIF_USING_IWMMXT + 1)
+       movscs  r7, r5, lsr #(TIF_USING_IWMMXT + 1)
        bcs     iwmmxt_task_enable
 #endif
  ARM(  add     pc, pc, r8, lsr #6      )
@@ -872,7 +872,7 @@ __kuser_cmpxchg64:                          @ 0xffff0f60
        smp_dmb arm
 1:     ldrexd  r0, r1, [r2]                    @ load current val
        eors    r3, r0, r4                      @ compare with oldval (1)
-       eoreqs  r3, r1, r5                      @ compare with oldval (2)
+       eorseq  r3, r1, r5                      @ compare with oldval (2)
        strexdeq r3, r6, r7, [r2]               @ store newval if eq
        teqeq   r3, #1                          @ success?
        beq     1b                              @ if no then retry
@@ -896,8 +896,8 @@ __kuser_cmpxchg64:                          @ 0xffff0f60
        ldmia   r1, {r6, lr}                    @ load new val
 1:     ldmia   r2, {r0, r1}                    @ load current val
        eors    r3, r0, r4                      @ compare with oldval (1)
-       eoreqs  r3, r1, r5                      @ compare with oldval (2)
-2:     stmeqia r2, {r6, lr}                    @ store newval if eq
+       eorseq  r3, r1, r5                      @ compare with oldval (2)
+2:     stmiaeq r2, {r6, lr}                    @ store newval if eq
        rsbs    r0, r3, #0                      @ set return val and C flag
        ldmfd   sp!, {r4, r5, r6, pc}
 
@@ -911,7 +911,7 @@ kuser_cmpxchg64_fixup:
        mov     r7, #0xffff0fff
        sub     r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
        subs    r8, r4, r7
-       rsbcss  r8, r8, #(2b - 1b)
+       rsbscs  r8, r8, #(2b - 1b)
        strcs   r7, [sp, #S_PC]
 #if __LINUX_ARM_ARCH__ < 6
        bcc     kuser_cmpxchg32_fixup
@@ -969,7 +969,7 @@ kuser_cmpxchg32_fixup:
        mov     r7, #0xffff0fff
        sub     r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
        subs    r8, r4, r7
-       rsbcss  r8, r8, #(2b - 1b)
+       rsbscs  r8, r8, #(2b - 1b)
        strcs   r7, [sp, #S_PC]
        ret     lr
        .previous
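
The eoreqs -> eorseq and rsbcss -> rsbscs changes above apply the same suffix reordering inside the kuser cmpxchg helpers, which form a fixed-address userspace ABI. As context for what this code path serves, a hedged C sketch of calling __kuser_cmpxchg64 at 0xffff0f60, following the prototype documented in Documentation/arm/kernel_user_helpers.txt:

	#include <stdint.h>

	/* The helper sits at a fixed address on kernels that provide kuser
	 * helpers; it returns 0 when *ptr was atomically replaced and
	 * nonzero when *ptr no longer matched *oldval. */
	typedef int (kuser_cmpxchg64_t)(const int64_t *oldval,
					const int64_t *newval,
					volatile int64_t *ptr);
	#define __kuser_cmpxchg64 (*(kuser_cmpxchg64_t *)0xffff0f60)

	static int cas64(volatile int64_t *p, int64_t expected, int64_t desired)
	{
		return __kuser_cmpxchg64(&expected, &desired, p) == 0;
	}
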
index 0465d65d23de5786ef5df32738d0830ca492353a..f7649adef505ebffc02c3891af9a59cb8bea5792 100644 (file)
@@ -373,7 +373,7 @@ sys_syscall:
                movhs   scno, #0
                csdb
 #endif
-               stmloia sp, {r5, r6}            @ shuffle args
+               stmialo sp, {r5, r6}            @ shuffle args
                movlo   r0, r1
                movlo   r1, r2
                movlo   r2, r3
index 773424843d6efcc2ebeb0ec0cfa88d67643213cc..32051ec5b33fa3dc41eb9c26e063cba7d502c048 100644 (file)
          */
        .macro  v7m_exception_slow_exit ret_r0
        cpsid   i
-       ldr     lr, =EXC_RET_THREADMODE_PROCESSSTACK
+       ldr     lr, =exc_ret
+       ldr     lr, [lr]
 
        @ read original r12, sp, lr, pc and xPSR
        add     r12, sp, #S_IP
        badr    lr, \ret                        @ return address
        .if     \reload
        add     r1, sp, #S_R0 + S_OFF           @ pointer to regs
-       ldmccia r1, {r0 - r6}                   @ reload r0-r6
-       stmccia sp, {r4, r5}                    @ update stack arguments
+       ldmiacc r1, {r0 - r6}                   @ reload r0-r6
+       stmiacc sp, {r4, r5}                    @ update stack arguments
        .endif
        ldrcc   pc, [\table, \tmp, lsl #2]      @ call sys_* routine
 #else
        badr    lr, \ret                        @ return address
        .if     \reload
        add     r1, sp, #S_R0 + S_OFF           @ pointer to regs
-       ldmccia r1, {r0 - r6}                   @ reload r0-r6
-       stmccia sp, {r4, r5}                    @ update stack arguments
+       ldmiacc r1, {r0 - r6}                   @ reload r0-r6
+       stmiacc sp, {r4, r5}                    @ update stack arguments
        .endif
        ldrcc   pc, [\table, \nr, lsl #2]       @ call sys_* routine
 #endif
index abcf4784852593397daf3b1e6cf5d70cf47660e0..19d2dcd6530dc351188bd6c7785705e36e9e64d7 100644 (file)
@@ -146,3 +146,7 @@ ENTRY(vector_table)
        .rept   CONFIG_CPU_V7M_NUM_IRQ
        .long   __irq_entry             @ External Interrupts
        .endr
+       .align  2
+       .globl  exc_ret
+exc_ret:
+       .space  4
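
Together with the v7m_exception_slow_exit hunk above, this replaces a hardcoded EXC_RET_THREADMODE_PROCESSSTACK with a value reloaded from the new exc_ret word, so the exit path restores whatever EXC_RETURN the exception entry actually received instead of assuming thread mode on the process stack. The implied entry-side counterpart would store lr, which carries EXC_RETURN on M-class exception entry -- a hypothetical sketch, not the literal entry code:

	@ Assumption: lr holds EXC_RETURN at this point in the entry macro.
		ldr	r2, =exc_ret
		str	lr, [r2]		@ remember how this exception was entered
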
index ec29de2500764e11ad839bda165c135f12b6a6e5..c08d2d890f7b918981c472c155c6df368a1b30b3 100644 (file)
@@ -439,8 +439,8 @@ M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(3)])
        str     r5, [r12, #PMSAv8_RBAR_A(0)]
        str     r6, [r12, #PMSAv8_RLAR_A(0)]
 #else
-       mcr     p15, 0, r5, c6, c10, 1                  @ PRBAR4
-       mcr     p15, 0, r6, c6, c10, 2                  @ PRLAR4
+       mcr     p15, 0, r5, c6, c10, 0                  @ PRBAR4
+       mcr     p15, 0, r6, c6, c10, 1                  @ PRLAR4
 #endif
 #endif
        ret     lr
index 60146e32619a5912bf12b5277397f2e19213b2a8..82a942894fc04142b1aaf6eaeb646b6e6552aab6 100644 (file)
@@ -180,8 +180,8 @@ ARM_BE8(orr r7, r7, #(1 << 25))     @ HSCTLR.EE
        @ Check whether GICv3 system registers are available
        mrc     p15, 0, r7, c0, c1, 1   @ ID_PFR1
        ubfx    r7, r7, #28, #4
-       cmp     r7, #1
-       bne     2f
+       teq     r7, #0
+       beq     2f
 
        @ Enable system register accesses
        mrc     p15, 4, r7, c12, c9, 5  @ ICC_HSRE
index dd2eb5f76b9f0a7d64f50169dd0d04a402b2ae67..76300f3813e89bc48a76d83dc3076d9f7b79ee84 100644 (file)
@@ -91,8 +91,11 @@ void machine_crash_nonpanic_core(void *unused)
 
        set_cpu_online(smp_processor_id(), false);
        atomic_dec(&waiting_for_crash_ipi);
-       while (1)
+
+       while (1) {
                cpu_relax();
+               wfe();
+       }
 }
 
 void crash_smp_send_stop(void)
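
This hunk (and the matching ones in ipi_cpu_stop() and omap_prm_reset_system() further down) parks a dead-ended CPU with wfe() instead of a bare cpu_relax() spin, so the core sleeps until an event or pending interrupt arrives rather than spinning at full power in a crash or reset path. The idiom, as a minimal kernel-context sketch with a hypothetical name:

	static void park_cpu(void)
	{
		while (1) {
			cpu_relax();	/* polite spin */
			wfe();		/* sleep until SEV/interrupt/debug event */
		}
	}
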
index a50dc00d79a273fac9e5d5c3f8be75f37231f766..d0a05a3bdb9652450ea4a6e1cdc6036c945ab42a 100644 (file)
@@ -16,7 +16,7 @@ struct patch {
        unsigned int insn;
 };
 
-static DEFINE_SPINLOCK(patch_lock);
+static DEFINE_RAW_SPINLOCK(patch_lock);
 
 static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
        __acquires(&patch_lock)
@@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
                return addr;
 
        if (flags)
-               spin_lock_irqsave(&patch_lock, *flags);
+               raw_spin_lock_irqsave(&patch_lock, *flags);
        else
                __acquire(&patch_lock);
 
@@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
        clear_fixmap(fixmap);
 
        if (flags)
-               spin_unlock_irqrestore(&patch_lock, *flags);
+               raw_spin_unlock_irqrestore(&patch_lock, *flags);
        else
                __release(&patch_lock);
 }
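
patch_map()/patch_unmap() run in contexts that must not sleep (text patching under stop_machine, kprobes), and on PREEMPT_RT a plain spinlock_t becomes a sleeping lock; converting to raw_spinlock_t -- here and in unwind.c below -- keeps these sections truly atomic on all configurations. The resulting pattern, sketched with hypothetical names:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(example_lock);	/* spins even on PREEMPT_RT */

	static void atomic_section(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&example_lock, flags);
		/* ... work that must not sleep, e.g. fixmap manipulation ... */
		raw_spin_unlock_irqrestore(&example_lock, flags);
	}
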
index a8257fc9cf2a908c4dd2b5f19c819b4c7fc40588..5dc8b80bb69383643eddec5ba62164e0458b4512 100644 (file)
@@ -120,6 +120,14 @@ ENDPROC(cpu_resume_after_mmu)
        .text
        .align
 
+#ifdef CONFIG_MCPM
+       .arm
+THUMB( .thumb                  )
+ENTRY(cpu_resume_no_hyp)
+ARM_BE8(setend be)                     @ ensure we are in BE mode
+       b       no_hyp
+#endif
+
 #ifdef CONFIG_MMU
        .arm
 ENTRY(cpu_resume_arm)
@@ -135,6 +143,7 @@ ARM_BE8(setend be)                  @ ensure we are in BE mode
        bl      __hyp_stub_install_secondary
 #endif
        safe_svcmode_maskall r1
+no_hyp:
        mov     r1, #0
        ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
        ALT_UP_B(1f)
@@ -163,6 +172,9 @@ ENDPROC(cpu_resume)
 
 #ifdef CONFIG_MMU
 ENDPROC(cpu_resume_arm)
+#endif
+#ifdef CONFIG_MCPM
+ENDPROC(cpu_resume_no_hyp)
 #endif
 
        .align 2
index 1d6f5ea522f49184c53a7d996769104107b4de8e..facd4240ca02c776716a2e1e14c803c359967cc8 100644 (file)
  */
 struct secondary_data secondary_data;
 
-/*
- * control for which core is the next to come out of the secondary
- * boot "holding pen"
- */
-volatile int pen_release = -1;
-
 enum ipi_msg_type {
        IPI_WAKEUP,
        IPI_TIMER,
@@ -604,8 +598,10 @@ static void ipi_cpu_stop(unsigned int cpu)
        local_fiq_disable();
        local_irq_disable();
 
-       while (1)
+       while (1) {
                cpu_relax();
+               wfe();
+       }
 }
 
 static DEFINE_PER_CPU(struct completion *, cpu_completion);
index b30eafeef09633d24b1f55e9cf9b4f14314f0fee..3cdc399b9fc32064fd7c419962ad84f42c4dff53 100644 (file)
@@ -100,8 +100,6 @@ static void twd_timer_stop(void)
        disable_percpu_irq(clk->irq);
 }
 
-#ifdef CONFIG_COMMON_CLK
-
 /*
  * Updates clockevent frequency when the cpu frequency changes.
  * Called on the cpu that is changing frequency with interrupts disabled.
@@ -143,54 +141,6 @@ static int twd_clk_init(void)
 }
 core_initcall(twd_clk_init);
 
-#elif defined (CONFIG_CPU_FREQ)
-
-#include <linux/cpufreq.h>
-
-/*
- * Updates clockevent frequency when the cpu frequency changes.
- * Called on the cpu that is changing frequency with interrupts disabled.
- */
-static void twd_update_frequency(void *data)
-{
-       twd_timer_rate = clk_get_rate(twd_clk);
-
-       clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate);
-}
-
-static int twd_cpufreq_transition(struct notifier_block *nb,
-       unsigned long state, void *data)
-{
-       struct cpufreq_freqs *freqs = data;
-
-       /*
-        * The twd clock events must be reprogrammed to account for the new
-        * frequency.  The timer is local to a cpu, so cross-call to the
-        * changing cpu.
-        */
-       if (state == CPUFREQ_POSTCHANGE)
-               smp_call_function_single(freqs->cpu, twd_update_frequency,
-                       NULL, 1);
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block twd_cpufreq_nb = {
-       .notifier_call = twd_cpufreq_transition,
-};
-
-static int twd_cpufreq_init(void)
-{
-       if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk))
-               return cpufreq_register_notifier(&twd_cpufreq_nb,
-                       CPUFREQ_TRANSITION_NOTIFIER);
-
-       return 0;
-}
-core_initcall(twd_cpufreq_init);
-
-#endif
-
 static void twd_calibrate_rate(void)
 {
        unsigned long count;
@@ -366,21 +316,6 @@ out_free:
        return err;
 }
 
-int __init twd_local_timer_register(struct twd_local_timer *tlt)
-{
-       if (twd_base || twd_evt)
-               return -EBUSY;
-
-       twd_ppi = tlt->res[1].start;
-
-       twd_base = ioremap(tlt->res[0].start, resource_size(&tlt->res[0]));
-       if (!twd_base)
-               return -ENOMEM;
-
-       return twd_local_timer_common_register(NULL);
-}
-
-#ifdef CONFIG_OF
 static int __init twd_local_timer_of_register(struct device_node *np)
 {
        int err;
@@ -406,4 +341,3 @@ out:
 TIMER_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register);
 TIMER_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register);
 TIMER_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register);
-#endif
index 0bee233fef9a30bc92df73da7beee4e3f2966f2f..314cfb232a6353165dc899f35e5747a27a7ef617 100644 (file)
@@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[];
 static const struct unwind_idx *__origin_unwind_idx;
 extern const struct unwind_idx __stop_unwind_idx[];
 
-static DEFINE_SPINLOCK(unwind_lock);
+static DEFINE_RAW_SPINLOCK(unwind_lock);
 static LIST_HEAD(unwind_tables);
 
 /* Convert a prel31 symbol to an absolute address */
@@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
                /* module unwind tables */
                struct unwind_table *table;
 
-               spin_lock_irqsave(&unwind_lock, flags);
+               raw_spin_lock_irqsave(&unwind_lock, flags);
                list_for_each_entry(table, &unwind_tables, list) {
                        if (addr >= table->begin_addr &&
                            addr < table->end_addr) {
@@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
                                break;
                        }
                }
-               spin_unlock_irqrestore(&unwind_lock, flags);
+               raw_spin_unlock_irqrestore(&unwind_lock, flags);
        }
 
        pr_debug("%s: idx = %p\n", __func__, idx);
@@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
        tab->begin_addr = text_addr;
        tab->end_addr = text_addr + text_size;
 
-       spin_lock_irqsave(&unwind_lock, flags);
+       raw_spin_lock_irqsave(&unwind_lock, flags);
        list_add_tail(&tab->list, &unwind_tables);
-       spin_unlock_irqrestore(&unwind_lock, flags);
+       raw_spin_unlock_irqrestore(&unwind_lock, flags);
 
        return tab;
 }
@@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab)
        if (!tab)
                return;
 
-       spin_lock_irqsave(&unwind_lock, flags);
+       raw_spin_lock_irqsave(&unwind_lock, flags);
        list_del(&tab->list);
-       spin_unlock_irqrestore(&unwind_lock, flags);
+       raw_spin_unlock_irqrestore(&unwind_lock, flags);
 
        kfree(tab);
 }
index 48de846f22464637be95c64e0a1ff9357b6e5a65..531e59f5be9c8f77370b926f5b77f2f8189e77ca 100644 (file)
@@ -8,9 +8,8 @@ ifeq ($(plus_virt),+virt)
        plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic
-CFLAGS_arm.o := -I. $(plus_virt_def)
-CFLAGS_mmu.o := -I.
+ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
+CFLAGS_arm.o := $(plus_virt_def)
 
 AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
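
In an O= (separate object directory) build the compiler runs from the object tree, so build-relative paths like -Iarch/arm/kvm and -I. stop resolving; anchoring the includes at $(srctree) works in both layouts and makes the per-object -I. lines redundant. The general kbuild pattern, sketched with a placeholder path:

	# Anchor source-relative include paths at $(srctree) so O= builds work:
	ccflags-y += -I $(srctree)/$(src)		# this subdirectory
	ccflags-y += -I $(srctree)/path/to/shared	# placeholder path
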
index e8bd288fd5be909dad8ec74561330fea3a972ff7..14915c78bd99b6bdeed2ebb4ac02b006e14c714c 100644 (file)
@@ -293,15 +293,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu,
                             const struct coproc_params *p,
                             const struct coproc_reg *r)
 {
-       u64 now = kvm_phys_timer_read();
-       u64 val;
+       u32 val;
 
        if (p->is_write) {
                val = *vcpu_reg(vcpu, p->Rt1);
-               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now);
+               kvm_arm_timer_write_sysreg(vcpu,
+                                          TIMER_PTIMER, TIMER_REG_TVAL, val);
        } else {
-               val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
-               *vcpu_reg(vcpu, p->Rt1) = val - now;
+               val = kvm_arm_timer_read_sysreg(vcpu,
+                                               TIMER_PTIMER, TIMER_REG_TVAL);
+               *vcpu_reg(vcpu, p->Rt1) = val;
        }
 
        return true;
@@ -315,9 +316,11 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
 
        if (p->is_write) {
                val = *vcpu_reg(vcpu, p->Rt1);
-               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val);
+               kvm_arm_timer_write_sysreg(vcpu,
+                                          TIMER_PTIMER, TIMER_REG_CTL, val);
        } else {
-               val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+               val = kvm_arm_timer_read_sysreg(vcpu,
+                                               TIMER_PTIMER, TIMER_REG_CTL);
                *vcpu_reg(vcpu, p->Rt1) = val;
        }
 
@@ -333,9 +336,11 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
        if (p->is_write) {
                val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
                val |= *vcpu_reg(vcpu, p->Rt1);
-               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val);
+               kvm_arm_timer_write_sysreg(vcpu,
+                                          TIMER_PTIMER, TIMER_REG_CVAL, val);
        } else {
-               val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+               val = kvm_arm_timer_read_sysreg(vcpu,
+                                               TIMER_PTIMER, TIMER_REG_CVAL);
                *vcpu_reg(vcpu, p->Rt1) = val;
                *vcpu_reg(vcpu, p->Rt2) = val >> 32;
        }
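
The removed code emulated CNTP_TVAL by hand as CVAL minus the current counter; routing all three registers (TVAL, CTL, CVAL) through kvm_arm_timer_read_sysreg()/kvm_arm_timer_write_sysreg() moves that arithmetic, and its 32-bit truncation rules, into the generic timer code. For reference, TVAL's architectural relation to CVAL, sketched with hypothetical helpers:

	/* TVAL is a signed 32-bit window onto CVAL relative to the counter:
	 * a read yields CVAL - CNTPCT, a write sets CVAL = CNTPCT + (s32)TVAL. */
	static u32 tval_from_cval(u64 cval, u64 cntpct)
	{
		return (u32)(cval - cntpct);
	}

	static u64 cval_from_tval(u32 tval, u64 cntpct)
	{
		return cntpct + (s32)tval;
	}
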
index c4782812714cf4cbdbd532c8b975cd5cd20ee4cf..8bf895ec6e04231f9849ef3ca4bf88d45eb3f74f 100644 (file)
@@ -27,7 +27,6 @@ static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx)
 
 void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 {
-       ctxt->cp15[c0_MPIDR]            = read_sysreg(VMPIDR);
        ctxt->cp15[c0_CSSELR]           = read_sysreg(CSSELR);
        ctxt->cp15[c1_SCTLR]            = read_sysreg(SCTLR);
        ctxt->cp15[c1_CPACR]            = read_sysreg(CPACR);
index aa3f9a9837acafb43e8d97acdac7fac03a37d2b2..6ed3cf23fe8900c7bca998468c22ff55d2c61ae3 100644 (file)
@@ -176,7 +176,7 @@ THUMB(      orr     lr, lr, #PSR_T_BIT      )
        msr     spsr_cxsf, lr
        ldr     lr, =panic
        msr     ELR_hyp, lr
-       ldr     lr, =kvm_call_hyp
+       ldr     lr, =__kvm_call_hyp
        clrex
        eret
 ENDPROC(__hyp_do_panic)
index acf1c37fa49c218234a3927617eb65476ef21d06..3b058a5d7c5f145e411e426bfad4606406307762 100644 (file)
@@ -77,7 +77,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
        struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-       write_sysreg(kvm->arch.vttbr, VTTBR);
+       write_sysreg(kvm_get_vttbr(kvm), VTTBR);
        write_sysreg(vcpu->arch.midr, VPIDR);
 }
 
index c0edd450e10459612e37cc292ad8585494d12773..8e4afba7363571df59e755bacb84f8dda407e2f9 100644 (file)
@@ -41,7 +41,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 
        /* Switch to requested VMID */
        kvm = kern_hyp_va(kvm);
-       write_sysreg(kvm->arch.vttbr, VTTBR);
+       write_sysreg(kvm_get_vttbr(kvm), VTTBR);
        isb();
 
        write_sysreg(0, TLBIALLIS);
@@ -61,7 +61,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
        struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
 
        /* Switch to requested VMID */
-       write_sysreg(kvm->arch.vttbr, VTTBR);
+       write_sysreg(kvm_get_vttbr(kvm), VTTBR);
        isb();
 
        write_sysreg(0, TLBIALL);
index 80a1d6cd261cecf25df08f0e00e8b66c4a539e1d..a08e6419ebe90c53a79da18c091070d0aef7d703 100644 (file)
@@ -42,7 +42,7 @@
  *   r12:     caller save
  *   rest:    callee save
  */
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
        hvc     #0
        bx      lr
-ENDPROC(kvm_call_hyp)
+ENDPROC(__kvm_call_hyp)
index ad25fd1872c7d7dd5c6c837e9f579b284dec2c15..0bff0176db2c4f1bb31dd9cdaa3b0eceaa26dcd9 100644 (file)
@@ -39,7 +39,7 @@ $(obj)/csumpartialcopy.o:     $(obj)/csumpartialcopygeneric.S
 $(obj)/csumpartialcopyuser.o:  $(obj)/csumpartialcopygeneric.S
 
 ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
-  NEON_FLAGS                   := -mfloat-abi=softfp -mfpu=neon
+  NEON_FLAGS                   := -march=armv7-a -mfloat-abi=softfp -mfpu=neon
   CFLAGS_xor-neon.o            += $(NEON_FLAGS)
   obj-$(CONFIG_XOR_BLOCKS)     += xor-neon.o
 endif
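
This pairs with the xor-neon.c #error update further down: -mfpu=neon only takes effect when the selected architecture supports NEON, and some toolchains (Clang in particular) reject or ignore it against a pre-v7 baseline, leaving __ARM_NEON__ undefined. Pinning -march=armv7-a makes the flag combination self-consistent rather than relying on the toolchain's configured default architecture:

	# NEON requires a v7-A baseline; state it explicitly.
	NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon
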
index 93cddab73072cc716b07c0353ec79bdbaec3757a..95bd359912889a5d31ceaefeaefb7597deaf5c14 100644 (file)
@@ -7,7 +7,7 @@
 ENTRY( \name           )
 UNWIND(        .fnstart        )
        ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
+       strbne  r1, [ip]                @ assert word-aligned
        mov     r2, #1
        and     r3, r0, #31             @ Get bit offset
        mov     r0, r0, lsr #5
@@ -32,7 +32,7 @@ ENDPROC(\name         )
 ENTRY( \name           )
 UNWIND(        .fnstart        )
        ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
+       strbne  r1, [ip]                @ assert word-aligned
        mov     r2, #1
        and     r3, r0, #31             @ Get bit offset
        mov     r0, r0, lsr #5
@@ -62,7 +62,7 @@ ENDPROC(\name         )
 ENTRY( \name           )
 UNWIND(        .fnstart        )
        ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
+       strbne  r1, [ip]                @ assert word-aligned
        and     r2, r0, #31
        mov     r0, r0, lsr #5
        mov     r3, #1
@@ -89,7 +89,7 @@ ENDPROC(\name         )
 ENTRY( \name           )
 UNWIND(        .fnstart        )
        ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
+       strbne  r1, [ip]                @ assert word-aligned
        and     r3, r0, #31
        mov     r0, r0, lsr #5
        save_and_disable_irqs ip
index e936352ccb0013e040fcd9b22bda1c583cfff361..55946e3fa2ba8407a8108e664101481a3a7eff7d 100644 (file)
@@ -44,7 +44,7 @@ UNWIND(.save {r1, lr})
                strusr  r2, r0, 1, ne, rept=2
                tst     r1, #1                  @ x1 x0 x1 x0 x1 x0 x1
                it      ne                      @ explicit IT needed for the label
-USER(          strnebt r2, [r0])
+USER(          strbtne r2, [r0])
                mov     r0, #0
                ldmfd   sp!, {r1, pc}
 UNWIND(.fnend)
index 0d4c189c7f4f00ca4795ae7aa78917838697580b..6a3419e2c6d86e69dcea30f5921a95bab89f7c31 100644 (file)
@@ -91,7 +91,7 @@
        .endm
 
        .macro str1b ptr reg cond=al abort
-       str\cond\()b \reg, [\ptr], #1
+       strb\cond \reg, [\ptr], #1
        .endm
 
        .macro enter reg1 reg2
index 6ee2f6706f869b03c95f30d2b1a1cf7adf9086d9..b84ce17920439e0c45d7712faf461a796d490c27 100644 (file)
@@ -39,9 +39,9 @@ ENTRY(copy_page)
        .endr
                subs    r2, r2, #1                      @       1
                stmia   r0!, {r3, r4, ip, lr}           @       4
-               ldmgtia r1!, {r3, r4, ip, lr}           @       4
+               ldmiagt r1!, {r3, r4, ip, lr}           @       4
                bgt     1b                              @       1
-       PLD(    ldmeqia r1!, {r3, r4, ip, lr}   )
+       PLD(    ldmiaeq r1!, {r3, r4, ip, lr}   )
        PLD(    beq     2b                      )
                ldmfd   sp!, {r4, pc}                   @       3
 ENDPROC(copy_page)
index 652e4d98cd47b7c56fefbbc055451ace6d3e99e1..a11f2c25e03a7a130e8f40c881aedc1cb49e582c 100644 (file)
@@ -99,7 +99,7 @@
 
        CALGN(  ands    ip, r0, #31             )
        CALGN(  rsb     r3, ip, #32             )
-       CALGN(  sbcnes  r4, r3, r2              )  @ C is always set here
+       CALGN(  sbcsne  r4, r3, r2              )  @ C is always set here
        CALGN(  bcs     2f                      )
        CALGN(  adr     r4, 6f                  )
        CALGN(  subs    r2, r2, r3              )  @ C gets set
 
        CALGN(  ands    ip, r0, #31             )
        CALGN(  rsb     ip, ip, #32             )
-       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  subcc   r2, r2, ip              )
        CALGN(  bcc     15f                     )
 
                orr     r9, r9, ip, lspush #\push
                mov     ip, ip, lspull #\pull
                orr     ip, ip, lr, lspush #\push
-               str8w   r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f
+               str8w   r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f
                bge     12b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     13b                     )
index 97a6ff4b7e3cab0bd4501498bf8a7433dc9ca08a..c7d08096e35453652b043323f54b69f4a5812842 100644 (file)
@@ -49,7 +49,7 @@
        .endm
 
        .macro ldr1b ptr reg cond=al abort
-       ldr\cond\()b \reg, [\ptr], #1
+       ldrb\cond \reg, [\ptr], #1
        .endm
 
 #ifdef CONFIG_CPU_USE_DOMAINS
index 984e0f29d548b456884e643d9f9337e4cd42fc31..bd84e2db353b17f763d0d876360e6494ff3d15fa 100644 (file)
@@ -40,9 +40,9 @@ td3   .req    lr
                /* we must have at least one byte. */
                tst     buf, #1                 @ odd address?
                movne   sum, sum, ror #8
-               ldrneb  td0, [buf], #1
+               ldrbne  td0, [buf], #1
                subne   len, len, #1
-               adcnes  sum, sum, td0, put_byte_1
+               adcsne  sum, sum, td0, put_byte_1
 
 .Lless4:               tst     len, #6
                beq     .Lless8_byte
@@ -68,8 +68,8 @@ td3   .req    lr
                bne     .Lless8_wordlp
 
 .Lless8_byte:  tst     len, #1                 @ odd number of bytes
-               ldrneb  td0, [buf], #1          @ include last byte
-               adcnes  sum, sum, td0, put_byte_0       @ update checksum
+               ldrbne  td0, [buf], #1          @ include last byte
+               adcsne  sum, sum, td0, put_byte_0       @ update checksum
 
 .Ldone:                adc     r0, sum, #0             @ collect up the last carry
                ldr     td0, [sp], #4
@@ -78,17 +78,17 @@ td3 .req    lr
                ldr     pc, [sp], #4            @ return
 
 .Lnot_aligned: tst     buf, #1                 @ odd address
-               ldrneb  td0, [buf], #1          @ make even
+               ldrbne  td0, [buf], #1          @ make even
                subne   len, len, #1
-               adcnes  sum, sum, td0, put_byte_1       @ update checksum
+               adcsne  sum, sum, td0, put_byte_1       @ update checksum
 
                tst     buf, #2                 @ 32-bit aligned?
 #if __LINUX_ARM_ARCH__ >= 4
-               ldrneh  td0, [buf], #2          @ make 32-bit aligned
+               ldrhne  td0, [buf], #2          @ make 32-bit aligned
                subne   len, len, #2
 #else
-               ldrneb  td0, [buf], #1
-               ldrneb  ip, [buf], #1
+               ldrbne  td0, [buf], #1
+               ldrbne  ip, [buf], #1
                subne   len, len, #2
 #ifndef __ARMEB__
                orrne   td0, td0, ip, lsl #8
@@ -96,7 +96,7 @@ td3   .req    lr
                orrne   td0, ip, td0, lsl #8
 #endif
 #endif
-               adcnes  sum, sum, td0           @ update checksum
+               adcsne  sum, sum, td0           @ update checksum
                ret     lr
 
 ENTRY(csum_partial)
index 10b45909610ca6f4ca6f6f8bdc664b79c2f2bd6f..08e17758cbea9fb08d8293d122494367e9df449d 100644 (file)
@@ -148,9 +148,9 @@ FN_ENTRY
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
 .Lexit:                tst     len, #1
-               strneb  r5, [dst], #1
+               strbne  r5, [dst], #1
                andne   r5, r5, #255
-               adcnes  sum, sum, r5, put_byte_0
+               adcsne  sum, sum, r5, put_byte_0
 
                /*
                 * If the dst pointer was not 16-bit aligned, we
index b83fdc06286a64ece150fb7e419bc587e47c3e34..f4716d98e0b4afcce0c1d696cc775cad04b16556 100644 (file)
@@ -95,7 +95,7 @@
                add     r2, r2, r1
                mov     r0, #0                  @ zero the buffer
 9002:          teq     r2, r1
-               strneb  r0, [r1], #1
+               strbne  r0, [r1], #1
                bne     9002b
                load_regs
                .popsection
index a9eafe4981eb847e2f07e0e245aa8e1f1747fa59..4d80f690c48bf1b55e95355053691c5c14126651 100644 (file)
@@ -88,8 +88,8 @@ UNWIND(.fnstart)
        @ Break out early if dividend reaches 0.
 2:     cmp     xh, yl
        orrcs   yh, yh, ip
-       subcss  xh, xh, yl
-       movnes  ip, ip, lsr #1
+       subscs  xh, xh, yl
+       movsne  ip, ip, lsr #1
        mov     yl, yl, lsr #1
        bne     2b
 
index 617150b1baef06e8de8e822852b3dd6d0cec0a2d..de68d3b343e30a47aaa3963ccf4f981b2e557598 100644 (file)
@@ -14,8 +14,8 @@
                .global floppy_fiqin_end
 ENTRY(floppy_fiqin_start)
                subs    r9, r9, #1
-               ldrgtb  r12, [r11, #-4]
-               ldrleb  r12, [r11], #0
+               ldrbgt  r12, [r11, #-4]
+               ldrble  r12, [r11], #0
                strb    r12, [r10], #1
                subs    pc, lr, #4
 floppy_fiqin_end:
@@ -23,10 +23,10 @@ floppy_fiqin_end:
                .global floppy_fiqout_end
 ENTRY(floppy_fiqout_start)
                subs    r9, r9, #1
-               ldrgeb  r12, [r10], #1
+               ldrbge  r12, [r10], #1
                movlt   r12, #0
-               strleb  r12, [r11], #0
-               subles  pc, lr, #4
+               strble  r12, [r11], #0
+               subsle  pc, lr, #4
                strb    r12, [r11, #-4]
                subs    pc, lr, #4
 floppy_fiqout_end:
index c31b2f3153f171fd09602aed2ea9cb8c97797f4d..91038a0a77b57f3d0d5345773d6c72b08dd86f5d 100644 (file)
                cmp     ip, #2
                ldrb    r3, [r0]
                strb    r3, [r1], #1
-               ldrgeb  r3, [r0]
-               strgeb  r3, [r1], #1
-               ldrgtb  r3, [r0]
-               strgtb  r3, [r1], #1
+               ldrbge  r3, [r0]
+               strbge  r3, [r1], #1
+               ldrbgt  r3, [r0]
+               strbgt  r3, [r1], #1
                subs    r2, r2, ip
                bne     .Linsb_aligned
 
@@ -72,7 +72,7 @@ ENTRY(__raw_readsb)
                bpl     .Linsb_16_lp
 
                tst     r2, #15
-               ldmeqfd sp!, {r4 - r6, pc}
+               ldmfdeq sp!, {r4 - r6, pc}
 
 .Linsb_no_16:  tst     r2, #8
                beq     .Linsb_no_8
@@ -109,15 +109,15 @@ ENTRY(__raw_readsb)
                str     r3, [r1], #4
 
 .Linsb_no_4:   ands    r2, r2, #3
-               ldmeqfd sp!, {r4 - r6, pc}
+               ldmfdeq sp!, {r4 - r6, pc}
 
                cmp     r2, #2
                ldrb    r3, [r0]
                strb    r3, [r1], #1
-               ldrgeb  r3, [r0]
-               strgeb  r3, [r1], #1
-               ldrgtb  r3, [r0]
-               strgtb  r3, [r1]
+               ldrbge  r3, [r0]
+               strbge  r3, [r1], #1
+               ldrbgt  r3, [r0]
+               strbgt  r3, [r1]
 
                ldmfd   sp!, {r4 - r6, pc}
 ENDPROC(__raw_readsb)
index 2ed86fa5465f70cdcb92a46a167d9aa81edad68f..f2e2064318d2142d43464d298d69b146006dc2e5 100644 (file)
@@ -30,7 +30,7 @@ ENTRY(__raw_readsl)
 2:             movs    r2, r2, lsl #31
                ldrcs   r3, [r0, #0]
                ldrcs   ip, [r0, #0]
-               stmcsia r1!, {r3, ip}
+               stmiacs r1!, {r3, ip}
                ldrne   r3, [r0, #0]
                strne   r3, [r1, #0]
                ret     lr
index 413da99145292f3e535b618fee2a5c9c96e114b4..8b25b69c516e79f4ef0580994b744bbfc5b39d58 100644 (file)
@@ -68,7 +68,7 @@ ENTRY(__raw_readsw)
                bpl     .Linsw_8_lp
 
                tst     r2, #7
-               ldmeqfd sp!, {r4, r5, r6, pc}
+               ldmfdeq sp!, {r4, r5, r6, pc}
 
 .Lno_insw_8:   tst     r2, #4
                beq     .Lno_insw_4
@@ -97,9 +97,9 @@ ENTRY(__raw_readsw)
 
 .Lno_insw_2:   tst     r2, #1
                ldrne   r3, [r0]
-               strneb  r3, [r1], #1
+               strbne  r3, [r1], #1
                movne   r3, r3, lsr #8
-               strneb  r3, [r1]
+               strbne  r3, [r1]
 
                ldmfd   sp!, {r4, r5, r6, pc}
 
index d9a45e9692aee3ad1de5dea37653a65cd8c18da4..5efdd66f5dcd695e88b5673264f064d35a615c25 100644 (file)
@@ -76,8 +76,8 @@ ENTRY(__raw_readsw)
                pack    r3, r3, ip
                str     r3, [r1], #4
 
-.Lno_insw_2:   ldrneh  r3, [r0]
-               strneh  r3, [r1]
+.Lno_insw_2:   ldrhne  r3, [r0]
+               strhne  r3, [r1]
 
                ldmfd   sp!, {r4, r5, pc}
 
@@ -94,7 +94,7 @@ ENTRY(__raw_readsw)
 #endif
 
 .Linsw_noalign:        stmfd   sp!, {r4, lr}
-               ldrccb  ip, [r1, #-1]!
+               ldrbcc  ip, [r1, #-1]!
                bcc     1f
 
                ldrh    ip, [r0]
@@ -121,11 +121,11 @@ ENTRY(__raw_readsw)
 
 3:             tst     r2, #1
                strb    ip, [r1], #1
-               ldrneh  ip, [r0]
+               ldrhne  ip, [r0]
    _BE_ONLY_(  movne   ip, ip, ror #8          )
-               strneb  ip, [r1], #1
+               strbne  ip, [r1], #1
    _LE_ONLY_(  movne   ip, ip, lsr #8          )
    _BE_ONLY_(  movne   ip, ip, lsr #24         )
-               strneb  ip, [r1]
+               strbne  ip, [r1]
                ldmfd   sp!, {r4, pc}
 ENDPROC(__raw_readsw)
index a46bbc9b168b45f7016096244eb4933a911d4ac0..7d2881a2381eb01e32336a39384b9bf8edef8db2 100644 (file)
                cmp     ip, #2
                ldrb    r3, [r1], #1
                strb    r3, [r0]
-               ldrgeb  r3, [r1], #1
-               strgeb  r3, [r0]
-               ldrgtb  r3, [r1], #1
-               strgtb  r3, [r0]
+               ldrbge  r3, [r1], #1
+               strbge  r3, [r0]
+               ldrbgt  r3, [r1], #1
+               strbgt  r3, [r0]
                subs    r2, r2, ip
                bne     .Loutsb_aligned
 
@@ -64,7 +64,7 @@ ENTRY(__raw_writesb)
                bpl     .Loutsb_16_lp
 
                tst     r2, #15
-               ldmeqfd sp!, {r4, r5, pc}
+               ldmfdeq sp!, {r4, r5, pc}
 
 .Loutsb_no_16: tst     r2, #8
                beq     .Loutsb_no_8
@@ -80,15 +80,15 @@ ENTRY(__raw_writesb)
                outword r3
 
 .Loutsb_no_4:  ands    r2, r2, #3
-               ldmeqfd sp!, {r4, r5, pc}
+               ldmfdeq sp!, {r4, r5, pc}
 
                cmp     r2, #2
                ldrb    r3, [r1], #1
                strb    r3, [r0]
-               ldrgeb  r3, [r1], #1
-               strgeb  r3, [r0]
-               ldrgtb  r3, [r1]
-               strgtb  r3, [r0]
+               ldrbge  r3, [r1], #1
+               strbge  r3, [r0]
+               ldrbgt  r3, [r1]
+               strbgt  r3, [r0]
 
                ldmfd   sp!, {r4, r5, pc}
 ENDPROC(__raw_writesb)
index 4ea2435988c1f75d8fddac8ac63a499067d02cae..7596ac0c90b05d4559be1fd344395e5af6b912fd 100644 (file)
@@ -28,7 +28,7 @@ ENTRY(__raw_writesl)
                bpl     1b
                ldmfd   sp!, {r4, lr}
 2:             movs    r2, r2, lsl #31
-               ldmcsia r1!, {r3, ip}
+               ldmiacs r1!, {r3, ip}
                strcs   r3, [r0, #0]
                ldrne   r3, [r1, #0]
                strcs   ip, [r0, #0]
index 121789eb680235f9dad2c8f1492960d2f26fded1..cb94b9b4940569f6c81f5fa316223388ff00939b 100644 (file)
@@ -79,7 +79,7 @@ ENTRY(__raw_writesw)
                bpl     .Loutsw_8_lp
 
                tst     r2, #7
-               ldmeqfd sp!, {r4, r5, r6, pc}
+               ldmfdeq sp!, {r4, r5, r6, pc}
 
 .Lno_outsw_8:  tst     r2, #4
                beq     .Lno_outsw_4
index 269f90c51ad279c63bf4dd9f8bfed8c6827a75d2..e6645b2f249ef225fcb3a57df97a710e5ed8461d 100644 (file)
@@ -61,8 +61,8 @@ ENTRY(__raw_writesw)
                ldr     r3, [r1], #4
                outword r3
 
-.Lno_outsw_2:  ldrneh  r3, [r1]
-               strneh  r3, [r0]
+.Lno_outsw_2:  ldrhne  r3, [r1]
+               strhne  r3, [r0]
 
                ldmfd   sp!, {r4, r5, pc}
 
@@ -95,6 +95,6 @@ ENTRY(__raw_writesw)
 
                tst     r2, #1
 3:             movne   ip, r3, lsr #8
-               strneh  ip, [r0]
+               strhne  ip, [r0]
                ret     lr
 ENDPROC(__raw_writesw)
index 9397b2e532afa3d863930b4e29a663c166ae475e..c23f9d9e29704be4c834185a22d8ca9eefef7013 100644 (file)
@@ -96,7 +96,7 @@ Boston, MA 02111-1307, USA.  */
        subhs   \dividend, \dividend, \divisor, lsr #3
        orrhs   \result,   \result,   \curbit,  lsr #3
        cmp     \dividend, #0                   @ Early termination?
-       movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
+       movsne  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
        movne   \divisor,  \divisor, lsr #4
        bne     1b
 
@@ -182,7 +182,7 @@ Boston, MA 02111-1307, USA.  */
        subhs   \dividend, \dividend, \divisor, lsr #3
        cmp     \dividend, #1
        mov     \divisor, \divisor, lsr #4
-       subges  \order, \order, #4
+       subsge  \order, \order, #4
        bge     1b
 
        tst     \order, #3
index 64111bd4440b1aa3702c469ce349b303a0244ebd..4a6997bb4404316a98268c4394b236aa395a2721 100644 (file)
@@ -30,7 +30,7 @@
        .endm
 
        .macro ldr1b ptr reg cond=al abort
-       ldr\cond\()b \reg, [\ptr], #1
+       ldrb\cond \reg, [\ptr], #1
        .endm
 
        .macro str1w ptr reg abort
@@ -42,7 +42,7 @@
        .endm
 
        .macro str1b ptr reg cond=al abort
-       str\cond\()b \reg, [\ptr], #1
+       strb\cond \reg, [\ptr], #1
        .endm
 
        .macro enter reg1 reg2
index 69a9d47fc5abdcb9f1801cbfe249eaed99b00d99..d70304cb2cd0ddc36a521fbc75713d05210f28f0 100644 (file)
@@ -59,7 +59,7 @@ ENTRY(memmove)
                blt     5f
 
        CALGN(  ands    ip, r0, #31             )
-       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  bcs     2f                      )
        CALGN(  adr     r4, 6f                  )
        CALGN(  subs    r2, r2, ip              )  @ C is set here
@@ -114,20 +114,20 @@ ENTRY(memmove)
        UNWIND( .save   {r0, r4, lr}            ) @ still in first stmfd block
 
 8:             movs    r2, r2, lsl #31
-               ldrneb  r3, [r1, #-1]!
-               ldrcsb  r4, [r1, #-1]!
-               ldrcsb  ip, [r1, #-1]
-               strneb  r3, [r0, #-1]!
-               strcsb  r4, [r0, #-1]!
-               strcsb  ip, [r0, #-1]
+               ldrbne  r3, [r1, #-1]!
+               ldrbcs  r4, [r1, #-1]!
+               ldrbcs  ip, [r1, #-1]
+               strbne  r3, [r0, #-1]!
+               strbcs  r4, [r0, #-1]!
+               strbcs  ip, [r0, #-1]
                ldmfd   sp!, {r0, r4, pc}
 
 9:             cmp     ip, #2
-               ldrgtb  r3, [r1, #-1]!
-               ldrgeb  r4, [r1, #-1]!
+               ldrbgt  r3, [r1, #-1]!
+               ldrbge  r4, [r1, #-1]!
                ldrb    lr, [r1, #-1]!
-               strgtb  r3, [r0, #-1]!
-               strgeb  r4, [r0, #-1]!
+               strbgt  r3, [r0, #-1]!
+               strbge  r4, [r0, #-1]!
                subs    r2, r2, ip
                strb    lr, [r0, #-1]!
                blt     8b
@@ -150,7 +150,7 @@ ENTRY(memmove)
                blt     14f
 
        CALGN(  ands    ip, r0, #31             )
-       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  subcc   r2, r2, ip              )
        CALGN(  bcc     15f                     )
 
index ed6d35d9cdb5a6288f70d116e6914f803455006e..5593a45e0a8c69a54c31bcc00056554a4d65b145 100644 (file)
@@ -44,20 +44,20 @@ UNWIND( .save {r8, lr}      )
        mov     lr, r3
 
 2:     subs    r2, r2, #64
-       stmgeia ip!, {r1, r3, r8, lr}   @ 64 bytes at a time.
-       stmgeia ip!, {r1, r3, r8, lr}
-       stmgeia ip!, {r1, r3, r8, lr}
-       stmgeia ip!, {r1, r3, r8, lr}
+       stmiage ip!, {r1, r3, r8, lr}   @ 64 bytes at a time.
+       stmiage ip!, {r1, r3, r8, lr}
+       stmiage ip!, {r1, r3, r8, lr}
+       stmiage ip!, {r1, r3, r8, lr}
        bgt     2b
-       ldmeqfd sp!, {r8, pc}           @ Now <64 bytes to go.
+       ldmfdeq sp!, {r8, pc}           @ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
        tst     r2, #32
-       stmneia ip!, {r1, r3, r8, lr}
-       stmneia ip!, {r1, r3, r8, lr}
+       stmiane ip!, {r1, r3, r8, lr}
+       stmiane ip!, {r1, r3, r8, lr}
        tst     r2, #16
-       stmneia ip!, {r1, r3, r8, lr}
+       stmiane ip!, {r1, r3, r8, lr}
        ldmfd   sp!, {r8, lr}
 UNWIND( .fnend              )
 
@@ -87,22 +87,22 @@ UNWIND( .save {r4-r8, lr}      )
        rsb     r8, r8, #32
        sub     r2, r2, r8
        movs    r8, r8, lsl #(32 - 4)
-       stmcsia ip!, {r4, r5, r6, r7}
-       stmmiia ip!, {r4, r5}
+       stmiacs ip!, {r4, r5, r6, r7}
+       stmiami ip!, {r4, r5}
        tst     r8, #(1 << 30)
        mov     r8, r1
        strne   r1, [ip], #4
 
 3:     subs    r2, r2, #64
-       stmgeia ip!, {r1, r3-r8, lr}
-       stmgeia ip!, {r1, r3-r8, lr}
+       stmiage ip!, {r1, r3-r8, lr}
+       stmiage ip!, {r1, r3-r8, lr}
        bgt     3b
-       ldmeqfd sp!, {r4-r8, pc}
+       ldmfdeq sp!, {r4-r8, pc}
 
        tst     r2, #32
-       stmneia ip!, {r1, r3-r8, lr}
+       stmiane ip!, {r1, r3-r8, lr}
        tst     r2, #16
-       stmneia ip!, {r4-r7}
+       stmiane ip!, {r4-r7}
        ldmfd   sp!, {r4-r8, lr}
 UNWIND( .fnend                 )
 
@@ -110,7 +110,7 @@ UNWIND( .fnend                 )
 
 UNWIND( .fnstart            )
 4:     tst     r2, #8
-       stmneia ip!, {r1, r3}
+       stmiane ip!, {r1, r3}
        tst     r2, #4
        strne   r1, [ip], #4
 /*
@@ -118,17 +118,17 @@ UNWIND( .fnstart            )
  * may have an unaligned pointer as well.
  */
 5:     tst     r2, #2
-       strneb  r1, [ip], #1
-       strneb  r1, [ip], #1
+       strbne  r1, [ip], #1
+       strbne  r1, [ip], #1
        tst     r2, #1
-       strneb  r1, [ip], #1
+       strbne  r1, [ip], #1
        ret     lr
 
 6:     subs    r2, r2, #4              @ 1 do we have enough
        blt     5b                      @ 1 bytes to align with?
        cmp     r3, #2                  @ 1
-       strltb  r1, [ip], #1            @ 1
-       strleb  r1, [ip], #1            @ 1
+       strblt  r1, [ip], #1            @ 1
+       strble  r1, [ip], #1            @ 1
        strb    r1, [ip], #1            @ 1
        add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
        b       1b
index 2c40aeab3eaae8cb038a283b6fa2dc422d744d08..c691b901092f55a8f251c186a6938ba19d79f6ec 100644 (file)
@@ -14,7 +14,7 @@
 MODULE_LICENSE("GPL");
 
 #ifndef __ARM_NEON__
-#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
+#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
 #endif
 
 /*
index 3efaa10efc43929010c20aa31a453de58a633fbd..4fd479c948e670429449fe1bd170bc448f4bc27f 100644 (file)
@@ -39,10 +39,6 @@ static void __iomem *sps_base_addr;
 static void __iomem *timer_base_addr;
 static int ncores;
 
-static DEFINE_SPINLOCK(boot_lock);
-
-void owl_secondary_startup(void);
-
 static int s500_wakeup_secondary(unsigned int cpu)
 {
        int ret;
@@ -84,7 +80,6 @@ static int s500_wakeup_secondary(unsigned int cpu)
 
 static int s500_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-       unsigned long timeout;
        int ret;
 
        ret = s500_wakeup_secondary(cpu);
@@ -93,21 +88,11 @@ static int s500_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
 
        udelay(10);
 
-       spin_lock(&boot_lock);
-
        smp_send_reschedule(cpu);
 
-       timeout = jiffies + (1 * HZ);
-       while (time_before(jiffies, timeout)) {
-               if (pen_release == -1)
-                       break;
-       }
-
        writel(0, timer_base_addr + OWL_CPU1_ADDR + (cpu - 1) * 4);
        writel(0, timer_base_addr + OWL_CPU1_FLAG + (cpu - 1) * 4);
 
-       spin_unlock(&boot_lock);
-
        return 0;
 }
 
index 005695c9bf4006130719acc36e0ee2f4b7ae7797..0ac2cb9a735568613c3dc7cdd52b599945e0134c 100644 (file)
@@ -36,4 +36,4 @@ ENDPROC(exynos4_secondary_startup)
 
        .align 2
 1:     .long   .
-       .long   pen_release
+       .long   exynos_pen_release
index b6da7edbbd2fd1a7d27dbf62e4b9c73a79747e31..abcac616423319bf0634bce260a85c3fe9134b60 100644 (file)
@@ -28,6 +28,9 @@
 
 extern void exynos4_secondary_startup(void);
 
+/* XXX exynos_pen_release is cargo culted code - DO NOT COPY XXX */
+volatile int exynos_pen_release = -1;
+
 #ifdef CONFIG_HOTPLUG_CPU
 static inline void cpu_leave_lowpower(u32 core_id)
 {
@@ -57,7 +60,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
 
                wfi();
 
-               if (pen_release == core_id) {
+               if (exynos_pen_release == core_id) {
                        /*
                         * OK, proper wakeup, we're done
                         */
@@ -228,15 +231,17 @@ void exynos_core_restart(u32 core_id)
 }
 
 /*
- * Write pen_release in a way that is guaranteed to be visible to all
- * observers, irrespective of whether they're taking part in coherency
+ * XXX CARGO CULTED CODE - DO NOT COPY XXX
+ *
+ * Write exynos_pen_release in a way that is guaranteed to be visible to
+ * all observers, irrespective of whether they're taking part in coherency
  * or not.  This is necessary for the hotplug code to work reliably.
  */
-static void write_pen_release(int val)
+static void exynos_write_pen_release(int val)
 {
-       pen_release = val;
+       exynos_pen_release = val;
        smp_wmb();
-       sync_cache_w(&pen_release);
+       sync_cache_w(&exynos_pen_release);
 }
 
 static DEFINE_SPINLOCK(boot_lock);
@@ -247,7 +252,7 @@ static void exynos_secondary_init(unsigned int cpu)
         * let the primary processor know we're out of the
         * pen, then head off into the C entry point
         */
-       write_pen_release(-1);
+       exynos_write_pen_release(-1);
 
        /*
         * Synchronise with the boot thread.
@@ -322,12 +327,12 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
        /*
         * The secondary processor is waiting to be released from
         * the holding pen - release it, then wait for it to flag
-        * that it has been released by resetting pen_release.
+        * that it has been released by resetting exynos_pen_release.
         *
-        * Note that "pen_release" is the hardware CPU core ID, whereas
+        * Note that "exynos_pen_release" is the hardware CPU core ID, whereas
         * "cpu" is Linux's internal ID.
         */
-       write_pen_release(core_id);
+       exynos_write_pen_release(core_id);
 
        if (!exynos_cpu_power_state(core_id)) {
                exynos_cpu_power_up(core_id);
@@ -376,13 +381,13 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
                else
                        arch_send_wakeup_ipi_mask(cpumask_of(cpu));
 
-               if (pen_release == -1)
+               if (exynos_pen_release == -1)
                        break;
 
                udelay(10);
        }
 
-       if (pen_release != -1)
+       if (exynos_pen_release != -1)
                ret = -ETIMEDOUT;
 
        /*
@@ -392,7 +397,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
 fail:
        spin_unlock(&boot_lock);
 
-       return pen_release != -1 ? ret : 0;
+       return exynos_pen_release != -1 ? ret : 0;
 }
 
 static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
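
The smp.c hunk earlier removed the kernel-wide pen_release; exynos (here), prima2 and spear (below) now keep private copies, loudly branded "cargo culted - DO NOT COPY" so that no new platform adopts the holding-pen boot protocol. What the variable implements, sketched with a hypothetical name:

	/* Holding-pen release: publish the hardware CPU id where a parked,
	 * possibly non-coherent secondary can observe it. */
	volatile int demo_pen_release = -1;

	static void release_from_pen(int hw_cpu_id)
	{
		demo_pen_release = hw_cpu_id;
		smp_wmb();			 /* order the store */
		sync_cache_w(&demo_pen_release); /* clean to PoC for the observer */
	}
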
index 8315b34f32ff00923738c48bf9d654a37b3c5f43..7ff812cb010bb5e53f7cca7c1cb7552c1a2974a2 100644 (file)
@@ -42,6 +42,6 @@
                moveq   \irqstat, \irqstat, lsr #2
                addeq   \irqnr, \irqnr, #2
                tst     \irqstat, #0x01
-               addeqs  \irqnr, \irqnr, #1
+               addseq  \irqnr, \irqnr, #1
 1001:
        .endm
index 058a37e6d11c34955ab37f4df9833cdb0166fb6c..fd6e0671f957342e06e0a1601837f221969a01af 100644 (file)
@@ -523,8 +523,10 @@ void omap_prm_reset_system(void)
 
        prm_ll_data->reset_system();
 
-       while (1)
+       while (1) {
                cpu_relax();
+               wfe();
+       }
 }
 
 /**
index b625906a99702d7ac36941401ef4c67aa475b067..61a34e1c0f2217f3a8eb7da0e09909dff832d2c4 100644 (file)
@@ -1,2 +1 @@
 obj-$(CONFIG_SMP)              += platsmp.o headsmp.o
-obj-$(CONFIG_HOTPLUG_CPU)      += hotplug.o
diff --git a/arch/arm/mach-oxnas/hotplug.c b/arch/arm/mach-oxnas/hotplug.c
deleted file mode 100644 (file)
index 854f29b..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *  Copyright (C) 2002 ARM Ltd.
- *  All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/smp.h>
-
-#include <asm/cp15.h>
-#include <asm/smp_plat.h>
-
-static inline void cpu_enter_lowpower(void)
-{
-       unsigned int v;
-
-       asm volatile(
-       "       mcr     p15, 0, %1, c7, c5, 0\n"
-       "       mcr     p15, 0, %1, c7, c10, 4\n"
-       /*
-        * Turn off coherency
-        */
-       "       mrc     p15, 0, %0, c1, c0, 1\n"
-       "       bic     %0, %0, #0x20\n"
-       "       mcr     p15, 0, %0, c1, c0, 1\n"
-       "       mrc     p15, 0, %0, c1, c0, 0\n"
-       "       bic     %0, %0, %2\n"
-       "       mcr     p15, 0, %0, c1, c0, 0\n"
-         : "=&r" (v)
-         : "r" (0), "Ir" (CR_C)
-         : "cc");
-}
-
-static inline void cpu_leave_lowpower(void)
-{
-       unsigned int v;
-
-       asm volatile(   "mrc    p15, 0, %0, c1, c0, 0\n"
-       "       orr     %0, %0, %1\n"
-       "       mcr     p15, 0, %0, c1, c0, 0\n"
-       "       mrc     p15, 0, %0, c1, c0, 1\n"
-       "       orr     %0, %0, #0x20\n"
-       "       mcr     p15, 0, %0, c1, c0, 1\n"
-         : "=&r" (v)
-         : "Ir" (CR_C)
-         : "cc");
-}
-
-static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
-{
-       /*
-        * there is no power-control hardware on this platform, so all
-        * we can do is put the core into WFI; this is safe as the calling
-        * code will have already disabled interrupts
-        */
-       for (;;) {
-               /*
-                * here's the WFI
-                */
-               asm(".word      0xe320f003\n"
-                   :
-                   :
-                   : "memory", "cc");
-
-               if (pen_release == cpu_logical_map(cpu)) {
-                       /*
-                        * OK, proper wakeup, we're done
-                        */
-                       break;
-               }
-
-               /*
-                * Getting here, means that we have come out of WFI without
-                * having been woken up - this shouldn't happen
-                *
-                * Just note it happening - when we're woken, we can report
-                * its occurrence.
-                */
-               (*spurious)++;
-       }
-}
-
-/*
- * platform-specific code to shutdown a CPU
- *
- * Called with IRQs disabled
- */
-void ox820_cpu_die(unsigned int cpu)
-{
-       int spurious = 0;
-
-       /*
-        * we're ready for shutdown now, so do it
-        */
-       cpu_enter_lowpower();
-       platform_do_lowpower(cpu, &spurious);
-
-       /*
-        * bring this CPU back into the world of cache
-        * coherency, and then restore interrupts
-        */
-       cpu_leave_lowpower();
-
-       if (spurious)
-               pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious);
-}
index 442cc8a2f7dc81e43a88c2ef6510ba177218ac09..735141c0e3a377275f3c241f8dc181edee2c2c98 100644 (file)
@@ -19,7 +19,6 @@
 #include <asm/smp_scu.h>
 
 extern void ox820_secondary_startup(void);
-extern void ox820_cpu_die(unsigned int cpu);
 
 static void __iomem *cpu_ctrl;
 static void __iomem *gic_cpu_ctrl;
@@ -94,9 +93,6 @@ unmap_scu:
 static const struct smp_operations ox820_smp_ops __initconst = {
        .smp_prepare_cpus       = ox820_smp_prepare_cpus,
        .smp_boot_secondary     = ox820_boot_secondary,
-#ifdef CONFIG_HOTPLUG_CPU
-       .cpu_die                = ox820_cpu_die,
-#endif
 };
 
 CPU_METHOD_OF_DECLARE(ox820_smp, "oxsemi,ox820-smp", &ox820_smp_ops);
index 6d77b622d168502978369df3613abf0763e0563f..457eb7b1816007edde0307f4f80b5c5537438b1f 100644 (file)
@@ -15,6 +15,8 @@
 #include <asm/mach/time.h>
 #include <asm/exception.h>
 
+extern volatile int prima2_pen_release;
+
 extern const struct smp_operations sirfsoc_smp_ops;
 extern void sirfsoc_secondary_startup(void);
 extern void sirfsoc_cpu_die(unsigned int cpu);
index 209d9fc5c16cf49909434ac243c1f794f3d22f81..6cf4fc60347b5fdad94708f739c811c39777c80e 100644 (file)
@@ -34,4 +34,4 @@ ENDPROC(sirfsoc_secondary_startup)
 
         .align
 1:      .long   .
-        .long   pen_release
+        .long   prima2_pen_release
index a728c78b996f7fa0e1050f9e79c775cfa14b42da..b6cf1527e3309ce3ee0a18f20189c1be0099bd39 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/smp.h>
 
 #include <asm/smp_plat.h>
+#include "common.h"
 
 static inline void platform_do_lowpower(unsigned int cpu)
 {
@@ -18,7 +19,7 @@ static inline void platform_do_lowpower(unsigned int cpu)
        for (;;) {
                __asm__ __volatile__("dsb\n\t" "wfi\n\t"
                        : : : "memory");
-               if (pen_release == cpu_logical_map(cpu)) {
+               if (prima2_pen_release == cpu_logical_map(cpu)) {
                        /*
                         * OK, proper wakeup, we're done
                         */
index 75ef5d4be554ce9f8564f347e52da1e6766bf5ac..d1f8b5168083c345ec6e4481a65b69be68661a99 100644 (file)
@@ -24,13 +24,16 @@ static void __iomem *clk_base;
 
 static DEFINE_SPINLOCK(boot_lock);
 
+/* XXX prima2_pen_release is cargo culted code - DO NOT COPY XXX */
+volatile int prima2_pen_release = -1;
+
 static void sirfsoc_secondary_init(unsigned int cpu)
 {
        /*
         * let the primary processor know we're out of the
         * pen, then head off into the C entry point
         */
-       pen_release = -1;
+       prima2_pen_release = -1;
        smp_wmb();
 
        /*
@@ -80,13 +83,13 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
        /*
         * The secondary processor is waiting to be released from
         * the holding pen - release it, then wait for it to flag
-        * that it has been released by resetting pen_release.
+        * that it has been released by resetting prima2_pen_release.
         *
-        * Note that "pen_release" is the hardware CPU ID, whereas
+        * Note that "prima2_pen_release" is the hardware CPU ID, whereas
         * "cpu" is Linux's internal ID.
         */
-       pen_release = cpu_logical_map(cpu);
-       sync_cache_w(&pen_release);
+       prima2_pen_release = cpu_logical_map(cpu);
+       sync_cache_w(&prima2_pen_release);
 
        /*
         * Send the secondary CPU SEV, thereby causing the boot monitor to read
@@ -97,7 +100,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
        timeout = jiffies + (1 * HZ);
        while (time_before(jiffies, timeout)) {
                smp_rmb();
-               if (pen_release == -1)
+               if (prima2_pen_release == -1)
                        break;
 
                udelay(10);
@@ -109,7 +112,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
         */
        spin_unlock(&boot_lock);
 
-       return pen_release != -1 ? -ENOSYS : 0;
+       return prima2_pen_release != -1 ? -ENOSYS : 0;
 }
 
 const struct smp_operations sirfsoc_smp_ops __initconst = {
index 5494c9e0c909b549ec696a9482852c339a4039a9..99a6a5e809e0e953545c552001d7ffbb44fb6ad8 100644 (file)
@@ -46,8 +46,6 @@
 
 extern void secondary_startup_arm(void);
 
-static DEFINE_SPINLOCK(boot_lock);
-
 #ifdef CONFIG_HOTPLUG_CPU
 static void qcom_cpu_die(unsigned int cpu)
 {
@@ -55,15 +53,6 @@ static void qcom_cpu_die(unsigned int cpu)
 }
 #endif
 
-static void qcom_secondary_init(unsigned int cpu)
-{
-       /*
-        * Synchronise with the boot thread.
-        */
-       spin_lock(&boot_lock);
-       spin_unlock(&boot_lock);
-}
-
 static int scss_release_secondary(unsigned int cpu)
 {
        struct device_node *node;
@@ -280,12 +269,6 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
                        per_cpu(cold_boot_done, cpu) = true;
        }
 
-       /*
-        * set synchronisation state between this boot processor
-        * and the secondary one
-        */
-       spin_lock(&boot_lock);
-
        /*
         * Send the secondary CPU a soft interrupt, thereby causing
         * the boot monitor to read the system wide flags register,
@@ -293,12 +276,6 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
         */
        arch_send_wakeup_ipi_mask(cpumask_of(cpu));
 
-       /*
-        * now the secondary core is starting up let it run its
-        * calibrations, then wait for it to finish
-        */
-       spin_unlock(&boot_lock);
-
        return ret;
 }
 
@@ -334,7 +311,6 @@ static void __init qcom_smp_prepare_cpus(unsigned int max_cpus)
 
 static const struct smp_operations smp_msm8660_ops __initconst = {
        .smp_prepare_cpus       = qcom_smp_prepare_cpus,
-       .smp_secondary_init     = qcom_secondary_init,
        .smp_boot_secondary     = msm8660_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
        .cpu_die                = qcom_cpu_die,
@@ -344,7 +320,6 @@ CPU_METHOD_OF_DECLARE(qcom_smp, "qcom,gcc-msm8660", &smp_msm8660_ops);
 
 static const struct smp_operations qcom_smp_kpssv1_ops __initconst = {
        .smp_prepare_cpus       = qcom_smp_prepare_cpus,
-       .smp_secondary_init     = qcom_secondary_init,
        .smp_boot_secondary     = kpssv1_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
        .cpu_die                = qcom_cpu_die,
@@ -354,7 +329,6 @@ CPU_METHOD_OF_DECLARE(qcom_smp_kpssv1, "qcom,kpss-acc-v1", &qcom_smp_kpssv1_ops)
 
 static const struct smp_operations qcom_smp_kpssv2_ops __initconst = {
        .smp_prepare_cpus       = qcom_smp_prepare_cpus,
-       .smp_secondary_init     = qcom_secondary_init,
        .smp_boot_secondary     = kpssv2_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
        .cpu_die                = qcom_cpu_die,
index 909b97c0b23719010d4950532bcfada3f5ea1c30..25b4c5e66e39610e34de8ba0128e1b43ac18ebb0 100644 (file)
@@ -20,6 +20,8 @@
 
 #include <asm/mach/time.h>
 
+extern volatile int spear_pen_release;
+
 extern void spear13xx_timer_init(void);
 extern void spear3xx_timer_init(void);
 extern struct pl022_ssp_controller pl022_plat_data;
index c52192dc3d9f9e3040bf5f5c49c7ce8d644276d6..6e250b6c0aa230815e41e37c4090152107d1cfd5 100644 (file)
@@ -43,5 +43,5 @@ pen:  ldr     r7, [r6]
 
        .align
 1:     .long   .
-       .long   pen_release
+       .long   spear_pen_release
 ENDPROC(spear13xx_secondary_startup)
index 12edd1cf8a12f11a2a07851f59bc5747e925cd36..0dd84f609627ac7194679ea49a4259f6620d13dc 100644 (file)
@@ -16,6 +16,8 @@
 #include <asm/cp15.h>
 #include <asm/smp_plat.h>
 
+#include "generic.h"
+
 static inline void cpu_enter_lowpower(void)
 {
        unsigned int v;
@@ -57,7 +59,7 @@ static inline void spear13xx_do_lowpower(unsigned int cpu, int *spurious)
        for (;;) {
                wfi();
 
-               if (pen_release == cpu) {
+               if (spear_pen_release == cpu) {
                        /*
                         * OK, proper wakeup, we're done
                         */
index 39038a03836acb8f3288488f063a99d5ef0f814c..b1ff4bb86f6d8aaeaa6c662b07a05a5a26847ed8 100644 (file)
 #include <mach/spear.h>
 #include "generic.h"
 
+/* XXX spear_pen_release is cargo culted code - DO NOT COPY XXX */
+volatile int spear_pen_release = -1;
+
 /*
- * Write pen_release in a way that is guaranteed to be visible to all
- * observers, irrespective of whether they're taking part in coherency
+ * XXX CARGO CULTED CODE - DO NOT COPY XXX
+ *
+ * Write spear_pen_release in a way that is guaranteed to be visible to
+ * all observers, irrespective of whether they're taking part in coherency
  * or not.  This is necessary for the hotplug code to work reliably.
  */
-static void write_pen_release(int val)
+static void spear_write_pen_release(int val)
 {
-       pen_release = val;
+       spear_pen_release = val;
        smp_wmb();
-       sync_cache_w(&pen_release);
+       sync_cache_w(&spear_pen_release);
 }
 
 static DEFINE_SPINLOCK(boot_lock);
@@ -42,7 +47,7 @@ static void spear13xx_secondary_init(unsigned int cpu)
         * let the primary processor know we're out of the
         * pen, then head off into the C entry point
         */
-       write_pen_release(-1);
+       spear_write_pen_release(-1);
 
        /*
         * Synchronise with the boot thread.
@@ -64,17 +69,17 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
        /*
         * The secondary processor is waiting to be released from
         * the holding pen - release it, then wait for it to flag
-        * that it has been released by resetting pen_release.
+        * that it has been released by resetting spear_pen_release.
         *
-        * Note that "pen_release" is the hardware CPU ID, whereas
+        * Note that "spear_pen_release" is the hardware CPU ID, whereas
         * "cpu" is Linux's internal ID.
         */
-       write_pen_release(cpu);
+       spear_write_pen_release(cpu);
 
        timeout = jiffies + (1 * HZ);
        while (time_before(jiffies, timeout)) {
                smp_rmb();
-               if (pen_release == -1)
+               if (spear_pen_release == -1)
                        break;
 
                udelay(10);
@@ -86,7 +91,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
         */
        spin_unlock(&boot_lock);
 
-       return pen_release != -1 ? -ENOSYS : 0;
+       return spear_pen_release != -1 ? -ENOSYS : 0;
 }
 
 /*
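
The comments in this file describe the holding-pen protocol piecemeal; a condensed sketch of the whole handshake, reconstructed from the hunks above (the secondary's polling loop lives in spear13xx_secondary_startup):

    /* boot CPU (spear13xx_boot_secondary)    secondary CPU (in the pen)
     *
     * spear_write_pen_release(cpu);          spins until spear_pen_release
     *   (smp_wmb + sync_cache_w)                matches its own id
     * polls, up to 1 second, for             spear_write_pen_release(-1);
     *   spear_pen_release == -1              proceeds into the kernel
     * return spear_pen_release != -1
     *        ? -ENOSYS : 0;
     */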
index 805f306fa6f707f055878a31f00a2f412a89f9c5..e22ccf87eded394ff99df3187ddf5309f886fdc4 100644 (file)
@@ -172,7 +172,7 @@ after_errata:
        mov32   r5, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET
        mov     r0, #CPU_NOT_RESETTABLE
        cmp     r10, #0
-       strneb  r0, [r5, #__tegra20_cpu1_resettable_status_offset]
+       strbne  r0, [r5, #__tegra20_cpu1_resettable_status_offset]
 1:
 #endif
 
index 24659952c2784de64a53dc2e889ab616bd19b12b..be68d62566c7cd86f5ffd3aabfd9b1bd1e9a70de 100644 (file)
@@ -215,8 +215,8 @@ v6_dma_inv_range:
 #endif
        tst     r1, #D_CACHE_LINE_SIZE - 1
 #ifdef CONFIG_DMA_CACHE_RWFO
-       ldrneb  r2, [r1, #-1]                   @ read for ownership
-       strneb  r2, [r1, #-1]                   @ write for ownership
+       ldrbne  r2, [r1, #-1]                   @ read for ownership
+       strbne  r2, [r1, #-1]                   @ write for ownership
 #endif
        bic     r1, r1, #D_CACHE_LINE_SIZE - 1
 #ifdef HARVARD_CACHE
@@ -284,8 +284,8 @@ ENTRY(v6_dma_flush_range)
        add     r0, r0, #D_CACHE_LINE_SIZE
        cmp     r0, r1
 #ifdef CONFIG_DMA_CACHE_RWFO
-       ldrlob  r2, [r0]                        @ read for ownership
-       strlob  r2, [r0]                        @ write for ownership
+       ldrblo  r2, [r0]                        @ read for ownership
+       strblo  r2, [r0]                        @ write for ownership
 #endif
        blo     1b
        mov     r0, #0
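
The strneb/strbne and ldrneb/ldrbne swaps in this file, like the ldmneia/ldmiane ones in the copypage files below, are mechanical conversions to ARM unified assembler language (UAL), in which the condition code is written after the size suffix rather than before it; the copypage hunks also gain a `.syntax unified` directive so their inline asm is assembled under the same rules. A minimal before/after pair:

    @ divided (pre-UAL) syntax          @ unified (UAL) syntax
    strneb  r2, [r1, #-1]               strbne  r2, [r1, #-1]    @ store byte if NE
    ldmneia sp, {r0-r6, lr}             ldmiane sp, {r0-r6, lr}  @ load multiple if NE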
index b03202cddddb2d07bf2fcfe3ee2d9d118066f846..f74cdce6d4dad47fd51ab18212e2d7979fbfc34f 100644 (file)
@@ -45,6 +45,7 @@ static void mc_copy_user_page(void *from, void *to)
        int tmp;
 
        asm volatile ("\
+       .syntax unified\n\
        ldmia   %0!, {r2, r3, ip, lr}           @ 4\n\
 1:     mcr     p15, 0, %1, c7, c6, 1           @ 1   invalidate D line\n\
        stmia   %1!, {r2, r3, ip, lr}           @ 4\n\
@@ -56,7 +57,7 @@ static void mc_copy_user_page(void *from, void *to)
        ldmia   %0!, {r2, r3, ip, lr}           @ 4\n\
        subs    %2, %2, #1                      @ 1\n\
        stmia   %1!, {r2, r3, ip, lr}           @ 4\n\
-       ldmneia %0!, {r2, r3, ip, lr}           @ 4\n\
+       ldmiane %0!, {r2, r3, ip, lr}           @ 4\n\
        bne     1b                              @ "
        : "+&r" (from), "+&r" (to), "=&r" (tmp)
        : "2" (PAGE_SIZE / 64)
index cd3e165afeedeb400c19b1dbb1b578e10b0d2400..6d336740aae49374c37946dc292cf5270dc5629a 100644 (file)
@@ -27,6 +27,7 @@ static void v4wb_copy_user_page(void *kto, const void *kfrom)
        int tmp;
 
        asm volatile ("\
+       .syntax unified\n\
        ldmia   %1!, {r3, r4, ip, lr}           @ 4\n\
 1:     mcr     p15, 0, %0, c7, c6, 1           @ 1   invalidate D line\n\
        stmia   %0!, {r3, r4, ip, lr}           @ 4\n\
@@ -38,7 +39,7 @@ static void v4wb_copy_user_page(void *kto, const void *kfrom)
        ldmia   %1!, {r3, r4, ip, lr}           @ 4\n\
        subs    %2, %2, #1                      @ 1\n\
        stmia   %0!, {r3, r4, ip, lr}           @ 4\n\
-       ldmneia %1!, {r3, r4, ip, lr}           @ 4\n\
+       ldmiane %1!, {r3, r4, ip, lr}           @ 4\n\
        bne     1b                              @ 1\n\
        mcr     p15, 0, %1, c7, c10, 4          @ 1   drain WB"
        : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
index 8614572e1296ba904a018fd07b2dfe66843a5272..3851bb39644286bd49122cc0cd16b3df58ba2d07 100644 (file)
@@ -25,6 +25,7 @@ static void v4wt_copy_user_page(void *kto, const void *kfrom)
        int tmp;
 
        asm volatile ("\
+       .syntax unified\n\
        ldmia   %1!, {r3, r4, ip, lr}           @ 4\n\
 1:     stmia   %0!, {r3, r4, ip, lr}           @ 4\n\
        ldmia   %1!, {r3, r4, ip, lr}           @ 4+1\n\
@@ -34,7 +35,7 @@ static void v4wt_copy_user_page(void *kto, const void *kfrom)
        ldmia   %1!, {r3, r4, ip, lr}           @ 4\n\
        subs    %2, %2, #1                      @ 1\n\
        stmia   %0!, {r3, r4, ip, lr}           @ 4\n\
-       ldmneia %1!, {r3, r4, ip, lr}           @ 4\n\
+       ldmiane %1!, {r3, r4, ip, lr}           @ 4\n\
        bne     1b                              @ 1\n\
        mcr     p15, 0, %2, c7, c7, 0           @ flush ID cache"
        : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
index c6aab9c36ff189b98373586cf619324b0ebb6160..43f46aa7ef3351e6cc278d42ea8ab7d0dc861dcb 100644 (file)
@@ -2279,7 +2279,7 @@ EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
  * @dev: valid struct device pointer
  *
  * Detaches the provided device from a previously attached map.
- * This voids the dma operations (dma_map_ops pointer)
+ * This overwrites the dma_ops pointer with appropriate non-IOMMU ops.
  */
 void arm_iommu_detach_device(struct device *dev)
 {
index 1d1edd0641995490b520b690f2ecbe3410d6524a..a033f6134a6499030252585eb933505569ecaed1 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <asm/cputype.h>
 #include <asm/idmap.h>
+#include <asm/hwcap.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/sections.h>
@@ -110,7 +111,8 @@ static int __init init_static_idmap(void)
                             __idmap_text_end, 0);
 
        /* Flush L1 for the hardware to see this page table content */
-       flush_cache_louis();
+       if (!(elf_hwcap & HWCAP_LPAE))
+               flush_cache_louis();
 
        return 0;
 }
index 15dddfe43319547d2e3c81a80a33f0315a3e2c55..c2daabbe0af05da23a469efe0ea2ed1a6eb2eb7b 100644 (file)
@@ -282,15 +282,12 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 
 void __init bootmem_init(void)
 {
-       unsigned long min, max_low, max_high;
-
        memblock_allow_resize();
-       max_low = max_high = 0;
 
-       find_limits(&min, &max_low, &max_high);
+       find_limits(&min_low_pfn, &max_low_pfn, &max_pfn);
 
-       early_memtest((phys_addr_t)min << PAGE_SHIFT,
-                     (phys_addr_t)max_low << PAGE_SHIFT);
+       early_memtest((phys_addr_t)min_low_pfn << PAGE_SHIFT,
+                     (phys_addr_t)max_low_pfn << PAGE_SHIFT);
 
        /*
         * Sparsemem tries to allocate bootmem in memory_present(),
@@ -308,16 +305,7 @@ void __init bootmem_init(void)
         * the sparse mem_map arrays initialized by sparse_init()
         * for memmap_init_zone(), otherwise all PFNs are invalid.
         */
-       zone_sizes_init(min, max_low, max_high);
-
-       /*
-        * This doesn't seem to be used by the Linux memory manager any
-        * more, but is used by ll_rw_block.  If we can get rid of it, we
-        * also get rid of some of the stuff above as well.
-        */
-       min_low_pfn = min;
-       max_low_pfn = max_low;
-       max_pfn = max_high;
+       zone_sizes_init(min_low_pfn, max_low_pfn, max_pfn);
 }
 
 /*
@@ -498,55 +486,6 @@ void __init mem_init(void)
 
        mem_init_print_info(NULL);
 
-#define MLK(b, t) b, t, ((t) - (b)) >> 10
-#define MLM(b, t) b, t, ((t) - (b)) >> 20
-#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
-
-       pr_notice("Virtual kernel memory layout:\n"
-                       "    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#ifdef CONFIG_HAVE_TCM
-                       "    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-                       "    ITCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#endif
-                       "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-                       "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-                       "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-#ifdef CONFIG_HIGHMEM
-                       "    pkmap   : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-#endif
-#ifdef CONFIG_MODULES
-                       "    modules : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-#endif
-                       "      .text : 0x%p" " - 0x%p" "   (%4td kB)\n"
-                       "      .init : 0x%p" " - 0x%p" "   (%4td kB)\n"
-                       "      .data : 0x%p" " - 0x%p" "   (%4td kB)\n"
-                       "       .bss : 0x%p" " - 0x%p" "   (%4td kB)\n",
-
-                       MLK(VECTORS_BASE, VECTORS_BASE + PAGE_SIZE),
-#ifdef CONFIG_HAVE_TCM
-                       MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
-                       MLK(ITCM_OFFSET, (unsigned long) itcm_end),
-#endif
-                       MLK(FIXADDR_START, FIXADDR_END),
-                       MLM(VMALLOC_START, VMALLOC_END),
-                       MLM(PAGE_OFFSET, (unsigned long)high_memory),
-#ifdef CONFIG_HIGHMEM
-                       MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) *
-                               (PAGE_SIZE)),
-#endif
-#ifdef CONFIG_MODULES
-                       MLM(MODULES_VADDR, MODULES_END),
-#endif
-
-                       MLK_ROUNDUP(_text, _etext),
-                       MLK_ROUNDUP(__init_begin, __init_end),
-                       MLK_ROUNDUP(_sdata, _edata),
-                       MLK_ROUNDUP(__bss_start, __bss_stop));
-
-#undef MLK
-#undef MLM
-#undef MLK_ROUNDUP
-
        /*
         * Check boundaries twice: Some fundamental inconsistencies can
         * be detected at build time already.
index 617a83def88a9f8e5d907b7a9965544a26ba1b40..0d7d5fb59247d42038e0c69dcec3d89499c77cd7 100644 (file)
@@ -165,7 +165,7 @@ static int __init pmsav8_setup_ram(unsigned int number, phys_addr_t start,phys_a
                return -EINVAL;
 
        bar = start;
-       lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);;
+       lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
 
        bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED;
        lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
@@ -181,7 +181,7 @@ static int __init pmsav8_setup_io(unsigned int number, phys_addr_t start,phys_ad
                return -EINVAL;
 
        bar = start;
-       lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);;
+       lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
 
        bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN;
        lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN;
index 47a5acc644333f7f995293ef6b3dc6fb3527270a..acd5a66dfc23bb2dbe2b95e03453cbd3df2a999b 100644 (file)
@@ -139,6 +139,9 @@ __v7m_setup_cont:
        cpsie   i
        svc     #0
 1:     cpsid   i
+       ldr     r0, =exc_ret
+       orr     lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK
+       str     lr, [r0]
        ldmia   sp, {r0-r3, r12}
        str     r5, [r12, #11 * 4]      @ restore the original SVC vector entry
        mov     lr, r6                  @ restore LR
@@ -149,10 +152,10 @@ __v7m_setup_cont:
 
        @ Configure caches (if implemented)
        teq     r8, #0
-       stmneia sp, {r0-r6, lr}         @ v7m_invalidate_l1 touches r0-r6
+       stmiane sp, {r0-r6, lr}         @ v7m_invalidate_l1 touches r0-r6
        blne    v7m_invalidate_l1
        teq     r8, #0                  @ re-evaluate condition
-       ldmneia sp, {r0-r6, lr}
+       ldmiane sp, {r0-r6, lr}
 
        @ Configure the System Control Register to ensure 8-byte stack alignment
        @ Note the STKALIGN bit is either RW or RAO.
index 506386a3eddecd2e12decd7a19ab203932e2293c..d3842791e1c42a2a9bc8b62238b1658c1e650193 100644 (file)
@@ -77,6 +77,10 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
         */
        if (!vcpu_el1_is_32bit(vcpu))
                vcpu->arch.hcr_el2 |= HCR_TID3;
+
+       if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
+           vcpu_el1_is_32bit(vcpu))
+               vcpu->arch.hcr_el2 |= HCR_TID2;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -331,6 +335,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
        return ESR_ELx_SYS64_ISS_RT(esr);
 }
 
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vcpu_trap_is_iabt(vcpu))
+               return false;
+
+       return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
        return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
index 222af1d2c3e4ac50695b053d6c3994c1e6047b25..a01fe087e022882d63f50e6fb766b13217a38208 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
+#include <asm/smp_plat.h>
 #include <asm/thread_info.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -58,16 +59,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
-struct kvm_arch {
+struct kvm_vmid {
        /* The VMID generation used for the virt. memory system */
        u64    vmid_gen;
        u32    vmid;
+};
+
+struct kvm_arch {
+       struct kvm_vmid vmid;
 
        /* stage2 entry level table */
        pgd_t *pgd;
+       phys_addr_t pgd_phys;
 
-       /* VTTBR value associated with above pgd and vmid */
-       u64    vttbr;
        /* VTCR_EL2 value for this VM */
        u64    vtcr;
 
@@ -382,7 +386,36 @@ void kvm_arm_halt_guest(struct kvm *kvm);
 void kvm_arm_resume_guest(struct kvm *kvm);
 
 u64 __kvm_call_hyp(void *hypfn, ...);
-#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
+/*
+ * The two isb() calls below are there to guarantee the same behaviour
+ * on VHE as on !VHE, where the eret to EL1 acts as a context
+ * synchronization event.
+ */
+#define kvm_call_hyp(f, ...)                                           \
+       do {                                                            \
+               if (has_vhe()) {                                        \
+                       f(__VA_ARGS__);                                 \
+                       isb();                                          \
+               } else {                                                \
+                       __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
+               }                                                       \
+       } while(0)
+
+#define kvm_call_hyp_ret(f, ...)                                       \
+       ({                                                              \
+               typeof(f(__VA_ARGS__)) ret;                             \
+                                                                       \
+               if (has_vhe()) {                                        \
+                       ret = f(__VA_ARGS__);                           \
+                       isb();                                          \
+               } else {                                                \
+                       ret = __kvm_call_hyp(kvm_ksym_ref(f),           \
+                                            ##__VA_ARGS__);            \
+               }                                                       \
+                                                                       \
+               ret;                                                    \
+       })
 
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
@@ -401,6 +434,13 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
 DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 
+static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+                                            int cpu)
+{
+       /* The host's MPIDR is immutable, so let's set it up at boot time */
+       cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu);
+}
+
 void __kvm_enable_ssbs(void);
 
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
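
With the split above, kvm_call_hyp() serves void hyp functions and kvm_call_hyp_ret() value-returning ones; both fold in the isb() on VHE so leaf functions need no change. A minimal usage sketch (the second line mirrors the kvm_arm_init_debug() update later in this diff; treat the first callee as merely illustrative):

    kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);        /* no return value */
    u64 mdcr = kvm_call_hyp_ret(__kvm_get_mdcr_el2);       /* returns a value */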
index a80a7ef573252a048401a20789683eb507ace060..4da765f2cca589a6ba0761b5002e876269b6a21f 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/alternative.h>
+#include <asm/kvm_mmu.h>
 #include <asm/sysreg.h>
 
 #define __hyp_text __section(.hyp.text) notrace
@@ -163,7 +164,7 @@ void __noreturn __hyp_do_panic(unsigned long, ...);
 static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
 {
        write_sysreg(kvm->arch.vtcr, vtcr_el2);
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
+       write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
 
        /*
         * ARM erratum 1165522 requires the actual execution of the above
index 8af4b1befa421338fc4c40e5824f58f938c60e9c..b0742a16c6c9e43ca73888c2c9778042174328a2 100644 (file)
@@ -138,7 +138,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
        })
 
 /*
- * We currently only support a 40bit IPA.
+ * We currently support using a VM-specified IPA size. For backward
+ * compatibility, the default IPA size is fixed at 40 bits.
  */
 #define KVM_PHYS_SHIFT (40)
 
@@ -591,9 +592,15 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
        return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
 }
 
-static inline bool kvm_cpu_has_cnp(void)
+static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
 {
-       return system_supports_cnp();
+       struct kvm_vmid *vmid = &kvm->arch.vmid;
+       u64 vmid_field, baddr;
+       u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
+
+       baddr = kvm->arch.pgd_phys;
+       vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+       return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
 }
 
 #endif /* __ASSEMBLY__ */
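
kvm_get_vttbr() now assembles the register value on demand rather than caching it in a kvm->arch.vttbr field. A sketch of the layout it builds, assuming VTTBR_VMID_SHIFT == 48 and CnP in bit 0 (8-bit VMIDs simply leave the upper VMID bits zero):

    /*
     *  63        48 47                            1   0
     * +------------+-------------------------------+-----+
     * |    VMID    |  BADDR (stage-2 pgd_phys)     | CnP |
     * +------------+-------------------------------+-----+
     *
     * vttbr = kvm_phys_to_vttbr(kvm->arch.pgd_phys)
     *       | (u64)vmid->vmid << VTTBR_VMID_SHIFT
     *       | (system_supports_cnp() ? VTTBR_CNP_BIT : 0);
     */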
index 72dc4c011014c208108ab878b4f9f65da2d56740..5b267dec6194e9675bb48f710a0d16b58eca3d64 100644 (file)
 
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
+#define SYS_CCSIDR_EL1                 sys_reg(3, 1, 0, 0, 0)
 #define SYS_CLIDR_EL1                  sys_reg(3, 1, 0, 0, 1)
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 
 #define SYS_CNTP_CTL_EL0               sys_reg(3, 3, 14, 2, 1)
 #define SYS_CNTP_CVAL_EL0              sys_reg(3, 3, 14, 2, 2)
 
+#define SYS_AARCH32_CNTP_TVAL          sys_reg(0, 0, 14, 2, 0)
+#define SYS_AARCH32_CNTP_CTL           sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTP_CVAL          sys_reg(0, 2, 0, 14, 0)
+
 #define __PMEV_op2(n)                  ((n) & 0x7)
 #define __CNTR_CRm(n)                  (0x8 | (((n) >> 3) & 0x3))
 #define SYS_PMEVCNTRn_EL0(n)           sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
 #define SYS_ICH_VTR_EL2                        sys_reg(3, 4, 12, 11, 1)
 #define SYS_ICH_MISR_EL2               sys_reg(3, 4, 12, 11, 2)
 #define SYS_ICH_EISR_EL2               sys_reg(3, 4, 12, 11, 3)
-#define SYS_ICH_ELSR_EL2               sys_reg(3, 4, 12, 11, 5)
+#define SYS_ICH_ELRSR_EL2              sys_reg(3, 4, 12, 11, 5)
 #define SYS_ICH_VMCR_EL2               sys_reg(3, 4, 12, 11, 7)
 
 #define __SYS__LR0_EL2(x)              sys_reg(3, 4, 12, 12, x)
index 0f2a135ba15bbe5bd66d148325d3ed227b1fe072..690e033a91c000513281a45f38ce39e680720e8b 100644 (file)
@@ -3,9 +3,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic
-CFLAGS_arm.o := -I.
-CFLAGS_mmu.o := -I.
+ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
 
 KVM=../../../virt/kvm
 
index f39801e4136cd0e27c3ba718a461d476de805992..fd917d6d12afb4060725e8342289390ea5461a2c 100644 (file)
@@ -76,7 +76,7 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
 
 void kvm_arm_init_debug(void)
 {
-       __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
+       __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2));
 }
 
 /**
index 952f6cb9cf72051ec1415ad5d63954985544ba53..2845aa680841ea9623e517ebb1050524544a1fc6 100644 (file)
@@ -40,9 +40,6 @@
  * arch/arm64/kernel/hyp_stub.S.
  */
 ENTRY(__kvm_call_hyp)
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
        hvc     #0
        ret
-alternative_else_nop_endif
-       b       __vhe_hyp_call
 ENDPROC(__kvm_call_hyp)
index 73c1b483ec3963817aca5a8c650766eb6d3d9508..2b1e686772bfd6786378ac4e4ee5afa3ce415fa8 100644 (file)
        ldr     lr, [sp], #16
 .endm
 
-ENTRY(__vhe_hyp_call)
-       do_el2_call
-       /*
-        * We used to rely on having an exception return to get
-        * an implicit isb. In the E2H case, we don't have it anymore.
-        * rather than changing all the leaf functions, just do it here
-        * before returning to the rest of the kernel.
-        */
-       isb
-       ret
-ENDPROC(__vhe_hyp_call)
-
 el1_sync:                              // Guest trapped into EL2
 
        mrs     x0, esr_el2
index b426e2cf973cfe01a90ae40545abb2ee46c66bca..c52a8451637c483f949b40f931e3a0ba3a99351e 100644 (file)
@@ -53,7 +53,6 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 
 static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
-       ctxt->sys_regs[MPIDR_EL1]       = read_sysreg(vmpidr_el2);
        ctxt->sys_regs[CSSELR_EL1]      = read_sysreg(csselr_el1);
        ctxt->sys_regs[SCTLR_EL1]       = read_sysreg_el1(sctlr);
        ctxt->sys_regs[ACTLR_EL1]       = read_sysreg(actlr_el1);
index c936aa40c3f4a0393d03ee66e4b8316c35fa0566..539feecda5b8123eed039c0dcda7221695f339b5 100644 (file)
@@ -982,6 +982,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
        return true;
 }
 
+#define reg_to_encoding(x)                                             \
+       sys_reg((u32)(x)->Op0, (u32)(x)->Op1,                           \
+               (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
+
 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
 #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                     \
        { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
@@ -1003,44 +1007,38 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
        { SYS_DESC(SYS_PMEVTYPERn_EL0(n)),                                      \
          access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
 
-static bool access_cntp_tval(struct kvm_vcpu *vcpu,
-               struct sys_reg_params *p,
-               const struct sys_reg_desc *r)
+static bool access_arch_timer(struct kvm_vcpu *vcpu,
+                             struct sys_reg_params *p,
+                             const struct sys_reg_desc *r)
 {
-       u64 now = kvm_phys_timer_read();
-       u64 cval;
+       enum kvm_arch_timers tmr;
+       enum kvm_arch_timer_regs treg;
+       u64 reg = reg_to_encoding(r);
 
-       if (p->is_write) {
-               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL,
-                                     p->regval + now);
-       } else {
-               cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
-               p->regval = cval - now;
+       switch (reg) {
+       case SYS_CNTP_TVAL_EL0:
+       case SYS_AARCH32_CNTP_TVAL:
+               tmr = TIMER_PTIMER;
+               treg = TIMER_REG_TVAL;
+               break;
+       case SYS_CNTP_CTL_EL0:
+       case SYS_AARCH32_CNTP_CTL:
+               tmr = TIMER_PTIMER;
+               treg = TIMER_REG_CTL;
+               break;
+       case SYS_CNTP_CVAL_EL0:
+       case SYS_AARCH32_CNTP_CVAL:
+               tmr = TIMER_PTIMER;
+               treg = TIMER_REG_CVAL;
+               break;
+       default:
+               BUG();
        }
 
-       return true;
-}
-
-static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
-               struct sys_reg_params *p,
-               const struct sys_reg_desc *r)
-{
-       if (p->is_write)
-               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval);
-       else
-               p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
-
-       return true;
-}
-
-static bool access_cntp_cval(struct kvm_vcpu *vcpu,
-               struct sys_reg_params *p,
-               const struct sys_reg_desc *r)
-{
        if (p->is_write)
-               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval);
+               kvm_arm_timer_write_sysreg(vcpu, tmr, treg, p->regval);
        else
-               p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+               p->regval = kvm_arm_timer_read_sysreg(vcpu, tmr, treg);
 
        return true;
 }
@@ -1160,6 +1158,64 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
        return __set_id_reg(rd, uaddr, true);
 }
 
+static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+                      const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               return write_to_read_only(vcpu, p, r);
+
+       p->regval = read_sanitised_ftr_reg(SYS_CTR_EL0);
+       return true;
+}
+
+static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+                        const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               return write_to_read_only(vcpu, p, r);
+
+       p->regval = read_sysreg(clidr_el1);
+       return true;
+}
+
+static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+                         const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+       else
+               p->regval = vcpu_read_sys_reg(vcpu, r->reg);
+       return true;
+}
+
+static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+                         const struct sys_reg_desc *r)
+{
+       u32 csselr;
+
+       if (p->is_write)
+               return write_to_read_only(vcpu, p, r);
+
+       csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
+       p->regval = get_ccsidr(csselr);
+
+       /*
+        * Guests should not be doing cache operations by set/way at all, and
+        * for this reason, we trap them and attempt to infer the intent, so
+        * that we can flush the entire guest's address space at the appropriate
+        * time.
+        * To prevent this trapping from causing performance problems, let's
+        * expose the geometry of all data and unified caches (which are
+        * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way.
+        * [If guests should attempt to infer aliasing properties from the
+        * geometry (which is not permitted by the architecture), they would
+        * only do so for virtually indexed caches.]
+        */
+       if (!(csselr & 1)) // data or unified cache
+               p->regval &= ~GENMASK(27, 3);
+       return true;
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {                   \
        SYS_DESC(SYS_##name),                   \
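
The GENMASK(27, 3) clear in access_ccsidr() leans on the pre-FEAT_CCIDX CCSIDR_EL1 layout (an assumption made explicit here, not in the hunk): NumSets-1 in bits [27:13], Associativity-1 in [12:3], LineSize in [2:0]. Zeroing [27:3] therefore reports one set and one way while leaving the line size intact. A worked example for a hypothetical 32KB, 4-way, 64-byte-line cache:

    /* raw CCSIDR:               NumSets-1 = 127, Assoc-1 = 3, LineSize = 2
     * after &= ~GENMASK(27, 3): NumSets-1 = 0,   Assoc-1 = 0, LineSize = 2
     * i.e. the guest sees a 1-set, 1-way cache with the true line size.
     */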
@@ -1377,7 +1433,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
        { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
 
-       { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 },
+       { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
+       { SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+       { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
+       { SYS_DESC(SYS_CTR_EL0), access_ctr },
 
        { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, },
        { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
@@ -1400,9 +1459,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
        { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
 
-       { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval },
-       { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl },
-       { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval },
+       { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
+       { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
+       { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
 
        /* PMEVCNTRn_EL0 */
        PMU_PMEVCNTR_EL0(0),
@@ -1476,7 +1535,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
        { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
        { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
-       { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 },
+       { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
 };
 
 static bool trap_dbgidr(struct kvm_vcpu *vcpu,
@@ -1677,6 +1736,7 @@ static const struct sys_reg_desc cp14_64_regs[] = {
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
+       { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr },
        { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -1723,10 +1783,9 @@ static const struct sys_reg_desc cp15_regs[] = {
 
        { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
-       /* CNTP_TVAL */
-       { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
-       /* CNTP_CTL */
-       { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+       /* Arch Timers */
+       { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer },
+       { SYS_DESC(SYS_AARCH32_CNTP_CTL), access_arch_timer },
 
        /* PMEVCNTRn */
        PMU_PMEVCNTR(0),
@@ -1794,6 +1853,10 @@ static const struct sys_reg_desc cp15_regs[] = {
        PMU_PMEVTYPER(30),
        /* PMCCFILTR */
        { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
+
+       { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
+       { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
+       { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR },
 };
 
 static const struct sys_reg_desc cp15_64_regs[] = {
@@ -1803,7 +1866,7 @@ static const struct sys_reg_desc cp15_64_regs[] = {
        { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
        { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
        { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
-       { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
+       { SYS_DESC(SYS_AARCH32_CNTP_CVAL),    access_arch_timer },
 };
 
 /* Target specific emulation tables */
@@ -1832,30 +1895,19 @@ static const struct sys_reg_desc *get_target_table(unsigned target,
        }
 }
 
-#define reg_to_match_value(x)                                          \
-       ({                                                              \
-               unsigned long val;                                      \
-               val  = (x)->Op0 << 14;                                  \
-               val |= (x)->Op1 << 11;                                  \
-               val |= (x)->CRn << 7;                                   \
-               val |= (x)->CRm << 3;                                   \
-               val |= (x)->Op2;                                        \
-               val;                                                    \
-        })
-
 static int match_sys_reg(const void *key, const void *elt)
 {
        const unsigned long pval = (unsigned long)key;
        const struct sys_reg_desc *r = elt;
 
-       return pval - reg_to_match_value(r);
+       return pval - reg_to_encoding(r);
 }
 
 static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
                                         const struct sys_reg_desc table[],
                                         unsigned int num)
 {
-       unsigned long pval = reg_to_match_value(params);
+       unsigned long pval = reg_to_encoding(params);
 
        return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
 }
@@ -2218,11 +2270,15 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
        }
 
 FUNCTION_INVARIANT(midr_el1)
-FUNCTION_INVARIANT(ctr_el0)
 FUNCTION_INVARIANT(revidr_el1)
 FUNCTION_INVARIANT(clidr_el1)
 FUNCTION_INVARIANT(aidr_el1)
 
+static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
+{
+       ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0);
+}
+
 /* ->val is filled in by kvm_sys_reg_table_init() */
 static struct sys_reg_desc invariant_sys_regs[] = {
        { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
index f86844fc0725bd1c25e1b5b9cb3fac6f2176b43a..0a8a7427117306a922fbcd3f72de5ccccdab96c1 100644 (file)
@@ -105,7 +105,8 @@ simscsi_interrupt (unsigned long val)
                atomic_dec(&num_reqs);
                queue[rd].sc = NULL;
                if (DBG)
-                       printk("simscsi_interrupt: done with %ld\n", sc->serial_number);
+                       printk("simscsi_interrupt: done with %u\n",
+                              sc->request->tag);
                (*sc->scsi_done)(sc);
                rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN;
        }
@@ -214,8 +215,8 @@ simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)
        register long sp asm ("sp");
 
        if (DBG)
-               printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n",
-                      target_id, sc->cmnd[0], sc->serial_number, sp, done);
+               printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%u,sp=%lx,done=%p\n",
+                      target_id, sc->cmnd[0], sc->request->tag, sp, done);
 #endif
 
        sc->result = DID_BAD_TARGET << 16;
index d2abd98471e860ca2e42cf2576c5902cc82e1360..41204a49cf95eaa8ef2735468ebbb062d23a457a 100644 (file)
@@ -1134,7 +1134,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
                struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
index cfdd08897a0602f7498aec34c4a7241510a31377..5ba131c30f6bcded4e65ccc40bb8aa2595e44ff1 100644 (file)
@@ -37,7 +37,8 @@ CONFIG_MODULE_SIG=y
 CONFIG_MODULE_SIG_FORCE=y
 CONFIG_MODULE_SIG_SHA512=y
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
 # CONFIG_PPC_VAS is not set
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
@@ -49,7 +50,6 @@ CONFIG_IRQ_ALL_CPUS=y
 CONFIG_NUMA=y
 # CONFIG_COMPACTION is not set
 # CONFIG_MIGRATION is not set
-# CONFIG_BOUNCE is not set
 CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
 CONFIG_CMDLINE_BOOL=y
@@ -136,9 +136,11 @@ CONFIG_ACENIC_OMIT_TIGON_I=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_AURORA is not set
 CONFIG_TIGON3=m
 CONFIG_BNX2X=m
 # CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CAVIUM is not set
 CONFIG_CHELSIO_T1=m
@@ -151,6 +153,7 @@ CONFIG_BE2NET=m
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 CONFIG_E1000=m
+CONFIG_E1000E=m
 CONFIG_IGB=m
 CONFIG_IXGB=m
 CONFIG_IXGBE=m
@@ -161,15 +164,18 @@ CONFIG_MLX4_EN=m
 # CONFIG_MLX4_CORE_GEN2 is not set
 CONFIG_MLX5_CORE=m
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 CONFIG_MYRI10GE=m
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_NVIDIA is not set
 # CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
 CONFIG_QLGE=m
 CONFIG_NETXEN_NIC=m
+CONFIG_QED=m
+CONFIG_QEDE=m
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RDC is not set
 # CONFIG_NET_VENDOR_REALTEK is not set
index 66c1e4f88d654ebf18cadcfff1f906eeae048fce..ec2a55a553c75adb8a66327dc7d756f9f7e0cdfe 100644 (file)
@@ -39,6 +39,14 @@ static inline int hstate_get_psize(struct hstate *hstate)
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 static inline bool gigantic_page_supported(void)
 {
+       /*
+        * In some configurations we rely on gigantic page reservation with
+        * hypervisor assist, and runtime allocation of gigantic pages is not
+        * possible on those platforms, i.e. hash translation mode LPARs.
+        */
+       if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
+               return false;
+
        return true;
 }
 #endif
index 0f98f00da2ea3b7027e29efbf0bf3c4b8a7116be..e6b5bb012ccb962fa5dcf7fdcd2db8962da1a57e 100644 (file)
@@ -99,6 +99,8 @@ struct kvm_nested_guest;
 
 struct kvm_vm_stat {
        ulong remote_tlb_flush;
+       ulong num_2M_pages;
+       ulong num_1G_pages;
 };
 
 struct kvm_vcpu_stat {
@@ -377,6 +379,7 @@ struct kvmppc_mmu {
        void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs);
        u64  (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr);
        u64  (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr);
+       int  (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb);
        void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr);
        void (*slbia)(struct kvm_vcpu *vcpu);
        /* book3s */
@@ -837,7 +840,7 @@ struct kvm_vcpu_arch {
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
index a6c8548ed9faa6c9188d496950ab5ac268756ceb..ac22b28ae78d4bc52223c94b478b83fc1c5ce48e 100644 (file)
@@ -36,6 +36,8 @@
 #endif
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #include <asm/paca.h>
+#include <asm/xive.h>
+#include <asm/cpu_has_feature.h>
 #endif
 
 /*
@@ -617,6 +619,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
 #endif /* CONFIG_KVM_XIVE */
 
+#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
+static inline bool xics_on_xive(void)
+{
+       return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool xics_on_xive(void)
+{
+       return false;
+}
+#endif
+
 /*
  * Prototypes for functions called only from assembler code.
  * Having prototypes reduces sparse errors.
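
The new helper separates "this host runs XICS-on-XIVE natively" from the weaker "XIVE exists": in a pseries guest acting as a nested host, xive_enabled() can be true while CPU_FTR_HVMODE is not, and the native XIVE plumbing the converted call sites below rely on is unavailable. The intended truth table, as a sketch:

    /*  platform                           xive_enabled()  HVMODE  xics_on_xive()
     *  POWER9 powernv host (native XIVE)      true         true       true
     *  pseries / nested hypervisor guest      true         false      false
     *  legacy XICS host                       false        -          false
     */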
index 8c876c166ef27b2c6fa754781fdbb103f2addc54..26ca425f4c2c39515bccee31029b3cada4c73639 100644 (file)
@@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char {
 #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED      (1ULL << 58)
 #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF     (1ULL << 57)
 #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS       (1ULL << 56)
+#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST    (1ull << 54)
 
 #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY      (1ULL << 63)
 #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR         (1ULL << 62)
 #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR    (1ULL << 61)
+#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE    (1ull << 58)
 
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
index 1881127682e995f5c6c94eb3d90d00e2559f1582..32332e24e4216402d21787f883c498fa2bd8e57f 100644 (file)
@@ -194,13 +194,6 @@ set_ivor:
 #endif
        mtspr   SPRN_MAS4, r2
 
-#if 0
-       /* Enable DOZE */
-       mfspr   r2,SPRN_HID0
-       oris    r2,r2,HID0_DOZE@h
-       mtspr   SPRN_HID0, r2
-#endif
-
 #if !defined(CONFIG_BDI_SWITCH)
        /*
         * The Abatron BDI JTAG debugger does not tolerate others
index a21200c6aaeaaf99dfe6ee1d4190354189f46fb2..1fd45a8650e1762f6f6e736fc8fc8ba6ee6f3101 100644 (file)
@@ -71,6 +71,7 @@
 #include <sysdev/fsl_pci.h>
 #include <asm/kprobes.h>
 #include <asm/stacktrace.h>
+#include <asm/nmi.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
 int (*__debugger)(struct pt_regs *regs) __read_mostly;
index 9a7dadbe1f1733a8f7cf60a08363b447bea1bf51..10c5579d20cec64152946f2f703a79e2da055154 100644 (file)
@@ -39,6 +39,7 @@
 #include "book3s.h"
 #include "trace.h"
 
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 /* #define EXIT_DEBUG */
@@ -71,6 +72,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pthru_all",       VCPU_STAT(pthru_all) },
        { "pthru_host",      VCPU_STAT(pthru_host) },
        { "pthru_bad_aff",   VCPU_STAT(pthru_bad_aff) },
+       { "largepages_2M",    VM_STAT(num_2M_pages) },
+       { "largepages_1G",    VM_STAT(num_1G_pages) },
        { NULL }
 };
 
@@ -642,7 +645,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
                                r = -ENXIO;
                                break;
                        }
-                       if (xive_enabled())
+                       if (xics_on_xive())
                                *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
                        else
                                *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
@@ -715,7 +718,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
                                r = -ENXIO;
                                break;
                        }
-                       if (xive_enabled())
+                       if (xics_on_xive())
                                r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
                        else
                                r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
@@ -991,7 +994,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
                bool line_status)
 {
-       if (xive_enabled())
+       if (xics_on_xive())
                return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
                                           line_status);
        else
@@ -1044,7 +1047,7 @@ static int kvmppc_book3s_init(void)
 
 #ifdef CONFIG_KVM_XICS
 #ifdef CONFIG_KVM_XIVE
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                kvmppc_xive_init_module();
                kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
        } else
@@ -1057,7 +1060,7 @@ static int kvmppc_book3s_init(void)
 static void kvmppc_book3s_exit(void)
 {
 #ifdef CONFIG_KVM_XICS
-       if (xive_enabled())
+       if (xics_on_xive())
                kvmppc_xive_exit_module();
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
index 612169988a3d8a15262665e03d3cc9cfddb4a87e..6f789f674048a4c2a534ad338f3162e33c4068cb 100644 (file)
@@ -425,6 +425,7 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
        mmu->slbmte = NULL;
        mmu->slbmfee = NULL;
        mmu->slbmfev = NULL;
+       mmu->slbfee = NULL;
        mmu->slbie = NULL;
        mmu->slbia = NULL;
 }
index c92dd25bed237bcd9ac488401c80ed4b3d304aed..d4b967f0e8d4bd1de83f40a40b11c9b02ef97d89 100644 (file)
@@ -435,6 +435,19 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
        kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT);
 }
 
+static int kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr,
+                                      ulong *ret_slb)
+{
+       struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+
+       if (slbe) {
+               *ret_slb = slbe->origv;
+               return 0;
+       }
+       *ret_slb = 0;
+       return -ENOENT;
+}
+
 static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
 {
        struct kvmppc_slb *slbe;
@@ -670,6 +683,7 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
        mmu->slbmte = kvmppc_mmu_book3s_64_slbmte;
        mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee;
        mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev;
+       mmu->slbfee = kvmppc_mmu_book3s_64_slbfee;
        mmu->slbie = kvmppc_mmu_book3s_64_slbie;
        mmu->slbia = kvmppc_mmu_book3s_64_slbia;
        mmu->xlate = kvmppc_mmu_book3s_64_xlate;
index bd2dcfbf00cdb1cc6a20a766bb68bf17d9f5e3dc..be7bc070eae5fc701251d7d53ce7979195051f7d 100644 (file)
@@ -441,6 +441,24 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
 {
        u32 last_inst;
 
+       /*
+        * Fast path - check if the guest physical address corresponds to a
+        * device on the FAST_MMIO_BUS; if so, we can avoid loading the
+        * instruction altogether, just handle it and return.
+        */
+       if (is_store) {
+               int idx, ret;
+
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
+               ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0,
+                                      NULL);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
+               if (!ret) {
+                       kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+                       return RESUME_GUEST;
+               }
+       }
+
        /*
         * If we fail, we just return to the guest and try executing it again.
         */
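
A note on how the zero-length probe above resolves, based on the generic kvm_io_bus contract rather than anything stated in this hunk: ioeventfd devices registered on KVM_FAST_MMIO_BUS match on address alone, so the len=0, data=NULL write either signals the eventfd or misses:

    /* ret == 0 : a FAST_MMIO device claimed gpa -> skip the instruction,
     *            kvmppc_set_pc(vcpu, pc + 4), RESUME_GUEST
     * ret != 0 : nothing registered at gpa -> fall through to the normal
     *            fetch-and-emulate path below
     */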
index 1b821c6efdefba002f3c104208857d7b0694b478..f55ef071883f13166662ca006e10d314d70a135c 100644 (file)
@@ -403,8 +403,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
                if (!memslot)
                        return;
        }
-       if (shift)
+       if (shift) { /* 1GB or 2MB page */
                page_size = 1ul << shift;
+               if (shift == PMD_SHIFT)
+                       kvm->stat.num_2M_pages--;
+               else if (shift == PUD_SHIFT)
+                       kvm->stat.num_1G_pages--;
+       }
 
        gpa &= ~(page_size - 1);
        hpa = old & PTE_RPN_MASK;
@@ -878,6 +883,14 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
                put_page(page);
        }
 
+       /* Increment number of large pages if we (successfully) inserted one */
+       if (!ret) {
+               if (level == 1)
+                       kvm->stat.num_2M_pages++;
+               else if (level == 2)
+                       kvm->stat.num_1G_pages++;
+       }
+
        return ret;
 }
 
index 532ab79734c7a08c356666a968c025a432c2f482..f02b049737109c670b1af440f9f5704bbdf0afc0 100644 (file)
@@ -133,7 +133,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
                                        continue;
 
                                kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
-                               return;
                        }
                }
        }
@@ -338,14 +337,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                }
        }
 
+       kvm_get_kvm(kvm);
        if (!ret)
                ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
                                       stt, O_RDWR | O_CLOEXEC);
 
-       if (ret >= 0) {
+       if (ret >= 0)
                list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
-               kvm_get_kvm(kvm);
-       }
+       else
+               kvm_put_kvm(kvm);
 
        mutex_unlock(&kvm->lock);
 
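
The reordering above closes a reference-count race: as soon as anon_inode_getfd() succeeds, the new fd can be closed from userspace, and its release callback drops a kvm reference, so that reference must exist before the fd does; on failure the function now unwinds it itself. The invariant as a sketch:

    kvm_get_kvm(kvm);                /* pin kvm before the fd can exist  */
    ret = anon_inode_getfd(...);     /* on success the fd owns that pin  */
    if (ret < 0)
            kvm_put_kvm(kvm);        /* fd never materialised: unpin     */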
index 8c7e933e942e5b56c42faf9eacdf08d5a183cde4..6ef7c5f00a49c0ac503a2d3eb2cf02f4ead324a9 100644 (file)
@@ -47,6 +47,7 @@
 #define OP_31_XOP_SLBMFEV      851
 #define OP_31_XOP_EIOIO                854
 #define OP_31_XOP_SLBMFEE      915
+#define OP_31_XOP_SLBFEE       979
 
 #define OP_31_XOP_TBEGIN       654
 #define OP_31_XOP_TABORT       910
@@ -416,6 +417,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
                        vcpu->arch.mmu.slbia(vcpu);
                        break;
+               case OP_31_XOP_SLBFEE:
+                       if (!(inst & 1) || !vcpu->arch.mmu.slbfee) {
+                               return EMULATE_FAIL;
+                       } else {
+                               ulong b, t;
+                               ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK;
+
+                               b = kvmppc_get_gpr(vcpu, rb);
+                               if (!vcpu->arch.mmu.slbfee(vcpu, b, &t))
+                                       cr |= 2 << CR0_SHIFT;
+                               kvmppc_set_gpr(vcpu, rt, t);
+                               /* copy XER[SO] bit to CR0[SO] */
+                               cr |= (vcpu->arch.regs.xer & 0x80000000) >>
+                                       (31 - CR0_SHIFT);
+                               kvmppc_set_cr(vcpu, cr);
+                       }
+                       break;
                case OP_31_XOP_SLBMFEE:
                        if (!vcpu->arch.mmu.slbmfee) {
                                emulated = EMULATE_FAIL;
index a3d5318f5d1e9a9e2654525cfa65059a232982b8..06964350b97a94118d065d90a257c882b5280136 100644 (file)
@@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
        case H_IPOLL:
        case H_XIRR_X:
                if (kvmppc_xics_enabled(vcpu)) {
-                       if (xive_enabled()) {
+                       if (xics_on_xive()) {
                                ret = H_NOT_AVAILABLE;
                                return RESUME_GUEST;
                        }
@@ -937,6 +937,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
                                                kvmppc_get_gpr(vcpu, 5));
                break;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
        case H_GET_TCE:
                ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
                                                kvmppc_get_gpr(vcpu, 5));
@@ -966,6 +967,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                if (ret == H_TOO_HARD)
                        return RESUME_HOST;
                break;
+#endif
        case H_RANDOM:
                if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
                        ret = H_HARDWARE;
@@ -1445,7 +1447,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                vcpu->arch.trap = 0;
                r = RESUME_GUEST;
-               if (!xive_enabled())
+               if (!xics_on_xive())
                        kvmppc_xics_rm_complete(vcpu, 0);
                break;
        default:
@@ -3648,11 +3650,12 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
 
 static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
 {
-       /* 10us base */
-       if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
-               vc->halt_poll_ns = 10000;
-       else
-               vc->halt_poll_ns *= halt_poll_ns_grow;
+       if (!halt_poll_ns_grow)
+               return;
+
+       vc->halt_poll_ns *= halt_poll_ns_grow;
+       if (vc->halt_poll_ns < halt_poll_ns_grow_start)
+               vc->halt_poll_ns = halt_poll_ns_grow_start;
 }
 
 static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
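
The rewritten grow path replaces the hardcoded 10us seed with the halt_poll_ns_grow_start parameter and makes growth a no-op when halt_poll_ns_grow is 0. A worked example, assuming module-parameter defaults of grow = 2 and grow_start = 10000 (these values are assumptions, not fixed by this hunk):

    /* halt_poll_ns:     0 -> 0 * 2 = 0   -> clamped up to 10000
     *               10000 -> 20000
     *               20000 -> 40000
     * and with halt_poll_ns_grow == 0 the function returns immediately.
     */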
@@ -3666,7 +3669,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
 #ifdef CONFIG_KVM_XICS
 static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
 {
-       if (!xive_enabled())
+       if (!xics_on_xive())
                return false;
        return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
                vcpu->arch.xive_saved_state.cppr;
@@ -4226,7 +4229,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
                                vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
                        srcu_read_unlock(&kvm->srcu, srcu_idx);
                } else if (r == RESUME_PASSTHROUGH) {
-                       if (WARN_ON(xive_enabled()))
+                       if (WARN_ON(xics_on_xive()))
                                r = H_SUCCESS;
                        else
                                r = kvmppc_xics_rm_complete(vcpu, 0);
@@ -4750,7 +4753,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
                 * If xive is enabled, we route 0x500 interrupts directly
                 * to the guest.
                 */
-               if (xive_enabled())
+               if (xics_on_xive())
                        lpcr |= LPCR_LPES;
        }
 
@@ -4986,7 +4989,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
        if (i == pimap->n_mapped)
                pimap->n_mapped++;
 
-       if (xive_enabled())
+       if (xics_on_xive())
                rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
        else
                kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
@@ -5027,7 +5030,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
                return -ENODEV;
        }
 
-       if (xive_enabled())
+       if (xics_on_xive())
                rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
        else
                kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
@@ -5359,13 +5362,11 @@ static int kvm_init_subcore_bitmap(void)
                        continue;
 
                sibling_subcore_state =
-                       kmalloc_node(sizeof(struct sibling_subcore_state),
+                       kzalloc_node(sizeof(struct sibling_subcore_state),
                                                        GFP_KERNEL, node);
                if (!sibling_subcore_state)
                        return -ENOMEM;
 
-               memset(sibling_subcore_state, 0,
-                               sizeof(struct sibling_subcore_state));
 
                for (j = 0; j < threads_per_core; j++) {
                        int cpu = first_cpu + j;
@@ -5406,7 +5407,7 @@ static int kvmppc_book3s_init_hv(void)
         * indirectly, via OPAL.
         */
 #ifdef CONFIG_SMP
-       if (!xive_enabled() && !kvmhv_on_pseries() &&
+       if (!xics_on_xive() && !kvmhv_on_pseries() &&
            !local_paca->kvm_hstate.xics_phys) {
                struct device_node *np;
 
index a71e2fc00a4e899be931d16ae08dd6042927bdde..b0cf22477e879b74ce4c0fa771d0deabb6c54af7 100644 (file)
@@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu)
        }
 
        /* We should never reach this */
-       if (WARN_ON_ONCE(xive_enabled()))
+       if (WARN_ON_ONCE(xics_on_xive()))
            return;
 
        /* Else poke the target with an IPI */
@@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
 {
        if (!kvmppc_xics_enabled(vcpu))
                return H_TOO_HARD;
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                if (is_rm())
                        return xive_rm_h_xirr(vcpu);
                if (unlikely(!__xive_vm_h_xirr))
@@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
        if (!kvmppc_xics_enabled(vcpu))
                return H_TOO_HARD;
        vcpu->arch.regs.gpr[5] = get_tb();
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                if (is_rm())
                        return xive_rm_h_xirr(vcpu);
                if (unlikely(!__xive_vm_h_xirr))
@@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
 {
        if (!kvmppc_xics_enabled(vcpu))
                return H_TOO_HARD;
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                if (is_rm())
                        return xive_rm_h_ipoll(vcpu, server);
                if (unlikely(!__xive_vm_h_ipoll))
@@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 {
        if (!kvmppc_xics_enabled(vcpu))
                return H_TOO_HARD;
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                if (is_rm())
                        return xive_rm_h_ipi(vcpu, server, mfrr);
                if (unlikely(!__xive_vm_h_ipi))
@@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
        if (!kvmppc_xics_enabled(vcpu))
                return H_TOO_HARD;
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                if (is_rm())
                        return xive_rm_h_cppr(vcpu, cppr);
                if (unlikely(!__xive_vm_h_cppr))
@@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
        if (!kvmppc_xics_enabled(vcpu))
                return H_TOO_HARD;
-       if (xive_enabled()) {
+       if (xics_on_xive()) {
                if (is_rm())
                        return xive_rm_h_eoi(vcpu, xirr);
                if (unlikely(!__xive_vm_h_eoi))
index b3f5786b20dcf33098280cdc1af37792b6182cbd..3b9662a4207e06125d108a2cd13724dbf665632a 100644 (file)
@@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
                return;
        }
 
+       if (xive_enabled() && kvmhv_on_pseries()) {
+               /* No XICS access or hypercalls available, too hard */
+               this_icp->rm_action |= XICS_RM_KICK_VCPU;
+               this_icp->rm_kick_target = vcpu;
+               return;
+       }
+
        /*
         * Check if the core is loaded,
         * if not, find an available host core to post to wake the VCPU,
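
When running as a nested (pseries) hypervisor on XIVE hardware, real-mode
code has neither XICS MMIO access nor XICS hypercalls, so the kick is
deferred to virtual mode. A hedged sketch of the replay side, modelled on
kvmppc_xics_rm_complete(); only the field and flag names come from the
hunk above:

    static void replay_rm_action(struct kvmppc_icp *icp)
    {
            if (icp->rm_action & XICS_RM_KICK_VCPU)
                    kvmppc_fast_vcpu_kick(icp->rm_kick_target);
            icp->rm_action = 0;
    }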
index 25043b50cb30a4b7d5dcde8e45ba61bc3b3e547f..3a5e719ef032bcdc7097f840b1932c47408bafc3 100644 (file)
@@ -2272,8 +2272,13 @@ hcall_real_table:
        .long   DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
        .long   DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
        .long   DOTSYM(kvmppc_h_protect) - hcall_real_table
+#ifdef CONFIG_SPAPR_TCE_IOMMU
        .long   DOTSYM(kvmppc_h_get_tce) - hcall_real_table
        .long   DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
+#else
+       .long   0               /* 0x1c */
+       .long   0               /* 0x20 */
+#endif
        .long   0               /* 0x24 - H_SET_SPRG0 */
        .long   DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
        .long   0               /* 0x2c */
@@ -2351,8 +2356,13 @@ hcall_real_table:
        .long   0               /* 0x12c */
        .long   0               /* 0x130 */
        .long   DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+#ifdef CONFIG_SPAPR_TCE_IOMMU
        .long   DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table
        .long   DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
+#else
+       .long   0               /* 0x138 */
+       .long   0               /* 0x13c */
+#endif
        .long   0               /* 0x140 */
        .long   0               /* 0x144 */
        .long   0               /* 0x148 */
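
The real-mode hcall table is indexed by the hcall number itself (all
numbers are multiples of 4, and entries are 4 bytes wide), so the #ifdef
must keep zero placeholders to preserve every slot's offset. A hedged C
rendering of the dispatch; dispatch_rm_hcall() is illustrative:

    long dispatch_rm_hcall(unsigned long nr, const long *table,
                           unsigned long nentries)
    {
            unsigned long idx = nr / 4;     /* hcall number == byte offset */

            if (idx >= nentries || !table[idx])
                    return -1;      /* zero entry: leave real mode and let
                                     * the virtual-mode handler take it */
            /* the assembly branches to hcall_real_table + table[idx] */
            return 0;
    }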
index 2d3b2b1cc272b0989858bfb4e945567ddef96369..4e178c4c1ea5074d638bbbb3eaa3315f39b2bfe0 100644 (file)
@@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
        server = be32_to_cpu(args->args[1]);
        priority = be32_to_cpu(args->args[2]);
 
-       if (xive_enabled())
+       if (xics_on_xive())
                rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
        else
                rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
@@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
        irq = be32_to_cpu(args->args[0]);
 
        server = priority = 0;
-       if (xive_enabled())
+       if (xics_on_xive())
                rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
        else
                rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
@@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
 
        irq = be32_to_cpu(args->args[0]);
 
-       if (xive_enabled())
+       if (xics_on_xive())
                rc = kvmppc_xive_int_off(vcpu->kvm, irq);
        else
                rc = kvmppc_xics_int_off(vcpu->kvm, irq);
@@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
 
        irq = be32_to_cpu(args->args[0]);
 
-       if (xive_enabled())
+       if (xics_on_xive())
                rc = kvmppc_xive_int_on(vcpu->kvm, irq);
        else
                rc = kvmppc_xics_int_on(vcpu->kvm, irq);
index b90a7d154180032d97efdb9150752d678453f9b3..8885377ec3e0c611b3ec3f14b8565e2f7ffde4aa 100644 (file)
@@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
                kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
                break;
        case KVMPPC_IRQ_XICS:
-               if (xive_enabled())
+               if (xics_on_xive())
                        kvmppc_xive_cleanup_vcpu(vcpu);
                else
                        kvmppc_xics_free_icp(vcpu);
@@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                r = -EPERM;
                dev = kvm_device_from_filp(f.file);
                if (dev) {
-                       if (xive_enabled())
+                       if (xics_on_xive())
                                r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
                        else
                                r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
@@ -2189,10 +2189,12 @@ static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
                        KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
                        KVM_PPC_CPU_CHAR_BR_HINT_HONOURED |
                        KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF |
-                       KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+                       KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS |
+                       KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
                cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
                        KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
-                       KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+                       KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR |
+                       KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
        }
        return 0;
 }
@@ -2251,12 +2253,16 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
                if (have_fw_feat(fw_features, "enabled",
                                 "fw-count-cache-disabled"))
                        cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+               if (have_fw_feat(fw_features, "enabled",
+                                "fw-count-cache-flush-bcctr2,0,0"))
+                       cp->character |= KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
                cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
                        KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
                        KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
                        KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
                        KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
-                       KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+                       KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS |
+                       KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
 
                if (have_fw_feat(fw_features, "enabled",
                                 "speculation-policy-favor-security"))
@@ -2267,9 +2273,13 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
                if (!have_fw_feat(fw_features, "disabled",
                                  "needs-spec-barrier-for-bound-checks"))
                        cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+               if (have_fw_feat(fw_features, "enabled",
+                                "needs-count-cache-flush-on-context-switch"))
+                       cp->behaviour |= KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
                cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
                        KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
-                       KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+                       KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR |
+                       KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
 
                of_node_put(fw_features);
        }
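
A hedged userspace sketch of how a VMM could probe the two new count-cache
bits, assuming the existing KVM_PPC_GET_CPU_CHAR vm ioctl and the uapi
names added above:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void probe_count_cache(int vm_fd)
    {
            struct kvm_ppc_cpu_char cc;

            if (ioctl(vm_fd, KVM_PPC_GET_CPU_CHAR, &cc) < 0)
                    return;
            if (cc.character & KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST)
                    printf("count-cache flush assist available\n");
            if (cc.behaviour & KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE)
                    printf("flush count cache on context switch\n");
    }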
index d52ec118e09db842283a1bd9607f727711a92f35..3c1bd9fa23cd9610c7e8013771119913167bf1ef 100644 (file)
@@ -52,3 +52,6 @@ obj-$(CONFIG_PPC_MEM_KEYS)    += pkeys.o
 # This is necessary for booting with kcov enabled on book3e machines
 KCOV_INSTRUMENT_tlb_nohash.o := n
 KCOV_INSTRUMENT_fsl_booke_mmu.o := n
+
+# Instrumenting the SLB fault path can lead to duplicate SLB entries
+KCOV_INSTRUMENT_slb.o := n
index 578757d403ab8df76b2002ab1f75322677c64e92..daad8c45c8e729d53cf83c3a5011024be178affa 100644 (file)
@@ -86,6 +86,7 @@ static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
                             s64 a4, s64 a5, s64 a6, s64 a7,
                              unsigned long opcode, unsigned long msr)
 {
+       return 0;
 }
 
 #define DO_TRACE false
index bba281b1fe1b0730f8a0d31fc469f324118a8933..96c53b23e58f9c843fea7fed17192c5e60d2a0fd 100644 (file)
@@ -239,6 +239,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        memset(&ndr_desc, 0, sizeof(ndr_desc));
        ndr_desc.attr_groups = region_attr_groups;
        ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+       ndr_desc.target_node = ndr_desc.numa_node;
        ndr_desc.res = &p->res;
        ndr_desc.of_node = p->dn;
        ndr_desc.provider_data = p;
index 22566765206925ac7d427896031fe0775bddec6d..1727180e8ca17124c97e039e9f3368d00680a66b 100644 (file)
@@ -331,5 +331,6 @@ extern void css_schedule_reprobe(void);
 /* Function from drivers/s390/cio/chsc.c */
 int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
 int chsc_sstpi(void *page, void *result, size_t size);
+int chsc_sgib(u32 origin);
 
 #endif
index 2f7f27e5493f6b28c2f0cb79f840ed546c0660ac..afaf5e3c57fd8b66b59b220699131c4ee70018cb 100644 (file)
@@ -62,6 +62,7 @@ enum interruption_class {
        IRQIO_MSI,
        IRQIO_VIR,
        IRQIO_VAI,
+       IRQIO_GAL,
        NMI_NMI,
        CPU_RST,
        NR_ARCH_IRQS
index 6cb9e2ed05b6c1b211f9d36fcb7e512399c61339..b2cc1ec78d062052e3289682ae3cbac1670a48f7 100644 (file)
@@ -21,6 +21,7 @@
 /* Adapter interrupts. */
 #define QDIO_AIRQ_ISC IO_SCH_ISC       /* I/O subchannel in qdio mode */
 #define PCI_ISC 2                      /* PCI I/O subchannels */
+#define GAL_ISC 5                      /* GIB alert */
 #define AP_ISC 6                       /* adjunct processor (crypto) devices */
 
 /* Functions for registration of I/O interruption subclasses */
index d5d24889c3bcf44b6acea740164d6144e82c0e0f..c47e22bba87fac58b08ecf28c498fe54074ede5d 100644 (file)
@@ -591,7 +591,6 @@ struct kvm_s390_float_interrupt {
        struct kvm_s390_mchk_info mchk;
        struct kvm_s390_ext_info srv_signal;
        int next_rr_cpu;
-       unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
        struct mutex ais_lock;
        u8 simm;
        u8 nimm;
@@ -712,6 +711,7 @@ struct s390_io_adapter {
 struct kvm_s390_cpu_model {
        /* facility mask supported by kvm & hosting machine */
        __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+       struct kvm_s390_vm_cpu_subfunc subfuncs;
        /* facility list requested by guest (in dma page) */
        __u64 *fac_list;
        u64 cpuid;
@@ -782,9 +782,21 @@ struct kvm_s390_gisa {
                        u8  reserved03[11];
                        u32 airq_count;
                } g1;
+               struct {
+                       u64 word[4];
+               } u64;
        };
 };
 
+struct kvm_s390_gib {
+       u32 alert_list_origin;
+       u32 reserved01;
+       u8:5;
+       u8  nisc:3;
+       u8  reserved03[3];
+       u32 reserved04[5];
+};
+
 /*
  * sie_page2 has to be allocated as DMA because fac_list, crycb and
  * gisa need 31bit addresses in the sie control block.
@@ -793,7 +805,8 @@ struct sie_page2 {
        __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64];    /* 0x0000 */
        struct kvm_s390_crypto_cb crycb;                /* 0x0800 */
        struct kvm_s390_gisa gisa;                      /* 0x0900 */
-       u8 reserved920[0x1000 - 0x920];                 /* 0x0920 */
+       struct kvm *kvm;                                /* 0x0920 */
+       u8 reserved928[0x1000 - 0x928];                 /* 0x0928 */
 };
 
 struct kvm_s390_vsie {
@@ -804,6 +817,20 @@ struct kvm_s390_vsie {
        struct page *pages[KVM_MAX_VCPUS];
 };
 
+struct kvm_s390_gisa_iam {
+       u8 mask;
+       spinlock_t ref_lock;
+       u32 ref_count[MAX_ISC + 1];
+};
+
+struct kvm_s390_gisa_interrupt {
+       struct kvm_s390_gisa *origin;
+       struct kvm_s390_gisa_iam alert;
+       struct hrtimer timer;
+       u64 expires;
+       DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
+};
+
 struct kvm_arch{
        void *sca;
        int use_esca;
@@ -837,7 +864,8 @@ struct kvm_arch{
        atomic64_t cmma_dirty_pages;
        /* subset of available cpu features enabled by user space */
        DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
-       struct kvm_s390_gisa *gisa;
+       DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
+       struct kvm_s390_gisa_interrupt gisa_int;
 };
 
 #define KVM_HVA_ERR_BAD                (-1UL)
@@ -871,6 +899,9 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 
+extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
+extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
+
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
@@ -878,7 +909,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
                struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
                struct kvm_memory_slot *slot) {}
index 0e8d68bac82c29356886e24b24088d0463c50880..0cd5a5f96729dad40540016bf2d32acc1ff18893 100644 (file)
@@ -88,6 +88,7 @@ static const struct irq_class irqclass_sub_desc[] = {
        {.irq = IRQIO_MSI,  .name = "MSI", .desc = "[I/O] MSI Interrupt" },
        {.irq = IRQIO_VIR,  .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
        {.irq = IRQIO_VAI,  .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+       {.irq = IRQIO_GAL,  .name = "GAL", .desc = "[I/O] GIB Alert"},
        {.irq = NMI_NMI,    .name = "NMI", .desc = "[NMI] Machine Check"},
        {.irq = CPU_RST,    .name = "RST", .desc = "[CPU] CPU Restart"},
 };
index fcb55b02990ef96e20148472828de2e324c6a56f..82162867f378d225ede29ff32adee1983072a7ac 100644 (file)
@@ -7,6 +7,9 @@
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  */
 
+#define KMSG_COMPONENT "kvm-s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/hrtimer.h>
@@ -23,6 +26,7 @@
 #include <asm/gmap.h>
 #include <asm/switch_to.h>
 #include <asm/nmi.h>
+#include <asm/airq.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
@@ -31,6 +35,8 @@
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 
+static struct kvm_s390_gib *gib;
+
 /* handle external calls via sigp interpretation facility */
 static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
 {
@@ -217,22 +223,100 @@ static inline u8 int_word_to_isc(u32 int_word)
  */
 #define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
 
-static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+/**
+ * gisa_set_iam - change the GISA interruption alert mask
+ *
+ * @gisa: gisa to operate on
+ * @iam: new IAM value to use
+ *
+ * Change the IAM atomically with the next alert address and the IPM
+ * of the GISA if the GISA is not part of the GIB alert list. All three
+ * fields are located in the first long word of the GISA.
+ *
+ * Returns: 0 on success
+ *          -EBUSY in case the gisa is part of the alert list
+ */
+static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
+{
+       u64 word, _word;
+
+       do {
+               word = READ_ONCE(gisa->u64.word[0]);
+               if ((u64)gisa != word >> 32)
+                       return -EBUSY;
+               _word = (word & ~0xffUL) | iam;
+       } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+
+       return 0;
+}
+
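
This and the following helpers race on the first doubleword of the GISA,
which packs the alert linkage, the IPM and the IAM. A hedged decoder
matching the shifts used in this block (decode_gisa_word0() is
illustrative):

    static void decode_gisa_word0(u64 word, u32 *next_alert, u8 *ipm, u8 *iam)
    {
            *next_alert = word >> 32;       /* upper half: next alert address */
            *ipm = (word >> 24) & 0xff;     /* interruption pending mask */
            *iam = word & 0xff;             /* interruption alert mask */
    }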
+/**
+ * gisa_clear_ipm - clear the GISA interruption pending mask
+ *
+ * @gisa: gisa to operate on
+ *
+ * Clear the IPM atomically with the next alert address and the IAM
+ * of the GISA unconditionally. All three fields are located in the
+ * first long word of the GISA.
+ */
+static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
+{
+       u64 word, _word;
+
+       do {
+               word = READ_ONCE(gisa->u64.word[0]);
+               _word = word & ~(0xffUL << 24);
+       } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+}
+
+/**
+ * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM
+ *
+ * @gi: gisa interrupt struct to work on
+ *
+ * Atomically restores the interruption alert mask if none of the
+ * relevant ISCs are pending and returns the IPM.
+ *
+ * Returns: the relevant pending ISCs
+ */
+static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
+{
+       u8 pending_mask, alert_mask;
+       u64 word, _word;
+
+       do {
+               word = READ_ONCE(gi->origin->u64.word[0]);
+               alert_mask = READ_ONCE(gi->alert.mask);
+               pending_mask = (u8)(word >> 24) & alert_mask;
+               if (pending_mask)
+                       return pending_mask;
+               _word = (word & ~0xffUL) | alert_mask;
+       } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+
+       return 0;
+}
+
+static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
+{
+       return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa;
+}
+
+static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
 {
        set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
 }
 
-static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa)
+static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa)
 {
        return READ_ONCE(gisa->ipm);
 }
 
-static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
 {
        clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
 }
 
-static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
 {
        return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
 }
@@ -245,8 +329,13 @@ static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
 
 static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
 {
-       return pending_irqs_no_gisa(vcpu) |
-               kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
+       struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int;
+       unsigned long pending_mask;
+
+       pending_mask = pending_irqs_no_gisa(vcpu);
+       if (gi->origin)
+               pending_mask |= gisa_get_ipm(gi->origin) << IRQ_PEND_IO_ISC_7;
+       return pending_mask;
 }
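
A hedged sketch of the lazy merge introduced above; merge_gisa_pending()
is illustrative and assumes IRQ_PEND_IO_ISC_7 marks the lowest of eight
consecutive I/O pending bits:

    static unsigned long merge_gisa_pending(unsigned long pending,
                                            struct kvm_s390_gisa_interrupt *gi)
    {
            if (gi->origin)         /* no GISA without the AIV facility */
                    pending |= (unsigned long)gisa_get_ipm(gi->origin)
                                    << IRQ_PEND_IO_ISC_7;
            return pending;
    }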
 
 static inline int isc_to_irq_type(unsigned long isc)
@@ -318,13 +407,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
        kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
-       set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
+       set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
 }
 
 static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 {
        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
-       clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
+       clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
 }
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
@@ -345,7 +434,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 {
        if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
                return;
-       else if (psw_ioint_disabled(vcpu))
+       if (psw_ioint_disabled(vcpu))
                kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
        else
                vcpu->arch.sie_block->lctl |= LCTL_CR6;
@@ -353,7 +442,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
-       if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+       if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_EXT_MASK))
                return;
        if (psw_extint_disabled(vcpu))
                kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
@@ -363,7 +452,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 
 static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
 {
-       if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+       if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_MCHK_MASK))
                return;
        if (psw_mchk_disabled(vcpu))
                vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -956,6 +1045,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 {
        struct list_head *isc_list;
        struct kvm_s390_float_interrupt *fi;
+       struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int;
        struct kvm_s390_interrupt_info *inti = NULL;
        struct kvm_s390_io_info io;
        u32 isc;
@@ -998,8 +1088,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
                goto out;
        }
 
-       if (vcpu->kvm->arch.gisa &&
-           kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) {
+       if (gi->origin && gisa_tac_ipm_gisc(gi->origin, isc)) {
                /*
                 * in case an adapter interrupt was not delivered
                 * in SIE context KVM will handle the delivery
@@ -1089,6 +1178,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int;
        u64 sltime;
 
        vcpu->stat.exit_wait_state++;
@@ -1102,6 +1192,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP; /* disabled wait */
        }
 
+       if (gi->origin &&
+           (gisa_get_ipm_or_restore_iam(gi) &
+            vcpu->arch.sie_block->gcr[6] >> 24))
+               return 0;
+
        if (!ckc_interrupts_enabled(vcpu) &&
            !cpu_timer_interrupts_enabled(vcpu)) {
                VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
@@ -1533,18 +1628,19 @@ static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm,
 
 static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid)
 {
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
        unsigned long active_mask;
        int isc;
 
        if (schid)
                goto out;
-       if (!kvm->arch.gisa)
+       if (!gi->origin)
                goto out;
 
-       active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32;
+       active_mask = (isc_mask & gisa_get_ipm(gi->origin) << 24) << 32;
        while (active_mask) {
                isc = __fls(active_mask) ^ (BITS_PER_LONG - 1);
-               if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc))
+               if (gisa_tac_ipm_gisc(gi->origin, isc))
                        return isc;
                clear_bit_inv(isc, &active_mask);
        }
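
The shift by 24 + 32 parks the IPM bit for ISC 0 at bit 63, so the highest
set bit is the highest-priority pending ISC. A hedged helper spelling out
the inversion (top_isc() is illustrative):

    static int top_isc(unsigned long active_mask)
    {
            /* ISC 0 lives at bit 63, ISC 7 at bit 56; inverting the
             * index of the most significant set bit recovers the ISC */
            return __fls(active_mask) ^ (BITS_PER_LONG - 1);
    }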
@@ -1567,6 +1663,7 @@ out:
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 isc_mask, u32 schid)
 {
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
        struct kvm_s390_interrupt_info *inti, *tmp_inti;
        int isc;
 
@@ -1584,7 +1681,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
        /* both types of interrupts present */
        if (int_word_to_isc(inti->io.io_int_word) <= isc) {
                /* classical IO int with higher priority */
-               kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+               gisa_set_ipm_gisc(gi->origin, isc);
                goto out;
        }
 gisa_out:
@@ -1596,7 +1693,7 @@ gisa_out:
                        kvm_s390_reinject_io_int(kvm, inti);
                inti = tmp_inti;
        } else
-               kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+               gisa_set_ipm_gisc(gi->origin, isc);
 out:
        return inti;
 }
@@ -1685,6 +1782,7 @@ static int __inject_float_mchk(struct kvm *kvm,
 
 static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
        struct kvm_s390_float_interrupt *fi;
        struct list_head *list;
        int isc;
@@ -1692,9 +1790,9 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
        kvm->stat.inject_io++;
        isc = int_word_to_isc(inti->io.io_int_word);
 
-       if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
+       if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
                VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
-               kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+               gisa_set_ipm_gisc(gi->origin, isc);
                kfree(inti);
                return 0;
        }
@@ -1726,7 +1824,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
  */
 static void __floating_irq_kick(struct kvm *kvm, u64 type)
 {
-       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
        struct kvm_vcpu *dst_vcpu;
        int sigcpu, online_vcpus, nr_tries = 0;
 
@@ -1735,11 +1832,11 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
                return;
 
        /* find idle VCPUs first, then round robin */
-       sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+       sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus);
        if (sigcpu == online_vcpus) {
                do {
-                       sigcpu = fi->next_rr_cpu;
-                       fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+                       sigcpu = kvm->arch.float_int.next_rr_cpu++;
+                       kvm->arch.float_int.next_rr_cpu %= online_vcpus;
                        /* avoid endless loops if all vcpus are stopped */
                        if (nr_tries++ >= online_vcpus)
                                return;
@@ -1753,7 +1850,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
                kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
                break;
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
+               if (!(type & KVM_S390_INT_IO_AI_MASK &&
+                     kvm->arch.gisa_int.origin))
                        kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
                break;
        default:
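
A hedged sketch of the kick target selection after this change: idle vcpus
are preferred, otherwise a per-VM round-robin cursor rotates through the
online vcpus (the caller additionally bounds the retries so stopped vcpus
cannot livelock it); pick_target() is illustrative:

    static int pick_target(unsigned long *idle_mask, int online, int *next_rr)
    {
            int cpu = find_first_bit(idle_mask, online);

            if (cpu < online)
                    return cpu;             /* an idle vcpu wins */
            cpu = *next_rr;                 /* else round robin */
            *next_rr = (*next_rr + 1) % online;
            return cpu;
    }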
@@ -2003,6 +2101,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
 
 static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 {
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
        struct kvm_s390_interrupt_info *inti;
        struct kvm_s390_float_interrupt *fi;
        struct kvm_s390_irq *buf;
@@ -2026,15 +2125,14 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 
        max_irqs = len / sizeof(struct kvm_s390_irq);
 
-       if (kvm->arch.gisa &&
-           kvm_s390_gisa_get_ipm(kvm->arch.gisa)) {
+       if (gi->origin && gisa_get_ipm(gi->origin)) {
                for (i = 0; i <= MAX_ISC; i++) {
                        if (n == max_irqs) {
                                /* signal userspace to try again */
                                ret = -ENOMEM;
                                goto out_nolock;
                        }
-                       if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) {
+                       if (gisa_tac_ipm_gisc(gi->origin, i)) {
                                irq = (struct kvm_s390_irq *) &buf[n];
                                irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
                                irq->u.io.io_int_word = isc_to_int_word(i);
@@ -2831,7 +2929,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li,
 int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
 {
        int scn;
-       unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+       DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        unsigned long pending_irqs;
        struct kvm_s390_irq irq;
@@ -2884,27 +2982,278 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
        return n;
 }
 
-void kvm_s390_gisa_clear(struct kvm *kvm)
+static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
 {
-       if (kvm->arch.gisa) {
-               memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa));
-               kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa;
-               VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa);
+       int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus);
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+       struct kvm_vcpu *vcpu;
+
+       for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
+               vcpu = kvm_get_vcpu(kvm, vcpu_id);
+               if (psw_ioint_disabled(vcpu))
+                       continue;
+               deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+               if (deliverable_mask) {
+                       /* recently kicked but not yet running */
+                       if (test_and_set_bit(vcpu_id, gi->kicked_mask))
+                               return;
+                       kvm_s390_vcpu_wakeup(vcpu);
+                       return;
+               }
        }
 }
 
+static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
+{
+       struct kvm_s390_gisa_interrupt *gi =
+               container_of(timer, struct kvm_s390_gisa_interrupt, timer);
+       struct kvm *kvm =
+               container_of(gi->origin, struct sie_page2, gisa)->kvm;
+       u8 pending_mask;
+
+       pending_mask = gisa_get_ipm_or_restore_iam(gi);
+       if (pending_mask) {
+               __airqs_kick_single_vcpu(kvm, pending_mask);
+               hrtimer_forward_now(timer, ns_to_ktime(gi->expires));
+               return HRTIMER_RESTART;
+       }
+
+       return HRTIMER_NORESTART;
+}
+
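
The timer callback above can recover the VM from nothing but a GISA
address because sie_page2 now carries a kvm back-pointer (see the header
hunk earlier); the kicked_mask bit keeps a vcpu from being woken twice
before it re-enters SIE. A minimal sketch of the lookup:

    static struct kvm *gisa_to_kvm(struct kvm_s390_gisa *gisa)
    {
            return container_of(gisa, struct sie_page2, gisa)->kvm;
    }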
+#define NULL_GISA_ADDR 0x00000000UL
+#define NONE_GISA_ADDR 0x00000001UL
+#define GISA_ADDR_MASK 0xfffff000UL
+
+static void process_gib_alert_list(void)
+{
+       struct kvm_s390_gisa_interrupt *gi;
+       struct kvm_s390_gisa *gisa;
+       struct kvm *kvm;
+       u32 final, origin = 0UL;
+
+       do {
+               /*
+                * If the NONE_GISA_ADDR is still stored in the alert list
+                * origin, we will leave the outer loop. No further GISA has
+                * been added to the alert list by millicode while processing
+                * the current alert list.
+                */
+               final = (origin & NONE_GISA_ADDR);
+               /*
+                * Cut off the alert list and store the NONE_GISA_ADDR in the
+                * alert list origin to avoid further GAL interruptions.
+                * A new alert list can be built up by millicode in parallel
+                * for guests not in the just cut-off alert list. When in the
+                * final loop, store the NULL_GISA_ADDR instead. This will re-
+                * enable GAL interruptions on the host again.
+                */
+               origin = xchg(&gib->alert_list_origin,
+                             (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
+               /*
+                * Loop through the just cut-off alert list and start the
+                * gisa timers to kick idle vcpus to consume the pending
+                * interruptions asap.
+                */
+               while (origin & GISA_ADDR_MASK) {
+                       gisa = (struct kvm_s390_gisa *)(u64)origin;
+                       origin = gisa->next_alert;
+                       gisa->next_alert = (u32)(u64)gisa;
+                       kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
+                       gi = &kvm->arch.gisa_int;
+                       if (hrtimer_active(&gi->timer))
+                               hrtimer_cancel(&gi->timer);
+                       hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+               }
+       } while (!final);
+
+}
+
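
A hedged sketch of draining one claimed chain, as in the inner loop above:
each GISA is unlinked by pointing next_alert back at itself, which is also
the "not on any list" marker gisa_in_alert_list() tests for; drain_chain()
is illustrative:

    static void drain_chain(u32 origin)
    {
            struct kvm_s390_gisa *gisa;

            while (origin & GISA_ADDR_MASK) {
                    gisa = (struct kvm_s390_gisa *)(u64)origin;
                    origin = gisa->next_alert;
                    gisa->next_alert = (u32)(u64)gisa;  /* self-link = off-list */
            }
    }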
+void kvm_s390_gisa_clear(struct kvm *kvm)
+{
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+       if (!gi->origin)
+               return;
+       gisa_clear_ipm(gi->origin);
+       VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+}
+
 void kvm_s390_gisa_init(struct kvm *kvm)
 {
-       if (css_general_characteristics.aiv) {
-               kvm->arch.gisa = &kvm->arch.sie_page2->gisa;
-               VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa);
-               kvm_s390_gisa_clear(kvm);
-       }
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+       if (!css_general_characteristics.aiv)
+               return;
+       gi->origin = &kvm->arch.sie_page2->gisa;
+       gi->alert.mask = 0;
+       spin_lock_init(&gi->alert.ref_lock);
+       gi->expires = 50 * 1000; /* 50 usec */
+       hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       gi->timer.function = gisa_vcpu_kicker;
+       memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
+       gi->origin->next_alert = (u32)(u64)gi->origin;
+       VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
 }
 
 void kvm_s390_gisa_destroy(struct kvm *kvm)
 {
-       if (!kvm->arch.gisa)
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+       if (!gi->origin)
+               return;
+       if (gi->alert.mask)
+               KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x",
+                         kvm, gi->alert.mask);
+       while (gisa_in_alert_list(gi->origin))
+               cpu_relax();
+       hrtimer_cancel(&gi->timer);
+       gi->origin = NULL;
+}
+
+/**
+ * kvm_s390_gisc_register - register a guest ISC
+ *
+ * @kvm:  the kernel vm to work with
+ * @gisc: the guest interruption sub class to register
+ *
+ * The function extends the vm specific alert mask in use.
+ * The effective IAM mask in the GISA is updated as well
+ * in case the GISA is not part of the GIB alert list.
+ * It will be updated at the latest when the IAM gets restored
+ * by gisa_get_ipm_or_restore_iam().
+ *
+ * Returns: the nonspecific ISC (NISC) the gib alert mechanism
+ *          has registered with the channel subsystem.
+ *          -ENODEV in case the vm uses no GISA
+ *          -ERANGE in case the guest ISC is invalid
+ */
+int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc)
+{
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+       if (!gi->origin)
+               return -ENODEV;
+       if (gisc > MAX_ISC)
+               return -ERANGE;
+
+       spin_lock(&gi->alert.ref_lock);
+       gi->alert.ref_count[gisc]++;
+       if (gi->alert.ref_count[gisc] == 1) {
+               gi->alert.mask |= 0x80 >> gisc;
+               gisa_set_iam(gi->origin, gi->alert.mask);
+       }
+       spin_unlock(&gi->alert.ref_lock);
+
+       return gib->nisc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_gisc_register);
+
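
A hedged usage sketch for the pair of exports: a driver pins a guest ISC,
routes its adapter to the returned host NISC, and drops the reference on
teardown; pin_guest_isc() is illustrative:

    static int pin_guest_isc(struct kvm *kvm, u32 guest_isc)
    {
            int nisc = kvm_s390_gisc_register(kvm, guest_isc);

            if (nisc < 0)
                    return nisc;    /* -ENODEV: no GISA, -ERANGE: bad ISC */
            /* ... deliver adapter interruptions on the host ISC "nisc" ... */
            return kvm_s390_gisc_unregister(kvm, guest_isc);
    }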
+/**
+ * kvm_s390_gisc_unregister - unregister a guest ISC
+ *
+ * @kvm:  the kernel vm to work with
+ * @gisc: the guest interruption sub class to unregister
+ *
+ * The function reduces the vm specific alert mask in use.
+ * The effective IAM mask in the GISA is updated as well
+ * in case the GISA is not part of the GIB alert list.
+ * It will be updated at the latest when the IAM gets restored
+ * by gisa_get_ipm_or_restore_iam().
+ *
+ * Returns: 0 on success
+ *          -ENODEV in case the vm uses no GISA
+ *          -ERANGE in case the guest ISC is invalid
+ *          -EINVAL in case the guest ISC is not registered
+ */
+int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc)
+{
+       struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+       int rc = 0;
+
+       if (!gi->origin)
+               return -ENODEV;
+       if (gisc > MAX_ISC)
+               return -ERANGE;
+
+       spin_lock(&gi->alert.ref_lock);
+       if (gi->alert.ref_count[gisc] == 0) {
+               rc = -EINVAL;
+               goto out;
+       }
+       gi->alert.ref_count[gisc]--;
+       if (gi->alert.ref_count[gisc] == 0) {
+               gi->alert.mask &= ~(0x80 >> gisc);
+               gisa_set_iam(gi->origin, gi->alert.mask);
+       }
+out:
+       spin_unlock(&gi->alert.ref_lock);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
+
+static void gib_alert_irq_handler(struct airq_struct *airq)
+{
+       inc_irq_stat(IRQIO_GAL);
+       process_gib_alert_list();
+}
+
+static struct airq_struct gib_alert_irq = {
+       .handler = gib_alert_irq_handler,
+       .lsi_ptr = &gib_alert_irq.lsi_mask,
+};
+
+void kvm_s390_gib_destroy(void)
+{
+       if (!gib)
                return;
-       kvm->arch.gisa = NULL;
+       chsc_sgib(0);
+       unregister_adapter_interrupt(&gib_alert_irq);
+       free_page((unsigned long)gib);
+       gib = NULL;
+}
+
+int kvm_s390_gib_init(u8 nisc)
+{
+       int rc = 0;
+
+       if (!css_general_characteristics.aiv) {
+               KVM_EVENT(3, "%s", "gib not initialized, no AIV facility");
+               goto out;
+       }
+
+       gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+       if (!gib) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       gib_alert_irq.isc = nisc;
+       if (register_adapter_interrupt(&gib_alert_irq)) {
+               pr_err("Registering the GIB alert interruption handler failed\n");
+               rc = -EIO;
+               goto out_free_gib;
+       }
+
+       gib->nisc = nisc;
+       if (chsc_sgib((u32)(u64)gib)) {
+               pr_err("Associating the GIB with the AIV facility failed\n");
+               free_page((unsigned long)gib);
+               gib = NULL;
+               rc = -EIO;
+               goto out_unreg_gal;
+       }
+
+       KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+       goto out;
+
+out_unreg_gal:
+       unregister_adapter_interrupt(&gib_alert_irq);
+out_free_gib:
+       free_page((unsigned long)gib);
+       gib = NULL;
+out:
+       return rc;
 }
index 7f4bc58a53b976e2d7c2b89f1e4d5643f28fd5d6..4638303ba6a858793eded0e331b67aeea67db5d0 100644 (file)
@@ -432,11 +432,18 @@ int kvm_arch_init(void *opaque)
        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
-               pr_err("Failed to register FLIC rc=%d\n", rc);
+               pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out_debug_unreg;
        }
+
+       rc = kvm_s390_gib_init(GAL_ISC);
+       if (rc)
+               goto out_gib_destroy;
+
        return 0;
 
+out_gib_destroy:
+       kvm_s390_gib_destroy();
 out_debug_unreg:
        debug_unregister(kvm_s390_dbf);
        return rc;
@@ -444,6 +451,7 @@ out_debug_unreg:
 
 void kvm_arch_exit(void)
 {
+       kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
 }
 
@@ -1258,11 +1266,65 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
 {
-       /*
-        * Once supported by kernel + hw, we have to store the subfunctions
-        * in kvm->arch and remember that user space configured them.
-        */
-       return -ENXIO;
+       mutex_lock(&kvm->lock);
+       if (kvm->created_vcpus) {
+               mutex_unlock(&kvm->lock);
+               return -EBUSY;
+       }
+
+       if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
+                          sizeof(struct kvm_s390_vm_cpu_subfunc))) {
+               mutex_unlock(&kvm->lock);
+               return -EFAULT;
+       }
+       mutex_unlock(&kvm->lock);
+
+       VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
+       VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
+       VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
+       VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
+       VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
+       VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+
+       return 0;
 }
 
 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
@@ -1381,12 +1443,56 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm,
 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
 {
-       /*
-        * Once we can actually configure subfunctions (kernel + hw support),
-        * we have to check if they were already set by user space, if so copy
-        * them from kvm->arch.
-        */
-       return -ENXIO;
+       if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
+           sizeof(struct kvm_s390_vm_cpu_subfunc)))
+               return -EFAULT;
+
+       VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
+                ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
+       VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
+       VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
+       VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
+       VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
+       VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
+                ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+
+       return 0;
 }
 
 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
@@ -1395,8 +1501,55 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
+
+       VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
+                ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
+                ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
+       VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
+       VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
+       VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
+       VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
+       VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
+                ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
+                ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
+
        return 0;
 }
+
 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1514,10 +1667,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+               case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                        ret = 0;
                        break;
-               /* configuring subfunctions is not supported yet */
-               case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
@@ -2209,6 +2361,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        if (!kvm->arch.sie_page2)
                goto out_err;
 
+       kvm->arch.sie_page2->kvm = kvm;
        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
 
        for (i = 0; i < kvm_s390_fac_size(); i++) {
@@ -2218,6 +2371,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
                kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
                                              kvm_s390_fac_base[i];
        }
+       kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
 
        /* we are always in czam mode - even on pre z14 machines */
        set_kvm_facility(kvm->arch.model.fac_mask, 138);
@@ -2812,7 +2966,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
        vcpu->arch.sie_block->icpua = id;
        spin_lock_init(&vcpu->arch.local_int.lock);
-       vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
+       vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
        seqcount_init(&vcpu->arch.cputm_seqcount);
@@ -3458,6 +3612,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
                kvm_s390_patch_guest_per_regs(vcpu);
        }
 
+       clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
+
        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
@@ -4293,12 +4449,12 @@ static int __init kvm_s390_init(void)
        int i;
 
        if (!sclp.has_sief2) {
-               pr_info("SIE not available\n");
+               pr_info("SIE is not available\n");
                return -ENODEV;
        }
 
        if (nested && hpage) {
-               pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
+               pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
                return -EINVAL;
        }
 
index 1f6e36cdce0da88f8f0e54a772011e7080d2723c..6d9448dbd052bd1ae6f3267176d2921cff8227e7 100644 (file)
@@ -67,7 +67,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
 
 static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
 {
-       return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
+       return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
 }
 
 static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -381,6 +381,8 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
 void kvm_s390_gisa_init(struct kvm *kvm);
 void kvm_s390_gisa_clear(struct kvm *kvm);
 void kvm_s390_gisa_destroy(struct kvm *kvm);
+int kvm_s390_gib_init(u8 nisc);
+void kvm_s390_gib_destroy(void);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
index a4a41421c5e2a005cca783cfd652e41076b65a93..aca09be2373e77245ec9adfccdb540342a4d0bb9 100644 (file)
@@ -938,7 +938,7 @@ static int ubd_add(int n, char **error_out)
        ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
        if (IS_ERR(ubd_dev->queue)) {
                err = PTR_ERR(ubd_dev->queue);
-               goto out_cleanup;
+               goto out_cleanup_tags;
        }
 
        ubd_dev->queue->queuedata = ubd_dev;
@@ -968,8 +968,8 @@ out:
 
 out_cleanup_tags:
        blk_mq_free_tag_set(&ubd_dev->tag_set);
-out_cleanup:
-       blk_cleanup_queue(ubd_dev->queue);
+       if (!IS_ERR(ubd_dev->queue))
+               blk_cleanup_queue(ubd_dev->queue);
        goto out;
 }
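The relabelled goto above matters because blk_mq_init_queue() reports failure through an ERR_PTR()-encoded pointer, not NULL; handing that encoded value to blk_cleanup_queue() dereferences a bogus address. A minimal userspace sketch of the ERR_PTR/IS_ERR convention (the constants mirror include/linux/err.h; init_queue() and the fake pointer are invented for illustration):

#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static void *init_queue(int fail)
{
	return fail ? ERR_PTR(-12 /* -ENOMEM */) : (void *)0x1000;
}

int main(void)
{
	void *q = init_queue(1);

	if (IS_ERR(q))		/* must be checked before any cleanup call */
		printf("error %ld, skipping queue cleanup\n", PTR_ERR(q));
	return 0;
}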
 
index d2c17dd746204a4147b5112cb5cef344ea08f47d..b3f7b3ca896dd268391e139731cd2823efdf3488 100644 (file)
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
-#include <sys/types.h>
 #include <sys/socket.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <netinet/ether.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
-#include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/uio.h>
 #include <linux/virtio_net.h>
@@ -31,7 +29,6 @@
 #include <stdlib.h>
 #include <os.h>
 #include <um_malloc.h>
-#include <sys/uio.h>
 #include "vector_user.h"
 
 #define ID_GRE 0
index 8da78595d69dbf24133c1fb84ed22272f685f733..1f9607ed087c0b999b87febb7b8b6f796eb91095 100644 (file)
 421    i386    rt_sigtimedwait_time64  sys_rt_sigtimedwait             __ia32_compat_sys_rt_sigtimedwait_time64
 422    i386    futex_time64            sys_futex                       __ia32_sys_futex
 423    i386    sched_rr_get_interval_time64    sys_sched_rr_get_interval       __ia32_sys_sched_rr_get_interval
+424    i386    pidfd_send_signal       sys_pidfd_send_signal           __ia32_sys_pidfd_send_signal
 425    i386    io_uring_setup          sys_io_uring_setup              __ia32_sys_io_uring_setup
 426    i386    io_uring_enter          sys_io_uring_enter              __ia32_sys_io_uring_enter
 427    i386    io_uring_register       sys_io_uring_register           __ia32_sys_io_uring_register
index c768447f97ec6818d874ced4955cfa0f3229104c..92ee0b4378d4c23b9ac3d22d92d4637efb187f94 100644 (file)
 334    common  rseq                    __x64_sys_rseq
 # don't use numbers 387 through 423, add new calls after the last
 # 'common' entry
+424    common  pidfd_send_signal       __x64_sys_pidfd_send_signal
 425    common  io_uring_setup          __x64_sys_io_uring_setup
 426    common  io_uring_enter          __x64_sys_io_uring_enter
 427    common  io_uring_register       __x64_sys_io_uring_register
index 180373360e34256ef3a3bba30ada81f7df9d27ed..a5db4475e72db63031284ecb986c445d7c835eb8 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
+#include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
 
 #define KVM_MAX_VCPUS 288
@@ -137,23 +138,23 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 #define ASYNC_PF_PER_VCPU 64
 
 enum kvm_reg {
-       VCPU_REGS_RAX = 0,
-       VCPU_REGS_RCX = 1,
-       VCPU_REGS_RDX = 2,
-       VCPU_REGS_RBX = 3,
-       VCPU_REGS_RSP = 4,
-       VCPU_REGS_RBP = 5,
-       VCPU_REGS_RSI = 6,
-       VCPU_REGS_RDI = 7,
+       VCPU_REGS_RAX = __VCPU_REGS_RAX,
+       VCPU_REGS_RCX = __VCPU_REGS_RCX,
+       VCPU_REGS_RDX = __VCPU_REGS_RDX,
+       VCPU_REGS_RBX = __VCPU_REGS_RBX,
+       VCPU_REGS_RSP = __VCPU_REGS_RSP,
+       VCPU_REGS_RBP = __VCPU_REGS_RBP,
+       VCPU_REGS_RSI = __VCPU_REGS_RSI,
+       VCPU_REGS_RDI = __VCPU_REGS_RDI,
 #ifdef CONFIG_X86_64
-       VCPU_REGS_R8 = 8,
-       VCPU_REGS_R9 = 9,
-       VCPU_REGS_R10 = 10,
-       VCPU_REGS_R11 = 11,
-       VCPU_REGS_R12 = 12,
-       VCPU_REGS_R13 = 13,
-       VCPU_REGS_R14 = 14,
-       VCPU_REGS_R15 = 15,
+       VCPU_REGS_R8  = __VCPU_REGS_R8,
+       VCPU_REGS_R9  = __VCPU_REGS_R9,
+       VCPU_REGS_R10 = __VCPU_REGS_R10,
+       VCPU_REGS_R11 = __VCPU_REGS_R11,
+       VCPU_REGS_R12 = __VCPU_REGS_R12,
+       VCPU_REGS_R13 = __VCPU_REGS_R13,
+       VCPU_REGS_R14 = __VCPU_REGS_R14,
+       VCPU_REGS_R15 = __VCPU_REGS_R15,
 #endif
        VCPU_REGS_RIP,
        NR_VCPU_REGS
@@ -319,6 +320,7 @@ struct kvm_mmu_page {
        struct list_head link;
        struct hlist_node hash_link;
        bool unsync;
+       bool mmio_cached;
 
        /*
         * The following two entries are used to key the shadow page in the
@@ -333,10 +335,6 @@ struct kvm_mmu_page {
        int root_count;          /* Currently serving as active root */
        unsigned int unsync_children;
        struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-
-       /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen.  */
-       unsigned long mmu_valid_gen;
-
        DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
@@ -848,13 +846,11 @@ struct kvm_arch {
        unsigned int n_requested_mmu_pages;
        unsigned int n_max_mmu_pages;
        unsigned int indirect_shadow_pages;
-       unsigned long mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
        /*
         * Hash table of struct kvm_mmu_page.
         */
        struct list_head active_mmu_pages;
-       struct list_head zapped_obsolete_pages;
        struct kvm_page_track_notifier_node mmu_sp_tracker;
        struct kvm_page_track_notifier_head track_notifier_head;
 
@@ -1255,7 +1251,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
                                   struct kvm_memory_slot *slot,
                                   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h
new file mode 100644 (file)
index 0000000..1af2cb5
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVM_VCPU_REGS_H
+#define _ASM_X86_KVM_VCPU_REGS_H
+
+#define __VCPU_REGS_RAX  0
+#define __VCPU_REGS_RCX  1
+#define __VCPU_REGS_RDX  2
+#define __VCPU_REGS_RBX  3
+#define __VCPU_REGS_RSP  4
+#define __VCPU_REGS_RBP  5
+#define __VCPU_REGS_RSI  6
+#define __VCPU_REGS_RDI  7
+
+#ifdef CONFIG_X86_64
+#define __VCPU_REGS_R8   8
+#define __VCPU_REGS_R9   9
+#define __VCPU_REGS_R10 10
+#define __VCPU_REGS_R11 11
+#define __VCPU_REGS_R12 12
+#define __VCPU_REGS_R13 13
+#define __VCPU_REGS_R14 14
+#define __VCPU_REGS_R15 15
+#endif
+
+#endif /* _ASM_X86_KVM_VCPU_REGS_H */
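These raw #defines exist primarily so the register-array indices can be shared with assembler code, where C enums are unusable; the enum in kvm_host.h now simply aliases them. A compile-time sketch of that aliasing, as a userspace approximation (the regs array is invented):

#include <assert.h>

#define __VCPU_REGS_RAX 0
#define __VCPU_REGS_RCX 1

enum kvm_reg {
	VCPU_REGS_RAX = __VCPU_REGS_RAX,
	VCPU_REGS_RCX = __VCPU_REGS_RCX,
};

int main(void)
{
	unsigned long regs[2] = { 0 };

	regs[VCPU_REGS_RCX] = 0xdead;	/* C side indexes via the enum */
	/* an .S file can use the literal "__VCPU_REGS_RCX * 8" offset */
	assert(regs[__VCPU_REGS_RCX] == 0xdead);
	return 0;
}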
index e811d4d1c824718acd6b4bef7acba2565290bcd3..904494b924c13bffdf667776cc0a514cf8c3b8d1 100644 (file)
@@ -104,12 +104,8 @@ static u64 kvm_sched_clock_read(void)
 
 static inline void kvm_sched_clock_init(bool stable)
 {
-       if (!stable) {
-               pv_ops.time.sched_clock = kvm_clock_read;
+       if (!stable)
                clear_sched_clock_stable();
-               return;
-       }
-
        kvm_sched_clock_offset = kvm_clock_read();
        pv_ops.time.sched_clock = kvm_sched_clock_read;
 
@@ -355,6 +351,20 @@ void __init kvmclock_init(void)
        machine_ops.crash_shutdown  = kvm_crash_shutdown;
 #endif
        kvm_get_preset_lpj();
+
+       /*
+        * X86_FEATURE_CONSTANT_TSC and X86_FEATURE_NONSTOP_TSC together
+        * mean the TSC runs at a constant rate across P-/T-state
+        * transitions and does not stop in deep C-states.
+        *
+        * Such an invariant TSC exposed by the host means kvmclock is
+        * not necessary: the TSC itself can serve as the clocksource.
+        */
+       if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+           boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
+           !check_tsc_unstable())
+               kvm_clock.rating = 299;
+
        clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
        pv_info.name = "KVM";
 }
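The rating value is what makes this take effect: the clocksource core prefers the highest-rated registered clock, the TSC clocksource is rated 300, and kvmclock is normally rated 400, so demoting kvmclock to 299 lets the invariant TSC win selection. A toy model of that choice (the ratings come from the kernel; everything else is invented):

#include <stdio.h>

struct clocksource { const char *name; int rating; };

int main(void)
{
	struct clocksource clocks[] = {
		{ "kvm-clock", 299 },	/* demoted when the TSC is invariant */
		{ "tsc",       300 },
	};
	struct clocksource *best = &clocks[0];

	for (unsigned int i = 1; i < sizeof(clocks) / sizeof(clocks[0]); i++)
		if (clocks[i].rating > best->rating)
			best = &clocks[i];

	printf("selected clocksource: %s\n", best->name);	/* tsc */
	return 0;
}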
index c07958b59f5051d525ddf3e110412f9377ac5e96..fd3951638ae45aebcc2c9ecaa0ee5e100b41e741 100644 (file)
@@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-               F(CLDEMOTE);
+               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
 
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
index 89d20ed1d2e8bf7abe753adba301ef2b31ae8398..27c43525a05f1afabeb705b27f955eba5fe5356d 100644 (file)
@@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
 
        mutex_lock(&hv->hv_lock);
        ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
-                       GFP_KERNEL);
+                       GFP_KERNEL_ACCOUNT);
        mutex_unlock(&hv->hv_lock);
 
        if (ret >= 0)
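This conversion, and the many GFP_KERNEL_ACCOUNT conversions in the hunks below, charge VM-lifetime allocations to the memory cgroup of the userspace process that created the VM, so a guest cannot pin unaccounted kernel memory. The flag is nothing more than GFP_KERNEL plus kmemcg accounting; its definition in include/linux/gfp.h is literally:

#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)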
index af192895b1fc633e9b2922c587862d1cbb41efd7..4a6dc54cc12becf739afb3bb468b1e0d9c4c52d1 100644 (file)
@@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
        pid_t pid_nr;
        int ret;
 
-       pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+       pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT);
        if (!pit)
                return NULL;
 
index bdcd4139eca9233bbd9e82615a1ed3c45c2ad060..8b38bb4868a65defc9143776ab96b19744be2fa1 100644 (file)
@@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm)
        struct kvm_pic *s;
        int ret;
 
-       s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
+       s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT);
        if (!s)
                return -ENOMEM;
        spin_lock_init(&s->lock);
index 4e822ad363f37f613d14ab94f35609bcf3539bf7..1add1bc881e22418ff06e4c375aaca1a4a8b274c 100644 (file)
@@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm)
        struct kvm_ioapic *ioapic;
        int ret;
 
-       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT);
        if (!ioapic)
                return -ENOMEM;
        spin_lock_init(&ioapic->lock);
index 4b6c2da7265c88f8f530eb026ba6b0e950eac51e..991fdf7fc17fbd9e1a4cab99d688a7af820d397c 100644 (file)
@@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm)
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 
        new = kvzalloc(sizeof(struct kvm_apic_map) +
-                          sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
+                          sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
+                          GFP_KERNEL_ACCOUNT);
 
        if (!new)
                goto out;
@@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
        ASSERT(vcpu != NULL);
        apic_debug("apic_init %d\n", vcpu->vcpu_id);
 
-       apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+       apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
        if (!apic)
                goto nomem;
 
        vcpu->arch.apic = apic;
 
-       apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
+       apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!apic->regs) {
                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
                       vcpu->vcpu_id);
index f2d1d230d5b8421827aa447984fb271360bfec00..7837ab001d806f2f3ffd3a56e30d1bae916f7b03 100644 (file)
@@ -109,9 +109,11 @@ module_param(dbg, bool, 0644);
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
 
 
-#define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
-#define PT64_DIR_BASE_ADDR_MASK \
-       (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
+#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
+#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
+#else
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#endif
 #define PT64_LVL_ADDR_MASK(level) \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                * PT64_LEVEL_BITS))) - 1))
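A fixed 52-bit mask is no longer safe here because memory-encryption features repurpose high physical-address bits (the SME C-bit, MKTME KeyIDs); with CONFIG_DYNAMIC_PHYSICAL_MASK the boot code shrinks physical_mask accordingly, and this change picks that value up instead of open-coding __sme_clr(). An illustrative computation, where the six stolen bits are an invented example:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	/* pretend boot code reserved 6 high bits for MKTME KeyIDs */
	uint64_t physical_mask = (1ULL << (52 - 6)) - 1;
	uint64_t pt64_base_addr_mask = physical_mask & ~(PAGE_SIZE - 1);

	printf("PTE address mask: %#llx\n",
	       (unsigned long long)pt64_base_addr_mask);
	return 0;
}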
@@ -330,53 +332,56 @@ static inline bool is_access_track_spte(u64 spte)
 }
 
 /*
- * the low bit of the generation number is always presumed to be zero.
- * This disables mmio caching during memslot updates.  The concept is
- * similar to a seqcount but instead of retrying the access we just punt
- * and ignore the cache.
+ * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * the memslots generation and is derived as follows:
  *
- * spte bits 3-11 are used as bits 1-9 of the generation number,
- * the bits 52-61 are used as bits 10-19 of the generation number.
+ * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
+ * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ *
+ * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
+ * the MMIO generation number, as doing so would require stealing a bit from
+ * the "real" generation number and thus effectively halve the maximum number
+ * of MMIO generations that can be handled before encountering a wrap (which
+ * requires a full MMU zap).  The flag is instead explicitly queried when
+ * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_LOW_SHIFT                2
-#define MMIO_SPTE_GEN_HIGH_SHIFT       52
+#define MMIO_SPTE_GEN_MASK             GENMASK_ULL(18, 0)
 
-#define MMIO_GEN_SHIFT                 20
-#define MMIO_GEN_LOW_SHIFT             10
-#define MMIO_GEN_LOW_MASK              ((1 << MMIO_GEN_LOW_SHIFT) - 2)
-#define MMIO_GEN_MASK                  ((1 << MMIO_GEN_SHIFT) - 1)
+#define MMIO_SPTE_GEN_LOW_START                3
+#define MMIO_SPTE_GEN_LOW_END          11
+#define MMIO_SPTE_GEN_LOW_MASK         GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+                                                   MMIO_SPTE_GEN_LOW_START)
 
-static u64 generation_mmio_spte_mask(unsigned int gen)
+#define MMIO_SPTE_GEN_HIGH_START       52
+#define MMIO_SPTE_GEN_HIGH_END         61
+#define MMIO_SPTE_GEN_HIGH_MASK                GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
+                                                   MMIO_SPTE_GEN_HIGH_START)
+static u64 generation_mmio_spte_mask(u64 gen)
 {
        u64 mask;
 
-       WARN_ON(gen & ~MMIO_GEN_MASK);
+       WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
 
-       mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
-       mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
+       mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
+       mask |= ((gen >> 9) << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
        return mask;
 }
 
-static unsigned int get_mmio_spte_generation(u64 spte)
+static u64 get_mmio_spte_generation(u64 spte)
 {
-       unsigned int gen;
+       u64 gen;
 
        spte &= ~shadow_mmio_mask;
 
-       gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
-       gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
+       gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
+       gen |= ((spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START) << 9;
        return gen;
 }
 
-static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu)
-{
-       return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK;
-}
-
 static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
                           unsigned access)
 {
-       unsigned int gen = kvm_current_mmio_generation(vcpu);
+       u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
        u64 mask = generation_mmio_spte_mask(gen);
        u64 gpa = gfn << PAGE_SHIFT;
 
@@ -386,6 +391,8 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
        mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
                << shadow_nonpresent_or_rsvd_mask_len;
 
+       page_header(__pa(sptep))->mmio_cached = true;
+
        trace_mark_mmio_spte(sptep, gfn, access, gen);
        mmu_spte_set(sptep, mask);
 }
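The packing scheme is easy to sanity-check outside the kernel. A standalone model of the layout documented in the comment above (gen[0:8] -> spte[3:11], gen[9:18] -> spte[52:61]), with GENMASK_ULL() open-coded for portability:

#include <assert.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) ((~0ULL >> (63 - (h))) & ~((1ULL << (l)) - 1))

#define GEN_MASK	GENMASK_ULL(18, 0)	/* 19-bit generation  */
#define LOW_MASK	GENMASK_ULL(11, 3)	/* holds gen bits 0-8  */
#define HIGH_MASK	GENMASK_ULL(61, 52)	/* holds gen bits 9-18 */

static uint64_t pack_gen(uint64_t gen)
{
	uint64_t spte = (gen << 3) & LOW_MASK;

	return spte | (((gen >> 9) << 52) & HIGH_MASK);
}

static uint64_t unpack_gen(uint64_t spte)
{
	return ((spte & LOW_MASK) >> 3) |
	       (((spte & HIGH_MASK) >> 52) << 9);
}

int main(void)
{
	for (uint64_t gen = 0; gen <= GEN_MASK; gen++)
		assert(unpack_gen(pack_gen(gen)) == gen);
	return 0;
}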
@@ -407,7 +414,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-       u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
+       u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
        return (spte & ~mask) & ~PAGE_MASK;
 }
 
@@ -424,9 +431,13 @@ static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
 
 static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 {
-       unsigned int kvm_gen, spte_gen;
+       u64 kvm_gen, spte_gen, gen;
 
-       kvm_gen = kvm_current_mmio_generation(vcpu);
+       gen = kvm_vcpu_memslots(vcpu)->generation;
+       if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
+               return false;
+
+       kvm_gen = gen & MMIO_SPTE_GEN_MASK;
        spte_gen = get_mmio_spte_generation(spte);
 
        trace_check_mmio_spte(spte, kvm_gen, spte_gen);
@@ -959,7 +970,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-               obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
+               obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT);
                if (!obj)
                        return cache->nobjs >= min ? 0 : -ENOMEM;
                cache->objects[cache->nobjs++] = obj;
@@ -2049,12 +2060,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
        if (!direct)
                sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
        set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
-
-       /*
-        * The active_mmu_pages list is the FIFO list, do not move the
-        * page until it is zapped. kvm_zap_obsolete_pages depends on
-        * this feature. See the comments in kvm_zap_obsolete_pages().
-        */
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
        kvm_mod_used_mmu_pages(vcpu->kvm, +1);
        return sp;
@@ -2195,23 +2200,15 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
        --kvm->stat.mmu_unsync;
 }
 
-static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-                                   struct list_head *invalid_list);
+static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+                                    struct list_head *invalid_list);
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
                                    struct list_head *invalid_list);
 
-/*
- * NOTE: we should pay more attention on the zapped-obsolete page
- * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk
- * since it has been deleted from active_mmu_pages but still can be found
- * at hast list.
- *
- * for_each_valid_sp() has skipped that kind of pages.
- */
 #define for_each_valid_sp(_kvm, _sp, _gfn)                             \
        hlist_for_each_entry(_sp,                                       \
          &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-               if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {    \
+               if ((_sp)->role.invalid) {    \
                } else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)                        \
@@ -2231,18 +2228,28 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
        return true;
 }
 
+static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
+                                       struct list_head *invalid_list,
+                                       bool remote_flush)
+{
+       if (!remote_flush && !list_empty(invalid_list))
+               return false;
+
+       if (!list_empty(invalid_list))
+               kvm_mmu_commit_zap_page(kvm, invalid_list);
+       else
+               kvm_flush_remote_tlbs(kvm);
+       return true;
+}
+
 static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
                                 struct list_head *invalid_list,
                                 bool remote_flush, bool local_flush)
 {
-       if (!list_empty(invalid_list)) {
-               kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
+       if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush))
                return;
-       }
 
-       if (remote_flush)
-               kvm_flush_remote_tlbs(vcpu->kvm);
-       else if (local_flush)
+       if (local_flush)
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 }
 
@@ -2253,11 +2260,6 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
 static void mmu_audit_disable(void) { }
 #endif
 
-static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-       return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
-}
-
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                         struct list_head *invalid_list)
 {
@@ -2482,7 +2484,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                        flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
        }
-       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        clear_page(sp->spt);
        trace_kvm_mmu_get_page(sp, true);
 
@@ -2668,17 +2669,22 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
        return zapped;
 }
 
-static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-                                   struct list_head *invalid_list)
+static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
+                                      struct kvm_mmu_page *sp,
+                                      struct list_head *invalid_list,
+                                      int *nr_zapped)
 {
-       int ret;
+       bool list_unstable;
 
        trace_kvm_mmu_prepare_zap_page(sp);
        ++kvm->stat.mmu_shadow_zapped;
-       ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
+       *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
        kvm_mmu_page_unlink_children(kvm, sp);
        kvm_mmu_unlink_parents(kvm, sp);
 
+       /* Zapping children means active_mmu_pages has become unstable. */
+       list_unstable = *nr_zapped;
+
        if (!sp->role.invalid && !sp->role.direct)
                unaccount_shadowed(kvm, sp);
 
@@ -2686,22 +2692,27 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
                kvm_unlink_unsync_page(kvm, sp);
        if (!sp->root_count) {
                /* Count self */
-               ret++;
+               (*nr_zapped)++;
                list_move(&sp->link, invalid_list);
                kvm_mod_used_mmu_pages(kvm, -1);
        } else {
                list_move(&sp->link, &kvm->arch.active_mmu_pages);
 
-               /*
-                * The obsolete pages can not be used on any vcpus.
-                * See the comments in kvm_mmu_invalidate_zap_all_pages().
-                */
-               if (!sp->role.invalid && !is_obsolete_sp(kvm, sp))
+               if (!sp->role.invalid)
                        kvm_reload_remote_mmus(kvm);
        }
 
        sp->role.invalid = 1;
-       return ret;
+       return list_unstable;
+}
+
+static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+                                    struct list_head *invalid_list)
+{
+       int nr_zapped;
+
+       __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped);
+       return nr_zapped;
 }
 
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
@@ -3703,7 +3714,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
                        u64 *lm_root;
 
-                       lm_root = (void*)get_zeroed_page(GFP_KERNEL);
+                       lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT);
                        if (lm_root == NULL)
                                return 1;
 
@@ -4204,14 +4215,6 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
                        return false;
 
                if (cached_root_available(vcpu, new_cr3, new_role)) {
-                       /*
-                        * It is possible that the cached previous root page is
-                        * obsolete because of a change in the MMU
-                        * generation number. However, that is accompanied by
-                        * KVM_REQ_MMU_RELOAD, which will free the root that we
-                        * have set here and allocate a new one.
-                        */
-
                        kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
                        if (!skip_tlb_flush) {
                                kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5486,6 +5489,76 @@ void kvm_disable_tdp(void)
 }
 EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
+
+/* The return value indicates whether a TLB flush on all vcpus is needed. */
+typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
+
+/* The caller must hold mmu_lock before calling this function. */
+static __always_inline bool
+slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                       slot_level_handler fn, int start_level, int end_level,
+                       gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+{
+       struct slot_rmap_walk_iterator iterator;
+       bool flush = false;
+
+       for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
+                       end_gfn, &iterator) {
+               if (iterator.rmap)
+                       flush |= fn(kvm, iterator.rmap);
+
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       if (flush && lock_flush_tlb) {
+                               kvm_flush_remote_tlbs(kvm);
+                               flush = false;
+                       }
+                       cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       if (flush && lock_flush_tlb) {
+               kvm_flush_remote_tlbs(kvm);
+               flush = false;
+       }
+
+       return flush;
+}
+
+static __always_inline bool
+slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                 slot_level_handler fn, int start_level, int end_level,
+                 bool lock_flush_tlb)
+{
+       return slot_handle_level_range(kvm, memslot, fn, start_level,
+                       end_level, memslot->base_gfn,
+                       memslot->base_gfn + memslot->npages - 1,
+                       lock_flush_tlb);
+}
+
+static __always_inline bool
+slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                     slot_level_handler fn, bool lock_flush_tlb)
+{
+       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
+                                PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+}
+
+static __always_inline bool
+slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                       slot_level_handler fn, bool lock_flush_tlb)
+{
+       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
+                                PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+}
+
+static __always_inline bool
+slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                slot_level_handler fn, bool lock_flush_tlb)
+{
+       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
+                                PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
+}
+
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
        free_page((unsigned long)vcpu->arch.mmu->pae_root);
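These helpers, moved up so the memslot-flush path below can reuse them, also demonstrate the standard mmu_lock break pattern: publish any pending TLB flush before yielding the lock, so no other thread can run on stale translations. A userspace analogue of that discipline (all names are invented; compile with -pthread):

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

bool walk_locked(int *items, int n, bool (*fn)(int *), void (*flush_fn)(void))
{
	bool flush = false;

	pthread_mutex_lock(&lock);
	for (int i = 0; i < n; i++) {
		flush |= fn(&items[i]);

		if ((i & 63) == 63) {		/* periodic lock break */
			if (flush) {
				flush_fn();	/* flush *before* yielding */
				flush = false;
			}
			pthread_mutex_unlock(&lock);
			sched_yield();		/* let waiters in */
			pthread_mutex_lock(&lock);
		}
	}
	pthread_mutex_unlock(&lock);
	return flush;		/* caller decides on the final flush */
}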
@@ -5505,7 +5578,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
         * Therefore we need to allocate shadow page tables in the first
         * 4GB of memory, which happens to fit the DMA32 zone.
         */
-       page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+       page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
        if (!page)
                return -ENOMEM;
 
@@ -5543,105 +5616,62 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
                        struct kvm_memory_slot *slot,
                        struct kvm_page_track_notifier_node *node)
 {
-       kvm_mmu_invalidate_zap_all_pages(kvm);
-}
-
-void kvm_mmu_init_vm(struct kvm *kvm)
-{
-       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
-       node->track_write = kvm_mmu_pte_write;
-       node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
-       kvm_page_track_register_notifier(kvm, node);
-}
+       struct kvm_mmu_page *sp;
+       LIST_HEAD(invalid_list);
+       unsigned long i;
+       bool flush;
+       gfn_t gfn;
 
-void kvm_mmu_uninit_vm(struct kvm *kvm)
-{
-       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+       spin_lock(&kvm->mmu_lock);
 
-       kvm_page_track_unregister_notifier(kvm, node);
-}
+       if (list_empty(&kvm->arch.active_mmu_pages))
+               goto out_unlock;
 
-/* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
+       flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false);
 
-/* The caller should hold mmu-lock before calling this function. */
-static __always_inline bool
-slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                       slot_level_handler fn, int start_level, int end_level,
-                       gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
-{
-       struct slot_rmap_walk_iterator iterator;
-       bool flush = false;
+       for (i = 0; i < slot->npages; i++) {
+               gfn = slot->base_gfn + i;
 
-       for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
-                       end_gfn, &iterator) {
-               if (iterator.rmap)
-                       flush |= fn(kvm, iterator.rmap);
+               for_each_valid_sp(kvm, sp, gfn) {
+                       if (sp->gfn != gfn)
+                               continue;
 
+                       kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+               }
                if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-                       if (flush && lock_flush_tlb) {
-                               kvm_flush_remote_tlbs(kvm);
-                               flush = false;
-                       }
+                       kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+                       flush = false;
                        cond_resched_lock(&kvm->mmu_lock);
                }
        }
+       kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
-       if (flush && lock_flush_tlb) {
-               kvm_flush_remote_tlbs(kvm);
-               flush = false;
-       }
-
-       return flush;
+out_unlock:
+       spin_unlock(&kvm->mmu_lock);
 }
 
-static __always_inline bool
-slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                 slot_level_handler fn, int start_level, int end_level,
-                 bool lock_flush_tlb)
+void kvm_mmu_init_vm(struct kvm *kvm)
 {
-       return slot_handle_level_range(kvm, memslot, fn, start_level,
-                       end_level, memslot->base_gfn,
-                       memslot->base_gfn + memslot->npages - 1,
-                       lock_flush_tlb);
-}
+       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
-static __always_inline bool
-slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                     slot_level_handler fn, bool lock_flush_tlb)
-{
-       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
-                                PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+       node->track_write = kvm_mmu_pte_write;
+       node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
+       kvm_page_track_register_notifier(kvm, node);
 }
 
-static __always_inline bool
-slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                       slot_level_handler fn, bool lock_flush_tlb)
+void kvm_mmu_uninit_vm(struct kvm *kvm)
 {
-       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
-                                PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
-}
+       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
-static __always_inline bool
-slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                slot_level_handler fn, bool lock_flush_tlb)
-{
-       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
-                                PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
+       kvm_page_track_unregister_notifier(kvm, node);
 }
 
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       bool flush_tlb = true;
-       bool flush = false;
        int i;
 
-       if (kvm_available_flush_tlb_with_range())
-               flush_tlb = false;
-
        spin_lock(&kvm->mmu_lock);
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                slots = __kvm_memslots(kvm, i);
@@ -5653,17 +5683,12 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
                        if (start >= end)
                                continue;
 
-                       flush |= slot_handle_level_range(kvm, memslot,
-                                       kvm_zap_rmapp, PT_PAGE_TABLE_LEVEL,
-                                       PT_MAX_HUGEPAGE_LEVEL, start,
-                                       end - 1, flush_tlb);
+                       slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
+                                               PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
+                                               start, end - 1, true);
                }
        }
 
-       if (flush)
-               kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
-                               gfn_end - gfn_start + 1);
-
        spin_unlock(&kvm->mmu_lock);
 }
 
@@ -5815,101 +5840,58 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
-#define BATCH_ZAP_PAGES        10
-static void kvm_zap_obsolete_pages(struct kvm *kvm)
+static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
 {
        struct kvm_mmu_page *sp, *node;
-       int batch = 0;
+       LIST_HEAD(invalid_list);
+       int ign;
 
+       spin_lock(&kvm->mmu_lock);
 restart:
-       list_for_each_entry_safe_reverse(sp, node,
-             &kvm->arch.active_mmu_pages, link) {
-               int ret;
-
-               /*
-                * No obsolete page exists before new created page since
-                * active_mmu_pages is the FIFO list.
-                */
-               if (!is_obsolete_sp(kvm, sp))
-                       break;
-
-               /*
-                * Since we are reversely walking the list and the invalid
-                * list will be moved to the head, skip the invalid page
-                * can help us to avoid the infinity list walking.
-                */
-               if (sp->role.invalid)
+       list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+               if (mmio_only && !sp->mmio_cached)
                        continue;
-
-               /*
-                * Need not flush tlb since we only zap the sp with invalid
-                * generation number.
-                */
-               if (batch >= BATCH_ZAP_PAGES &&
-                     cond_resched_lock(&kvm->mmu_lock)) {
-                       batch = 0;
+               if (sp->role.invalid && sp->root_count)
+                       continue;
+               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) {
+                       WARN_ON_ONCE(mmio_only);
                        goto restart;
                }
-
-               ret = kvm_mmu_prepare_zap_page(kvm, sp,
-                               &kvm->arch.zapped_obsolete_pages);
-               batch += ret;
-
-               if (ret)
+               if (cond_resched_lock(&kvm->mmu_lock))
                        goto restart;
        }
 
-       /*
-        * Should flush tlb before free page tables since lockless-walking
-        * may use the pages.
-        */
-       kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
-}
-
-/*
- * Fast invalidate all shadow pages and use lock-break technique
- * to zap obsolete pages.
- *
- * It's required when memslot is being deleted or VM is being
- * destroyed, in these cases, we should ensure that KVM MMU does
- * not use any resource of the being-deleted slot or all slots
- * after calling the function.
- */
-void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
-{
-       spin_lock(&kvm->mmu_lock);
-       trace_kvm_mmu_invalidate_zap_all_pages(kvm);
-       kvm->arch.mmu_valid_gen++;
-
-       /*
-        * Notify all vcpus to reload its shadow page table
-        * and flush TLB. Then all vcpus will switch to new
-        * shadow page table with the new mmu_valid_gen.
-        *
-        * Note: we should do this under the protection of
-        * mmu-lock, otherwise, vcpu would purge shadow page
-        * but miss tlb flush.
-        */
-       kvm_reload_remote_mmus(kvm);
-
-       kvm_zap_obsolete_pages(kvm);
+       kvm_mmu_commit_zap_page(kvm, &invalid_list);
        spin_unlock(&kvm->mmu_lock);
 }
 
-static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+void kvm_mmu_zap_all(struct kvm *kvm)
 {
-       return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+       return __kvm_mmu_zap_all(kvm, false);
 }
 
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
+       WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+
+       gen &= MMIO_SPTE_GEN_MASK;
+
        /*
-        * The very rare case: if the generation-number is round,
+        * Generation numbers are incremented in multiples of the number of
+        * address spaces in order to provide unique generations across all
+        * address spaces.  Strip what is effectively the address space
+        * modifier prior to checking for a wrap of the MMIO generation so
+        * that a wrap in any address space is detected.
+        */
+       gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1);
+
+       /*
+        * The very rare case: if the MMIO generation number has wrapped,
         * zap all shadow pages.
         */
-       if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
+       if (unlikely(gen == 0)) {
                kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
-               kvm_mmu_invalidate_zap_all_pages(kvm);
+               __kvm_mmu_zap_all(kvm, true);
        }
 }
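The stripping step above deserves a worked number: with KVM_ADDRESS_SPACE_NUM == 2 (regular and SMM), memslot generations advance in steps of two, so one address space only ever observes odd values and a wrap would land on 1, not 0. Masking off the low bit maps both parities onto the same wrap point:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t gen_mask  = (1ULL << 19) - 1;	/* MMIO_SPTE_GEN_MASK    */
	const uint64_t nr_spaces = 2;			/* KVM_ADDRESS_SPACE_NUM */
	uint64_t gen = (gen_mask + 2) & gen_mask;	/* odd-parity wrap -> 1  */

	printf("raw gen %llu, stripped gen %llu\n",
	       (unsigned long long)gen,
	       (unsigned long long)(gen & ~(nr_spaces - 1)));
	/* raw 1 would slip past the "gen == 0" test; stripped 0 zaps */
	return 0;
}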
 
@@ -5940,24 +5922,16 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                 * want to shrink a VM that only started to populate its MMU
                 * anyway.
                 */
-               if (!kvm->arch.n_used_mmu_pages &&
-                     !kvm_has_zapped_obsolete_pages(kvm))
+               if (!kvm->arch.n_used_mmu_pages)
                        continue;
 
                idx = srcu_read_lock(&kvm->srcu);
                spin_lock(&kvm->mmu_lock);
 
-               if (kvm_has_zapped_obsolete_pages(kvm)) {
-                       kvm_mmu_commit_zap_page(kvm,
-                             &kvm->arch.zapped_obsolete_pages);
-                       goto unlock;
-               }
-
                if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
                        freed++;
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
-unlock:
                spin_unlock(&kvm->mmu_lock);
                srcu_read_unlock(&kvm->srcu, idx);
 
index c7b333147c4a6afecbbe0098ee30b715afd37e6c..bbdc60f2fae89beb34c72716d9e7eb9c33584651 100644 (file)
@@ -203,7 +203,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
        return -(u32)fault & errcode;
 }
 
-void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
 
 void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
index c73bf4e4988cb5c84065dd324b50b93cbe12c2e1..9f6c855a00439a58f5c4ea58e099d935d65e22d9 100644 (file)
@@ -8,18 +8,16 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvmmmu
 
-#define KVM_MMU_PAGE_FIELDS                    \
-       __field(unsigned long, mmu_valid_gen)   \
-       __field(__u64, gfn)                     \
-       __field(__u32, role)                    \
-       __field(__u32, root_count)              \
+#define KVM_MMU_PAGE_FIELDS \
+       __field(__u64, gfn) \
+       __field(__u32, role) \
+       __field(__u32, root_count) \
        __field(bool, unsync)
 
-#define KVM_MMU_PAGE_ASSIGN(sp)                                \
-       __entry->mmu_valid_gen = sp->mmu_valid_gen;     \
-       __entry->gfn = sp->gfn;                         \
-       __entry->role = sp->role.word;                  \
-       __entry->root_count = sp->root_count;           \
+#define KVM_MMU_PAGE_ASSIGN(sp)                             \
+       __entry->gfn = sp->gfn;                      \
+       __entry->role = sp->role.word;               \
+       __entry->root_count = sp->root_count;        \
        __entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({                                       \
@@ -31,9 +29,8 @@
                                                                        \
        role.word = __entry->role;                                      \
                                                                        \
-       trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s"      \
+       trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s"              \
                         " %snxe %sad root %u %s%c",                    \
-                        __entry->mmu_valid_gen,                        \
                         __entry->gfn, role.level,                      \
                         role.cr4_pae ? " pae" : "",                    \
                         role.quadrant,                                 \
@@ -282,27 +279,6 @@ TRACE_EVENT(
        )
 );
 
-TRACE_EVENT(
-       kvm_mmu_invalidate_zap_all_pages,
-       TP_PROTO(struct kvm *kvm),
-       TP_ARGS(kvm),
-
-       TP_STRUCT__entry(
-               __field(unsigned long, mmu_valid_gen)
-               __field(unsigned int, mmu_used_pages)
-       ),
-
-       TP_fast_assign(
-               __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
-               __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
-       ),
-
-       TP_printk("kvm-mmu-valid-gen %lx used_pages %x",
-                 __entry->mmu_valid_gen, __entry->mmu_used_pages
-       )
-);
-
-
 TRACE_EVENT(
        check_mmio_spte,
        TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
index 3052a59a30655bcadccb53ec0cd1c14dab2b7591..fd04d462fdaeec18392757fc193b3dc4f0dd85e5 100644 (file)
@@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
        for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
                slot->arch.gfn_track[i] =
                        kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
-                                GFP_KERNEL);
+                                GFP_KERNEL_ACCOUNT);
                if (!slot->arch.gfn_track[i])
                        goto track_free;
        }
index f13a3a24d3609e03b2bd849c0058ff41a670f2ca..b5b128a0a05124d275af1f103fe3e40315df80f5 100644 (file)
@@ -145,7 +145,6 @@ struct kvm_svm {
 
        /* Struct members for AVIC */
        u32 avic_vm_id;
-       u32 ldr_mode;
        struct page *avic_logical_id_table_page;
        struct page *avic_physical_id_table_page;
        struct hlist_node hnode;
@@ -236,6 +235,7 @@ struct vcpu_svm {
        bool nrips_enabled      : 1;
 
        u32 ldr_reg;
+       u32 dfr_reg;
        struct page *avic_backing_page;
        u64 *avic_physical_id_cache;
        bool avic_is_running;
@@ -1795,9 +1795,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
        /* Avoid using vmalloc for smaller buffers. */
        size = npages * sizeof(struct page *);
        if (size > PAGE_SIZE)
-               pages = vmalloc(size);
+               pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+                                 PAGE_KERNEL);
        else
-               pages = kmalloc(size, GFP_KERNEL);
+               pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
 
        if (!pages)
                return NULL;
@@ -1865,7 +1866,9 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
 
 static struct kvm *svm_vm_alloc(void)
 {
-       struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
+       struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
+                                           GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+                                           PAGE_KERNEL);
        return &kvm_svm->kvm;
 }
 
@@ -1940,7 +1943,7 @@ static int avic_vm_init(struct kvm *kvm)
                return 0;
 
        /* Allocating physical APIC ID table (4KB) */
-       p_page = alloc_page(GFP_KERNEL);
+       p_page = alloc_page(GFP_KERNEL_ACCOUNT);
        if (!p_page)
                goto free_avic;
 
@@ -1948,7 +1951,7 @@ static int avic_vm_init(struct kvm *kvm)
        clear_page(page_address(p_page));
 
        /* Allocating logical APIC ID table (4KB) */
-       l_page = alloc_page(GFP_KERNEL);
+       l_page = alloc_page(GFP_KERNEL_ACCOUNT);
        if (!l_page)
                goto free_avic;
 
@@ -2106,6 +2109,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
 
        INIT_LIST_HEAD(&svm->ir_list);
        spin_lock_init(&svm->ir_list_lock);
+       svm->dfr_reg = APIC_DFR_FLAT;
 
        return ret;
 }
@@ -2119,13 +2123,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        struct page *nested_msrpm_pages;
        int err;
 
-       svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
        if (!svm) {
                err = -ENOMEM;
                goto out;
        }
 
-       svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
+       svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
+                                                    GFP_KERNEL_ACCOUNT);
        if (!svm->vcpu.arch.guest_fpu) {
                printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
                err = -ENOMEM;
@@ -2137,19 +2142,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
                goto free_svm;
 
        err = -ENOMEM;
-       page = alloc_page(GFP_KERNEL);
+       page = alloc_page(GFP_KERNEL_ACCOUNT);
        if (!page)
                goto uninit;
 
-       msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
        if (!msrpm_pages)
                goto free_page1;
 
-       nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
        if (!nested_msrpm_pages)
                goto free_page2;
 
-       hsave_page = alloc_page(GFP_KERNEL);
+       hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
        if (!hsave_page)
                goto free_page3;
 
@@ -4565,8 +4570,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
        return &logical_apic_id_table[index];
 }
 
-static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
-                         bool valid)
+static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 {
        bool flat;
        u32 *entry, new_entry;
@@ -4579,31 +4583,39 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
        new_entry = READ_ONCE(*entry);
        new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
        new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
-       if (valid)
-               new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
-       else
-               new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+       new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
        WRITE_ONCE(*entry, new_entry);
 
        return 0;
 }
 
+static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       bool flat = svm->dfr_reg == APIC_DFR_FLAT;
+       u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
+
+       if (entry)
+               WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
+}
+
 static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
 {
-       int ret;
+       int ret = 0;
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
 
-       if (!ldr)
-               return 1;
+       if (ldr == svm->ldr_reg)
+               return 0;
 
-       ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
-       if (ret && svm->ldr_reg) {
-               avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
-               svm->ldr_reg = 0;
-       } else {
+       avic_invalidate_logical_id_entry(vcpu);
+
+       if (ldr)
+               ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr);
+
+       if (!ret)
                svm->ldr_reg = ldr;
-       }
+
        return ret;
 }
 
@@ -4637,27 +4649,16 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
+static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
        u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
-       u32 mod = (dfr >> 28) & 0xf;
 
-       /*
-        * We assume that all local APICs are using the same type.
-        * If this changes, we need to flush the AVIC logical
-        * APID id table.
-        */
-       if (kvm_svm->ldr_mode == mod)
-               return 0;
-
-       clear_page(page_address(kvm_svm->avic_logical_id_table_page));
-       kvm_svm->ldr_mode = mod;
+       if (svm->dfr_reg == dfr)
+               return;
 
-       if (svm->ldr_reg)
-               avic_handle_ldr_update(vcpu);
-       return 0;
+       avic_invalidate_logical_id_entry(vcpu);
+       svm->dfr_reg = dfr;
 }
 
 static int avic_unaccel_trap_write(struct vcpu_svm *svm)
@@ -5125,11 +5126,11 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb;
 
-       if (!kvm_vcpu_apicv_active(&svm->vcpu))
-               return;
-
-       vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
-       mark_dirty(vmcb, VMCB_INTR);
+       if (kvm_vcpu_apicv_active(vcpu))
+               vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+       else
+               vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
+       mark_dirty(vmcb, VMCB_AVIC);
 }
 
 static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -5195,7 +5196,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
         * Allocating new amd_iommu_pi_data, which will get
         * add to the per-vcpu ir_list.
         */
-       ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
+       ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
        if (!ir) {
                ret = -ENOMEM;
                goto out;
@@ -6163,8 +6164,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
 {
        if (avic_handle_apic_id_update(vcpu) != 0)
                return;
-       if (avic_handle_dfr_update(vcpu) != 0)
-               return;
+       avic_handle_dfr_update(vcpu);
        avic_handle_ldr_update(vcpu);
 }
 
@@ -6311,7 +6311,7 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
        if (ret)
                return ret;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                return -ENOMEM;
 
@@ -6361,7 +6361,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
                return -EFAULT;
 
-       start = kzalloc(sizeof(*start), GFP_KERNEL);
+       start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
        if (!start)
                return -ENOMEM;
 
@@ -6458,7 +6458,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
                return -EFAULT;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                return -ENOMEM;
 
@@ -6535,7 +6535,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (copy_from_user(&params, measure, sizeof(params)))
                return -EFAULT;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                return -ENOMEM;
 
@@ -6597,7 +6597,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (!sev_guest(kvm))
                return -ENOTTY;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                return -ENOMEM;
 
@@ -6618,7 +6618,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (!sev_guest(kvm))
                return -ENOTTY;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                return -ENOMEM;
 
@@ -6646,7 +6646,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
        struct sev_data_dbg *data;
        int ret;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                return -ENOMEM;
 
@@ -6901,7 +6901,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        }
 
        ret = -ENOMEM;
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
        if (!data)
                goto e_unpin_memory;
 
@@ -7007,7 +7007,7 @@ static int svm_register_enc_region(struct kvm *kvm,
        if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
                return -EINVAL;
 
-       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
        if (!region)
                return -ENOMEM;
 
index d737a51a53ca368b3a223e2ed41e397a7abbafbd..f24a2c2250706f24741e4503ed5ba60232b3613e 100644 (file)
@@ -211,7 +211,6 @@ static void free_nested(struct kvm_vcpu *vcpu)
        if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
                return;
 
-       hrtimer_cancel(&vmx->nested.preemption_timer);
        vmx->nested.vmxon = false;
        vmx->nested.smm.vmxon = false;
        free_vpid(vmx->nested.vpid02);
@@ -274,6 +273,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
        vcpu_load(vcpu);
+       vmx_leave_nested(vcpu);
        vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
        free_nested(vcpu);
        vcpu_put(vcpu);
@@ -1979,17 +1979,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
        if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
                prepare_vmcs02_early_full(vmx, vmcs12);
 
-       /*
-        * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
-        * entry, but only if the current (host) sp changed from the value
-        * we wrote last (vmx->host_rsp).  This cache is no longer relevant
-        * if we switch vmcs, and rather than hold a separate cache per vmcs,
-        * here we just force the write to happen on entry.  host_rsp will
-        * also be written unconditionally by nested_vmx_check_vmentry_hw()
-        * if we are doing early consistency checks via hardware.
-        */
-       vmx->host_rsp = 0;
-
        /*
         * PIN CONTROLS
         */
@@ -2289,10 +2278,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        }
        vmx_set_rflags(vcpu, vmcs12->guest_rflags);
 
-       vmx->nested.preemption_timer_expired = false;
-       if (nested_cpu_has_preemption_timer(vmcs12))
-               vmx_start_preemption_timer(vcpu);
-
        /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
         * bitwise-or of what L1 wants to trap for L2, and what we want to
         * trap. Note that CR0.TS also needs updating - we do this later.
@@ -2722,6 +2707,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long cr3, cr4;
+       bool vm_fail;
 
        if (!nested_early_check)
                return 0;
@@ -2755,29 +2741,34 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
                vmx->loaded_vmcs->host_state.cr4 = cr4;
        }
 
-       vmx->__launched = vmx->loaded_vmcs->launched;
-
        asm(
-               /* Set HOST_RSP */
                "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
-               __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
-               "mov %%" _ASM_SP ", %c[host_rsp](%1)\n\t"
+               "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+               "je 1f \n\t"
+               __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
+               "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+               "1: \n\t"
                "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
 
                /* Check if vmlaunch or vmresume is needed */
-               "cmpl $0, %c[launched](%% " _ASM_CX")\n\t"
+               "cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
 
+               /*
+                * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
+                * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
+                * Valid.  vmx_vmenter() directly "returns" RFLAGS, and so the
+                * result of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
+                */
                "call vmx_vmenter\n\t"
 
-               /* Set vmx->fail accordingly */
-               "setbe %c[fail](%% " _ASM_CX")\n\t"
-             : ASM_CALL_CONSTRAINT
-             : "c"(vmx), "d"((unsigned long)HOST_RSP),
-               [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
-               [fail]"i"(offsetof(struct vcpu_vmx, fail)),
-               [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
+               CC_SET(be)
+             : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
+             : [HOST_RSP]"r"((unsigned long)HOST_RSP),
+               [loaded_vmcs]"r"(vmx->loaded_vmcs),
+               [launched]"i"(offsetof(struct loaded_vmcs, launched)),
+               [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
                [wordsize]"i"(sizeof(ulong))
-             : "rax", "cc", "memory"
+             : "cc", "memory"
        );
 
        preempt_enable();
@@ -2787,10 +2778,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
        if (vmx->msr_autoload.guest.nr)
                vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 
-       if (vmx->fail) {
+       if (vm_fail) {
                WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
                             VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               vmx->fail = 0;
                return 1;
        }
 
@@ -2813,8 +2803,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 
        return 0;
 }
-STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw);
-
 
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
                                                 struct vmcs12 *vmcs12);
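
The rewritten asm above drops the vmx->fail bounce and captures RFLAGS.{CF,ZF} straight into the local vm_fail via CC_SET()/CC_OUT() from arch/x86/include/asm/asm.h; on compilers with flag-output support those expand to the "=@cc<cond>" constraint. A standalone sketch of the same idiom (userspace; GCC >= 6 or Clang assumed):

    #include <stdbool.h>
    #include <stdio.h>

    /* Read the "below or equal" condition (CF=1 or ZF=1) directly out of
     * EFLAGS instead of materializing it with SETcc plus a store -- the
     * raw form of what CC_OUT(be) generates when __GCC_ASM_FLAG_OUTPUTS__
     * is available. */
    static bool cmp_is_be(unsigned long a, unsigned long b)
    {
            bool be;

            asm("cmp %[b], %[a]"
                : "=@ccbe" (be)
                : [a] "r" (a), [b] "r" (b));
            return be;
    }

    int main(void)
    {
            printf("%d %d\n", cmp_is_be(1, 2), cmp_is_be(3, 2)); /* 1 0 */
            return 0;
    }
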
@@ -3030,6 +3018,15 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
        if (unlikely(evaluate_pending_interrupts))
                kvm_make_request(KVM_REQ_EVENT, vcpu);
 
+       /*
+        * Do not start the preemption timer hrtimer until after we know
+        * we are successful, so that only nested_vmx_vmexit needs to cancel
+        * the timer.
+        */
+       vmx->nested.preemption_timer_expired = false;
+       if (nested_cpu_has_preemption_timer(vmcs12))
+               vmx_start_preemption_timer(vcpu);
+
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
         * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -3450,13 +3447,10 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        else
                vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
-       if (nested_cpu_has_preemption_timer(vmcs12)) {
-               if (vmcs12->vm_exit_controls &
-                   VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+       if (nested_cpu_has_preemption_timer(vmcs12) &&
+           vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
                        vmcs12->vmx_preemption_timer_value =
                                vmx_get_preemption_timer_value(vcpu);
-               hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
-       }
 
        /*
         * In some cases (usually, nested EPT), L2 is allowed to change its
@@ -3864,6 +3858,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
        leave_guest_mode(vcpu);
 
+       if (nested_cpu_has_preemption_timer(vmcs12))
+               hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+
        if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
                vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
 
@@ -3915,9 +3912,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                vmx_flush_tlb(vcpu, true);
        }
 
-       /* This is needed for same reason as it was needed in prepare_vmcs02 */
-       vmx->host_rsp = 0;
-
        /* Unpin physical memory we referred to in vmcs02 */
        if (vmx->nested.apic_access_page) {
                kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -4035,25 +4029,50 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
        /* Addr = segment_base + offset */
        /* offset = base + [index * scale] + displacement */
        off = exit_qualification; /* holds the displacement */
+       if (addr_size == 1)
+               off = (gva_t)sign_extend64(off, 31);
+       else if (addr_size == 0)
+               off = (gva_t)sign_extend64(off, 15);
        if (base_is_valid)
                off += kvm_register_read(vcpu, base_reg);
        if (index_is_valid)
                off += kvm_register_read(vcpu, index_reg)<<scaling;
        vmx_get_segment(vcpu, &s, seg_reg);
-       *ret = s.base + off;
 
+       /*
+        * The effective address, i.e. @off, of a memory operand is truncated
+        * based on the address size of the instruction.  Note that this is
+        * the *effective address*, i.e. the address prior to accounting for
+        * the segment's base.
+        */
        if (addr_size == 1) /* 32 bit */
-               *ret &= 0xffffffff;
+               off &= 0xffffffff;
+       else if (addr_size == 0) /* 16 bit */
+               off &= 0xffff;
 
        /* Checks for #GP/#SS exceptions. */
        exn = false;
        if (is_long_mode(vcpu)) {
+               /*
+                * The virtual/linear address is never truncated in 64-bit
+                * mode, e.g. a 32-bit address size can yield a 64-bit virtual
+                * address when using FS/GS with a non-zero base.
+                */
+               *ret = s.base + off;
+
                /* Long mode: #GP(0)/#SS(0) if the memory address is in a
                 * non-canonical form. This is the only check on the memory
                 * destination for long mode!
                 */
                exn = is_noncanonical_address(*ret, vcpu);
-       } else if (is_protmode(vcpu)) {
+       } else {
+               /*
+                * When not in long mode, the virtual/linear address is
+                * unconditionally truncated to 32 bits regardless of the
+                * address size.
+                */
+               *ret = (s.base + off) & 0xffffffff;
+
                /* Protected mode: apply checks for segment validity in the
                 * following order:
                 * - segment type check (#GP(0) may be thrown)
@@ -4077,10 +4096,16 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
                /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
                 */
                exn = (s.unusable != 0);
-               /* Protected mode: #GP(0)/#SS(0) if the memory
-                * operand is outside the segment limit.
+
+               /*
+                * Protected mode: #GP(0)/#SS(0) if the memory operand is
+                * outside the segment limit.  All CPUs that support VMX ignore
+                * limit checks for flat segments, i.e. segments with base==0,
+                * limit==0xffffffff and of type expand-up data or code.
                 */
-               exn = exn || (off + sizeof(u64) > s.limit);
+               if (!(s.base == 0 && s.limit == 0xffffffff &&
+                    ((s.type & 8) || !(s.type & 4))))
+                       exn = exn || (off + sizeof(u64) > s.limit);
        }
        if (exn) {
                kvm_queue_exception_e(vcpu,
@@ -4145,11 +4170,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
        if (r < 0)
                goto out_vmcs02;
 
-       vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
+       vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
        if (!vmx->nested.cached_vmcs12)
                goto out_cached_vmcs12;
 
-       vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
+       vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
        if (!vmx->nested.cached_shadow_vmcs12)
                goto out_cached_shadow_vmcs12;
 
@@ -5696,6 +5721,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
                enable_shadow_vmcs = 0;
        if (enable_shadow_vmcs) {
                for (i = 0; i < VMX_BITMAP_NR; i++) {
+                       /*
+                        * The vmx_bitmap is not tied to a VM and so should
+                        * not be charged to a memcg.
+                        */
                        vmx_bitmap[i] = (unsigned long *)
                                __get_free_page(GFP_KERNEL);
                        if (!vmx_bitmap[i]) {
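
The get_vmx_mem_address() changes above sign-extend the displacement to the instruction's address size and truncate the *effective* address before the segment base is added, since only long mode may produce a linear address wider than 32 bits. A userspace sketch of that arithmetic, assuming the exit-qualification encoding of 0/1/2 for 16/32/64-bit address size:

    #include <stdint.h>
    #include <stdio.h>

    /* Effective-address math mirroring the hunk above (the segment base
     * and the long-mode vs. protected-mode truncation of the final
     * linear address are handled separately in the kernel code). */
    static uint64_t effective_addr(uint64_t disp, uint64_t base,
                                   uint64_t index, unsigned scaling,
                                   int addr_size)
    {
            uint64_t off = disp;

            /* The displacement is a signed immediate of the address size. */
            if (addr_size == 1)
                    off = (uint64_t)(int32_t)disp;
            else if (addr_size == 0)
                    off = (uint64_t)(int16_t)disp;

            off += base + (index << scaling);

            /* Truncate to the address size, before adding s.base. */
            if (addr_size == 1)
                    off &= 0xffffffff;
            else if (addr_size == 0)
                    off &= 0xffff;
            return off;
    }

    int main(void)
    {
            /* A displacement of -1 with a 32-bit address size wraps within
             * 32 bits rather than borrowing into bits 63:32: prints 0xfff. */
            printf("%#llx\n", (unsigned long long)
                   effective_addr(0xffffffff, 0x1000, 0, 0, 1));
            return 0;
    }
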
index 6def3ba88e3b35109dd4688f4aad2ea72deff965..cb6079f8a227f4f63aa38e2da493e49a6e44764b 100644 (file)
@@ -34,6 +34,7 @@ struct vmcs_host_state {
        unsigned long cr4;      /* May not match real cr4 */
        unsigned long gs_base;
        unsigned long fs_base;
+       unsigned long rsp;
 
        u16           fs_sel, gs_sel, ldt_sel;
 #ifdef CONFIG_X86_64
index bcef2c7e9bc48cf658c83dab5a95d45887ac92af..7b272738c5768bac029ca3e4f3e6d7b1003260da 100644 (file)
@@ -1,6 +1,30 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/bitsperlong.h>
+#include <asm/kvm_vcpu_regs.h>
+
+#define WORD_SIZE (BITS_PER_LONG / 8)
+
+#define VCPU_RAX       __VCPU_REGS_RAX * WORD_SIZE
+#define VCPU_RCX       __VCPU_REGS_RCX * WORD_SIZE
+#define VCPU_RDX       __VCPU_REGS_RDX * WORD_SIZE
+#define VCPU_RBX       __VCPU_REGS_RBX * WORD_SIZE
+/* Intentionally omit RSP as it's context switched by hardware */
+#define VCPU_RBP       __VCPU_REGS_RBP * WORD_SIZE
+#define VCPU_RSI       __VCPU_REGS_RSI * WORD_SIZE
+#define VCPU_RDI       __VCPU_REGS_RDI * WORD_SIZE
+
+#ifdef CONFIG_X86_64
+#define VCPU_R8                __VCPU_REGS_R8  * WORD_SIZE
+#define VCPU_R9                __VCPU_REGS_R9  * WORD_SIZE
+#define VCPU_R10       __VCPU_REGS_R10 * WORD_SIZE
+#define VCPU_R11       __VCPU_REGS_R11 * WORD_SIZE
+#define VCPU_R12       __VCPU_REGS_R12 * WORD_SIZE
+#define VCPU_R13       __VCPU_REGS_R13 * WORD_SIZE
+#define VCPU_R14       __VCPU_REGS_R14 * WORD_SIZE
+#define VCPU_R15       __VCPU_REGS_R15 * WORD_SIZE
+#endif
 
        .text
 
@@ -55,3 +79,146 @@ ENDPROC(vmx_vmenter)
 ENTRY(vmx_vmexit)
        ret
 ENDPROC(vmx_vmexit)
+
+/**
+ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+ * @vmx:       struct vcpu_vmx *
+ * @regs:      unsigned long * (to guest registers)
+ * @launched:  %true if the VMCS has been launched
+ *
+ * Returns:
+ *     0 on VM-Exit, 1 on VM-Fail
+ */
+ENTRY(__vmx_vcpu_run)
+       push %_ASM_BP
+       mov  %_ASM_SP, %_ASM_BP
+#ifdef CONFIG_X86_64
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+#else
+       push %edi
+       push %esi
+#endif
+       push %_ASM_BX
+
+       /*
+        * Save @regs; _ASM_ARG2 may be modified by vmx_update_host_rsp() and
+        * @regs is needed after VM-Exit to save the guest's register values.
+        */
+       push %_ASM_ARG2
+
+       /* Copy @launched to BL; _ASM_ARG3 is volatile. */
+       mov %_ASM_ARG3B, %bl
+
+       /* Adjust RSP to account for the CALL to vmx_vmenter(). */
+       lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
+       call vmx_update_host_rsp
+
+       /* Load @regs to RAX. */
+       mov (%_ASM_SP), %_ASM_AX
+
+       /* Check if vmlaunch or vmresume is needed */
+       cmpb $0, %bl
+
+       /* Load guest registers.  Don't clobber flags. */
+       mov VCPU_RBX(%_ASM_AX), %_ASM_BX
+       mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+       mov VCPU_RDX(%_ASM_AX), %_ASM_DX
+       mov VCPU_RSI(%_ASM_AX), %_ASM_SI
+       mov VCPU_RDI(%_ASM_AX), %_ASM_DI
+       mov VCPU_RBP(%_ASM_AX), %_ASM_BP
+#ifdef CONFIG_X86_64
+       mov VCPU_R8 (%_ASM_AX),  %r8
+       mov VCPU_R9 (%_ASM_AX),  %r9
+       mov VCPU_R10(%_ASM_AX), %r10
+       mov VCPU_R11(%_ASM_AX), %r11
+       mov VCPU_R12(%_ASM_AX), %r12
+       mov VCPU_R13(%_ASM_AX), %r13
+       mov VCPU_R14(%_ASM_AX), %r14
+       mov VCPU_R15(%_ASM_AX), %r15
+#endif
+       /* Load guest RAX.  This kills the vmx_vcpu pointer! */
+       mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
+       /* Enter guest mode */
+       call vmx_vmenter
+
+       /* Jump on VM-Fail. */
+       jbe 2f
+
+       /* Temporarily save guest's RAX. */
+       push %_ASM_AX
+
+       /* Reload @regs to RAX. */
+       mov WORD_SIZE(%_ASM_SP), %_ASM_AX
+
+       /* Save all guest registers, including RAX from the stack */
+       __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX)
+       mov %_ASM_BX,   VCPU_RBX(%_ASM_AX)
+       mov %_ASM_CX,   VCPU_RCX(%_ASM_AX)
+       mov %_ASM_DX,   VCPU_RDX(%_ASM_AX)
+       mov %_ASM_SI,   VCPU_RSI(%_ASM_AX)
+       mov %_ASM_DI,   VCPU_RDI(%_ASM_AX)
+       mov %_ASM_BP,   VCPU_RBP(%_ASM_AX)
+#ifdef CONFIG_X86_64
+       mov %r8,  VCPU_R8 (%_ASM_AX)
+       mov %r9,  VCPU_R9 (%_ASM_AX)
+       mov %r10, VCPU_R10(%_ASM_AX)
+       mov %r11, VCPU_R11(%_ASM_AX)
+       mov %r12, VCPU_R12(%_ASM_AX)
+       mov %r13, VCPU_R13(%_ASM_AX)
+       mov %r14, VCPU_R14(%_ASM_AX)
+       mov %r15, VCPU_R15(%_ASM_AX)
+#endif
+
+       /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+       xor %eax, %eax
+
+       /*
+        * Clear all general purpose registers except RSP and RAX to prevent
+        * speculative use of the guest's values, even those that are reloaded
+        * via the stack.  In theory, an L1 cache miss when restoring registers
+        * could lead to speculative execution with the guest's values.
+        * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
+        * free.  RSP and RAX are exempt as RSP is restored by hardware during
+        * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+        */
+1:     xor %ebx, %ebx
+       xor %ecx, %ecx
+       xor %edx, %edx
+       xor %esi, %esi
+       xor %edi, %edi
+       xor %ebp, %ebp
+#ifdef CONFIG_X86_64
+       xor %r8d,  %r8d
+       xor %r9d,  %r9d
+       xor %r10d, %r10d
+       xor %r11d, %r11d
+       xor %r12d, %r12d
+       xor %r13d, %r13d
+       xor %r14d, %r14d
+       xor %r15d, %r15d
+#endif
+
+       /* "POP" @regs. */
+       add $WORD_SIZE, %_ASM_SP
+       pop %_ASM_BX
+
+#ifdef CONFIG_X86_64
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+#else
+       pop %esi
+       pop %edi
+#endif
+       pop %_ASM_BP
+       ret
+
+       /* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
+2:     mov $1, %eax
+       jmp 1b
+ENDPROC(__vmx_vcpu_run)
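
The VCPU_Rxx macros above turn the __VCPU_REGS_* indices from asm/kvm_vcpu_regs.h into byte offsets, so the assembly indexes exactly the GPR array the C caller hands in as @regs. A sketch of the contract from the C side (kvm-internal types elided; the call shape matches the vmx.c hunk below):

    /* Defined in vmenter.S above: returns 1 on VM-Fail, 0 on a normal
     * VM-Exit.  @regs must be the vcpu's register array, laid out per
     * the __VCPU_REGS_* indices. */
    bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
                        bool launched);

    static void run_vcpu_once(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
    {
            /* Mirrors the vmx_vcpu_run() call site below. */
            vmx->fail = __vmx_vcpu_run(vmx,
                                       (unsigned long *)&vcpu->arch.regs,
                                       vmx->loaded_vmcs->launched);
    }
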
index 30a6bcd735ec36e4163b6e9a72ce26ff3f6b356a..c73375e01ab8c4ca52d5d87e2ca9b3648906485a 100644 (file)
@@ -246,6 +246,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 
        if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
            !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+               /*
+                * This allocation for vmx_l1d_flush_pages is not tied to a VM
+                * lifetime and so should not be charged to a memcg.
+                */
                page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
                if (!page)
                        return -ENOMEM;
@@ -2387,13 +2391,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
        return 0;
 }
 
-struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
+struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
 {
        int node = cpu_to_node(cpu);
        struct page *pages;
        struct vmcs *vmcs;
 
-       pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
+       pages = __alloc_pages_node(node, flags, vmcs_config.order);
        if (!pages)
                return NULL;
        vmcs = page_address(pages);
@@ -2440,7 +2444,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
        loaded_vmcs_init(loaded_vmcs);
 
        if (cpu_has_vmx_msr_bitmap()) {
-               loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+               loaded_vmcs->msr_bitmap = (unsigned long *)
+                               __get_free_page(GFP_KERNEL_ACCOUNT);
                if (!loaded_vmcs->msr_bitmap)
                        goto out_vmcs;
                memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
@@ -2481,7 +2486,7 @@ static __init int alloc_kvm_area(void)
        for_each_possible_cpu(cpu) {
                struct vmcs *vmcs;
 
-               vmcs = alloc_vmcs_cpu(false, cpu);
+               vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
                if (!vmcs) {
                        free_kvm_area();
                        return -ENOMEM;
@@ -6360,150 +6365,15 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
        vmx->loaded_vmcs->hv_timer_armed = false;
 }
 
-static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 {
-       unsigned long evmcs_rsp;
-
-       vmx->__launched = vmx->loaded_vmcs->launched;
-
-       evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
-               (unsigned long)&current_evmcs->host_rsp : 0;
-
-       if (static_branch_unlikely(&vmx_l1d_should_flush))
-               vmx_l1d_flush(vcpu);
-
-       asm(
-               /* Store host registers */
-               "push %%" _ASM_DX "; push %%" _ASM_BP ";"
-               "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
-               "push %%" _ASM_CX " \n\t"
-               "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
-               "cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
-               "je 1f \n\t"
-               "mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
-               /* Avoid VMWRITE when Enlightened VMCS is in use */
-               "test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
-               "jz 2f \n\t"
-               "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
-               "jmp 1f \n\t"
-               "2: \n\t"
-               __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
-               "1: \n\t"
-               "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
-
-               /* Reload cr2 if changed */
-               "mov %c[cr2](%%" _ASM_CX "), %%" _ASM_AX " \n\t"
-               "mov %%cr2, %%" _ASM_DX " \n\t"
-               "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
-               "je 3f \n\t"
-               "mov %%" _ASM_AX", %%cr2 \n\t"
-               "3: \n\t"
-               /* Check if vmlaunch or vmresume is needed */
-               "cmpl $0, %c[launched](%%" _ASM_CX ") \n\t"
-               /* Load guest registers.  Don't clobber flags. */
-               "mov %c[rax](%%" _ASM_CX "), %%" _ASM_AX " \n\t"
-               "mov %c[rbx](%%" _ASM_CX "), %%" _ASM_BX " \n\t"
-               "mov %c[rdx](%%" _ASM_CX "), %%" _ASM_DX " \n\t"
-               "mov %c[rsi](%%" _ASM_CX "), %%" _ASM_SI " \n\t"
-               "mov %c[rdi](%%" _ASM_CX "), %%" _ASM_DI " \n\t"
-               "mov %c[rbp](%%" _ASM_CX "), %%" _ASM_BP " \n\t"
-#ifdef CONFIG_X86_64
-               "mov %c[r8](%%" _ASM_CX "),  %%r8  \n\t"
-               "mov %c[r9](%%" _ASM_CX "),  %%r9  \n\t"
-               "mov %c[r10](%%" _ASM_CX "), %%r10 \n\t"
-               "mov %c[r11](%%" _ASM_CX "), %%r11 \n\t"
-               "mov %c[r12](%%" _ASM_CX "), %%r12 \n\t"
-               "mov %c[r13](%%" _ASM_CX "), %%r13 \n\t"
-               "mov %c[r14](%%" _ASM_CX "), %%r14 \n\t"
-               "mov %c[r15](%%" _ASM_CX "), %%r15 \n\t"
-#endif
-               /* Load guest RCX.  This kills the vmx_vcpu pointer! */
-               "mov %c[rcx](%%" _ASM_CX "), %%" _ASM_CX " \n\t"
-
-               /* Enter guest mode */
-               "call vmx_vmenter\n\t"
-
-               /* Save guest's RCX to the stack placeholder (see above) */
-               "mov %%" _ASM_CX ", %c[wordsize](%%" _ASM_SP ") \n\t"
-
-               /* Load host's RCX, i.e. the vmx_vcpu pointer */
-               "pop %%" _ASM_CX " \n\t"
-
-               /* Set vmx->fail based on EFLAGS.{CF,ZF} */
-               "setbe %c[fail](%%" _ASM_CX ")\n\t"
-
-               /* Save all guest registers, including RCX from the stack */
-               "mov %%" _ASM_AX ", %c[rax](%%" _ASM_CX ") \n\t"
-               "mov %%" _ASM_BX ", %c[rbx](%%" _ASM_CX ") \n\t"
-               __ASM_SIZE(pop) " %c[rcx](%%" _ASM_CX ") \n\t"
-               "mov %%" _ASM_DX ", %c[rdx](%%" _ASM_CX ") \n\t"
-               "mov %%" _ASM_SI ", %c[rsi](%%" _ASM_CX ") \n\t"
-               "mov %%" _ASM_DI ", %c[rdi](%%" _ASM_CX ") \n\t"
-               "mov %%" _ASM_BP ", %c[rbp](%%" _ASM_CX ") \n\t"
-#ifdef CONFIG_X86_64
-               "mov %%r8,  %c[r8](%%" _ASM_CX ") \n\t"
-               "mov %%r9,  %c[r9](%%" _ASM_CX ") \n\t"
-               "mov %%r10, %c[r10](%%" _ASM_CX ") \n\t"
-               "mov %%r11, %c[r11](%%" _ASM_CX ") \n\t"
-               "mov %%r12, %c[r12](%%" _ASM_CX ") \n\t"
-               "mov %%r13, %c[r13](%%" _ASM_CX ") \n\t"
-               "mov %%r14, %c[r14](%%" _ASM_CX ") \n\t"
-               "mov %%r15, %c[r15](%%" _ASM_CX ") \n\t"
-               /*
-               * Clear host registers marked as clobbered to prevent
-               * speculative use.
-               */
-               "xor %%r8d,  %%r8d \n\t"
-               "xor %%r9d,  %%r9d \n\t"
-               "xor %%r10d, %%r10d \n\t"
-               "xor %%r11d, %%r11d \n\t"
-               "xor %%r12d, %%r12d \n\t"
-               "xor %%r13d, %%r13d \n\t"
-               "xor %%r14d, %%r14d \n\t"
-               "xor %%r15d, %%r15d \n\t"
-#endif
-               "mov %%cr2, %%" _ASM_AX "   \n\t"
-               "mov %%" _ASM_AX ", %c[cr2](%%" _ASM_CX ") \n\t"
-
-               "xor %%eax, %%eax \n\t"
-               "xor %%ebx, %%ebx \n\t"
-               "xor %%esi, %%esi \n\t"
-               "xor %%edi, %%edi \n\t"
-               "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
-             : ASM_CALL_CONSTRAINT
-             : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
-               [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
-               [fail]"i"(offsetof(struct vcpu_vmx, fail)),
-               [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
-               [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
-               [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
-               [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
-               [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
-               [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
-               [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
-               [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
-#ifdef CONFIG_X86_64
-               [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
-               [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
-               [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
-               [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
-               [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
-               [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
-               [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
-               [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
-#endif
-               [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
-               [wordsize]"i"(sizeof(ulong))
-             : "cc", "memory"
-#ifdef CONFIG_X86_64
-               , "rax", "rbx", "rdi"
-               , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-#else
-               , "eax", "ebx", "edi"
-#endif
-             );
+       if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
+               vmx->loaded_vmcs->host_state.rsp = host_rsp;
+               vmcs_writel(HOST_RSP, host_rsp);
+       }
 }
-STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);
+
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
 
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
@@ -6572,7 +6442,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
         */
        x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
 
-       __vmx_vcpu_run(vcpu, vmx);
+       if (static_branch_unlikely(&vmx_l1d_should_flush))
+               vmx_l1d_flush(vcpu);
+
+       if (vcpu->arch.cr2 != read_cr2())
+               write_cr2(vcpu->arch.cr2);
+
+       vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+                                  vmx->loaded_vmcs->launched);
+
+       vcpu->arch.cr2 = read_cr2();
 
        /*
         * We do not use IBRS in the kernel. If this vCPU has used the
@@ -6657,7 +6536,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 static struct kvm *vmx_vm_alloc(void)
 {
-       struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
+       struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
+                                           GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+                                           PAGE_KERNEL);
        return &kvm_vmx->kvm;
 }
 
@@ -6673,7 +6554,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
        if (enable_pml)
                vmx_destroy_pml_buffer(vmx);
        free_vpid(vmx->vpid);
-       leave_guest_mode(vcpu);
        nested_vmx_free_vcpu(vcpu);
        free_loaded_vmcs(vmx->loaded_vmcs);
        kfree(vmx->guest_msrs);
@@ -6685,14 +6565,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
        int err;
-       struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       struct vcpu_vmx *vmx;
        unsigned long *msr_bitmap;
        int cpu;
 
+       vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
        if (!vmx)
                return ERR_PTR(-ENOMEM);
 
-       vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
+       vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
+                       GFP_KERNEL_ACCOUNT);
        if (!vmx->vcpu.arch.guest_fpu) {
                printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
                err = -ENOMEM;
@@ -6714,12 +6596,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         * for the guest, etc.
         */
        if (enable_pml) {
-               vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
                if (!vmx->pml_pg)
                        goto uninit_vcpu;
        }
 
-       vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
        BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
                     > PAGE_SIZE);
 
index 0ac0a64c7790fb772b31d61179480548e6d06082..1554cb45b3931a6de9d91b0092de1ff85d4c993f 100644 (file)
@@ -175,7 +175,6 @@ struct nested_vmx {
 
 struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
-       unsigned long         host_rsp;
        u8                    fail;
        u8                    msr_bitmap_mode;
        u32                   exit_intr_info;
@@ -209,7 +208,7 @@ struct vcpu_vmx {
        struct loaded_vmcs    vmcs01;
        struct loaded_vmcs   *loaded_vmcs;
        struct loaded_vmcs   *loaded_cpu_state;
-       bool                  __launched; /* temporary, used in vmx_vcpu_run */
+
        struct msr_autoload {
                struct vmx_msrs guest;
                struct vmx_msrs host;
@@ -339,8 +338,8 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 
 static inline void pi_set_sn(struct pi_desc *pi_desc)
 {
-       return set_bit(POSTED_INTR_SN,
-                       (unsigned long *)&pi_desc->control);
+       set_bit(POSTED_INTR_SN,
+               (unsigned long *)&pi_desc->control);
 }
 
 static inline void pi_set_on(struct pi_desc *pi_desc)
@@ -445,7 +444,8 @@ static inline u32 vmx_vmentry_ctrl(void)
 {
        u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
        if (pt_mode == PT_MODE_SYSTEM)
-               vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL);
+               vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
+                                 VM_ENTRY_LOAD_IA32_RTIT_CTL);
        /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
        return vmentry_ctrl &
                ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
@@ -455,9 +455,10 @@ static inline u32 vmx_vmexit_ctrl(void)
 {
        u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
        if (pt_mode == PT_MODE_SYSTEM)
-               vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL);
+               vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
+                                VM_EXIT_CLEAR_IA32_RTIT_CTL);
        /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-       return vmcs_config.vmexit_ctrl &
+       return vmexit_ctrl &
                ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
 }
 
@@ -478,7 +479,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
        return &(to_vmx(vcpu)->pi_desc);
 }
 
-struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu);
+struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
 void free_vmcs(struct vmcs *vmcs);
 int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
 void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
@@ -487,7 +488,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
 
 static inline struct vmcs *alloc_vmcs(bool shadow)
 {
-       return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
+       return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
+                             GFP_KERNEL_ACCOUNT);
 }
 
 u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
index 941f932373d03547f606ea6a20fd6161c48c6398..65e4559eef2fc8589e0a4277077e766ceead3994 100644 (file)
@@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EINVAL;
                if (!lapic_in_kernel(vcpu))
                        goto out;
-               u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+               u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
+                               GFP_KERNEL_ACCOUNT);
 
                r = -ENOMEM;
                if (!u.lapic)
@@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_GET_XSAVE: {
-               u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+               u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
                r = -ENOMEM;
                if (!u.xsave)
                        break;
@@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_GET_XCRS: {
-               u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+               u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
                r = -ENOMEM;
                if (!u.xcrs)
                        break;
@@ -7055,6 +7056,13 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 
 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
+       if (!lapic_in_kernel(vcpu)) {
+               WARN_ON_ONCE(vcpu->arch.apicv_active);
+               return;
+       }
+       if (!vcpu->arch.apicv_active)
+               return;
+
        vcpu->arch.apicv_active = false;
        kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
 }
@@ -9005,7 +9013,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        struct page *page;
        int r;
 
-       vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
        vcpu->arch.emulate_ctxt.ops = &emulate_ops;
        if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -9026,6 +9033,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                goto fail_free_pio_data;
 
        if (irqchip_in_kernel(vcpu->kvm)) {
+               vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
                r = kvm_create_lapic(vcpu);
                if (r < 0)
                        goto fail_mmu_destroy;
@@ -9033,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                static_key_slow_inc(&kvm_no_apic_vcpu);
 
        vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
-                                      GFP_KERNEL);
+                                      GFP_KERNEL_ACCOUNT);
        if (!vcpu->arch.mce_banks) {
                r = -ENOMEM;
                goto fail_free_lapic;
        }
        vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
-       if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
+                               GFP_KERNEL_ACCOUNT)) {
                r = -ENOMEM;
                goto fail_free_mce_banks;
        }
@@ -9104,7 +9113,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-       INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
        atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -9299,13 +9307,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
                slot->arch.rmap[i] =
                        kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
-                                GFP_KERNEL);
+                                GFP_KERNEL_ACCOUNT);
                if (!slot->arch.rmap[i])
                        goto out_free;
                if (i == 0)
                        continue;
 
-               linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
+               linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
                if (!linfo)
                        goto out_free;
 
@@ -9348,13 +9356,13 @@ out_free:
        return -ENOMEM;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 {
        /*
         * memslots->generation has been incremented.
         * mmio generation may have reached its maximum value.
         */
-       kvm_mmu_invalidate_mmio_sptes(kvm, slots);
+       kvm_mmu_invalidate_mmio_sptes(kvm, gen);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -9462,7 +9470,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
-       kvm_mmu_invalidate_zap_all_pages(kvm);
+       kvm_mmu_zap_all(kvm);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
index 224cd0a475684055a5a5d74fb5a0cea982135918..28406aa1136d7eb772ed712f9df34ffe14290e66 100644 (file)
@@ -181,6 +181,11 @@ static inline bool emul_is_noncanonical_address(u64 la,
 static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
                                        gva_t gva, gfn_t gfn, unsigned access)
 {
+       u64 gen = kvm_memslots(vcpu->kvm)->generation;
+
+       if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
+               return;
+
        /*
         * If this is a shadow nested page table, the "GVA" is
         * actually a nGPA.
@@ -188,7 +193,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
        vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
        vcpu->arch.access = access;
        vcpu->arch.mmio_gfn = gfn;
-       vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
+       vcpu->arch.mmio_gen = gen;
 }
 
 static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
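
vcpu_cache_mmio_info() above now refuses to cache while KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS is set in the memslots generation, so a stale fast-path entry cannot be created in the middle of a memslot update. A standalone sketch of the generation-plus-flag pattern (names and the bit position are assumptions, not from this patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* A generation counter that reserves one bit as an "update in
     * progress" flag; KVM's KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS plays
     * this role. */
    #define GEN_UPDATE_IN_PROGRESS (1ULL << 63)

    struct gen_cache {
            uint64_t gen;
            uint64_t data;
            bool valid;
    };

    static void gen_cache_store(struct gen_cache *c, uint64_t cur_gen,
                                uint64_t data)
    {
            /* As in vcpu_cache_mmio_info() above: never populate the
             * cache mid-update, or the entry could accidentally match
             * the generation installed once the update completes. */
            if (cur_gen & GEN_UPDATE_IN_PROGRESS)
                    return;
            c->gen = cur_gen;
            c->data = data;
            c->valid = true;
    }

    static bool gen_cache_hit(const struct gen_cache *c, uint64_t cur_gen)
    {
            return c->valid && c->gen == cur_gen;
    }
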
index 22467f475ab412c345379db97313414a19eaeb16..1c9d4f0f96eafa2fdf7b54ea8f0860e054223147 100644 (file)
@@ -180,7 +180,7 @@ static unsigned get_max_segment_size(struct request_queue *q,
  */
 static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
                unsigned *nsegs, unsigned *last_seg_size,
-               unsigned *front_seg_size, unsigned *sectors)
+               unsigned *front_seg_size, unsigned *sectors, unsigned max_segs)
 {
        unsigned len = bv->bv_len;
        unsigned total_len = 0;
@@ -190,7 +190,7 @@ static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
         * A multi-page bvec may be too big to hold in one segment, so the
         * current bvec has to be split into multiple segments.
         */
-       while (len && new_nsegs + *nsegs < queue_max_segments(q)) {
+       while (len && new_nsegs + *nsegs < max_segs) {
                seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
                seg_size = min(seg_size, len);
 
@@ -240,6 +240,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        bool do_split = true;
        struct bio *new = NULL;
        const unsigned max_sectors = get_max_io_size(q, bio);
+       const unsigned max_segs = queue_max_segments(q);
 
        bio_for_each_bvec(bv, bio, iter) {
                /*
@@ -254,14 +255,14 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
                         * Consider this a new segment if we're splitting in
                         * the middle of this vector.
                         */
-                       if (nsegs < queue_max_segments(q) &&
+                       if (nsegs < max_segs &&
                            sectors < max_sectors) {
                                /* split in the middle of bvec */
                                bv.bv_len = (max_sectors - sectors) << 9;
                                bvec_split_segs(q, &bv, &nsegs,
                                                &seg_size,
                                                &front_seg_size,
-                                               &sectors);
+                                               &sectors, max_segs);
                        }
                        goto split;
                }
@@ -283,7 +284,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
                        continue;
                }
 new_segment:
-               if (nsegs == queue_max_segments(q))
+               if (nsegs == max_segs)
                        goto split;
 
                bvprv = bv;
@@ -296,7 +297,7 @@ new_segment:
                        if (nsegs == 1 && seg_size > front_seg_size)
                                front_seg_size = seg_size;
                } else if (bvec_split_segs(q, &bv, &nsegs, &seg_size,
-                                   &front_seg_size, &sectors)) {
+                                   &front_seg_size, &sectors, max_segs)) {
                        goto split;
                }
        }
@@ -415,7 +416,7 @@ new_segment:
                        bvprv = bv;
                        prev = 1;
                        bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size,
-                                       &front_seg_size, NULL);
+                                       &front_seg_size, NULL, UINT_MAX);
                }
                bbio = bio;
        }
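
bvec_split_segs() now takes the segment limit as a parameter, so blk_bio_segment_split() reads queue_max_segments() once up front and the physical-segment recount passes UINT_MAX to count without a cap. A userspace sketch of the loop's accounting, with get_max_segment_size() reduced to a fixed per-segment cap:

    #include <stdbool.h>
    #include <stdio.h>

    /* Split @len bytes into segments of at most @seg_cap bytes, stopping
     * once the running count would exceed @max_segs.  Returns true if
     * bytes were left over, i.e. the caller must split the bio. */
    static bool split_segs(unsigned len, unsigned seg_cap,
                           unsigned *nsegs, unsigned max_segs)
    {
            unsigned new_nsegs = 0;

            while (len && new_nsegs + *nsegs < max_segs) {
                    unsigned seg = len < seg_cap ? len : seg_cap;

                    new_nsegs++;
                    len -= seg;
            }
            *nsegs += new_nsegs;
            return len != 0;
    }

    int main(void)
    {
            unsigned nsegs = 0;

            /* 10 KiB in 4 KiB segments with room for 8: 3 segments used,
             * nothing left over -- prints "0 3". */
            printf("%d %u\n", split_segs(10240, 4096, &nsegs, 8), nsegs);
            return 0;
    }
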
index df8979008dd4ec6496c1e5600ec856a300dec08f..5a389a4f4f652edda26c109baf5e595bf6325903 100644 (file)
@@ -2956,11 +2956,15 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
        ndr_desc->res = &res;
        ndr_desc->provider_data = nfit_spa;
        ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
-       if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+       if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
                ndr_desc->numa_node = acpi_map_pxm_to_online_node(
                                                spa->proximity_domain);
-       else
+               ndr_desc->target_node = acpi_map_pxm_to_node(
+                               spa->proximity_domain);
+       } else {
                ndr_desc->numa_node = NUMA_NO_NODE;
+               ndr_desc->target_node = NUMA_NO_NODE;
+       }
 
        /*
         * Persistence domain bits are hierarchical, if
index 7bbbf8256a41aa404c8b738756fc53f9d5c053d9..867f6e3f2b4f42fea98920d5726f24ce398547fc 100644 (file)
@@ -84,6 +84,7 @@ int acpi_map_pxm_to_node(int pxm)
 
        return node;
 }
+EXPORT_SYMBOL(acpi_map_pxm_to_node);
 
 /**
  * acpi_map_pxm_to_online_node - Map proximity ID to online node
index 41b706403ef72ec94848b7dae98867ecd30ec971..b4dae624b9afe64069d28580498a0d8714176740 100644 (file)
 
 #define to_amba_driver(d)      container_of(d, struct amba_driver, drv)
 
-static const struct amba_id *
-amba_lookup(const struct amba_id *table, struct amba_device *dev)
+/* Called on a periphid match for a class 0x9 CoreSight device. */
+static int
+amba_cs_uci_id_match(const struct amba_id *table, struct amba_device *dev)
 {
        int ret = 0;
+       struct amba_cs_uci_id *uci;
+
+       uci = table->data;
 
+       /* no table data or zero mask - return match on periphid */
+       if (!uci || (uci->devarch_mask == 0))
+               return 1;
+
+       /* test against read devtype and masked devarch value */
+       ret = (dev->uci.devtype == uci->devtype) &&
+               ((dev->uci.devarch & uci->devarch_mask) == uci->devarch);
+       return ret;
+}
+
+static const struct amba_id *
+amba_lookup(const struct amba_id *table, struct amba_device *dev)
+{
        while (table->mask) {
-               ret = (dev->periphid & table->mask) == table->id;
-               if (ret)
-                       break;
+               if (((dev->periphid & table->mask) == table->id) &&
+                       ((dev->cid != CORESIGHT_CID) ||
+                        (amba_cs_uci_id_match(table, dev))))
+                       return table;
                table++;
        }
-
-       return ret ? table : NULL;
+       return NULL;
 }
 
 static int amba_match(struct device *dev, struct device_driver *drv)
@@ -399,10 +416,22 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
                        cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) <<
                                (i * 8);
 
+               if (cid == CORESIGHT_CID) {
+                       /* set the base to the start of the last 4k block */
+                       void __iomem *csbase = tmp + size - 4096;
+
+                       dev->uci.devarch =
+                               readl(csbase + UCI_REG_DEVARCH_OFFSET);
+                       dev->uci.devtype =
+                               readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff;
+               }
+
                amba_put_disable_pclk(dev);
 
-               if (cid == AMBA_CID || cid == CORESIGHT_CID)
+               if (cid == AMBA_CID || cid == CORESIGHT_CID) {
                        dev->periphid = pid;
+                       dev->cid = cid;
+               }
 
                if (!dev->periphid)
                        ret = -ENODEV;
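
Because many CoreSight components reuse the same periphid, amba_lookup() above additionally compares the device's DEVTYPE and masked DEVARCH against optional per-entry UCI data when the class is CORESIGHT_CID. A sketch of a driver table using the hook (all ID values hypothetical):

    #include <linux/amba/bus.h>

    /* Entries whose .data is NULL, or whose devarch_mask is zero, keep
     * matching on periphid alone -- amba_cs_uci_id_match() treats that
     * as an unconditional match. */
    static const struct amba_cs_uci_id example_uci = {
            .devarch        = 0x4a13,       /* assumed DEVARCH value */
            .devarch_mask   = 0xffff,
            .devtype        = 0x13,         /* assumed DEVTYPE value */
    };

    static const struct amba_id example_ids[] = {
            {
                    .id     = 0x000bb900,   /* hypothetical periphid */
                    .mask   = 0x000fffff,
                    .data   = (void *)&example_uci,
            },
            { 0, 0 },
    };
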
index 048cbf7d5233b4fdcec30db341ce287df1b1e89c..cb8347500ce2871e5003d8ce45a4014a97b8de3e 100644 (file)
@@ -88,6 +88,7 @@ unsigned long __weak memory_block_size_bytes(void)
 {
        return MIN_MEMORY_BLOCK_SIZE;
 }
+EXPORT_SYMBOL_GPL(memory_block_size_bytes);
 
 static unsigned long get_memory_block_size(void)
 {
index a4bc74e72c394965f31dcbe7b55c8e5cd0fc6cd5..24896ffb04ed8748fbb68e5e148dbface3ca4648 100644 (file)
@@ -926,7 +926,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
        int err, i, j;
        struct xen_blkif *blkif = ring->blkif;
        struct xenbus_device *dev = blkif->be->dev;
-       unsigned int ring_page_order, nr_grefs, evtchn;
+       unsigned int nr_grefs, evtchn;
 
        err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
                          &evtchn);
@@ -936,43 +936,42 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
                return err;
        }
 
-       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
-                         &ring_page_order);
-       if (err != 1) {
-               err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
+       nr_grefs = blkif->nr_ring_pages;
+
+       if (unlikely(!nr_grefs)) {
+               WARN_ON(true);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < nr_grefs; i++) {
+               char ring_ref_name[RINGREF_NAME_LEN];
+
+               snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+               err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
+                                  "%u", &ring_ref[i]);
+
                if (err != 1) {
+                       if (nr_grefs == 1)
+                               break;
+
                        err = -EINVAL;
-                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
+                       xenbus_dev_fatal(dev, err, "reading %s/%s",
+                                        dir, ring_ref_name);
                        return err;
                }
-               nr_grefs = 1;
-       } else {
-               unsigned int i;
+       }
 
-               if (ring_page_order > xen_blkif_max_ring_order) {
+       if (err != 1) {
+               WARN_ON(nr_grefs != 1);
+
+               err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u",
+                                  &ring_ref[0]);
+               if (err != 1) {
                        err = -EINVAL;
-                       xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
-                                        dir, ring_page_order,
-                                        xen_blkif_max_ring_order);
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
                        return err;
                }
-
-               nr_grefs = 1 << ring_page_order;
-               for (i = 0; i < nr_grefs; i++) {
-                       char ring_ref_name[RINGREF_NAME_LEN];
-
-                       snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
-                       err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
-                                          "%u", &ring_ref[i]);
-                       if (err != 1) {
-                               err = -EINVAL;
-                               xenbus_dev_fatal(dev, err, "reading %s/%s",
-                                                dir, ring_ref_name);
-                               return err;
-                       }
-               }
        }
-       blkif->nr_ring_pages = nr_grefs;
 
        for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
                req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -1023,6 +1022,7 @@ fail:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
+       struct xen_blkif *blkif = be->blkif;
        unsigned int pers_grants;
        char protocol[64] = "";
        int err, i;
@@ -1030,28 +1030,29 @@ static int connect_ring(struct backend_info *be)
        size_t xspathsize;
        const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
        unsigned int requested_num_queues = 0;
+       unsigned int ring_page_order;
 
        pr_debug("%s %s\n", __func__, dev->otherend);
 
-       be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
+       blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
        err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
                           "%63s", protocol);
        if (err <= 0)
                strcpy(protocol, "unspecified, assuming default");
        else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
-               be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+               blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
-               be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+               blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
        else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
-               be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+               blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
        else {
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -ENOSYS;
        }
        pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent",
                                           0);
-       be->blkif->vbd.feature_gnt_persistent = pers_grants;
-       be->blkif->vbd.overflow_max_grants = 0;
+       blkif->vbd.feature_gnt_persistent = pers_grants;
+       blkif->vbd.overflow_max_grants = 0;
 
        /*
         * Read the number of hardware queues from frontend.
@@ -1067,16 +1068,30 @@ static int connect_ring(struct backend_info *be)
                                requested_num_queues, xenblk_max_queues);
                return -ENOSYS;
        }
-       be->blkif->nr_rings = requested_num_queues;
-       if (xen_blkif_alloc_rings(be->blkif))
+       blkif->nr_rings = requested_num_queues;
+       if (xen_blkif_alloc_rings(blkif))
                return -ENOMEM;
 
        pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
-                be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
+                blkif->nr_rings, blkif->blk_protocol, protocol,
                 pers_grants ? "persistent grants" : "");
 
-       if (be->blkif->nr_rings == 1)
-               return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
+       ring_page_order = xenbus_read_unsigned(dev->otherend,
+                                              "ring-page-order", 0);
+
+       if (ring_page_order > xen_blkif_max_ring_order) {
+               err = -EINVAL;
+               xenbus_dev_fatal(dev, err,
+                                "requested ring page order %d exceeds max:%d",
+                                ring_page_order,
+                                xen_blkif_max_ring_order);
+               return err;
+       }
+
+       blkif->nr_ring_pages = 1 << ring_page_order;
+
+       if (blkif->nr_rings == 1)
+               return read_per_ring_refs(&blkif->rings[0], dev->otherend);
        else {
                xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
                xspath = kmalloc(xspathsize, GFP_KERNEL);
@@ -1085,10 +1100,10 @@ static int connect_ring(struct backend_info *be)
                        return -ENOMEM;
                }
 
-               for (i = 0; i < be->blkif->nr_rings; i++) {
+               for (i = 0; i < blkif->nr_rings; i++) {
                        memset(xspath, 0, xspathsize);
                        snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
-                       err = read_per_ring_refs(&be->blkif->rings[i], xspath);
+                       err = read_per_ring_refs(&blkif->rings[i], xspath);
                        if (err) {
                                kfree(xspath);
                                return err;
index a8b20b65bd4b77ab09473aaa81057975aa0914a7..aa4ec53281cea585214c3a5f8b4faf941e7e7bd3 100644 (file)
@@ -1261,6 +1261,13 @@ static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void)
        return ARCH_TIMER_PHYS_SECURE_PPI;
 }
 
+static void __init arch_timer_populate_kvm_info(void)
+{
+       arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
+       if (is_kernel_in_hyp_mode())
+               arch_timer_kvm_info.physical_irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
+}
+
 static int __init arch_timer_of_init(struct device_node *np)
 {
        int i, ret;
@@ -1275,7 +1282,7 @@ static int __init arch_timer_of_init(struct device_node *np)
        for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++)
                arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
 
-       arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
+       arch_timer_populate_kvm_info();
 
        rate = arch_timer_get_cntfrq();
        arch_timer_of_configure_rate(rate, np);
@@ -1605,7 +1612,7 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
        arch_timer_ppi[ARCH_TIMER_HYP_PPI] =
                acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI);
 
-       arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
+       arch_timer_populate_kvm_info();
 
        /*
         * When probing via ACPI, we have no mechanism to override the sysreg
index e0700bf4893a31e4e4f148ae26e95b509c8b6cbd..5ef624fe3934c26930e0c2a41166bc1715dd691e 100644 (file)
@@ -23,12 +23,38 @@ config DEV_DAX
 config DEV_DAX_PMEM
        tristate "PMEM DAX: direct access to persistent memory"
        depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
+       depends on m # until we can kill DEV_DAX_PMEM_COMPAT
        default DEV_DAX
        help
          Support raw access to persistent memory.  Note that this
          driver consumes memory ranges allocated and exported by the
          libnvdimm sub-system.
 
-         Say Y if unsure
+         Say M if unsure
+
+config DEV_DAX_KMEM
+       tristate "KMEM DAX: volatile-use of persistent memory"
+       default DEV_DAX
+       depends on DEV_DAX
+       depends on MEMORY_HOTPLUG # for add_memory() and friends
+       help
+         Support access to persistent memory as if it were RAM.  This
+         allows easier use of persistent memory by unmodified
+         applications.
+
+         To use this feature, a DAX device must be unbound from the
+         device_dax driver (PMEM DAX) and bound to this kmem driver
+         on each boot.
+
+         Say N if unsure.
+
+config DEV_DAX_PMEM_COMPAT
+       tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
+       depends on DEV_DAX_PMEM
+       default DEV_DAX_PMEM
+       help
+         Older versions of the libdaxctl library expect to find all
+         device-dax instances under /sys/class/dax. If libdaxctl in
+         your distribution is older than v58 say M, otherwise say N.
 
 endif
index 574286fac87ce71b88e61bf5935e3905e226638b..81f7d54dadfb34ed470ee90627556172c9fc3f7a 100644 (file)
@@ -1,8 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX) += device_dax.o
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
 
 dax-y := super.o
-dax_pmem-y := pmem.o
+dax-y += bus.o
 device_dax-y := device.o
+
+obj-y += pmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
new file mode 100644 (file)
index 0000000..2109cfe
--- /dev/null
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
+#include <linux/memremap.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/dax.h>
+#include "dax-private.h"
+#include "bus.h"
+
+static struct class *dax_class;
+
+static DEFINE_MUTEX(dax_bus_lock);
+
+#define DAX_NAME_LEN 30
+struct dax_id {
+       struct list_head list;
+       char dev_name[DAX_NAME_LEN];
+};
+
+static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+       /*
+        * We only ever expect to handle device-dax instances, i.e. the
+        * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
+        */
+       return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
+}
+
+static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
+{
+       return container_of(drv, struct dax_device_driver, drv);
+}
+
+static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
+               const char *dev_name)
+{
+       struct dax_id *dax_id;
+
+       lockdep_assert_held(&dax_bus_lock);
+
+       list_for_each_entry(dax_id, &dax_drv->ids, list)
+               if (sysfs_streq(dax_id->dev_name, dev_name))
+                       return dax_id;
+       return NULL;
+}
+
+static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
+{
+       int match;
+
+       mutex_lock(&dax_bus_lock);
+       match = !!__dax_match_id(dax_drv, dev_name(dev));
+       mutex_unlock(&dax_bus_lock);
+
+       return match;
+}
+
+enum id_action {
+       ID_REMOVE,
+       ID_ADD,
+};
+
+static ssize_t do_id_store(struct device_driver *drv, const char *buf,
+               size_t count, enum id_action action)
+{
+       struct dax_device_driver *dax_drv = to_dax_drv(drv);
+       unsigned int region_id, id;
+       char devname[DAX_NAME_LEN];
+       struct dax_id *dax_id;
+       ssize_t rc = count;
+       int fields;
+
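+       /*
+        * Accept only a canonical "dax<region>.<id>" name: rebuild it
+        * from the parsed fields and compare to reject stray input.
+        */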
+       fields = sscanf(buf, "dax%d.%d", &region_id, &id);
+       if (fields != 2)
+               return -EINVAL;
+       sprintf(devname, "dax%d.%d", region_id, id);
+       if (!sysfs_streq(buf, devname))
+               return -EINVAL;
+
+       mutex_lock(&dax_bus_lock);
+       dax_id = __dax_match_id(dax_drv, buf);
+       if (!dax_id) {
+               if (action == ID_ADD) {
+                       dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
+                       if (dax_id) {
+                               strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
+                               list_add(&dax_id->list, &dax_drv->ids);
+                       } else
+                               rc = -ENOMEM;
+               } else
+                       /* nothing to remove */;
+       } else if (action == ID_REMOVE) {
+               list_del(&dax_id->list);
+               kfree(dax_id);
+       } else
+               /* dax_id already added */;
+       mutex_unlock(&dax_bus_lock);
+
+       if (rc < 0)
+               return rc;
+       if (action == ID_ADD)
+               rc = driver_attach(drv);
+       if (rc)
+               return rc;
+       return count;
+}
+
+static ssize_t new_id_store(struct device_driver *drv, const char *buf,
+               size_t count)
+{
+       return do_id_store(drv, buf, count, ID_ADD);
+}
+static DRIVER_ATTR_WO(new_id);
+
+static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
+               size_t count)
+{
+       return do_id_store(drv, buf, count, ID_REMOVE);
+}
+static DRIVER_ATTR_WO(remove_id);
+
+static struct attribute *dax_drv_attrs[] = {
+       &driver_attr_new_id.attr,
+       &driver_attr_remove_id.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(dax_drv);
+
+static int dax_bus_match(struct device *dev, struct device_driver *drv);
+
+static struct bus_type dax_bus_type = {
+       .name = "dax",
+       .uevent = dax_bus_uevent,
+       .match = dax_bus_match,
+       .drv_groups = dax_drv_groups,
+};
+
+static int dax_bus_match(struct device *dev, struct device_driver *drv)
+{
+       struct dax_device_driver *dax_drv = to_dax_drv(drv);
+
+       /*
+        * All but the 'device-dax' driver, which has 'match_always'
+        * set, require an exact id match.
+        */
+       if (dax_drv->match_always)
+               return 1;
+
+       return dax_match_id(dax_drv, dev);
+}
+
+/*
+ * Rely on the fact that drvdata is set before the attributes are
+ * registered, and that the attributes are unregistered before drvdata
+ * is cleared to assume that drvdata is always valid.
+ */
+static ssize_t id_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct dax_region *dax_region = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d\n", dax_region->id);
+}
+static DEVICE_ATTR_RO(id);
+
+static ssize_t region_size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct dax_region *dax_region = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%llu\n", (unsigned long long)
+                       resource_size(&dax_region->res));
+}
+static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
+               region_size_show, NULL);
+
+static ssize_t align_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct dax_region *dax_region = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%u\n", dax_region->align);
+}
+static DEVICE_ATTR_RO(align);
+
+static struct attribute *dax_region_attributes[] = {
+       &dev_attr_region_size.attr,
+       &dev_attr_align.attr,
+       &dev_attr_id.attr,
+       NULL,
+};
+
+static const struct attribute_group dax_region_attribute_group = {
+       .name = "dax_region",
+       .attrs = dax_region_attributes,
+};
+
+static const struct attribute_group *dax_region_attribute_groups[] = {
+       &dax_region_attribute_group,
+       NULL,
+};
+
+static void dax_region_free(struct kref *kref)
+{
+       struct dax_region *dax_region;
+
+       dax_region = container_of(kref, struct dax_region, kref);
+       kfree(dax_region);
+}
+
+void dax_region_put(struct dax_region *dax_region)
+{
+       kref_put(&dax_region->kref, dax_region_free);
+}
+EXPORT_SYMBOL_GPL(dax_region_put);
+
+static void dax_region_unregister(void *region)
+{
+       struct dax_region *dax_region = region;
+
+       sysfs_remove_groups(&dax_region->dev->kobj,
+                       dax_region_attribute_groups);
+       dax_region_put(dax_region);
+}
+
+struct dax_region *alloc_dax_region(struct device *parent, int region_id,
+               struct resource *res, int target_node, unsigned int align,
+               unsigned long pfn_flags)
+{
+       struct dax_region *dax_region;
+
+       /*
+        * The DAX core assumes that it can store its private data in
+        * parent->driver_data. This WARN is a reminder / safeguard for
+        * developers of device-dax drivers.
+        */
+       if (dev_get_drvdata(parent)) {
+               dev_WARN(parent, "dax core failed to setup private data\n");
+               return NULL;
+       }
+
+       if (!IS_ALIGNED(res->start, align)
+                       || !IS_ALIGNED(resource_size(res), align))
+               return NULL;
+
+       dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
+       if (!dax_region)
+               return NULL;
+
+       dev_set_drvdata(parent, dax_region);
+       memcpy(&dax_region->res, res, sizeof(*res));
+       dax_region->pfn_flags = pfn_flags;
+       kref_init(&dax_region->kref);
+       dax_region->id = region_id;
+       dax_region->align = align;
+       dax_region->dev = parent;
+       dax_region->target_node = target_node;
+       if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
+               kfree(dax_region);
+               return NULL;
+       }
+
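+       /*
+        * Take an extra reference for the devm action below; it is
+        * dropped in dax_region_unregister().
+        */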
+       kref_get(&dax_region->kref);
+       if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
+               return NULL;
+       return dax_region;
+}
+EXPORT_SYMBOL_GPL(alloc_dax_region);
+
+static ssize_t size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+       unsigned long long size = resource_size(&dev_dax->region->res);
+
+       return sprintf(buf, "%llu\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static int dev_dax_target_node(struct dev_dax *dev_dax)
+{
+       struct dax_region *dax_region = dev_dax->region;
+
+       return dax_region->target_node;
+}
+
+static ssize_t target_node_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+
+       return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
+}
+static DEVICE_ATTR_RO(target_node);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       /*
+        * We only ever expect to handle device-dax instances, i.e. the
+        * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
+        */
+       return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+
+       if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
+               return 0;
+       return a->mode;
+}
+
+static struct attribute *dev_dax_attributes[] = {
+       &dev_attr_modalias.attr,
+       &dev_attr_size.attr,
+       &dev_attr_target_node.attr,
+       NULL,
+};
+
+static const struct attribute_group dev_dax_attribute_group = {
+       .attrs = dev_dax_attributes,
+       .is_visible = dev_dax_visible,
+};
+
+static const struct attribute_group *dax_attribute_groups[] = {
+       &dev_dax_attribute_group,
+       NULL,
+};
+
+void kill_dev_dax(struct dev_dax *dev_dax)
+{
+       struct dax_device *dax_dev = dev_dax->dax_dev;
+       struct inode *inode = dax_inode(dax_dev);
+
+       kill_dax(dax_dev);
+       unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+}
+EXPORT_SYMBOL_GPL(kill_dev_dax);
+
+static void dev_dax_release(struct device *dev)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+       struct dax_region *dax_region = dev_dax->region;
+       struct dax_device *dax_dev = dev_dax->dax_dev;
+
+       dax_region_put(dax_region);
+       put_dax(dax_dev);
+       kfree(dev_dax);
+}
+
+static void unregister_dev_dax(void *dev)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+
+       dev_dbg(dev, "%s\n", __func__);
+
+       kill_dev_dax(dev_dax);
+       device_del(dev);
+       put_device(dev);
+}
+
+struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
+               struct dev_pagemap *pgmap, enum dev_dax_subsys subsys)
+{
+       struct device *parent = dax_region->dev;
+       struct dax_device *dax_dev;
+       struct dev_dax *dev_dax;
+       struct inode *inode;
+       struct device *dev;
+       int rc = -ENOMEM;
+
+       if (id < 0)
+               return ERR_PTR(-EINVAL);
+
+       dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
+       if (!dev_dax)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
+
+       /*
+        * No 'host' or dax_operations since there is no access to this
+        * device outside of mmap of the resulting character device.
+        */
+       dax_dev = alloc_dax(dev_dax, NULL, NULL);
+       if (!dax_dev)
+               goto err;
+
+       /* a device_dax instance is dead while the driver is not attached */
+       kill_dax(dax_dev);
+
+       /* from here on we're committed to teardown via dax_dev_release() */
+       dev = &dev_dax->dev;
+       device_initialize(dev);
+
+       dev_dax->dax_dev = dax_dev;
+       dev_dax->region = dax_region;
+       dev_dax->target_node = dax_region->target_node;
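+       /* pin the region while this device exists; dev_dax_release() drops it */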
+       kref_get(&dax_region->kref);
+
+       inode = dax_inode(dax_dev);
+       dev->devt = inode->i_rdev;
+       if (subsys == DEV_DAX_BUS)
+               dev->bus = &dax_bus_type;
+       else
+               dev->class = dax_class;
+       dev->parent = parent;
+       dev->groups = dax_attribute_groups;
+       dev->release = dev_dax_release;
+       dev_set_name(dev, "dax%d.%d", dax_region->id, id);
+
+       rc = device_add(dev);
+       if (rc) {
+               kill_dev_dax(dev_dax);
+               put_device(dev);
+               return ERR_PTR(rc);
+       }
+
+       rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
+       if (rc)
+               return ERR_PTR(rc);
+
+       return dev_dax;
+
+ err:
+       kfree(dev_dax);
+
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(__devm_create_dev_dax);
+
+static int match_always_count;
+
+int __dax_driver_register(struct dax_device_driver *dax_drv,
+               struct module *module, const char *mod_name)
+{
+       struct device_driver *drv = &dax_drv->drv;
+       int rc = 0;
+
+       INIT_LIST_HEAD(&dax_drv->ids);
+       drv->owner = module;
+       drv->name = mod_name;
+       drv->mod_name = mod_name;
+       drv->bus = &dax_bus_type;
+
+       /* there can only be one default driver */
+       mutex_lock(&dax_bus_lock);
+       match_always_count += dax_drv->match_always;
+       if (match_always_count > 1) {
+               match_always_count--;
+               WARN_ON(1);
+               rc = -EINVAL;
+       }
+       mutex_unlock(&dax_bus_lock);
+       if (rc)
+               return rc;
+       return driver_register(drv);
+}
+EXPORT_SYMBOL_GPL(__dax_driver_register);
+
+void dax_driver_unregister(struct dax_device_driver *dax_drv)
+{
+       struct device_driver *drv = &dax_drv->drv;
+       struct dax_id *dax_id, *_id;
+
+       mutex_lock(&dax_bus_lock);
+       match_always_count -= dax_drv->match_always;
+       list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
+               list_del(&dax_id->list);
+               kfree(dax_id);
+       }
+       mutex_unlock(&dax_bus_lock);
+       driver_unregister(drv);
+}
+EXPORT_SYMBOL_GPL(dax_driver_unregister);
+
+int __init dax_bus_init(void)
+{
+       int rc;
+
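+       /* the deprecated /sys/class/dax interface is only needed for compat */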
+       if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
+               dax_class = class_create(THIS_MODULE, "dax");
+               if (IS_ERR(dax_class))
+                       return PTR_ERR(dax_class);
+       }
+
+       rc = bus_register(&dax_bus_type);
+       if (rc)
+               class_destroy(dax_class);
+       return rc;
+}
+
+void __exit dax_bus_exit(void)
+{
+       bus_unregister(&dax_bus_type);
+       class_destroy(dax_class);
+}
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
new file mode 100644 (file)
index 0000000..8619e32
--- /dev/null
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
+#ifndef __DAX_BUS_H__
+#define __DAX_BUS_H__
+#include <linux/device.h>
+
+struct dev_dax;
+struct resource;
+struct dax_device;
+struct dax_region;
+void dax_region_put(struct dax_region *dax_region);
+struct dax_region *alloc_dax_region(struct device *parent, int region_id,
+               struct resource *res, int target_node, unsigned int align,
+               unsigned long flags);
+
+enum dev_dax_subsys {
+       DEV_DAX_BUS,
+       DEV_DAX_CLASS,
+};
+
+struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
+               struct dev_pagemap *pgmap, enum dev_dax_subsys subsys);
+
+static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
+               int id, struct dev_pagemap *pgmap)
+{
+       return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS);
+}
+
+/* to be deleted when DEV_DAX_CLASS is removed */
+struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
+
+struct dax_device_driver {
+       struct device_driver drv;
+       struct list_head ids;
+       int match_always;
+};
+
+int __dax_driver_register(struct dax_device_driver *dax_drv,
+               struct module *module, const char *mod_name);
+#define dax_driver_register(driver) \
+       __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+void dax_driver_unregister(struct dax_device_driver *dax_drv);
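+
+/*
+ * Minimal usage sketch (my_probe / my_remove are hypothetical driver
+ * callbacks, not part of this interface):
+ *
+ *	static struct dax_device_driver my_dax_driver = {
+ *		.drv = {
+ *			.probe = my_probe,
+ *			.remove = my_remove,
+ *		},
+ *	};
+ *
+ *	rc = dax_driver_register(&my_dax_driver);
+ *	...
+ *	dax_driver_unregister(&my_dax_driver);
+ */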
+void kill_dev_dax(struct dev_dax *dev_dax);
+
+#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
+int dev_dax_probe(struct device *dev);
+#endif
+
+/*
+ * While run_dax() is potentially a generic operation that could be
+ * defined in include/linux/dax.h, we don't want to grow any users
+ * outside of drivers/dax/
+ */
+void run_dax(struct dax_device *dax_dev);
+
+#define MODULE_ALIAS_DAX_DEVICE(type) \
+       MODULE_ALIAS("dax:t" __stringify(type) "*")
+#define DAX_DEVICE_MODALIAS_FMT "dax:t%d"
+
+#endif /* __DAX_BUS_H__ */
index b6fc4f04636de1cd52d4195331b1e4168847bb1e..a45612148ca021e96ff3c8cf60eab9b4071fb858 100644 (file)
 #include <linux/device.h>
 #include <linux/cdev.h>
 
+/* private routines between core files */
+struct dax_device;
+struct dax_device *inode_dax(struct inode *inode);
+struct inode *dax_inode(struct dax_device *dax_dev);
+int dax_bus_init(void);
+void dax_bus_exit(void);
+
 /**
  * struct dax_region - mapping infrastructure for dax devices
  * @id: kernel-wide unique region for a memory range
- * @base: linear address corresponding to @res
+ * @target_node: effective numa node if this memory range is onlined
  * @kref: to pin while other agents have a need to do lookups
  * @dev: parent device backing this region
  * @align: allocation and mapping alignment for child dax devices
@@ -28,8 +35,7 @@
  */
 struct dax_region {
        int id;
-       struct ida ida;
-       void *base;
+       int target_node;
        struct kref kref;
        struct device *dev;
        unsigned int align;
@@ -38,20 +44,28 @@ struct dax_region {
 };
 
 /**
- * struct dev_dax - instance data for a subdivision of a dax region
+ * struct dev_dax - instance data for a subdivision of a dax region, plus
+ * the state needed while the device is bound to a driver.
  * @region - parent region
  * @dax_dev - core dax functionality
+ * @target_node: effective numa node if dev_dax memory range is onlined
  * @dev - device core
- * @id - child id in the region
- * @num_resources - number of physical address extents in this device
- * @res - array of physical address ranges
+ * @pgmap - pgmap for memmap setup / lifetime (driver owned)
+ * @ref: pgmap reference count (driver owned)
+ * @cmp: @ref final put completion (driver owned)
  */
 struct dev_dax {
        struct dax_region *region;
        struct dax_device *dax_dev;
+       int target_node;
        struct device dev;
-       int id;
-       int num_resources;
-       struct resource res[0];
+       struct dev_pagemap pgmap;
+       struct percpu_ref ref;
+       struct completion cmp;
 };
+
+static inline struct dev_dax *to_dev_dax(struct device *dev)
+{
+       return container_of(dev, struct dev_dax, dev);
+}
 #endif
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
deleted file mode 100644 (file)
index f9e5fee..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __DAX_H__
-#define __DAX_H__
-struct dax_device;
-struct dax_device *inode_dax(struct inode *inode);
-struct inode *dax_inode(struct dax_device *dax_dev);
-#endif /* __DAX_H__ */
diff --git a/drivers/dax/device-dax.h b/drivers/dax/device-dax.h
deleted file mode 100644 (file)
index 688b051..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __DEVICE_DAX_H__
-#define __DEVICE_DAX_H__
-struct device;
-struct dev_dax;
-struct resource;
-struct dax_region;
-void dax_region_put(struct dax_region *dax_region);
-struct dax_region *alloc_dax_region(struct device *parent,
-               int region_id, struct resource *res, unsigned int align,
-               void *addr, unsigned long flags);
-struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
-               int id, struct resource *res, int count);
-#endif /* __DEVICE_DAX_H__ */
index 948806e57cee33f74024adb442f398579319b89d..e428468ab6618246b9a09e4258901274d73161b5 100644 (file)
@@ -1,15 +1,6 @@
-/*
- * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */
+#include <linux/memremap.h>
 #include <linux/pagemap.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include "dax-private.h"
-#include "dax.h"
+#include "bus.h"
 
-static struct class *dax_class;
-
-/*
- * Rely on the fact that drvdata is set before the attributes are
- * registered, and that the attributes are unregistered before drvdata
- * is cleared to assume that drvdata is always valid.
- */
-static ssize_t id_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct dax_region *dax_region = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%d\n", dax_region->id);
-}
-static DEVICE_ATTR_RO(id);
-
-static ssize_t region_size_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct dax_region *dax_region = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%llu\n", (unsigned long long)
-                       resource_size(&dax_region->res));
-}
-static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
-               region_size_show, NULL);
-
-static ssize_t align_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct dax_region *dax_region = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%u\n", dax_region->align);
-}
-static DEVICE_ATTR_RO(align);
-
-static struct attribute *dax_region_attributes[] = {
-       &dev_attr_region_size.attr,
-       &dev_attr_align.attr,
-       &dev_attr_id.attr,
-       NULL,
-};
-
-static const struct attribute_group dax_region_attribute_group = {
-       .name = "dax_region",
-       .attrs = dax_region_attributes,
-};
-
-static const struct attribute_group *dax_region_attribute_groups[] = {
-       &dax_region_attribute_group,
-       NULL,
-};
-
-static void dax_region_free(struct kref *kref)
-{
-       struct dax_region *dax_region;
-
-       dax_region = container_of(kref, struct dax_region, kref);
-       kfree(dax_region);
-}
-
-void dax_region_put(struct dax_region *dax_region)
+static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
 {
-       kref_put(&dax_region->kref, dax_region_free);
+       return container_of(ref, struct dev_dax, ref);
 }
-EXPORT_SYMBOL_GPL(dax_region_put);
 
-static void dax_region_unregister(void *region)
+static void dev_dax_percpu_release(struct percpu_ref *ref)
 {
-       struct dax_region *dax_region = region;
+       struct dev_dax *dev_dax = ref_to_dev_dax(ref);
 
-       sysfs_remove_groups(&dax_region->dev->kobj,
-                       dax_region_attribute_groups);
-       dax_region_put(dax_region);
+       dev_dbg(&dev_dax->dev, "%s\n", __func__);
+       complete(&dev_dax->cmp);
 }
 
-struct dax_region *alloc_dax_region(struct device *parent, int region_id,
-               struct resource *res, unsigned int align, void *addr,
-               unsigned long pfn_flags)
+static void dev_dax_percpu_exit(void *data)
 {
-       struct dax_region *dax_region;
-
-       /*
-        * The DAX core assumes that it can store its private data in
-        * parent->driver_data. This WARN is a reminder / safeguard for
-        * developers of device-dax drivers.
-        */
-       if (dev_get_drvdata(parent)) {
-               dev_WARN(parent, "dax core failed to setup private data\n");
-               return NULL;
-       }
-
-       if (!IS_ALIGNED(res->start, align)
-                       || !IS_ALIGNED(resource_size(res), align))
-               return NULL;
-
-       dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
-       if (!dax_region)
-               return NULL;
-
-       dev_set_drvdata(parent, dax_region);
-       memcpy(&dax_region->res, res, sizeof(*res));
-       dax_region->pfn_flags = pfn_flags;
-       kref_init(&dax_region->kref);
-       dax_region->id = region_id;
-       ida_init(&dax_region->ida);
-       dax_region->align = align;
-       dax_region->dev = parent;
-       dax_region->base = addr;
-       if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
-               kfree(dax_region);
-               return NULL;
-       }
+       struct percpu_ref *ref = data;
+       struct dev_dax *dev_dax = ref_to_dev_dax(ref);
 
-       kref_get(&dax_region->kref);
-       if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
-               return NULL;
-       return dax_region;
+       dev_dbg(&dev_dax->dev, "%s\n", __func__);
+       wait_for_completion(&dev_dax->cmp);
+       percpu_ref_exit(ref);
 }
-EXPORT_SYMBOL_GPL(alloc_dax_region);
 
-static struct dev_dax *to_dev_dax(struct device *dev)
+static void dev_dax_percpu_kill(struct percpu_ref *data)
 {
-       return container_of(dev, struct dev_dax, dev);
-}
-
-static ssize_t size_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct dev_dax *dev_dax = to_dev_dax(dev);
-       unsigned long long size = 0;
-       int i;
+       struct percpu_ref *ref = data;
+       struct dev_dax *dev_dax = ref_to_dev_dax(ref);
 
-       for (i = 0; i < dev_dax->num_resources; i++)
-               size += resource_size(&dev_dax->res[i]);
-
-       return sprintf(buf, "%llu\n", size);
+       dev_dbg(&dev_dax->dev, "%s\n", __func__);
+       percpu_ref_kill(ref);
 }
-static DEVICE_ATTR_RO(size);
-
-static struct attribute *dev_dax_attributes[] = {
-       &dev_attr_size.attr,
-       NULL,
-};
-
-static const struct attribute_group dev_dax_attribute_group = {
-       .attrs = dev_dax_attributes,
-};
-
-static const struct attribute_group *dax_attribute_groups[] = {
-       &dev_dax_attribute_group,
-       NULL,
-};
 
 static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
                const char *func)
@@ -226,21 +95,11 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
 __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
                unsigned long size)
 {
-       struct resource *res;
-       /* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */
-       phys_addr_t uninitialized_var(phys);
-       int i;
-
-       for (i = 0; i < dev_dax->num_resources; i++) {
-               res = &dev_dax->res[i];
-               phys = pgoff * PAGE_SIZE + res->start;
-               if (phys >= res->start && phys <= res->end)
-                       break;
-               pgoff -= PHYS_PFN(resource_size(res));
-       }
+       struct resource *res = &dev_dax->region->res;
+       phys_addr_t phys;
 
-       if (i < dev_dax->num_resources) {
-               res = &dev_dax->res[i];
+       phys = pgoff * PAGE_SIZE + res->start;
+       if (phys >= res->start && phys <= res->end) {
                if (phys + size - 1 <= res->end)
                        return phys;
        }
@@ -576,152 +435,100 @@ static const struct file_operations dax_fops = {
        .mmap_supported_flags = MAP_SYNC,
 };
 
-static void dev_dax_release(struct device *dev)
+static void dev_dax_cdev_del(void *cdev)
 {
-       struct dev_dax *dev_dax = to_dev_dax(dev);
-       struct dax_region *dax_region = dev_dax->region;
-       struct dax_device *dax_dev = dev_dax->dax_dev;
-
-       if (dev_dax->id >= 0)
-               ida_simple_remove(&dax_region->ida, dev_dax->id);
-       dax_region_put(dax_region);
-       put_dax(dax_dev);
-       kfree(dev_dax);
+       cdev_del(cdev);
 }
 
-static void kill_dev_dax(struct dev_dax *dev_dax)
+static void dev_dax_kill(void *dev_dax)
 {
-       struct dax_device *dax_dev = dev_dax->dax_dev;
-       struct inode *inode = dax_inode(dax_dev);
-
-       kill_dax(dax_dev);
-       unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+       kill_dev_dax(dev_dax);
 }
 
-static void unregister_dev_dax(void *dev)
+int dev_dax_probe(struct device *dev)
 {
        struct dev_dax *dev_dax = to_dev_dax(dev);
        struct dax_device *dax_dev = dev_dax->dax_dev;
-       struct inode *inode = dax_inode(dax_dev);
-       struct cdev *cdev = inode->i_cdev;
-
-       dev_dbg(dev, "trace\n");
-
-       kill_dev_dax(dev_dax);
-       cdev_device_del(cdev, dev);
-       put_device(dev);
-}
-
-struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
-               int id, struct resource *res, int count)
-{
-       struct device *parent = dax_region->dev;
-       struct dax_device *dax_dev;
-       struct dev_dax *dev_dax;
+       struct resource *res = &dev_dax->region->res;
        struct inode *inode;
-       struct device *dev;
        struct cdev *cdev;
-       int rc, i;
-
-       if (!count)
-               return ERR_PTR(-EINVAL);
-
-       dev_dax = kzalloc(struct_size(dev_dax, res, count), GFP_KERNEL);
-       if (!dev_dax)
-               return ERR_PTR(-ENOMEM);
-
-       for (i = 0; i < count; i++) {
-               if (!IS_ALIGNED(res[i].start, dax_region->align)
-                               || !IS_ALIGNED(resource_size(&res[i]),
-                                       dax_region->align)) {
-                       rc = -EINVAL;
-                       break;
-               }
-               dev_dax->res[i].start = res[i].start;
-               dev_dax->res[i].end = res[i].end;
+       void *addr;
+       int rc;
+
+       /* 1:1 map region resource range to device-dax instance range */
+       if (!devm_request_mem_region(dev, res->start, resource_size(res),
+                               dev_name(dev))) {
+               dev_warn(dev, "could not reserve region %pR\n", res);
+               return -EBUSY;
        }
 
-       if (i < count)
-               goto err_id;
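+       /*
+        * Page pins hold @ref; the final put completes @cmp so that
+        * dev_dax_percpu_exit() can tear the ref down.
+        */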
+       init_completion(&dev_dax->cmp);
+       rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
+                       GFP_KERNEL);
+       if (rc)
+               return rc;
 
-       if (id < 0) {
-               id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
-               dev_dax->id = id;
-               if (id < 0) {
-                       rc = id;
-                       goto err_id;
-               }
-       } else {
-               /* region provider owns @id lifetime */
-               dev_dax->id = -1;
-       }
+       rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
+       if (rc)
+               return rc;
 
-       /*
-        * No 'host' or dax_operations since there is no access to this
-        * device outside of mmap of the resulting character device.
-        */
-       dax_dev = alloc_dax(dev_dax, NULL, NULL);
-       if (!dax_dev) {
-               rc = -ENOMEM;
-               goto err_dax;
+       dev_dax->pgmap.ref = &dev_dax->ref;
+       dev_dax->pgmap.kill = dev_dax_percpu_kill;
+       addr = devm_memremap_pages(dev, &dev_dax->pgmap);
+       if (IS_ERR(addr)) {
+               devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
+               percpu_ref_exit(&dev_dax->ref);
+               return PTR_ERR(addr);
        }
 
-       /* from here on we're committed to teardown via dax_dev_release() */
-       dev = &dev_dax->dev;
-       device_initialize(dev);
-
        inode = dax_inode(dax_dev);
        cdev = inode->i_cdev;
        cdev_init(cdev, &dax_fops);
-       cdev->owner = parent->driver->owner;
-
-       dev_dax->num_resources = count;
-       dev_dax->dax_dev = dax_dev;
-       dev_dax->region = dax_region;
-       kref_get(&dax_region->kref);
-
-       dev->devt = inode->i_rdev;
-       dev->class = dax_class;
-       dev->parent = parent;
-       dev->groups = dax_attribute_groups;
-       dev->release = dev_dax_release;
-       dev_set_name(dev, "dax%d.%d", dax_region->id, id);
-
-       rc = cdev_device_add(cdev, dev);
-       if (rc) {
-               kill_dev_dax(dev_dax);
-               put_device(dev);
-               return ERR_PTR(rc);
-       }
-
-       rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
+       if (dev->class) {
+               /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
+               cdev->owner = dev->parent->driver->owner;
+       } else
+               cdev->owner = dev->driver->owner;
+       cdev_set_parent(cdev, &dev->kobj);
+       rc = cdev_add(cdev, dev->devt, 1);
        if (rc)
-               return ERR_PTR(rc);
+               return rc;
 
-       return dev_dax;
+       rc = devm_add_action_or_reset(dev, dev_dax_cdev_del, cdev);
+       if (rc)
+               return rc;
 
- err_dax:
-       if (dev_dax->id >= 0)
-               ida_simple_remove(&dax_region->ida, dev_dax->id);
- err_id:
-       kfree(dev_dax);
+       run_dax(dax_dev);
+       return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax);
+}
+EXPORT_SYMBOL_GPL(dev_dax_probe);
 
-       return ERR_PTR(rc);
+static int dev_dax_remove(struct device *dev)
+{
+       /* all probe actions are unwound by devm */
+       return 0;
 }
-EXPORT_SYMBOL_GPL(devm_create_dev_dax);
+
+static struct dax_device_driver device_dax_driver = {
+       .drv = {
+               .probe = dev_dax_probe,
+               .remove = dev_dax_remove,
+       },
+       .match_always = 1,
+};
 
 static int __init dax_init(void)
 {
-       dax_class = class_create(THIS_MODULE, "dax");
-       return PTR_ERR_OR_ZERO(dax_class);
+       return dax_driver_register(&device_dax_driver);
 }
 
 static void __exit dax_exit(void)
 {
-       class_destroy(dax_class);
+       dax_driver_unregister(&device_dax_driver);
 }
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL v2");
-subsys_initcall(dax_init);
+module_init(dax_init);
 module_exit(dax_exit);
+MODULE_ALIAS_DAX_DEVICE(0);
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
new file mode 100644 (file)
index 0000000..a02318c
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */
+#include <linux/memremap.h>
+#include <linux/pagemap.h>
+#include <linux/memory.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pfn_t.h>
+#include <linux/slab.h>
+#include <linux/dax.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include "dax-private.h"
+#include "bus.h"
+
+int dev_dax_kmem_probe(struct device *dev)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+       struct resource *res = &dev_dax->region->res;
+       resource_size_t kmem_start;
+       resource_size_t kmem_size;
+       resource_size_t kmem_end;
+       struct resource *new_res;
+       int numa_node;
+       int rc;
+
+       /*
+        * Ensure good NUMA information for the persistent memory.
+        * Without this check, there is a risk that slow memory
+        * could be mixed in a node with faster memory, causing
+        * unavoidable performance issues.
+        */
+       numa_node = dev_dax->target_node;
+       if (numa_node < 0) {
+               dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n",
+                        res, numa_node);
+               return -EINVAL;
+       }
+
+       /* Hotplug starting at the beginning of the next block: */
+       kmem_start = ALIGN(res->start, memory_block_size_bytes());
+
+       kmem_size = resource_size(res);
+       /* Adjust the size down to compensate for moving up kmem_start: */
+       kmem_size -= kmem_start - res->start;
+       /* Align the size down to cover only complete blocks: */
+       kmem_size &= ~(memory_block_size_bytes() - 1);
+       kmem_end = kmem_start + kmem_size;
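+       /* e.g. with 128M blocks a [2M, 258M) region becomes [128M, 256M) */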
+
+       /* Region is permanently reserved.  Hot-remove not yet implemented. */
+       new_res = request_mem_region(kmem_start, kmem_size, dev_name(dev));
+       if (!new_res) {
+               dev_warn(dev, "could not reserve region [%pa-%pa]\n",
+                        &kmem_start, &kmem_end);
+               return -EBUSY;
+       }
+
+       /*
+        * Set flags appropriate for System RAM.  Leave ..._BUSY clear
+        * so that add_memory() can add a child resource.  Do not
+        * inherit flags from the parent since it may set new flags
+        * unknown to us that will break add_memory() below.
+        */
+       new_res->flags = IORESOURCE_SYSTEM_RAM;
+       new_res->name = dev_name(dev);
+
+       rc = add_memory(numa_node, new_res->start, resource_size(new_res));
+       if (rc)
+               return rc;
+
+       return 0;
+}
+
+static int dev_dax_kmem_remove(struct device *dev)
+{
+       /*
+        * Purposely leak the request_mem_region() for the device-dax
+        * range and return '0' to ->remove() attempts. The removal of
+        * the device from the driver always succeeds, but the region
+        * is permanently pinned as reserved by the unreleased
+        * request_mem_region().
+        */
+       return 0;
+}
+
+static struct dax_device_driver device_dax_kmem_driver = {
+       .drv = {
+               .probe = dev_dax_kmem_probe,
+               .remove = dev_dax_kmem_remove,
+       },
+};
+
+static int __init dax_kmem_init(void)
+{
+       return dax_driver_register(&device_dax_kmem_driver);
+}
+
+static void __exit dax_kmem_exit(void)
+{
+       dax_driver_unregister(&device_dax_kmem_driver);
+}
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+module_init(dax_kmem_init);
+module_exit(dax_kmem_exit);
+MODULE_ALIAS_DAX_DEVICE(0);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
deleted file mode 100644 (file)
index 2c1f459..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#include <linux/percpu-refcount.h>
-#include <linux/memremap.h>
-#include <linux/module.h>
-#include <linux/pfn_t.h>
-#include "../nvdimm/pfn.h"
-#include "../nvdimm/nd.h"
-#include "device-dax.h"
-
-struct dax_pmem {
-       struct device *dev;
-       struct percpu_ref ref;
-       struct dev_pagemap pgmap;
-       struct completion cmp;
-};
-
-static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
-{
-       return container_of(ref, struct dax_pmem, ref);
-}
-
-static void dax_pmem_percpu_release(struct percpu_ref *ref)
-{
-       struct dax_pmem *dax_pmem = to_dax_pmem(ref);
-
-       dev_dbg(dax_pmem->dev, "trace\n");
-       complete(&dax_pmem->cmp);
-}
-
-static void dax_pmem_percpu_exit(void *data)
-{
-       struct percpu_ref *ref = data;
-       struct dax_pmem *dax_pmem = to_dax_pmem(ref);
-
-       dev_dbg(dax_pmem->dev, "trace\n");
-       wait_for_completion(&dax_pmem->cmp);
-       percpu_ref_exit(ref);
-}
-
-static void dax_pmem_percpu_kill(struct percpu_ref *ref)
-{
-       struct dax_pmem *dax_pmem = to_dax_pmem(ref);
-
-       dev_dbg(dax_pmem->dev, "trace\n");
-       percpu_ref_kill(ref);
-}
-
-static int dax_pmem_probe(struct device *dev)
-{
-       void *addr;
-       struct resource res;
-       int rc, id, region_id;
-       struct nd_pfn_sb *pfn_sb;
-       struct dev_dax *dev_dax;
-       struct dax_pmem *dax_pmem;
-       struct nd_namespace_io *nsio;
-       struct dax_region *dax_region;
-       struct nd_namespace_common *ndns;
-       struct nd_dax *nd_dax = to_nd_dax(dev);
-       struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
-
-       ndns = nvdimm_namespace_common_probe(dev);
-       if (IS_ERR(ndns))
-               return PTR_ERR(ndns);
-       nsio = to_nd_namespace_io(&ndns->dev);
-
-       dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
-       if (!dax_pmem)
-               return -ENOMEM;
-
-       /* parse the 'pfn' info block via ->rw_bytes */
-       rc = devm_nsio_enable(dev, nsio);
-       if (rc)
-               return rc;
-       rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
-       if (rc)
-               return rc;
-       devm_nsio_disable(dev, nsio);
-
-       pfn_sb = nd_pfn->pfn_sb;
-
-       if (!devm_request_mem_region(dev, nsio->res.start,
-                               resource_size(&nsio->res),
-                               dev_name(&ndns->dev))) {
-               dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
-               return -EBUSY;
-       }
-
-       dax_pmem->dev = dev;
-       init_completion(&dax_pmem->cmp);
-       rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
-                       GFP_KERNEL);
-       if (rc)
-               return rc;
-
-       rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
-       if (rc) {
-               percpu_ref_exit(&dax_pmem->ref);
-               return rc;
-       }
-
-       dax_pmem->pgmap.ref = &dax_pmem->ref;
-       dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
-       addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
-       if (IS_ERR(addr))
-               return PTR_ERR(addr);
-
-       /* adjust the dax_region resource to the start of data */
-       memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
-       res.start += le64_to_cpu(pfn_sb->dataoff);
-
-       rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
-       if (rc != 2)
-               return -EINVAL;
-
-       dax_region = alloc_dax_region(dev, region_id, &res,
-                       le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
-       if (!dax_region)
-               return -ENOMEM;
-
-       /* TODO: support for subdividing a dax region... */
-       dev_dax = devm_create_dev_dax(dax_region, id, &res, 1);
-
-       /* child dev_dax instances now own the lifetime of the dax_region */
-       dax_region_put(dax_region);
-
-       return PTR_ERR_OR_ZERO(dev_dax);
-}
-
-static struct nd_device_driver dax_pmem_driver = {
-       .probe = dax_pmem_probe,
-       .drv = {
-               .name = "dax_pmem",
-       },
-       .type = ND_DRIVER_DAX_PMEM,
-};
-
-module_nd_driver(dax_pmem_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/dax/pmem/Makefile b/drivers/dax/pmem/Makefile
new file mode 100644 (file)
index 0000000..e2e79bd
--- /dev/null
@@ -0,0 +1,7 @@
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
+obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
+
+dax_pmem-y := pmem.o
+dax_pmem_core-y := core.o
+dax_pmem_compat-y := compat.o
diff --git a/drivers/dax/pmem/compat.c b/drivers/dax/pmem/compat.c
new file mode 100644 (file)
index 0000000..d7b15e6
--- /dev/null
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
+#include <linux/percpu-refcount.h>
+#include <linux/memremap.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include <linux/nd.h>
+#include "../bus.h"
+
+/* we need the private definitions to implement compat support */
+#include "../dax-private.h"
+
+static int dax_pmem_compat_probe(struct device *dev)
+{
+       struct dev_dax *dev_dax = __dax_pmem_probe(dev, DEV_DAX_CLASS);
+       int rc;
+
+       if (IS_ERR(dev_dax))
+               return PTR_ERR(dev_dax);
+
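+       /*
+        * Collect this probe's devres in a group so that ->remove() can
+        * release it on a per-device basis.
+        */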
+       if (!devres_open_group(&dev_dax->dev, dev_dax, GFP_KERNEL))
+               return -ENOMEM;
+
+       device_lock(&dev_dax->dev);
+       rc = dev_dax_probe(&dev_dax->dev);
+       device_unlock(&dev_dax->dev);
+
+       devres_close_group(&dev_dax->dev, dev_dax);
+       if (rc)
+               devres_release_group(&dev_dax->dev, dev_dax);
+
+       return rc;
+}
+
+static int dax_pmem_compat_release(struct device *dev, void *data)
+{
+       device_lock(dev);
+       devres_release_group(dev, to_dev_dax(dev));
+       device_unlock(dev);
+
+       return 0;
+}
+
+static int dax_pmem_compat_remove(struct device *dev)
+{
+       device_for_each_child(dev, NULL, dax_pmem_compat_release);
+       return 0;
+}
+
+static struct nd_device_driver dax_pmem_compat_driver = {
+       .probe = dax_pmem_compat_probe,
+       .remove = dax_pmem_compat_remove,
+       .drv = {
+               .name = "dax_pmem_compat",
+       },
+       .type = ND_DRIVER_DAX_PMEM,
+};
+
+static int __init dax_pmem_compat_init(void)
+{
+       return nd_driver_register(&dax_pmem_compat_driver);
+}
+module_init(dax_pmem_compat_init);
+
+static void __exit dax_pmem_compat_exit(void)
+{
+       driver_unregister(&dax_pmem_compat_driver.drv);
+}
+module_exit(dax_pmem_compat_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c
new file mode 100644 (file)
index 0000000..f71019c
--- /dev/null
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
+#include <linux/memremap.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include "../../nvdimm/pfn.h"
+#include "../../nvdimm/nd.h"
+#include "../bus.h"
+
+struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
+{
+       struct resource res;
+       int rc, id, region_id;
+       resource_size_t offset;
+       struct nd_pfn_sb *pfn_sb;
+       struct dev_dax *dev_dax;
+       struct nd_namespace_io *nsio;
+       struct dax_region *dax_region;
+       struct dev_pagemap pgmap = { 0 };
+       struct nd_namespace_common *ndns;
+       struct nd_dax *nd_dax = to_nd_dax(dev);
+       struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
+       struct nd_region *nd_region = to_nd_region(dev->parent);
+
+       ndns = nvdimm_namespace_common_probe(dev);
+       if (IS_ERR(ndns))
+               return ERR_CAST(ndns);
+       nsio = to_nd_namespace_io(&ndns->dev);
+
+       /* parse the 'pfn' info block via ->rw_bytes */
+       rc = devm_nsio_enable(dev, nsio);
+       if (rc)
+               return ERR_PTR(rc);
+       rc = nvdimm_setup_pfn(nd_pfn, &pgmap);
+       if (rc)
+               return ERR_PTR(rc);
+       devm_nsio_disable(dev, nsio);
+
+       /* reserve the metadata area, device-dax will reserve the data */
+       pfn_sb = nd_pfn->pfn_sb;
+       offset = le64_to_cpu(pfn_sb->dataoff);
+       if (!devm_request_mem_region(dev, nsio->res.start, offset,
+                               dev_name(&ndns->dev))) {
+               dev_warn(dev, "could not reserve metadata\n");
+               return ERR_PTR(-EBUSY);
+       }
+
+       rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
+       if (rc != 2)
+               return ERR_PTR(-EINVAL);
+
+       /* adjust the dax_region resource to the start of data */
+       memcpy(&res, &pgmap.res, sizeof(res));
+       res.start += offset;
+       dax_region = alloc_dax_region(dev, region_id, &res,
+                       nd_region->target_node, le32_to_cpu(pfn_sb->align),
+                       PFN_DEV|PFN_MAP);
+       if (!dax_region)
+               return ERR_PTR(-ENOMEM);
+
+       dev_dax = __devm_create_dev_dax(dax_region, id, &pgmap, subsys);
+
+       /* child dev_dax instances now own the lifetime of the dax_region */
+       dax_region_put(dax_region);
+
+       return dev_dax;
+}
+EXPORT_SYMBOL_GPL(__dax_pmem_probe);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/pmem/pmem.c b/drivers/dax/pmem/pmem.c
new file mode 100644 (file)
index 0000000..0ae4238
--- /dev/null
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
+#include <linux/percpu-refcount.h>
+#include <linux/memremap.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include <linux/nd.h>
+#include "../bus.h"
+
+static int dax_pmem_probe(struct device *dev)
+{
+       return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev, DEV_DAX_BUS));
+}
+
+static struct nd_device_driver dax_pmem_driver = {
+       .probe = dax_pmem_probe,
+       .drv = {
+               .name = "dax_pmem",
+       },
+       .type = ND_DRIVER_DAX_PMEM,
+};
+
+static int __init dax_pmem_init(void)
+{
+       return nd_driver_register(&dax_pmem_driver);
+}
+module_init(dax_pmem_init);
+
+static void __exit dax_pmem_exit(void)
+{
+       driver_unregister(&dax_pmem_driver.drv);
+}
+module_exit(dax_pmem_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+#if !IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
+/* For compat builds, don't load this module by default */
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
+#endif
index 0cb8c30ea27882ee14ae52254f8d5a379e95b0a5..0a339b85133e1e2ea621dd24f6f42b637e6dac7e 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
+#include "dax-private.h"
 
 static dev_t dax_devt;
 DEFINE_STATIC_SRCU(dax_srcu);
@@ -383,11 +384,15 @@ void kill_dax(struct dax_device *dax_dev)
        spin_lock(&dax_host_lock);
        hlist_del_init(&dax_dev->list);
        spin_unlock(&dax_host_lock);
-
-       dax_dev->private = NULL;
 }
 EXPORT_SYMBOL_GPL(kill_dax);
 
+void run_dax(struct dax_device *dax_dev)
+{
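+       /* revive an instance that was created dead via kill_dax() */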
+       set_bit(DAXDEV_ALIVE, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(run_dax);
+
 static struct inode *dax_alloc_inode(struct super_block *sb)
 {
        struct dax_device *dax_dev;
@@ -602,6 +607,8 @@ EXPORT_SYMBOL_GPL(dax_inode);
 
 void *dax_get_private(struct dax_device *dax_dev)
 {
+       if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
+               return NULL;
        return dax_dev->private;
 }
 EXPORT_SYMBOL_GPL(dax_get_private);
@@ -615,7 +622,7 @@ static void init_once(void *_dax_dev)
        inode_init_once(inode);
 }
 
-static int __dax_fs_init(void)
+static int dax_fs_init(void)
 {
        int rc;
 
@@ -647,35 +654,45 @@ static int __dax_fs_init(void)
        return rc;
 }
 
-static void __dax_fs_exit(void)
+static void dax_fs_exit(void)
 {
        kern_unmount(dax_mnt);
        unregister_filesystem(&dax_fs_type);
        kmem_cache_destroy(dax_cache);
 }
 
-static int __init dax_fs_init(void)
+static int __init dax_core_init(void)
 {
        int rc;
 
-       rc = __dax_fs_init();
+       rc = dax_fs_init();
        if (rc)
                return rc;
 
        rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
        if (rc)
-               __dax_fs_exit();
-       return rc;
+               goto err_chrdev;
+
+       rc = dax_bus_init();
+       if (rc)
+               goto err_bus;
+       return 0;
+
+err_bus:
+       unregister_chrdev_region(dax_devt, MINORMASK+1);
+err_chrdev:
+       dax_fs_exit();
+       return rc;
 }
 
-static void __exit dax_fs_exit(void)
+static void __exit dax_core_exit(void)
 {
        unregister_chrdev_region(dax_devt, MINORMASK+1);
        ida_destroy(&dax_minor_ida);
-       __dax_fs_exit();
+       dax_fs_exit();
 }
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL v2");
-subsys_initcall(dax_fs_init);
-module_exit(dax_fs_exit);
+subsys_initcall(dax_core_init);
+module_exit(dax_core_exit);
index ead851413c0aa054c6c8e6a74d23a7cc739a5fa9..bfa9062ce6b9fed957a5e52c592dc57ff257a02e 100644 (file)
@@ -947,10 +947,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
                if (r)
                        return r;
 
-               r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
-               if (r)
-                       goto error_free_pt;
-
                if (vm->use_cpu_for_update) {
                        r = amdgpu_bo_kmap(pt, NULL);
                        if (r)
@@ -963,6 +959,10 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
                pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
 
                amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+
+               r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
+               if (r)
+                       goto error_free_pt;
        }
 
        return 0;
@@ -3033,13 +3033,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        if (r)
                goto error_unreserve;
 
+       amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
+
        r = amdgpu_vm_clear_bo(adev, vm, root,
                               adev->vm_manager.root_level,
                               vm->pte_support_ats);
        if (r)
                goto error_unreserve;
 
-       amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
        amdgpu_bo_unreserve(vm->root.base.bo);
 
        if (pasid) {
index 5533f6e4f4a48be84351c2429b8202862f827988..d0309e8c9d12cdafa95d2a23e84018f4bb6b8035 100644 (file)
@@ -220,6 +220,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
 
 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 {
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 };
index c63de945c0214d6321da38c7a70a60f52561c5b1..0487e3a4e9e783c603e54720304e89e260c5c440 100644 (file)
@@ -500,9 +500,7 @@ static bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
        struct amdgpu_device *adev = psp->adev;
        uint32_t reg;
 
-       reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000;
-       WREG32_SOC15(NBIO, 0, mmPCIE_INDEX2, reg);
-       reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2);
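+       /* RREG32_PCIE wraps the same mmPCIE_INDEX2/DATA2 indirect read as above */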
+       reg = RREG32_PCIE(smnMP1_FIRMWARE_FLAGS | 0x03b00000);
        return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
 }
 
index 99ebcf29dcb0fdc35b92872844025884bd35b002..ed89a101f73f387139a309b35a372b5b7299de64 100644 (file)
@@ -461,7 +461,6 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 
        switch (adev->asic_type) {
        case CHIP_VEGA10:
-       case CHIP_VEGA20:
                soc15_asic_get_baco_capability(adev, &baco_reset);
                break;
        default:
index 47243165a082a5221b56fa05929f4f5b46fa8081..ae90a99909efeced0641a2283863bc679e03555e 100644 (file)
@@ -323,57 +323,7 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
                struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
                struct queue_properties *q)
 {
-       uint64_t addr;
-       struct cik_mqd *m;
-       int retval;
-
-       retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
-                                       mqd_mem_obj);
-
-       if (retval != 0)
-               return -ENOMEM;
-
-       m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
-       addr = (*mqd_mem_obj)->gpu_addr;
-
-       memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
-
-       m->header = 0xC0310800;
-       m->compute_pipelinestat_enable = 1;
-       m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
-       m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
-       m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
-       m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
-
-       m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE |
-                                       PRELOAD_REQ;
-       m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
-                               QUANTUM_DURATION(10);
-
-       m->cp_mqd_control             = MQD_CONTROL_PRIV_STATE_EN;
-       m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
-       m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
-
-       m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
-
-       /*
-        * Pipe Priority
-        * Identifies the pipe relative priority when this queue is connected
-        * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
-        * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
-        * 0 = CS_LOW (typically below GFX)
-        * 1 = CS_MEDIUM (typically between HP3D and GFX
-        * 2 = CS_HIGH (typically above HP3D)
-        */
-       m->cp_hqd_pipe_priority = 1;
-       m->cp_hqd_queue_priority = 15;
-
-       *mqd = m;
-       if (gart_addr)
-               *gart_addr = addr;
-       retval = mm->update_mqd(mm, m, q);
-
-       return retval;
+       return init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
 }
 
 static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
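
The removed body was an almost line-for-line copy of the regular init_mqd(); delegating to it eliminates the duplication and keeps the HIQ and user-queue MQD setups from drifting apart.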
index 2f26581b93ff5c4bacf77f3ca9d5619a2d979e32..fb27783d7a542d565e1c002d03fc051d055039be 100644 (file)
@@ -886,6 +886,7 @@ static void emulated_link_detect(struct dc_link *link)
                return;
        }
 
+       /* dc_sink_create returns a new reference */
        link->local_sink = sink;
 
        edid_status = dm_helpers_read_local_edid(
@@ -952,6 +953,8 @@ static int dm_resume(void *handle)
                if (aconnector->fake_enable && aconnector->dc_link->local_sink)
                        aconnector->fake_enable = false;
 
+               if (aconnector->dc_sink)
+                       dc_sink_release(aconnector->dc_sink);
                aconnector->dc_sink = NULL;
                amdgpu_dm_update_connector_after_detect(aconnector);
                mutex_unlock(&aconnector->hpd_lock);
@@ -1061,6 +1064,8 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
 
 
        sink = aconnector->dc_link->local_sink;
+       if (sink)
+               dc_sink_retain(sink);
 
        /*
         * Edid mgmt connector gets first update only in mode_valid hook and then
@@ -1085,21 +1090,24 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
                                 * to it anymore after disconnect, so on next crtc to connector
                                 * reshuffle by UMD we will get into unwanted dc_sink release
                                 */
-                               if (aconnector->dc_sink != aconnector->dc_em_sink)
-                                       dc_sink_release(aconnector->dc_sink);
+                               dc_sink_release(aconnector->dc_sink);
                        }
                        aconnector->dc_sink = sink;
+                       dc_sink_retain(aconnector->dc_sink);
                        amdgpu_dm_update_freesync_caps(connector,
                                        aconnector->edid);
                } else {
                        amdgpu_dm_update_freesync_caps(connector, NULL);
-                       if (!aconnector->dc_sink)
+                       if (!aconnector->dc_sink) {
                                aconnector->dc_sink = aconnector->dc_em_sink;
-                       else if (aconnector->dc_sink != aconnector->dc_em_sink)
                                dc_sink_retain(aconnector->dc_sink);
+                       }
                }
 
                mutex_unlock(&dev->mode_config.mutex);
+
+               if (sink)
+                       dc_sink_release(sink);
                return;
        }
 
@@ -1107,8 +1115,10 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
         * TODO: temporary guard to look for proper fix
         * if this sink is MST sink, we should not do anything
         */
-       if (sink && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+       if (sink && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+               dc_sink_release(sink);
                return;
+       }
 
        if (aconnector->dc_sink == sink) {
                /*
@@ -1117,6 +1127,8 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
                 */
                DRM_DEBUG_DRIVER("DCHPD: connector_id=%d: dc_sink didn't change.\n",
                                aconnector->connector_id);
+               if (sink)
+                       dc_sink_release(sink);
                return;
        }
 
@@ -1138,6 +1150,7 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
                        amdgpu_dm_update_freesync_caps(connector, NULL);
 
                aconnector->dc_sink = sink;
+               dc_sink_retain(aconnector->dc_sink);
                if (sink->dc_edid.length == 0) {
                        aconnector->edid = NULL;
                        drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
@@ -1158,11 +1171,15 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
                amdgpu_dm_update_freesync_caps(connector, NULL);
                drm_connector_update_edid_property(connector, NULL);
                aconnector->num_modes = 0;
+               dc_sink_release(aconnector->dc_sink);
                aconnector->dc_sink = NULL;
                aconnector->edid = NULL;
        }
 
        mutex_unlock(&dev->mode_config.mutex);
+
+       if (sink)
+               dc_sink_release(sink);
 }
 
 static void handle_hpd_irq(void *param)
@@ -2977,6 +2994,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                        return stream;
        } else {
                sink = aconnector->dc_sink;
+               dc_sink_retain(sink);
        }
 
        stream = dc_create_stream_for_sink(sink);
@@ -3042,8 +3060,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
        update_stream_signal(stream, sink);
 
 finish:
-       if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL && aconnector->base.force != DRM_FORCE_ON)
-               dc_sink_release(sink);
+       dc_sink_release(sink);
 
        return stream;
 }
@@ -3301,6 +3318,14 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
                dm->backlight_dev = NULL;
        }
 #endif
+
+       if (aconnector->dc_em_sink)
+               dc_sink_release(aconnector->dc_em_sink);
+       aconnector->dc_em_sink = NULL;
+       if (aconnector->dc_sink)
+               dc_sink_release(aconnector->dc_sink);
+       aconnector->dc_sink = NULL;
+
        drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux);
        drm_connector_unregister(connector);
        drm_connector_cleanup(connector);
@@ -3398,10 +3423,12 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
                (edid->extensions + 1) * EDID_LENGTH,
                &init_params);
 
-       if (aconnector->base.force == DRM_FORCE_ON)
+       if (aconnector->base.force == DRM_FORCE_ON) {
                aconnector->dc_sink = aconnector->dc_link->local_sink ?
                aconnector->dc_link->local_sink :
                aconnector->dc_em_sink;
+               dc_sink_retain(aconnector->dc_sink);
+       }
 }
 
 static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector)
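
The amdgpu_dm hunks above all serve one refcounting discipline for struct dc_sink: take a reference for every stored or temporarily used pointer, and drop exactly one reference on every exit path. The pattern, condensed from the hunks:

        struct dc_sink *sink = aconnector->dc_link->local_sink;
        if (sink)
                dc_sink_retain(sink);           /* pin the sink while inspecting it */

        /* ... detection and EDID handling, early returns included ... */

        aconnector->dc_sink = sink;
        dc_sink_retain(aconnector->dc_sink);    /* the stored pointer owns its own ref */

        if (sink)
                dc_sink_release(sink);          /* drop the temporary ref on every path */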
index f51d52eb52e6d23f269cb3d7fa6745fb677bc002..c4ea3a91f17aa44910e13a92932727b3f4388f96 100644 (file)
@@ -191,6 +191,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
                        &init_params);
 
                dc_sink->priv = aconnector;
+               /* dc_link_add_remote_sink returns a new reference */
                aconnector->dc_sink = dc_sink;
 
                if (aconnector->dc_sink)
index 12d1842079ae5e3902d2fe7ea9719a7f35ebd2ef..eb62d10bb65cd75843a2e385b7ff9f348a266b13 100644 (file)
@@ -1348,12 +1348,12 @@ void dcn_bw_update_from_pplib(struct dc *dc)
        struct dm_pp_clock_levels_with_voltage fclks = {0}, dcfclks = {0};
        bool res;
 
-       kernel_fpu_begin();
-
        /* TODO: This is not the proper way to obtain fabric_and_dram_bandwidth, should be min(fclk, memclk) */
        res = dm_pp_get_clock_levels_by_type_with_voltage(
                        ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks);
 
+       kernel_fpu_begin();
+
        if (res)
                res = verify_clock_values(&fclks);
 
@@ -1372,9 +1372,13 @@ void dcn_bw_update_from_pplib(struct dc *dc)
        } else
                BREAK_TO_DEBUGGER();
 
+       kernel_fpu_end();
+
        res = dm_pp_get_clock_levels_by_type_with_voltage(
                        ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks);
 
+       kernel_fpu_begin();
+
        if (res)
                res = verify_clock_values(&dcfclks);
 
index 7f5a947ad31dfd7f6315cf982bdebd13f6fda09a..4eba3c4800b63bef00ec9fd532919aa84ca72126 100644 (file)
@@ -794,6 +794,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
                sink->link->dongle_max_pix_clk = sink_caps.max_hdmi_pixel_clock;
                sink->converter_disable_audio = converter_disable_audio;
 
+               /* dc_sink_create returns a new reference */
                link->local_sink = sink;
 
                edid_status = dm_helpers_read_local_edid(
@@ -2037,6 +2038,9 @@ static enum dc_status enable_link(
                break;
        }
 
+       if (status == DC_OK)
+               pipe_ctx->stream->link->link_status.link_active = true;
+
        return status;
 }
 
@@ -2060,6 +2064,14 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
                        dp_disable_link_phy_mst(link, signal);
        } else
                link->link_enc->funcs->disable_output(link->link_enc, signal);
+
+       if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+               /* for MST, disable the link only when no stream uses it */
+               if (link->mst_stream_alloc_table.stream_count <= 0)
+                       link->link_status.link_active = false;
+       } else {
+               link->link_status.link_active = false;
+       }
 }
 
 static bool dp_active_dongle_validate_timing(
@@ -2623,8 +2635,6 @@ void core_link_enable_stream(
                        }
                }
 
-               stream->link->link_status.link_active = true;
-
                core_dc->hwss.enable_audio_stream(pipe_ctx);
 
                /* turn off otg test pattern if enable */
@@ -2659,8 +2669,6 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
        core_dc->hwss.disable_stream(pipe_ctx, option);
 
        disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
-
-       pipe_ctx->stream->link->link_status.link_active = false;
 }
 
 void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
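
link_status.link_active is now maintained where the link itself is brought up and torn down, in enable_link() and disable_link(), rather than in the per-stream core_link_enable_stream()/core_link_disable_stream() paths. For MST the link stays marked active until its stream allocation table is empty, since several streams can share one link.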
index 94a84bc57c7a6a5db2604fbaf4d2ef9ae40e445d..bfd27f10879e98bb866b6877fc9c0a8b1e4188b5 100644 (file)
@@ -724,7 +724,7 @@ static void build_vrr_infopacket_v1(enum signal_type signal,
 
 static void build_vrr_infopacket_v2(enum signal_type signal,
                const struct mod_vrr_params *vrr,
-               const enum color_transfer_func *app_tf,
+               enum color_transfer_func app_tf,
                struct dc_info_packet *infopacket)
 {
        unsigned int payload_size = 0;
@@ -732,8 +732,7 @@ static void build_vrr_infopacket_v2(enum signal_type signal,
        build_vrr_infopacket_header_v2(signal, infopacket, &payload_size);
        build_vrr_infopacket_data(vrr, infopacket);
 
-       if (app_tf != NULL)
-               build_vrr_infopacket_fs2_data(*app_tf, infopacket);
+       build_vrr_infopacket_fs2_data(app_tf, infopacket);
 
        build_vrr_infopacket_checksum(&payload_size, infopacket);
 
@@ -757,7 +756,7 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync,
                const struct dc_stream_state *stream,
                const struct mod_vrr_params *vrr,
                enum vrr_packet_type packet_type,
-               const enum color_transfer_func *app_tf,
+               enum color_transfer_func app_tf,
                struct dc_info_packet *infopacket)
 {
        /* SPD info packet for FreeSync
index 4222e403b15150f43d50e75822844b47570107e4..dcef85994c45d189065f8723f1c7b9471538e5ca 100644 (file)
@@ -145,7 +145,7 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync,
                const struct dc_stream_state *stream,
                const struct mod_vrr_params *vrr,
                enum vrr_packet_type packet_type,
-               const enum color_transfer_func *app_tf,
+               enum color_transfer_func app_tf,
                struct dc_info_packet *infopacket);
 
 void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
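
Passing enum color_transfer_func by value removes the NULL-pointer special case in build_vrr_infopacket_v2() and obliges every caller to name a transfer function explicitly. A hypothetical call site under the new signature; the enum values shown are placeholders, not taken from this diff:

        mod_freesync_build_vrr_infopacket(dm->freesync_module, stream, &vrr,
                                          PACKET_TYPE_VRR, APP_TF_VALUE,
                                          &infopacket);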
index ce177d7f04cbe9ad930d01650a5ede1995e9a135..6bf48934fdc4d465b9c28cb99a39c0111d025ea7 100644 (file)
@@ -277,8 +277,7 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip_display_set
        if (!skip_display_settings)
                phm_notify_smc_display_config_after_ps_adjustment(hwmgr);
 
-       if ((hwmgr->request_dpm_level != hwmgr->dpm_level) &&
-           !phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level))
+       if (!phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level))
                hwmgr->dpm_level = hwmgr->request_dpm_level;
 
        if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
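
The dpm_level comparison guard is gone, so the requested level is pushed to phm_force_dpm_levels() even when it matches the cached hwmgr->dpm_level, presumably to re-assert the level after events that invalidate the SMC-side state without updating the cache.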
index 4588bddf8b33289d9fdf31a392ffcfc911aabde3..615cf2c09e54e73e55aa06e162426a97ba3474e9 100644 (file)
@@ -489,15 +489,16 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
 }
 
 int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr,
-                                              uint8_t id, uint32_t *frequency)
+                                              uint8_t clk_id, uint8_t syspll_id,
+                                              uint32_t *frequency)
 {
        struct amdgpu_device *adev = hwmgr->adev;
        struct atom_get_smu_clock_info_parameters_v3_1   parameters;
        struct atom_get_smu_clock_info_output_parameters_v3_1 *output;
        uint32_t ix;
 
-       parameters.clk_id = id;
-       parameters.syspll_id = 0;
+       parameters.clk_id = clk_id;
+       parameters.syspll_id = syspll_id;
        parameters.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ;
        parameters.dfsdid = 0;
 
@@ -530,20 +531,23 @@ static void pp_atomfwctrl_copy_vbios_bootup_values_3_2(struct pp_hwmgr *hwmgr,
        boot_values->ulSocClk   = 0;
        boot_values->ulDCEFClk   = 0;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_SOCCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_SOCCLK_ID, SMU11_SYSPLL0_ID, &frequency))
                boot_values->ulSocClk   = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCEFCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCEFCLK_ID, SMU11_SYSPLL0_ID, &frequency))
                boot_values->ulDCEFClk  = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_ECLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_ECLK_ID, SMU11_SYSPLL0_ID, &frequency))
                boot_values->ulEClk     = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_VCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_VCLK_ID, SMU11_SYSPLL0_ID, &frequency))
                boot_values->ulVClk     = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCLK_ID, SMU11_SYSPLL0_ID, &frequency))
                boot_values->ulDClk     = frequency;
+
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL1_0_FCLK_ID, SMU11_SYSPLL1_2_ID, &frequency))
+               boot_values->ulFClk     = frequency;
 }
 
 static void pp_atomfwctrl_copy_vbios_bootup_values_3_1(struct pp_hwmgr *hwmgr,
@@ -563,19 +567,19 @@ static void pp_atomfwctrl_copy_vbios_bootup_values_3_1(struct pp_hwmgr *hwmgr,
        boot_values->ulSocClk   = 0;
        boot_values->ulDCEFClk   = 0;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, 0, &frequency))
                boot_values->ulSocClk   = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, 0, &frequency))
                boot_values->ulDCEFClk  = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_ECLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_ECLK_ID, 0, &frequency))
                boot_values->ulEClk     = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_VCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_VCLK_ID, 0, &frequency))
                boot_values->ulVClk     = frequency;
 
-       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCLK_ID, &frequency))
+       if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCLK_ID, 0, &frequency))
                boot_values->ulDClk     = frequency;
 }
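
The query gains an explicit syspll ID instead of hardcoding syspll 0. That is what lets the SMU11 bootup path above read FCLK from SMU11_SYSPLL1_2_ID, while the SMU9 callers pass 0 and keep their old behavior; the new ulFClk bootup value feeds the vega20 fclk defaults further down.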
 
index fe9e8ceef50e60f37f909b7101e941780b035537..b7e2651b570bcfa2e13f83aa34a5ed2c3dd2428b 100644 (file)
@@ -139,6 +139,7 @@ struct pp_atomfwctrl_bios_boot_up_values {
        uint32_t   ulEClk;
        uint32_t   ulVClk;
        uint32_t   ulDClk;
+       uint32_t   ulFClk;
        uint16_t   usVddc;
        uint16_t   usVddci;
        uint16_t   usMvddc;
@@ -236,7 +237,8 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
 int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
                        struct pp_atomfwctrl_smc_dpm_parameters *param);
 int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr,
-                                       uint8_t id, uint32_t *frequency);
+                                       uint8_t clk_id, uint8_t syspll_id,
+                                       uint32_t *frequency);
 
 #endif
 
index 48187acac59e7bf1181565e852584fe856f26efc..83d3d935f3acc899cf682d7e4fa616094e746a58 100644 (file)
@@ -3491,14 +3491,14 @@ static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
 
        smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
        cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-                                                       ixSMU_PM_STATUS_94, 0);
+                                                       ixSMU_PM_STATUS_95, 0);
 
        for (i = 0; i < 10; i++) {
-               mdelay(1);
+               mdelay(500);
                smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample);
                tmp = cgs_read_ind_register(hwmgr->device,
                                                CGS_IND_REG__SMC,
-                                               ixSMU_PM_STATUS_94);
+                                               ixSMU_PM_STATUS_95);
                if (tmp != 0)
                        break;
        }
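
The power-status sample moves from ixSMU_PM_STATUS_94 to ixSMU_PM_STATUS_95, and each of the ten polls now waits 500 ms instead of 1 ms, giving the SMC a far longer window to publish a nonzero reading.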
index 5479125ff4f6e974ed2395c75d748b44b0af65be..5c4f701939ea542b7a3cbbd9baa36b7a5958797e 100644 (file)
@@ -2575,10 +2575,10 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
                data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk;
                data->vbios_boot_state.mem_clock = boot_up_values.ulUClk;
                pp_atomfwctrl_get_clk_information_by_clkid(hwmgr,
-                               SMU9_SYSPLL0_SOCCLK_ID, &boot_up_values.ulSocClk);
+                               SMU9_SYSPLL0_SOCCLK_ID, 0, &boot_up_values.ulSocClk);
 
                pp_atomfwctrl_get_clk_information_by_clkid(hwmgr,
-                               SMU9_SYSPLL0_DCEFCLK_ID, &boot_up_values.ulDCEFClk);
+                               SMU9_SYSPLL0_DCEFCLK_ID, 0, &boot_up_values.ulDCEFClk);
 
                data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk;
                data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk;
@@ -4407,9 +4407,9 @@ static int vega10_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfe
                return ret;
 
        features_to_disable =
-               (features_enabled ^ new_ppfeature_masks) & features_enabled;
+               features_enabled & ~new_ppfeature_masks;
        features_to_enable =
-               (features_enabled ^ new_ppfeature_masks) ^ features_to_disable;
+               ~features_enabled & new_ppfeature_masks;
 
        pr_debug("features_to_disable 0x%llx\n", features_to_disable);
        pr_debug("features_to_enable 0x%llx\n", features_to_enable);
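
The rewritten mask arithmetic here, and in the matching vega12 and vega20 hunks below, is algebraically equivalent to the old XOR form, since (e ^ n) & e == e & ~n and ((e ^ n) ^ (e & ~n)) == ~e & n, but it states the intent directly. A worked example:

        /* enabled = 0b1100, newly requested = 0b1010 */
        uint64_t e = 0xC, n = 0xA;
        uint64_t features_to_disable = e & ~n;  /* 0b0100: on now, no longer wanted */
        uint64_t features_to_enable  = ~e & n;  /* 0b0010: wanted, currently off */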
index 6c8e78611c033c20a381f492c7ad01cca764e150..bdb48e94eff6082e4cf16b958925075e8dfd7466 100644 (file)
@@ -2009,9 +2009,9 @@ static int vega12_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfe
                return ret;
 
        features_to_disable =
-               (features_enabled ^ new_ppfeature_masks) & features_enabled;
+               features_enabled & ~new_ppfeature_masks;
        features_to_enable =
-               (features_enabled ^ new_ppfeature_masks) ^ features_to_disable;
+               ~features_enabled & new_ppfeature_masks;
 
        pr_debug("features_to_disable 0x%llx\n", features_to_disable);
        pr_debug("features_to_enable 0x%llx\n", features_to_enable);
index aad79affb08123a6acb21dcf03d80bbfbf5609d3..9aa7bec1b5fe6f3aeb67da66d4b88b2e16966bbb 100644 (file)
@@ -463,9 +463,9 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
 static void vega20_init_dpm_state(struct vega20_dpm_state *dpm_state)
 {
        dpm_state->soft_min_level = 0x0;
-       dpm_state->soft_max_level = 0xffff;
+       dpm_state->soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_state->hard_min_level = 0x0;
-       dpm_state->hard_max_level = 0xffff;
+       dpm_state->hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 }
 
 static int vega20_get_number_of_dpm_level(struct pp_hwmgr *hwmgr,
@@ -711,8 +711,10 @@ static int vega20_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
                PP_ASSERT_WITH_CODE(!ret,
                                "[SetupDefaultDpmTable] failed to get fclk dpm levels!",
                                return ret);
-       } else
-               dpm_table->count = 0;
+       } else {
+               dpm_table->count = 1;
+               dpm_table->dpm_levels[0].value = data->vbios_boot_state.fclock / 100;
+       }
        vega20_init_dpm_state(&(dpm_table->dpm_state));
 
        /* save a copy of the default DPM table */
@@ -754,6 +756,7 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr)
        data->vbios_boot_state.eclock = boot_up_values.ulEClk;
        data->vbios_boot_state.vclock = boot_up_values.ulVClk;
        data->vbios_boot_state.dclock = boot_up_values.ulDClk;
+       data->vbios_boot_state.fclock = boot_up_values.ulFClk;
        data->vbios_boot_state.uc_cooling_id = boot_up_values.ucCoolingID;
 
        smum_send_msg_to_smc_with_parameter(hwmgr,
@@ -780,6 +783,8 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr)
 static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+       struct vega20_hwmgr *data =
+                       (struct vega20_hwmgr *)(hwmgr->backend);
        uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg;
        int ret;
 
@@ -816,6 +821,10 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
                "[OverridePcieParameters] Attempt to override pcie params failed!",
                return ret);
 
+       data->pcie_parameters_override = 1;
+       data->pcie_gen_level1 = pcie_gen;
+       data->pcie_width_level1 = pcie_width;
+
        return 0;
 }
 
@@ -979,6 +988,8 @@ static int vega20_od8_set_feature_capabilities(
        }
 
        if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+               pptable_information->od_settings_min[OD8_SETTING_UCLK_FMAX] =
+                       data->dpm_table.mem_table.dpm_levels[data->dpm_table.mem_table.count - 2].value;
                if (pptable_information->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_UCLK_MAX] &&
                    pptable_information->od_settings_min[OD8_SETTING_UCLK_FMAX] > 0 &&
                    pptable_information->od_settings_max[OD8_SETTING_UCLK_FMAX] > 0 &&
@@ -2314,32 +2325,8 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr)
 
 static int vega20_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
 {
-       struct vega20_hwmgr *data =
-                       (struct vega20_hwmgr *)(hwmgr->backend);
-       uint32_t soft_min_level, soft_max_level;
        int ret = 0;
 
-       soft_min_level = vega20_find_lowest_dpm_level(&(data->dpm_table.gfx_table));
-       soft_max_level = vega20_find_highest_dpm_level(&(data->dpm_table.gfx_table));
-       data->dpm_table.gfx_table.dpm_state.soft_min_level =
-               data->dpm_table.gfx_table.dpm_levels[soft_min_level].value;
-       data->dpm_table.gfx_table.dpm_state.soft_max_level =
-               data->dpm_table.gfx_table.dpm_levels[soft_max_level].value;
-
-       soft_min_level = vega20_find_lowest_dpm_level(&(data->dpm_table.mem_table));
-       soft_max_level = vega20_find_highest_dpm_level(&(data->dpm_table.mem_table));
-       data->dpm_table.mem_table.dpm_state.soft_min_level =
-               data->dpm_table.mem_table.dpm_levels[soft_min_level].value;
-       data->dpm_table.mem_table.dpm_state.soft_max_level =
-               data->dpm_table.mem_table.dpm_levels[soft_max_level].value;
-
-       soft_min_level = vega20_find_lowest_dpm_level(&(data->dpm_table.soc_table));
-       soft_max_level = vega20_find_highest_dpm_level(&(data->dpm_table.soc_table));
-       data->dpm_table.soc_table.dpm_state.soft_min_level =
-               data->dpm_table.soc_table.dpm_levels[soft_min_level].value;
-       data->dpm_table.soc_table.dpm_state.soft_max_level =
-               data->dpm_table.soc_table.dpm_levels[soft_max_level].value;
-
        ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
        PP_ASSERT_WITH_CODE(!ret,
                        "Failed to upload DPM Bootup Levels!",
@@ -2641,9 +2628,8 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
        struct vega20_single_dpm_table *dpm_table = &(data->dpm_table.gfx_table);
        int i, count;
 
-       PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled,
-               "[GetSclks]: gfxclk dpm not enabled!\n",
-               return -EPERM);
+       if (!data->smu_features[GNLD_DPM_GFXCLK].enabled)
+               return -1;
 
        count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : dpm_table->count;
        clocks->num_levels = count;
@@ -2670,9 +2656,8 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
        struct vega20_single_dpm_table *dpm_table = &(data->dpm_table.mem_table);
        int i, count;
 
-       PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled,
-               "[GetMclks]: uclk dpm not enabled!\n",
-               return -EPERM);
+       if (!data->smu_features[GNLD_DPM_UCLK].enabled)
+               return -1;
 
        count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : dpm_table->count;
        clocks->num_levels = data->mclk_latency_table.count = count;
@@ -2696,9 +2681,8 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
        struct vega20_single_dpm_table *dpm_table = &(data->dpm_table.dcef_table);
        int i, count;
 
-       PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled,
-               "[GetDcfclocks]: dcefclk dpm not enabled!\n",
-               return -EPERM);
+       if (!data->smu_features[GNLD_DPM_DCEFCLK].enabled)
+               return -1;
 
        count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : dpm_table->count;
        clocks->num_levels = count;
@@ -2719,9 +2703,8 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
        struct vega20_single_dpm_table *dpm_table = &(data->dpm_table.soc_table);
        int i, count;
 
-       PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled,
-               "[GetSocclks]: socclk dpm not enabled!\n",
-               return -EPERM);
+       if (!data->smu_features[GNLD_DPM_SOCCLK].enabled)
+               return -1;
 
        count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : dpm_table->count;
        clocks->num_levels = count;
@@ -2799,7 +2782,6 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
                        data->od8_settings.od8_settings_array;
        OverDriveTable_t *od_table =
                        &(data->smc_state_table.overdrive_table);
-       struct pp_clock_levels_with_latency clocks;
        int32_t input_index, input_clk, input_vol, i;
        int od8_id;
        int ret;
@@ -2858,11 +2840,6 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
                        return -EOPNOTSUPP;
                }
 
-               ret = vega20_get_memclocks(hwmgr, &clocks);
-               PP_ASSERT_WITH_CODE(!ret,
-                               "Attempt to get memory clk levels failed!",
-                               return ret);
-
                for (i = 0; i < size; i += 2) {
                        if (i + 2 > size) {
                                pr_info("invalid number of input parameters %d\n",
@@ -2879,11 +2856,11 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
                                return -EINVAL;
                        }
 
-                       if (input_clk < clocks.data[0].clocks_in_khz / 1000 ||
+                       if (input_clk < od8_settings[OD8_SETTING_UCLK_FMAX].min_value ||
                            input_clk > od8_settings[OD8_SETTING_UCLK_FMAX].max_value) {
                                pr_info("clock freq %d is not within allowed range [%d - %d]\n",
                                        input_clk,
-                                       clocks.data[0].clocks_in_khz / 1000,
+                                       od8_settings[OD8_SETTING_UCLK_FMAX].min_value,
                                        od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
                                return -EINVAL;
                        }
@@ -3088,9 +3065,9 @@ static int vega20_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfe
                return ret;
 
        features_to_disable =
-               (features_enabled ^ new_ppfeature_masks) & features_enabled;
+               features_enabled & ~new_ppfeature_masks;
        features_to_enable =
-               (features_enabled ^ new_ppfeature_masks) ^ features_to_disable;
+               ~features_enabled & new_ppfeature_masks;
 
        pr_debug("features_to_disable 0x%llx\n", features_to_disable);
        pr_debug("features_to_enable 0x%llx\n", features_to_enable);
@@ -3128,7 +3105,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                        &(data->dpm_table.fclk_table);
        int i, now, size = 0;
        int ret = 0;
-       uint32_t gen_speed, lane_width;
+       uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width;
 
        switch (type) {
        case PP_SCLK:
@@ -3137,10 +3114,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get current gfx clk Failed!",
                                return ret);
 
-               ret = vega20_get_sclks(hwmgr, &clocks);
-               PP_ASSERT_WITH_CODE(!ret,
-                               "Attempt to get gfx clk levels Failed!",
-                               return ret);
+               if (vega20_get_sclks(hwmgr, &clocks)) {
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+                               now / 100);
+                       break;
+               }
 
                for (i = 0; i < clocks.num_levels; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -3154,10 +3132,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get current mclk freq Failed!",
                                return ret);
 
-               ret = vega20_get_memclocks(hwmgr, &clocks);
-               PP_ASSERT_WITH_CODE(!ret,
-                               "Attempt to get memory clk levels Failed!",
-                               return ret);
+               if (vega20_get_memclocks(hwmgr, &clocks)) {
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+                               now / 100);
+                       break;
+               }
 
                for (i = 0; i < clocks.num_levels; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -3171,10 +3150,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get current socclk freq Failed!",
                                return ret);
 
-               ret = vega20_get_socclocks(hwmgr, &clocks);
-               PP_ASSERT_WITH_CODE(!ret,
-                               "Attempt to get soc clk levels Failed!",
-                               return ret);
+               if (vega20_get_socclocks(hwmgr, &clocks)) {
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+                               now / 100);
+                       break;
+               }
 
                for (i = 0; i < clocks.num_levels; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -3200,10 +3180,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get current dcefclk freq Failed!",
                                return ret);
 
-               ret = vega20_get_dcefclocks(hwmgr, &clocks);
-               PP_ASSERT_WITH_CODE(!ret,
-                               "Attempt to get dcefclk levels Failed!",
-                               return ret);
+               if (vega20_get_dcefclocks(hwmgr, &clocks)) {
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+                               now / 100);
+                       break;
+               }
 
                for (i = 0; i < clocks.num_levels; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -3212,28 +3193,36 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                break;
 
        case PP_PCIE:
-               gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
+               current_gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
                             PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK)
                            >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
-               lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
+               current_lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
                              PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
                            >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
-               for (i = 0; i < NUM_LINK_LEVELS; i++)
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       if (i == 1 && data->pcie_parameters_override) {
+                               gen_speed = data->pcie_gen_level1;
+                               lane_width = data->pcie_width_level1;
+                       } else {
+                               gen_speed = pptable->PcieGenSpeed[i];
+                               lane_width = pptable->PcieLaneCount[i];
+                       }
                        size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
-                                       (pptable->PcieGenSpeed[i] == 0) ? "2.5GT/s," :
-                                       (pptable->PcieGenSpeed[i] == 1) ? "5.0GT/s," :
-                                       (pptable->PcieGenSpeed[i] == 2) ? "8.0GT/s," :
-                                       (pptable->PcieGenSpeed[i] == 3) ? "16.0GT/s," : "",
-                                       (pptable->PcieLaneCount[i] == 1) ? "x1" :
-                                       (pptable->PcieLaneCount[i] == 2) ? "x2" :
-                                       (pptable->PcieLaneCount[i] == 3) ? "x4" :
-                                       (pptable->PcieLaneCount[i] == 4) ? "x8" :
-                                       (pptable->PcieLaneCount[i] == 5) ? "x12" :
-                                       (pptable->PcieLaneCount[i] == 6) ? "x16" : "",
+                                       (gen_speed == 0) ? "2.5GT/s," :
+                                       (gen_speed == 1) ? "5.0GT/s," :
+                                       (gen_speed == 2) ? "8.0GT/s," :
+                                       (gen_speed == 3) ? "16.0GT/s," : "",
+                                       (lane_width == 1) ? "x1" :
+                                       (lane_width == 2) ? "x2" :
+                                       (lane_width == 3) ? "x4" :
+                                       (lane_width == 4) ? "x8" :
+                                       (lane_width == 5) ? "x12" :
+                                       (lane_width == 6) ? "x16" : "",
                                        pptable->LclkFreq[i],
-                                       (gen_speed == pptable->PcieGenSpeed[i]) &&
-                                       (lane_width == pptable->PcieLaneCount[i]) ?
+                                       (current_gen_speed == gen_speed) &&
+                                       (current_lane_width == lane_width) ?
                                        "*" : "");
+               }
                break;
 
        case OD_SCLK:
@@ -3288,13 +3277,8 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                }
 
                if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
-                       ret = vega20_get_memclocks(hwmgr, &clocks);
-                       PP_ASSERT_WITH_CODE(!ret,
-                                       "Fail to get memory clk levels!",
-                                       return ret);
-
                        size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
-                               clocks.data[0].clocks_in_khz / 1000,
+                               od8_settings[OD8_SETTING_UCLK_FMAX].min_value,
                                od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
                }
 
@@ -3356,6 +3340,31 @@ static int vega20_set_uclk_to_highest_dpm_level(struct pp_hwmgr *hwmgr,
        return ret;
 }
 
+static int vega20_set_fclk_to_highest_dpm_level(struct pp_hwmgr *hwmgr)
+{
+       struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
+       struct vega20_single_dpm_table *dpm_table = &(data->dpm_table.fclk_table);
+       int ret = 0;
+
+       if (data->smu_features[GNLD_DPM_FCLK].enabled) {
+               PP_ASSERT_WITH_CODE(dpm_table->count > 0,
+                               "[SetFclkToHighestDpmLevel] Dpm table has no entry!",
+                               return -EINVAL);
+               PP_ASSERT_WITH_CODE(dpm_table->count <= NUM_FCLK_DPM_LEVELS,
+                               "[SetFclkToHighestDpmLevel] Dpm table has too many entries!",
+                               return -EINVAL);
+
+               dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+               PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+                               PPSMC_MSG_SetSoftMinByFreq,
+                               (PPCLK_FCLK << 16) | dpm_table->dpm_state.soft_min_level)),
+                               "[SetFclkToHighestDpmLevel] Set soft min fclk failed!",
+                               return ret);
+       }
+
+       return ret;
+}
+
 static int vega20_pre_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
 {
        struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
@@ -3366,8 +3375,10 @@ static int vega20_pre_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
 
        ret = vega20_set_uclk_to_highest_dpm_level(hwmgr,
                        &data->dpm_table.mem_table);
+       if (ret)
+               return ret;
 
-       return ret;
+       return vega20_set_fclk_to_highest_dpm_level(hwmgr);
 }
 
 static int vega20_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
@@ -3461,9 +3472,9 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
        /* gfxclk */
        dpm_table = &(data->dpm_table.gfx_table);
        dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 
        if (PP_CAP(PHM_PlatformCaps_UMDPState)) {
                if (VEGA20_UMD_PSTATE_GFXCLK_LEVEL < dpm_table->count) {
@@ -3485,9 +3496,9 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
        /* memclk */
        dpm_table = &(data->dpm_table.mem_table);
        dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 
        if (PP_CAP(PHM_PlatformCaps_UMDPState)) {
                if (VEGA20_UMD_PSTATE_MCLK_LEVEL < dpm_table->count) {
@@ -3526,12 +3537,21 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
        if (hwmgr->display_config->nb_pstate_switch_disable)
                dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
 
+       /* fclk */
+       dpm_table = &(data->dpm_table.fclk_table);
+       dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
+       dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
+       if (hwmgr->display_config->nb_pstate_switch_disable)
+               dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+
        /* vclk */
        dpm_table = &(data->dpm_table.vclk_table);
        dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 
        if (PP_CAP(PHM_PlatformCaps_UMDPState)) {
                if (VEGA20_UMD_PSTATE_UVDCLK_LEVEL < dpm_table->count) {
@@ -3548,9 +3568,9 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
        /* dclk */
        dpm_table = &(data->dpm_table.dclk_table);
        dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 
        if (PP_CAP(PHM_PlatformCaps_UMDPState)) {
                if (VEGA20_UMD_PSTATE_UVDCLK_LEVEL < dpm_table->count) {
@@ -3567,9 +3587,9 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
        /* socclk */
        dpm_table = &(data->dpm_table.soc_table);
        dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 
        if (PP_CAP(PHM_PlatformCaps_UMDPState)) {
                if (VEGA20_UMD_PSTATE_SOCCLK_LEVEL < dpm_table->count) {
@@ -3586,9 +3606,9 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
        /* eclk */
        dpm_table = &(data->dpm_table.eclk_table);
        dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
        dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
-       dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+       dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
 
        if (PP_CAP(PHM_PlatformCaps_UMDPState)) {
                if (VEGA20_UMD_PSTATE_VCEMCLK_LEVEL < dpm_table->count) {
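
The vega20_hwmgr.c hunks above share a few themes: clock soft and hard maximums default to the VG20_CLOCK_MAX_DEFAULT sentinel (defined in the header hunk below) instead of a literal 0xffff or the last table entry; FCLK gets full plumbing, meaning a bootup value, a one-entry fallback DPM table, pstate-switch handling and a set-to-highest-level helper mirroring the existing UCLK one; the per-clock getters fail quietly when their DPM feature is disabled so that sysfs printing can fall back to a single "current clock (DPM disabled)" line; and once pcie_parameters_override is set, level 1 of the PCIe table is reported with the overridden gen speed and lane width.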
index 37f5f5e657da796a2610cc86d21e8170520405b7..a5bc758ae09728327bd1230dbbf9969460eba263 100644 (file)
@@ -42,6 +42,8 @@
 #define AVFS_CURVE 0
 #define OD8_HOTCURVE_TEMPERATURE 85
 
+#define VG20_CLOCK_MAX_DEFAULT 0xFFFF
+
 typedef uint32_t PP_Clock;
 
 enum {
@@ -219,6 +221,7 @@ struct vega20_vbios_boot_state {
        uint32_t    eclock;
        uint32_t    dclock;
        uint32_t    vclock;
+       uint32_t    fclock;
 };
 
 #define DPMTABLE_OD_UPDATE_SCLK     0x00000001
@@ -523,6 +526,10 @@ struct vega20_hwmgr {
 
        unsigned long                  metrics_time;
        SmuMetrics_t                   metrics_table;
+
+       bool                           pcie_parameters_override;
+       uint32_t                       pcie_gen_level1;
+       uint32_t                       pcie_width_level1;
 };
 
 #define VEGA20_DPM2_NEAR_TDP_DEC                      10
index 97f8a1a970c37e124c8e5b07727f7ce6e32e8849..7a7f15d0c53afaacb70df7df97fa983e4ad35715 100644 (file)
@@ -32,6 +32,8 @@
 #include "cgs_common.h"
 #include "vega20_pptable.h"
 
+#define VEGA20_FAN_TARGET_TEMPERATURE_OVERRIDE 105
+
 static void set_hw_cap(struct pp_hwmgr *hwmgr, bool enable,
                enum phm_platform_caps cap)
 {
@@ -798,6 +800,17 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
        return 0;
 }
 
+static int override_powerplay_table_fantargettemperature(struct pp_hwmgr *hwmgr)
+{
+       struct phm_ppt_v3_information *pptable_information =
+               (struct phm_ppt_v3_information *)hwmgr->pptable;
+       PPTable_t *ppsmc_pptable = (PPTable_t *)(pptable_information->smc_pptable);
+
+       ppsmc_pptable->FanTargetTemperature = VEGA20_FAN_TARGET_TEMPERATURE_OVERRIDE;
+
+       return 0;
+}
+
 #define VEGA20_ENGINECLOCK_HARDMAX 198000
 static int init_powerplay_table_information(
                struct pp_hwmgr *hwmgr,
@@ -887,6 +900,10 @@ static int init_powerplay_table_information(
 
 
        result = append_vbios_pptable(hwmgr, (pptable_information->smc_pptable));
+       if (result)
+               return result;
+
+       result = override_powerplay_table_fantargettemperature(hwmgr);
 
        return result;
 }
index 52abca065764ad1ef440328d2392cd16a9d9b2ad..2d4cfe14f72e03353c84aad2835a41ed91d85f44 100644 (file)
@@ -2330,6 +2330,7 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member)
                case DRAM_LOG_BUFF_SIZE:
                        return offsetof(SMU74_SoftRegisters, DRAM_LOG_BUFF_SIZE);
                }
+               break;
        case SMU_Discrete_DpmTable:
                switch (member) {
                case UvdBootLevel:
@@ -2339,6 +2340,7 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member)
                case LowSclkInterruptThreshold:
                        return offsetof(SMU74_Discrete_DpmTable, LowSclkInterruptThreshold);
                }
+               break;
        }
        pr_warn("can't get the offset of type %x member %x\n", type, member);
        return 0;
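
Without the added break statements, a member that matched no case of the SMU_SoftRegisters inner switch fell straight through into the SMU_Discrete_DpmTable handling and could return an offset into the wrong table instead of reaching the pr_warn() below.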
index 079fc8e8f709f39d1ca764e6893fefaa640f58bc..742b3dc1f6cba748012c054da7cc77849d6d270b 100644 (file)
@@ -40,10 +40,8 @@ bool smu9_is_smc_ram_running(struct pp_hwmgr *hwmgr)
        struct amdgpu_device *adev = hwmgr->adev;
        uint32_t mp1_fw_flags;
 
-       WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2,
-                       (MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)));
-
-       mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2);
+       mp1_fw_flags = RREG32_PCIE(MP1_Public |
+                                  (smnMP1_FIRMWARE_FLAGS & 0xffffffff));
 
        if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK)
                return true;
index b7ff7d4d6f448d1145ed97cc4805f2dc9e7a9d73..ba00744c3413f53e03db1aa9c91226ed9ddc5c6a 100644 (file)
@@ -49,10 +49,8 @@ static bool vega20_is_smc_ram_running(struct pp_hwmgr *hwmgr)
        struct amdgpu_device *adev = hwmgr->adev;
        uint32_t mp1_fw_flags;
 
-       WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2,
-                    (MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)));
-
-       mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2);
+       mp1_fw_flags = RREG32_PCIE(MP1_Public |
+                                  (smnMP1_FIRMWARE_FLAGS & 0xffffffff));
 
        if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
            MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
index 540a77a2ade9d80bb350185286c8e795d9c99a64..40ac1984803459b7a0e8f67e09f81b61820035ef 100644 (file)
@@ -3039,9 +3039,31 @@ commit:
        return 0;
 }
 
-static int __drm_atomic_helper_disable_all(struct drm_device *dev,
-                                          struct drm_modeset_acquire_ctx *ctx,
-                                          bool clean_old_fbs)
+/**
+ * drm_atomic_helper_disable_all - disable all currently active outputs
+ * @dev: DRM device
+ * @ctx: lock acquisition context
+ *
+ * Loops through all connectors, finding those that aren't turned off and then
+ * turns them off by setting their DPMS mode to OFF and deactivating the CRTC
+ * that they are connected to.
+ *
+ * This is used for example in suspend/resume to disable all currently active
+ * functions when suspending. If you just want to shut down everything at e.g.
+ * driver unload, look at drm_atomic_helper_shutdown().
+ *
+ * Note that if callers haven't already acquired all modeset locks this might
+ * return -EDEADLK, which must be handled by calling drm_modeset_backoff().
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ *
+ * See also:
+ * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and
+ * drm_atomic_helper_shutdown().
+ */
+int drm_atomic_helper_disable_all(struct drm_device *dev,
+                                 struct drm_modeset_acquire_ctx *ctx)
 {
        struct drm_atomic_state *state;
        struct drm_connector_state *conn_state;
@@ -3099,35 +3121,6 @@ free:
        drm_atomic_state_put(state);
        return ret;
 }
-
-/**
- * drm_atomic_helper_disable_all - disable all currently active outputs
- * @dev: DRM device
- * @ctx: lock acquisition context
- *
- * Loops through all connectors, finding those that aren't turned off and then
- * turns them off by setting their DPMS mode to OFF and deactivating the CRTC
- * that they are connected to.
- *
- * This is used for example in suspend/resume to disable all currently active
- * functions when suspending. If you just want to shut down everything at e.g.
- * driver unload, look at drm_atomic_helper_shutdown().
- *
- * Note that if callers haven't already acquired all modeset locks this might
- * return -EDEADLK, which must be handled by calling drm_modeset_backoff().
- *
- * Returns:
- * 0 on success or a negative error code on failure.
- *
- * See also:
- * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and
- * drm_atomic_helper_shutdown().
- */
-int drm_atomic_helper_disable_all(struct drm_device *dev,
-                                 struct drm_modeset_acquire_ctx *ctx)
-{
-       return __drm_atomic_helper_disable_all(dev, ctx, false);
-}
 EXPORT_SYMBOL(drm_atomic_helper_disable_all);
 
 /**
@@ -3148,7 +3141,7 @@ void drm_atomic_helper_shutdown(struct drm_device *dev)
 
        DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
-       ret = __drm_atomic_helper_disable_all(dev, &ctx, true);
+       ret = drm_atomic_helper_disable_all(dev, &ctx);
        if (ret)
                DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret);
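
With the clean_old_fbs flag gone there is nothing left to wrap, so the kernel-doc and EXPORT_SYMBOL move onto the single remaining implementation and drm_atomic_helper_shutdown() calls it directly. A minimal caller under the documented locking contract, illustrative and mirroring the shutdown path above:

        struct drm_modeset_acquire_ctx ctx;
        int ret;

        DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
        /* -EDEADLK is handled by the macro pair via drm_modeset_backoff() */
        ret = drm_atomic_helper_disable_all(dev, &ctx);
        DRM_MODESET_LOCK_ALL_END(ctx, ret);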
 
index 67b1fca39aa6c76755816dee80da87805fb8d70d..0e3043e08c694b58051f1eb9a96713c5612e5b98 100644 (file)
@@ -185,7 +185,7 @@ static int compat_drm_getmap(struct file *file, unsigned int cmd,
        m32.size = map.size;
        m32.type = map.type;
        m32.flags = map.flags;
-       m32.handle = ptr_to_compat(map.handle);
+       m32.handle = ptr_to_compat((void __user *)map.handle);
        m32.mtrr = map.mtrr;
        if (copy_to_user(argp, &m32, sizeof(m32)))
                return -EFAULT;
@@ -216,7 +216,7 @@ static int compat_drm_addmap(struct file *file, unsigned int cmd,
 
        m32.offset = map.offset;
        m32.mtrr = map.mtrr;
-       m32.handle = ptr_to_compat(map.handle);
+       m32.handle = ptr_to_compat((void __user *)map.handle);
        if (map.handle != compat_ptr(m32.handle))
                pr_err_ratelimited("compat_drm_addmap truncated handle %p for type %d offset %x\n",
                                   map.handle, m32.type, m32.offset);
@@ -526,7 +526,7 @@ static int compat_drm_getsareactx(struct file *file, unsigned int cmd,
        if (err)
                return err;
 
-       req32.handle = ptr_to_compat(req.handle);
+       req32.handle = ptr_to_compat((void __user *)req.handle);
        if (copy_to_user(argp, &req32, sizeof(req32)))
                return -EFAULT;
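
map.handle is a plain kernel pointer in struct drm_map, while ptr_to_compat() takes a void __user * argument; the explicit casts satisfy sparse's address-space checking without changing the generated code.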
 
index 041a77e400d4ef8ad4b2b680117e9f42428c7baa..21df44b78df3c76220ea750821885f611745abfb 100644 (file)
@@ -2,7 +2,6 @@
 config DRM_ETNAVIV
        tristate "ETNAVIV (DRM support for Vivante GPU IP cores)"
        depends on DRM
-       depends on ARCH_MXC || ARCH_DOVE || (ARM && COMPILE_TEST)
        depends on MMU
        select SHMEM
        select SYNC_FILE
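
Dropping the ARCH_MXC/ARCH_DOVE/ARM dependency leaves only DRM and MMU, making the driver selectable on any architecture rather than only on the listed ARM platforms or under COMPILE_TEST; Vivante IP cores are not ARM-specific.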
index acb68c69836389832475c474a506a548d2ac9903..4d5d1a77eb2abd1c9b1afb8ffc9bac7a5a8b064c 100644 (file)
@@ -15,8 +15,6 @@ struct etnaviv_perfmon_request;
 struct etnaviv_cmdbuf {
        /* suballocator this cmdbuf is allocated from */
        struct etnaviv_cmdbuf_suballoc *suballoc;
-       /* user context key, must be unique between all active users */
-       struct etnaviv_file_private *ctx;
        /* cmdbuf properties */
        int suballoc_offset;
        void *vaddr;
index 3fbb4855396cc98505eeb2062de6579c01478d57..33854c94cb858f1cf62209e5f1c5e8d5de69e3e1 100644 (file)
@@ -215,7 +215,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
                mutex_lock(&obj->lock);
                pages = etnaviv_gem_get_pages(obj);
                mutex_unlock(&obj->lock);
-               if (pages) {
+               if (!IS_ERR(pages)) {
                        int j;
 
                        iter.hdr->data[0] = bomap - bomap_start;
index 76079c2291f88336521e412246a0646d2fcfb215..f0abb744ef9554e21772db45588859cc310c4937 100644 (file)
@@ -95,6 +95,7 @@ struct etnaviv_gem_submit_bo {
 struct etnaviv_gem_submit {
        struct drm_sched_job sched_job;
        struct kref refcount;
+       struct etnaviv_file_private *ctx;
        struct etnaviv_gpu *gpu;
        struct dma_fence *out_fence, *in_fence;
        int out_fence_id;
index 0566171f8df22f65e0236bf37afe204e828f688b..f21529e635e3d8d9ae16ffd440a425ae5a93abf0 100644 (file)
@@ -15,7 +15,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
        int npages = obj->size >> PAGE_SHIFT;
 
        if (WARN_ON(!etnaviv_obj->pages))  /* should have already pinned! */
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        return drm_prime_pages_to_sg(etnaviv_obj->pages, npages);
 }
index 30875f8f293371ccd240be02b826302a8abeed08..b2fe3446bfbcd35393e2203ba073fd568120a0b3 100644 (file)
@@ -506,7 +506,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
        if (ret)
                goto err_submit_objects;
 
-       submit->cmdbuf.ctx = file->driver_priv;
+       submit->ctx = file->driver_priv;
        submit->exec_state = args->exec_state;
        submit->flags = args->flags;
 
index f1c88d8ad5ba880fefbd123c74986ce6154713af..f794e04be9e67162bceee8068115851adde103b0 100644 (file)
@@ -320,8 +320,8 @@ etnaviv_iommuv2_domain_alloc(struct etnaviv_gpu *gpu)
        domain = &etnaviv_domain->base;
 
        domain->dev = gpu->dev;
-       domain->base = 0;
-       domain->size = (u64)SZ_1G * 4;
+       domain->base = SZ_4K;
+       domain->size = (u64)SZ_1G * 4 - SZ_4K;
        domain->ops = &etnaviv_iommuv2_ops;
 
        ret = etnaviv_iommuv2_init(etnaviv_domain);
index 9980d81a26e3cef9816646bc82733f89548dd778..4227a4006c34963800563838a90c067a9cee95c4 100644 (file)
@@ -113,7 +113,7 @@ static const struct etnaviv_pm_domain doms_3d[] = {
                .name = "PE",
                .profile_read = VIVS_MC_PROFILE_PE_READ,
                .profile_config = VIVS_MC_PROFILE_CONFIG0,
-               .nr_signals = 5,
+               .nr_signals = 4,
                .signal = (const struct etnaviv_pm_signal[]) {
                        {
                                "PIXEL_COUNT_KILLED_BY_COLOR_PIPE",
@@ -435,7 +435,7 @@ int etnaviv_pm_query_sig(struct etnaviv_gpu *gpu,
 
        dom = meta->domains + signal->domain;
 
-       if (signal->iter > dom->nr_signals)
+       if (signal->iter >= dom->nr_signals)
                return -EINVAL;
 
        sig = &dom->signal[signal->iter];
@@ -461,7 +461,7 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r,
 
        dom = meta->domains + r->domain;
 
-       if (r->signal > dom->nr_signals)
+       if (r->signal >= dom->nr_signals)
                return -EINVAL;
 
        return 0;
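
Note on the two ">=" fixes above: with a zero-based index into an array of
nr_signals entries, the last valid index is nr_signals - 1, so an index equal
to nr_signals must be rejected. A minimal user-space sketch of the corrected
bounds check (hypothetical names, not driver code):

    #include <stdio.h>

    #define NR_SIGNALS 4

    static const char *signals[NR_SIGNALS] = { "a", "b", "c", "d" };

    /* Reject out-of-range indices; '>' instead of '>=' would let
     * iter == NR_SIGNALS slip through and read one past the array. */
    static const char *lookup_signal(unsigned int iter)
    {
            if (iter >= NR_SIGNALS)
                    return NULL;
            return signals[iter];
    }

    int main(void)
    {
            printf("%s\n", lookup_signal(NR_SIGNALS - 1));     /* last valid */
            printf("%p\n", (void *)lookup_signal(NR_SIGNALS)); /* rejected */
            return 0;
    }
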
index 67ae266020244dcd4cad74582920526c94a91aae..6d24fea1766b004a590e1efceed987a33bed7e20 100644 (file)
@@ -153,7 +153,7 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
        mutex_lock(&submit->gpu->fence_lock);
 
        ret = drm_sched_job_init(&submit->sched_job, sched_entity,
-                                submit->cmdbuf.ctx);
+                                submit->ctx);
        if (ret)
                goto out_unlock;
 
index 215b6ff8aa7301ec16b1395b0356c27ff788e004..db7bb5bd5adde4863df871b71e1279d633ea74f6 100644 (file)
@@ -163,17 +163,25 @@ int i915_active_ref(struct i915_active *ref,
                    struct i915_request *rq)
 {
        struct i915_active_request *active;
+       int err = 0;
+
+       /* Prevent reaping in case we malloc/wait while building the tree */
+       i915_active_acquire(ref);
 
        active = active_instance(ref, timeline);
-       if (IS_ERR(active))
-               return PTR_ERR(active);
+       if (IS_ERR(active)) {
+               err = PTR_ERR(active);
+               goto out;
+       }
 
        if (!i915_active_request_isset(active))
                ref->count++;
        __i915_active_request_set(active, rq);
 
        GEM_BUG_ON(!ref->count);
-       return 0;
+out:
+       i915_active_release(ref);
+       return err;
 }
 
 bool i915_active_acquire(struct i915_active *ref)
@@ -223,19 +231,25 @@ int i915_request_await_active_request(struct i915_request *rq,
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
        struct active_node *it, *n;
-       int ret;
+       int err = 0;
 
-       ret = i915_request_await_active_request(rq, &ref->last);
-       if (ret)
-               return ret;
+       /* await allocates and so we need to avoid hitting the shrinker */
+       if (i915_active_acquire(ref))
+               goto out; /* was idle */
+
+       err = i915_request_await_active_request(rq, &ref->last);
+       if (err)
+               goto out;
 
        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-               ret = i915_request_await_active_request(rq, &it->base);
-               if (ret)
-                       return ret;
+               err = i915_request_await_active_request(rq, &it->base);
+               if (err)
+                       goto out;
        }
 
-       return 0;
+out:
+       i915_active_release(ref);
+       return err;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
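
The i915_active changes above follow one pattern: take a reference with
i915_active_acquire() before any step that may allocate or wait (so the
shrinker cannot reap the tree mid-walk), and funnel every exit through a
single label so the matching i915_active_release() always runs. A
self-contained sketch of that acquire/single-exit shape, with placeholder
names (guard_*, step_*) that are not i915 APIs:

    #include <stdio.h>

    struct object { int pinned; };

    static void guard_acquire(struct object *obj) { obj->pinned++; }
    static void guard_release(struct object *obj) { obj->pinned--; }
    static int step_one(struct object *obj) { (void)obj; return 0; }
    static int step_two(struct object *obj) { (void)obj; return -1; }

    /* Every exit path funnels through 'out:' so the release always
     * pairs with the acquire, even on error. */
    static int do_work_under_guard(struct object *obj)
    {
            int err;

            guard_acquire(obj); /* keep obj alive while we may alloc/wait */

            err = step_one(obj);
            if (err)
                    goto out;

            err = step_two(obj); /* simulated failure */
    out:
            guard_release(obj);
            return err;
    }

    int main(void)
    {
            struct object obj = { 0 };
            int err = do_work_under_guard(&obj);

            printf("err=%d pinned=%d\n", err, obj.pinned);
            return 0;
    }
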
index 6630212f2faf3375dd030273aec226b369787486..9df65d386d11b40349df879fec224f0990a40b62 100644 (file)
@@ -757,39 +757,6 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
        return ret;
 }
 
-#if !defined(CONFIG_VGA_CONSOLE)
-static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
-{
-       return 0;
-}
-#elif !defined(CONFIG_DUMMY_CONSOLE)
-static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
-{
-       return -ENODEV;
-}
-#else
-static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
-{
-       int ret = 0;
-
-       DRM_INFO("Replacing VGA console driver\n");
-
-       console_lock();
-       if (con_is_bound(&vga_con))
-               ret = do_take_over_console(&dummy_con, 0, MAX_NR_CONSOLES - 1, 1);
-       if (ret == 0) {
-               ret = do_unregister_con_driver(&vga_con);
-
-               /* Ignore "already unregistered". */
-               if (ret == -ENODEV)
-                       ret = 0;
-       }
-       console_unlock();
-
-       return ret;
-}
-#endif
-
 static void intel_init_dpio(struct drm_i915_private *dev_priv)
 {
        /*
@@ -1420,7 +1387,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
                goto err_ggtt;
        }
 
-       ret = i915_kick_out_vgacon(dev_priv);
+       ret = vga_remove_vgacon(pdev);
        if (ret) {
                DRM_ERROR("failed to remove conflicting VGA console\n");
                goto err_ggtt;
index 6728ea5c71d4c2916a37daa20ec767252a96d0ff..30d516e975c64697b2b45019ba747c6251b7ecf1 100644 (file)
@@ -1688,7 +1688,8 @@ __vma_matches(struct vm_area_struct *vma, struct file *filp,
        if (vma->vm_file != filp)
                return false;
 
-       return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size;
+       return vma->vm_start == addr &&
+              (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
 }
 
 /**
index d01683167c7747e58ea2d85e9dcf2cbc45fb0c3e..8bc042551692c3db5b0ff30b14bcf46c905ede2d 100644 (file)
@@ -223,8 +223,14 @@ out:
        return &p->requests[idx];
 }
 
+struct sched_cache {
+       struct list_head *priolist;
+};
+
 static struct intel_engine_cs *
-sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
+sched_lock_engine(const struct i915_sched_node *node,
+                 struct intel_engine_cs *locked,
+                 struct sched_cache *cache)
 {
        struct intel_engine_cs *engine = node_to_request(node)->engine;
 
@@ -232,6 +238,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 
        if (engine != locked) {
                spin_unlock(&locked->timeline.lock);
+               memset(cache, 0, sizeof(*cache));
                spin_lock(&engine->timeline.lock);
        }
 
@@ -253,11 +260,11 @@ static bool inflight(const struct i915_request *rq,
 static void __i915_schedule(struct i915_request *rq,
                            const struct i915_sched_attr *attr)
 {
-       struct list_head *uninitialized_var(pl);
-       struct intel_engine_cs *engine, *last;
+       struct intel_engine_cs *engine;
        struct i915_dependency *dep, *p;
        struct i915_dependency stack;
        const int prio = attr->priority;
+       struct sched_cache cache;
        LIST_HEAD(dfs);
 
        /* Needed in order to use the temporary link inside i915_dependency */
@@ -328,7 +335,7 @@ static void __i915_schedule(struct i915_request *rq,
                __list_del_entry(&stack.dfs_link);
        }
 
-       last = NULL;
+       memset(&cache, 0, sizeof(cache));
        engine = rq->engine;
        spin_lock_irq(&engine->timeline.lock);
 
@@ -338,7 +345,7 @@ static void __i915_schedule(struct i915_request *rq,
 
                INIT_LIST_HEAD(&dep->dfs_link);
 
-               engine = sched_lock_engine(node, engine);
+               engine = sched_lock_engine(node, engine, &cache);
                lockdep_assert_held(&engine->timeline.lock);
 
                /* Recheck after acquiring the engine->timeline.lock */
@@ -347,11 +354,11 @@ static void __i915_schedule(struct i915_request *rq,
 
                node->attr.priority = prio;
                if (!list_empty(&node->link)) {
-                       if (last != engine) {
-                               pl = i915_sched_lookup_priolist(engine, prio);
-                               last = engine;
-                       }
-                       list_move_tail(&node->link, pl);
+                       if (!cache.priolist)
+                               cache.priolist =
+                                       i915_sched_lookup_priolist(engine,
+                                                                  prio);
+                       list_move_tail(&node->link, cache.priolist);
                } else {
                        /*
                         * If the request is not in the priolist queue because
index cacaa1d04d174cab452231ae622c16b0cdbb98b1..09ed90c0ba0070110f4f98c53e1077958236652d 100644 (file)
@@ -106,16 +106,6 @@ bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 
                        GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
                                             &rq->fence.flags));
-                       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
-                       /*
-                        * We may race with direct invocation of
-                        * dma_fence_signal(), e.g. i915_request_retire(),
-                        * in which case we can skip processing it ourselves.
-                        */
-                       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-                                    &rq->fence.flags))
-                               continue;
 
                        /*
                         * Queue for execution after dropping the signaling
@@ -123,6 +113,14 @@ bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
                         * more signalers to the same context or engine.
                         */
                        i915_request_get(rq);
+
+                       /*
+                        * We may race with direct invocation of
+                        * dma_fence_signal(), e.g. i915_request_retire(),
+                        * so we need to acquire our reference to the request
+                        * before we cancel the breadcrumb.
+                        */
+                       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
                        list_add_tail(&rq->signal_link, &signal);
                }
 
index ca705546a0abe7380ebfa87f1a21a4da4a9ed133..14d580cdefd3e875e08b7af0be350d4f877fb7ef 100644 (file)
@@ -3568,6 +3568,13 @@ static void intel_ddi_update_pipe(struct intel_encoder *encoder,
 {
        if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI))
                intel_ddi_update_pipe_dp(encoder, crtc_state, conn_state);
+
+       if (conn_state->content_protection ==
+           DRM_MODE_CONTENT_PROTECTION_DESIRED)
+               intel_hdcp_enable(to_intel_connector(conn_state->connector));
+       else if (conn_state->content_protection ==
+                DRM_MODE_CONTENT_PROTECTION_UNDESIRED)
+               intel_hdcp_disable(to_intel_connector(conn_state->connector));
 }
 
 static void intel_ddi_set_fia_lane_count(struct intel_encoder *encoder,
@@ -3962,12 +3969,7 @@ static int modeset_pipe(struct drm_crtc *crtc,
                goto out;
 
        ret = drm_atomic_commit(state);
-       if (ret)
-               goto out;
-
-       return 0;
-
- out:
+out:
        drm_atomic_state_put(state);
 
        return ret;
index d00d0bb07784229a323145336803802479c6ebb9..7eb58a9d1319f9c70f146e5d53e57a699d3b3272 100644 (file)
@@ -710,47 +710,45 @@ __sseu_prepare(struct drm_i915_private *i915,
               unsigned int flags,
               struct i915_gem_context *ctx,
               struct intel_engine_cs *engine,
-              struct igt_spinner **spin_out)
+              struct igt_spinner **spin)
 {
-       int ret = 0;
-
-       if (flags & (TEST_BUSY | TEST_RESET)) {
-               struct igt_spinner *spin;
-               struct i915_request *rq;
+       struct i915_request *rq;
+       int ret;
 
-               spin = kzalloc(sizeof(*spin), GFP_KERNEL);
-               if (!spin) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+       *spin = NULL;
+       if (!(flags & (TEST_BUSY | TEST_RESET)))
+               return 0;
 
-               ret = igt_spinner_init(spin, i915);
-               if (ret)
-                       return ret;
+       *spin = kzalloc(sizeof(**spin), GFP_KERNEL);
+       if (!*spin)
+               return -ENOMEM;
 
-               rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
-               if (IS_ERR(rq)) {
-                       ret = PTR_ERR(rq);
-                       igt_spinner_fini(spin);
-                       kfree(spin);
-                       goto out;
-               }
+       ret = igt_spinner_init(*spin, i915);
+       if (ret)
+               goto err_free;
 
-               i915_request_add(rq);
+       rq = igt_spinner_create_request(*spin, ctx, engine, MI_NOOP);
+       if (IS_ERR(rq)) {
+               ret = PTR_ERR(rq);
+               goto err_fini;
+       }
 
-               if (!igt_wait_for_spinner(spin, rq)) {
-                       pr_err("%s: Spinner failed to start!\n", name);
-                       igt_spinner_end(spin);
-                       igt_spinner_fini(spin);
-                       kfree(spin);
-                       ret = -ETIMEDOUT;
-                       goto out;
-               }
+       i915_request_add(rq);
 
-               *spin_out = spin;
+       if (!igt_wait_for_spinner(*spin, rq)) {
+               pr_err("%s: Spinner failed to start!\n", name);
+               ret = -ETIMEDOUT;
+               goto err_end;
        }
 
-out:
+       return 0;
+
+err_end:
+       igt_spinner_end(*spin);
+err_fini:
+       igt_spinner_fini(*spin);
+err_free:
+       kfree(fetch_and_zero(spin));
        return ret;
 }
 
@@ -897,22 +895,23 @@ __sseu_test(struct drm_i915_private *i915,
 
        ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
        if (ret)
-               goto out;
+               goto out_context;
 
        ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
        if (ret)
-               goto out;
+               goto out_spin;
 
        ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj,
                            hweight32(sseu.slice_mask), spin);
 
-out:
+out_spin:
        if (spin) {
                igt_spinner_end(spin);
                igt_spinner_fini(spin);
                kfree(spin);
        }
 
+out_context:
        kernel_context_close(kctx);
 
        return ret;
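
The reworked error unwind above leans on i915's fetch_and_zero() helper:
kfree(fetch_and_zero(spin)) frees the allocation and clears the
caller-visible *spin in one expression, so no dangling pointer escapes a
failed __sseu_prepare(). A sketch of the idiom; the macro below mirrors the
common definition, but i915_utils.h is authoritative:

    #include <stdio.h>
    #include <stdlib.h>

    /* Read *ptr, zero it in place, hand back the old value (GNU C). */
    #define fetch_and_zero(ptr) ({                  \
            typeof(*(ptr)) __T = *(ptr);            \
            *(ptr) = (typeof(*(ptr)))0;             \
            __T;                                    \
    })

    int main(void)
    {
            int *p = malloc(sizeof(*p));

            free(fetch_and_zero(&p)); /* frees the block and NULLs p */
            printf("p = %p\n", (void *)p);
            return 0;
    }
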
index bb81e310eb6d391971c1607c5e10a20d620efef0..578d867a81d59aa476d56693d7f51399f3065dee 100644 (file)
@@ -79,6 +79,10 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (ret)
                goto free_dev;
 
+       ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "qxl");
+       if (ret)
+               goto disable_pci;
+
        ret = qxl_device_init(qdev, &qxl_driver, pdev);
        if (ret)
                goto disable_pci;
@@ -94,7 +98,6 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (ret)
                goto modeset_cleanup;
 
-       drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "qxl");
        drm_fbdev_generic_setup(&qdev->ddev, 32);
        return 0;
 
index dc8e039bfab57f207801afba2c0bfb0e6a2acd38..f2f3ef8af2710f6f60b49309122c8b2dd12da1a3 100644 (file)
@@ -48,6 +48,8 @@
 #include <linux/miscdevice.h>
 #include <linux/slab.h>
 #include <linux/screen_info.h>
+#include <linux/vt.h>
+#include <linux/console.h>
 
 #include <linux/uaccess.h>
 
@@ -168,6 +170,53 @@ void vga_set_default_device(struct pci_dev *pdev)
        vga_default = pci_dev_get(pdev);
 }
 
+/**
+ * vga_remove_vgacon - deactivate vga console
+ *
+ * Unbind and unregister vgacon in case pdev is the default VGA
+ * device.  Can be called by GPU drivers during initialization to
+ * make sure VGA register accesses done by vgacon will not disturb
+ * the device.
+ *
+ * @pdev: pci device.
+ */
+#if !defined(CONFIG_VGA_CONSOLE)
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+       return 0;
+}
+#elif !defined(CONFIG_DUMMY_CONSOLE)
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+       return -ENODEV;
+}
+#else
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+       int ret = 0;
+
+       if (pdev != vga_default)
+               return 0;
+       vgaarb_info(&pdev->dev, "deactivate vga console\n");
+
+       console_lock();
+       if (con_is_bound(&vga_con))
+               ret = do_take_over_console(&dummy_con, 0,
+                                          MAX_NR_CONSOLES - 1, 1);
+       if (ret == 0) {
+               ret = do_unregister_con_driver(&vga_con);
+
+               /* Ignore "already unregistered". */
+               if (ret == -ENODEV)
+                       ret = 0;
+       }
+       console_unlock();
+
+       return ret;
+}
+#endif
+EXPORT_SYMBOL(vga_remove_vgacon);
+
 static inline void vga_irq_set_state(struct vga_device *vgadev, bool state)
 {
        if (vgadev->irq_set_state)
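
With the helper exported, the i915 hunk earlier in this diff reduces to a
one-line call during hardware init. A hedged sketch of how another PCI GPU
driver could use it from probe (my_gpu_probe is hypothetical, and it assumes
the declaration lands in linux/vgaarb.h alongside the other arbiter helpers):

    #include <linux/pci.h>
    #include <linux/vgaarb.h>

    static int my_gpu_probe(struct pci_dev *pdev,
                            const struct pci_device_id *ent)
    {
            int ret;

            ret = pci_enable_device(pdev);
            if (ret)
                    return ret;

            /* Unbind vgacon before the driver starts touching VGA
             * registers; a no-op unless pdev is the default VGA device. */
            ret = vga_remove_vgacon(pdev);
            if (ret) {
                    dev_err(&pdev->dev, "failed to remove VGA console\n");
                    pci_disable_device(pdev);
                    return ret;
            }

            /* ... rest of device initialization ... */
            return 0;
    }
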
index 9a63e87ea5f3f5d647f1ab0caaf765629e6d41e5..be302ec5f66bdd0be912fa31b7c922d77bedd1fe 100644 (file)
@@ -871,7 +871,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
        }
 
        pm_runtime_put(&adev->dev);
-       dev_info(dev, "%s initialized\n", (char *)id->data);
+       dev_info(dev, "%s initialized\n", (char *)coresight_get_uci_data(id));
        if (boot_enable) {
                coresight_enable(drvdata->csdev);
                drvdata->boot_enable = true;
@@ -915,36 +915,18 @@ static const struct dev_pm_ops etm_dev_pm_ops = {
 };
 
 static const struct amba_id etm_ids[] = {
-       {       /* ETM 3.3 */
-               .id     = 0x000bb921,
-               .mask   = 0x000fffff,
-               .data   = "ETM 3.3",
-       },
-       {       /* ETM 3.5 - Cortex-A5 */
-               .id     = 0x000bb955,
-               .mask   = 0x000fffff,
-               .data   = "ETM 3.5",
-       },
-       {       /* ETM 3.5 */
-               .id     = 0x000bb956,
-               .mask   = 0x000fffff,
-               .data   = "ETM 3.5",
-       },
-       {       /* PTM 1.0 */
-               .id     = 0x000bb950,
-               .mask   = 0x000fffff,
-               .data   = "PTM 1.0",
-       },
-       {       /* PTM 1.1 */
-               .id     = 0x000bb95f,
-               .mask   = 0x000fffff,
-               .data   = "PTM 1.1",
-       },
-       {       /* PTM 1.1 Qualcomm */
-               .id     = 0x000b006f,
-               .mask   = 0x000fffff,
-               .data   = "PTM 1.1",
-       },
+       /* ETM 3.3 */
+       CS_AMBA_ID_DATA(0x000bb921, "ETM 3.3"),
+       /* ETM 3.5 - Cortex-A5 */
+       CS_AMBA_ID_DATA(0x000bb955, "ETM 3.5"),
+       /* ETM 3.5 */
+       CS_AMBA_ID_DATA(0x000bb956, "ETM 3.5"),
+       /* PTM 1.0 */
+       CS_AMBA_ID_DATA(0x000bb950, "PTM 1.0"),
+       /* PTM 1.1 */
+       CS_AMBA_ID_DATA(0x000bb95f, "PTM 1.1"),
+       /* PTM 1.1 Qualcomm */
+       CS_AMBA_ID_DATA(0x000b006f, "PTM 1.1"),
        { 0, 0},
 };
 
index fe76b176974a67d87ef95fbb1170a778ab9d1187..08ce37c9475da79a3d12345a5880fa0f08fc054d 100644 (file)
@@ -1068,18 +1068,21 @@ err_arch_supported:
        return ret;
 }
 
-#define ETM4x_AMBA_ID(pid)                     \
-       {                                       \
-               .id     = pid,                  \
-               .mask   = 0x000fffff,           \
+static struct amba_cs_uci_id uci_id_etm4[] = {
+       {
+               /*  ETMv4 UCI data */
+               .devarch        = 0x47704a13,
+               .devarch_mask   = 0xfff0ffff,
+               .devtype        = 0x00000013,
        }
+};
 
 static const struct amba_id etm4_ids[] = {
-       ETM4x_AMBA_ID(0x000bb95d),              /* Cortex-A53 */
-       ETM4x_AMBA_ID(0x000bb95e),              /* Cortex-A57 */
-       ETM4x_AMBA_ID(0x000bb95a),              /* Cortex-A72 */
-       ETM4x_AMBA_ID(0x000bb959),              /* Cortex-A73 */
-       ETM4x_AMBA_ID(0x000bb9da),              /* Cortex-A35 */
+       CS_AMBA_ID(0x000bb95d),         /* Cortex-A53 */
+       CS_AMBA_ID(0x000bb95e),         /* Cortex-A57 */
+       CS_AMBA_ID(0x000bb95a),         /* Cortex-A72 */
+       CS_AMBA_ID(0x000bb959),         /* Cortex-A73 */
+       CS_AMBA_UCI_ID(0x000bb9da, uci_id_etm4),        /* Cortex-A35 */
        {},
 };
 
index b936c6d7e13f3a31b7668dcb508772d3dbf3d0e2..e0684d06e9ee9b0cf1a139cd84742cc559358d2b 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef _CORESIGHT_PRIV_H
 #define _CORESIGHT_PRIV_H
 
+#include <linux/amba/bus.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/coresight.h>
@@ -160,4 +161,43 @@ static inline int etm_readl_cp14(u32 off, unsigned int *val) { return 0; }
 static inline int etm_writel_cp14(u32 off, u32 val) { return 0; }
 #endif
 
+/*
+ * Macros and inline functions to handle CoreSight UCI data and driver
+ * private data in AMBA ID table entries, and extract data values.
+ */
+
+/* coresight AMBA ID, no UCI, no driver data: id table entry */
+#define CS_AMBA_ID(pid)                        \
+       {                               \
+               .id     = pid,          \
+               .mask   = 0x000fffff,   \
+       }
+
+/* coresight AMBA ID, UCI with driver data only: id table entry. */
+#define CS_AMBA_ID_DATA(pid, dval)                             \
+       {                                                       \
+               .id     = pid,                                  \
+               .mask   = 0x000fffff,                           \
+               .data   = (void *)&(struct amba_cs_uci_id)      \
+                       {                               \
+                               .data = (void *)dval,   \
+                       }                               \
+       }
+
+/* coresight AMBA ID, full UCI structure: id table entry. */
+#define CS_AMBA_UCI_ID(pid, uci_ptr)   \
+       {                               \
+               .id     = pid,          \
+               .mask   = 0x000fffff,   \
+               .data   = uci_ptr       \
+       }
+
+/* extract the data value from a UCI structure given amba_id pointer. */
+static inline void *coresight_get_uci_data(const struct amba_id *id)
+{
+       if (id->data)
+               return ((struct amba_cs_uci_id *)(id->data))->data;
+       return NULL;
+}
+
 #endif
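
CS_AMBA_ID_DATA works because a C99 compound literal used at file scope has
static storage duration, so taking the address of the anonymous struct
amba_cs_uci_id and storing it in the id table is safe. A user-space model of
the same shape (struct names here are simplified stand-ins, not the AMBA
types):

    #include <stdio.h>

    struct uci_id { void *data; };

    struct id_entry {
            unsigned int id;
            void *data;
    };

    /* Same trick as CS_AMBA_ID_DATA: wrap the payload in an anonymous
     * compound literal and store its address in the table entry. */
    #define ENTRY_DATA(pid, dval)                           \
            {                                               \
                    .id   = (pid),                          \
                    .data = (void *)&(struct uci_id){       \
                            .data = (void *)(dval),         \
                    },                                      \
            }

    static const struct id_entry table[] = {
            ENTRY_DATA(0x000bb921, "ETM 3.3"),
            ENTRY_DATA(0x000bb962, "STM32"),
    };

    int main(void)
    {
            unsigned int i;

            /* Equivalent of coresight_get_uci_data(): unwrap the payload. */
            for (i = 0; i < 2; i++)
                    printf("%#x -> %s\n", table[i].id,
                           (char *)((struct uci_id *)table[i].data)->data);
            return 0;
    }
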
index f07825df5c7a9a5119131048c71496a76ef381e6..9f8a844ed7aa304a40ab2b57750d9b8b1462b05d 100644 (file)
@@ -870,7 +870,7 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id)
 
        pm_runtime_put(&adev->dev);
 
-       dev_info(dev, "%s initialized\n", (char *)id->data);
+       dev_info(dev, "%s initialized\n", (char *)coresight_get_uci_data(id));
        return 0;
 
 stm_unregister:
@@ -905,16 +905,8 @@ static const struct dev_pm_ops stm_dev_pm_ops = {
 };
 
 static const struct amba_id stm_ids[] = {
-       {
-               .id     = 0x000bb962,
-               .mask   = 0x000fffff,
-               .data   = "STM32",
-       },
-       {
-               .id     = 0x000bb963,
-               .mask   = 0x000fffff,
-               .data   = "STM500",
-       },
+       CS_AMBA_ID_DATA(0x000bb962, "STM32"),
+       CS_AMBA_ID_DATA(0x000bb963, "STM500"),
        { 0, 0},
 };
 
index ea249f0bcd73c63e54645aab10dce41e5455c831..2a02da3d630f4e50fa3508dd8e94ef32edd49dae 100644 (file)
@@ -443,7 +443,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
                desc.type = CORESIGHT_DEV_TYPE_SINK;
                desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
                desc.ops = &tmc_etr_cs_ops;
-               ret = tmc_etr_setup_caps(drvdata, devid, id->data);
+               ret = tmc_etr_setup_caps(drvdata, devid,
+                                        coresight_get_uci_data(id));
                if (ret)
                        goto out;
                break;
@@ -475,26 +476,13 @@ out:
 }
 
 static const struct amba_id tmc_ids[] = {
-       {
-               .id     = 0x000bb961,
-               .mask   = 0x000fffff,
-       },
-       {
-               /* Coresight SoC 600 TMC-ETR/ETS */
-               .id     = 0x000bb9e8,
-               .mask   = 0x000fffff,
-               .data   = (void *)(unsigned long)CORESIGHT_SOC_600_ETR_CAPS,
-       },
-       {
-               /* Coresight SoC 600 TMC-ETB */
-               .id     = 0x000bb9e9,
-               .mask   = 0x000fffff,
-       },
-       {
-               /* Coresight SoC 600 TMC-ETF */
-               .id     = 0x000bb9ea,
-               .mask   = 0x000fffff,
-       },
+       CS_AMBA_ID(0x000bb961),
+       /* Coresight SoC 600 TMC-ETR/ETS */
+       CS_AMBA_ID_DATA(0x000bb9e8, (unsigned long)CORESIGHT_SOC_600_ETR_CAPS),
+       /* Coresight SoC 600 TMC-ETB */
+       CS_AMBA_ID(0x000bb9e9),
+       /* Coresight SoC 600 TMC-ETF */
+       CS_AMBA_ID(0x000bb9ea),
        { 0, 0},
 };
 
index ead5e7de3e4d199d47ea6d1a0ea4048757610e0b..416f89b8f8812598abcb73f44430e089f94b8c51 100644 (file)
@@ -86,7 +86,6 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev)
        struct i2c_timings *t = &dev->timings;
        u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0;
 
-       dev->adapter.nr = -1;
        dev->tx_fifo_depth = 32;
        dev->rx_fifo_depth = 32;
 
@@ -219,7 +218,7 @@ static void i2c_dw_configure_slave(struct dw_i2c_dev *dev)
        dev->mode = DW_IC_SLAVE;
 }
 
-static void dw_i2c_set_fifo_size(struct dw_i2c_dev *dev, int id)
+static void dw_i2c_set_fifo_size(struct dw_i2c_dev *dev)
 {
        u32 param, tx_fifo_depth, rx_fifo_depth;
 
@@ -233,7 +232,6 @@ static void dw_i2c_set_fifo_size(struct dw_i2c_dev *dev, int id)
        if (!dev->tx_fifo_depth) {
                dev->tx_fifo_depth = tx_fifo_depth;
                dev->rx_fifo_depth = rx_fifo_depth;
-               dev->adapter.nr = id;
        } else if (tx_fifo_depth >= 2) {
                dev->tx_fifo_depth = min_t(u32, dev->tx_fifo_depth,
                                tx_fifo_depth);
@@ -358,13 +356,14 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
                                div_u64(clk_khz * t->sda_hold_ns + 500000, 1000000);
        }
 
-       dw_i2c_set_fifo_size(dev, pdev->id);
+       dw_i2c_set_fifo_size(dev);
 
        adap = &dev->adapter;
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_DEPRECATED;
        ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev));
        adap->dev.of_node = pdev->dev.of_node;
+       adap->nr = -1;
 
        dev_pm_set_driver_flags(&pdev->dev,
                                DPM_FLAG_SMART_PREPARE |
index 660de1ee68ed4abb2149a1b53c3ccccc5baee50a..684d651612b3066a46a16e5fdcd0390bd513fac0 100644 (file)
@@ -503,7 +503,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
                writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG);
                writel(I2C_DMA_CON_RX, i2c->pdmabase + OFFSET_CON);
 
-               dma_rd_buf = i2c_get_dma_safe_msg_buf(msgs, 0);
+               dma_rd_buf = i2c_get_dma_safe_msg_buf(msgs, 1);
                if (!dma_rd_buf)
                        return -ENOMEM;
 
@@ -526,7 +526,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
                writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG);
                writel(I2C_DMA_CON_TX, i2c->pdmabase + OFFSET_CON);
 
-               dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 0);
+               dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1);
                if (!dma_wr_buf)
                        return -ENOMEM;
 
@@ -549,7 +549,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
                writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_INT_FLAG);
                writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_CON);
 
-               dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 0);
+               dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1);
                if (!dma_wr_buf)
                        return -ENOMEM;
 
@@ -561,7 +561,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
                        return -ENOMEM;
                }
 
-               dma_rd_buf = i2c_get_dma_safe_msg_buf((msgs + 1), 0);
+               dma_rd_buf = i2c_get_dma_safe_msg_buf((msgs + 1), 1);
                if (!dma_rd_buf) {
                        dma_unmap_single(i2c->dev, wpaddr,
                                         msgs->len, DMA_TO_DEVICE);
index dd52a068b140685ecf33c84a79ff89acda8bed26..a7578f6da9796647724fb346a88bd4a2e91a3b14 100644 (file)
@@ -363,9 +363,6 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
        struct dma_chan *chan = priv->dma_direction == DMA_FROM_DEVICE
                ? priv->dma_rx : priv->dma_tx;
 
-       /* Disable DMA Master Received/Transmitted */
-       rcar_i2c_write(priv, ICDMAER, 0);
-
        dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
                         sg_dma_len(&priv->sg), priv->dma_direction);
 
@@ -375,6 +372,9 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
                priv->flags |= ID_P_NO_RXDMA;
 
        priv->dma_direction = DMA_NONE;
+
+       /* Disable DMA Master Received/Transmitted, must be last! */
+       rcar_i2c_write(priv, ICDMAER, 0);
 }
 
 static void rcar_i2c_cleanup_dma(struct rcar_i2c_priv *priv)
@@ -611,6 +611,15 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv)
        return true;
 }
 
+/*
+ * This driver has a lock-free design because there are IP cores (at least
+ * R-Car Gen2) which have an inherent race condition in their hardware design.
+ * There, we need to clear RCAR_BUS_MASK_DATA bits as soon as possible after
+ * the interrupt was generated, otherwise an unwanted repeated message gets
+ * generated. It turned out that taking a spinlock at the beginning of the ISR
+ * was already causing repeated messages. Thus, this driver was converted to
+ * the now lockless behaviour. Please keep this in mind when hacking the driver.
+ */
 static irqreturn_t rcar_i2c_irq(int irq, void *ptr)
 {
        struct rcar_i2c_priv *priv = ptr;
index 1e6805b5cef23e810d5437a943f01793acb56466..a57aa4fe51a45729712ce70a0cda41492c2af2c3 100644 (file)
@@ -478,7 +478,7 @@ static int sis630_setup(struct pci_dev *sis630_dev)
        if (!request_region(smbus_base + SMB_STS, SIS630_SMB_IOREGION,
                            sis630_driver.name)) {
                dev_err(&sis630_dev->dev,
-                       "I/O Region 0x%04hx-0x%04hx for SMBus already in use.\n",
+                       "I/O Region 0x%04x-0x%04x for SMBus already in use.\n",
                        smbus_base + SMB_STS,
                        smbus_base + SMB_STS + SIS630_SMB_IOREGION - 1);
                retval = -EBUSY;
@@ -528,7 +528,7 @@ static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id)
        sis630_adapter.dev.parent = &dev->dev;
 
        snprintf(sis630_adapter.name, sizeof(sis630_adapter.name),
-                "SMBus SIS630 adapter at %04hx", smbus_base + SMB_STS);
+                "SMBus SIS630 adapter at %04x", smbus_base + SMB_STS);
 
        return i2c_add_adapter(&sis630_adapter);
 }
index 13e1213561d4b40335b44ff28d832d7840a4b4d2..4284fc991cfd47e110d319b7b39b77d8866d436c 100644 (file)
@@ -432,7 +432,7 @@ static int stm32f7_i2c_compute_timing(struct stm32f7_i2c_dev *i2c_dev,
                 STM32F7_I2C_ANALOG_FILTER_DELAY_MAX : 0);
        dnf_delay = setup->dnf * i2cclk;
 
-       sdadel_min = setup->fall_time - i2c_specs[setup->speed].hddat_min -
+       sdadel_min = i2c_specs[setup->speed].hddat_min + setup->fall_time -
                af_delay_min - (setup->dnf + 3) * i2cclk;
 
        sdadel_max = i2c_specs[setup->speed].vddat_max - setup->rise_time -
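
The sign flip above matches the documented SDADEL(min) formula: the minimum
SDA output delay must cover the data hold time plus the SDA fall time, minus
the filter delays, i.e. tHD;DAT(min) + tf - tAF(min) - (DNF + 3) x tI2CCLK,
rather than subtracting the hold time from the fall time. A quick numeric
check with illustrative values (not taken from any datasheet):

    #include <stdio.h>

    int main(void)
    {
            /* Illustrative timings in ns; not from a datasheet. */
            long hddat_min = 0;     /* min data hold time for the mode */
            long fall_time = 100;   /* SDA/SCL fall time (tf) */
            long af_delay_min = 50; /* min analog filter delay */
            long dnf = 0;           /* digital filter, in i2cclk periods */
            long i2cclk = 20;       /* one I2C kernel clock period */

            /* Corrected formula: hold time adds to the fall time. */
            long sdadel_min = hddat_min + fall_time - af_delay_min
                              - (dnf + 3) * i2cclk;

            printf("sdadel_min = %ld ns\n", sdadel_min); /* -10 here */
            return 0;
    }
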
index cb6c5cb0df0b3d5438521127110940b8f4cc56f8..38af18645133cb486d6494bb642128414f2194eb 100644 (file)
@@ -2258,7 +2258,8 @@ EXPORT_SYMBOL(i2c_put_adapter);
 /**
  * i2c_get_dma_safe_msg_buf() - get a DMA safe buffer for the given i2c_msg
  * @msg: the message to be checked
- * @threshold: the minimum number of bytes for which using DMA makes sense
+ * @threshold: the minimum number of bytes for which using DMA makes sense.
+ *            Should be at least 1.
  *
  * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO.
  *        Or a valid pointer to be used with DMA. After use, release it by
@@ -2268,7 +2269,11 @@ EXPORT_SYMBOL(i2c_put_adapter);
  */
 u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold)
 {
-       if (msg->len < threshold)
+       /* also skip 0-length msgs for bogus thresholds of 0 */
+       if (!threshold)
+               pr_debug("DMA buffer for addr=0x%02x with length 0 is bogus\n",
+                        msg->addr);
+       if (msg->len < threshold || msg->len == 0)
                return NULL;
 
        if (msg->flags & I2C_M_DMA_SAFE)
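
With the contract tightened (threshold must be >= 1, and zero-length messages
never yield a bounce buffer), a caller looks like the Mediatek hunks above. A
hedged sketch of the usual pairing with i2c_put_dma_safe_msg_buf();
my_start_dma() and my_xfer_one() are hypothetical driver hooks:

    #include <linux/i2c.h>

    static int my_start_dma(struct i2c_adapter *adap, u8 *buf, u16 len);

    static int my_xfer_one(struct i2c_adapter *adap, struct i2c_msg *msg)
    {
            u8 *buf;
            int ret;

            /* threshold >= 1; returns NULL for 0-length messages or
             * when no DMA-safe buffer could be obtained */
            buf = i2c_get_dma_safe_msg_buf(msg, 1);
            if (!buf)
                    return -ENOMEM;

            ret = my_start_dma(adap, buf, msg->len);

            /* xferred=true copies data back into msg->buf for reads */
            i2c_put_dma_safe_msg_buf(buf, msg, ret == 0);
            return ret;
    }
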
index 6b0760dafb3e9c0ff53158676cdcd4e6f08e8ce2..b319e51c379bd664999e7d710c5e1240cc21a42d 100644 (file)
@@ -140,10 +140,14 @@ static struct lock_class_key reserved_rbtree_key;
 static inline int match_hid_uid(struct device *dev,
                                struct acpihid_map_entry *entry)
 {
+       struct acpi_device *adev = ACPI_COMPANION(dev);
        const char *hid, *uid;
 
-       hid = acpi_device_hid(ACPI_COMPANION(dev));
-       uid = acpi_device_uid(ACPI_COMPANION(dev));
+       if (!adev)
+               return -ENODEV;
+
+       hid = acpi_device_hid(adev);
+       uid = acpi_device_uid(adev);
 
        if (!hid || !(*hid))
                return -ENODEV;
index b014957dde0bae609b99ab932e50f0a76ed797f0..a5f8bc2defbcde1656acfa972301050267e3f77b 100644 (file)
@@ -233,10 +233,15 @@ void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
        /* To start with, all buffer is available to user I/O writers */
        rl->rb_budget = budget;
        rl->rb_user_max = budget;
-       rl->rb_max_io = threshold ? (budget - threshold) : (budget - 1);
        rl->rb_gc_max = 0;
        rl->rb_state = PBLK_RL_HIGH;
 
+       /* Maximize I/O size and ensure that the back threshold is respected */
+       if (threshold)
+               rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
+       else
+               rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
+
        atomic_set(&rl->rb_user_cnt, 0);
        atomic_set(&rl->rb_gc_cnt, 0);
        atomic_set(&rl->rb_space, -1);
index abb5d382f64d1db9fd53f71d89d96064a2a2c437..3b6880dd648d26e6d588a09180d181587f5c40b8 100644 (file)
@@ -3939,6 +3939,8 @@ static int raid10_run(struct mddev *mddev)
                set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                mddev->sync_thread = md_register_thread(md_do_sync, mddev,
                                                        "reshape");
+               if (!mddev->sync_thread)
+                       goto out_free_conf;
        }
 
        return 0;
@@ -4670,7 +4672,6 @@ read_more:
        atomic_inc(&r10_bio->remaining);
        read_bio->bi_next = NULL;
        generic_make_request(read_bio);
-       sector_nr += nr_sectors;
        sectors_done += nr_sectors;
        if (sector_nr <= last)
                goto read_more;
index bfb81140706140a53af24ccf90420e6cc107cef1..43c714a8798c5f2cce7d4c616cbc13cab58a30fb 100644 (file)
@@ -45,6 +45,7 @@ extern void ppl_stripe_write_finished(struct stripe_head *sh);
 extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
 extern void ppl_quiesce(struct r5conf *conf, int quiesce);
 extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
+extern struct md_sysfs_entry ppl_write_hint;
 
 static inline bool raid5_has_log(struct r5conf *conf)
 {
index 0b096ddc9c1e62cd6f5bc0ba050dee8fa4936017..17e9e7d51097853f8fbf51aef4db761898a7ec3c 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/raid/md_p.h>
 #include "md.h"
 #include "raid5.h"
+#include "raid5-log.h"
 
 /*
  * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for
@@ -115,6 +116,8 @@ struct ppl_conf {
        /* stripes to retry if failed to allocate io_unit */
        struct list_head no_mem_stripes;
        spinlock_t no_mem_stripes_lock;
+
+       unsigned short write_hint;
 };
 
 struct ppl_log {
@@ -474,6 +477,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
        bio_set_dev(bio, log->rdev->bdev);
        bio->bi_iter.bi_sector = log->next_io_sector;
        bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
+       bio->bi_write_hint = ppl_conf->write_hint;
 
        pr_debug("%s: log->current_io_sector: %llu\n", __func__,
            (unsigned long long)log->next_io_sector);
@@ -503,6 +507,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
                        bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
                                               &ppl_conf->bs);
                        bio->bi_opf = prev->bi_opf;
+                       bio->bi_write_hint = prev->bi_write_hint;
                        bio_copy_dev(bio, prev);
                        bio->bi_iter.bi_sector = bio_end_sector(prev);
                        bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
@@ -1407,6 +1412,7 @@ int ppl_init_log(struct r5conf *conf)
        atomic64_set(&ppl_conf->seq, 0);
        INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
        spin_lock_init(&ppl_conf->no_mem_stripes_lock);
+       ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET;
 
        if (!mddev->external) {
                ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
@@ -1501,3 +1507,60 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
 
        return ret;
 }
+
+static ssize_t
+ppl_write_hint_show(struct mddev *mddev, char *buf)
+{
+       size_t ret = 0;
+       struct r5conf *conf;
+       struct ppl_conf *ppl_conf = NULL;
+
+       spin_lock(&mddev->lock);
+       conf = mddev->private;
+       if (conf && raid5_has_ppl(conf))
+               ppl_conf = conf->log_private;
+       ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0);
+       spin_unlock(&mddev->lock);
+
+       return ret;
+}
+
+static ssize_t
+ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
+{
+       struct r5conf *conf;
+       struct ppl_conf *ppl_conf;
+       int err = 0;
+       unsigned short new;
+
+       if (len >= PAGE_SIZE)
+               return -EINVAL;
+       if (kstrtou16(page, 10, &new))
+               return -EINVAL;
+
+       err = mddev_lock(mddev);
+       if (err)
+               return err;
+
+       conf = mddev->private;
+       if (!conf) {
+               err = -ENODEV;
+       } else if (raid5_has_ppl(conf)) {
+               ppl_conf = conf->log_private;
+               if (!ppl_conf)
+                       err = -EINVAL;
+               else
+                       ppl_conf->write_hint = new;
+       } else {
+               err = -EINVAL;
+       }
+
+       mddev_unlock(mddev);
+
+       return err ?: len;
+}
+
+struct md_sysfs_entry
+ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR,
+                       ppl_write_hint_show,
+                       ppl_write_hint_store);
index 77ffd09be486da312dd0e3d02067672ad69ee340..c033bfcb209e442ae47b0dcf8a6e0064a833b407 100644 (file)
@@ -6650,6 +6650,7 @@ static struct attribute *raid5_attrs[] =  {
        &raid5_skip_copy.attr,
        &raid5_rmw_level.attr,
        &r5c_journal_mode.attr,
+       &ppl_write_hint.attr,
        NULL,
 };
 static struct attribute_group raid5_attrs_group = {
@@ -7393,6 +7394,8 @@ static int raid5_run(struct mddev *mddev)
                set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                mddev->sync_thread = md_register_thread(md_do_sync, mddev,
                                                        "reshape");
+               if (!mddev->sync_thread)
+                       goto abort;
        }
 
        /* Ok, everything is just fine now */
index 2ad263f708da7ab68b12c9767058df9505501013..bb57ec2390299580754a84bdf6602439f9885239 100644 (file)
@@ -180,7 +180,7 @@ int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx)
        return ndev->reg->mw_bar[idx];
 }
 
-static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
+void ndev_db_addr(struct intel_ntb_dev *ndev,
                               phys_addr_t *db_addr, resource_size_t *db_size,
                               phys_addr_t reg_addr, unsigned long reg)
 {
@@ -196,8 +196,6 @@ static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
                *db_size = ndev->reg->db_size;
                dev_dbg(&ndev->ntb.pdev->dev, "Peer db size %llx\n", *db_size);
        }
-
-       return 0;
 }
 
 u64 ndev_db_read(struct intel_ntb_dev *ndev,
@@ -1111,13 +1109,28 @@ int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
                                  ndev->self_reg->db_mask);
 }
 
-int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
-                          resource_size_t *db_size)
+static int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+                          resource_size_t *db_size, u64 *db_data, int db_bit)
 {
+       u64 db_bits;
        struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-       return ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr,
+       if (unlikely(db_bit >= BITS_PER_LONG_LONG))
+               return -EINVAL;
+
+       db_bits = BIT_ULL(db_bit);
+
+       if (unlikely(db_bits & ~ntb_ndev(ntb)->db_valid_mask))
+               return -EINVAL;
+
+       ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr,
                            ndev->peer_reg->db_bell);
+
+       if (db_data)
+               *db_data = db_bits;
+
+       return 0;
 }
 
 static int intel_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
index ad8ec1444436e535ecab21e90c6434b79ae87d96..544cf5c06f4dc2ef1412b54ab5551ad25fc37727 100644 (file)
@@ -147,6 +147,9 @@ extern struct intel_b2b_addr xeon_b2b_dsd_addr;
 int ndev_init_isr(struct intel_ntb_dev *ndev, int msix_min, int msix_max,
                int msix_shift, int total_shift);
 enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd);
+void ndev_db_addr(struct intel_ntb_dev *ndev,
+                 phys_addr_t *db_addr, resource_size_t *db_size,
+                 phys_addr_t reg_addr, unsigned long reg);
 u64 ndev_db_read(struct intel_ntb_dev *ndev, void __iomem *mmio);
 int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits,
                                void __iomem *mmio);
@@ -166,8 +169,6 @@ int intel_ntb_db_vector_count(struct ntb_dev *ntb);
 u64 intel_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector);
 int intel_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits);
 int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits);
-int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
-               resource_size_t *db_size);
 int intel_ntb_spad_is_unsafe(struct ntb_dev *ntb);
 int intel_ntb_spad_count(struct ntb_dev *ntb);
 u32 intel_ntb_spad_read(struct ntb_dev *ntb, int idx);
index b3fa24778f9406e6b70837162e34e1a3a0db2335..f475b56a3f4926e56e95913717ac9f73c27e0570 100644 (file)
@@ -532,6 +532,37 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
        return 0;
 }
 
+static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+                                   resource_size_t *db_size,
+                                   u64 *db_data, int db_bit)
+{
+       phys_addr_t db_addr_base;
+       struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (unlikely(db_bit >= BITS_PER_LONG_LONG))
+               return -EINVAL;
+
+       if (unlikely(BIT_ULL(db_bit) & ~ntb_ndev(ntb)->db_valid_mask))
+               return -EINVAL;
+
+       ndev_db_addr(ndev, &db_addr_base, db_size, ndev->peer_addr,
+                               ndev->peer_reg->db_bell);
+
+       if (db_addr) {
+               *db_addr = db_addr_base + (db_bit * 4);
+               dev_dbg(&ndev->ntb.pdev->dev, "Peer db addr %llx db bit %d\n",
+                               *db_addr, db_bit);
+       }
+
+       if (db_data) {
+               *db_data = 1;
+               dev_dbg(&ndev->ntb.pdev->dev, "Peer db data %llx db bit %d\n",
+                               *db_data, db_bit);
+       }
+
+       return 0;
+}
+
 static int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
 {
        struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -584,7 +615,7 @@ const struct ntb_dev_ops intel_ntb3_ops = {
        .db_clear               = intel_ntb3_db_clear,
        .db_set_mask            = intel_ntb_db_set_mask,
        .db_clear_mask          = intel_ntb_db_clear_mask,
-       .peer_db_addr           = intel_ntb_peer_db_addr,
+       .peer_db_addr           = intel_ntb3_peer_db_addr,
        .peer_db_set            = intel_ntb3_peer_db_set,
        .spad_is_unsafe         = intel_ntb_spad_is_unsafe,
        .spad_count             = intel_ntb_spad_count,
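
The extended .peer_db_addr callback reports both the doorbell register
address and the exact value to write for a given bit, so a client can ring
one peer doorbell with a plain MMIO write instead of a read-modify-write on
shared bits. A hedged sketch of a client side, assuming the
ntb_peer_db_addr() wrapper in include/linux/ntb.h carries the same extended
signature; my_ring_peer_db() is hypothetical:

    #include <linux/io.h>
    #include <linux/kernel.h>
    #include <linux/ntb.h>

    static int my_ring_peer_db(struct ntb_dev *ntb, int db_bit)
    {
            phys_addr_t db_addr;
            resource_size_t db_size;
            void __iomem *db;
            u64 db_data;
            int ret;

            ret = ntb_peer_db_addr(ntb, &db_addr, &db_size, &db_data,
                                   db_bit);
            if (ret)
                    return ret;

            db = ioremap(db_addr, db_size);
            if (!db)
                    return -ENOMEM;

            /* Width is device-specific; the gen3 code above spaces its
             * doorbell registers 4 bytes apart, i.e. 32-bit writes. */
            iowrite32(lower_32_bits(db_data), db);
            iounmap(db);
            return 0;
    }
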
index f2df2d39c65b0e87838c413988ca089d11b6b466..d905d368d28c716a4ca5fa7a59766be06f601eab 100644 (file)
@@ -236,6 +236,7 @@ static void switchtec_ntb_mw_clr_direct(struct switchtec_ntb *sndev, int idx)
        ctl_val &= ~NTB_CTRL_BAR_DIR_WIN_EN;
        iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
        iowrite32(0, &ctl->bar_entry[bar].win_size);
+       iowrite32(0, &ctl->bar_ext_entry[bar].win_size);
        iowrite64(sndev->self_partition, &ctl->bar_entry[bar].xlate_addr);
 }
 
@@ -258,7 +259,9 @@ static void switchtec_ntb_mw_set_direct(struct switchtec_ntb *sndev, int idx,
        ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN;
 
        iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
-       iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size);
+       iowrite32(xlate_pos | (lower_32_bits(size) & 0xFFFFF000),
+                 &ctl->bar_entry[bar].win_size);
+       iowrite32(upper_32_bits(size), &ctl->bar_ext_entry[bar].win_size);
        iowrite64(sndev->self_partition | addr,
                  &ctl->bar_entry[bar].xlate_addr);
 }
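
The win_size change above splits a 64-bit window size across two registers:
the low word keeps the xlate position in its bottom 12 bits, so only bits
31:12 of the size fit there, and the upper 32 bits move to the new
bar_ext_entry register. A quick user-space model of the split (register
layout as implied by the masks in the hunk):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t size = 0x180000000ULL; /* 6 GiB window */
            uint32_t xlate_pos = 33;        /* lives in bits 11:0 */

            uint32_t lo = xlate_pos | ((uint32_t)size & 0xFFFFF000u);
            uint32_t hi = (uint32_t)(size >> 32);

            printf("win_size     = %#x\n", lo); /* 0x80000021 */
            printf("ext win_size = %#x\n", hi); /* 0x1 */
            return 0;
    }
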
@@ -679,11 +682,16 @@ static u64 switchtec_ntb_db_read_mask(struct ntb_dev *ntb)
 
 static int switchtec_ntb_peer_db_addr(struct ntb_dev *ntb,
                                      phys_addr_t *db_addr,
-                                     resource_size_t *db_size)
+                                     resource_size_t *db_size,
+                                     u64 *db_data,
+                                     int db_bit)
 {
        struct switchtec_ntb *sndev = ntb_sndev(ntb);
        unsigned long offset;
 
+       if (unlikely(db_bit >= BITS_PER_LONG_LONG))
+               return -EINVAL;
+
        offset = (unsigned long)sndev->mmio_peer_dbmsg->odb -
                (unsigned long)sndev->stdev->mmio;
 
@@ -693,6 +701,8 @@ static int switchtec_ntb_peer_db_addr(struct ntb_dev *ntb,
                *db_addr = pci_resource_start(ntb->pdev, 0) + offset;
        if (db_size)
                *db_size = sizeof(u32);
+       if (db_data)
+               *db_data = BIT_ULL(db_bit) << sndev->db_peer_shift;
 
        return 0;
 }
@@ -1025,7 +1035,9 @@ static int crosslink_setup_mws(struct switchtec_ntb *sndev, int ntb_lut_idx,
                ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN;
 
                iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
-               iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size);
+               iowrite32(xlate_pos | (lower_32_bits(size) & 0xFFFFF000),
+                         &ctl->bar_entry[bar].win_size);
+               iowrite32(upper_32_bits(size), &ctl->bar_ext_entry[bar].win_size);
                iowrite64(sndev->peer_partition | addr,
                          &ctl->bar_entry[bar].xlate_addr);
        }
@@ -1092,7 +1104,7 @@ static int crosslink_enum_partition(struct switchtec_ntb *sndev,
 
                dev_dbg(&sndev->stdev->dev,
                        "Crosslink BAR%d addr: %llx\n",
-                       i, bar_addr);
+                       i * 2, bar_addr);
 
                if (bar_addr != bar_space * i)
                        continue;
index 3bfdb4562408879fd04f340b05e84673c4da2232..d4f39ba1d9769c911fc6aaed4d42f7d54410e9bb 100644 (file)
@@ -144,7 +144,9 @@ struct ntb_transport_qp {
        struct list_head tx_free_q;
        spinlock_t ntb_tx_free_q_lock;
        void __iomem *tx_mw;
-       dma_addr_t tx_mw_phys;
+       phys_addr_t tx_mw_phys;
+       size_t tx_mw_size;
+       dma_addr_t tx_mw_dma_addr;
        unsigned int tx_index;
        unsigned int tx_max_entry;
        unsigned int tx_max_frame;
@@ -862,6 +864,9 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
        if (!nt->link_is_up)
                cancel_delayed_work_sync(&nt->link_work);
 
+       for (i = 0; i < nt->mw_count; i++)
+               ntb_free_mw(nt, i);
+
        /* The scratchpad registers keep the values if the remote side
         * goes down, blast them now to give them a sane value the next
         * time they are accessed
@@ -1049,6 +1054,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
        tx_size = (unsigned int)mw_size / num_qps_mw;
        qp_offset = tx_size * (qp_num / mw_count);
 
+       qp->tx_mw_size = tx_size;
        qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
        if (!qp->tx_mw)
                return -EINVAL;
@@ -1644,7 +1650,7 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
        dma_cookie_t cookie;
 
        device = chan->device;
-       dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
+       dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index;
        buff_off = (size_t)buf & ~PAGE_MASK;
        dest_off = (size_t)dest & ~PAGE_MASK;
 
@@ -1863,6 +1869,18 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
                qp->rx_dma_chan = NULL;
        }
 
+       if (qp->tx_dma_chan) {
+               qp->tx_mw_dma_addr =
+                       dma_map_resource(qp->tx_dma_chan->device->dev,
+                                        qp->tx_mw_phys, qp->tx_mw_size,
+                                        DMA_FROM_DEVICE, 0);
+               if (dma_mapping_error(qp->tx_dma_chan->device->dev,
+                                     qp->tx_mw_dma_addr)) {
+                       qp->tx_mw_dma_addr = 0;
+                       goto err1;
+               }
+       }
+
        dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
                qp->tx_dma_chan ? "DMA" : "CPU");
 
@@ -1904,6 +1922,10 @@ err1:
        qp->rx_alloc_entry = 0;
        while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
                kfree(entry);
+       if (qp->tx_mw_dma_addr)
+               dma_unmap_resource(qp->tx_dma_chan->device->dev,
+                                  qp->tx_mw_dma_addr, qp->tx_mw_size,
+                                  DMA_FROM_DEVICE, 0);
        if (qp->tx_dma_chan)
                dma_release_channel(qp->tx_dma_chan);
        if (qp->rx_dma_chan)
@@ -1945,6 +1967,11 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
                 */
                dma_sync_wait(chan, qp->last_cookie);
                dmaengine_terminate_all(chan);
+
+               dma_unmap_resource(chan->device->dev,
+                                  qp->tx_mw_dma_addr, qp->tx_mw_size,
+                                  DMA_FROM_DEVICE, 0);
+
                dma_release_channel(chan);
        }
 
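The transport fix above stops handing a raw PCI BAR address to the DMA
engine (which faults once an IOMMU sits in the path) and instead maps the
memory window once with dma_map_resource(), using the returned dma_addr_t
for every descriptor and unmapping it at teardown. A hedged sketch of that
map/unmap pairing in isolation (helper names are hypothetical; the direction
matches the hunks above):

    #include <linux/dma-mapping.h>

    /* 'dev' must be the DMA engine's device, not the NTB device. */
    static dma_addr_t map_mw_for_dma(struct device *dev,
                                     phys_addr_t mw_phys, size_t mw_size)
    {
            dma_addr_t dma;

            dma = dma_map_resource(dev, mw_phys, mw_size,
                                   DMA_FROM_DEVICE, 0);
            if (dma_mapping_error(dev, dma))
                    return 0; /* caller treats 0 as "not mapped" */
            return dma;
    }

    static void unmap_mw_for_dma(struct device *dev, dma_addr_t dma,
                                 size_t mw_size)
    {
            if (dma)
                    dma_unmap_resource(dev, dma, mw_size,
                                       DMA_FROM_DEVICE, 0);
    }
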
index 521eaf53a52aada9c99e804971f7041fcc327563..36be9b61918760e2edfa8e87be82e017a773c5df 100644 (file)
@@ -47,6 +47,7 @@ static int e820_register_one(struct resource *res, void *data)
        ndr_desc.res = res;
        ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
        ndr_desc.numa_node = e820_range_to_nid(res->start);
+       ndr_desc.target_node = ndr_desc.numa_node;
        set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
        if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
                return -ENXIO;
index 379bf4305e6159a2568940df7cb06f07f774b55c..a5ac3b240293b3567a6295b3e7488c35d4ab0bd2 100644 (file)
@@ -153,7 +153,7 @@ struct nd_region {
        u16 ndr_mappings;
        u64 ndr_size;
        u64 ndr_start;
-       int id, num_lanes, ro, numa_node;
+       int id, num_lanes, ro, numa_node, target_node;
        void *provider_data;
        struct kernfs_node *bb_state;
        struct badblocks bb;
index 11b9821eba8594ce61688ffd92873d4df61d2583..a0c8dcfa0bf923cc1e8a1b2ffacb74f2f801d681 100644 (file)
@@ -68,6 +68,7 @@ static int of_pmem_region_probe(struct platform_device *pdev)
                memset(&ndr_desc, 0, sizeof(ndr_desc));
                ndr_desc.attr_groups = region_attr_groups;
                ndr_desc.numa_node = dev_to_node(&pdev->dev);
+               ndr_desc.target_node = ndr_desc.numa_node;
                ndr_desc.res = &pdev->resource[i];
                ndr_desc.of_node = np;
                set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
index 3b58baa44b5cf4b2622c0ab487a0eeebe08b6d89..b4ef7d9ff22ebb517744566ea68eb677db052285 100644 (file)
@@ -1072,6 +1072,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
        nd_region->flags = ndr_desc->flags;
        nd_region->ro = ro;
        nd_region->numa_node = ndr_desc->numa_node;
+       nd_region->target_node = ndr_desc->target_node;
        ida_init(&nd_region->ns_ida);
        ida_init(&nd_region->btt_ida);
        ida_init(&nd_region->pfn_ida);
index 07bf2bff3a76f9a6fa7ab8b868adc3a09469db27..470601980794edd9ebd803199587c62f0586fb03 100644 (file)
@@ -179,8 +179,8 @@ static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
        int ret = 0;
 
        /*
-        * Keep a reference until the work is flushed since ->delete_ctrl
-        * can free the controller.
+        * Keep a reference until nvme_do_delete_ctrl() completes,
+        * since ->delete_ctrl can free the controller.
         */
        nvme_get_ctrl(ctrl);
        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
@@ -1250,7 +1250,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        if (ns) {
                if (ctrl->effects)
                        effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
-               if (effects & ~NVME_CMD_EFFECTS_CSUPP)
+               if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
                        dev_warn(ctrl->device,
                                 "IO command:%02x has unhandled effects:%08x\n",
                                 opcode, effects);
@@ -1495,10 +1495,10 @@ static void nvme_set_chunk_size(struct nvme_ns *ns)
        blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
 }
 
-static void nvme_config_discard(struct nvme_ns *ns)
+static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 {
        struct nvme_ctrl *ctrl = ns->ctrl;
-       struct request_queue *queue = ns->queue;
+       struct request_queue *queue = disk->queue;
        u32 size = queue_logical_block_size(queue);
 
        if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
@@ -1526,12 +1526,13 @@ static void nvme_config_discard(struct nvme_ns *ns)
                blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
 }
 
-static inline void nvme_config_write_zeroes(struct nvme_ns *ns)
+static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
 {
        u32 max_sectors;
        unsigned short bs = 1 << ns->lba_shift;
 
-       if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES))
+       if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
+           (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
                return;
        /*
         * Even though NVMe spec explicitly states that MDTS is not
@@ -1548,13 +1549,7 @@ static inline void nvme_config_write_zeroes(struct nvme_ns *ns)
        else
                max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9;
 
-       blk_queue_max_write_zeroes_sectors(ns->queue, max_sectors);
-}
-
-static inline void nvme_ns_config_oncs(struct nvme_ns *ns)
-{
-       nvme_config_discard(ns);
-       nvme_config_write_zeroes(ns);
+       blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
 }
 
 static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
@@ -1610,7 +1605,9 @@ static void nvme_update_disk_info(struct gendisk *disk,
                capacity = 0;
 
        set_capacity(disk, capacity);
-       nvme_ns_config_oncs(ns);
+
+       nvme_config_discard(disk, ns);
+       nvme_config_write_zeroes(disk, ns);
 
        if (id->nsattr & (1 << 0))
                set_disk_ro(disk, true);
@@ -3304,6 +3301,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        mutex_lock(&ctrl->subsys->lock);
        list_del_rcu(&ns->siblings);
        mutex_unlock(&ctrl->subsys->lock);
+       nvme_put_ns_head(ns->head);
  out_free_id:
        kfree(id);
  out_free_queue:
index b29b12498a1a9bb5fa900359b0a9534ceecb8ad9..f3b9d91ba0dfd30ba7c4c3f554e14ea860c389b7 100644 (file)
@@ -2107,7 +2107,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
 
        freq->sg_cnt = 0;
 
-       if (!blk_rq_payload_bytes(rq))
+       if (!blk_rq_nr_phys_segments(rq))
                return 0;
 
        freq->sg_table.sgl = freq->first_sgl;
@@ -2304,12 +2304,23 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (ret)
                return ret;
 
-       data_len = blk_rq_payload_bytes(rq);
-       if (data_len)
+       /*
+        * nvme core doesn't quite treat the rq opaquely. Commands such
+        * as WRITE ZEROES will return a non-zero rq payload_bytes, yet
+        * there is no actual payload to be transferred.
+        * To get it right, key data transmission on there being 1 or
+        * more physical segments in the sg list. If there are no
+        * physical segments, there is no payload.
+        */
+       if (blk_rq_nr_phys_segments(rq)) {
+               data_len = blk_rq_payload_bytes(rq);
                io_dir = ((rq_data_dir(rq) == WRITE) ?
                                        NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
-       else
+       } else {
+               data_len = 0;
                io_dir = NVMEFC_FCP_NODATA;
+       }
+
 
        return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
 }
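
The hunk above keys the FC transfer direction on blk_rq_nr_phys_segments() instead of blk_rq_payload_bytes(), since a Write Zeroes request reports a byte count without any SG entries. Below is a minimal userspace sketch of that decision; struct fake_rq and the enum are stand-ins invented for illustration, not kernel types.

#include <stdio.h>

/* Stand-ins for the block-layer request fields used in the hunk above;
 * these are not kernel types. */
enum fcp_dir { FCP_NODATA, FCP_READ, FCP_WRITE };

struct fake_rq {
	unsigned int nr_phys_segments;	/* SG entries actually mapped */
	unsigned int payload_bytes;	/* non-zero even for Write Zeroes */
	int is_write;
};

static enum fcp_dir pick_io_dir(const struct fake_rq *rq,
				unsigned int *data_len)
{
	/* Only trust payload_bytes when at least one SG segment exists. */
	if (rq->nr_phys_segments) {
		*data_len = rq->payload_bytes;
		return rq->is_write ? FCP_WRITE : FCP_READ;
	}
	*data_len = 0;
	return FCP_NODATA;
}

int main(void)
{
	struct fake_rq write_zeroes = {
		.nr_phys_segments = 0,
		.payload_bytes = 4096,
		.is_write = 1,
	};
	unsigned int len;
	enum fcp_dir dir = pick_io_dir(&write_zeroes, &len);

	/* payload_bytes says 4096, but the wire transfer is NODATA */
	printf("dir=%d len=%u\n", dir, len);
	return 0;
}
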
@@ -2464,6 +2475,7 @@ static int
 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
 {
        struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
        unsigned int nr_io_queues;
        int ret;
 
@@ -2476,6 +2488,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
                return ret;
        }
 
+       if (!nr_io_queues && prior_ioq_cnt) {
+               dev_info(ctrl->ctrl.device,
+                       "Fail Reconnect: At least 1 io queue "
+                       "required (was %d)\n", prior_ioq_cnt);
+               return -ENOSPC;
+       }
+
        ctrl->ctrl.queue_count = nr_io_queues + 1;
        /* check for io queues existing */
        if (ctrl->ctrl.queue_count == 1)
@@ -2489,6 +2508,10 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
        if (ret)
                goto out_delete_hw_queues;
 
+       if (prior_ioq_cnt != nr_io_queues)
+               dev_info(ctrl->ctrl.device,
+                       "reconnect: revising io queue count from %d to %d\n",
+                       prior_ioq_cnt, nr_io_queues);
        blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
 
        return 0;
@@ -3006,7 +3029,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
        ctrl->ctrl.opts = opts;
        ctrl->ctrl.nr_reconnects = 0;
-       ctrl->ctrl.numa_node = dev_to_node(lport->dev);
+       if (lport->dev)
+               ctrl->ctrl.numa_node = dev_to_node(lport->dev);
+       else
+               ctrl->ctrl.numa_node = NUMA_NO_NODE;
        INIT_LIST_HEAD(&ctrl->ctrl_list);
        ctrl->lport = lport;
        ctrl->rport = rport;
index b91f1838bbd5d3ee9f27e6d5388066686cbc4774..527d645450230d40780062356366d025dd5ca500 100644 (file)
@@ -87,6 +87,11 @@ enum nvme_quirks {
         * Ignore device provided subnqn.
         */
        NVME_QUIRK_IGNORE_DEV_SUBNQN            = (1 << 8),
+
+       /*
+        * Broken Write Zeroes.
+        */
+       NVME_QUIRK_DISABLE_WRITE_ZEROES         = (1 << 9),
 };
 
 /*
index 92bad1c810acda473bdf5d92e96960a3107e19e6..a90cf5d63aac4bdfc739c387e21a45587451dae2 100644 (file)
@@ -2937,7 +2937,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_VDEVICE(INTEL, 0xf1a6),   /* Intel 760p/Pro 7600p */
                .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
-               .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
+               .driver_data = NVME_QUIRK_IDENTIFY_CNS |
+                               NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
index 208ee518af6506edea83f50544c88eedc274dd81..e7e08889865e732d503a6ac2af5d38cac4dd9672 100644 (file)
@@ -463,6 +463,15 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
 
        queue->data_remaining = le32_to_cpu(pdu->data_length);
 
+       if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
+           unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
+               dev_err(queue->ctrl->ctrl.device,
+                       "queue %d tag %#x SUCCESS set but not last PDU\n",
+                       nvme_tcp_queue_id(queue), rq->tag);
+               nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+               return -EPROTO;
+       }
+
        return 0;
 
 }
@@ -618,6 +627,14 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
        return ret;
 }
 
+static inline void nvme_tcp_end_request(struct request *rq, __le16 status)
+{
+       union nvme_result res = {};
+
+       nvme_end_request(rq, cpu_to_le16(status << 1), res);
+}
+
+
 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
                              unsigned int *offset, size_t *len)
 {
@@ -685,6 +702,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
                        nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
                        queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
                } else {
+                       if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS)
+                               nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
                        nvme_tcp_init_recv_ctx(queue);
                }
        }
@@ -695,6 +714,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
                struct sk_buff *skb, unsigned int *offset, size_t *len)
 {
+       struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
        char *ddgst = (char *)&queue->recv_ddgst;
        size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
        off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
@@ -718,6 +738,13 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
                return -EIO;
        }
 
+       if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
+               struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
+                                               pdu->command_id);
+
+               nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+       }
+
        nvme_tcp_init_recv_ctx(queue);
        return 0;
 }
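
With NVME_TCP_F_DATA_SUCCESS set in a C2H data PDU, no separate completion PDU follows, so the request is ended as soon as its data (and the digest, when enabled) has been consumed. The status << 1 in nvme_tcp_end_request() reflects the driver's 16-bit CQE status layout, where bit 0 is the phase tag; the snippet below is a small model of that encoding, assuming that layout.

#include <stdio.h>
#include <stdint.h>

/* Model of the 16-bit CQE status word layout assumed by
 * nvme_tcp_end_request() above: bit 0 is the phase tag, so a status
 * code occupies bits 15:1 and is shifted left by one when the driver
 * synthesizes a completion. */
#define NVME_SC_SUCCESS	0x0

static uint16_t synth_cqe_status(uint16_t sc)
{
	return (uint16_t)(sc << 1);	/* phase bit stays clear */
}

int main(void)
{
	printf("success -> status word 0x%04x\n",
	       synth_cqe_status(NVME_SC_SUCCESS));
	printf("SC 0x02 -> status word 0x%04x\n", synth_cqe_status(0x02));
	return 0;
}
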
@@ -815,10 +842,7 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
 
 static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
 {
-       union nvme_result res = {};
-
-       nvme_end_request(blk_mq_rq_from_pdu(req),
-               cpu_to_le16(NVME_SC_DATA_XFER_ERROR), res);
+       nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_DATA_XFER_ERROR);
 }
 
 static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
index 58456de78bb2a2ca472254792ea22858faa822e2..5f24ea7a28eb8560052e03ef3d4fbad4520b5fb8 100644 (file)
@@ -50,7 +50,19 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
        return ret;
 }
 
+static const char *nvme_trace_admin_get_features(struct trace_seq *p,
+                                                u8 *cdw10)
+{
+       const char *ret = trace_seq_buffer_ptr(p);
+       u8 fid = cdw10[0];
+       u8 sel = cdw10[1] & 0x7;
+       u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+       trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+       trace_seq_putc(p, 0);
 
+       return ret;
+}
 
 static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
 {
@@ -101,6 +113,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
                return nvme_trace_create_cq(p, cdw10);
        case nvme_admin_identify:
                return nvme_trace_admin_identify(p, cdw10);
+       case nvme_admin_get_features:
+               return nvme_trace_admin_get_features(p, cdw10);
        default:
                return nvme_trace_common(p, cdw10);
        }
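
The new Get Features decoder reads the feature ID from byte 0 of cdw10, the 3-bit SEL field from byte 1, and cdw11 from the four bytes that follow. A self-contained model of the extraction, using memcpy as a portable stand-in for get_unaligned_le32() (the sample bytes are made up, and a little-endian host is assumed):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Stand-in for get_unaligned_le32(); a little-endian host is assumed
 * so that memcpy() yields the LE value directly. */
static uint32_t get_le32(const uint8_t *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));
	return v;
}

int main(void)
{
	/* cdw10..cdw11 as consecutive SQE bytes; the values are made up. */
	uint8_t cdw10[8] = { 0x07, 0x02, 0, 0, 0x34, 0x12, 0, 0 };
	unsigned int fid = cdw10[0];
	unsigned int sel = cdw10[1] & 0x7;	/* SEL is a 3-bit field */
	uint32_t cdw11 = get_le32(cdw10 + 4);

	printf("fid=0x%x sel=0x%x cdw11=0x%x\n", fid, sel, cdw11);
	return 0;
}
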
index 244d7c177e5a7e32d42e262c8e559ef6a94b74d3..97d3c77365b890e0e4dc4e02ab1c4d08bfe8a1cb 100644 (file)
@@ -108,7 +108,7 @@ TRACE_EVENT(nvme_setup_cmd,
                __entry->metadata = le64_to_cpu(cmd->common.metadata);
                __assign_disk_name(__entry->disk, req->rq_disk);
                memcpy(__entry->cdw10, &cmd->common.cdw10,
-                       6 * sizeof(__entry->cdw10));
+                       sizeof(__entry->cdw10));
            ),
            TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
                      __entry->ctrl_id, __print_disk_name(__entry->disk),
index d44ede147263b9a340586ad1b5d79d3b6a492f3f..2d73b66e368627cdee268a74d30fb3c5d6a34235 100644 (file)
@@ -1163,6 +1163,15 @@ static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
        put_device(ctrl->p2p_client);
 }
 
+static void nvmet_fatal_error_handler(struct work_struct *work)
+{
+       struct nvmet_ctrl *ctrl =
+                       container_of(work, struct nvmet_ctrl, fatal_err_work);
+
+       pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
+       ctrl->ops->delete_ctrl(ctrl);
+}
+
 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
                struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
 {
@@ -1205,6 +1214,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
        INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
        INIT_LIST_HEAD(&ctrl->async_events);
        INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
+       INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
 
        memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
        memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
@@ -1308,21 +1318,11 @@ void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
        kref_put(&ctrl->ref, nvmet_ctrl_free);
 }
 
-static void nvmet_fatal_error_handler(struct work_struct *work)
-{
-       struct nvmet_ctrl *ctrl =
-                       container_of(work, struct nvmet_ctrl, fatal_err_work);
-
-       pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
-       ctrl->ops->delete_ctrl(ctrl);
-}
-
 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
 {
        mutex_lock(&ctrl->lock);
        if (!(ctrl->csts & NVME_CSTS_CFS)) {
                ctrl->csts |= NVME_CSTS_CFS;
-               INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
                schedule_work(&ctrl->fatal_err_work);
        }
        mutex_unlock(&ctrl->lock);
index 1e9654f04c608d194bb464b854180ed1b0c06ef9..98b7b1f4ee96aae168f494c7808d50582fa77eb0 100644 (file)
@@ -1143,10 +1143,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
                                &tgtport->assoc_list, a_list) {
                if (!nvmet_fc_tgt_a_get(assoc))
                        continue;
-               spin_unlock_irqrestore(&tgtport->lock, flags);
-               nvmet_fc_delete_target_assoc(assoc);
-               nvmet_fc_tgt_a_put(assoc);
-               spin_lock_irqsave(&tgtport->lock, flags);
+               if (!schedule_work(&assoc->del_work))
+                       nvmet_fc_tgt_a_put(assoc);
        }
        spin_unlock_irqrestore(&tgtport->lock, flags);
 }
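
Here and in the following hunk, schedule_work() returning false means the deletion work was already queued, so the reference just taken for the work handler must be dropped by the caller or the association leaks. A toy refcount model of that pattern (all names here are invented for the sketch):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of "take a ref for the work handler, drop it again if the
 * work was already queued". */
struct assoc {
	int refcount;
	bool work_queued;	/* stands in for WORK_STRUCT_PENDING */
};

static void a_get(struct assoc *a)
{
	a->refcount++;
}

static void a_put(struct assoc *a)
{
	if (--a->refcount == 0)
		printf("assoc freed\n");
}

/* Like schedule_work(): returns false if the item is already pending. */
static bool queue_del_work(struct assoc *a)
{
	if (a->work_queued)
		return false;
	a->work_queued = true;
	return true;
}

int main(void)
{
	struct assoc a = { .refcount = 1 };

	a_get(&a);			/* ref owned by the work handler */
	if (!queue_del_work(&a))
		a_put(&a);

	a_get(&a);			/* a second deleter races in ... */
	if (!queue_del_work(&a))
		a_put(&a);		/* ... and must balance its ref */

	printf("refcount=%d (base ref + pending work's ref)\n", a.refcount);
	return 0;
}
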
@@ -1185,7 +1183,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
                nvmet_fc_tgtport_put(tgtport);
 
                if (found_ctrl) {
-                       schedule_work(&assoc->del_work);
+                       if (!schedule_work(&assoc->del_work))
+                               nvmet_fc_tgt_a_put(assoc);
                        return;
                }
 
@@ -1503,10 +1502,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                        (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
        struct fcnvme_ls_disconnect_acc *acc =
                        (struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
-       struct nvmet_fc_tgt_queue *queue = NULL;
        struct nvmet_fc_tgt_assoc *assoc;
        int ret = 0;
-       bool del_assoc = false;
 
        memset(acc, 0, sizeof(*acc));
 
@@ -1537,18 +1534,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                assoc = nvmet_fc_find_target_assoc(tgtport,
                                be64_to_cpu(rqst->associd.association_id));
                iod->assoc = assoc;
-               if (assoc) {
-                       if (rqst->discon_cmd.scope ==
-                                       FCNVME_DISCONN_CONNECTION) {
-                               queue = nvmet_fc_find_target_queue(tgtport,
-                                               be64_to_cpu(
-                                                       rqst->discon_cmd.id));
-                               if (!queue) {
-                                       nvmet_fc_tgt_a_put(assoc);
-                                       ret = VERR_NO_CONN;
-                               }
-                       }
-               } else
+               if (!assoc)
                        ret = VERR_NO_ASSOC;
        }
 
@@ -1576,26 +1562,10 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                                sizeof(struct fcnvme_ls_disconnect_acc)),
                        FCNVME_LS_DISCONNECT);
 
-
-       /* are we to delete a Connection ID (queue) */
-       if (queue) {
-               int qid = queue->qid;
-
-               nvmet_fc_delete_target_queue(queue);
-
-               /* release the get taken by find_target_queue */
-               nvmet_fc_tgt_q_put(queue);
-
-               /* tear association down if io queue terminated */
-               if (!qid)
-                       del_assoc = true;
-       }
-
        /* release get taken in nvmet_fc_find_target_assoc */
        nvmet_fc_tgt_a_put(iod->assoc);
 
-       if (del_assoc)
-               nvmet_fc_delete_target_assoc(iod->assoc);
+       nvmet_fc_delete_target_assoc(iod->assoc);
 }
 
 
index 71dfedbadc262f8a5512f32eb3d4d67b1c2cb3c9..a065dbfc43b19895defa7788ce3b1858bb75739d 100644 (file)
@@ -194,11 +194,11 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                        le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, 0, bio);
-
-       if (ret)
+       if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
-
-       return blk_to_nvme_status(req, errno_to_blk_status(ret));
+               return blk_to_nvme_status(req, errno_to_blk_status(ret));
+       }
+       return NVME_SC_SUCCESS;
 }
 
 static void nvmet_bdev_execute_discard(struct nvmet_req *req)
index 517522305e5cbcd1d548a8e79176eef35f90b8a6..3e43212d3c1c6bba5a6d553dc2a965188c5ccbf5 100644 (file)
@@ -297,7 +297,7 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
                }
 
                ret = vfs_fallocate(req->ns->file, mode, offset, len);
-               if (ret) {
+               if (ret && ret != -EOPNOTSUPP) {
                        req->error_slba = le64_to_cpu(range.slba);
                        status = errno_to_nvme_status(req, ret);
                        break;
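
Both discard paths above now swallow -EOPNOTSUPP from the backend and treat it as success, on the grounds that the DSM deallocate attribute is advisory. A sketch of the resulting status mapping (the NVMe status values are illustrative):

#include <errno.h>
#include <stdio.h>

#define NVME_SC_SUCCESS		0x0
#define NVME_SC_INTERNAL	0x6	/* illustrative "real error" code */

/* Deallocate is advisory: a backend that cannot discard is no error. */
static int discard_status(int ret)
{
	if (ret && ret != -EOPNOTSUPP)
		return NVME_SC_INTERNAL;
	return NVME_SC_SUCCESS;
}

int main(void)
{
	printf("ret 0           -> 0x%x\n", discard_status(0));
	printf("ret -EOPNOTSUPP -> 0x%x\n", discard_status(-EOPNOTSUPP));
	printf("ret -EIO        -> 0x%x\n", discard_status(-EIO));
	return 0;
}
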
index a0baee25134c0cd0b6a6e35df694ae763a4b0a21..4159c63a5fd2bbba9b9c2949fde8c56ba9030a89 100644 (file)
@@ -1382,3 +1382,40 @@ int chsc_pnso_brinfo(struct subchannel_id schid,
        return chsc_error_from_response(brinfo_area->response.code);
 }
 EXPORT_SYMBOL_GPL(chsc_pnso_brinfo);
+
+int chsc_sgib(u32 origin)
+{
+       struct {
+               struct chsc_header request;
+               u16 op;
+               u8  reserved01[2];
+               u8  reserved02:4;
+               u8  fmt:4;
+               u8  reserved03[7];
+               /* operation data area begin */
+               u8  reserved04[4];
+               u32 gib_origin;
+               u8  reserved05[10];
+               u8  aix;
+               u8  reserved06[4029];
+               struct chsc_header response;
+               u8  reserved07[4];
+       } *sgib_area;
+       int ret;
+
+       spin_lock_irq(&chsc_page_lock);
+       memset(chsc_page, 0, PAGE_SIZE);
+       sgib_area = chsc_page;
+       sgib_area->request.length = 0x0fe0;
+       sgib_area->request.code = 0x0021;
+       sgib_area->op = 0x1;
+       sgib_area->gib_origin = origin;
+
+       ret = chsc(sgib_area);
+       if (ret == 0)
+               ret = chsc_error_from_response(sgib_area->response.code);
+       spin_unlock_irq(&chsc_page_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(chsc_sgib);
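
chsc_sgib() builds a fixed-layout request block inside the one-page chsc_page buffer; the reserved padding is sized so that the response header begins exactly at request.length (0x0fe0). The userspace layout check below reproduces the structure with a stand-in chsc_header (the two-u16 header shape is an assumption of this sketch) and prints the offsets:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the s390 chsc_header; u16 length + u16 code is an
 * assumption made for this sketch. */
struct chsc_header {
	uint16_t length;
	uint16_t code;
};

struct sgib_area {
	struct chsc_header request;
	uint16_t op;
	uint8_t  reserved01[2];
	uint8_t  reserved02:4;
	uint8_t  fmt:4;
	uint8_t  reserved03[7];
	/* operation data area begin */
	uint8_t  reserved04[4];
	uint32_t gib_origin;
	uint8_t  reserved05[10];
	uint8_t  aix;
	uint8_t  reserved06[4029];
	struct chsc_header response;
	uint8_t  reserved07[4];
};

int main(void)
{
	/* request.length is 0x0fe0 in the patch; with the assumed header
	 * it should equal the offset of the response header. */
	printf("offsetof(gib_origin) = %zu\n",
	       offsetof(struct sgib_area, gib_origin));
	printf("offsetof(response)   = 0x%zx\n",
	       offsetof(struct sgib_area, response));
	printf("sizeof(sgib_area)    = %zu (must fit in a 4K page)\n",
	       sizeof(struct sgib_area));
	return 0;
}
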
index 78aba8d94eec33eae2c0594dde4a79bf12c8ba82..e57d68e325a372424fd40a729d99265a0530a522 100644 (file)
@@ -164,6 +164,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp);
 int chsc_ssqd(struct subchannel_id schid, struct chsc_ssqd_area *ssqd);
 int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc,
              u64 summary_indicator_addr, u64 subchannel_indicator_addr);
+int chsc_sgib(u32 origin);
 int chsc_error_from_response(int response);
 
 int chsc_siosl(struct subchannel_id schid);
index a45f81ec80cec911f6a95e095bd21249c34ea535..8e28a505f7e8c0f8c18c30d59574da564ac698be 100644 (file)
@@ -413,13 +413,16 @@ static int aac_slave_configure(struct scsi_device *sdev)
        if (chn < AAC_MAX_BUSES && tid < AAC_MAX_TARGETS && aac->sa_firmware) {
                devtype = aac->hba_map[chn][tid].devtype;
 
-               if (devtype == AAC_DEVTYPE_NATIVE_RAW)
+               if (devtype == AAC_DEVTYPE_NATIVE_RAW) {
                        depth = aac->hba_map[chn][tid].qd_limit;
-               else if (devtype == AAC_DEVTYPE_ARC_RAW)
+                       set_timeout = 1;
+                       goto common_config;
+               }
+               if (devtype == AAC_DEVTYPE_ARC_RAW) {
                        set_qd_dev_type = true;
-
-               set_timeout = 1;
-               goto common_config;
+                       set_timeout = 1;
+                       goto common_config;
+               }
        }
 
        if (aac->jbod && (sdev->type == TYPE_DISK))
index 6c87bd34509af127039f743ce1251af5480432e0..9bfa9f12d81e8ab4fe81f0032fb05ad72dff191a 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/dmapool.h>
 #include <linux/iopoll.h>
 #include <linux/lcm.h>
+#include <linux/libata.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
@@ -94,6 +95,11 @@ enum {
        PORT_TYPE_SATA = (1U << 0),
 };
 
+enum dev_status {
+       HISI_SAS_DEV_INIT,
+       HISI_SAS_DEV_NORMAL,
+};
+
 enum {
        HISI_SAS_INT_ABT_CMD = 0,
        HISI_SAS_INT_ABT_DEV = 1,
@@ -161,6 +167,7 @@ struct hisi_sas_phy {
        u8              in_reset;
        u8              reserved[2];
        u32             phy_type;
+       u32             code_violation_err_count;
        enum sas_linkrate       minimum_linkrate;
        enum sas_linkrate       maximum_linkrate;
 };
@@ -194,6 +201,7 @@ struct hisi_sas_device {
        struct hisi_sas_dq      *dq;
        struct list_head        list;
        enum sas_device_type    dev_type;
+       enum dev_status dev_status;
        int device_id;
        int sata_idx;
        spinlock_t lock; /* For protecting slots */
index 13ca5a0bdf6be336cd6f3754c15632440995d765..3c3cf89f713fbfaf8a7c15fbca17120af609fe87 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include "hisi_sas.h"
+#include "../libsas/sas_internal.h"
 #define DRV_NAME "hisi_sas"
 
 #define DEV_IS_GONE(dev) \
@@ -707,6 +708,7 @@ static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
 
                        hisi_hba->devices[i].device_id = i;
                        sas_dev = &hisi_hba->devices[i];
+                       sas_dev->dev_status = HISI_SAS_DEV_INIT;
                        sas_dev->dev_type = device->dev_type;
                        sas_dev->hisi_hba = hisi_hba;
                        sas_dev->sas_device = device;
@@ -731,6 +733,8 @@ static int hisi_sas_init_device(struct domain_device *device)
        struct hisi_sas_tmf_task tmf_task;
        int retry = HISI_SAS_SRST_ATA_DISK_CNT;
        struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
+       struct device *dev = hisi_hba->dev;
+       struct sas_phy *local_phy;
 
        switch (device->dev_type) {
        case SAS_END_DEVICE:
@@ -746,6 +750,31 @@ static int hisi_sas_init_device(struct domain_device *device)
        case SAS_SATA_PM:
        case SAS_SATA_PM_PORT:
        case SAS_SATA_PENDING:
+               /*
+                * Send HARD RESET to clear the previous affiliation of the
+                * STP target port.
+                */
+               local_phy = sas_get_local_phy(device);
+               if (!scsi_is_sas_phy_local(local_phy)) {
+                       unsigned long deadline = ata_deadline(jiffies, 20000);
+                       struct sata_device *sata_dev = &device->sata_dev;
+                       struct ata_host *ata_host = sata_dev->ata_host;
+                       struct ata_port_operations *ops = ata_host->ops;
+                       struct ata_port *ap = sata_dev->ap;
+                       struct ata_link *link;
+                       unsigned int classes;
+
+                       ata_for_each_link(link, ap, EDGE)
+                               rc = ops->hardreset(link, &classes,
+                                                   deadline);
+               }
+               sas_put_local_phy(local_phy);
+               if (rc) {
+                       dev_warn(dev, "SATA disk hardreset fail: 0x%x\n",
+                                rc);
+                       return rc;
+               }
+
                while (retry-- > 0) {
                        rc = hisi_sas_softreset_ata_disk(device);
                        if (!rc)
@@ -808,6 +837,7 @@ static int hisi_sas_dev_found(struct domain_device *device)
        rc = hisi_sas_init_device(device);
        if (rc)
                goto err_out;
+       sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
        return 0;
 
 err_out:
@@ -980,7 +1010,8 @@ static void hisi_sas_do_release_task(struct hisi_hba *hisi_hba, struct sas_task
                spin_lock_irqsave(&task->task_state_lock, flags);
                task->task_state_flags &=
                        ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
-               task->task_state_flags |= SAS_TASK_STATE_DONE;
+               if (!slot->is_internal && task->task_proto != SAS_PROTOCOL_SMP)
+                       task->task_state_flags |= SAS_TASK_STATE_DONE;
                spin_unlock_irqrestore(&task->task_state_lock, flags);
        }
 
@@ -1713,20 +1744,23 @@ static int hisi_sas_clear_aca(struct domain_device *device, u8 *lun)
 static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
 {
        struct sas_phy *local_phy = sas_get_local_phy(device);
-       int rc, reset_type = (device->dev_type == SAS_SATA_DEV ||
-                       (device->tproto & SAS_PROTOCOL_STP)) ? 0 : 1;
+       struct hisi_sas_device *sas_dev = device->lldd_dev;
        struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
        struct sas_ha_struct *sas_ha = &hisi_hba->sha;
        struct asd_sas_phy *sas_phy = sas_ha->sas_phy[local_phy->number];
        struct hisi_sas_phy *phy = container_of(sas_phy,
                        struct hisi_sas_phy, sas_phy);
        DECLARE_COMPLETION_ONSTACK(phyreset);
+       int rc, reset_type;
 
        if (scsi_is_sas_phy_local(local_phy)) {
                phy->in_reset = 1;
                phy->reset_completion = &phyreset;
        }
 
+       reset_type = (sas_dev->dev_status == HISI_SAS_DEV_INIT ||
+                     !dev_is_sata(device)) ? 1 : 0;
+
        rc = sas_phy_reset(local_phy, reset_type);
        sas_put_local_phy(local_phy);
 
@@ -1742,8 +1776,13 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
                /* report PHY down if timed out */
                if (!ret)
                        hisi_sas_phy_down(hisi_hba, sas_phy->id, 0);
-       } else
+       } else if (sas_dev->dev_status != HISI_SAS_DEV_INIT) {
+               /*
+                * If in the init state, we rely on the caller to wait for the
+                * link to be ready; otherwise, delay.
+                */
                msleep(2000);
+       }
 
        return rc;
 }
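
The reset-type selection above reduces to: hard reset (1) for any device still in the INIT state and for all non-SATA devices, link reset (0) for a SATA device that has already reached NORMAL. A compact mirror of that predicate (the helper name is invented):

#include <stdbool.h>
#include <stdio.h>

enum dev_status { HISI_SAS_DEV_INIT, HISI_SAS_DEV_NORMAL };

/* Mirrors the patch:
 * reset_type = (dev_status == INIT || !dev_is_sata(device)) ? 1 : 0 */
static int pick_reset_type(enum dev_status status, bool is_sata)
{
	return (status == HISI_SAS_DEV_INIT || !is_sata) ? 1 : 0;
}

int main(void)
{
	printf("SATA, INIT   -> %d (hard reset)\n",
	       pick_reset_type(HISI_SAS_DEV_INIT, true));
	printf("SATA, NORMAL -> %d (link reset)\n",
	       pick_reset_type(HISI_SAS_DEV_NORMAL, true));
	printf("SAS,  NORMAL -> %d (hard reset)\n",
	       pick_reset_type(HISI_SAS_DEV_NORMAL, false));
	return 0;
}
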
@@ -2125,9 +2164,18 @@ static int hisi_sas_write_gpio(struct sas_ha_struct *sha, u8 reg_type,
 
 static void hisi_sas_phy_disconnected(struct hisi_sas_phy *phy)
 {
+       struct asd_sas_phy *sas_phy = &phy->sas_phy;
+       struct sas_phy *sphy = sas_phy->phy;
+       struct sas_phy_data *d = sphy->hostdata;
+
        phy->phy_attached = 0;
        phy->phy_type = 0;
        phy->port = NULL;
+
+       if (d->enable)
+               sphy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
+       else
+               sphy->negotiated_linkrate = SAS_PHY_DISABLED;
 }
 
 void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy)
@@ -2253,6 +2301,7 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba)
        for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) {
                hisi_hba->devices[i].dev_type = SAS_PHY_UNUSED;
                hisi_hba->devices[i].device_id = i;
+               hisi_hba->devices[i].dev_status = HISI_SAS_DEV_INIT;
        }
 
        for (i = 0; i < hisi_hba->queue_count; i++) {
index e40cc6b3b67be09cfc84d7daeed41f4f428db2f5..89160ab3efb05648dc6a7b23acd8a45e87ca61a6 100644 (file)
@@ -868,6 +868,7 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
 
                        hisi_hba->devices[i].device_id = i;
                        sas_dev = &hisi_hba->devices[i];
+                       sas_dev->dev_status = HISI_SAS_DEV_INIT;
                        sas_dev->dev_type = device->dev_type;
                        sas_dev->hisi_hba = hisi_hba;
                        sas_dev->sas_device = device;
index 9ec8848ec54139248033fa332a70da64195a1f24..086695a4099fc13bd4c555309c5a7bffe6a18e82 100644 (file)
 #define PHY_CTRL_RESET_MSK             (0x1 << PHY_CTRL_RESET_OFF)
 #define CMD_HDR_PIR_OFF                        8
 #define CMD_HDR_PIR_MSK                        (0x1 << CMD_HDR_PIR_OFF)
+#define SERDES_CFG                     (PORT_BASE + 0x1c)
 #define SL_CFG                         (PORT_BASE + 0x84)
 #define AIP_LIMIT                      (PORT_BASE + 0x90)
 #define SL_CONTROL                     (PORT_BASE + 0x94)
 #define CHL_INT1_DMAC_RX_AXI_RD_ERR_OFF        22
 #define CHL_INT2                       (PORT_BASE + 0x1bc)
 #define CHL_INT2_SL_IDAF_TOUT_CONF_OFF 0
+#define CHL_INT2_RX_DISP_ERR_OFF       28
+#define CHL_INT2_RX_CODE_ERR_OFF       29
 #define CHL_INT2_RX_INVLD_DW_OFF       30
 #define CHL_INT2_STP_LINK_TIMEOUT_OFF  31
 #define CHL_INT0_MSK                   (PORT_BASE + 0x1c0)
@@ -523,6 +526,7 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
                }
                hisi_sas_phy_write32(hisi_hba, i, PROG_PHY_LINK_RATE,
                        prog_phy_link_rate);
+               hisi_sas_phy_write32(hisi_hba, i, SERDES_CFG, 0xffc00);
                hisi_sas_phy_write32(hisi_hba, i, SAS_RX_TRAIN_TIMER, 0x13e80);
                hisi_sas_phy_write32(hisi_hba, i, CHL_INT0, 0xffffffff);
                hisi_sas_phy_write32(hisi_hba, i, CHL_INT1, 0xffffffff);
@@ -544,6 +548,8 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
                hisi_sas_phy_write32(hisi_hba, i, STP_LINK_TIMER, 0x7f7a120);
                hisi_sas_phy_write32(hisi_hba, i, CON_CFG_DRIVER, 0x2a0a01);
                hisi_sas_phy_write32(hisi_hba, i, SAS_SSP_CON_TIMER_CFG, 0x32);
+               hisi_sas_phy_write32(hisi_hba, i, SAS_EC_INT_COAL_TIME,
+                                    0x30f4240);
                /* used for 12G negotiate */
                hisi_sas_phy_write32(hisi_hba, i, COARSETUNE_TIME, 0x1e);
                hisi_sas_phy_write32(hisi_hba, i, AIP_LIMIT, 0x2ffff);
@@ -1344,7 +1350,8 @@ static void prep_abort_v3_hw(struct hisi_hba *hisi_hba,
 
 static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba)
 {
-       int i, res;
+       int i;
+       irqreturn_t res;
        u32 context, port_id, link_rate;
        struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
        struct asd_sas_phy *sas_phy = &phy->sas_phy;
@@ -1575,6 +1582,39 @@ static void handle_chl_int1_v3_hw(struct hisi_hba *hisi_hba, int phy_no)
        hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT1, irq_value);
 }
 
+static void phy_get_events_v3_hw(struct hisi_hba *hisi_hba, int phy_no)
+{
+       struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
+       struct asd_sas_phy *sas_phy = &phy->sas_phy;
+       struct sas_phy *sphy = sas_phy->phy;
+       unsigned long flags;
+       u32 reg_value;
+
+       spin_lock_irqsave(&phy->lock, flags);
+
+       /* loss dword sync */
+       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_DWS_LOST);
+       sphy->loss_of_dword_sync_count += reg_value;
+
+       /* phy reset problem */
+       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_RESET_PROB);
+       sphy->phy_reset_problem_count += reg_value;
+
+       /* invalid dword */
+       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_INVLD_DW);
+       sphy->invalid_dword_count += reg_value;
+
+       /* disparity err */
+       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_DISP_ERR);
+       sphy->running_disparity_error_count += reg_value;
+
+       /* code violation error */
+       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_CODE_ERR);
+       phy->code_violation_err_count += reg_value;
+
+       spin_unlock_irqrestore(&phy->lock, flags);
+}
+
 static void handle_chl_int2_v3_hw(struct hisi_hba *hisi_hba, int phy_no)
 {
        u32 irq_msk = hisi_sas_phy_read32(hisi_hba, phy_no, CHL_INT2_MSK);
@@ -1582,6 +1622,9 @@ static void handle_chl_int2_v3_hw(struct hisi_hba *hisi_hba, int phy_no)
        struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
        struct pci_dev *pci_dev = hisi_hba->pci_dev;
        struct device *dev = hisi_hba->dev;
+       static const u32 msk = BIT(CHL_INT2_RX_DISP_ERR_OFF) |
+                       BIT(CHL_INT2_RX_CODE_ERR_OFF) |
+                       BIT(CHL_INT2_RX_INVLD_DW_OFF);
 
        irq_value &= ~irq_msk;
        if (!irq_value)
@@ -1602,6 +1645,25 @@ static void handle_chl_int2_v3_hw(struct hisi_hba *hisi_hba, int phy_no)
                        hisi_sas_notify_phy_event(phy, HISI_PHYE_LINK_RESET);
        }
 
+       if (pci_dev->revision > 0x20 && (irq_value & msk)) {
+               struct asd_sas_phy *sas_phy = &phy->sas_phy;
+               struct sas_phy *sphy = sas_phy->phy;
+
+               phy_get_events_v3_hw(hisi_hba, phy_no);
+
+               if (irq_value & BIT(CHL_INT2_RX_INVLD_DW_OFF))
+                       dev_info(dev, "phy%d invalid dword cnt:   %u\n", phy_no,
+                                sphy->invalid_dword_count);
+
+               if (irq_value & BIT(CHL_INT2_RX_CODE_ERR_OFF))
+                       dev_info(dev, "phy%d code violation cnt:  %u\n", phy_no,
+                                phy->code_violation_err_count);
+
+               if (irq_value & BIT(CHL_INT2_RX_DISP_ERR_OFF))
+                       dev_info(dev, "phy%d disparity error cnt: %u\n", phy_no,
+                                sphy->running_disparity_error_count);
+       }
+
        if ((irq_value & BIT(CHL_INT2_RX_INVLD_DW_OFF)) &&
            (pci_dev->revision == 0x20)) {
                u32 reg_value;
@@ -2230,31 +2292,6 @@ static u32 get_phys_state_v3_hw(struct hisi_hba *hisi_hba)
        return hisi_sas_read32(hisi_hba, PHY_STATE);
 }
 
-static void phy_get_events_v3_hw(struct hisi_hba *hisi_hba, int phy_no)
-{
-       struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
-       struct asd_sas_phy *sas_phy = &phy->sas_phy;
-       struct sas_phy *sphy = sas_phy->phy;
-       u32 reg_value;
-
-       /* loss dword sync */
-       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_DWS_LOST);
-       sphy->loss_of_dword_sync_count += reg_value;
-
-       /* phy reset problem */
-       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_RESET_PROB);
-       sphy->phy_reset_problem_count += reg_value;
-
-       /* invalid dword */
-       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_INVLD_DW);
-       sphy->invalid_dword_count += reg_value;
-
-       /* disparity err */
-       reg_value = hisi_sas_phy_read32(hisi_hba, phy_no, ERR_CNT_DISP_ERR);
-       sphy->running_disparity_error_count += reg_value;
-
-}
-
 static int disable_host_v3_hw(struct hisi_hba *hisi_hba)
 {
        struct device *dev = hisi_hba->dev;
index 21309d5b456ddaf6eb39a57f98b932ba05fa1da8..e893949a3d118421481d3fb5de90f170a70440af 100644 (file)
@@ -798,7 +798,7 @@ EXPORT_SYMBOL_GPL(iscsi_conn_send_pdu);
  * @datalen: len of buffer
  *
  * iscsi_cmd_rsp sets up the scsi_cmnd fields based on the PDU and
- * then completes the command and task.
+ * then completes the command and task. Called under back_lock.
  **/
 static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                               struct iscsi_task *task, char *data,
@@ -894,6 +894,9 @@ out:
  * @conn: iscsi connection
  * @hdr:  iscsi pdu
  * @task: scsi command task
+ *
+ * iscsi_data_in_rsp sets up the scsi_cmnd fields based on the data received,
+ * then completes the command and task. Called under back_lock.
  **/
 static void
 iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
@@ -978,6 +981,16 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
        return 0;
 }
 
+/**
+ * iscsi_nop_out_rsp - SCSI NOP Response processing
+ * @task: scsi command task
+ * @nop: the nop structure
+ * @data: where to put the data
+ * @datalen: length of data
+ *
+ * iscsi_nop_out_rsp handles the nop response to a nop sent by the kernel
+ * or by user space. Called under back_lock.
+ **/
 static int iscsi_nop_out_rsp(struct iscsi_task *task,
                             struct iscsi_nopin *nop, char *data, int datalen)
 {
@@ -1750,7 +1763,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
        return 0;
 
 prepd_reject:
+       spin_lock_bh(&session->back_lock);
        iscsi_complete_task(task, ISCSI_TASK_REQUEUE_SCSIQ);
+       spin_unlock_bh(&session->back_lock);
 reject:
        spin_unlock_bh(&session->frwd_lock);
        ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
@@ -1758,7 +1773,9 @@ reject:
        return SCSI_MLQUEUE_TARGET_BUSY;
 
 prepd_fault:
+       spin_lock_bh(&session->back_lock);
        iscsi_complete_task(task, ISCSI_TASK_REQUEUE_SCSIQ);
+       spin_unlock_bh(&session->back_lock);
 fault:
        spin_unlock_bh(&session->frwd_lock);
        ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
@@ -3075,8 +3092,9 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
                state = ISCSI_TASK_ABRT_SESS_RECOV;
                if (task->state == ISCSI_TASK_PENDING)
                        state = ISCSI_TASK_COMPLETED;
+               spin_lock_bh(&session->back_lock);
                iscsi_complete_task(task, state);
-
+               spin_unlock_bh(&session->back_lock);
        }
 }
 
index 9923e9e3b8843b44e6a4212a7411628946892f69..c3fe3f3a78f591a50c640dd810a719cebc83049a 100644 (file)
@@ -129,12 +129,17 @@ static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
        BUG_ON(sg->length == 0);
 
        /*
+        * We always map for the recv path.
+        *
         * If the page count is greater than one it is ok to send
         * to the network layer's zero copy send path. If not we
-        * have to go the slow sendmsg path. We always map for the
-        * recv path.
+        * have to go the slow sendmsg path.
+        *
+        * Same goes for slab pages: skb_can_coalesce() allows
+        * coalescing neighboring slab objects into a single frag, which
+        * triggers one of the hardened usercopy checks.
         */
-       if (page_count(sg_page(sg)) >= 1 && !recv)
+       if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)))
                return;
 
        if (recv) {
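
The mapping predicate now skips the kmap only on the send path, and only for refcounted non-slab pages: a slab page on the zero-copy path can be coalesced by skb_can_coalesce() with a neighboring slab object and then trip a hardened usercopy check. A truth-table sketch of the eligibility test, with plain booleans standing in for page state:

#include <stdbool.h>
#include <stdio.h>

/* true:  the page may go to the network layer's zero-copy send path
 * false: it must be mapped and sent via the slow sendmsg path */
static bool zero_copy_ok(bool recv, int page_count, bool page_is_slab)
{
	return !recv && page_count >= 1 && !page_is_slab;
}

int main(void)
{
	printf("send, refcounted, !slab -> %d\n", zero_copy_ok(false, 2, false));
	printf("send, refcounted,  slab -> %d\n", zero_copy_ok(false, 2, true));
	printf("recv, refcounted, !slab -> %d\n", zero_copy_ok(true, 2, false));
	return 0;
}
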
index 3b5873f6751e86cfa1c6252bf6d51d7eb0b9414c..7fcdaed3fa945539a64fc78e349a00387cd41ea2 100644 (file)
@@ -4090,7 +4090,7 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
        /* Sanity check to ensure our sizing is right for both SCSI and NVME */
        if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) {
                lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-                               "6426 Common buffer size %ld exceeds %d\n",
+                               "6426 Common buffer size %zd exceeds %d\n",
                                sizeof(struct lpfc_io_buf),
                                LPFC_COMMON_IO_BUF_SZ);
                return 0;
@@ -10052,7 +10052,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 {
        struct pci_dev *pdev = phba->pcidev;
        unsigned long bar0map_len, bar1map_len, bar2map_len;
-       int error = -ENODEV;
+       int error;
        uint32_t if_type;
 
        if (!pdev)
@@ -10071,7 +10071,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
         */
        if (pci_read_config_dword(pdev, LPFC_SLI_INTF,
                                  &phba->sli4_hba.sli_intf.word0)) {
-               return error;
+               return -ENODEV;
        }
 
        /* There is no SLI3 failback for SLI4 devices. */
@@ -10081,7 +10081,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                                "2894 SLI_INTF reg contents invalid "
                                "sli_intf reg 0x%x\n",
                                phba->sli4_hba.sli_intf.word0);
-               return error;
+               return -ENODEV;
        }
 
        if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf);
@@ -10105,7 +10105,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                        dev_printk(KERN_ERR, &pdev->dev,
                                   "ioremap failed for SLI4 PCI config "
                                   "registers.\n");
-                       goto out;
+                       return -ENODEV;
                }
                phba->pci_bar0_memmap_p = phba->sli4_hba.conf_regs_memmap_p;
                /* Set up BAR0 PCI config space register memory map */
@@ -10116,7 +10116,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                if (if_type >= LPFC_SLI_INTF_IF_TYPE_2) {
                        dev_printk(KERN_ERR, &pdev->dev,
                           "FATAL - No BAR0 mapping for SLI4, if_type 2\n");
-                       goto out;
+                       return -ENODEV;
                }
                phba->sli4_hba.conf_regs_memmap_p =
                                ioremap(phba->pci_bar0_map, bar0map_len);
@@ -10124,7 +10124,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                        dev_printk(KERN_ERR, &pdev->dev,
                                "ioremap failed for SLI4 PCI config "
                                "registers.\n");
-                       goto out;
+                       return -ENODEV;
                }
                lpfc_sli4_bar0_register_memmap(phba, if_type);
        }
@@ -10170,6 +10170,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                if (!phba->sli4_hba.drbl_regs_memmap_p) {
                        dev_err(&pdev->dev,
                           "ioremap failed for SLI4 HBA doorbell registers.\n");
+                       error = -ENOMEM;
                        goto out_iounmap_conf;
                }
                phba->pci_bar2_memmap_p = phba->sli4_hba.drbl_regs_memmap_p;
@@ -10219,6 +10220,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                if (!phba->sli4_hba.dpp_regs_memmap_p) {
                        dev_err(&pdev->dev,
                           "ioremap failed for SLI4 HBA dpp registers.\n");
+                       error = -ENOMEM;
                        goto out_iounmap_ctrl;
                }
                phba->pci_bar4_memmap_p = phba->sli4_hba.dpp_regs_memmap_p;
@@ -10249,7 +10251,7 @@ out_iounmap_ctrl:
        iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
 out_iounmap_conf:
        iounmap(phba->sli4_hba.conf_regs_memmap_p);
-out:
+
        return error;
 }
 
@@ -11137,7 +11139,8 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
                lpfc_sli4_ras_dma_free(phba);
 
        /* Stop the SLI4 device port */
-       phba->pport->work_port_events = 0;
+       if (phba->pport)
+               phba->pport->work_port_events = 0;
 }
 
  /**
index 55ab9d3ee4ba09a217da7889f9bf8de19dfd7d77..1aa00d2c3f74e329341114562527c3f78914f6ec 100644 (file)
@@ -965,7 +965,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
        struct lpfc_nodelist *ndlp;
        struct lpfc_nvme_fcpreq_priv *freqpriv;
        struct lpfc_nvme_lport *lport;
-       uint32_t code, status, idx, cpu;
+       uint32_t code, status, idx;
        uint16_t cid, sqhd, data;
        uint32_t *ptr;
 
@@ -1138,6 +1138,7 @@ out_err:
                lpfc_nvme_ktime(phba, lpfc_ncmd);
        }
        if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
+               uint32_t cpu;
                idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
                cpu = smp_processor_id();
                if (cpu < LPFC_CHECK_CPU_CNT) {
index d0817facdae36a6d188dfa7cf3dcb48d014cc591..57b4a463b5892d2f0cd0f56697d3a4b168b9a500 100644 (file)
@@ -9881,7 +9881,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
         * The WQE can be either 64 or 128 bytes,
         */
 
-       lockdep_assert_held(&phba->hbalock);
+       lockdep_assert_held(&pring->ring_lock);
 
        if (piocb->sli4_xritag == NO_XRI) {
                if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
index dace907744a5dc02edca5751441f97ff37432b12..293f5cf524d7a3918f7151661dc6fae0af8da26e 100644 (file)
@@ -3924,12 +3924,12 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr)
                /*
                 * The cur_state should not last for more than max_wait secs
                 */
-               for (i = 0; i < max_wait; i++) {
+               for (i = 0; i < max_wait * 50; i++) {
                        curr_abs_state = instance->instancet->
                                read_fw_status_reg(instance);
 
                        if (abs_state == curr_abs_state) {
-                               msleep(1000);
+                               msleep(20);
                        } else
                                break;
                }
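
The polling rework preserves the overall bound while tightening the quantum: max_wait iterations of msleep(1000) and max_wait * 50 iterations of msleep(20) both wait roughly max_wait seconds, but the new loop observes a firmware state change up to 50 times sooner. A quick check of the arithmetic:

#include <stdio.h>

int main(void)
{
	int max_wait = 180;	/* seconds; the value is illustrative */

	/* old loop: max_wait iterations of msleep(1000)
	 * new loop: max_wait * 50 iterations of msleep(20) */
	printf("old total: %d ms, polled every 1000 ms\n", max_wait * 1000);
	printf("new total: %d ms, polled every 20 ms\n",
	       (max_wait * 50) * 20);
	return 0;
}
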
index 2eb1ae721a7d3663512d1b4b18850b4cd7c500c5..f928c4d3a1efe3f5b8a5441344e358aaa9875fd0 100644 (file)
@@ -1652,6 +1652,8 @@ qla2x00_port_speed_store(struct device *dev, struct device_attribute *attr,
        }
 
        rval = kstrtol(buf, 10, &type);
+       if (rval)
+               return rval;
        speed = type;
        if (type == 40 || type == 80 || type == 160 ||
            type == 320) {
index ead17288e2a77b4a7b2ea155f976604e35390108..5819a45ac5ef719f0ab5eb77f9d8e508e4a9da1b 100644 (file)
@@ -193,6 +193,8 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused)
 
        for (i = 0; i < vha->hw->max_qpairs; i++) {
                qpair = vha->hw->queue_pair_map[i];
+               if (!qpair)
+                       continue;
                qla_core_sbt_cmd += qpair->tgt_counters.qla_core_sbt_cmd;
                core_qla_que_buf += qpair->tgt_counters.core_qla_que_buf;
                qla_core_ret_ctio += qpair->tgt_counters.qla_core_ret_ctio;
index 63f8e3c1984163343109c5760bf68eb58892f0c6..456a41d2e2c6c1958ce519c09c598eb09655925b 100644 (file)
@@ -1132,7 +1132,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
        /* if initiator doing write or target doing read */
        if (direction_to_device) {
                for_each_sg(sgl, sg, tot_dsds, i) {
-                       dma_addr_t sle_phys = sg_phys(sg);
+                       u64 sle_phys = sg_phys(sg);
 
                        /* If SGE addr + len flips bits in upper 32-bits */
                        if (MSD(sle_phys + sg->length) ^ MSD(sle_phys)) {
@@ -1178,7 +1178,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
 
                        ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe023,
                            "%s: sg[%x] (phys=%llx sglen=%x) ldma_sg_len: %x dif_bundl_len: %x ldma_needed: %x\n",
-                           __func__, i, sg_phys(sg), sglen, ldma_sg_len,
+                           __func__, i, (u64)sg_phys(sg), sglen, ldma_sg_len,
                            difctx->dif_bundl_len, ldma_needed);
 
                        while (sglen) {
index 5d9ccbab75815b43bc6238a96ee0e120f368649c..75ec43aa8df381c0f82a75ecd393fc19c4251c14 100644 (file)
@@ -2764,6 +2764,12 @@ static void pqi_process_raid_io_error(struct pqi_io_request *io_request)
                                sshdr.sense_key == HARDWARE_ERROR &&
                                sshdr.asc == 0x3e &&
                                sshdr.ascq == 0x1) {
+                       struct pqi_ctrl_info *ctrl_info = shost_to_hba(scmd->device->host);
+                       struct pqi_scsi_dev *device = scmd->device->hostdata;
+
+                       if (printk_ratelimit())
+                               scmd_printk(KERN_ERR, scmd, "received 'logical unit failure' from controller for scsi %d:%d:%d:%d\n",
+                                       ctrl_info->scsi_host->host_no, device->bus, device->target, device->lun);
                        pqi_take_device_offline(scmd->device, "RAID");
                        host_byte = DID_NO_CONNECT;
                }
index f2d3df357a973ab2ad13cc4ac22819a4affb7538..0e855b5afe82a7d93b1183cee2b9223bf0b150c4 100644 (file)
@@ -640,7 +640,7 @@ static int ufs_hi3670_init(struct ufs_hba *hba)
        return 0;
 }
 
-static struct ufs_hba_variant_ops ufs_hba_hi3660_vops = {
+static const struct ufs_hba_variant_ops ufs_hba_hi3660_vops = {
        .name = "hi3660",
        .init = ufs_hi3660_init,
        .link_startup_notify = ufs_hisi_link_startup_notify,
@@ -649,7 +649,7 @@ static struct ufs_hba_variant_ops ufs_hba_hi3660_vops = {
        .resume = ufs_hisi_resume,
 };
 
-static struct ufs_hba_variant_ops ufs_hba_hi3670_vops = {
+static const struct ufs_hba_variant_ops ufs_hba_hi3670_vops = {
        .name = "hi3670",
        .init = ufs_hi3670_init,
        .link_startup_notify = ufs_hisi_link_startup_notify,
@@ -669,13 +669,10 @@ MODULE_DEVICE_TABLE(of, ufs_hisi_of_match);
 static int ufs_hisi_probe(struct platform_device *pdev)
 {
        const struct of_device_id *of_id;
-       struct ufs_hba_variant_ops *vops;
-       struct device *dev = &pdev->dev;
 
-       of_id = of_match_node(ufs_hisi_of_match, dev->of_node);
-       vops = (struct ufs_hba_variant_ops *)of_id->data;
+       of_id = of_match_node(ufs_hisi_of_match, pdev->dev.of_node);
 
-       return ufshcd_pltfrm_init(pdev, vops);
+       return ufshcd_pltfrm_init(pdev, of_id->data);
 }
 
 static int ufs_hisi_remove(struct platform_device *pdev)
index 895a9b5ac98993ecac1c7c3ff2621548b3dd6cea..27213676329c039281becc3bd3d97a1128b3b62c 100644 (file)
@@ -297,7 +297,7 @@ static void ufshcd_init_lanes_per_dir(struct ufs_hba *hba)
  * Returns 0 on success, non-zero value on failure
  */
 int ufshcd_pltfrm_init(struct platform_device *pdev,
-                      struct ufs_hba_variant_ops *vops)
+                      const struct ufs_hba_variant_ops *vops)
 {
        struct ufs_hba *hba;
        void __iomem *mmio_base;
index df64c418034046bd9810cc66bb5ca655a50da2b4..1f29e1fd6d5206f579193759792370a6dec91b26 100644 (file)
@@ -17,7 +17,7 @@
 #include "ufshcd.h"
 
 int ufshcd_pltfrm_init(struct platform_device *pdev,
-                      struct ufs_hba_variant_ops *vops);
+                      const struct ufs_hba_variant_ops *vops);
 void ufshcd_pltfrm_shutdown(struct platform_device *pdev);
 
 #ifdef CONFIG_PM
index 69ba7445d2b3705556c441ceb84b7aeb964d18f2..ecfa898b9ccc060de3cad2bd08084b2554a39a9f 100644 (file)
@@ -546,7 +546,7 @@ struct ufs_hba {
        int nutrs;
        int nutmrs;
        u32 ufs_version;
-       struct ufs_hba_variant_ops *vops;
+       const struct ufs_hba_variant_ops *vops;
        void *priv;
        unsigned int irq;
        bool is_irq_enabled;
index 1a6f150cd2d873682d21e58660958d2c7bb51893..8af01777d09c74f344ad325256dbd30248febe32 100644 (file)
@@ -586,7 +586,6 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc)
                return FAILED;
 
        memset(cmd, 0, sizeof(*cmd));
-       cmd->sc = sc;
        cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
                .type = VIRTIO_SCSI_T_TMF,
                .subtype = cpu_to_virtio32(vscsi->vdev,
@@ -645,7 +644,6 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
                return FAILED;
 
        memset(cmd, 0, sizeof(*cmd));
-       cmd->sc = sc;
        cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
                .type = VIRTIO_SCSI_T_TMF,
                .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
index 5831e0eecea120f9157cb566311839b9273755ce..9704b135a7bc5f10fee7b37140686485ed24d59b 100644 (file)
@@ -1663,7 +1663,7 @@ static void tcmu_dev_kref_release(struct kref *kref)
        WARN_ON(!all_expired);
 
        tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
-       kfree(udev->data_bitmap);
+       bitmap_free(udev->data_bitmap);
        mutex_unlock(&udev->cmdr_lock);
 
        call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
@@ -1794,11 +1794,12 @@ static int tcmu_netlink_event_send(struct tcmu_dev *udev,
 
        ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0,
                                      TCMU_MCGRP_CONFIG, GFP_KERNEL);
-       /* We don't care if no one is listening */
-       if (ret == -ESRCH)
-               ret = 0;
-       if (!ret)
-               ret = tcmu_wait_genl_cmd_reply(udev);
+
+       /* Wait during an add as the listener may not be up yet */
+       if (ret == 0 ||
+          (ret == -ESRCH && cmd == TCMU_CMD_ADDED_DEVICE))
+               return tcmu_wait_genl_cmd_reply(udev);
+
        return ret;
 }
 
@@ -1870,9 +1871,7 @@ static int tcmu_configure_device(struct se_device *dev)
        info = &udev->uio_info;
 
        mutex_lock(&udev->cmdr_lock);
-       udev->data_bitmap = kcalloc(BITS_TO_LONGS(udev->max_blocks),
-                                   sizeof(unsigned long),
-                                   GFP_KERNEL);
+       udev->data_bitmap = bitmap_zalloc(udev->max_blocks, GFP_KERNEL);
        mutex_unlock(&udev->cmdr_lock);
        if (!udev->data_bitmap) {
                ret = -ENOMEM;
@@ -1959,7 +1958,7 @@ err_register:
        vfree(udev->mb_addr);
        udev->mb_addr = NULL;
 err_vzalloc:
-       kfree(udev->data_bitmap);
+       bitmap_free(udev->data_bitmap);
        udev->data_bitmap = NULL;
 err_bitmap_alloc:
        kfree(info->name);
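
bitmap_zalloc(nbits, gfp) is the dedicated replacement for the open-coded kcalloc(BITS_TO_LONGS(nbits), sizeof(unsigned long), gfp), paired with bitmap_free() instead of kfree(). A userspace model of the sizing identity the conversion relies on (the kernel's gfp_t argument has no analogue here):

#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Userspace sketch of bitmap_zalloc()/bitmap_free(). */
static unsigned long *bitmap_zalloc_model(unsigned long nbits)
{
	return calloc(BITS_TO_LONGS(nbits), sizeof(unsigned long));
}

static void bitmap_free_model(unsigned long *bitmap)
{
	free(bitmap);
}

int main(void)
{
	unsigned long nbits = 1000;	/* e.g. udev->max_blocks */
	unsigned long *bm = bitmap_zalloc_model(nbits);

	if (!bm)
		return 1;
	printf("%lu bits -> %lu longs (%lu bytes)\n", nbits,
	       (unsigned long)BITS_TO_LONGS(nbits),
	       (unsigned long)(BITS_TO_LONGS(nbits) * sizeof(unsigned long)));
	bitmap_free_model(bm);
	return 0;
}
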
index e695adb0e5733db9e00f0934506a0af57de09601..2dc5703eac519f0bbc706092b4f50155aa0fa4b0 100644 (file)
@@ -2844,8 +2844,8 @@ void radeonfb_pm_init(struct radeonfb_info *rinfo, int dynclk, int ignore_devlis
                 * in some desktop G4s), Via (M9+ chip on iBook G4) and
                 * Snowy (M11 chip on iBook G4 manufactured after July 2005)
                 */
-               if (!strcmp(rinfo->of_node->name, "ATY,JasperParent") ||
-                   !strcmp(rinfo->of_node->name, "ATY,SnowyParent")) {
+               if (of_node_name_eq(rinfo->of_node, "ATY,JasperParent") ||
+                   of_node_name_eq(rinfo->of_node, "ATY,SnowyParent")) {
                        rinfo->reinit_func = radeon_reinitialize_M10;
                        rinfo->pm_mode |= radeon_pm_off;
                }
@@ -2855,7 +2855,7 @@ void radeonfb_pm_init(struct radeonfb_info *rinfo, int dynclk, int ignore_devlis
                        rinfo->pm_mode |= radeon_pm_off;
                }
 #endif
-               if (!strcmp(rinfo->of_node->name, "ATY,ViaParent")) {
+               if (of_node_name_eq(rinfo->of_node, "ATY,ViaParent")) {
                        rinfo->reinit_func = radeon_reinitialize_M9P;
                        rinfo->pm_mode |= radeon_pm_off;
                }
index 9af54c2368fdb7252219329e9405a73ad4a2f479..a6dce1a78490ae21d82c3250ac42507002745ef4 100644 (file)
@@ -486,8 +486,8 @@ static int cg14_probe(struct platform_device *op)
                                          info->var.xres);
        info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
-       if (!strcmp(dp->parent->name, "sbus") ||
-           !strcmp(dp->parent->name, "sbi")) {
+       if (of_node_name_eq(dp->parent, "sbus") ||
+           of_node_name_eq(dp->parent, "sbi")) {
                info->fix.smem_start = op->resource[0].start;
                par->iospace = op->resource[0].flags & IORESOURCE_BITS;
        } else {
index 1bd95b02f3aa413463183503f0b29223ddf09822..6d42def8436b24b1d36e3a453d5eb466e0a872de 100644 (file)
@@ -369,7 +369,7 @@ static int cg3_probe(struct platform_device *op)
        info->var.red.length = 8;
        info->var.green.length = 8;
        info->var.blue.length = 8;
-       if (!strcmp(dp->name, "cgRDI"))
+       if (of_node_name_eq(dp, "cgRDI"))
                par->flags |= CG3_FLAG_RDI;
        if (par->flags & CG3_FLAG_RDI)
                cg3_rdi_maybe_fixup_var(&info->var, dp);
index 40182ed85648390ba6c803a555373b5fb63ae2d9..ca549e1532e657ad98515033761adfc833a7a874 100644 (file)
@@ -349,7 +349,7 @@ static void init_chips(struct fb_info *p, unsigned long addr)
 static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
 {
        struct fb_info *p;
-       unsigned long addr, size;
+       unsigned long addr;
        unsigned short cmd;
        int rc = -ENODEV;
 
@@ -361,7 +361,6 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
        if ((dp->resource[0].flags & IORESOURCE_MEM) == 0)
                goto err_disable;
        addr = pci_resource_start(dp, 0);
-       size = pci_resource_len(dp, 0);
        if (addr == 0)
                goto err_disable;
 
index 39509ccd92f1ef011c2543cfb4dff1112d7f50fd..3b5bd666b9525551ca616333c6a13f566d6e047f 100644 (file)
@@ -75,36 +75,33 @@ EXPORT_SYMBOL(fb_get_options);
  *     NOTE: This function is a __setup and __init function.
  *            It only stores the options.  Drivers have to call
  *            fb_get_options() as necessary.
- *
- *     Returns zero.
- *
  */
 static int __init video_setup(char *options)
 {
-       int i, global = 0;
-
        if (!options || !*options)
-               global = 1;
+               goto out;
 
-       if (!global && !strncmp(options, "ofonly", 6)) {
+       if (!strncmp(options, "ofonly", 6)) {
                ofonly = 1;
-               global = 1;
+               goto out;
        }
 
-       if (!global && !strchr(options, ':')) {
-               fb_mode_option = options;
-               global = 1;
-       }
+       if (strchr(options, ':')) {
+               /* named */
+               int i;
 
-       if (!global) {
                for (i = 0; i < FB_MAX; i++) {
                        if (video_options[i] == NULL) {
                                video_options[i] = options;
                                break;
                        }
                }
+       } else {
+               /* global */
+               fb_mode_option = options;
        }
 
+out:
        return 1;
 }
 __setup("video=", video_setup);
index bfa1360ec750491e2cc065ce8021a3f62495b71c..cd059a801662e9f78d0bc43669d21c64d9e9b47f 100644 (file)
@@ -656,11 +656,14 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
                kfree(save);
        }
 
+       if (logo_shown == FBCON_LOGO_DONTSHOW)
+               return;
+
        if (logo_lines > vc->vc_bottom) {
                logo_shown = FBCON_LOGO_CANSHOW;
                printk(KERN_INFO
                       "fbcon_init: disable boot-logo (boot-logo bigger than screen).\n");
-       } else if (logo_shown != FBCON_LOGO_DONTSHOW) {
+       } else {
                logo_shown = FBCON_LOGO_DRAW;
                vc->vc_top = logo_lines;
        }
@@ -999,7 +1002,7 @@ static const char *fbcon_startup(void)
                        if (!softback_buf) {
                                softback_buf =
                                    (unsigned long)
-                                   kmalloc(fbcon_softback_size,
+                                   kvmalloc(fbcon_softback_size,
                                            GFP_KERNEL);
                                if (!softback_buf) {
                                        fbcon_softback_size = 0;
@@ -1008,7 +1011,7 @@ static const char *fbcon_startup(void)
                        }
                } else {
                        if (softback_buf) {
-                               kfree((void *) softback_buf);
+                               kvfree((void *) softback_buf);
                                softback_buf = 0;
                                softback_top = 0;
                        }
@@ -1066,6 +1069,9 @@ static void fbcon_init(struct vc_data *vc, int init)
 
        cap = info->flags;
 
+       if (console_loglevel <= CONSOLE_LOGLEVEL_QUIET)
+               logo_shown = FBCON_LOGO_DONTSHOW;
+
        if (vc != svc || logo_shown == FBCON_LOGO_DONTSHOW ||
            (info->fix.type == FB_TYPE_TEXT))
                logo = 0;
@@ -3672,7 +3678,7 @@ static void fbcon_exit(void)
        }
 #endif
 
-       kfree((void *)softback_buf);
+       kvfree((void *)softback_buf);
        softback_buf = 0UL;
 
        for_each_registered_fb(i) {
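
The kmalloc()/kfree() to kvmalloc()/kvfree() switch above matters because
the softback buffer can be large enough that a physically contiguous
kmalloc() allocation fails. A minimal sketch of the pairing rule; this is
not fbcon code and the helper names are illustrative:

        #include <linux/mm.h>

        /* kvmalloc() tries kmalloc() first and falls back to vmalloc()
         * for larger sizes; either way the buffer must be released with
         * kvfree(), which dispatches to kfree() or vfree() as needed. */
        static void *softback_demo_alloc(size_t size)
        {
                return kvmalloc(size, GFP_KERNEL);
        }

        static void softback_demo_free(void *buf)
        {
                kvfree(buf);
        }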
index cb43a2258c5185ec8c8112eab30f6375cdf6c899..4721491e6c8cf67ab100a957809f453ef405ab6b 100644 (file)
@@ -431,6 +431,9 @@ static void fb_do_show_logo(struct fb_info *info, struct fb_image *image,
 {
        unsigned int x;
 
+       if (image->width > info->var.xres || image->height > info->var.yres)
+               return;
+
        if (rotate == FB_ROTATE_UR) {
                for (x = 0;
                     x < num && image->dx + image->width <= info->var.xres;
index dd3128990776ef35670fbc4e3cab09bbf6b10ebe..3558a70a66640afea5d1259bb7b582b0ba0c5891 100644 (file)
@@ -978,6 +978,8 @@ void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs)
        get_monspecs(edid, specs);
 
        specs->modedb = fb_create_modedb(edid, &specs->modedb_len, specs);
+       if (!specs->modedb)
+               return;
 
        /*
         * Workaround for buggy EDIDs that sets that the first
index 6b1915872af1658c06f43d057f387303f6e69386..b7aee0c427a87978fbac441eaeb7edfca37057a7 100644 (file)
@@ -944,7 +944,7 @@ static int ffb_probe(struct platform_device *op)
 
        info->var.accel_flags = FB_ACCELF_TEXT;
 
-       if (!strcmp(dp->name, "SUNW,afb"))
+       if (of_node_name_eq(dp, "SUNW,afb"))
                par->flags |= FFB_FLAG_AFB;
 
        par->board_type = of_getintprop_default(dp, "board_type", 0);
index f4f76373b2a871ae26532afe8729282939988942..b1906cf5a8f05e6756935921d3838bccd93ec3c5 100644 (file)
@@ -33,6 +33,8 @@
 #include <linux/pci.h>
 #include <linux/cs5535.h>
 
+#include <asm/olpc.h>
+
 #include "gxfb.h"
 
 static char *mode_option;
@@ -107,9 +109,6 @@ static struct fb_videomode gx_modedb[] = {
          FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
 };
 
-#ifdef CONFIG_OLPC
-#include <asm/olpc.h>
-
 static struct fb_videomode gx_dcon_modedb[] = {
        /* The only mode the DCON has is 1200x900 */
        { NULL, 50, 1200, 900, 17460, 24, 8, 4, 5, 8, 3,
@@ -128,14 +127,6 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
        }
 }
 
-#else
-static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
-{
-       *modedb = (struct fb_videomode *) gx_modedb;
-       *size = ARRAY_SIZE(gx_modedb);
-}
-#endif
-
 static int gxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
        if (var->xres > 1600 || var->yres > 1200)
index 138da6cb6cbcd5efbb8d1668ab66b87abfd99de7..17ab905811b1e8db56454410bdd7b96f5d1e2fb2 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/pci.h>
 #include <linux/uaccess.h>
 
+#include <asm/olpc.h>
+
 #include "lxfb.h"
 
 static char *mode_option;
@@ -216,9 +218,6 @@ static struct fb_videomode geode_modedb[] = {
          0, FB_VMODE_NONINTERLACED, 0 },
 };
 
-#ifdef CONFIG_OLPC
-#include <asm/olpc.h>
-
 static struct fb_videomode olpc_dcon_modedb[] = {
        /* The only mode the DCON has is 1200x900 */
        { NULL, 50, 1200, 900, 17460, 24, 8, 4, 5, 8, 3,
@@ -237,14 +236,6 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
        }
 }
 
-#else
-static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
-{
-       *modedb = (struct fb_videomode *) geode_modedb;
-       *size = ARRAY_SIZE(geode_modedb);
-}
-#endif
-
 static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
        if (var->xres > 1920 || var->yres > 1440)
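
Dropping the #else stanzas in gxfb and lxfb works because asm/olpc.h can
now be included unconditionally: when CONFIG_OLPC is off it provides no-op
stubs for its predicates, so the OLPC branch inside get_modedb() folds to
dead code. Roughly, paraphrased from arch/x86/include/asm/olpc.h with
details elided:

        #ifdef CONFIG_OLPC
        extern struct olpc_platform_t olpc_platform_info;
        /* the real machine_is_olpc() tests olpc_platform_info */
        #else
        static inline int machine_is_olpc(void)
        {
                return 0;
        }
        #endif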
index 5d9670daf60ec18fee03e8c896d1f92617606048..4b9615e4ce746242015010cdf7b0d956c0fde6f5 100644 (file)
@@ -1497,8 +1497,8 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        switch (pdev->device) {
                case PCI_DEVICE_ID_IMS_TT128: /* IMS,tt128mbA */
                        par->ramdac = IBM;
-                       if (dp && ((strcmp(dp->name, "IMS,tt128mb8") == 0) ||
-                                  (strcmp(dp->name, "IMS,tt128mb8A") == 0)))
+                       if (of_node_name_eq(dp, "IMS,tt128mb8") ||
+                           of_node_name_eq(dp, "IMS,tt128mb8A"))
                                par->ramdac = TVP;
                        break;
                case PCI_DEVICE_ID_IMS_TT3D:  /* IMS,tt3d */
index 2bd328883178d0158788fd1152360853792eaec5..09af721638fb64cc678762962826390a9be19bb7 100644 (file)
@@ -211,36 +211,22 @@ static const struct file_operations misc_fops = {
 static void mbxfb_debugfs_init(struct fb_info *fbi)
 {
        struct mbxfb_info *mfbi = fbi->par;
-       struct mbxfb_debugfs_data *dbg;
-
-       dbg = kzalloc(sizeof(struct mbxfb_debugfs_data), GFP_KERNEL);
-       mfbi->debugfs_data = dbg;
-
-       dbg->dir = debugfs_create_dir("mbxfb", NULL);
-       dbg->sysconf = debugfs_create_file("sysconf", 0444, dbg->dir,
-                                     fbi, &sysconf_fops);
-       dbg->clock = debugfs_create_file("clock", 0444, dbg->dir,
-                                   fbi, &clock_fops);
-       dbg->display = debugfs_create_file("display", 0444, dbg->dir,
-                                     fbi, &display_fops);
-       dbg->gsctl = debugfs_create_file("gsctl", 0444, dbg->dir,
-                                   fbi, &gsctl_fops);
-       dbg->sdram = debugfs_create_file("sdram", 0444, dbg->dir,
-                                       fbi, &sdram_fops);
-       dbg->misc = debugfs_create_file("misc", 0444, dbg->dir,
-                                       fbi, &misc_fops);
+       struct dentry *dir;
+
+       dir = debugfs_create_dir("mbxfb", NULL);
+       mfbi->debugfs_dir = dir;
+
+       debugfs_create_file("sysconf", 0444, dir, fbi, &sysconf_fops);
+       debugfs_create_file("clock", 0444, dir, fbi, &clock_fops);
+       debugfs_create_file("display", 0444, dir, fbi, &display_fops);
+       debugfs_create_file("gsctl", 0444, dir, fbi, &gsctl_fops);
+       debugfs_create_file("sdram", 0444, dir, fbi, &sdram_fops);
+       debugfs_create_file("misc", 0444, dir, fbi, &misc_fops);
 }
 
 static void mbxfb_debugfs_remove(struct fb_info *fbi)
 {
        struct mbxfb_info *mfbi = fbi->par;
-       struct mbxfb_debugfs_data *dbg = mfbi->debugfs_data;
-
-       debugfs_remove(dbg->misc);
-       debugfs_remove(dbg->sdram);
-       debugfs_remove(dbg->gsctl);
-       debugfs_remove(dbg->display);
-       debugfs_remove(dbg->clock);
-       debugfs_remove(dbg->sysconf);
-       debugfs_remove(dbg->dir);
+
+       debugfs_remove_recursive(mfbi->debugfs_dir);
 }
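
The mbxfb simplification leans on two debugfs properties: creation calls
need no error handling (a failed dentry is accepted and ignored by later
calls, as the omapdss hunks below also exploit), and
debugfs_remove_recursive() tears down an entire directory, so only the
directory dentry needs to be kept. A minimal self-contained module showing
the same idiom; all names here are illustrative:

        #include <linux/module.h>
        #include <linux/debugfs.h>
        #include <linux/seq_file.h>

        static struct dentry *demo_dir;

        static int demo_show(struct seq_file *s, void *unused)
        {
                seq_puts(s, "hello from debugfs\n");
                return 0;
        }
        DEFINE_SHOW_ATTRIBUTE(demo);

        static int __init demo_init(void)
        {
                /* No error checking needed on any of these calls. */
                demo_dir = debugfs_create_dir("demo", NULL);
                debugfs_create_file("state", 0444, demo_dir, NULL, &demo_fops);
                return 0;
        }

        static void __exit demo_exit(void)
        {
                /* One call removes the directory and everything in it. */
                debugfs_remove_recursive(demo_dir);
        }

        module_init(demo_init);
        module_exit(demo_exit);
        MODULE_LICENSE("GPL");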
index 539b85da08973b9dda4261436e7811fcbf76f7ed..6ded480a69b4ae20ca3bca0eea85d501ad2f3b38 100644 (file)
@@ -74,7 +74,7 @@ struct mbxfb_info {
 
        u32 pseudo_palette[MAX_PALETTES];
 #ifdef CONFIG_FB_MBX_DEBUG
-       void *debugfs_data;
+       struct dentry *debugfs_dir;
 #endif
 
 };
index 057d3cdef92e67963291f1a130cb3df700f4807a..fbc6eafb63c7799478d8bc9374a5bd2f9c1072d1 100644 (file)
@@ -141,6 +141,7 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
                /* Clear PALETTE_ACCESS_CNTL in DAC_CNTL */
                out_le32(par->cmap_adr + 0x58,
                         in_le32(par->cmap_adr + 0x58) & ~0x20);
+               /* fall through */
        case cmap_r128:
                /* Set palette index & data */
                out_8(par->cmap_adr + 0xb0, regno);
@@ -210,6 +211,7 @@ static int offb_blank(int blank, struct fb_info *info)
                                /* Clear PALETTE_ACCESS_CNTL in DAC_CNTL */
                                out_le32(par->cmap_adr + 0x58,
                                         in_le32(par->cmap_adr + 0x58) & ~0x20);
+                               /* fall through */
                        case cmap_r128:
                                /* Set palette index & data */
                                out_8(par->cmap_adr + 0xb0, i);
@@ -646,7 +648,7 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node)
                }
 #endif
                /* kludge for valkyrie */
-               if (strcmp(dp->name, "valkyrie") == 0)
+               if (of_node_name_eq(dp, "valkyrie"))
                        address += 0x1000;
                offb_init_fb(no_real_node ? "bootx" : NULL,
                             width, height, depth, pitch, address,
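
The bare /* fall through */ comments added to offb above are the marker
that GCC's -Wimplicit-fallthrough heuristic recognizes: they document that
the missing break is deliberate and silence the warning. A self-contained
illustration of the idiom, not kernel code:

        #include <stdio.h>

        static void classify(int n)
        {
                switch (n) {
                case 0:
                        printf("zero, ");
                        /* fall through */
                case 1:
                        printf("small\n");
                        break;
                default:
                        printf("large\n");
                }
        }

        int main(void)
        {
                classify(0);    /* prints "zero, small" */
                classify(5);    /* prints "large" */
                return 0;
        }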
index b4bcf3a4a6475be6bd1da796c476bbbf2a47f915..b5956a1a30d44bf9d913c8ff96e06f60b7f112ca 100644 (file)
@@ -110,19 +110,12 @@ DEFINE_SHOW_ATTRIBUTE(dss);
 
 static struct dentry *dss_debugfs_dir;
 
-static int dss_initialize_debugfs(void)
+static void dss_initialize_debugfs(void)
 {
        dss_debugfs_dir = debugfs_create_dir("omapdss", NULL);
-       if (IS_ERR(dss_debugfs_dir)) {
-               int err = PTR_ERR(dss_debugfs_dir);
-               dss_debugfs_dir = NULL;
-               return err;
-       }
 
        debugfs_create_file("clk", S_IRUGO, dss_debugfs_dir,
                        &dss_debug_dump_clocks, &dss_fops);
-
-       return 0;
 }
 
 static void dss_uninitialize_debugfs(void)
@@ -130,26 +123,19 @@ static void dss_uninitialize_debugfs(void)
        debugfs_remove_recursive(dss_debugfs_dir);
 }
 
-int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *))
+void dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *))
 {
-       struct dentry *d;
-
-       d = debugfs_create_file(name, S_IRUGO, dss_debugfs_dir,
-                       write, &dss_fops);
-
-       return PTR_ERR_OR_ZERO(d);
+       debugfs_create_file(name, S_IRUGO, dss_debugfs_dir, write, &dss_fops);
 }
 #else /* CONFIG_FB_OMAP2_DSS_DEBUGFS */
-static inline int dss_initialize_debugfs(void)
+static inline void dss_initialize_debugfs(void)
 {
-       return 0;
 }
 static inline void dss_uninitialize_debugfs(void)
 {
 }
-int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *))
+void dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *))
 {
-       return 0;
 }
 #endif /* CONFIG_FB_OMAP2_DSS_DEBUGFS */
 
@@ -182,15 +168,11 @@ static struct notifier_block omap_dss_pm_notif_block = {
 
 static int __init omap_dss_probe(struct platform_device *pdev)
 {
-       int r;
-
        core.pdev = pdev;
 
        dss_features_init(omapdss_get_version());
 
-       r = dss_initialize_debugfs();
-       if (r)
-               goto err_debugfs;
+       dss_initialize_debugfs();
 
        if (def_disp_name)
                core.default_display_name = def_disp_name;
@@ -198,10 +180,6 @@ static int __init omap_dss_probe(struct platform_device *pdev)
        register_pm_notifier(&omap_dss_pm_notif_block);
 
        return 0;
-
-err_debugfs:
-
-       return r;
 }
 
 static int omap_dss_remove(struct platform_device *pdev)
index f1eb8b0f8a2aac3358b4a23a133f244ed385a127..5ce893c1923d283758e46e7c1e6690cefc10e022 100644 (file)
@@ -60,7 +60,7 @@ omapdss_of_get_next_port(const struct device_node *parent,
                                return NULL;
                        }
                        prev = port;
-               } while (of_node_cmp(port->name, "port") != 0);
+               } while (!of_node_name_eq(port, "port"));
 
                of_node_put(ports);
        }
@@ -83,7 +83,7 @@ omapdss_of_get_next_endpoint(const struct device_node *parent,
                if (!ep)
                        return NULL;
                prev = ep;
-       } while (of_node_cmp(ep->name, "endpoint") != 0);
+       } while (!of_node_name_eq(ep, "endpoint"));
 
        return ep;
 }
index a3cc0ca8f9d240d3b5646e3eda4fc4193033fe81..b1a354494144711e95882f1b05c97a583a000911 100644 (file)
@@ -214,7 +214,7 @@ struct platform_device *dss_get_core_pdev(void);
 int dss_dsi_enable_pads(int dsi_id, unsigned lane_mask);
 void dss_dsi_disable_pads(int dsi_id, unsigned lane_mask);
 int dss_set_min_bus_tput(struct device *dev, unsigned long tput);
-int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *));
+void dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *));
 
 /* display */
 int dss_suspend_all_devices(void);
index fa72e735dad2f995c127a265cd6b82a43bf9c73c..d146793dd044a7318b5931e2fc2136c90afc6523 100644 (file)
@@ -712,7 +712,7 @@ int hdmi4_audio_config(struct hdmi_core_data *core, struct hdmi_wp_data *wp,
        else
                acore.i2s_cfg.justification = HDMI_AUDIO_JUSTIFY_RIGHT;
        /*
-        * The I2S input word length is twice the lenght given in the IEC-60958
+        * The I2S input word length is twice the length given in the IEC-60958
         * status word. If the word size is greater than
         * 20 bits, increment by one.
         */
index 4061a20cfe24a2a33b0e8eda203ac6bad718cb3d..3b361bc9feb8adbb4fce983b9d70d3adfe25e6be 100644 (file)
@@ -667,10 +667,10 @@ static int ssd1307fb_probe(struct i2c_client *client,
 
        if (par->reset) {
                /* Reset the screen */
-               gpiod_set_value_cansleep(par->reset, 0);
-               udelay(4);
                gpiod_set_value_cansleep(par->reset, 1);
                udelay(4);
+               gpiod_set_value_cansleep(par->reset, 0);
+               udelay(4);
        }
 
        if (par->vbat_reg) {
index 7bb7e90b8f006f28548da5ed6b4944ee0f9131e6..bdf5a0ea876de6a0d434c0143f8291eef41c9923 100644 (file)
@@ -2110,7 +2110,7 @@ MODULE_PARM_DESC(viafb_lcd_panel_id,
 
 module_param(viafb_lcd_dsp_method, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_lcd_dsp_method,
-       "Set Flat Panel display scaling method.(Default=Expandsion)");
+       "Set Flat Panel display scaling method.(Default=Expansion)");
 
 module_param(viafb_SAMM_ON, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_SAMM_ON,
index 39b229f9e256e3ab23a5ee739f5ebd80ac55f515..d37dd5bb7a8fb73fb014466ff8a647ddf17fd8c0 100644 (file)
@@ -604,6 +604,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
        while (pgno < nr_pages) {
                page = balloon_retrieve(true);
                if (page) {
+                       __ClearPageOffline(page);
                        pages[pgno++] = page;
 #ifdef CONFIG_XEN_HAVE_PVMMU
                        /*
@@ -645,8 +646,10 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
        mutex_lock(&balloon_mutex);
 
        for (i = 0; i < nr_pages; i++) {
-               if (pages[i])
+               if (pages[i]) {
+                       __SetPageOffline(pages[i]);
                        balloon_append(pages[i]);
+               }
        }
 
        balloon_stats.target_unpopulated -= nr_pages;
index 5a0db6dec8d1fd4ad2673a03188a910606ae42b3..aaee1e6584e65240995e7f78c408cb257420997f 100644 (file)
@@ -40,6 +40,9 @@
  */
 #define P9_LOCK_TIMEOUT (30*HZ)
 
+/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */
+#define V9FS_STAT2INODE_KEEP_ISIZE 1
+
 extern struct file_system_type v9fs_fs_type;
 extern const struct address_space_operations v9fs_addr_operations;
 extern const struct file_operations v9fs_file_operations;
@@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
                    struct inode *inode, umode_t mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
-void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
+void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
+                     struct super_block *sb, unsigned int flags);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
+                          unsigned int flags);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
 void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
@@ -83,4 +88,18 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
 }
 
 int v9fs_open_to_dotl_flags(int flags);
+
+static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
+{
+       /*
+        * 32-bit needs the lock: concurrent updates could break the
+        * sequence count and make i_size_read() loop forever.
+        * 64-bit updates are atomic and can skip the locking.
+        */
+       if (sizeof(i_size) > sizeof(long))
+               spin_lock(&inode->i_lock);
+       i_size_write(inode, i_size);
+       if (sizeof(i_size) > sizeof(long))
+               spin_unlock(&inode->i_lock);
+}
 #endif
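
The locking rationale in v9fs_i_size_write() follows from how
i_size_read() works on 32-bit SMP kernels, paraphrased below from
include/linux/fs.h (a sketch of the relevant branch, not the verbatim
source):

        /* Readers retry until the sequence count is even and unchanged.
         * Two unserialized i_size_write() callers can each bump the
         * count and leave it odd, so readers would spin forever; hence
         * the spin_lock in the 32-bit path of the helper above. */
        loff_t i_size_read(const struct inode *inode)
        {
                loff_t i_size;
                unsigned int seq;

                do {
                        seq = read_seqcount_begin(&inode->i_size_seqcount);
                        i_size = inode->i_size;
                } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
                return i_size;
        }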
index a25efa782fccbab2c30a3a743a1c4f547b72e9c4..9a1125305d8425a67f18f79c29eed253ffdaf929 100644 (file)
@@ -446,7 +446,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                i_size = i_size_read(inode);
                if (iocb->ki_pos > i_size) {
                        inode_add_bytes(inode, iocb->ki_pos - i_size);
-                       i_size_write(inode, iocb->ki_pos);
+                       /*
+                        * Need to serialize against i_size_write() in
+                        * v9fs_stat2inode()
+                        */
+                       v9fs_i_size_write(inode, iocb->ki_pos);
                }
                return retval;
        }
index 85ff859d3af5f36c5ad3ae1d8823e7c5d4f7c366..72b779bc094222d046cc024c978791d698961450 100644 (file)
@@ -538,7 +538,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
        if (retval)
                goto error;
 
-       v9fs_stat2inode(st, inode, sb);
+       v9fs_stat2inode(st, inode, sb, 0);
        v9fs_cache_inode_get_cookie(inode);
        unlock_new_inode(inode);
        return inode;
@@ -1092,7 +1092,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
        if (IS_ERR(st))
                return PTR_ERR(st);
 
-       v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb);
+       v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
        generic_fillattr(d_inode(dentry), stat);
 
        p9stat_free(st);
@@ -1170,12 +1170,13 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  * @stat: Plan 9 metadata (mistat) structure
  * @inode: inode to populate
  * @sb: superblock of filesystem
+ * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
  *
  */
 
 void
 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
-       struct super_block *sb)
+                struct super_block *sb, unsigned int flags)
 {
        umode_t mode;
        char ext[32];
@@ -1216,10 +1217,11 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
        mode = p9mode2perm(v9ses, stat);
        mode |= inode->i_mode & ~S_IALLUGO;
        inode->i_mode = mode;
-       i_size_write(inode, stat->length);
 
+       if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+               v9fs_i_size_write(inode, stat->length);
        /* not real number of blocks, but 512 byte ones ... */
-       inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+       inode->i_blocks = (stat->length + 512 - 1) >> 9;
        v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
 }
 
@@ -1416,9 +1418,9 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 {
        int umode;
        dev_t rdev;
-       loff_t i_size;
        struct p9_wstat *st;
        struct v9fs_session_info *v9ses;
+       unsigned int flags;
 
        v9ses = v9fs_inode2v9ses(inode);
        st = p9_client_stat(fid);
@@ -1431,16 +1433,13 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
        if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
                goto out;
 
-       spin_lock(&inode->i_lock);
        /*
         * We don't want to refresh inode->i_size,
         * because we may have cached data
         */
-       i_size = inode->i_size;
-       v9fs_stat2inode(st, inode, inode->i_sb);
-       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-               inode->i_size = i_size;
-       spin_unlock(&inode->i_lock);
+       flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+               V9FS_STAT2INODE_KEEP_ISIZE : 0;
+       v9fs_stat2inode(st, inode, inode->i_sb, flags);
 out:
        p9stat_free(st);
        kfree(st);
index 4823e1c4699945bf5c8170928dbaa6228c06995d..a950a927a626f2bad92969b3f617ee68a04d3336 100644 (file)
@@ -143,7 +143,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
        if (retval)
                goto error;
 
-       v9fs_stat2inode_dotl(st, inode);
+       v9fs_stat2inode_dotl(st, inode, 0);
        v9fs_cache_inode_get_cookie(inode);
        retval = v9fs_get_acl(inode, fid);
        if (retval)
@@ -496,7 +496,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
        if (IS_ERR(st))
                return PTR_ERR(st);
 
-       v9fs_stat2inode_dotl(st, d_inode(dentry));
+       v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
        generic_fillattr(d_inode(dentry), stat);
        /* Change block size to what the server returned */
        stat->blksize = st->st_blksize;
@@ -607,11 +607,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
  * v9fs_stat2inode_dotl - populate an inode structure with stat info
  * @stat: stat structure
  * @inode: inode to populate
+ * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
  *
  */
 
 void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
+                     unsigned int flags)
 {
        umode_t mode;
        struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -631,7 +633,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
                mode |= inode->i_mode & ~S_IALLUGO;
                inode->i_mode = mode;
 
-               i_size_write(inode, stat->st_size);
+               if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+                       v9fs_i_size_write(inode, stat->st_size);
                inode->i_blocks = stat->st_blocks;
        } else {
                if (stat->st_result_mask & P9_STATS_ATIME) {
@@ -661,8 +664,9 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
                }
                if (stat->st_result_mask & P9_STATS_RDEV)
                        inode->i_rdev = new_decode_dev(stat->st_rdev);
-               if (stat->st_result_mask & P9_STATS_SIZE)
-                       i_size_write(inode, stat->st_size);
+               if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
+                   stat->st_result_mask & P9_STATS_SIZE)
+                       v9fs_i_size_write(inode, stat->st_size);
                if (stat->st_result_mask & P9_STATS_BLOCKS)
                        inode->i_blocks = stat->st_blocks;
        }
@@ -928,9 +932,9 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry,
 
 int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 {
-       loff_t i_size;
        struct p9_stat_dotl *st;
        struct v9fs_session_info *v9ses;
+       unsigned int flags;
 
        v9ses = v9fs_inode2v9ses(inode);
        st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
@@ -942,16 +946,13 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
        if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
                goto out;
 
-       spin_lock(&inode->i_lock);
        /*
         * We don't want to refresh inode->i_size,
         * because we may have cached data
         */
-       i_size = inode->i_size;
-       v9fs_stat2inode_dotl(st, inode);
-       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-               inode->i_size = i_size;
-       spin_unlock(&inode->i_lock);
+       flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+               V9FS_STAT2INODE_KEEP_ISIZE : 0;
+       v9fs_stat2inode_dotl(st, inode, flags);
 out:
        kfree(st);
        return 0;
index 10d3bd3f534bce3a40b780d176fdb95ed4e0a772..d13d35cf69c0e416c04694915215984fb1d09308 100644 (file)
@@ -172,7 +172,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                        goto release_sb;
                }
                d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode_dotl(st, d_inode(root));
+               v9fs_stat2inode_dotl(st, d_inode(root), 0);
                kfree(st);
        } else {
                struct p9_wstat *st = NULL;
@@ -183,7 +183,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                }
 
                d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode(st, d_inode(root), sb);
+               v9fs_stat2inode(st, d_inode(root), sb, 0);
 
                p9stat_free(st);
                kfree(st);
index e92a2fee3c577bd7fc8c63e3e10edf079ca97686..13c1288b04a7318bde37f578e6a2c566fb692c07 100644 (file)
@@ -115,7 +115,12 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
                seq_puts(m, " type: CDROM ");
        else
                seq_printf(m, " type: %d ", dev_type);
-       if (tcon->seal)
+
+       seq_printf(m, "Serial Number: 0x%x", tcon->vol_serial_number);
+
+       if ((tcon->seal) ||
+           (tcon->ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) ||
+           (tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA))
                seq_printf(m, " Encrypted");
        if (tcon->nocase)
                seq_printf(m, " nocase");
@@ -371,6 +376,10 @@ skip_rdma:
                                atomic_read(&server->in_send),
                                atomic_read(&server->num_waiters));
 #endif
+                       if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+                               seq_puts(m, " encrypted");
+                       if (ses->sign)
+                               seq_puts(m, " signed");
 
                        seq_puts(m, "\n\tShares:");
                        j = 0;
index d8bce2f862de8ef756161eef30dd2e1d96725a0f..086ddc5108af7d0147eddd523e591a00c2e445cc 100644 (file)
@@ -43,6 +43,9 @@ struct smb_snapshot_array {
        /*      snapshots[]; */
 } __packed;
 
+/* query_info flags */
+#define PASSTHRU_QUERY_INFO    0x00000000
+#define PASSTHRU_FSCTL         0x00000001
 struct smb_query_info {
        __u32   info_type;
        __u32   file_info_class;
index f293e052e351de99e4ebad80cc9805375e5980b4..38feae812b4704b315ee3adfe2a1eaa2c4740e45 100644 (file)
@@ -479,6 +479,14 @@ struct smb_version_operations {
                                struct cifs_tcon *tcon,
                                __le16 *path, int is_dir,
                                unsigned long p);
+       /* make unix special files (block, char, fifo, socket) */
+       int (*make_node)(unsigned int xid,
+                        struct inode *inode,
+                        struct dentry *dentry,
+                        struct cifs_tcon *tcon,
+                        char *full_path,
+                        umode_t mode,
+                        dev_t device_number);
 };
 
 struct smb_version_values {
@@ -735,13 +743,13 @@ in_flight(struct TCP_Server_Info *server)
 }
 
 static inline bool
-has_credits(struct TCP_Server_Info *server, int *credits)
+has_credits(struct TCP_Server_Info *server, int *credits, int num_credits)
 {
        int num;
        spin_lock(&server->req_lock);
        num = *credits;
        spin_unlock(&server->req_lock);
-       return num > 0;
+       return num >= num_credits;
 }
 
 static inline void
@@ -962,11 +970,14 @@ cap_unix(struct cifs_ses *ses)
 
 struct cached_fid {
        bool is_valid:1;        /* Do we have a useable root fid */
+       bool file_all_info_is_valid:1;
+
        struct kref refcount;
        struct cifs_fid *fid;
        struct mutex fid_mutex;
        struct cifs_tcon *tcon;
        struct work_struct lease_break;
+       struct smb2_file_all_info file_all_info;
 };
 
 /*
@@ -1735,6 +1746,7 @@ require use of the stronger protocol */
  *  GlobalMid_Lock protects:
  *     list operations on pending_mid_q and oplockQ
  *      updates to XID counters, multiplex id  and SMB sequence numbers
+ *      list operations on global DnotifyReqList
  *  tcp_ses_lock protects:
  *     list operations on tcp and SMB session lists
  *  tcon->open_file_lock protects the list of open files hanging off the tcon
index b95db2b593cb078639ba71cfdcafe6c9d19c5a4b..a8e9738db691294736105bf8a0cd03772a9a2447 100644 (file)
@@ -1191,10 +1191,6 @@ next_pdu:
                        continue;
                }
 
-               if (server->large_buf)
-                       buf = server->bigbuf;
-
-
                server->lstrp = jiffies;
 
                for (i = 0; i < num_mids; i++) {
index 907e85d65bb4e09b5fdc8f7c1e6c35ef56b519ad..f26a48dd2e39508ab01c11c91d85fbb471beb16c 100644 (file)
@@ -621,20 +621,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
 {
        int rc = -EPERM;
        unsigned int xid;
-       int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL;
        struct cifs_sb_info *cifs_sb;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;
-       struct cifs_io_parms io_parms;
        char *full_path = NULL;
-       struct inode *newinode = NULL;
-       __u32 oplock = 0;
-       struct cifs_fid fid;
-       struct cifs_open_parms oparms;
-       FILE_ALL_INFO *buf = NULL;
-       unsigned int bytes_written;
-       struct win_dev *pdev;
-       struct kvec iov[2];
 
        if (!old_valid_dev(device_number))
                return -EINVAL;
@@ -654,103 +644,12 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                goto mknod_out;
        }
 
-       if (tcon->unix_ext) {
-               struct cifs_unix_set_info_args args = {
-                       .mode   = mode & ~current_umask(),
-                       .ctime  = NO_CHANGE_64,
-                       .atime  = NO_CHANGE_64,
-                       .mtime  = NO_CHANGE_64,
-                       .device = device_number,
-               };
-               if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-                       args.uid = current_fsuid();
-                       args.gid = current_fsgid();
-               } else {
-                       args.uid = INVALID_UID; /* no change */
-                       args.gid = INVALID_GID; /* no change */
-               }
-               rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
-                                           cifs_sb->local_nls,
-                                           cifs_remap(cifs_sb));
-               if (rc)
-                       goto mknod_out;
-
-               rc = cifs_get_inode_info_unix(&newinode, full_path,
-                                               inode->i_sb, xid);
-
-               if (rc == 0)
-                       d_instantiate(direntry, newinode);
-               goto mknod_out;
-       }
-
-       if (!S_ISCHR(mode) && !S_ISBLK(mode))
-               goto mknod_out;
-
-       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
-               goto mknod_out;
-
-
-       cifs_dbg(FYI, "sfu compat create special file\n");
-
-       buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
-       if (buf == NULL) {
-               rc = -ENOMEM;
-               goto mknod_out;
-       }
-
-       if (backup_cred(cifs_sb))
-               create_options |= CREATE_OPEN_BACKUP_INTENT;
-
-       oparms.tcon = tcon;
-       oparms.cifs_sb = cifs_sb;
-       oparms.desired_access = GENERIC_WRITE;
-       oparms.create_options = create_options;
-       oparms.disposition = FILE_CREATE;
-       oparms.path = full_path;
-       oparms.fid = &fid;
-       oparms.reconnect = false;
-
-       if (tcon->ses->server->oplocks)
-               oplock = REQ_OPLOCK;
-       else
-               oplock = 0;
-       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf);
-       if (rc)
-               goto mknod_out;
-
-       /*
-        * BB Do not bother to decode buf since no local inode yet to put
-        * timestamps in, but we can reuse it safely.
-        */
-
-       pdev = (struct win_dev *)buf;
-       io_parms.pid = current->tgid;
-       io_parms.tcon = tcon;
-       io_parms.offset = 0;
-       io_parms.length = sizeof(struct win_dev);
-       iov[1].iov_base = buf;
-       iov[1].iov_len = sizeof(struct win_dev);
-       if (S_ISCHR(mode)) {
-               memcpy(pdev->type, "IntxCHR", 8);
-               pdev->major = cpu_to_le64(MAJOR(device_number));
-               pdev->minor = cpu_to_le64(MINOR(device_number));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
-       } else if (S_ISBLK(mode)) {
-               memcpy(pdev->type, "IntxBLK", 8);
-               pdev->major = cpu_to_le64(MAJOR(device_number));
-               pdev->minor = cpu_to_le64(MINOR(device_number));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
-       }
-       tcon->ses->server->ops->close(xid, tcon, &fid);
-       d_drop(direntry);
-
-       /* FIXME: add code here to set EAs */
+       rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon,
+                                              full_path, mode,
+                                              device_number);
 
 mknod_out:
        kfree(full_path);
-       kfree(buf);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
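
With the SMB1-specific body moved behind ->make_node, cifs_mknod() reduces
to the dispatch above. Dialects whose ops table leaves the hook NULL would
need a guard at the call site; a defensive form, sketched as an assumption
(whether the rest of the series adds this guard or an SMB2 implementation
is not visible in this hunk):

        /* The NULL check is an assumption for ops tables that do not
         * populate ->make_node; otherwise rc keeps the default -EPERM. */
        if (tcon->ses->server->ops->make_node)
                rc = tcon->ses->server->ops->make_node(xid, inode, direntry,
                                                       tcon, full_path, mode,
                                                       device_number);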
index 4c144c1f50eb547d78076a1c2fd311f1fc6e5d28..2a6d20c0ce0288d37ad2405424200be3fefaa9f4 100644 (file)
@@ -1645,8 +1645,20 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
                rc = server->ops->mand_unlock_range(cfile, flock, xid);
 
 out:
-       if (flock->fl_flags & FL_POSIX && !rc)
+       if (flock->fl_flags & FL_POSIX) {
+               /*
+                * If this is a request to remove all locks because we
+                * are closing the file, it doesn't matter if the
+                * unlocking failed as both cifs.ko and the SMB server
+                * remove the lock on file close
+                */
+               if (rc) {
+                       cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
+                       if (!(flock->fl_flags & FL_CLOSE))
+                               return rc;
+               }
                rc = locks_lock_file_wait(file, flock);
+       }
        return rc;
 }
 
index f0ce27c3c6e44e6a2c5a2f548dd09bd616443c85..c711f1f39bf2eaded7827d3ae29ae4b438328edf 100644 (file)
@@ -1027,6 +1027,131 @@ cifs_can_echo(struct TCP_Server_Info *server)
        return false;
 }
 
+static int
+cifs_make_node(unsigned int xid, struct inode *inode,
+              struct dentry *dentry, struct cifs_tcon *tcon,
+              char *full_path, umode_t mode, dev_t dev)
+{
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+       struct inode *newinode = NULL;
+       int rc = -EPERM;
+       int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL;
+       FILE_ALL_INFO *buf = NULL;
+       struct cifs_io_parms io_parms;
+       __u32 oplock = 0;
+       struct cifs_fid fid;
+       struct cifs_open_parms oparms;
+       unsigned int bytes_written;
+       struct win_dev *pdev;
+       struct kvec iov[2];
+
+       if (tcon->unix_ext) {
+               /*
+                * SMB1 Unix Extensions: requires server support but
+                * works with all special files
+                */
+               struct cifs_unix_set_info_args args = {
+                       .mode   = mode & ~current_umask(),
+                       .ctime  = NO_CHANGE_64,
+                       .atime  = NO_CHANGE_64,
+                       .mtime  = NO_CHANGE_64,
+                       .device = dev,
+               };
+               if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+                       args.uid = current_fsuid();
+                       args.gid = current_fsgid();
+               } else {
+                       args.uid = INVALID_UID; /* no change */
+                       args.gid = INVALID_GID; /* no change */
+               }
+               rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
+                                           cifs_sb->local_nls,
+                                           cifs_remap(cifs_sb));
+               if (rc)
+                       goto out;
+
+               rc = cifs_get_inode_info_unix(&newinode, full_path,
+                                             inode->i_sb, xid);
+
+               if (rc == 0)
+                       d_instantiate(dentry, newinode);
+               goto out;
+       }
+
+       /*
+        * SMB1 SFU emulation: should work with all servers, but only
+        * supports block and char devices (no socket & fifo)
+        */
+       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
+               goto out;
+
+       if (!S_ISCHR(mode) && !S_ISBLK(mode))
+               goto out;
+
+       cifs_dbg(FYI, "sfu compat create special file\n");
+
+       buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+       if (buf == NULL) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       if (backup_cred(cifs_sb))
+               create_options |= CREATE_OPEN_BACKUP_INTENT;
+
+       oparms.tcon = tcon;
+       oparms.cifs_sb = cifs_sb;
+       oparms.desired_access = GENERIC_WRITE;
+       oparms.create_options = create_options;
+       oparms.disposition = FILE_CREATE;
+       oparms.path = full_path;
+       oparms.fid = &fid;
+       oparms.reconnect = false;
+
+       if (tcon->ses->server->oplocks)
+               oplock = REQ_OPLOCK;
+       else
+               oplock = 0;
+       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf);
+       if (rc)
+               goto out;
+
+       /*
+        * BB Do not bother to decode buf since no local inode yet to put
+        * timestamps in, but we can reuse it safely.
+        */
+
+       pdev = (struct win_dev *)buf;
+       io_parms.pid = current->tgid;
+       io_parms.tcon = tcon;
+       io_parms.offset = 0;
+       io_parms.length = sizeof(struct win_dev);
+       iov[1].iov_base = buf;
+       iov[1].iov_len = sizeof(struct win_dev);
+       if (S_ISCHR(mode)) {
+               memcpy(pdev->type, "IntxCHR", 8);
+               pdev->major = cpu_to_le64(MAJOR(dev));
+               pdev->minor = cpu_to_le64(MINOR(dev));
+               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
+                                                       &bytes_written, iov, 1);
+       } else if (S_ISBLK(mode)) {
+               memcpy(pdev->type, "IntxBLK", 8);
+               pdev->major = cpu_to_le64(MAJOR(dev));
+               pdev->minor = cpu_to_le64(MINOR(dev));
+               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
+                                                       &bytes_written, iov, 1);
+       }
+       tcon->ses->server->ops->close(xid, tcon, &fid);
+       d_drop(dentry);
+
+       /* FIXME: add code here to set EAs */
+out:
+       kfree(buf);
+       return rc;
+}
+
+
+
 struct smb_version_operations smb1_operations = {
        .send_cancel = send_nt_cancel,
        .compare_fids = cifs_compare_fids,
@@ -1110,6 +1235,7 @@ struct smb_version_operations smb1_operations = {
        .get_acl_by_fid = get_cifs_acl_by_fid,
        .set_acl = set_cifs_acl,
 #endif /* CIFS_ACL */
+       .make_node = cifs_make_node,
 };
 
 struct smb_version_values smb1_values = {
index 01a76bccdb8dfd28c1a0fff2036270bea70f08db..278405d26c47e9f0f0cc6b4361af911a1fb1bd9e 100644 (file)
 #include "smb2pdu.h"
 #include "smb2proto.h"
 
+static void
+free_set_inf_compound(struct smb_rqst *rqst)
+{
+       if (rqst[1].rq_iov)
+               SMB2_set_info_free(&rqst[1]);
+       if (rqst[2].rq_iov)
+               SMB2_close_free(&rqst[2]);
+}
+
+
 static int
 smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                 struct cifs_sb_info *cifs_sb, const char *full_path,
@@ -112,14 +122,18 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                                          PATH_MAX * 2, 0, NULL);
                smb2_set_next_command(tcon, &rqst[num_rqst]);
                smb2_set_related(&rqst[num_rqst++]);
+               trace_smb3_query_info_compound_enter(xid, ses->Suid, tcon->tid,
+                                                    full_path);
                break;
        case SMB2_OP_DELETE:
+               trace_smb3_delete_enter(xid, ses->Suid, tcon->tid, full_path);
                break;
        case SMB2_OP_MKDIR:
                /*
                 * Directories are created through parameters in the
                 * SMB2_open() call.
                 */
+               trace_smb3_mkdir_enter(xid, ses->Suid, tcon->tid, full_path);
                break;
        case SMB2_OP_RMDIR:
                memset(&si_iov, 0, sizeof(si_iov));
@@ -135,6 +149,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                                        SMB2_O_INFO_FILE, 0, data, size);
                smb2_set_next_command(tcon, &rqst[num_rqst]);
                smb2_set_related(&rqst[num_rqst++]);
+               trace_smb3_rmdir_enter(xid, ses->Suid, tcon->tid, full_path);
                break;
        case SMB2_OP_SET_EOF:
                memset(&si_iov, 0, sizeof(si_iov));
@@ -150,6 +165,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                                        SMB2_O_INFO_FILE, 0, data, size);
                smb2_set_next_command(tcon, &rqst[num_rqst]);
                smb2_set_related(&rqst[num_rqst++]);
+               trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path);
                break;
        case SMB2_OP_SET_INFO:
                memset(&si_iov, 0, sizeof(si_iov));
@@ -166,6 +182,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                                        SMB2_O_INFO_FILE, 0, data, size);
                smb2_set_next_command(tcon, &rqst[num_rqst]);
                smb2_set_related(&rqst[num_rqst++]);
+               trace_smb3_set_info_compound_enter(xid, ses->Suid, tcon->tid,
+                                                  full_path);
                break;
        case SMB2_OP_RENAME:
                memset(&si_iov, 0, sizeof(si_iov));
@@ -190,6 +208,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                                        SMB2_O_INFO_FILE, 0, data, size);
                smb2_set_next_command(tcon, &rqst[num_rqst]);
                smb2_set_related(&rqst[num_rqst++]);
+               trace_smb3_rename_enter(xid, ses->Suid, tcon->tid, full_path);
                break;
        case SMB2_OP_HARDLINK:
                memset(&si_iov, 0, sizeof(si_iov));
@@ -214,6 +233,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                                        SMB2_O_INFO_FILE, 0, data, size);
                smb2_set_next_command(tcon, &rqst[num_rqst]);
                smb2_set_related(&rqst[num_rqst++]);
+               trace_smb3_hardlink_enter(xid, ses->Suid, tcon->tid, full_path);
                break;
        default:
                cifs_dbg(VFS, "Invalid command\n");
@@ -252,21 +272,65 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                        SMB2_query_info_free(&rqst[1]);
                if (rqst[2].rq_iov)
                        SMB2_close_free(&rqst[2]);
+               if (rc)
+                       trace_smb3_query_info_compound_err(xid,  ses->Suid,
+                                               tcon->tid, rc);
+               else
+                       trace_smb3_query_info_compound_done(xid, ses->Suid,
+                                               tcon->tid);
                break;
        case SMB2_OP_DELETE:
+               if (rc)
+                       trace_smb3_delete_err(xid,  ses->Suid, tcon->tid, rc);
+               else
+                       trace_smb3_delete_done(xid, ses->Suid, tcon->tid);
+               if (rqst[1].rq_iov)
+                       SMB2_close_free(&rqst[1]);
+               break;
        case SMB2_OP_MKDIR:
+               if (rc)
+                       trace_smb3_mkdir_err(xid,  ses->Suid, tcon->tid, rc);
+               else
+                       trace_smb3_mkdir_done(xid, ses->Suid, tcon->tid);
                if (rqst[1].rq_iov)
                        SMB2_close_free(&rqst[1]);
                break;
        case SMB2_OP_HARDLINK:
+               if (rc)
+                       trace_smb3_hardlink_err(xid,  ses->Suid, tcon->tid, rc);
+               else
+                       trace_smb3_hardlink_done(xid, ses->Suid, tcon->tid);
+               free_set_inf_compound(rqst);
+               break;
        case SMB2_OP_RENAME:
+               if (rc)
+                       trace_smb3_rename_err(xid,  ses->Suid, tcon->tid, rc);
+               else
+                       trace_smb3_rename_done(xid, ses->Suid, tcon->tid);
+               free_set_inf_compound(rqst);
+               break;
        case SMB2_OP_RMDIR:
+               if (rc)
+                       trace_smb3_rmdir_err(xid,  ses->Suid, tcon->tid, rc);
+               else
+                       trace_smb3_rmdir_done(xid, ses->Suid, tcon->tid);
+               free_set_inf_compound(rqst);
+               break;
        case SMB2_OP_SET_EOF:
+               if (rc)
+                       trace_smb3_set_eof_err(xid,  ses->Suid, tcon->tid, rc);
+               else
+                       trace_smb3_set_eof_done(xid, ses->Suid, tcon->tid);
+               free_set_inf_compound(rqst);
+               break;
        case SMB2_OP_SET_INFO:
-               if (rqst[1].rq_iov)
-                       SMB2_set_info_free(&rqst[1]);
-               if (rqst[2].rq_iov)
-                       SMB2_close_free(&rqst[2]);
+               if (rc)
+                       trace_smb3_set_info_compound_err(xid,  ses->Suid,
+                                               tcon->tid, rc);
+               else
+                       trace_smb3_set_info_compound_done(xid, ses->Suid,
+                                               tcon->tid);
+               free_set_inf_compound(rqst);
                break;
        }
        free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
@@ -309,12 +373,17 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                rc = open_shroot(xid, tcon, &fid);
                if (rc)
                        goto out;
-               rc = SMB2_query_info(xid, tcon, fid.persistent_fid,
-                                    fid.volatile_fid, smb2_data);
+
+               if (tcon->crfid.file_all_info_is_valid) {
+                       move_smb2_info_to_cifs(data,
+                                              &tcon->crfid.file_all_info);
+               } else {
+                       rc = SMB2_query_info(xid, tcon, fid.persistent_fid,
+                                            fid.volatile_fid, smb2_data);
+                       if (!rc)
+                               move_smb2_info_to_cifs(data, smb2_data);
+               }
                close_shroot(&tcon->crfid);
-               if (rc)
-                       goto out;
-               move_smb2_info_to_cifs(data, smb2_data);
                goto out;
        }
 
index 085e91436da7e6d24b89e87ba753b9b672b89f6a..1022a3771e140d819e767ba5a75677a88d65f911 100644 (file)
@@ -185,7 +185,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
                        spin_unlock(&server->req_lock);
                        cifs_num_waiters_inc(server);
                        rc = wait_event_killable(server->request_q,
-                                       has_credits(server, &server->credits));
+                               has_credits(server, &server->credits, 1));
                        cifs_num_waiters_dec(server);
                        if (rc)
                                return rc;
@@ -619,6 +619,7 @@ smb2_close_cached_fid(struct kref *ref)
                SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid,
                           cfid->fid->volatile_fid);
                cfid->is_valid = false;
+               cfid->file_all_info_is_valid = false;
        }
 }
 
@@ -643,9 +644,18 @@ smb2_cached_lease_break(struct work_struct *work)
  */
 int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
 {
-       struct cifs_open_parms oparams;
-       int rc;
-       __le16 srch_path = 0; /* Null - since an open of top of share */
+       struct cifs_ses *ses = tcon->ses;
+       struct TCP_Server_Info *server = ses->server;
+       struct cifs_open_parms oparms;
+       struct smb2_create_rsp *o_rsp = NULL;
+       struct smb2_query_info_rsp *qi_rsp = NULL;
+       int resp_buftype[2];
+       struct smb_rqst rqst[2];
+       struct kvec rsp_iov[2];
+       struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+       struct kvec qi_iov[1];
+       int rc, flags = 0;
+       __le16 utf16_path = 0; /* Null - since an open of top of share */
        u8 oplock = SMB2_OPLOCK_LEVEL_II;
 
        mutex_lock(&tcon->crfid.fid_mutex);
@@ -657,22 +667,89 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
                return 0;
        }
 
-       oparams.tcon = tcon;
-       oparams.create_options = 0;
-       oparams.desired_access = FILE_READ_ATTRIBUTES;
-       oparams.disposition = FILE_OPEN;
-       oparams.fid = pfid;
-       oparams.reconnect = false;
-
-       rc = SMB2_open(xid, &oparams, &srch_path, &oplock, NULL, NULL, NULL);
-       if (rc == 0) {
-               memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
-               tcon->crfid.tcon = tcon;
-               tcon->crfid.is_valid = true;
-               kref_init(&tcon->crfid.refcount);
-               kref_get(&tcon->crfid.refcount);
-       }
+       if (smb3_encryption_required(tcon))
+               flags |= CIFS_TRANSFORM_REQ;
+
+       memset(rqst, 0, sizeof(rqst));
+       resp_buftype[0] = resp_buftype[1] = CIFS_NO_BUFFER;
+       memset(rsp_iov, 0, sizeof(rsp_iov));
+
+       /* Open */
+       memset(&open_iov, 0, sizeof(open_iov));
+       rqst[0].rq_iov = open_iov;
+       rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
+
+       oparms.tcon = tcon;
+       oparms.create_options = 0;
+       oparms.desired_access = FILE_READ_ATTRIBUTES;
+       oparms.disposition = FILE_OPEN;
+       oparms.fid = pfid;
+       oparms.reconnect = false;
+
+       rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, &utf16_path);
+       if (rc)
+               goto oshr_exit;
+       smb2_set_next_command(tcon, &rqst[0]);
+
+       memset(&qi_iov, 0, sizeof(qi_iov));
+       rqst[1].rq_iov = qi_iov;
+       rqst[1].rq_nvec = 1;
+
+       rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID,
+                                 COMPOUND_FID, FILE_ALL_INFORMATION,
+                                 SMB2_O_INFO_FILE, 0,
+                                 sizeof(struct smb2_file_all_info) +
+                                 PATH_MAX * 2, 0, NULL);
+       if (rc)
+               goto oshr_exit;
+
+       smb2_set_related(&rqst[1]);
+
+       rc = compound_send_recv(xid, ses, flags, 2, rqst,
+                               resp_buftype, rsp_iov);
+       if (rc)
+               goto oshr_exit;
+
+       o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
+       oparms.fid->persistent_fid = o_rsp->PersistentFileId;
+       oparms.fid->volatile_fid = o_rsp->VolatileFileId;
+#ifdef CONFIG_CIFS_DEBUG2
+       oparms.fid->mid = le64_to_cpu(o_rsp->sync_hdr.MessageId);
+#endif /* CIFS_DEBUG2 */
+
+       if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
+               oplock = smb2_parse_lease_state(server, o_rsp,
+                                               &oparms.fid->epoch,
+                                               oparms.fid->lease_key);
+       else
+               goto oshr_exit;
+
+
+       memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
+       tcon->crfid.tcon = tcon;
+       tcon->crfid.is_valid = true;
+       kref_init(&tcon->crfid.refcount);
+       kref_get(&tcon->crfid.refcount);
+
+
+       qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+       if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info))
+               goto oshr_exit;
+       rc = smb2_validate_and_copy_iov(
+                               le16_to_cpu(qi_rsp->OutputBufferOffset),
+                               sizeof(struct smb2_file_all_info),
+                               &rsp_iov[1], sizeof(struct smb2_file_all_info),
+                               (char *)&tcon->crfid.file_all_info);
+       if (rc)
+               goto oshr_exit;
+       tcon->crfid.file_all_info_is_valid = 1;
+
+ oshr_exit:
        mutex_unlock(&tcon->crfid.fid_mutex);
+       SMB2_open_free(&rqst[0]);
+       SMB2_query_info_free(&rqst[1]);
+       free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+       free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
        return rc;
 }
 
@@ -1253,7 +1330,8 @@ smb2_ioctl_query_info(const unsigned int xid,
        struct smb_query_info __user *pqi;
        int rc = 0;
        int flags = 0;
-       struct smb2_query_info_rsp *rsp = NULL;
+       struct smb2_query_info_rsp *qi_rsp = NULL;
+       struct smb2_ioctl_rsp *io_rsp = NULL;
        void *buffer = NULL;
        struct smb_rqst rqst[3];
        int resp_buftype[3];
@@ -1263,6 +1341,7 @@ smb2_ioctl_query_info(const unsigned int xid,
        u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
        struct cifs_fid fid;
        struct kvec qi_iov[1];
+       struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
        struct kvec close_iov[1];
 
        memset(rqst, 0, sizeof(rqst));
@@ -1313,15 +1392,35 @@ smb2_ioctl_query_info(const unsigned int xid,
        smb2_set_next_command(tcon, &rqst[0]);
 
        /* Query */
-       memset(&qi_iov, 0, sizeof(qi_iov));
-       rqst[1].rq_iov = qi_iov;
-       rqst[1].rq_nvec = 1;
-
-       rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
-                                 qi.file_info_class, qi.info_type,
-                                 qi.additional_information,
+       if (qi.flags & PASSTHRU_FSCTL) {
+               /* Can eventually relax perm check since server enforces too */
+               if (!capable(CAP_SYS_ADMIN))
+                       rc = -EPERM;
+               else {
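+                       /*
+                        * rq_nvec starts at the maximum; SMB2_ioctl_init()
+                        * trims it to 1 when, as here, no input data is sent.
+                        */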
+                       memset(&io_iov, 0, sizeof(io_iov));
+                       rqst[1].rq_iov = io_iov;
+                       rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE;
+
+                       rc = SMB2_ioctl_init(tcon, &rqst[1],
+                                            COMPOUND_FID, COMPOUND_FID,
+                                            qi.info_type, true, NULL,
+                                            0);
+               }
+       } else if (qi.flags == PASSTHRU_QUERY_INFO) {
+               memset(&qi_iov, 0, sizeof(qi_iov));
+               rqst[1].rq_iov = qi_iov;
+               rqst[1].rq_nvec = 1;
+
+               rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID,
+                                 COMPOUND_FID, qi.file_info_class,
+                                 qi.info_type, qi.additional_information,
                                  qi.input_buffer_length,
                                  qi.output_buffer_length, buffer);
+       } else { /* unknown flags */
+               cifs_dbg(VFS, "invalid passthru query flags: 0x%x\n", qi.flags);
+               rc = -EINVAL;
+       }
+
        if (rc)
                goto iqinf_exit;
        smb2_set_next_command(tcon, &rqst[1]);
@@ -1341,24 +1440,44 @@ smb2_ioctl_query_info(const unsigned int xid,
                                resp_buftype, rsp_iov);
        if (rc)
                goto iqinf_exit;
-       pqi = (struct smb_query_info __user *)arg;
-       rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
-       if (le32_to_cpu(rsp->OutputBufferLength) < qi.input_buffer_length)
-               qi.input_buffer_length = le32_to_cpu(rsp->OutputBufferLength);
-       if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length,
-                        sizeof(qi.input_buffer_length))) {
-               rc = -EFAULT;
-               goto iqinf_exit;
-       }
-       if (copy_to_user(pqi + 1, rsp->Buffer, qi.input_buffer_length)) {
-               rc = -EFAULT;
-               goto iqinf_exit;
+       if (qi.flags & PASSTHRU_FSCTL) {
+               pqi = (struct smb_query_info __user *)arg;
+               io_rsp = (struct smb2_ioctl_rsp *)rsp_iov[1].iov_base;
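+               /* only copy back as much data as the server returned */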
+               if (le32_to_cpu(io_rsp->OutputCount) < qi.input_buffer_length)
+                       qi.input_buffer_length = le32_to_cpu(io_rsp->OutputCount);
+               if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length,
+                                sizeof(qi.input_buffer_length))) {
+                       rc = -EFAULT;
+                       goto iqinf_exit;
+               }
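+               /* the output data follows the fixed ioctl response header */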
+               if (copy_to_user(pqi + 1, &io_rsp[1], qi.input_buffer_length)) {
+                       rc = -EFAULT;
+                       goto iqinf_exit;
+               }
+       } else {
+               pqi = (struct smb_query_info __user *)arg;
+               qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+               if (le32_to_cpu(qi_rsp->OutputBufferLength) < qi.input_buffer_length)
+                       qi.input_buffer_length = le32_to_cpu(qi_rsp->OutputBufferLength);
+               if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length,
+                                sizeof(qi.input_buffer_length))) {
+                       rc = -EFAULT;
+                       goto iqinf_exit;
+               }
+               if (copy_to_user(pqi + 1, qi_rsp->Buffer, qi.input_buffer_length)) {
+                       rc = -EFAULT;
+                       goto iqinf_exit;
+               }
        }
 
  iqinf_exit:
        kfree(buffer);
        SMB2_open_free(&rqst[0]);
-       SMB2_query_info_free(&rqst[1]);
+       if (qi.flags & PASSTHRU_FSCTL)
+               SMB2_ioctl_free(&rqst[1]);
+       else
+               SMB2_query_info_free(&rqst[1]);
+
        SMB2_close_free(&rqst[2]);
        free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
        free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
@@ -2472,22 +2591,38 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb,
 static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
                            loff_t offset, loff_t len, bool keep_size)
 {
+       struct cifs_ses *ses = tcon->ses;
        struct inode *inode;
        struct cifsInodeInfo *cifsi;
        struct cifsFileInfo *cfile = file->private_data;
        struct file_zero_data_information fsctl_buf;
+       struct smb_rqst rqst[2];
+       int resp_buftype[2];
+       struct kvec rsp_iov[2];
+       struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
+       struct kvec si_iov[1];
+       unsigned int size[1];
+       void *data[1];
        long rc;
        unsigned int xid;
+       int num = 0, flags = 0;
+       __le64 eof;
 
        xid = get_xid();
 
        inode = d_inode(cfile->dentry);
        cifsi = CIFS_I(inode);
 
+       trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
+                             ses->Suid, offset, len);
+
        /* if file not oplocked can't be sure whether asking to extend size */
        if (!CIFS_CACHE_READ(cifsi))
                if (keep_size == false) {
                        rc = -EOPNOTSUPP;
+                       trace_smb3_zero_err(xid, cfile->fid.persistent_fid,
+                               tcon->tid, ses->Suid, offset, len, rc);
                        free_xid(xid);
                        return rc;
                }
@@ -2498,33 +2633,73 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
         */
        if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) {
                rc = -EOPNOTSUPP;
+               trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid,
+                             ses->Suid, offset, len, rc);
                free_xid(xid);
                return rc;
        }
 
-       /*
-        * need to make sure we are not asked to extend the file since the SMB3
-        * fsctl does not change the file size. In the future we could change
-        * this to zero the first part of the range then set the file size
-        * which for a non sparse file would zero the newly extended range
-        */
-       if (keep_size == false)
-               if (i_size_read(inode) < offset + len) {
-                       rc = -EOPNOTSUPP;
-                       free_xid(xid);
-                       return rc;
-               }
-
        cifs_dbg(FYI, "offset %lld len %lld", offset, len);
 
        fsctl_buf.FileOffset = cpu_to_le64(offset);
        fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
 
-       rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
-                       cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
-                       true /* is_fctl */, (char *)&fsctl_buf,
-                       sizeof(struct file_zero_data_information), NULL, NULL);
+       if (smb3_encryption_required(tcon))
+               flags |= CIFS_TRANSFORM_REQ;
+
+       memset(rqst, 0, sizeof(rqst));
+       resp_buftype[0] = resp_buftype[1] = CIFS_NO_BUFFER;
+       memset(rsp_iov, 0, sizeof(rsp_iov));
+
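+       /*
+        * Compound of up to two commands: FSCTL_SET_ZERO_DATA, optionally
+        * followed by a set-info that extends the end of file when the
+        * zeroed range grows the file.
+        */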
+       memset(&io_iov, 0, sizeof(io_iov));
+       rqst[num].rq_iov = io_iov;
+       rqst[num].rq_nvec = SMB2_IOCTL_IOV_SIZE;
+       rc = SMB2_ioctl_init(tcon, &rqst[num++], cfile->fid.persistent_fid,
+                            cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+                            true /* is_fctl */, (char *)&fsctl_buf,
+                            sizeof(struct file_zero_data_information));
+       if (rc)
+               goto zero_range_exit;
+
+       /*
+        * do we also need to change the size of the file?
+        */
+       if (keep_size == false && i_size_read(inode) < offset + len) {
+               smb2_set_next_command(tcon, &rqst[0]);
+
+               memset(&si_iov, 0, sizeof(si_iov));
+               rqst[num].rq_iov = si_iov;
+               rqst[num].rq_nvec = 1;
+
+               eof = cpu_to_le64(offset + len);
+               size[0] = 8; /* sizeof __le64 */
+               data[0] = &eof;
+
+               rc = SMB2_set_info_init(tcon, &rqst[num++],
+                                       cfile->fid.persistent_fid,
+                                       cfile->fid.volatile_fid,
+                                       current->tgid,
+                                       FILE_END_OF_FILE_INFORMATION,
+                                       SMB2_O_INFO_FILE, 0, data, size);
+               smb2_set_related(&rqst[1]);
+       }
+
+       rc = compound_send_recv(xid, ses, flags, num, rqst,
+                               resp_buftype, rsp_iov);
+
+ zero_range_exit:
+       SMB2_ioctl_free(&rqst[0]);
+       SMB2_set_info_free(&rqst[1]);
+       free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+       free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
        free_xid(xid);
+       if (rc)
+               trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid,
+                             ses->Suid, offset, len, rc);
+       else
+               trace_smb3_zero_done(xid, cfile->fid.persistent_fid, tcon->tid,
+                             ses->Suid, offset, len);
        return rc;
 }
 
@@ -2573,15 +2748,20 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
        struct cifsFileInfo *cfile = file->private_data;
        long rc = -EOPNOTSUPP;
        unsigned int xid;
+       __le64 eof;
 
        xid = get_xid();
 
        inode = d_inode(cfile->dentry);
        cifsi = CIFS_I(inode);
 
+       trace_smb3_falloc_enter(xid, cfile->fid.persistent_fid, tcon->tid,
+                               tcon->ses->Suid, off, len);
        /* if file not oplocked can't be sure whether asking to extend size */
        if (!CIFS_CACHE_READ(cifsi))
                if (keep_size == false) {
+                       trace_smb3_falloc_err(xid, cfile->fid.persistent_fid,
+                               tcon->tid, tcon->ses->Suid, off, len, rc);
                        free_xid(xid);
                        return rc;
                }
@@ -2601,6 +2781,12 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
                /* BB: in future add else clause to extend file */
                else
                        rc = -EOPNOTSUPP;
+               if (rc)
+                       trace_smb3_falloc_err(xid, cfile->fid.persistent_fid,
+                               tcon->tid, tcon->ses->Suid, off, len, rc);
+               else
+                       trace_smb3_falloc_done(xid, cfile->fid.persistent_fid,
+                               tcon->tid, tcon->ses->Suid, off, len);
                free_xid(xid);
                return rc;
        }
@@ -2616,14 +2802,31 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
                 */
                if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) {
                        rc = -EOPNOTSUPP;
+                       trace_smb3_falloc_err(xid, cfile->fid.persistent_fid,
+                               tcon->tid, tcon->ses->Suid, off, len, rc);
                        free_xid(xid);
                        return rc;
                }
 
-               rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
+               smb2_set_sparse(xid, tcon, cfile, inode, false);
+               rc = 0;
+       } else {
+               smb2_set_sparse(xid, tcon, cfile, inode, false);
+               rc = 0;
+               if (i_size_read(inode) < off + len) {
+                       eof = cpu_to_le64(off + len);
+                       rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
+                                         cfile->fid.volatile_fid, cfile->pid,
+                                         &eof);
+               }
        }
-       /* BB: else ... in future add code to extend file and set sparse */
 
+       if (rc)
+               trace_smb3_falloc_err(xid, cfile->fid.persistent_fid, tcon->tid,
+                               tcon->ses->Suid, off, len, rc);
+       else
+               trace_smb3_falloc_done(xid, cfile->fid.persistent_fid, tcon->tid,
+                               tcon->ses->Suid, off, len);
 
        free_xid(xid);
        return rc;
@@ -3604,6 +3807,104 @@ smb2_next_header(char *buf)
        return le32_to_cpu(hdr->NextCommand);
 }
 
+static int
+smb2_make_node(unsigned int xid, struct inode *inode,
+              struct dentry *dentry, struct cifs_tcon *tcon,
+              char *full_path, umode_t mode, dev_t dev)
+{
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+       int rc = -EPERM;
+       int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL;
+       FILE_ALL_INFO *buf = NULL;
+       struct cifs_io_parms io_parms;
+       __u32 oplock = 0;
+       struct cifs_fid fid;
+       struct cifs_open_parms oparms;
+       unsigned int bytes_written;
+       struct win_dev *pdev;
+       struct kvec iov[2];
+
+       /*
+        * Check if mounted with the 'sfu' mount parm.
+        * SFU emulation should work with all servers, but only
+        * supports block and char devices (no sockets or fifos),
+        * and was used by default in earlier versions of Windows.
+        */
+       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
+               goto out;
+
+       /*
+        * TODO: Add the ability to create these via a reparse point instead.
+        * Windows (e.g. its current NFS server) uses this approach to expose
+        * special files over SMB2/SMB3 and Samba will do so with the
+        * SMB3.1.1 POSIX Extensions.
+        */
+
+       if (!S_ISCHR(mode) && !S_ISBLK(mode))
+               goto out;
+
+       cifs_dbg(FYI, "sfu compat create special file\n");
+
+       buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+       if (buf == NULL) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       if (backup_cred(cifs_sb))
+               create_options |= CREATE_OPEN_BACKUP_INTENT;
+
+       oparms.tcon = tcon;
+       oparms.cifs_sb = cifs_sb;
+       oparms.desired_access = GENERIC_WRITE;
+       oparms.create_options = create_options;
+       oparms.disposition = FILE_CREATE;
+       oparms.path = full_path;
+       oparms.fid = &fid;
+       oparms.reconnect = false;
+
+       if (tcon->ses->server->oplocks)
+               oplock = REQ_OPLOCK;
+       else
+               oplock = 0;
+       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf);
+       if (rc)
+               goto out;
+
+       /*
+        * BB Do not bother to decode buf since no local inode yet to put
+        * timestamps in, but we can reuse it safely.
+        */
+
+       pdev = (struct win_dev *)buf;
+       io_parms.pid = current->tgid;
+       io_parms.tcon = tcon;
+       io_parms.offset = 0;
+       io_parms.length = sizeof(struct win_dev);
+       iov[1].iov_base = buf;
+       iov[1].iov_len = sizeof(struct win_dev);
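+       /* SFU convention: an 8 byte type tag then little-endian major/minor */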
+       if (S_ISCHR(mode)) {
+               memcpy(pdev->type, "IntxCHR", 8);
+               pdev->major = cpu_to_le64(MAJOR(dev));
+               pdev->minor = cpu_to_le64(MINOR(dev));
+               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
+                                                       &bytes_written, iov, 1);
+       } else if (S_ISBLK(mode)) {
+               memcpy(pdev->type, "IntxBLK", 8);
+               pdev->major = cpu_to_le64(MAJOR(dev));
+               pdev->minor = cpu_to_le64(MINOR(dev));
+               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
+                                                       &bytes_written, iov, 1);
+       }
+       tcon->ses->server->ops->close(xid, tcon, &fid);
+       d_drop(dentry);
+
+       /* FIXME: add code here to set EAs */
+out:
+       kfree(buf);
+       return rc;
+}
+
 struct smb_version_operations smb20_operations = {
        .compare_fids = smb2_compare_fids,
        .setup_request = smb2_setup_request,
@@ -3698,6 +3999,7 @@ struct smb_version_operations smb20_operations = {
 #endif /* CIFS_ACL */
        .next_header = smb2_next_header,
        .ioctl_query_info = smb2_ioctl_query_info,
+       .make_node = smb2_make_node,
 };
 
 struct smb_version_operations smb21_operations = {
@@ -3796,6 +4098,7 @@ struct smb_version_operations smb21_operations = {
 #endif /* CIFS_ACL */
        .next_header = smb2_next_header,
        .ioctl_query_info = smb2_ioctl_query_info,
+       .make_node = smb2_make_node,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -3903,6 +4206,7 @@ struct smb_version_operations smb30_operations = {
 #endif /* CIFS_ACL */
        .next_header = smb2_next_header,
        .ioctl_query_info = smb2_ioctl_query_info,
+       .make_node = smb2_make_node,
 };
 
 struct smb_version_operations smb311_operations = {
@@ -4011,6 +4315,7 @@ struct smb_version_operations smb311_operations = {
 #endif /* CIFS_ACL */
        .next_header = smb2_next_header,
        .ioctl_query_info = smb2_ioctl_query_info,
+       .make_node = smb2_make_node,
 };
 
 struct smb_version_values smb20_values = {
index 60fbe306f60431b40554d916d1ee8ba7351ebe27..c399e09b76e62a7c733857075ff348b039e49a58 100644 (file)
@@ -1797,9 +1797,10 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
        return buf;
 }
 
-static __u8
-parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
-                 unsigned int *epoch, char *lease_key)
+__u8
+smb2_parse_lease_state(struct TCP_Server_Info *server,
+                      struct smb2_create_rsp *rsp,
+                      unsigned int *epoch, char *lease_key)
 {
        char *data_offset;
        struct create_context *cc;
@@ -2456,8 +2457,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
        }
 
        if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
-               *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch,
-                                           oparms->fid->lease_key);
+               *oplock = smb2_parse_lease_state(server, rsp,
+                                                &oparms->fid->epoch,
+                                                oparms->fid->lease_key);
        else
                *oplock = rsp->OplockLevel;
 creat_exit:
@@ -2466,65 +2468,46 @@ creat_exit:
        return rc;
 }
 
-/*
- *     SMB2 IOCTL is used for both IOCTLs and FSCTLs
- */
 int
-SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
-          u64 volatile_fid, u32 opcode, bool is_fsctl,
-          char *in_data, u32 indatalen,
-          char **out_data, u32 *plen /* returned data len */)
+SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+               u64 persistent_fid, u64 volatile_fid, u32 opcode,
+               bool is_fsctl, char *in_data, u32 indatalen)
 {
-       struct smb_rqst rqst;
        struct smb2_ioctl_req *req;
-       struct smb2_ioctl_rsp *rsp;
-       struct cifs_ses *ses;
-       struct kvec iov[2];
-       struct kvec rsp_iov;
-       int resp_buftype;
-       int n_iov;
-       int rc = 0;
-       int flags = 0;
+       struct kvec *iov = rqst->rq_iov;
        unsigned int total_len;
-
-       cifs_dbg(FYI, "SMB2 IOCTL\n");
-
-       if (out_data != NULL)
-               *out_data = NULL;
-
-       /* zero out returned data len, in case of error */
-       if (plen)
-               *plen = 0;
-
-       if (tcon)
-               ses = tcon->ses;
-       else
-               return -EIO;
-
-       if (!ses || !(ses->server))
-               return -EIO;
+       int rc;
 
        rc = smb2_plain_req_init(SMB2_IOCTL, tcon, (void **) &req, &total_len);
        if (rc)
                return rc;
 
-       if (smb3_encryption_required(tcon))
-               flags |= CIFS_TRANSFORM_REQ;
-
        req->CtlCode = cpu_to_le32(opcode);
        req->PersistentFileId = persistent_fid;
        req->VolatileFileId = volatile_fid;
 
+       iov[0].iov_base = (char *)req;
+       /*
+        * The size of the ioctl struct in the protocol spec includes a
+        * 1 byte data buffer even when there is no input data, so if input
+        * data is passed (in iovec[1]) we do not send that dummy byte in
+        * iovec[0], to avoid counting it twice.
+        */
        if (indatalen) {
                req->InputCount = cpu_to_le32(indatalen);
                /* do not set InputOffset if no input data */
                req->InputOffset =
                       cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer));
+               rqst->rq_nvec = 2;
+               iov[0].iov_len = total_len - 1;
                iov[1].iov_base = in_data;
                iov[1].iov_len = indatalen;
-               n_iov = 2;
-       } else
-               n_iov = 1;
+       } else {
+               rqst->rq_nvec = 1;
+               iov[0].iov_len = total_len;
+       }
 
        req->OutputOffset = 0;
        req->OutputCount = 0; /* MBZ */
@@ -2546,33 +2529,70 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        else
                req->Flags = 0;
 
-       iov[0].iov_base = (char *)req;
-
-       /*
-        * If no input data, the size of ioctl struct in
-        * protocol spec still includes a 1 byte data buffer,
-        * but if input data passed to ioctl, we do not
-        * want to double count this, so we do not send
-        * the dummy one byte of data in iovec[0] if sending
-        * input data (in iovec[1]).
-        */
-
-       if (indatalen) {
-               iov[0].iov_len = total_len - 1;
-       } else
-               iov[0].iov_len = total_len;
-
        /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */
        if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO)
                req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
 
+       return 0;
+}
+
+void
+SMB2_ioctl_free(struct smb_rqst *rqst)
+{
+       if (rqst && rqst->rq_iov)
+               cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+/*
+ *     SMB2 IOCTL is used for both IOCTLs and FSCTLs
+ */
+int
+SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
+          u64 volatile_fid, u32 opcode, bool is_fsctl,
+          char *in_data, u32 indatalen,
+          char **out_data, u32 *plen /* returned data len */)
+{
+       struct smb_rqst rqst;
+       struct smb2_ioctl_rsp *rsp = NULL;
+       struct cifs_ses *ses;
+       struct kvec iov[SMB2_IOCTL_IOV_SIZE];
+       struct kvec rsp_iov = {NULL, 0};
+       int resp_buftype = CIFS_NO_BUFFER;
+       int rc = 0;
+       int flags = 0;
+
+       cifs_dbg(FYI, "SMB2 IOCTL\n");
+
+       if (out_data != NULL)
+               *out_data = NULL;
+
+       /* zero out returned data len, in case of error */
+       if (plen)
+               *plen = 0;
+
+       if (tcon)
+               ses = tcon->ses;
+       else
+               return -EIO;
+
+       if (!ses || !(ses->server))
+               return -EIO;
+
+       if (smb3_encryption_required(tcon))
+               flags |= CIFS_TRANSFORM_REQ;
+
        memset(&rqst, 0, sizeof(struct smb_rqst));
+       memset(&iov, 0, sizeof(iov));
        rqst.rq_iov = iov;
-       rqst.rq_nvec = n_iov;
+       rqst.rq_nvec = SMB2_IOCTL_IOV_SIZE;
+
+       rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid,
+                            opcode, is_fsctl, in_data, indatalen);
+       if (rc)
+               goto ioctl_exit;
 
        rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
                            &rsp_iov);
-       cifs_small_buf_release(req);
        rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
 
        if (rc != 0)
@@ -2622,6 +2642,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        }
 
 ioctl_exit:
+       SMB2_ioctl_free(&rqst);
        free_rsp_buf(resp_buftype, rsp);
        return rc;
 }
index 0bd4d4802701320e73bc795730002b729688b732..ee8977688e21f595bd6b8fb585e43ace39a2b162 100644 (file)
@@ -959,6 +959,13 @@ struct duplicate_extents_to_file {
        __le64 ByteCount;  /* Bytes to be copied */
 } __packed;
 
+/*
+ * Maximum number of iovs we need for an ioctl request.
+ * [0] : struct smb2_ioctl_req
+ * [1] : in_data
+ */
+#define SMB2_IOCTL_IOV_SIZE 2
+
 struct smb2_ioctl_req {
        struct smb2_sync_hdr sync_hdr;
        __le16 StructureSize;   /* Must be 57 */
index 87733b27a65fef06c137e0782a04ab1151064ca6..3c32d0cfea69b0c7191336e5b38247de578ed63b 100644 (file)
@@ -144,6 +144,10 @@ extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
                     u64 persistent_fid, u64 volatile_fid, u32 opcode,
                     bool is_fsctl, char *in_data, u32 indatalen,
                     char **out_data, u32 *plen /* returned data len */);
+extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+                          u64 persistent_fid, u64 volatile_fid, u32 opcode,
+                          bool is_fsctl, char *in_data, u32 indatalen);
+extern void SMB2_ioctl_free(struct smb_rqst *rqst);
 extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
                      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
@@ -223,6 +227,9 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
 
 extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
                                        enum securityEnum);
+extern __u8 smb2_parse_lease_state(struct TCP_Server_Info *server,
+                                  struct smb2_create_rsp *rsp,
+                                  unsigned int *epoch, char *lease_key);
 extern int smb3_encryption_required(const struct cifs_tcon *tcon);
 extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
                             struct kvec *iov, unsigned int min_buf_size);
index 3d5f62150de42d0f6f2328bab1e6733f3b360690..447c0c6e4c640f818e3da2988befb847e3638180 100644 (file)
@@ -30,9 +30,9 @@
  */
 
 #define STATUS_SEVERITY_SUCCESS __constant_cpu_to_le32(0x0000)
-#define STATUS_SEVERITY_INFORMATIONAL __constanst_cpu_to_le32(0x0001)
-#define STATUS_SEVERITY_WARNING __constanst_cpu_to_le32(0x0002)
-#define STATUS_SEVERITY_ERROR __constanst_cpu_to_le32(0x0003)
+#define STATUS_SEVERITY_INFORMATIONAL cpu_to_le32(0x0001)
+#define STATUS_SEVERITY_WARNING cpu_to_le32(0x0002)
+#define STATUS_SEVERITY_ERROR cpu_to_le32(0x0003)
 
 struct ntstatus {
        /* Facility is the high 12 bits of the following field */
index d8b049afa6062f21bde5f2de7ebf40347da5081d..fa226de48ef38c0c0696aca767c69cc2ac8dbdd8 100644 (file)
@@ -59,6 +59,8 @@ DEFINE_EVENT(smb3_rw_err_class, smb3_##name,    \
 DEFINE_SMB3_RW_ERR_EVENT(write_err);
 DEFINE_SMB3_RW_ERR_EVENT(read_err);
 DEFINE_SMB3_RW_ERR_EVENT(query_dir_err);
+DEFINE_SMB3_RW_ERR_EVENT(zero_err);
+DEFINE_SMB3_RW_ERR_EVENT(falloc_err);
 
 
 /* For logging successful read or write */
@@ -104,9 +106,13 @@ DEFINE_EVENT(smb3_rw_done_class, smb3_##name,   \
 DEFINE_SMB3_RW_DONE_EVENT(write_enter);
 DEFINE_SMB3_RW_DONE_EVENT(read_enter);
 DEFINE_SMB3_RW_DONE_EVENT(query_dir_enter);
+DEFINE_SMB3_RW_DONE_EVENT(zero_enter);
+DEFINE_SMB3_RW_DONE_EVENT(falloc_enter);
 DEFINE_SMB3_RW_DONE_EVENT(write_done);
 DEFINE_SMB3_RW_DONE_EVENT(read_done);
 DEFINE_SMB3_RW_DONE_EVENT(query_dir_done);
+DEFINE_SMB3_RW_DONE_EVENT(zero_done);
+DEFINE_SMB3_RW_DONE_EVENT(falloc_done);
 
 /*
  * For handle based calls other than read and write, and get/set info
@@ -242,6 +248,123 @@ DEFINE_SMB3_INF_ERR_EVENT(query_info_err);
 DEFINE_SMB3_INF_ERR_EVENT(set_info_err);
 DEFINE_SMB3_INF_ERR_EVENT(fsctl_err);
 
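+/* For logging compounded (path based) smb3 operations */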
+DECLARE_EVENT_CLASS(smb3_inf_compound_enter_class,
+       TP_PROTO(unsigned int xid,
+               __u32   tid,
+               __u64   sesid,
+               const char *full_path),
+       TP_ARGS(xid, tid, sesid, full_path),
+       TP_STRUCT__entry(
+               __field(unsigned int, xid)
+               __field(__u32, tid)
+               __field(__u64, sesid)
+               __string(path, full_path)
+       ),
+       TP_fast_assign(
+               __entry->xid = xid;
+               __entry->tid = tid;
+               __entry->sesid = sesid;
+               __assign_str(path, full_path);
+       ),
+       TP_printk("xid=%u sid=0x%llx tid=0x%x path=%s",
+               __entry->xid, __entry->sesid, __entry->tid,
+               __get_str(path))
+)
+
+#define DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(name)     \
+DEFINE_EVENT(smb3_inf_compound_enter_class, smb3_##name,    \
+       TP_PROTO(unsigned int xid,              \
+               __u32   tid,                    \
+               __u64   sesid,                  \
+               const char *full_path),         \
+       TP_ARGS(xid, tid, sesid, full_path))
+
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_info_compound_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(hardlink_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rename_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rmdir_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_eof_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_info_compound_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
+
+
+DECLARE_EVENT_CLASS(smb3_inf_compound_done_class,
+       TP_PROTO(unsigned int xid,
+               __u32   tid,
+               __u64   sesid),
+       TP_ARGS(xid, tid, sesid),
+       TP_STRUCT__entry(
+               __field(unsigned int, xid)
+               __field(__u32, tid)
+               __field(__u64, sesid)
+       ),
+       TP_fast_assign(
+               __entry->xid = xid;
+               __entry->tid = tid;
+               __entry->sesid = sesid;
+       ),
+       TP_printk("xid=%u sid=0x%llx tid=0x%x",
+               __entry->xid, __entry->sesid, __entry->tid)
+)
+
+#define DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(name)     \
+DEFINE_EVENT(smb3_inf_compound_done_class, smb3_##name,    \
+       TP_PROTO(unsigned int xid,              \
+               __u32   tid,                    \
+               __u64   sesid),                 \
+       TP_ARGS(xid, tid, sesid))
+
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_info_compound_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(hardlink_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rename_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rmdir_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_eof_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_info_compound_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
+
+
+DECLARE_EVENT_CLASS(smb3_inf_compound_err_class,
+       TP_PROTO(unsigned int xid,
+               __u32   tid,
+               __u64   sesid,
+               int     rc),
+       TP_ARGS(xid, tid, sesid, rc),
+       TP_STRUCT__entry(
+               __field(unsigned int, xid)
+               __field(__u32, tid)
+               __field(__u64, sesid)
+               __field(int, rc)
+       ),
+       TP_fast_assign(
+               __entry->xid = xid;
+               __entry->tid = tid;
+               __entry->sesid = sesid;
+               __entry->rc = rc;
+       ),
+       TP_printk("xid=%u sid=0x%llx tid=0x%x rc=%d",
+               __entry->xid, __entry->sesid, __entry->tid,
+               __entry->rc)
+)
+
+#define DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(name)     \
+DEFINE_EVENT(smb3_inf_compound_err_class, smb3_##name,    \
+       TP_PROTO(unsigned int xid,              \
+               __u32   tid,                    \
+               __u64   sesid,                  \
+               int rc),                        \
+       TP_ARGS(xid, tid, sesid, rc))
+
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_info_compound_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(hardlink_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rename_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rmdir_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_eof_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_info_compound_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
+
 /*
  * For logging SMB3 Status code and Command for responses which return errors
  */
@@ -713,6 +836,7 @@ DEFINE_EVENT(smb3_credit_class, smb3_##name,  \
        TP_ARGS(currmid, hostname, credits))
 
 DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
+DEFINE_SMB3_CREDIT_EVENT(credit_timeout);
 
 #endif /* _CIFS_TRACE_H */
 
index 7ce8a585abd620a847c325e0c310ab5669ed3e0d..1de8e996e566fd0bcc5642dcaba7bd88fed191ad 100644 (file)
@@ -486,15 +486,31 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
 }
 
 static int
-wait_for_free_credits(struct TCP_Server_Info *server, const int timeout,
-                     int *credits, unsigned int *instance)
+wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
+                     const int timeout, const int flags,
+                     unsigned int *instance)
 {
        int rc;
+       int *credits;
+       int optype;
+       long int t;
+
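+       /* a negative timeout means wait effectively forever */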
+       if (timeout < 0)
+               t = MAX_JIFFY_OFFSET;
+       else
+               t = msecs_to_jiffies(timeout);
+
+       optype = flags & CIFS_OP_MASK;
 
        *instance = 0;
 
+       credits = server->ops->get_credits_field(server, optype);
+       /* Since an echo is already inflight, no need to wait to send another */
+       if (*credits <= 0 && optype == CIFS_ECHO_OP)
+               return -EAGAIN;
+
        spin_lock(&server->req_lock);
-       if (timeout == CIFS_ASYNC_OP) {
+       if ((flags & CIFS_TIMEOUT_MASK) == CIFS_ASYNC_OP) {
                /* oplock breaks must not be held up */
                server->in_flight++;
                *credits -= 1;
@@ -504,14 +520,21 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout,
        }
 
        while (1) {
-               if (*credits <= 0) {
+               if (*credits < num_credits) {
                        spin_unlock(&server->req_lock);
                        cifs_num_waiters_inc(server);
-                       rc = wait_event_killable(server->request_q,
-                                                has_credits(server, credits));
+                       rc = wait_event_killable_timeout(server->request_q,
+                               has_credits(server, credits, num_credits), t);
                        cifs_num_waiters_dec(server);
-                       if (rc)
-                               return rc;
+                       if (!rc) {
+                               trace_smb3_credit_timeout(server->CurrentMid,
+                                       server->hostname, num_credits);
+                               cifs_dbg(VFS, "wait timed out after %d ms\n",
+                                        timeout);
+                               return -ENOTSUPP;
+                       }
+                       if (rc == -ERESTARTSYS)
+                               return -ERESTARTSYS;
                        spin_lock(&server->req_lock);
                } else {
                        if (server->tcpStatus == CifsExiting) {
@@ -519,15 +542,53 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout,
                                return -ENOENT;
                        }
 
+                       /*
+                        * For normal commands, reserve the last MAX_COMPOUND
+                        * credits for compound requests.
+                        * Otherwise these compounds could be permanently
+                        * starved for credits by single-credit requests.
+                        *
+                        * To prevent spinning the CPU, block this thread until
+                        * there are >MAX_COMPOUND credits available.
+                        * But only do this if we already have a lot of
+                        * credits in flight to avoid triggering this check
+                        * for servers that are slow to hand out credits on
+                        * new sessions.
+                        */
+                       if (!optype && num_credits == 1 &&
+                           server->in_flight > 2 * MAX_COMPOUND &&
+                           *credits <= MAX_COMPOUND) {
+                               spin_unlock(&server->req_lock);
+                               cifs_num_waiters_inc(server);
+                               rc = wait_event_killable_timeout(
+                                       server->request_q,
+                                       has_credits(server, credits,
+                                                   MAX_COMPOUND + 1),
+                                       t);
+                               cifs_num_waiters_dec(server);
+                               if (!rc) {
+                                       trace_smb3_credit_timeout(
+                                               server->CurrentMid,
+                                               server->hostname, num_credits);
+                                       cifs_dbg(VFS, "wait timed out after %d ms\n",
+                                                timeout);
+                                       return -ENOTSUPP;
+                               }
+                               if (rc == -ERESTARTSYS)
+                                       return -ERESTARTSYS;
+                               spin_lock(&server->req_lock);
+                               continue;
+                       }
+
                        /*
                         * Can not count locking commands against total
                         * as they are allowed to block on server.
                         */
 
                        /* update # of requests on the wire to server */
-                       if (timeout != CIFS_BLOCKING_OP) {
-                               *credits -= 1;
-                               server->in_flight++;
+                       if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
+                               *credits -= num_credits;
+                               server->in_flight += num_credits;
                                *instance = server->reconnect_instance;
                        }
                        spin_unlock(&server->req_lock);
@@ -538,16 +599,36 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout,
 }
 
 static int
-wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
-                     const int optype, unsigned int *instance)
+wait_for_free_request(struct TCP_Server_Info *server, const int flags,
+                     unsigned int *instance)
 {
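+       /* a single credit, with no timeout (-1) */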
-       int *val;
+       return wait_for_free_credits(server, 1, -1, flags,
+                                    instance);
+}
 
-       val = server->ops->get_credits_field(server, optype);
-       /* Since an echo is already inflight, no need to wait to send another */
-       if (*val <= 0 && optype == CIFS_ECHO_OP)
-               return -EAGAIN;
-       return wait_for_free_credits(server, timeout, val, instance);
+static int
+wait_for_compound_request(struct TCP_Server_Info *server, int num,
+                         const int flags, unsigned int *instance)
+{
+       int *credits;
+
+       credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
+
+       spin_lock(&server->req_lock);
+       if (*credits < num) {
+               /*
+                * Return immediately if there are not enough requests in
+                * flight to bring in the missing credits; we would likely
+                * be stuck waiting for them otherwise.
+                */
+               if (server->in_flight < num - *credits) {
+                       spin_unlock(&server->req_lock);
+                       return -ENOTSUPP;
+               }
+       }
+       spin_unlock(&server->req_lock);
+
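+       /* wait up to 60 seconds for credits for the whole compound */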
+       return wait_for_free_credits(server, num, 60000, flags,
+                                    instance);
 }
 
 int
@@ -646,16 +727,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
                mid_handle_t *handle, void *cbdata, const int flags,
                const struct cifs_credits *exist_credits)
 {
-       int rc, timeout, optype;
+       int rc;
        struct mid_q_entry *mid;
        struct cifs_credits credits = { .value = 0, .instance = 0 };
        unsigned int instance;
+       int optype;
 
-       timeout = flags & CIFS_TIMEOUT_MASK;
        optype = flags & CIFS_OP_MASK;
 
        if ((flags & CIFS_HAS_CREDITS) == 0) {
-               rc = wait_for_free_request(server, timeout, optype, &instance);
+               rc = wait_for_free_request(server, flags, &instance);
                if (rc)
                        return rc;
                credits.value = 1;
@@ -871,18 +952,15 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
                   const int flags, const int num_rqst, struct smb_rqst *rqst,
                   int *resp_buf_type, struct kvec *resp_iov)
 {
-       int i, j, rc = 0;
-       int timeout, optype;
+       int i, j, optype, rc = 0;
        struct mid_q_entry *midQ[MAX_COMPOUND];
        bool cancelled_mid[MAX_COMPOUND] = {false};
        struct cifs_credits credits[MAX_COMPOUND] = {
                { .value = 0, .instance = 0 }
        };
        unsigned int instance;
-       unsigned int first_instance = 0;
        char *buf;
 
-       timeout = flags & CIFS_TIMEOUT_MASK;
        optype = flags & CIFS_OP_MASK;
 
        for (i = 0; i < num_rqst; i++)
@@ -896,81 +974,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
        if (ses->server->tcpStatus == CifsExiting)
                return -ENOENT;
 
-       spin_lock(&ses->server->req_lock);
-       if (ses->server->credits < num_rqst) {
-               /*
-                * Return immediately if not too many requests in flight since
-                * we will likely be stuck on waiting for credits.
-                */
-               if (ses->server->in_flight < num_rqst - ses->server->credits) {
-                       spin_unlock(&ses->server->req_lock);
-                       return -ENOTSUPP;
-               }
-       } else {
-               /* enough credits to send the whole compounded request */
-               ses->server->credits -= num_rqst;
-               ses->server->in_flight += num_rqst;
-               first_instance = ses->server->reconnect_instance;
-       }
-       spin_unlock(&ses->server->req_lock);
-
-       if (first_instance) {
-               cifs_dbg(FYI, "Acquired %d credits at once\n", num_rqst);
-               for (i = 0; i < num_rqst; i++) {
-                       credits[i].value = 1;
-                       credits[i].instance = first_instance;
-               }
-               goto setup_rqsts;
-       }
-
        /*
-        * There are not enough credits to send the whole compound request but
-        * there are requests in flight that may bring credits from the server.
+        * Wait for all the requests to become available.
         * This approach still leaves the possibility of being stuck waiting for
         * credits if the server doesn't grant credits to the outstanding
-        * requests. This should be fixed by returning immediately and letting
-        * a caller fallback to sequential commands instead of compounding.
-        * Ensure we obtain 1 credit per request in the compound chain.
+        * requests and if the client is completely idle, not generating any
+        * other requests.
+        * This can be handled by the eventual session reconnect.
         */
-       for (i = 0; i < num_rqst; i++) {
-               rc = wait_for_free_request(ses->server, timeout, optype,
-                                          &instance);
-
-               if (rc == 0) {
-                       credits[i].value = 1;
-                       credits[i].instance = instance;
-                       /*
-                        * All parts of the compound chain must get credits from
-                        * the same session, otherwise we may end up using more
-                        * credits than the server granted. If there were
-                        * reconnects in between, return -EAGAIN and let callers
-                        * handle it.
-                        */
-                       if (i == 0)
-                               first_instance = instance;
-                       else if (first_instance != instance) {
-                               i++;
-                               rc = -EAGAIN;
-                       }
-               }
+       rc = wait_for_compound_request(ses->server, num_rqst, flags,
+                                      &instance);
+       if (rc)
+               return rc;
 
-               if (rc) {
-                       /*
-                        * We haven't sent an SMB packet to the server yet but
-                        * we already obtained credits for i requests in the
-                        * compound chain - need to return those credits back
-                        * for future use. Note that we need to call add_credits
-                        * multiple times to match the way we obtained credits
-                        * in the first place and to account for in flight
-                        * requests correctly.
-                        */
-                       for (j = 0; j < i; j++)
-                               add_credits(ses->server, &credits[j], optype);
-                       return rc;
-               }
+       for (i = 0; i < num_rqst; i++) {
+               credits[i].value = 1;
+               credits[i].instance = instance;
        }
 
-setup_rqsts:
        /*
         * Make sure that we sign in the same order that we send on this socket
         * and avoid races inside tcp sendmsg code that could cause corruption
@@ -981,14 +1002,12 @@ setup_rqsts:
 
        /*
         * All the parts of the compound chain obtained credits from the
-        * same session (see the appropriate checks above). In the same time
-        * there might be reconnects after those checks but before we acquired
-        * the srv_mutex. We can not use credits obtained from the previous
+        * same session. We can not use credits obtained from the previous
         * session to send this request. Check if there were reconnects after
         * we obtained credits and return -EAGAIN in such cases to let callers
         * handle it.
         */
-       if (first_instance != ses->server->reconnect_instance) {
+       if (instance != ses->server->reconnect_instance) {
                mutex_unlock(&ses->server->srv_mutex);
                for (j = 0; j < num_rqst; j++)
                        add_credits(ses->server, &credits[j], optype);
@@ -1057,7 +1076,7 @@ setup_rqsts:
                smb311_update_preauth_hash(ses, rqst[0].rq_iov,
                                           rqst[0].rq_nvec);
 
-       if (timeout == CIFS_ASYNC_OP)
+       if ((flags & CIFS_TIMEOUT_MASK) == CIFS_ASYNC_OP)
                goto out;
 
        for (i = 0; i < num_rqst; i++) {
@@ -1194,7 +1213,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 int
 SendReceive(const unsigned int xid, struct cifs_ses *ses,
            struct smb_hdr *in_buf, struct smb_hdr *out_buf,
-           int *pbytes_returned, const int timeout)
+           int *pbytes_returned, const int flags)
 {
        int rc = 0;
        struct mid_q_entry *midQ;
@@ -1225,7 +1244,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
                return -EIO;
        }
 
-       rc = wait_for_free_request(ses->server, timeout, 0, &credits.instance);
+       rc = wait_for_free_request(ses->server, flags, &credits.instance);
        if (rc)
                return rc;
 
@@ -1264,7 +1283,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
        if (rc < 0)
                goto out;
 
-       if (timeout == CIFS_ASYNC_OP)
+       if ((flags & CIFS_TIMEOUT_MASK) == CIFS_ASYNC_OP)
                goto out;
 
        rc = wait_for_response(ses->server, midQ);
@@ -1367,8 +1386,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
                return -EIO;
        }
 
-       rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 0,
-                                  &instance);
+       rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, &instance);
        if (rc)
                return rc;
 
index f955cd3e067798142c3f0f812af8912f55e75707..a98e1b02279ea1cb85fde3744206e9228f9d3783 100644 (file)
@@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
                goto skip_write;
 
        /* collect a number of dirty meta pages and write together */
-       if (wbc->for_kupdate ||
-               get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+       if (wbc->sync_mode != WB_SYNC_ALL &&
+                       get_pages(sbi, F2FS_DIRTY_META) <
+                                       nr_pages_to_skip(sbi, META))
                goto skip_write;
 
        /* if locked failed, cp will flush dirty pages instead */
@@ -405,7 +406,7 @@ static int f2fs_set_meta_page_dirty(struct page *page)
        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
-               SetPagePrivate(page);
+               f2fs_set_page_private(page, 0);
                f2fs_trace_pid(page);
                return 1;
        }
@@ -956,7 +957,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page)
        inode_inc_dirty_pages(inode);
        spin_unlock(&sbi->inode_lock[type]);
 
-       SetPagePrivate(page);
+       f2fs_set_page_private(page, 0);
        f2fs_trace_pid(page);
 }
 
@@ -1259,10 +1260,17 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        else
                __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
 
+       if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
+               __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+       else
+               __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+
        if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
                __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
-       else
-               __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+       /*
+        * TODO: we count on fsck.f2fs to clear this flag until we figure out
+        * missing cases which clear it incorrectly.
+        */
 
        if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
                __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
index 568e1d09eb4864d42ed7ceedec55cb6465b229b2..9727944139f210b767b9ca280751f7acb3810dfd 100644 (file)
@@ -301,9 +301,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
                for (; start < F2FS_IO_SIZE(sbi); start++) {
                        struct page *page =
                                mempool_alloc(sbi->write_io_dummy,
-                                       GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+                                             GFP_NOIO | __GFP_NOFAIL);
                        f2fs_bug_on(sbi, !page);
 
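+                       /* mempool pages are not guaranteed zeroed; clear explicitly */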
+                       zero_user_segment(page, 0, PAGE_SIZE);
                        SetPagePrivate(page);
                        set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
                        lock_page(page);
@@ -1553,6 +1554,9 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
                if (last_block > last_block_in_file)
                        last_block = last_block_in_file;
 
+               /* just zeroing out page which is beyond EOF */
+               if (block_in_file >= last_block)
+                       goto zero_out;
                /*
                 * Map blocks using the previous result first.
                 */
@@ -1565,16 +1569,11 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
                 * Then do more f2fs_map_blocks() calls until we are
                 * done with this page.
                 */
-               map.m_flags = 0;
-
-               if (block_in_file < last_block) {
-                       map.m_lblk = block_in_file;
-                       map.m_len = last_block - block_in_file;
+               map.m_lblk = block_in_file;
+               map.m_len = last_block - block_in_file;
 
-                       if (f2fs_map_blocks(inode, &map, 0,
-                                               F2FS_GET_BLOCK_DEFAULT))
-                               goto set_error_page;
-               }
+               if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
+                       goto set_error_page;
 got_it:
                if ((map.m_flags & F2FS_MAP_MAPPED)) {
                        block_nr = map.m_pblk + block_in_file - map.m_lblk;
@@ -1589,6 +1588,7 @@ got_it:
                                                                DATA_GENERIC))
                                goto set_error_page;
                } else {
+zero_out:
                        zero_user_segment(page, 0, PAGE_SIZE);
                        if (!PageUptodate(page))
                                SetPageUptodate(page);
@@ -1863,8 +1863,13 @@ got_it:
                if (fio->need_lock == LOCK_REQ)
                        f2fs_unlock_op(fio->sbi);
                err = f2fs_inplace_write_data(fio);
-               if (err && PageWriteback(page))
-                       end_page_writeback(page);
+               if (err) {
+                       if (f2fs_encrypted_file(inode))
+                               fscrypt_pullback_bio_page(&fio->encrypted_page,
+                                                                       true);
+                       if (PageWriteback(page))
+                               end_page_writeback(page);
+               }
                trace_f2fs_do_write_data_page(fio->page, IPU);
                set_inode_flag(inode, FI_UPDATE_WRITE);
                return err;
@@ -2315,7 +2320,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
                down_write(&F2FS_I(inode)->i_mmap_sem);
 
                truncate_pagecache(inode, i_size);
-               f2fs_truncate_blocks(inode, i_size, true, true);
+               if (!IS_NOQUOTA(inode))
+                       f2fs_truncate_blocks(inode, i_size, true);
 
                up_write(&F2FS_I(inode)->i_mmap_sem);
                up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2585,14 +2591,11 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
 {
        struct f2fs_private_dio *dio;
        bool write = (bio_op(bio) == REQ_OP_WRITE);
-       int err;
 
        dio = f2fs_kzalloc(F2FS_I_SB(inode),
                        sizeof(struct f2fs_private_dio), GFP_NOFS);
-       if (!dio) {
-               err = -ENOMEM;
+       if (!dio)
                goto out;
-       }
 
        dio->inode = inode;
        dio->orig_end_io = bio->bi_end_io;
@@ -2710,12 +2713,10 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
 
        clear_cold_data(page);
 
-       /* This is atomic written page, keep Private */
        if (IS_ATOMIC_WRITTEN_PAGE(page))
                return f2fs_drop_inmem_page(inode, page);
 
-       set_page_private(page, 0);
-       ClearPagePrivate(page);
+       f2fs_clear_page_private(page);
 }
 
 int f2fs_release_page(struct page *page, gfp_t wait)
@@ -2729,8 +2730,7 @@ int f2fs_release_page(struct page *page, gfp_t wait)
                return 0;
 
        clear_cold_data(page);
-       set_page_private(page, 0);
-       ClearPagePrivate(page);
+       f2fs_clear_page_private(page);
        return 1;
 }
 
@@ -2798,12 +2798,8 @@ int f2fs_migrate_page(struct address_space *mapping,
                        return -EAGAIN;
        }
 
-       /*
-        * A reference is expected if PagePrivate set when move mapping,
-        * however F2FS breaks this for maintaining dirty page counts when
-        * truncating pages. So here adjusting the 'extra_count' make it work.
-        */
-       extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
+       /* one extra reference was held for atomic_write page */
+       extra_count = atomic_written ? 1 : 0;
        rc = migrate_page_move_mapping(mapping, newpage,
                                page, mode, extra_count);
        if (rc != MIGRATEPAGE_SUCCESS) {
@@ -2824,9 +2820,10 @@ int f2fs_migrate_page(struct address_space *mapping,
                get_page(newpage);
        }
 
-       if (PagePrivate(page))
-               SetPagePrivate(newpage);
-       set_page_private(newpage, page_private(page));
+       if (PagePrivate(page)) {
+               f2fs_set_page_private(newpage, page_private(page));
+               f2fs_clear_page_private(page);
+       }
 
        if (mode != MIGRATE_SYNC_NO_COPY)
                migrate_page_copy(newpage, page);
index fd7f170e2f2d6ac93b20b80b9c8e5a898b674dcf..99e9a5c37b7110eb2d20484c87ad78c0eb85f225 100644 (file)
@@ -96,8 +96,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->free_secs = free_sections(sbi);
        si->prefree_count = prefree_segments(sbi);
        si->dirty_count = dirty_segments(sbi);
-       si->node_pages = NODE_MAPPING(sbi)->nrpages;
-       si->meta_pages = META_MAPPING(sbi)->nrpages;
+       if (sbi->node_inode)
+               si->node_pages = NODE_MAPPING(sbi)->nrpages;
+       if (sbi->meta_inode)
+               si->meta_pages = META_MAPPING(sbi)->nrpages;
        si->nats = NM_I(sbi)->nat_cnt;
        si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
        si->sits = MAIN_SEGS(sbi);
@@ -175,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
 static void update_mem_info(struct f2fs_sb_info *sbi)
 {
        struct f2fs_stat_info *si = F2FS_STAT(sbi);
-       unsigned npages;
        int i;
 
        if (si->base_mem)
@@ -258,10 +259,14 @@ get_cache:
                                                sizeof(struct extent_node);
 
        si->page_mem = 0;
-       npages = NODE_MAPPING(sbi)->nrpages;
-       si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
-       npages = META_MAPPING(sbi)->nrpages;
-       si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+       if (sbi->node_inode) {
+               unsigned npages = NODE_MAPPING(sbi)->nrpages;
+               si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+       }
+       if (sbi->meta_inode) {
+               unsigned npages = META_MAPPING(sbi)->nrpages;
+               si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+       }
 }
 
 static int stat_show(struct seq_file *s, void *v)
index 713b36a10a7900a99bf3ddf24b138e2714a9cdab..59bc460178554af32108d8409cc637c424ed223c 100644 (file)
@@ -728,7 +728,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
                !f2fs_truncate_hole(dir, page->index, page->index + 1)) {
                f2fs_clear_page_cache_dirty_tag(page);
                clear_page_dirty_for_io(page);
-               ClearPagePrivate(page);
+               f2fs_clear_page_private(page);
                ClearPageUptodate(page);
                clear_cold_data(page);
                inode_dec_dirty_pages(dir);
@@ -800,6 +800,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
                if (de->name_len == 0) {
                        bit_pos++;
                        ctx->pos = start_pos + bit_pos;
+                       printk_ratelimited(
+                               "%s, invalid namelen(0), ino:%u, run fsck to fix.",
+                               KERN_WARNING, le32_to_cpu(de->ino));
+                       set_sbi_flag(sbi, SBI_NEED_FSCK);
                        continue;
                }
 
@@ -810,7 +814,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 
                /* check memory boundary before moving forward */
                bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
-               if (unlikely(bit_pos > d->max)) {
+               if (unlikely(bit_pos > d->max ||
+                               le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
                        f2fs_msg(sbi->sb, KERN_WARNING,
                                "%s: corrupted namelen=%d, run fsck to fix.",
                                __func__, le16_to_cpu(de->name_len));
@@ -891,7 +896,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
                        page_cache_sync_readahead(inode->i_mapping, ra, file, n,
                                min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
 
-               dentry_page = f2fs_get_lock_data_page(inode, n, false);
+               dentry_page = f2fs_find_data_page(inode, n);
                if (IS_ERR(dentry_page)) {
                        err = PTR_ERR(dentry_page);
                        if (err == -ENOENT) {
@@ -909,11 +914,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
                err = f2fs_fill_dentries(ctx, &d,
                                n * NR_DENTRY_IN_BLOCK, &fstr);
                if (err) {
-                       f2fs_put_page(dentry_page, 1);
+                       f2fs_put_page(dentry_page, 0);
                        break;
                }
 
-               f2fs_put_page(dentry_page, 1);
+               f2fs_put_page(dentry_page, 0);
        }
 out_free:
        fscrypt_fname_free_buffer(&fstr);
index 1cb0fcc67d2df66d891c0d33f66d1c49cb16a3ca..caf77fe8ac073c56a4bbc7f0312e4cdf5fca162c 100644 (file)
@@ -506,7 +506,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
        unsigned int end = fofs + len;
        unsigned int pos = (unsigned int)fofs;
        bool updated = false;
-       bool leftmost;
+       bool leftmost = false;
 
        if (!et)
                return;
index 7ea5c9cede3712418268758bdb4e028c97868744..87f75ebd2fd609f7ab450ab5f528e9df59e6c062 100644 (file)
@@ -190,6 +190,8 @@ enum {
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
 #define DEF_IDLE_INTERVAL              5       /* 5 secs */
 #define DEF_DISABLE_INTERVAL           5       /* 5 secs */
+#define DEF_DISABLE_QUICK_INTERVAL     1       /* 1 secs */
+#define DEF_UMOUNT_DISCARD_TIMEOUT     5       /* 5 secs */
 
 struct cp_control {
        int reason;
@@ -253,7 +255,7 @@ struct discard_entry {
 /* max discard pend list number */
 #define MAX_PLIST_NUM          512
 #define plist_idx(blk_num)     ((blk_num) >= MAX_PLIST_NUM ?           \
-                                       (MAX_PLIST_NUM - 1) : (blk_num - 1))
+                                       (MAX_PLIST_NUM - 1) : ((blk_num) - 1))
 
 enum {
        D_PREP,                 /* initial */
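
This hunk, and the F2FS_FITS_IN_INODE and __is_meta_io hunks further down, are the same macro-hygiene fix: every use of a macro parameter gets its own parentheses so an expression argument cannot rebind against operators in the macro body. A standalone sketch of the failure mode — PLIST_BAD/PLIST_GOOD are hypothetical names, with a table of 8 standing in for MAX_PLIST_NUM:

    #include <stdio.h>

    #define PLIST_BAD(n)  ((n) >= 8 ? 7 : (n - 1))    /* second use unparenthesized */
    #define PLIST_GOOD(n) ((n) >= 8 ? 7 : ((n) - 1))  /* the fixed form */

    int main(void)
    {
        int a = 1, b = 2;

        /* With the expression a | b (value 3) as the argument:      */
        /* bad expands to   1 | 2 - 1  ->  1 | 1  -> 1   (wrong)     */
        /* good expands to  (1 | 2) - 1  ->  2           (intended)  */
        printf("bad=%d good=%d\n", PLIST_BAD(a | b), PLIST_GOOD(a | b));
        return 0;
    }
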
@@ -309,6 +311,7 @@ struct discard_policy {
        bool sync;                      /* submit discard with REQ_SYNC flag */
        bool ordered;                   /* issue discard by lba order */
        unsigned int granularity;       /* discard granularity */
+       int timeout;                    /* discard timeout for put_super */
 };
 
 struct discard_cmd_control {
@@ -455,7 +458,6 @@ struct f2fs_flush_device {
 
 /* for inline stuff */
 #define DEF_INLINE_RESERVED_SIZE       1
-#define DEF_MIN_INLINE_SIZE            1
 static inline int get_extra_isize(struct inode *inode);
 static inline int get_inline_xattr_addrs(struct inode *inode);
 #define MAX_INLINE_DATA(inode) (sizeof(__le32) *                       \
@@ -1098,6 +1100,7 @@ enum {
        SBI_IS_SHUTDOWN,                        /* shutdown by ioctl */
        SBI_IS_RECOVERED,                       /* recovered orphan/data */
        SBI_CP_DISABLED,                        /* CP was disabled last mount */
+       SBI_CP_DISABLED_QUICK,                  /* CP was disabled quickly */
        SBI_QUOTA_NEED_FLUSH,                   /* need to flush quota info in CP */
        SBI_QUOTA_SKIP_FLUSH,                   /* skip flushing quota in current CP */
        SBI_QUOTA_NEED_REPAIR,                  /* quota file may be corrupted */
@@ -1109,6 +1112,7 @@ enum {
        DISCARD_TIME,
        GC_TIME,
        DISABLE_TIME,
+       UMOUNT_DISCARD_TIMEOUT,
        MAX_TIME,
 };
 
@@ -1237,8 +1241,6 @@ struct f2fs_sb_info {
 
        unsigned int nquota_files;              /* # of quota sysfile */
 
-       u32 s_next_generation;                  /* for NFS support */
-
        /* # of pages, see count_type */
        atomic_t nr_pages[NR_COUNT_TYPE];
        /* # of allocated blocks */
@@ -1798,13 +1800,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
 {
        atomic_inc(&sbi->nr_pages[count_type]);
 
-       if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
-               count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA ||
-               count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE ||
-               count_type == F2FS_RD_META)
-               return;
-
-       set_sbi_flag(sbi, SBI_IS_DIRTY);
+       if (count_type == F2FS_DIRTY_DENTS ||
+                       count_type == F2FS_DIRTY_NODES ||
+                       count_type == F2FS_DIRTY_META ||
+                       count_type == F2FS_DIRTY_QDATA ||
+                       count_type == F2FS_DIRTY_IMETA)
+               set_sbi_flag(sbi, SBI_IS_DIRTY);
 }
 
 static inline void inode_inc_dirty_pages(struct inode *inode)
@@ -2156,10 +2157,17 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
                get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
                get_pages(sbi, F2FS_WB_CP_DATA) ||
                get_pages(sbi, F2FS_DIO_READ) ||
-               get_pages(sbi, F2FS_DIO_WRITE) ||
-               atomic_read(&SM_I(sbi)->dcc_info->queued_discard) ||
-               atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+               get_pages(sbi, F2FS_DIO_WRITE))
                return false;
+
+       if (SM_I(sbi) && SM_I(sbi)->dcc_info &&
+                       atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
+               return false;
+
+       if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
+                       atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+               return false;
+
        return f2fs_time_over(sbi, type);
 }
 
@@ -2300,11 +2308,12 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
 #define F2FS_EXTENTS_FL                        0x00080000 /* Inode uses extents */
 #define F2FS_EA_INODE_FL               0x00200000 /* Inode used for large EA */
 #define F2FS_EOFBLOCKS_FL              0x00400000 /* Blocks allocated beyond EOF */
+#define F2FS_NOCOW_FL                  0x00800000 /* Do not cow file */
 #define F2FS_INLINE_DATA_FL            0x10000000 /* Inode has inline data. */
 #define F2FS_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
 #define F2FS_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
-#define F2FS_FL_USER_VISIBLE           0x304BDFFF /* User visible flags */
+#define F2FS_FL_USER_VISIBLE           0x30CBDFFF /* User visible flags */
 #define F2FS_FL_USER_MODIFIABLE                0x204BC0FF /* User modifiable flags */
 
 /* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
@@ -2761,9 +2770,9 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
 
 #define F2FS_OLD_ATTRIBUTE_SIZE        (offsetof(struct f2fs_inode, i_addr))
 #define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field)             \
-               ((offsetof(typeof(*f2fs_inode), field) +        \
+               ((offsetof(typeof(*(f2fs_inode)), field) +      \
                sizeof((f2fs_inode)->field))                    \
-               <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize))     \
+               <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize)))   \
 
 static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
 {
@@ -2792,8 +2801,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
 
 #define __is_large_section(sbi)                ((sbi)->segs_per_sec > 1)
 
-#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META &&      \
-                               (!is_read_io(fio->op) || fio->is_meta))
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META &&    \
+                               (!is_read_io((fio)->op) || (fio)->is_meta))
 
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
                                        block_t blkaddr, int type);
@@ -2825,13 +2834,33 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
        return true;
 }
 
+static inline void f2fs_set_page_private(struct page *page,
+                                               unsigned long data)
+{
+       if (PagePrivate(page))
+               return;
+
+       get_page(page);
+       SetPagePrivate(page);
+       set_page_private(page, data);
+}
+
+static inline void f2fs_clear_page_private(struct page *page)
+{
+       if (!PagePrivate(page))
+               return;
+
+       set_page_private(page, 0);
+       ClearPagePrivate(page);
+       f2fs_put_page(page, 0);
+}
+
 /*
  * file.c
  */
 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
-                                                       bool buf_write);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
 int f2fs_truncate(struct inode *inode);
 int f2fs_getattr(const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags);
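
The new f2fs_set_page_private()/f2fs_clear_page_private() helpers added in this hunk pair the private flag with a page reference: setting private takes a get_page(), clearing drops it, and both are idempotent. That pairing is what lets the invalidate/release/migrate call sites above stop open-coding set_page_private(page, 0) + ClearPagePrivate(page). A user-space sketch of the same discipline, with a toy struct page (not the kernel's):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct page {
        int refcount;
        bool has_private;
        unsigned long private_data;
    };

    /* set: pin the page while the private flag is set (get_page() analogue) */
    static void set_page_private_ref(struct page *p, unsigned long data)
    {
        if (p->has_private)
            return;                 /* idempotent: never a double get */
        p->refcount++;
        p->has_private = true;
        p->private_data = data;
    }

    /* clear: drop the pin taken by set (f2fs_put_page(page, 0) analogue) */
    static void clear_page_private_ref(struct page *p)
    {
        if (!p->has_private)
            return;                 /* idempotent: never a double put */
        p->private_data = 0;
        p->has_private = false;
        p->refcount--;
    }

    int main(void)
    {
        struct page pg = { .refcount = 1 };

        set_page_private_ref(&pg, 0xdead);
        set_page_private_ref(&pg, 0xbeef);  /* no-op: already private */
        assert(pg.refcount == 2);

        clear_page_private_ref(&pg);
        clear_page_private_ref(&pg);        /* no-op: already cleared */
        assert(pg.refcount == 1);

        printf("refcount balanced at %d\n", pg.refcount);
        return 0;
    }
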
@@ -3005,7 +3034,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
                                        struct cp_control *cpc);
 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
@@ -3610,8 +3639,6 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
 #define f2fs_build_fault_attr(sbi, rate, type)         do { } while (0)
 #endif
 
-#endif
-
 static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
 {
 #ifdef CONFIG_QUOTA
@@ -3624,3 +3651,5 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
 #endif
        return false;
 }
+
+#endif
index ba5954f41e1406caa3194ec09a1a6cc133a001f2..5742ab8b57dc6798cdf11af401ab11092f399705 100644 (file)
@@ -589,8 +589,7 @@ truncate_out:
        return 0;
 }
 
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
-                                                       bool buf_write)
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
@@ -598,7 +597,6 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
        int count = 0, err = 0;
        struct page *ipage;
        bool truncate_page = false;
-       int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
 
        trace_f2fs_truncate_blocks_enter(inode, from);
 
@@ -608,7 +606,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
                goto free_partial;
 
        if (lock)
-               __do_map_lock(sbi, flag, true);
+               f2fs_lock_op(sbi);
 
        ipage = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(ipage)) {
@@ -646,7 +644,7 @@ free_next:
        err = f2fs_truncate_inode_blocks(inode, free_from);
 out:
        if (lock)
-               __do_map_lock(sbi, flag, false);
+               f2fs_unlock_op(sbi);
 free_partial:
        /* lastly zero out the first data page */
        if (!err)
@@ -681,7 +679,7 @@ int f2fs_truncate(struct inode *inode)
                        return err;
        }
 
-       err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
+       err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
        if (err)
                return err;
 
@@ -768,7 +766,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int err;
-       bool size_changed = false;
 
        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
                return -EIO;
@@ -843,8 +840,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                down_write(&F2FS_I(inode)->i_sem);
                F2FS_I(inode)->last_disk_size = i_size_read(inode);
                up_write(&F2FS_I(inode)->i_sem);
-
-               size_changed = true;
        }
 
        __setattr_copy(inode, attr);
@@ -858,7 +853,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        /* file size may have changed here */
-       f2fs_mark_inode_dirty_sync(inode, size_changed);
+       f2fs_mark_inode_dirty_sync(inode, true);
 
        /* inode change will produce dirty node pages flushed by checkpoint */
        f2fs_balance_fs(F2FS_I_SB(inode), true);
@@ -1262,7 +1257,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        new_size = i_size_read(inode) - len;
        truncate_pagecache(inode, new_size);
 
-       ret = f2fs_truncate_blocks(inode, new_size, true, false);
+       ret = f2fs_truncate_blocks(inode, new_size, true);
        up_write(&F2FS_I(inode)->i_mmap_sem);
        if (!ret)
                f2fs_i_size_write(inode, new_size);
@@ -1447,7 +1442,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        f2fs_balance_fs(sbi, true);
 
        down_write(&F2FS_I(inode)->i_mmap_sem);
-       ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
+       ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
        up_write(&F2FS_I(inode)->i_mmap_sem);
        if (ret)
                return ret;
@@ -1651,6 +1646,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
                flags |= F2FS_ENCRYPT_FL;
        if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
                flags |= F2FS_INLINE_DATA_FL;
+       if (is_inode_flag_set(inode, FI_PIN_FILE))
+               flags |= F2FS_NOCOW_FL;
 
        flags &= F2FS_FL_USER_VISIBLE;
 
@@ -1750,10 +1747,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
        down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
-       if (!get_dirty_pages(inode))
-               goto skip_flush;
-
-       f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
+       /*
+        * Wait for end_io so that F2FS_WB_CP_DATA is counted correctly
+        * by f2fs_is_atomic_file.
+        */
+       if (get_dirty_pages(inode))
+               f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
                "Unexpected flush for atomic writes: ino=%lu, npages=%u",
                                        inode->i_ino, get_dirty_pages(inode));
        ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
@@ -1761,7 +1760,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
                up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
                goto out;
        }
-skip_flush:
+
        set_inode_flag(inode, FI_ATOMIC_FILE);
        clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
        up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1968,11 +1967,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
                break;
        case F2FS_GOING_DOWN_NEED_FSCK:
                set_sbi_flag(sbi, SBI_NEED_FSCK);
+               set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+               set_sbi_flag(sbi, SBI_IS_DIRTY);
                /* do checkpoint only */
                ret = f2fs_sync_fs(sb, 1);
-               if (ret)
-                       goto out;
-               break;
+               goto out;
        default:
                ret = -EINVAL;
                goto out;
@@ -1988,6 +1987,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 out:
        if (in != F2FS_GOING_DOWN_FULLSYNC)
                mnt_drop_write_file(filp);
+
+       trace_f2fs_shutdown(sbi, in, ret);
+
        return ret;
 }
 
@@ -2871,8 +2873,8 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
        __u32 pin;
        int ret = 0;
 
-       if (!inode_owner_or_capable(inode))
-               return -EACCES;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
 
        if (get_user(pin, (__u32 __user *)arg))
                return -EFAULT;
index d636cbcf68f2ef331cc83b40e31206a3ae6d3df7..bb6a152310ef437109a3a2b03a6c5413411e4308 100644 (file)
@@ -298,7 +298,7 @@ process_inline:
                clear_inode_flag(inode, FI_INLINE_DATA);
                f2fs_put_page(ipage, 1);
        } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-               if (f2fs_truncate_blocks(inode, 0, false, false))
+               if (f2fs_truncate_blocks(inode, 0, false))
                        return false;
                goto process_inline;
        }
@@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
        return 0;
 punch_dentry_pages:
        truncate_inode_pages(&dir->i_data, 0);
-       f2fs_truncate_blocks(dir, 0, false, false);
+       f2fs_truncate_blocks(dir, 0, false);
        f2fs_remove_dirty_inode(dir);
        return err;
 }
@@ -659,6 +659,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
        if (IS_ERR(ipage))
                return PTR_ERR(ipage);
 
+       /*
+        * f2fs_readdir is protected by inode.i_rwsem, so it is safe to
+        * access ipage without holding its page lock.
+        */
+       unlock_page(ipage);
+
        inline_dentry = inline_data_addr(inode, ipage);
 
        make_dentry_ptr_inline(inode, &d, inline_dentry);
@@ -667,7 +673,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
        if (!err)
                ctx->pos = d.max;
 
-       f2fs_put_page(ipage, 1);
+       f2fs_put_page(ipage, 0);
        return err < 0 ? err : 0;
 }
 
index d910a820ae675cd19de332df5d296225eb9aa86a..e7f2e8759315636431ae7795be7cbe002b155fc4 100644 (file)
@@ -14,6 +14,7 @@
 #include "f2fs.h"
 #include "node.h"
 #include "segment.h"
+#include "xattr.h"
 
 #include <trace/events/f2fs.h>
 
@@ -248,6 +249,20 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
                return false;
        }
 
+       if (f2fs_has_extra_attr(inode) &&
+               f2fs_sb_has_flexible_inline_xattr(sbi) &&
+               f2fs_has_inline_xattr(inode) &&
+               (!fi->i_inline_xattr_size ||
+               fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                       "%s: inode (ino=%lx) has corrupted "
+                       "i_inline_xattr_size: %d, max: %zu",
+                       __func__, inode->i_ino, fi->i_inline_xattr_size,
+                       MAX_INLINE_XATTR_SIZE);
+               return false;
+       }
+
        if (F2FS_I(inode)->extent_tree) {
                struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
 
index e967d27c1a89012a4236b750ca8d7cc923b28907..f5e34e4670031e86c031f9ffd029333802bd5245 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched.h>
 #include <linux/ctype.h>
+#include <linux/random.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
@@ -50,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
        inode->i_blocks = 0;
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
        F2FS_I(inode)->i_crtime = inode->i_mtime;
-       inode->i_generation = sbi->s_next_generation++;
+       inode->i_generation = prandom_u32();
 
        if (S_ISDIR(inode->i_mode))
                F2FS_I(inode)->i_current_depth = 1;
index 4f450e5733124655761f3f61c408cb7583da06e7..3f99ab2886955dd74467ea3d1ac00daeba046f5e 100644 (file)
@@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
        f2fs_balance_fs_bg(sbi);
 
        /* collect a number of dirty node pages and write together */
-       if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+       if (wbc->sync_mode != WB_SYNC_ALL &&
+                       get_pages(sbi, F2FS_DIRTY_NODES) <
+                                       nr_pages_to_skip(sbi, NODE))
                goto skip_write;
 
        if (wbc->sync_mode == WB_SYNC_ALL)
@@ -1959,7 +1961,7 @@ static int f2fs_set_node_page_dirty(struct page *page)
        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
-               SetPagePrivate(page);
+               f2fs_set_page_private(page, 0);
                f2fs_trace_pid(page);
                return 1;
        }
index 9b79056d705d2b2f031eb716de2f030fe77e3a57..aa7fe79b62b2e0c386e4a44a947db076b78cf2f0 100644 (file)
@@ -191,8 +191,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 
        f2fs_trace_pid(page);
 
-       set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
-       SetPagePrivate(page);
+       f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
 
        new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
 
@@ -215,7 +214,8 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 }
 
 static int __revoke_inmem_pages(struct inode *inode,
-                               struct list_head *head, bool drop, bool recover)
+                               struct list_head *head, bool drop, bool recover,
+                               bool trylock)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct inmem_pages *cur, *tmp;
@@ -227,7 +227,16 @@ static int __revoke_inmem_pages(struct inode *inode,
                if (drop)
                        trace_f2fs_commit_inmem_page(page, INMEM_DROP);
 
-               lock_page(page);
+               if (trylock) {
+                       /*
+                        * Trylock to avoid a deadlock between the page
+                        * lock and inmem_lock.
+                        */
+                       if (!trylock_page(page))
+                               continue;
+               } else {
+                       lock_page(page);
+               }
 
                f2fs_wait_on_page_writeback(page, DATA, true, true);
 
@@ -270,8 +279,7 @@ next:
                        ClearPageUptodate(page);
                        clear_cold_data(page);
                }
-               set_page_private(page, 0);
-               ClearPagePrivate(page);
+               f2fs_clear_page_private(page);
                f2fs_put_page(page, 1);
 
                list_del(&cur->list);
@@ -318,13 +326,19 @@ void f2fs_drop_inmem_pages(struct inode *inode)
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
 
-       mutex_lock(&fi->inmem_lock);
-       __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
-       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
-       if (!list_empty(&fi->inmem_ilist))
-               list_del_init(&fi->inmem_ilist);
-       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
-       mutex_unlock(&fi->inmem_lock);
+       while (!list_empty(&fi->inmem_pages)) {
+               mutex_lock(&fi->inmem_lock);
+               __revoke_inmem_pages(inode, &fi->inmem_pages,
+                                               true, false, true);
+
+               if (list_empty(&fi->inmem_pages)) {
+                       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+                       if (!list_empty(&fi->inmem_ilist))
+                               list_del_init(&fi->inmem_ilist);
+                       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+               }
+               mutex_unlock(&fi->inmem_lock);
+       }
 
        clear_inode_flag(inode, FI_ATOMIC_FILE);
        fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
@@ -354,8 +368,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
        kmem_cache_free(inmem_entry_slab, cur);
 
        ClearPageUptodate(page);
-       set_page_private(page, 0);
-       ClearPagePrivate(page);
+       f2fs_clear_page_private(page);
        f2fs_put_page(page, 0);
 
        trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
@@ -429,12 +442,15 @@ retry:
                 * recovery or rewrite & commit last transaction. For other
                 * error number, revoking was done by filesystem itself.
                 */
-               err = __revoke_inmem_pages(inode, &revoke_list, false, true);
+               err = __revoke_inmem_pages(inode, &revoke_list,
+                                               false, true, false);
 
                /* drop all uncommitted pages */
-               __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+               __revoke_inmem_pages(inode, &fi->inmem_pages,
+                                               true, false, false);
        } else {
-               __revoke_inmem_pages(inode, &revoke_list, false, false);
+               __revoke_inmem_pages(inode, &revoke_list,
+                                               false, false, false);
        }
 
        return err;
@@ -542,9 +558,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
                                struct block_device *bdev)
 {
-       struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
+       struct bio *bio;
        int ret;
 
+       bio = f2fs_bio_alloc(sbi, 0, false);
+       if (!bio)
+               return -ENOMEM;
+
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
        bio_set_dev(bio, bdev);
        ret = submit_bio_wait(bio);
@@ -868,6 +888,9 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
 
        if (holes[DATA] > ovp || holes[NODE] > ovp)
                return -EAGAIN;
+       if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
+               dirty_segments(sbi) > overprovision_segments(sbi))
+               return -EAGAIN;
        return 0;
 }
 
@@ -1037,6 +1060,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
 
        dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
        dpolicy->io_aware_gran = MAX_PLIST_NUM;
+       dpolicy->timeout = 0;
 
        if (discard_type == DPOLICY_BG) {
                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1059,6 +1083,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
        } else if (discard_type == DPOLICY_UMOUNT) {
                dpolicy->max_requests = UINT_MAX;
                dpolicy->io_aware = false;
+               /* we need to issue all discards to keep CP_TRIMMED_FLAG */
+               dpolicy->granularity = 1;
        }
 }
 
@@ -1424,7 +1450,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
        int i, issued = 0;
        bool io_interrupted = false;
 
+       if (dpolicy->timeout != 0)
+               f2fs_update_time(sbi, dpolicy->timeout);
+
        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+               if (dpolicy->timeout != 0 &&
+                               f2fs_time_over(sbi, dpolicy->timeout))
+                       break;
+
                if (i + 1 < dpolicy->granularity)
                        break;
 
@@ -1611,7 +1644,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
 }
 
 /* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_policy dpolicy;
@@ -1619,6 +1652,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
 
        __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
                                        dcc->discard_granularity);
+       dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
        __issue_discard_cmd(sbi, &dpolicy);
        dropped = __drop_discard_cmd(sbi);
 
@@ -3164,10 +3198,10 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
        stat_inc_inplace_blocks(fio->sbi);
 
        err = f2fs_submit_page_bio(fio);
-       if (!err)
+       if (!err) {
                update_device_state(fio);
-
-       f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+               f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+       }
 
        return err;
 }
index a77f76f528b61597c3ebce03c18d05ff1f25ef74..5c7ed0442d6e25dd08caa73efb4a506ecfd2d894 100644 (file)
@@ -865,7 +865,7 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
                }
        }
        mutex_unlock(&dcc->cmd_lock);
-       if (!wakeup)
+       if (!wakeup || !is_idle(sbi, DISCARD_TIME))
                return;
 wake_up:
        dcc->discard_wake = 1;
index d1ccc52afc939b0514f2796ec545a138a2abd068..f2aaa2cc6b3e01e88a7aeea026cfaa97f73ae1de 100644 (file)
@@ -269,7 +269,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
        if (!qname) {
                f2fs_msg(sb, KERN_ERR,
                        "Not enough memory for storing quotafile name");
-               return -EINVAL;
+               return -ENOMEM;
        }
        if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
                if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
@@ -586,7 +586,7 @@ static int parse_options(struct super_block *sb, char *options)
                case Opt_io_size_bits:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
-                       if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
+                       if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
                                f2fs_msg(sb, KERN_WARNING,
                                        "Not support %d, larger than %d",
                                        1 << arg, BIO_MAX_PAGES);
@@ -821,6 +821,8 @@ static int parse_options(struct super_block *sb, char *options)
        }
 
        if (test_opt(sbi, INLINE_XATTR_SIZE)) {
+               int min_size, max_size;
+
                if (!f2fs_sb_has_extra_attr(sbi) ||
                        !f2fs_sb_has_flexible_inline_xattr(sbi)) {
                        f2fs_msg(sb, KERN_ERR,
@@ -834,14 +836,15 @@ static int parse_options(struct super_block *sb, char *options)
                                        "set with inline_xattr option");
                        return -EINVAL;
                }
-               if (!F2FS_OPTION(sbi).inline_xattr_size ||
-                       F2FS_OPTION(sbi).inline_xattr_size >=
-                                       DEF_ADDRS_PER_INODE -
-                                       F2FS_TOTAL_EXTRA_ATTR_SIZE -
-                                       DEF_INLINE_RESERVED_SIZE -
-                                       DEF_MIN_INLINE_SIZE) {
+
+               min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
+               max_size = MAX_INLINE_XATTR_SIZE;
+
+               if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
+                               F2FS_OPTION(sbi).inline_xattr_size > max_size) {
                        f2fs_msg(sb, KERN_ERR,
-                                       "inline xattr size is out of range");
+                               "inline xattr size is out of range: %d ~ %d",
+                               min_size, max_size);
                        return -EINVAL;
                }
        }
@@ -915,6 +918,10 @@ static int f2fs_drop_inode(struct inode *inode)
                        sb_start_intwrite(inode->i_sb);
                        f2fs_i_size_write(inode, 0);
 
+                       f2fs_submit_merged_write_cond(F2FS_I_SB(inode),
+                                       inode, NULL, 0, DATA);
+                       truncate_inode_pages_final(inode->i_mapping);
+
                        if (F2FS_HAS_BLOCKS(inode))
                                f2fs_truncate(inode);
 
@@ -1048,7 +1055,7 @@ static void f2fs_put_super(struct super_block *sb)
        }
 
        /* be sure to wait for any on-going discard commands */
-       dropped = f2fs_wait_discard_bios(sbi);
+       dropped = f2fs_issue_discard_timeout(sbi);
 
        if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
                                        !sbi->discard_blks && !dropped) {
@@ -1075,7 +1082,10 @@ static void f2fs_put_super(struct super_block *sb)
        f2fs_bug_on(sbi, sbi->fsync_node_num);
 
        iput(sbi->node_inode);
+       sbi->node_inode = NULL;
+
        iput(sbi->meta_inode);
+       sbi->meta_inode = NULL;
 
        /*
         * iput() can update stat information, if f2fs_write_checkpoint()
@@ -1455,9 +1465,16 @@ static int f2fs_enable_quotas(struct super_block *sb);
 
 static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 {
+       unsigned int s_flags = sbi->sb->s_flags;
        struct cp_control cpc;
-       int err;
+       int err = 0;
+       int ret;
 
+       if (s_flags & SB_RDONLY) {
+               f2fs_msg(sbi->sb, KERN_ERR,
+                               "checkpoint=disable on readonly fs");
+               return -EINVAL;
+       }
        sbi->sb->s_flags |= SB_ACTIVE;
 
        f2fs_update_time(sbi, DISABLE_TIME);
@@ -1465,18 +1482,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
        while (!f2fs_time_over(sbi, DISABLE_TIME)) {
                mutex_lock(&sbi->gc_mutex);
                err = f2fs_gc(sbi, true, false, NULL_SEGNO);
-               if (err == -ENODATA)
+               if (err == -ENODATA) {
+                       err = 0;
                        break;
+               }
                if (err && err != -EAGAIN)
-                       return err;
+                       break;
        }
 
-       err = sync_filesystem(sbi->sb);
-       if (err)
-               return err;
+       ret = sync_filesystem(sbi->sb);
+       if (ret || err) {
+               err = ret ? ret : err;
+               goto restore_flag;
+       }
 
-       if (f2fs_disable_cp_again(sbi))
-               return -EAGAIN;
+       if (f2fs_disable_cp_again(sbi)) {
+               err = -EAGAIN;
+               goto restore_flag;
+       }
 
        mutex_lock(&sbi->gc_mutex);
        cpc.reason = CP_PAUSE;
@@ -1485,7 +1508,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 
        sbi->unusable_block_count = 0;
        mutex_unlock(&sbi->gc_mutex);
-       return 0;
+restore_flag:
+       sbi->sb->s_flags = s_flags;     /* Restore SB_RDONLY status */
+       return err;
 }
 
 static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
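
The f2fs_disable_checkpoint() rework funnels every exit through the restore_flag label, so the saved s_flags are put back on success and failure alike, and it keeps both error sources: a sync_filesystem() failure takes precedence over a soft GC error. A compilable sketch of that error-path shape — struct sb, do_gc() and sync_all() are stand-ins, not kernel APIs:

    #include <stdio.h>

    #define SB_RDONLY 0x1

    struct sb { unsigned int flags; };

    static int do_gc(struct sb *s)    { (void)s; return 0; }
    static int sync_all(struct sb *s) { (void)s; return 0; }

    /* Every failure funnels through one label that restores the
     * caller-visible flags; a later sync error wins over an earlier
     * soft error — the same shape as the restore_flag rework above. */
    static int disable_checkpoint(struct sb *s)
    {
        unsigned int saved = s->flags;
        int err, ret;

        s->flags &= ~SB_RDONLY;

        err = do_gc(s);
        ret = sync_all(s);
        if (ret || err) {
            err = ret ? ret : err;
            goto restore;
        }

        /* ... write the pausing checkpoint ... */
        err = 0;
    restore:
        s->flags = saved;       /* restored on success and failure alike */
        return err;
    }

    int main(void)
    {
        struct sb s = { SB_RDONLY };
        printf("err=%d flags=%#x\n", disable_checkpoint(&s), s.flags);
        return 0;
    }
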
@@ -2023,6 +2048,12 @@ void f2fs_quota_off_umount(struct super_block *sb)
                        set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
                }
        }
+       /*
+        * In case of checkpoint=disable, we must flush quota blocks here;
+        * otherwise their end_io can dereference node_inode after
+        * put_super has already dropped it.
+        */
+       sync_filesystem(sb);
 }
 
 static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
@@ -2703,6 +2734,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
        sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
        sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+       sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
+                               DEF_UMOUNT_DISCARD_TIMEOUT;
        clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
        for (i = 0; i < NR_COUNT_TYPE; i++)
@@ -3022,10 +3055,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
        struct f2fs_super_block *raw_super;
        struct inode *root;
        int err;
-       bool retry = true, need_fsck = false;
+       bool skip_recovery = false, need_fsck = false;
        char *options = NULL;
        int recovery, i, valid_super_block;
        struct curseg_info *seg_i;
+       int retry_cnt = 1;
 
 try_onemore:
        err = -EINVAL;
@@ -3097,7 +3131,6 @@ try_onemore:
        sb->s_maxbytes = sbi->max_file_blocks <<
                                le32_to_cpu(raw_super->log_blocksize);
        sb->s_max_links = F2FS_LINK_MAX;
-       get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 
 #ifdef CONFIG_QUOTA
        sb->dq_op = &f2fs_quota_operations;
@@ -3200,6 +3233,10 @@ try_onemore:
 
        if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
                set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+       if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
+               set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+               sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
+       }
 
        /* Initialize device list */
        err = f2fs_scan_devices(sbi);
@@ -3288,7 +3325,7 @@ try_onemore:
        sb->s_root = d_make_root(root); /* allocate root dentry */
        if (!sb->s_root) {
                err = -ENOMEM;
-               goto free_root_inode;
+               goto free_node_inode;
        }
 
        err = f2fs_register_sysfs(sbi);
@@ -3310,7 +3347,7 @@ try_onemore:
                goto free_meta;
 
        if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
-               goto skip_recovery;
+               goto reset_checkpoint;
 
        /* recover fsynced data */
        if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -3327,11 +3364,13 @@ try_onemore:
                if (need_fsck)
                        set_sbi_flag(sbi, SBI_NEED_FSCK);
 
-               if (!retry)
-                       goto skip_recovery;
+               if (skip_recovery)
+                       goto reset_checkpoint;
 
                err = f2fs_recover_fsync_data(sbi, false);
                if (err < 0) {
+                       if (err != -ENOMEM)
+                               skip_recovery = true;
                        need_fsck = true;
                        f2fs_msg(sb, KERN_ERR,
                                "Cannot recover all fsync data errno=%d", err);
@@ -3347,14 +3386,14 @@ try_onemore:
                        goto free_meta;
                }
        }
-skip_recovery:
+reset_checkpoint:
        /* f2fs_recover_fsync_data() cleared this already */
        clear_sbi_flag(sbi, SBI_POR_DOING);
 
        if (test_opt(sbi, DISABLE_CHECKPOINT)) {
                err = f2fs_disable_checkpoint(sbi);
                if (err)
-                       goto free_meta;
+                       goto sync_free_meta;
        } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
                f2fs_enable_checkpoint(sbi);
        }
@@ -3367,7 +3406,7 @@ skip_recovery:
                /* After POR, we can run background GC thread.*/
                err = f2fs_start_gc_thread(sbi);
                if (err)
-                       goto free_meta;
+                       goto sync_free_meta;
        }
        kvfree(options);
 
@@ -3387,8 +3426,14 @@ skip_recovery:
                                cur_cp_version(F2FS_CKPT(sbi)));
        f2fs_update_time(sbi, CP_TIME);
        f2fs_update_time(sbi, REQ_TIME);
+       clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
        return 0;
 
+sync_free_meta:
+       /* safe to flush all the data */
+       sync_filesystem(sbi->sb);
+       retry_cnt = 0;
+
 free_meta:
 #ifdef CONFIG_QUOTA
        f2fs_truncate_quota_inode_pages(sb);
@@ -3402,6 +3447,8 @@ free_meta:
         * falls into an infinite loop in f2fs_sync_meta_pages().
         */
        truncate_inode_pages_final(META_MAPPING(sbi));
+       /* evict some inodes being cached by GC */
+       evict_inodes(sb);
        f2fs_unregister_sysfs(sbi);
 free_root_inode:
        dput(sb->s_root);
@@ -3410,6 +3457,7 @@ free_node_inode:
        f2fs_release_ino_entry(sbi, true);
        truncate_inode_pages_final(NODE_MAPPING(sbi));
        iput(sbi->node_inode);
+       sbi->node_inode = NULL;
 free_stats:
        f2fs_destroy_stats(sbi);
 free_nm:
@@ -3422,6 +3470,7 @@ free_devices:
 free_meta_inode:
        make_bad_inode(sbi->meta_inode);
        iput(sbi->meta_inode);
+       sbi->meta_inode = NULL;
 free_io_dummy:
        mempool_destroy(sbi->write_io_dummy);
 free_percpu:
@@ -3443,8 +3492,8 @@ free_sbi:
        kvfree(sbi);
 
        /* give only one another chance */
-       if (retry) {
-               retry = false;
+       if (retry_cnt > 0 && skip_recovery) {
+               retry_cnt--;
                shrink_dcache_sb(sb);
                goto try_onemore;
        }
index 70da6801c86f4e10e140e8394426415fc0209462..729f46a3c9ee0b5831ee18b6e08278a4801ee33a 100644 (file)
@@ -222,6 +222,8 @@ out:
 #ifdef CONFIG_F2FS_FAULT_INJECTION
        if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
                return -EINVAL;
+       if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
+               return -EINVAL;
 #endif
        if (a->struct_type == RESERVED_BLOCKS) {
                spin_lock(&sbi->stat_lock);
@@ -278,10 +280,16 @@ out:
                return count;
        }
 
-       *ui = t;
 
-       if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
-               f2fs_reset_iostat(sbi);
+       if (!strcmp(a->attr.name, "iostat_enable")) {
+               sbi->iostat_enable = !!t;
+               if (!sbi->iostat_enable)
+                       f2fs_reset_iostat(sbi);
+               return count;
+       }
+
+       *ui = (unsigned int)t;
+
        return count;
 }
 
@@ -418,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
                                        interval_time[DISCARD_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
+               umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -475,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(idle_interval),
        ATTR_LIST(discard_idle_interval),
        ATTR_LIST(gc_idle_interval),
+       ATTR_LIST(umount_discard_timeout),
        ATTR_LIST(iostat_enable),
        ATTR_LIST(readdir_ra),
        ATTR_LIST(gc_pin_file_thresh),
index ce2a5eb210b6681f5b5f317314ca9335eb7c9d9b..d0ab533a9ce89590450d509b5a6270b82343d3a5 100644 (file)
@@ -14,7 +14,7 @@
 #include "trace.h"
 
 static RADIX_TREE(pids, GFP_ATOMIC);
-static struct mutex pids_lock;
+static spinlock_t pids_lock;
 static struct last_io_info last_io;
 
 static inline void __print_last_io(void)
@@ -58,23 +58,29 @@ void f2fs_trace_pid(struct page *page)
 
        set_page_private(page, (unsigned long)pid);
 
+retry:
        if (radix_tree_preload(GFP_NOFS))
                return;
 
-       mutex_lock(&pids_lock);
+       spin_lock(&pids_lock);
        p = radix_tree_lookup(&pids, pid);
        if (p == current)
                goto out;
        if (p)
                radix_tree_delete(&pids, pid);
 
-       f2fs_radix_tree_insert(&pids, pid, current);
+       if (radix_tree_insert(&pids, pid, current)) {
+               spin_unlock(&pids_lock);
+               radix_tree_preload_end();
+               cond_resched();
+               goto retry;
+       }
 
        trace_printk("%3x:%3x %4x %-16s\n",
                        MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
                        pid, current->comm);
 out:
-       mutex_unlock(&pids_lock);
+       spin_unlock(&pids_lock);
        radix_tree_preload_end();
 }
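
Switching pids_lock from a mutex to a spinlock means nothing inside the critical section may sleep, so the tree-node allocation stays in radix_tree_preload() outside the lock, and a failed radix_tree_insert() is handled by dropping the lock, ending the preload, yielding via cond_resched(), and retrying. A user-space sketch of that retry shape — a fixed table plays the radix tree and pthread spinlocks stand in for spin_lock():

    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static pthread_spinlock_t lock;
    static int table[8];
    static int nr;

    /* May fail transiently — the analogue of radix_tree_insert()
     * running out of preloaded nodes under a spinlock. */
    static int try_insert(int key)
    {
        if (nr >= 8)
            return -1;
        table[nr++] = key;
        return 0;
    }

    static void record_pid(int pid)
    {
    retry:
        pthread_spin_lock(&lock);       /* cannot sleep while held */
        if (try_insert(pid)) {
            pthread_spin_unlock(&lock);
            sched_yield();              /* cond_resched() analogue; the
                                           real code also re-preloads here */
            goto retry;
        }
        pthread_spin_unlock(&lock);
    }

    int main(void)
    {
        pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
        record_pid(42);
        printf("recorded %d entries\n", nr);
        return 0;
    }
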
 
@@ -119,7 +125,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
 
 void f2fs_build_trace_ios(void)
 {
-       mutex_init(&pids_lock);
+       spin_lock_init(&pids_lock);
 }
 
 #define PIDVEC_SIZE    128
@@ -147,7 +153,7 @@ void f2fs_destroy_trace_ios(void)
        pid_t next_pid = 0;
        unsigned int found;
 
-       mutex_lock(&pids_lock);
+       spin_lock(&pids_lock);
        while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
                unsigned idx;
 
@@ -155,5 +161,5 @@ void f2fs_destroy_trace_ios(void)
                for (idx = 0; idx < found; idx++)
                        radix_tree_delete(&pids, pid[idx]);
        }
-       mutex_unlock(&pids_lock);
+       spin_unlock(&pids_lock);
 }
index 18d5ffbc5e8c63a7e9f8370c5dc82deb0f8d7ca8..848a785abe253c0bfea21644407a842f317ea4b7 100644 (file)
@@ -224,11 +224,11 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
 {
        struct f2fs_xattr_entry *entry;
        unsigned int inline_size = inline_xattr_size(inode);
+       void *max_addr = base_addr + inline_size;
 
        list_for_each_xattr(entry, base_addr) {
-               if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
-                       (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
-                       base_addr + inline_size) {
+               if ((void *)entry + sizeof(__u32) > max_addr ||
+                       (void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
                        *last_addr = entry;
                        return NULL;
                }
@@ -239,6 +239,13 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
                if (!memcmp(entry->e_name, name, len))
                        break;
        }
+
+       /* the last entry or its header crosses the max inline xattr size */
+       if (IS_XATTR_LAST_ENTRY(entry) &&
+               (void *)entry + sizeof(__u32) > max_addr) {
+               *last_addr = entry;
+               return NULL;
+       }
        return entry;
 }
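
The __find_inline_xattr() changes hoist the bound into max_addr and add a final check so that even the terminating entry's header is verified to lie inside the inline area before it is read. The general pattern — walking variable-length records with the end pointer checked before every dereference — in a standalone sketch, where struct entry and find() are invented for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct entry { uint8_t name_len; uint8_t name[]; };

    #define ENTRY_SIZE(e) (sizeof(struct entry) + (e)->name_len)

    /* Both the fixed header and the full entry must fit below max_addr
     * before they are dereferenced, mirroring the checks above. */
    static const struct entry *find(const void *buf, size_t size,
                                    const char *name, size_t len)
    {
        const char *max_addr = (const char *)buf + size;
        const char *p = buf;

        while (p + sizeof(struct entry) <= max_addr) {
            const struct entry *e = (const struct entry *)p;

            if (e->name_len == 0)           /* list terminator */
                return NULL;
            if (p + ENTRY_SIZE(e) > max_addr)
                return NULL;                /* entry crosses the boundary */
            if (e->name_len == len && !memcmp(e->name, name, len))
                return e;
            p += ENTRY_SIZE(e);
        }
        return NULL;                        /* even the header would overflow */
    }

    int main(void)
    {
        uint8_t buf[16] = { 3, 'f', 'o', 'o', 0 };

        printf("found: %s\n", find(buf, sizeof(buf), "foo", 3) ? "yes" : "no");
        return 0;
    }
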
 
@@ -340,7 +347,7 @@ check:
        *base_addr = txattr_addr;
        return 0;
 out:
-       kzfree(txattr_addr);
+       kvfree(txattr_addr);
        return err;
 }
 
@@ -383,7 +390,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
        *base_addr = txattr_addr;
        return 0;
 fail:
-       kzfree(txattr_addr);
+       kvfree(txattr_addr);
        return err;
 }
 
@@ -510,7 +517,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
        }
        error = size;
 out:
-       kzfree(base_addr);
+       kvfree(base_addr);
        return error;
 }
 
@@ -538,7 +545,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
                if (!handler || (handler->list && !handler->list(dentry)))
                        continue;
 
-               prefix = handler->prefix ?: handler->name;
+               prefix = xattr_prefix(handler);
                prefix_len = strlen(prefix);
                size = prefix_len + entry->e_name_len + 1;
                if (buffer) {
@@ -556,7 +563,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
        }
        error = buffer_size - rest;
 cleanup:
-       kzfree(base_addr);
+       kvfree(base_addr);
        return error;
 }
 
@@ -687,7 +694,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
        if (!error && S_ISDIR(inode->i_mode))
                set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
 exit:
-       kzfree(base_addr);
+       kvfree(base_addr);
        return error;
 }
 
index 67db134da0f5f6da01ff505d956e60069114767d..9172ee082ca830f9159f5a7ac73b3fca52cfa4e4 100644 (file)
@@ -78,6 +78,12 @@ struct f2fs_xattr_entry {
                                sizeof(struct f2fs_xattr_header) -      \
                                sizeof(struct f2fs_xattr_entry))
 
+#define MAX_INLINE_XATTR_SIZE                                          \
+                       (DEF_ADDRS_PER_INODE -                          \
+                       F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) -   \
+                       DEF_INLINE_RESERVED_SIZE -                      \
+                       MIN_INLINE_DENTRY_SIZE / sizeof(__le32))
+
 /*
  * On-disk structure of f2fs_xattr
  * We use inline xattrs space + 1 block for xattr.
index 8247bd1634cb8c80bee4e4658a2e9754a0ebbf8b..7066cd7c7aff33eff193ce22e7eb28c6a39abecb 100644 (file)
@@ -1889,7 +1889,7 @@ lookup_again:
            atomic_read(&lo->plh_outstanding) != 0) {
                spin_unlock(&ino->i_lock);
                lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
-                                       atomic_read(&lo->plh_outstanding)));
+                                       !atomic_read(&lo->plh_outstanding)));
                if (IS_ERR(lseg) || !list_empty(&lo->plh_segs))
                        goto out_put_layout_hdr;
                pnfs_put_layout_hdr(lo);
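
The pNFS fix flips the predicate handed to wait_var_event_killable(): the condition argument is "when to stop waiting", so it must be !atomic_read(&lo->plh_outstanding) — wait until the count drains to zero — not the raw counter, which let the wait return while layoutgets were still outstanding. The same predicate rule with a POSIX condition variable; all names here are invented:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
    static int outstanding = 1;

    static void *worker(void *arg)
    {
        (void)arg;
        usleep(1000);
        pthread_mutex_lock(&m);
        outstanding--;              /* finish the outstanding request */
        pthread_cond_broadcast(&c);
        pthread_mutex_unlock(&m);
        return NULL;
    }

    /* The loop condition is the *negation* of the stop condition: we
     * sleep while work is outstanding and stop once !outstanding. */
    static void wait_for_drain(void)
    {
        pthread_mutex_lock(&m);
        while (outstanding != 0)
            pthread_cond_wait(&c, &m);
        pthread_mutex_unlock(&m);
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        wait_for_drain();
        pthread_join(t, NULL);
        printf("drained\n");
        return 0;
    }
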
index f5ebdd87afb244fefa0cd3342728e1ade5342d83..ddef482f133406737e09e5df4966aea9b6ec06aa 100644 (file)
@@ -3074,6 +3074,15 @@ static const struct file_operations proc_tgid_base_operations = {
        .llseek         = generic_file_llseek,
 };
 
+struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+       if (!d_is_dir(file->f_path.dentry) ||
+           (file->f_op != &proc_tgid_base_operations))
+               return ERR_PTR(-EBADF);
+
+       return proc_pid(file_inode(file));
+}
+
 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        return proc_pident_lookup(dir, dentry,
index 4cb21b558a856188de12cb33d5b0bba4a85fb551..1b56686ab178dc6d4363ea2f76fde6cff90926da 100644 (file)
@@ -71,9 +71,11 @@ static int sysfs_init_fs_context(struct fs_context *fc)
        kfc->magic = SYSFS_MAGIC;
        fc->fs_private = kfc;
        fc->ops = &sysfs_fs_context_ops;
-       if (fc->user_ns)
-               put_user_ns(fc->user_ns);
-       fc->user_ns = get_user_ns(netns->user_ns);
+       if (netns) {
+               if (fc->user_ns)
+                       put_user_ns(fc->user_ns);
+               fc->user_ns = get_user_ns(netns->user_ns);
+       }
        fc->global = true;
        return 0;
 }
index 9a3767818c507b61377434b971c7309b9e768050..9c2a0a13ed61289b43cb2fbb6b86a29452c0323b 100644 (file)
@@ -563,43 +563,40 @@ xfs_dir3_leaf_find_entry(
  */
 int                                            /* error */
 xfs_dir2_leaf_addname(
-       xfs_da_args_t           *args)          /* operation arguments */
+       struct xfs_da_args      *args)          /* operation arguments */
 {
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_trans        *tp = args->trans;
        __be16                  *bestsp;        /* freespace table in leaf */
-       int                     compact;        /* need to compact leaves */
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       __be16                  *tagp;          /* end of data entry */
        struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data block entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* data unused entry */
+       struct xfs_buf          *lbp;           /* leaf's buffer */
+       struct xfs_dir2_leaf    *leaf;          /* leaf structure */
+       struct xfs_inode        *dp = args->dp; /* incore directory inode */
+       struct xfs_dir2_data_hdr *hdr;          /* data block header */
+       struct xfs_dir2_data_entry *dep;        /* data block entry */
+       struct xfs_dir2_leaf_entry *lep;        /* leaf entry table pointer */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir2_data_unused *dup;       /* data unused entry */
+       struct xfs_dir2_leaf_tail *ltp;         /* leaf tail pointer */
+       struct xfs_dir2_data_free *bf;          /* bestfree table */
+       int                     compact;        /* need to compact leaves */
        int                     error;          /* error return value */
        int                     grown;          /* allocated new data block */
-       int                     highstale;      /* index of next stale leaf */
+       int                     highstale = 0;  /* index of next stale leaf */
        int                     i;              /* temporary, index */
        int                     index;          /* leaf table position */
-       struct xfs_buf          *lbp;           /* leaf's buffer */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
        int                     length;         /* length of new entry */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
        int                     lfloglow;       /* low leaf logging index */
        int                     lfloghigh;      /* high leaf logging index */
-       int                     lowstale;       /* index of prev stale leaf */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
+       int                     lowstale = 0;   /* index of prev stale leaf */
        int                     needbytes;      /* leaf block bytes needed */
        int                     needlog;        /* need to log data header */
        int                     needscan;       /* need to rescan data free */
-       __be16                  *tagp;          /* end of data entry */
-       xfs_trans_t             *tp;            /* transaction pointer */
        xfs_dir2_db_t           use_block;      /* data block number */
-       struct xfs_dir2_data_free *bf;          /* bestfree table */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
 
        trace_xfs_dir2_leaf_addname(args);
 
-       dp = args->dp;
-       tp = args->trans;
-
        error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
        if (error)
                return error;
index 3b03703c5c3dbb3e3058356f76b0220bd37f300e..16731d2d684be4097277695d9d87937f4b5b0afe 100644 (file)
@@ -426,24 +426,22 @@ xfs_dir2_leaf_to_node(
 static int                                     /* error */
 xfs_dir2_leafn_add(
        struct xfs_buf          *bp,            /* leaf buffer */
-       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_da_args      *args,          /* operation arguments */
        int                     index)          /* insertion pt for new entry */
 {
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       struct xfs_dir2_leaf_entry *lep;
+       struct xfs_dir2_leaf_entry *ents;
        int                     compact;        /* compacting stale leaves */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     highstale;      /* next stale entry */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     highstale = 0;  /* next stale entry */
        int                     lfloghigh;      /* high leaf entry logging */
        int                     lfloglow;       /* low leaf entry logging */
-       int                     lowstale;       /* previous stale entry */
-       struct xfs_dir3_icleaf_hdr leafhdr;
-       struct xfs_dir2_leaf_entry *ents;
+       int                     lowstale = 0;   /* previous stale entry */
 
        trace_xfs_dir2_leafn_add(args, index);
 
-       dp = args->dp;
-       leaf = bp->b_addr;
        dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
        ents = dp->d_ops->leaf_ents_p(leaf);
 
index 349e5957c9498db170d85c434d9fc037d5d2acba..702967d996bb38edd9eaa3c9f7d3300f38f3b08c 100644 (file)
@@ -74,6 +74,7 @@ enum arch_timer_spi_nr {
 struct arch_timer_kvm_info {
        struct timecounter timecounter;
        int virtual_irq;
+       int physical_irq;
 };
 
 struct arch_timer_mem_frame {
index bb9acea61369396f993a15dae2fc336e2c2b218e..286d58efed5d00afea46b3b346dd672c97631fe9 100644 (file)
@@ -36,6 +36,7 @@ struct drm_fb_helper;
 #include <drm/drm_crtc.h>
 #include <drm/drm_device.h>
 #include <linux/kgdb.h>
+#include <linux/vgaarb.h>
 
 enum mode_set_atomic {
        LEAVE_ATOMIC_MODE_SET,
@@ -642,11 +643,18 @@ drm_fb_helper_remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
                                                  int resource_id,
                                                  const char *name)
 {
+       int ret = 0;
+
+       /*
+        * WARNING: Apparently we must kick fbdev drivers before vgacon,
+        * otherwise the vga fbdev driver falls over.
+        */
 #if IS_REACHABLE(CONFIG_FB)
-       return remove_conflicting_pci_framebuffers(pdev, resource_id, name);
-#else
-       return 0;
+       ret = remove_conflicting_pci_framebuffers(pdev, resource_id, name);
 #endif
+       if (ret == 0)
+               ret = vga_remove_vgacon(pdev);
+       return ret;
 }
 
 #endif
index 33771352dcd6928b449e5c9b77b6a80cee74e6e4..05a18dd265b5538434d07e664e535d45c0f3a1fa 100644 (file)
 #include <linux/clocksource.h>
 #include <linux/hrtimer.h>
 
+enum kvm_arch_timers {
+       TIMER_PTIMER,
+       TIMER_VTIMER,
+       NR_KVM_TIMERS
+};
+
+enum kvm_arch_timer_regs {
+       TIMER_REG_CNT,
+       TIMER_REG_CVAL,
+       TIMER_REG_TVAL,
+       TIMER_REG_CTL,
+};
+
 struct arch_timer_context {
+       struct kvm_vcpu                 *vcpu;
+
        /* Registers: control register, timer value */
        u32                             cnt_ctl;
        u64                             cnt_cval;
@@ -30,30 +45,36 @@ struct arch_timer_context {
        /* Timer IRQ */
        struct kvm_irq_level            irq;
 
+       /* Virtual offset */
+       u64                             cntvoff;
+
+       /* Emulated Timer (may be unused) */
+       struct hrtimer                  hrtimer;
+
        /*
-        * We have multiple paths which can save/restore the timer state
-        * onto the hardware, so we need some way of keeping track of
-        * where the latest state is.
-        *
-        * loaded == true:  State is loaded on the hardware registers.
-        * loaded == false: State is stored in memory.
+        * We have multiple paths which can save/restore the timer state onto
+        * the hardware, so we need some way of keeping track of where the
+        * latest state is.
         */
-       bool                    loaded;
+       bool                            loaded;
 
-       /* Virtual offset */
-       u64                     cntvoff;
+       /* Duplicated state from arch_timer.c for convenience */
+       u32                             host_timer_irq;
+       u32                             host_timer_irq_flags;
+};
+
+struct timer_map {
+       struct arch_timer_context *direct_vtimer;
+       struct arch_timer_context *direct_ptimer;
+       struct arch_timer_context *emul_ptimer;
 };
 
 struct arch_timer_cpu {
-       struct arch_timer_context       vtimer;
-       struct arch_timer_context       ptimer;
+       struct arch_timer_context timers[NR_KVM_TIMERS];
 
        /* Background timer used when the guest is not running */
        struct hrtimer                  bg_timer;
 
-       /* Physical timer emulation */
-       struct hrtimer                  phys_timer;
-
        /* Is the timer enabled */
        bool                    enabled;
 };
@@ -76,9 +97,6 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
 
 bool kvm_timer_is_pending(struct kvm_vcpu *vcpu);
 
-void kvm_timer_schedule(struct kvm_vcpu *vcpu);
-void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
-
 u64 kvm_phys_timer_read(void);
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu);
@@ -88,7 +106,19 @@ void kvm_timer_init_vhe(void);
 
 bool kvm_arch_timer_get_input_level(int vintid);
 
-#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer)
-#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer)
+#define vcpu_timer(v)  (&(v)->arch.timer_cpu)
+#define vcpu_get_timer(v,t)    (&vcpu_timer(v)->timers[(t)])
+#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER])
+#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER])
+
+#define arch_timer_ctx_index(ctx)      ((ctx) - vcpu_timer((ctx)->vcpu)->timers)
+
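A hedged sketch of the new array-based accessors in use; the function below and its vcpu argument are assumed context for illustration, not part of this patch:

	/* Sketch only: walk both timers via the array-based layout. */
	static void walk_vcpu_timers(struct kvm_vcpu *vcpu)
	{
		int i;

		for (i = 0; i < NR_KVM_TIMERS; i++) {
			struct arch_timer_context *ctx = vcpu_get_timer(vcpu, i);

			/* arch_timer_ctx_index() recovers 'i' from the pointer. */
			WARN_ON(arch_timer_ctx_index(ctx) != i);
		}
	}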
+u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
+                             enum kvm_arch_timers tmr,
+                             enum kvm_arch_timer_regs treg);
+void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
+                               enum kvm_arch_timers tmr,
+                               enum kvm_arch_timer_regs treg,
+                               u64 val);
 
 #endif
index 6ac47f5ea514ac72d8d9fa80b5a0b8cac369f7a1..d5dcebd7aad334e635bfb12a2abc79e09b2323ba 100644 (file)
@@ -400,12 +400,17 @@ extern bool acpi_osi_is_win8(void);
 
 #ifdef CONFIG_ACPI_NUMA
 int acpi_map_pxm_to_online_node(int pxm);
+int acpi_map_pxm_to_node(int pxm);
 int acpi_get_node(acpi_handle handle);
 #else
 static inline int acpi_map_pxm_to_online_node(int pxm)
 {
        return 0;
 }
+static inline int acpi_map_pxm_to_node(int pxm)
+{
+       return 0;
+}
 static inline int acpi_get_node(acpi_handle handle)
 {
        return 0;
index d143c13bed26db54824255111343f9a3d45781eb..f99b74a6e4cab30040fbb95a3ec8a9e31a4f375d 100644 (file)
 #define AMBA_CID       0xb105f00d
 #define CORESIGHT_CID  0xb105900d
 
+/*
+ * CoreSight Architecture specification updates the ID specification
+ * for components on the AMBA bus. (ARM IHI 0029E)
+ *
+ * Bits 15:12 of the CID are the device class.
+ *
+ * Class 0xF remains for PrimeCell and legacy components. (AMBA_CID above)
+ * Class 0x9 defines the component as CoreSight (CORESIGHT_CID above)
+ * Class 0x0, 0x1, 0xB, 0xE define components that do not have driver support
+ * at present.
+ * Class 0x2-0x8,0xA and 0xC-0xD are presently reserved.
+ *
+ * Remaining CID bits stay as 0xb105-00d
+ */
+
+/**
+ * Class 0x9 components use additional values to form a Unique Component
+ * Identifier (UCI) in cases where the peripheral ID values are identical for
+ * different components. Passed to the amba bus code from the component
+ * driver via the amba_id->data pointer.
+ * @devarch    : coresight devarch register value
+ * @devarch_mask: mask bits used for matching. 0 indicates UCI not used.
+ * @devtype    : coresight device type value
+ * @data       : additional driver data. As this structure has usurped the
+ *             original amba_id->data pointer, some drivers may still need
+ *             additional data of their own here.
+ */
+struct amba_cs_uci_id {
+       unsigned int devarch;
+       unsigned int devarch_mask;
+       unsigned int devtype;
+       void *data;
+};
+
+/* define offsets for registers used by UCI */
+#define UCI_REG_DEVTYPE_OFFSET 0xFCC
+#define UCI_REG_DEVARCH_OFFSET 0xFBC
+
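A hedged sketch of how a class-0x9 component driver might wire a UCI entry into its AMBA match table; every numeric value below is illustrative, not a real device's IDs:

	/* Illustrative values only; a real driver uses its component's IDs. */
	static const struct amba_cs_uci_id sample_uci_id = {
		.devarch	= 0x47704a13,
		.devarch_mask	= 0xfff0ffff,
		.devtype	= 0x00000013,
	};

	static const struct amba_id sample_ids[] = {
		{
			.id	= 0x000bb95d,	/* peripheral ID, illustrative */
			.mask	= 0x000fffff,
			.data	= (void *)&sample_uci_id,
		},
		{ 0, 0, NULL },
	};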
 struct clk;
 
 struct amba_device {
@@ -32,6 +69,8 @@ struct amba_device {
        struct resource         res;
        struct clk              *pclk;
        unsigned int            periphid;
+       unsigned int            cid;
+       struct amba_cs_uci_id   uci;
        unsigned int            irq[AMBA_NR_IRQS];
        char                    *driver_override;
 };
index d7711048ef93b81827f2d593c08afeb5611fc6f0..f5740423b0023e9abf5c5023a5f146a5da59012a 100644 (file)
@@ -116,6 +116,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_DISABLED_QUICK_FLAG         0x00002000
 #define CP_DISABLED_FLAG               0x00001000
 #define CP_QUOTA_NEED_FSCK_FLAG                0x00000800
 #define CP_LARGE_NAT_BITMAP_FLAG       0x00000400
@@ -186,7 +187,7 @@ struct f2fs_orphan_block {
 struct f2fs_extent {
        __le32 fofs;            /* start file offset of the extent */
        __le32 blk;             /* start block address of the extent */
-       __le32 len;             /* lengh of the extent */
+       __le32 len;             /* length of the extent */
 } __packed;
 
 #define F2FS_NAME_LEN          255
@@ -284,7 +285,7 @@ enum {
 
 struct node_footer {
        __le32 nid;             /* node id */
-       __le32 ino;             /* inode nunmber */
+       __le32 ino;             /* inode number */
        __le32 flag;            /* include cold/fsync/dentry marks and offset */
        __le64 cp_ver;          /* checkpoint version */
        __le32 next_blkaddr;    /* next node page block address */
@@ -489,12 +490,12 @@ typedef __le32    f2fs_hash_t;
 
 /*
  * space utilization of regular dentry and inline dentry (w/o extra reservation)
- *             regular dentry                  inline dentry
- * bitmap      1 * 27 = 27                     1 * 23 = 23
- * reserved    1 * 3 = 3                       1 * 7 = 7
- * dentry      11 * 214 = 2354                 11 * 182 = 2002
- * filename    8 * 214 = 1712                  8 * 182 = 1456
- * total       4096                            3488
+ *             regular dentry          inline dentry (def)     inline dentry (min)
+ * bitmap      1 * 27 = 27             1 * 23 = 23             1 * 1 = 1
+ * reserved    1 * 3 = 3               1 * 7 = 7               1 * 1 = 1
+ * dentry      11 * 214 = 2354         11 * 182 = 2002         11 * 2 = 22
+ * filename    8 * 214 = 1712          8 * 182 = 1456          8 * 2 = 16
+ * total       4096                    3488                    40
  *
 * Note: there is more reserved space in an inline dentry than in a regular
 * dentry; when converting an inline dentry we should handle this carefully.
@@ -506,12 +507,13 @@ typedef __le32    f2fs_hash_t;
 #define SIZE_OF_RESERVED       (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
                                F2FS_SLOT_LEN) * \
                                NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+#define MIN_INLINE_DENTRY_SIZE         40      /* just enough for '.' and '..' entries */
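Sanity check on the minimum: per the table above, 1 (bitmap) + 1 (reserved) + 11 * 2 = 22 (dentry) + 8 * 2 = 16 (filename) sums to 40 bytes, which is where MIN_INLINE_DENTRY_SIZE comes from.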
 
 /* One directory entry slot representing F2FS_SLOT_LEN-sized file name */
 struct f2fs_dir_entry {
        __le32 hash_code;       /* hash code of file name */
        __le32 ino;             /* inode number */
-       __le16 name_len;        /* lengh of file name */
+       __le16 name_len;        /* length of file name */
        __u8 file_type;         /* file type */
 } __packed;
 
index c38cc5eb7e737d835a2d2f81e502a64daf28460e..9d55c63db09b5dcb9ac997d802cb00ff356d4353 100644 (file)
  */
 #define KVM_MEMSLOT_INVALID    (1UL << 16)
 
+/*
+ * Bit 63 of the memslot generation number is an "update in-progress flag",
+ * i.e. it is temporarily set for the duration of install_new_memslots().
+ * This flag effectively creates a unique generation number that is used to
+ * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
+ * i.e. may (or may not) have come from the previous memslots generation.
+ *
+ * This is necessary because the actual memslots update is not atomic with
+ * respect to the generation number update.  Updating the generation number
+ * first would allow a vCPU to cache a spte from the old memslots using the
+ * new generation number, and updating the generation number after switching
+ * to the new memslots would allow cache hits using the old generation number
+ * to reference the defunct memslots.
+ *
+ * This mechanism is used to prevent getting hits in KVM's caches while a
+ * memslot update is in-progress, and to prevent cache hits *after* updating
+ * the actual generation number against accesses that were inserted into the
+ * cache *before* the memslots were updated.
+ */
+#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS     BIT_ULL(63)
+
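A hedged sketch of the consumer side; the helper below is hypothetical, for illustration only, and not KVM's actual cache code:

	/* Hypothetical check: refuse cache hits taken mid-update. */
	static inline bool mmio_gen_valid(u64 cached_gen, u64 slots_gen)
	{
		if (slots_gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)
			return false;

		return cached_gen == slots_gen;
	}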
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS 2
 
@@ -634,7 +655,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages);
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots);
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                struct kvm_memory_slot *memslot,
                                const struct kvm_userspace_memory_region *mem,
@@ -1182,6 +1203,7 @@ extern bool kvm_rebooting;
 
 extern unsigned int halt_poll_ns;
 extern unsigned int halt_poll_ns_grow;
+extern unsigned int halt_poll_ns_grow_start;
 extern unsigned int halt_poll_ns_shrink;
 
 struct kvm_device {
index 43348303cb4bfb6323e9db9fbc7da2eb349eca3f..feb342d026f2e935a3e01515c2dc94852a83ef77 100644 (file)
@@ -130,6 +130,7 @@ struct nd_region_desc {
        void *provider_data;
        int num_lanes;
        int numa_node;
+       int target_node;
        unsigned long flags;
        struct device_node *of_node;
 };
index 181d16601dd9c8bc1fa4112099dea637e962cfd0..56a92e3ae3ae5f0785c74eabcb56ae90a4a10b93 100644 (file)
@@ -296,7 +296,8 @@ struct ntb_dev_ops {
        int (*db_clear_mask)(struct ntb_dev *ntb, u64 db_bits);
 
        int (*peer_db_addr)(struct ntb_dev *ntb,
-                           phys_addr_t *db_addr, resource_size_t *db_size);
+                           phys_addr_t *db_addr, resource_size_t *db_size,
+                           u64 *db_data, int db_bit);
        u64 (*peer_db_read)(struct ntb_dev *ntb);
        int (*peer_db_set)(struct ntb_dev *ntb, u64 db_bits);
        int (*peer_db_clear)(struct ntb_dev *ntb, u64 db_bits);
@@ -1078,6 +1079,8 @@ static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
  * @ntb:       NTB device context.
  * @db_addr:   OUT - The address of the peer doorbell register.
 * @db_size:   OUT - The number of bytes to write to the peer doorbell register.
+ * @db_data:   OUT - The data value to write to the peer doorbell register
+ * @db_bit:    doorbell bit number
  *
  * Return the address of the peer doorbell register.  This may be used, for
  * example, by drivers that offload memory copy operations to a dma engine.
@@ -1091,12 +1094,13 @@ static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
  */
 static inline int ntb_peer_db_addr(struct ntb_dev *ntb,
                                   phys_addr_t *db_addr,
-                                  resource_size_t *db_size)
+                                  resource_size_t *db_size,
+                                  u64 *db_data, int db_bit)
 {
        if (!ntb->ops->peer_db_addr)
                return -EINVAL;
 
-       return ntb->ops->peer_db_addr(ntb, db_addr, db_size);
+       return ntb->ops->peer_db_addr(ntb, db_addr, db_size, db_data, db_bit);
 }
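A hedged usage sketch for the extended signature, e.g. a transport driver that rings one specific doorbell bit through a DMA engine; the function below is illustrative, not part of this patch:

	static int ring_peer_db_via_dma(struct ntb_dev *ntb, int db_bit)
	{
		phys_addr_t db_addr;
		resource_size_t db_size;
		u64 db_data;
		int rc;

		rc = ntb_peer_db_addr(ntb, &db_addr, &db_size, &db_data, db_bit);
		if (rc)
			return rc;

		/* Hand db_addr/db_data to a DMA engine to ring the doorbell. */
		return 0;
	}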
 
 /**
index b477a70cc2e4587fd828ee921ce0b9ec2138775d..bcf909d0de5f8e53f3fec18eb01c04fdd20ad5d8 100644 (file)
@@ -239,6 +239,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_WRITE              0x00000008
 #define FGP_NOFS               0x00000010
 #define FGP_NOWAIT             0x00000020
+#define FGP_FOR_MMAP           0x00000040
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
                int fgp_flags, gfp_t cache_gfp_mask);
index d0e1f1522a78e4213381614f77eea1a0d37d3f9e..52a283ba0465a15766b273199d17b2bacb349527 100644 (file)
@@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
                                                    int (*show)(struct seq_file *, void *),
                                                    proc_write_t write,
                                                    void *data);
+extern struct pid *tgid_pidfd_to_pid(const struct file *file);
 
 #else /* CONFIG_PROC_FS */
 
@@ -114,6 +115,11 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
 #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
 #define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
 
+static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+       return ERR_PTR(-EBADF);
+}
+
 #endif /* CONFIG_PROC_FS */
 
 struct net;
index f1429675f25283f3e7cb2651a9a4eb8cf2c3788a..1a40277b512c959813f87e000810d40d719b0071 100644 (file)
@@ -128,7 +128,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
                    unsigned long *lost_events);
 
 struct ring_buffer_iter *
-ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu);
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags);
 void ring_buffer_read_prepare_sync(void);
 void ring_buffer_read_start(struct ring_buffer_iter *iter);
 void ring_buffer_read_finish(struct ring_buffer_iter *iter);
index eee0412bdf4b82bb4ddd6063b636d9dabe145fdb..52a079b3a9a6d0d1fb889343c3d75a605a6c8f80 100644 (file)
@@ -248,9 +248,13 @@ struct ntb_ctrl_regs {
                u32 win_size;
                u64 xlate_addr;
        } bar_entry[6];
-       u32 reserved2[216];
-       u32 req_id_table[256];
-       u32 reserved3[512];
+       struct {
+               u32 win_size;
+               u32 reserved[3];
+       } bar_ext_entry[6];
+       u32 reserved2[192];
+       u32 req_id_table[512];
+       u32 reserved3[256];
        u64 lut_entry[512];
 } __packed;
 
index c2962953bf11c399266b765450186bf31689393d..e446806a561f876c51ea7247f852e871bd6510b9 100644 (file)
@@ -985,6 +985,9 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
                          unsigned mask, struct statx __user *buffer);
 asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
                         int flags, uint32_t sig);
+asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
+                                      siginfo_t __user *info,
+                                      unsigned int flags);
 
 /*
  * Architecture-specific system calls
index ee162e3e879b97889776015c2b9678304f8a61fe..553b34c8b5f700e1db484ca126d32a4c9582f59f 100644 (file)
@@ -125,9 +125,11 @@ extern void vga_put(struct pci_dev *pdev, unsigned int rsrc);
 #ifdef CONFIG_VGA_ARB
 extern struct pci_dev *vga_default_device(void);
 extern void vga_set_default_device(struct pci_dev *pdev);
+extern int vga_remove_vgacon(struct pci_dev *pdev);
 #else
 static inline struct pci_dev *vga_default_device(void) { return NULL; };
 static inline void vga_set_default_device(struct pci_dev *pdev) { };
+static inline int vga_remove_vgacon(struct pci_dev *pdev) { return 0; };
 #endif
 
 /*
index 465d7d033c4c61446bb288bc76f844304bb285d3..18bd8c3ea6053444b21c13b557a4b0097f3995c9 100644 (file)
@@ -750,7 +750,7 @@ static inline snd_pcm_uframes_t snd_pcm_playback_avail(struct snd_pcm_runtime *r
 }
 
 /**
- * snd_pcm_playback_avail - Get the available (readable) space for capture
+ * snd_pcm_capture_avail - Get the available (readable) space for capture
  * @runtime: PCM runtime instance
  *
  * Result is between 0 ... (boundary - 1)
index 3ec73f17ee2a4bc9517f9b8dd92e9c616d67073e..a3916b4dd57ed2dc227ec78437524460616e8ae0 100644 (file)
@@ -149,6 +149,17 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
                { CP_SPEC_LOG_NUM,      "log type is 2" },              \
                { CP_RECOVER_DIR,       "dir needs recovery" })
 
+#define show_shutdown_mode(type)                                       \
+       __print_symbolic(type,                                          \
+               { F2FS_GOING_DOWN_FULLSYNC,     "full sync" },          \
+               { F2FS_GOING_DOWN_METASYNC,     "meta sync" },          \
+               { F2FS_GOING_DOWN_NOSYNC,       "no sync" },            \
+               { F2FS_GOING_DOWN_METAFLUSH,    "meta flush" },         \
+               { F2FS_GOING_DOWN_NEED_FSCK,    "need fsck" })
+
+struct f2fs_sb_info;
+struct f2fs_io_info;
+struct extent_info;
 struct victim_sel_policy;
 struct f2fs_map_blocks;
 
@@ -533,6 +544,9 @@ TRACE_EVENT(f2fs_map_blocks,
                __field(block_t,        m_lblk)
                __field(block_t,        m_pblk)
                __field(unsigned int,   m_len)
+               __field(unsigned int,   m_flags)
+               __field(int,    m_seg_type)
+               __field(bool,   m_may_create)
                __field(int,    ret)
        ),
 
@@ -542,15 +556,22 @@ TRACE_EVENT(f2fs_map_blocks,
                __entry->m_lblk         = map->m_lblk;
                __entry->m_pblk         = map->m_pblk;
                __entry->m_len          = map->m_len;
+               __entry->m_flags        = map->m_flags;
+               __entry->m_seg_type     = map->m_seg_type;
+               __entry->m_may_create   = map->m_may_create;
                __entry->ret            = ret;
        ),
 
        TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
-               "start blkaddr = 0x%llx, len = 0x%llx, err = %d",
+               "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
+               "seg_type = %d, may_create = %d, err = %d",
                show_dev_ino(__entry),
                (unsigned long long)__entry->m_lblk,
                (unsigned long long)__entry->m_pblk,
                (unsigned long long)__entry->m_len,
+               __entry->m_flags,
+               __entry->m_seg_type,
+               __entry->m_may_create,
                __entry->ret)
 );
 
@@ -1616,6 +1637,30 @@ DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
        TP_ARGS(sb, type, count)
 );
 
+TRACE_EVENT(f2fs_shutdown,
+
+       TP_PROTO(struct f2fs_sb_info *sbi, unsigned int mode, int ret),
+
+       TP_ARGS(sbi, mode, ret),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(unsigned int, mode)
+               __field(int, ret)
+       ),
+
+       TP_fast_assign(
+               __entry->dev = sbi->sb->s_dev;
+               __entry->mode = mode;
+               __entry->ret = ret;
+       ),
+
+       TP_printk("dev = (%d,%d), mode: %s, ret: %d",
+               show_dev(__entry->dev),
+               show_shutdown_mode(__entry->mode),
+               __entry->ret)
+);
+
 #endif /* _TRACE_F2FS_H */
 
  /* This part must be outside protection */
index 8451f30c6a0fe8fa216c5f57a1ae81e8bded2e91..7e899e635d33abaecc009af281be99f7475f5d85 100644 (file)
@@ -712,7 +712,8 @@ TRACE_EVENT(xprt_transmit,
 
        TP_fast_assign(
                __entry->task_id = rqst->rq_task->tk_pid;
-               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->client_id = rqst->rq_task->tk_client ?
+                       rqst->rq_task->tk_client->cl_clid : -1;
                __entry->xid = be32_to_cpu(rqst->rq_xid);
                __entry->seqno = rqst->rq_seqno;
                __entry->status = status;
@@ -742,7 +743,8 @@ TRACE_EVENT(xprt_enq_xmit,
 
        TP_fast_assign(
                __entry->task_id = task->tk_pid;
-               __entry->client_id = task->tk_client->cl_clid;
+               __entry->client_id = task->tk_client ?
+                       task->tk_client->cl_clid : -1;
                __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
                __entry->seqno = task->tk_rqstp->rq_seqno;
                __entry->stage = stage;
index bf4624efe5e622d3a3d7140e53c0e1e129cf95e9..dee7292e1df6b162a12d0e55e9ccdf875fad428d 100644 (file)
@@ -824,6 +824,8 @@ __SYSCALL(__NR_futex_time64, sys_futex)
 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
 #endif
 
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
 #define __NR_io_uring_setup 425
 __SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
 #define __NR_io_uring_enter 426
index 6b7654b8001fc4d07dc8bcd4a00eddee31e09891..02ca827b8fac7f110535ba5d6aca8f1c87306def 100644 (file)
@@ -65,6 +65,7 @@ int console_printk[4] = {
        CONSOLE_LOGLEVEL_MIN,           /* minimum_console_loglevel */
        CONSOLE_LOGLEVEL_DEFAULT,       /* default_console_loglevel */
 };
+EXPORT_SYMBOL_GPL(console_printk);
 
 atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
 EXPORT_SYMBOL(ignore_console_lock_warning);
index e81b17b53fa53aa025ccb8f12c236158cb2e61b2..92190f62ebc53438b7da8fcd2845c7590f002e03 100644 (file)
@@ -382,7 +382,7 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
                                 int (*func)(struct resource *, void *))
 {
        struct resource res;
-       int ret = -1;
+       int ret = -EINVAL;
 
        while (start < end &&
               !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
@@ -452,6 +452,9 @@ int walk_mem_res(u64 start, u64 end, void *arg,
  * This function calls the @func callback against all memory ranges of type
 * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
  * It is to be used only for System RAM.
+ *
+ * This will find System RAM ranges that are children of top-level resources
+ * in addition to top-level System RAM resources.
  */
 int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
                          void *arg, int (*func)(unsigned long, unsigned long, void *))
@@ -460,14 +463,14 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
        unsigned long flags;
        struct resource res;
        unsigned long pfn, end_pfn;
-       int ret = -1;
+       int ret = -EINVAL;
 
        start = (u64) start_pfn << PAGE_SHIFT;
        end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
        flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
        while (start < end &&
               !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
-                                   true, &res)) {
+                                   false, &res)) {
                pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
                end_pfn = (res.end + 1) >> PAGE_SHIFT;
                if (end_pfn > pfn)
@@ -1128,6 +1131,15 @@ struct resource * __request_region(struct resource *parent,
                conflict = __request_resource(parent, res);
                if (!conflict)
                        break;
+               /*
+                * mm/hmm.c reserves physical addresses which then
+                * become unavailable to other users.  Conflicts are
+                * not expected.  Warn to aid debugging if encountered.
+                */
+               if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
+                       pr_warn("Unaddressable device %s %pR conflicts with %pR\n",
+                               conflict->name, conflict, res);
+               }
                if (conflict != parent) {
                        if (!(conflict->flags & IORESOURCE_BUSY)) {
                                parent = conflict;
index 5d53183e270576a630aae70650e153c0889da8dd..b7953934aa994e7993254aa6b04438815ed37f1f 100644 (file)
@@ -19,7 +19,9 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/cputime.h>
+#include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/proc_fs.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/coredump.h>
@@ -3487,6 +3489,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese,
 #endif
 #endif
 
+static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
+{
+       clear_siginfo(info);
+       info->si_signo = sig;
+       info->si_errno = 0;
+       info->si_code = SI_USER;
+       info->si_pid = task_tgid_vnr(current);
+       info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
+}
+
 /**
  *  sys_kill - send a signal to a process
  *  @pid: the PID of the process
@@ -3496,16 +3508,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
 {
        struct kernel_siginfo info;
 
-       clear_siginfo(&info);
-       info.si_signo = sig;
-       info.si_errno = 0;
-       info.si_code = SI_USER;
-       info.si_pid = task_tgid_vnr(current);
-       info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+       prepare_kill_siginfo(sig, &info);
 
        return kill_something_info(sig, &info, pid);
 }
 
+#ifdef CONFIG_PROC_FS
+/*
+ * Verify that the signaler and signalee either are in the same pid namespace
+ * or that the signaler's pid namespace is an ancestor of the signalee's pid
+ * namespace.
+ */
+static bool access_pidfd_pidns(struct pid *pid)
+{
+       struct pid_namespace *active = task_active_pid_ns(current);
+       struct pid_namespace *p = ns_of_pid(pid);
+
+       for (;;) {
+               if (!p)
+                       return false;
+               if (p == active)
+                       break;
+               p = p->parent;
+       }
+
+       return true;
+}
+
+static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
+{
+#ifdef CONFIG_COMPAT
+       /*
+        * Avoid hooking up compat syscalls and instead handle necessary
+        * conversions here. Note, this is a stop-gap measure and should not be
+        * considered a generic solution.
+        */
+       if (in_compat_syscall())
+               return copy_siginfo_from_user32(
+                       kinfo, (struct compat_siginfo __user *)info);
+#endif
+       return copy_siginfo_from_user(kinfo, info);
+}
+
+/**
+ * sys_pidfd_send_signal - send a signal to a process through a task file
+ *                          descriptor
+ * @pidfd:  the file descriptor of the process
+ * @sig:    signal to be sent
+ * @info:   the signal info
+ * @flags:  future flags to be passed
+ *
+ * The syscall currently only signals via PIDTYPE_PID which covers
+ * kill(<positive-pid>, <signal>). It does not signal threads or process
+ * groups.
+ * In order to extend the syscall to threads and process groups the @flags
+ * argument should be used. In essence, the @flags argument will determine
+ * what is signaled and not the file descriptor itself. Put in other words,
+ * grouping is a property of the flags argument not a property of the file
+ * descriptor.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+               siginfo_t __user *, info, unsigned int, flags)
+{
+       int ret;
+       struct fd f;
+       struct pid *pid;
+       kernel_siginfo_t kinfo;
+
+       /* Enforce that flags is set to 0 until we add an extension. */
+       if (flags)
+               return -EINVAL;
+
+       f = fdget_raw(pidfd);
+       if (!f.file)
+               return -EBADF;
+
+       /* Is this a pidfd? */
+       pid = tgid_pidfd_to_pid(f.file);
+       if (IS_ERR(pid)) {
+               ret = PTR_ERR(pid);
+               goto err;
+       }
+
+       ret = -EINVAL;
+       if (!access_pidfd_pidns(pid))
+               goto err;
+
+       if (info) {
+               ret = copy_siginfo_from_user_any(&kinfo, info);
+               if (unlikely(ret))
+                       goto err;
+
+               ret = -EINVAL;
+               if (unlikely(sig != kinfo.si_signo))
+                       goto err;
+
+               if ((task_pid(current) != pid) &&
+                   (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
+                       /* Only allow sending arbitrary signals to yourself. */
+                       ret = -EPERM;
+                       if (kinfo.si_code != SI_USER)
+                               goto err;
+
+                       /* Turn this into a regular kill signal. */
+                       prepare_kill_siginfo(sig, &kinfo);
+               }
+       } else {
+               prepare_kill_siginfo(sig, &kinfo);
+       }
+
+       ret = kill_pid_info(sig, &kinfo, pid);
+
+err:
+       fdput(f);
+       return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
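Since no libc wrapper exists yet, a userspace caller would invoke the new syscall directly. A minimal hedged sketch, assuming the syscall number defined in unistd.h above and an illustrative target pid:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <signal.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef __NR_pidfd_send_signal
	#define __NR_pidfd_send_signal 424
	#endif

	int main(void)
	{
		/* A pidfd is obtained by opening a /proc/<pid> directory;
		 * the pid here is illustrative. */
		int pidfd = open("/proc/1234", O_DIRECTORY | O_CLOEXEC);

		if (pidfd < 0)
			return 1;

		/* info == NULL behaves like kill(pid, SIGTERM); flags must be 0. */
		if (syscall(__NR_pidfd_send_signal, pidfd, SIGTERM, NULL, 0) < 0)
			return 1;

		return 0;
	}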
 static int
 do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
 {
index 51d7c6794bf11219a24cf19af0258fe65b54b592..d21f4befaea4dcf25ed18b8163f24415fb5bd010 100644 (file)
@@ -168,6 +168,7 @@ COND_SYSCALL(syslog);
 /* kernel/sched/core.c */
 
 /* kernel/signal.c */
+COND_SYSCALL(pidfd_send_signal);
 
 /* kernel/sys.c */
 COND_SYSCALL(setregid);
index fac0ddf8a8e22505749be3064e6b964ba12d4930..e1c6d79fb4cc9f951a524c4535d7c8ee64282a14 100644 (file)
@@ -723,6 +723,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 #endif
        case BLKTRACESTART:
                start = 1;
+               /* fall through */
        case BLKTRACESTOP:
                ret = __blk_trace_startstop(q, start);
                break;
index 9a91479bbbfe13e51085ec2fa7641f4b6b88e89e..41b6f96e5366231d72454e6c33015188066751ae 100644 (file)
@@ -4191,6 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
+ * @flags: gfp flags to use for memory allocation
  *
  * This performs the initial preparations necessary to iterate
  * through the buffer.  Memory is allocated, buffer recording
@@ -4208,7 +4209,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  * This overall must be paired with ring_buffer_read_finish.
  */
 struct ring_buffer_iter *
-ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_iter *iter;
@@ -4216,7 +4217,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
 
-       iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+       iter = kmalloc(sizeof(*iter), flags);
        if (!iter)
                return NULL;
 
index 94ffc1c559d51613fd92c226a61aeb6051148aa7..21153e64bf1c366033213e90272438ba171b2822 100644 (file)
@@ -4079,7 +4079,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
                        iter->buffer_iter[cpu] =
-                               ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
+                               ring_buffer_read_prepare(iter->trace_buffer->buffer,
+                                                        cpu, GFP_KERNEL);
                }
                ring_buffer_read_prepare_sync();
                for_each_tracing_cpu(cpu) {
@@ -4089,7 +4090,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
        } else {
                cpu = iter->cpu_file;
                iter->buffer_iter[cpu] =
-                       ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
+                       ring_buffer_read_prepare(iter->trace_buffer->buffer,
+                                                cpu, GFP_KERNEL);
                ring_buffer_read_prepare_sync();
                ring_buffer_read_start(iter->buffer_iter[cpu]);
                tracing_iter_reset(iter, cpu);
index d953c163a0794f5cd4578ae6957f0645ff1eaa0d..810d78a8d14c76b02efa3dff91b63e52e4c66feb 100644 (file)
@@ -51,14 +51,16 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
        if (cpu_file == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
                        iter.buffer_iter[cpu] =
-                       ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu);
+                       ring_buffer_read_prepare(iter.trace_buffer->buffer,
+                                                cpu, GFP_ATOMIC);
                        ring_buffer_read_start(iter.buffer_iter[cpu]);
                        tracing_iter_reset(&iter, cpu);
                }
        } else {
                iter.cpu_file = cpu_file;
                iter.buffer_iter[cpu_file] =
-                       ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file);
+                       ring_buffer_read_prepare(iter.trace_buffer->buffer,
+                                                cpu_file, GFP_ATOMIC);
                ring_buffer_read_start(iter.buffer_iter[cpu_file]);
                tracing_iter_reset(&iter, cpu_file);
        }
index 99592c27465e1156a0dce1dd2d953aef9ef3e8c8..5d5129b05df782bff2bd5d04764324863d3c6f10 100644 (file)
@@ -35,7 +35,7 @@ static struct dyn_event_operations trace_kprobe_ops = {
        .match = trace_kprobe_match,
 };
 
-/**
+/*
  * Kprobe event core functions
  */
 struct trace_kprobe {
@@ -221,7 +221,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 
        tk->rp.maxactive = maxactive;
 
-       if (!event || !is_good_name(event)) {
+       if (!event || !group) {
                ret = -EINVAL;
                goto error;
        }
@@ -231,11 +231,6 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
        if (!tk->tp.call.name)
                goto error;
 
-       if (!group || !is_good_name(group)) {
-               ret = -EINVAL;
-               goto error;
-       }
-
        tk->tp.class.system = kstrdup(group, GFP_KERNEL);
        if (!tk->tp.class.system)
                goto error;
@@ -624,7 +619,11 @@ static int trace_kprobe_create(int argc, const char *argv[])
        if (event)
                event++;
 
-       if (is_return && isdigit(argv[0][1])) {
+       if (isdigit(argv[0][1])) {
+               if (!is_return) {
+                       pr_info("Maxactive is not for kprobe\n");
+                       return -EINVAL;
+               }
                if (event)
                        len = event - &argv[0][1] - 1;
                else
@@ -634,8 +633,8 @@ static int trace_kprobe_create(int argc, const char *argv[])
                memcpy(buf, &argv[0][1], len);
                buf[len] = '\0';
                ret = kstrtouint(buf, 0, &maxactive);
-               if (ret) {
-                       pr_info("Failed to parse maxactive.\n");
+               if (ret || !maxactive) {
+                       pr_info("Invalid maxactive number\n");
-                       return ret;
+                       return ret ? ret : -EINVAL;
                }
                /* kretprobes instances are iterated over via a list. The
@@ -694,9 +693,9 @@ static int trace_kprobe_create(int argc, const char *argv[])
        tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
                               argc, is_return);
        if (IS_ERR(tk)) {
-               pr_info("Failed to allocate trace_probe.(%d)\n",
-                       (int)PTR_ERR(tk));
                ret = PTR_ERR(tk);
+               /* This must return -ENOMEM otherwise there is a bug */
+               WARN_ON_ONCE(ret != -ENOMEM);
                goto out;
        }
 
index 89da34b326e3cf05b048f60011a08bc88d42b81f..8f8411e7835fdc0e138924c8ab5c35683894774e 100644 (file)
@@ -13,7 +13,7 @@
 
 #include "trace_probe.h"
 
-const char *reserved_field_names[] = {
+static const char *reserved_field_names[] = {
        "common_type",
        "common_flags",
        "common_preempt_count",
@@ -159,6 +159,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
                                char *buf)
 {
        const char *slash, *event = *pevent;
+       int len;
 
        slash = strchr(event, '/');
        if (slash) {
@@ -171,12 +172,25 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
                        return -E2BIG;
                }
                strlcpy(buf, event, slash - event + 1);
+               if (!is_good_name(buf)) {
+                       pr_info("Group name must follow the same rules as C identifiers\n");
+                       return -EINVAL;
+               }
                *pgroup = buf;
                *pevent = slash + 1;
+               event = *pevent;
        }
-       if (strlen(event) == 0) {
+       len = strlen(event);
+       if (len == 0) {
                pr_info("Event name is not specified\n");
                return -EINVAL;
+       } else if (len > MAX_EVENT_NAME_LEN) {
+               pr_info("Event name is too long\n");
+               return -E2BIG;
+       }
+       if (!is_good_name(event)) {
+               pr_info("Event name must follow the same rules as C identifiers\n");
+               return -EINVAL;
        }
        return 0;
 }
@@ -548,6 +562,8 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg,
 
        body = strchr(arg, '=');
        if (body) {
+               if (body - arg > MAX_ARG_NAME_LEN || body == arg)
+                       return -EINVAL;
                parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL);
                body++;
        } else {
index 8a63f8bc01bc73ef0229fe735e2238f5a47bb561..2177c206de151c1b4e17b491020d0d9cd0af57a8 100644 (file)
@@ -32,6 +32,7 @@
 #define MAX_TRACE_ARGS         128
 #define MAX_ARGSTR_LEN         63
 #define MAX_ARRAY_LEN          64
+#define MAX_ARG_NAME_LEN       32
 #define MAX_STRING_SIZE                PATH_MAX
 
 /* Reserved field names */
index 9bde07c06362fb9a00196358f333e2408583c78a..be78d99ee6bc2b092c789d59070895b1da9aef8b 100644 (file)
@@ -273,10 +273,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 {
        struct trace_uprobe *tu;
 
-       if (!event || !is_good_name(event))
-               return ERR_PTR(-EINVAL);
-
-       if (!group || !is_good_name(group))
+       if (!event || !group)
                return ERR_PTR(-EINVAL);
 
        tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
@@ -524,8 +521,9 @@ static int trace_uprobe_create(int argc, const char **argv)
 
        tu = alloc_trace_uprobe(group, event, argc, is_return);
        if (IS_ERR(tu)) {
-               pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
                ret = PTR_ERR(tu);
+               /* This must return -ENOMEM otherwise there is a bug */
+               WARN_ON_ONCE(ret != -ENOMEM);
                goto fail_address_parse;
        }
        tu->offset = offset;
index 4e90d443d1b09702d79bc1026872aabf3becc0c9..e723eacf7868d2af4442355b37480cc6f6c36d60 100644 (file)
@@ -39,7 +39,7 @@ endif
 ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
 NEON_FLAGS := -ffreestanding
 ifeq ($(ARCH),arm)
-NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon
+NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon
 endif
 CFLAGS_recov_neon_inner.o += $(NEON_FLAGS)
 ifeq ($(ARCH),arm64)
index ec6566ffbd9017d98a10eb37237fc30294bf1a5d..d78f577baef2a17d1d525673d4e6519f4e950fbc 100644 (file)
@@ -1587,6 +1587,9 @@ EXPORT_SYMBOL(find_lock_entry);
  *   @gfp_mask and added to the page cache and the VM's LRU
  *   list. The page is returned locked and with an increased
  *   refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1641,7 +1644,7 @@ no_page:
                if (!page)
                        return NULL;
 
-               if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+               if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
                        fgp_flags |= FGP_LOCK;
 
                /* Init accessed so avoid atomic mark_page_accessed later */
@@ -1655,6 +1658,13 @@ no_page:
                        if (err == -EEXIST)
                                goto repeat;
                }
+
+               /*
+                * add_to_page_cache_lru locks the page, and for mmap we expect
+                * an unlocked page.
+                */
+               if (page && (fgp_flags & FGP_FOR_MMAP))
+                       unlock_page(page);
        }
 
        return page;
@@ -2379,64 +2389,98 @@ out:
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file:      file to read
- * @offset:    page index
- * @gfp_mask:  memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
+#define MMAP_LOTSAMISS  (100)
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+                                            struct file *fpin)
 {
-       struct address_space *mapping = file->f_mapping;
-       struct page *page;
-       int ret;
+       int flags = vmf->flags;
 
-       do {
-               page = __page_cache_alloc(gfp_mask);
-               if (!page)
-                       return -ENOMEM;
+       if (fpin)
+               return fpin;
 
-               ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-               if (ret == 0)
-                       ret = mapping->a_ops->readpage(file, page);
-               else if (ret == -EEXIST)
-                       ret = 0; /* losing race to add is OK */
+       /*
+        * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+        * anything, so we only pin the file and drop the mmap_sem if only
+        * FAULT_FLAG_ALLOW_RETRY is set.
+        */
+       if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+           FAULT_FLAG_ALLOW_RETRY) {
+               fpin = get_file(vmf->vma->vm_file);
+               up_read(&vmf->vma->vm_mm->mmap_sem);
+       }
+       return fpin;
+}
 
-               put_page(page);
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similarly to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that the page is actually locked when 1 is returned, and 0 is
+ * returned if the page could not be locked.  If we did have to drop the
+ * mmap_sem then fpin will point to the pinned file and needs to be fput()'ed
+ * at a later point.
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+                                    struct file **fpin)
+{
+       if (trylock_page(page))
+               return 1;
 
-       } while (ret == AOP_TRUNCATED_PAGE);
+       /*
+        * NOTE! This will make us return with VM_FAULT_RETRY, but with
+        * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+        * is supposed to work. We have way too many special cases...
+        */
+       if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+               return 0;
 
-       return ret;
+       *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+       if (vmf->flags & FAULT_FLAG_KILLABLE) {
+               if (__lock_page_killable(page)) {
+                       /*
+                        * We didn't have the right flags to drop the mmap_sem,
+                        * but all fault_handlers only check for fatal signals
+                        * if we return VM_FAULT_RETRY, so we need to drop the
+                        * mmap_sem here and return 0 if we don't have a fpin.
+                        */
+                       if (*fpin == NULL)
+                               up_read(&vmf->vma->vm_mm->mmap_sem);
+                       return 0;
+               }
+       } else
+               __lock_page(page);
+       return 1;
 }
 
-#define MMAP_LOTSAMISS  (100)
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all.  We don't want to perform IO under the mmap sem, so if we have
+ * to drop the mmap sem we return the file that was pinned in order to do the
+ * IO.  If we didn't pin a file then we return NULL.  The file that is
+ * returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_fault *vmf)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
+       struct file *fpin = NULL;
        pgoff_t offset = vmf->pgoff;
 
        /* If we don't want any read-ahead, don't bother */
        if (vmf->vma->vm_flags & VM_RAND_READ)
-               return;
+               return fpin;
        if (!ra->ra_pages)
-               return;
+               return fpin;
 
        if (vmf->vma->vm_flags & VM_SEQ_READ) {
+               fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                page_cache_sync_readahead(mapping, ra, file, offset,
                                          ra->ra_pages);
-               return;
+               return fpin;
        }
 
        /* Avoid banging the cache line if not needed */
@@ -2448,37 +2492,44 @@ static void do_sync_mmap_readahead(struct vm_fault *vmf)
         * stop bothering with read-ahead. It will only hurt.
         */
        if (ra->mmap_miss > MMAP_LOTSAMISS)
-               return;
+               return fpin;
 
        /*
         * mmap read-around
         */
+       fpin = maybe_unlock_mmap_for_io(vmf, fpin);
        ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
        ra->size = ra->ra_pages;
        ra->async_size = ra->ra_pages / 4;
        ra_submit(ra, mapping, file);
+       return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further..
+ * so we want to possibly extend the readahead further.  We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_fault *vmf,
-                                   struct page *page)
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
+                                           struct page *page)
 {
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
+       struct file *fpin = NULL;
        pgoff_t offset = vmf->pgoff;
 
        /* If we don't want any read-ahead, don't bother */
        if (vmf->vma->vm_flags & VM_RAND_READ)
-               return;
+               return fpin;
        if (ra->mmap_miss > 0)
                ra->mmap_miss--;
-       if (PageReadahead(page))
+       if (PageReadahead(page)) {
+               fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                page_cache_async_readahead(mapping, ra, file,
                                           page, offset, ra->ra_pages);
+       }
+       return fpin;
 }
 
 /**
@@ -2510,6 +2561,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
        int error;
        struct file *file = vmf->vma->vm_file;
+       struct file *fpin = NULL;
        struct address_space *mapping = file->f_mapping;
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
@@ -2531,23 +2583,26 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
                 * We found the page, so try async readahead before
                 * waiting for the lock.
                 */
-               do_async_mmap_readahead(vmf, page);
+               fpin = do_async_mmap_readahead(vmf, page);
        } else if (!page) {
                /* No page in the page cache at all */
-               do_sync_mmap_readahead(vmf);
                count_vm_event(PGMAJFAULT);
                count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
+               fpin = do_sync_mmap_readahead(vmf);
 retry_find:
-               page = find_get_page(mapping, offset);
-               if (!page)
-                       goto no_cached_page;
+               page = pagecache_get_page(mapping, offset,
+                                         FGP_CREAT|FGP_FOR_MMAP,
+                                         vmf->gfp_mask);
+               if (!page) {
+                       if (fpin)
+                               goto out_retry;
+                       return vmf_error(-ENOMEM);
+               }
        }
 
-       if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-               put_page(page);
-               return ret | VM_FAULT_RETRY;
-       }
+       if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+               goto out_retry;
 
        /* Did it get truncated? */
        if (unlikely(page->mapping != mapping)) {
@@ -2564,6 +2619,16 @@ retry_find:
        if (unlikely(!PageUptodate(page)))
                goto page_not_uptodate;
 
+       /*
+        * We've made it this far and we had to drop our mmap_sem, now is the
+        * time to return to the upper layer and have it re-find the vma and
+        * redo the fault.
+        */
+       if (fpin) {
+               unlock_page(page);
+               goto out_retry;
+       }
+
        /*
         * Found the page and have a reference on it.
         * We must recheck i_size under page lock.
@@ -2578,28 +2643,6 @@ retry_find:
        vmf->page = page;
        return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-       /*
-        * We're only likely to ever get here if MADV_RANDOM is in
-        * effect.
-        */
-       error = page_cache_read(file, offset, vmf->gfp_mask);
-
-       /*
-        * The page we want has now been added to the page cache.
-        * In the unlikely event that someone removed it in the
-        * meantime, we'll just come back here and read it again.
-        */
-       if (error >= 0)
-               goto retry_find;
-
-       /*
-        * An error return from page_cache_read can result if the
-        * system is low on memory, or a problem occurs while trying
-        * to schedule I/O.
-        */
-       return vmf_error(error);
-
 page_not_uptodate:
        /*
         * Umm, take care of errors if the page isn't up-to-date.
@@ -2608,12 +2651,15 @@ page_not_uptodate:
         * and we need to check for errors.
         */
        ClearPageError(page);
+       fpin = maybe_unlock_mmap_for_io(vmf, fpin);
        error = mapping->a_ops->readpage(file, page);
        if (!error) {
                wait_on_page_locked(page);
                if (!PageUptodate(page))
                        error = -EIO;
        }
+       if (fpin)
+               goto out_retry;
        put_page(page);
 
        if (!error || error == AOP_TRUNCATED_PAGE)
@@ -2622,6 +2668,18 @@ page_not_uptodate:
        /* Things didn't work out. Return zero to tell the mm layer so. */
        shrink_readahead_size_eio(file, ra);
        return VM_FAULT_SIGBUS;
+
+out_retry:
+       /*
+        * We dropped the mmap_sem, so we need to return to the fault handler
+        * to re-find the vma and then come back to find our hopefully still
+        * populated page.
+        */
+       if (page)
+               put_page(page);
+       if (fpin)
+               fput(fpin);
+       return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);
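For context, the fpin convention used throughout this hunk works as follows: whenever filemap_fault() may sleep for readahead or page I/O, it first pins the file and drops mmap_sem, then returns VM_FAULT_RETRY so the caller can re-take the lock and re-find the vma. A minimal sketch of the helper this relies on (illustrative; the upstream body may differ in detail):

static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
					     struct file *fpin)
{
	int flags = vmf->flags;

	/* mmap_sem was already dropped on an earlier call */
	if (fpin)
		return fpin;

	/*
	 * Only drop mmap_sem when the fault is allowed to be retried and
	 * the caller is willing to wait (no FAULT_FLAG_RETRY_NOWAIT).
	 */
	if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
	    FAULT_FLAG_ALLOW_RETRY) {
		fpin = get_file(vmf->vma->vm_file);	/* pin past unlock */
		up_read(&vmf->vma->vm_mm->mmap_sem);
	}
	return fpin;
}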
 
index cd23c081924deed7774f98407748becf817c21b0..f767582af4f8c0f28102f2d77d8dc6a667ec3df5 100644 (file)
@@ -101,28 +101,24 @@ u64 max_mem_size = U64_MAX;
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
 {
-       struct resource *res, *conflict;
+       struct resource *res;
+       unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+       char *resource_name = "System RAM";
 
        if (start + size > max_mem_size)
                return ERR_PTR(-E2BIG);
 
-       res = kzalloc(sizeof(struct resource), GFP_KERNEL);
-       if (!res)
-               return ERR_PTR(-ENOMEM);
-
-       res->name = "System RAM";
-       res->start = start;
-       res->end = start + size - 1;
-       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-       conflict =  request_resource_conflict(&iomem_resource, res);
-       if (conflict) {
-               if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
-                       pr_debug("Device unaddressable memory block "
-                                "memory hotplug at %#010llx !\n",
-                                (unsigned long long)start);
-               }
-               pr_debug("System RAM resource %pR cannot be added\n", res);
-               kfree(res);
+       /*
+        * Request ownership of the new memory range.  This might be
+        * a child of an existing resource that was present but
+        * not marked as busy.
+        */
+       res = __request_region(&iomem_resource, start, size,
+                              resource_name, flags);
+
+       if (!res) {
+               pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
+                               start, start + size);
                return ERR_PTR(-EEXIST);
        }
        return res;
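Since __request_region() already attaches the new resource beneath any overlapping non-busy parent, a caller only has to distinguish the NULL (conflict) case from success. A hypothetical caller sketch (add_memory_block() is illustrative, not the real hotplug entry point):

static int add_memory_block(u64 start, u64 size)
{
	struct resource *res = register_memory_resource(start, size);

	if (IS_ERR(res))
		return PTR_ERR(res);	/* -E2BIG or -EEXIST, per above */

	/* ... arch_add_memory(), onlining, sysfs setup, etc. ... */
	return 0;
}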
index 357214a51f13853ba1bac594f35938bdeacfd232..b85d51f4b8ebe969e167f03379084633e34b1549 100644 (file)
@@ -1061,7 +1061,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
                p9_debug(P9_DEBUG_ERROR,
                         "Please specify a msize of at least 4k\n");
                err = -EINVAL;
-               goto free_client;
+               goto close_trans;
        }
 
        err = p9_client_version(clnt);
index e2fbf3677b9baf3fa99ba98485f73caf0118f249..29420ebb8f070acfa01c11b979285ecd6c2c82a2 100644 (file)
@@ -513,7 +513,7 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                if (dev->state == XenbusStateClosed)
                        break;
-               /* Missed the backend's CLOSING state -- fallthrough */
+               /* fall through - Missed the backend's CLOSING state */
        case XenbusStateClosing:
                xenbus_frontend_closed(dev);
                break;
index 4216fe33204a8e3f2634cae5ed2329335b1885da..228970e6e52ba8b407be724d055976dd67530f81 100644 (file)
@@ -1730,7 +1730,12 @@ call_allocate(struct rpc_task *task)
        req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) +
                           proc->p_arglen;
        req->rq_callsize <<= 2;
-       req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + proc->p_replen;
+       /*
+        * Note: the reply buffer must at minimum allocate enough space
+        * for the 'struct accepted_reply' from RFC5531.
+        */
+       req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack +
+                       max_t(size_t, proc->p_replen, 2);
        req->rq_rcvsize <<= 2;
 
        status = xprt->ops->buf_alloc(task);
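The max_t() floor above guarantees that even a procedure advertising a zero-length reply reserves space for the fixed part of the RFC 5531 accepted_reply. A worked example (illustrative arithmetic only):

/*
 * For p_replen == 0:
 *
 *	rq_rcvsize = RPC_REPHDRSIZE + au_rslack + max_t(size_t, 0, 2)
 *	rq_rcvsize <<= 2	(XDR words to bytes; 1 word == 4 bytes)
 *
 * so at least two extra 32-bit words remain available beyond the reply
 * header and the verifier slack.
 */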
@@ -2387,9 +2392,6 @@ call_decode(struct rpc_task *task)
        WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
                                sizeof(req->rq_rcv_buf)) != 0);
 
-       if (req->rq_rcv_buf.len < 12)
-               goto out_retry;
-
        xdr_init_decode(&xdr, &req->rq_rcv_buf,
                        req->rq_rcv_buf.head[0].iov_base, req);
        switch (rpc_decode_header(task, &xdr)) {
@@ -2400,7 +2402,6 @@ call_decode(struct rpc_task *task)
                        task->tk_pid, __func__, task->tk_status);
                return;
        case -EAGAIN:
-out_retry:
                task->tk_status = 0;
                /* Note: rpc_decode_header() may have freed the RPC slot */
                if (task->tk_rqstp == req) {
@@ -2449,7 +2450,7 @@ static noinline int
 rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_clnt *clnt = task->tk_client;
-       int error = -EACCES;
+       int error;
        __be32 *p;
 
        /* RFC-1014 says that the representation of XDR data must be a
@@ -2458,7 +2459,7 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
         *   undefined results
         */
        if (task->tk_rqstp->rq_rcv_buf.len & 3)
-               goto out_badlen;
+               goto out_unparsable;
 
        p = xdr_inline_decode(xdr, 3 * sizeof(*p));
        if (!p)
@@ -2492,10 +2493,12 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
                error = -EOPNOTSUPP;
                goto out_err;
        case rpc_garbage_args:
+       case rpc_system_err:
                trace_rpc__garbage_args(task);
+               error = -EIO;
                break;
        default:
-               trace_rpc__unparsable(task);
+               goto out_unparsable;
        }
 
 out_garbage:
@@ -2509,11 +2512,6 @@ out_err:
        rpc_exit(task, error);
        return error;
 
-out_badlen:
-       trace_rpc__unparsable(task);
-       error = -EIO;
-       goto out_err;
-
 out_unparsable:
        trace_rpc__unparsable(task);
        error = -EIO;
@@ -2524,6 +2522,7 @@ out_verifier:
        goto out_garbage;
 
 out_msg_denied:
+       error = -EACCES;
        p = xdr_inline_decode(xdr, sizeof(*p));
        if (!p)
                goto out_unparsable;
@@ -2535,9 +2534,7 @@ out_msg_denied:
                error = -EPROTONOSUPPORT;
                goto out_err;
        default:
-               trace_rpc__unparsable(task);
-               error = -EIO;
-               goto out_err;
+               goto out_unparsable;
        }
 
        p = xdr_inline_decode(xdr, sizeof(*p));
@@ -2572,8 +2569,7 @@ out_msg_denied:
                        task->tk_xprt->servername);
                break;
        default:
-               trace_rpc__unparsable(task);
-               error = -EIO;
+               goto out_unparsable;
        }
        goto out_err;
 }
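Read together, the rpc_decode_header() hunks above consolidate the error mapping roughly as follows (informal summary derived from the hunks, not upstream code):

/*
 *	misaligned buffer, short decode, unknown accept/reject/auth stat
 *					 -> out_unparsable -> -EIO
 *	rpc_garbage_args, rpc_system_err -> -EIO, retried via out_garbage
 *	rpc_msg_denied paths		 -> -EACCES by default, overridden
 *					    by more specific errors such as
 *					    -EPROTONOSUPPORT for rpc_mismatch
 */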
index e096c5a725dff123a347741bffcfdef13227218f..d7117d24146017e477f0e4ec39dbc6f2c08b11e1 100644 (file)
@@ -664,7 +664,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
        spin_lock_bh(&xprt->transport_lock);
        xprt_clear_connected(xprt);
        xprt_clear_write_space_locked(xprt);
-       xprt_wake_pending_tasks(xprt, -EAGAIN);
+       xprt_wake_pending_tasks(xprt, -ENOTCONN);
        spin_unlock_bh(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_disconnect_done);
index 42f45d33dc5675ce5980c994a891f310bcd7c8b8..9359539907bafb7ca1c13ad51485756ac89ce5d0 100644 (file)
@@ -453,7 +453,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
                        goto out;
                if (ret != want)
                        goto out;
-       } else
+       } else if (offset < seek_init)
                offset = seek_init;
        ret = -EMSGSIZE;
 out:
index f5dd288d1a7a39d9454bd1cf94a8e744601c07c1..76e9b41fcea2cb662966d325f20f48d9ce6b8a95 100644 (file)
@@ -95,7 +95,10 @@ void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start)
                              1 << azx_dev->index,
                              1 << azx_dev->index);
        /* set stripe control */
-       stripe_ctl = snd_hdac_get_stream_stripe_ctl(bus, azx_dev->substream);
+       if (azx_dev->substream)
+               stripe_ctl = snd_hdac_get_stream_stripe_ctl(bus, azx_dev->substream);
+       else
+               stripe_ctl = 0;
        snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK,
                                stripe_ctl);
        /* set DMA start and interrupt mask */
index dbd8da5685cb5bb8ad18c4e366795ef0fa8c73d4..3d68f9ef769445814733e5650b5209aa16c8a7d8 100644 (file)
@@ -219,7 +219,6 @@ disable_hda:
        return rc;
 }
 
-#ifdef CONFIG_PM_SLEEP
 static void hda_tegra_disable_clocks(struct hda_tegra *data)
 {
        clk_disable_unprepare(data->hda2hdmi_clk);
@@ -230,7 +229,7 @@ static void hda_tegra_disable_clocks(struct hda_tegra *data)
 /*
  * power management
  */
-static int hda_tegra_suspend(struct device *dev)
+static int __maybe_unused hda_tegra_suspend(struct device *dev)
 {
        struct snd_card *card = dev_get_drvdata(dev);
        int rc;
@@ -243,7 +242,7 @@ static int hda_tegra_suspend(struct device *dev)
        return 0;
 }
 
-static int hda_tegra_resume(struct device *dev)
+static int __maybe_unused hda_tegra_resume(struct device *dev)
 {
        struct snd_card *card = dev_get_drvdata(dev);
        int rc;
@@ -255,10 +254,8 @@ static int hda_tegra_resume(struct device *dev)
 
        return 0;
 }
-#endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM
-static int hda_tegra_runtime_suspend(struct device *dev)
+static int __maybe_unused hda_tegra_runtime_suspend(struct device *dev)
 {
        struct snd_card *card = dev_get_drvdata(dev);
        struct azx *chip = card->private_data;
@@ -275,7 +272,7 @@ static int hda_tegra_runtime_suspend(struct device *dev)
        return 0;
 }
 
-static int hda_tegra_runtime_resume(struct device *dev)
+static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
 {
        struct snd_card *card = dev_get_drvdata(dev);
        struct azx *chip = card->private_data;
@@ -292,7 +289,6 @@ static int hda_tegra_runtime_resume(struct device *dev)
 
        return 0;
 }
-#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops hda_tegra_pm = {
        SET_SYSTEM_SLEEP_PM_OPS(hda_tegra_suspend, hda_tegra_resume)
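The hda_tegra hunks above show the usual trick for dropping CONFIG_PM/CONFIG_PM_SLEEP guards: mark the callbacks __maybe_unused and let the SET_*_PM_OPS macros compile the references away. A generic sketch of the pattern (the foo_* names are illustrative):

#include <linux/pm.h>

static int __maybe_unused foo_suspend(struct device *dev)
{
	return 0;	/* quiesce the device here */
}

static int __maybe_unused foo_resume(struct device *dev)
{
	return 0;	/* bring the device back up here */
}

/*
 * When CONFIG_PM_SLEEP is disabled, SET_SYSTEM_SLEEP_PM_OPS() expands to
 * nothing; __maybe_unused then keeps the unreferenced callbacks from
 * triggering -Wunused-function.
 */
static const struct dev_pm_ops foo_pm = {
	SET_SYSTEM_SLEEP_PM_OPS(foo_suspend, foo_resume)
};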
index a4ee7656d9ee904e6f6de6a048cfbbe3a52ce027..fb65ad31e86c84b751abc07d3bd3af8bd8e4a8fa 100644 (file)
@@ -936,6 +936,9 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x8456, "HP Z2 G4 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
        SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
index 73d7042ff8841b9c68216be084905be0d91ee660..8b3ac690efa368bb0929c06f0085b46635ed5a0a 100644 (file)
@@ -57,10 +57,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
                                ((codec)->core.vendor_id == 0x80862800))
 #define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
+#define is_icelake(codec) ((codec)->core.vendor_id == 0x8086280f)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
                                || is_skylake(codec) || is_broxton(codec) \
-                               || is_kabylake(codec)) || is_geminilake(codec) \
-                               || is_cannonlake(codec)
+                               || is_kabylake(codec) || is_geminilake(codec) \
+                               || is_cannonlake(codec) || is_icelake(codec))
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -181,6 +182,8 @@ struct hdmi_spec {
 
        struct hdac_chmap chmap;
        hda_nid_t vendor_nid;
+       const int *port_map;
+       int port_num;
 };
 
 #ifdef CONFIG_SND_HDA_COMPONENT
@@ -2418,12 +2421,11 @@ static void intel_haswell_fixup_connect_list(struct hda_codec *codec,
        snd_hda_override_conn_list(codec, nid, spec->num_cvts, spec->cvt_nids);
 }
 
-#define INTEL_VENDOR_NID 0x08
-#define INTEL_GLK_VENDOR_NID 0x0B
-#define INTEL_GET_VENDOR_VERB 0xf81
-#define INTEL_SET_VENDOR_VERB 0x781
-#define INTEL_EN_DP12                  0x02 /* enable DP 1.2 features */
-#define INTEL_EN_ALL_PIN_CVTS  0x01 /* enable 2nd & 3rd pins and convertors */
+#define INTEL_GET_VENDOR_VERB  0xf81
+#define INTEL_SET_VENDOR_VERB  0x781
+#define INTEL_EN_DP12          0x02    /* enable DP 1.2 features */
+#define INTEL_EN_ALL_PIN_CVTS  0x01    /* enable 2nd & 3rd pins and converters */
 
 static void intel_haswell_enable_all_pins(struct hda_codec *codec,
                                          bool update_tree)
@@ -2503,11 +2505,29 @@ static int intel_base_nid(struct hda_codec *codec)
 
 static int intel_pin2port(void *audio_ptr, int pin_nid)
 {
-       int base_nid = intel_base_nid(audio_ptr);
+       struct hda_codec *codec = audio_ptr;
+       struct hdmi_spec *spec = codec->spec;
+       int base_nid, i;
 
-       if (WARN_ON(pin_nid < base_nid || pin_nid >= base_nid + 3))
-               return -1;
-       return pin_nid - base_nid + 1; /* intel port is 1-based */
+       if (!spec->port_num) {
+               base_nid = intel_base_nid(codec);
+               if (WARN_ON(pin_nid < base_nid || pin_nid >= base_nid + 3))
+                       return -1;
+               return pin_nid - base_nid + 1; /* intel port is 1-based */
+       }
+
+       /*
+        * Look up the pin number in the mapping table and return the
+        * index, which indicates the port number.
+        */
+       for (i = 0; i < spec->port_num; i++) {
+               if (pin_nid == spec->port_map[i])
+                       return i + 1;
+       }
+
+       /* return -1 if the pin number is not in the mapping table */
+       codec_info(codec, "Can't find the HDMI/DP port for pin %d\n", pin_nid);
+       return -1;
 }
 
 static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe)
@@ -2608,7 +2628,8 @@ static int parse_intel_hdmi(struct hda_codec *codec)
 }
 
 /* Intel Haswell and onwards; audio component with eld notifier */
-static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid)
+static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid,
+                                const int *port_map, int port_num)
 {
        struct hdmi_spec *spec;
        int err;
@@ -2620,6 +2641,8 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid)
        codec->dp_mst = true;
        spec->dyn_pcm_assign = true;
        spec->vendor_nid = vendor_nid;
+       spec->port_map = port_map;
+       spec->port_num = port_num;
 
        intel_haswell_enable_all_pins(codec, true);
        intel_haswell_fixup_enable_dp12(codec);
@@ -2638,12 +2661,23 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid)
 
 static int patch_i915_hsw_hdmi(struct hda_codec *codec)
 {
-       return intel_hsw_common_init(codec, INTEL_VENDOR_NID);
+       return intel_hsw_common_init(codec, 0x08, NULL, 0);
 }
 
 static int patch_i915_glk_hdmi(struct hda_codec *codec)
 {
-       return intel_hsw_common_init(codec, INTEL_GLK_VENDOR_NID);
+       return intel_hsw_common_init(codec, 0x0b, NULL, 0);
+}
+
+static int patch_i915_icl_hdmi(struct hda_codec *codec)
+{
+       /*
+        * Pin-to-port mapping table, where the value indicates the pin
+        * number and the index indicates the port number (1-based).
+        */
+       static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb};
+
+       return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
 }
 
 /* Intel Baytrail and Braswell; with eld notifier */
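With the Icelake table introduced above, intel_pin2port() now resolves ports by table lookup rather than NID arithmetic; for example (values follow directly from the map):

/*
 *	intel_pin2port(codec, 0x4)  -> 1	(map[0])
 *	intel_pin2port(codec, 0x8)  -> 3	(map[2])
 *	intel_pin2port(codec, 0xb)  -> 5	(map[4])
 *	intel_pin2port(codec, 0x5)  -> -1	(not in the table; logged)
 */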
@@ -3886,6 +3920,7 @@ HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",      patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f84, "VX11 HDMI/DP",    patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x11069f85, "VX11 HDMI/DP",    patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80860054, "IbexPeak HDMI",   patch_i915_cpt_hdmi),
+HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862801, "Bearlake HDMI",   patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862802, "Cantiga HDMI",    patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862803, "Eaglelake HDMI",  patch_generic_hdmi),
@@ -3899,7 +3934,7 @@ HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",       patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",   patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
-HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
+HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI",    patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",        patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",   patch_i915_byt_hdmi),
index c8413d44973ca38e62e0d008dcc34b938f9553ec..384719d5c44ec89158e781b63a912734e3d24216 100644 (file)
@@ -5491,7 +5491,7 @@ static void alc_headset_btn_callback(struct hda_codec *codec,
        jack->jack->button_state = report;
 }
 
-static void alc_fixup_headset_jack(struct hda_codec *codec,
+static void alc295_fixup_chromebook(struct hda_codec *codec,
                                    const struct hda_fixup *fix, int action)
 {
 
@@ -5501,6 +5501,16 @@ static void alc_fixup_headset_jack(struct hda_codec *codec,
                                                    alc_headset_btn_callback);
                snd_hda_jack_add_kctl(codec, 0x55, "Headset Jack", false,
                                      SND_JACK_HEADSET, alc_headset_btn_keymap);
+               switch (codec->core.vendor_id) {
+               case 0x10ec0295:
+                       alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */
+                       alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15);
+                       break;
+               case 0x10ec0236:
+                       alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */
+                       alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15);
+                       break;
+               }
                break;
        case HDA_FIXUP_ACT_INIT:
                switch (codec->core.vendor_id) {
@@ -5670,9 +5680,13 @@ enum {
        ALC294_FIXUP_ASUS_MIC,
        ALC294_FIXUP_ASUS_HEADSET_MIC,
        ALC294_FIXUP_ASUS_SPK,
-       ALC225_FIXUP_HEADSET_JACK,
        ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
        ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE,
+       ALC255_FIXUP_ACER_HEADSET_MIC,
+       ALC295_FIXUP_CHROME_BOOK,
+       ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE,
+       ALC225_FIXUP_WYSE_AUTO_MUTE,
+       ALC225_FIXUP_WYSE_DISABLE_MIC_VREF,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6615,9 +6629,9 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
        },
-       [ALC225_FIXUP_HEADSET_JACK] = {
+       [ALC295_FIXUP_CHROME_BOOK] = {
                .type = HDA_FIXUP_FUNC,
-               .v.func = alc_fixup_headset_jack,
+               .v.func = alc295_fixup_chromebook,
        },
        [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = {
                .type = HDA_FIXUP_PINS,
@@ -6639,6 +6653,38 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC285_FIXUP_LENOVO_HEADPHONE_NOISE
        },
+       [ALC255_FIXUP_ACER_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11130 },
+                       { 0x1a, 0x90a60140 }, /* use as internal mic */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
+       [ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x16, 0x01011020 }, /* Rear Line out */
+                       { 0x19, 0x01a1913c }, /* use as Front headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC225_FIXUP_WYSE_AUTO_MUTE
+       },
+       [ALC225_FIXUP_WYSE_AUTO_MUTE] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_auto_mute_via_amp,
+               .chained = true,
+               .chain_id = ALC225_FIXUP_WYSE_DISABLE_MIC_VREF
+       },
+       [ALC225_FIXUP_WYSE_DISABLE_MIC_VREF] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_disable_mic_vref,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6658,6 +6704,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
        SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
        SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
@@ -6702,6 +6749,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB),
+       SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -6764,11 +6813,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
        SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
        SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360),
-       SND_PCI_QUIRK(0x103c, 0x82bf, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
-       SND_PCI_QUIRK(0x103c, 0x82c0, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
@@ -7048,7 +7099,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC255_FIXUP_DUMMY_LINEOUT_VERB, .name = "alc255-dummy-lineout"},
        {.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"},
        {.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"},
-       {.id = ALC225_FIXUP_HEADSET_JACK, .name = "alc-sense-combo"},
+       {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"},
        {}
 };
 #define ALC225_STANDARD_PINS \
index b0f8979ff2d2f0763e40f300ffc8d98ebb560dbf..221adf68bd0cb7a67f077a32f07bf7ee770a6071 100644 (file)
@@ -104,7 +104,12 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize,
 
        for (u = 0; u < USB_STREAM_NURBS; ++u) {
                sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
+               if (!sk->inurb[u])
+                       return -ENOMEM;
+
                sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
+               if (!sk->outurb[u])
+                       return -ENOMEM;
        }
 
        if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) ||
index 10ddf223055ba6c572bb4efd8032c7f8f83ba13e..e1286d2cdfbf928d9f419996baa90ac95e50f6af 100644 (file)
@@ -35,6 +35,8 @@ obj-$(CONFIG_DAX) += dax.o
 endif
 obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
+obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
 
 nfit-y := $(ACPI_SRC)/core.o
 nfit-y += $(ACPI_SRC)/intel.o
@@ -57,6 +59,7 @@ nd_e820-y := $(NVDIMM_SRC)/e820.o
 nd_e820-y += config_check.o
 
 dax-y := $(DAX_SRC)/super.o
+dax-y += $(DAX_SRC)/bus.o
 dax-y += config_check.o
 
 device_dax-y := $(DAX_SRC)/device.o
@@ -64,7 +67,9 @@ device_dax-y += dax-dev.o
 device_dax-y += device_dax_test.o
 device_dax-y += config_check.o
 
-dax_pmem-y := $(DAX_SRC)/pmem.o
+dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
+dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
+dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
 dax_pmem-y += config_check.o
 
 libnvdimm-y := $(NVDIMM_SRC)/core.o
index 36ee3d8797c3bcdac889d2fa32d4c2280906f845..f36e708265b850fe5c423f7d0ea95456206dfe2f 100644 (file)
 phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
                unsigned long size)
 {
-       struct resource *res;
+       struct resource *res = &dev_dax->region->res;
        phys_addr_t addr;
-       int i;
 
-       for (i = 0; i < dev_dax->num_resources; i++) {
-               res = &dev_dax->res[i];
-               addr = pgoff * PAGE_SIZE + res->start;
-               if (addr >= res->start && addr <= res->end)
-                       break;
-               pgoff -= PHYS_PFN(resource_size(res));
-       }
-
-       if (i < dev_dax->num_resources) {
-               res = &dev_dax->res[i];
+       addr = pgoff * PAGE_SIZE + res->start;
+       if (addr >= res->start && addr <= res->end) {
                if (addr + size - 1 <= res->end) {
                        if (get_nfit_res(addr)) {
                                struct page *page;
@@ -44,6 +35,5 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
                                return addr;
                }
        }
-
        return -1;
 }
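With the multi-resource walk gone, the address math reduces to a single range check; a worked example with illustrative values:

/*
 * Assume res->start == 0x100000000, res->end == 0x13fffffff and
 * PAGE_SIZE == 4096:
 *
 *	dax_pgoff_to_phys(dev_dax, 2, PAGE_SIZE)
 *		addr = 2 * 4096 + 0x100000000 = 0x100002000
 *
 * addr + PAGE_SIZE - 1 <= res->end, so 0x100002000 is returned;
 * any pgoff that pushes the range past res->end yields -1.
 */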
index fb5758ac469e6f89045d57aa5dc93fb9c016267f..971fc84281179a7860a41c7d6f1cec398b441408 100644 (file)
@@ -32,6 +32,7 @@ TARGETS += net
 TARGETS += netfilter
 TARGETS += networking/timestamping
 TARGETS += nsfs
+TARGETS += pidfd
 TARGETS += powerpc
 TARGETS += proc
 TARGETS += pstore
index 6210ba41c29e90111644f1715abf7b3ff9a1f5b4..2689d1ea6d7aab48474e027b8b6a8fb28822e431 100644 (file)
@@ -3,6 +3,7 @@
 /x86_64/platform_info_test
 /x86_64/set_sregs_test
 /x86_64/sync_regs_test
+/x86_64/vmx_close_while_nested_test
 /x86_64/vmx_tsc_adjust_test
 /x86_64/state_test
 /dirty_log_test
index f9a0e99384805a417100f4d8b34db3e4def88679..3c1f4bdf90000c7f163fd72a2974bfabf982c47f 100644 (file)
@@ -16,6 +16,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
new file mode 100644 (file)
index 0000000..6edec6f
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * vmx_close_while_nested
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verify that nothing bad happens if a KVM user exits with open
+ * file descriptors while executing a nested guest.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID                5
+
+enum {
+       PORT_L0_EXIT = 0x2000,
+};
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+       /* Exit to L0 */
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (PORT_L0_EXIT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct vmx_pages *vmx_pages;
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+       if (!(entry->ecx & CPUID_VMX)) {
+               fprintf(stderr, "nested VMX not enabled, skipping test\n");
+               exit(KSFT_SKIP);
+       }
+
+       vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+       for (;;) {
+               volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+               struct ucall uc;
+
+               vcpu_run(vm, VCPU_ID);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+                           run->exit_reason,
+                           exit_reason_str(run->exit_reason));
+
+               if (run->io.port == PORT_L0_EXIT)
+                       break;
+
+               switch (get_ucall(vm, VCPU_ID, &uc)) {
+               case UCALL_ABORT:
+                       TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+                       /* NOT REACHED */
+               default:
+                       TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+               }
+       }
+}
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
new file mode 100644 (file)
index 0000000..deaf807
--- /dev/null
@@ -0,0 +1,6 @@
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := pidfd_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
new file mode 100644 (file)
index 0000000..d59378a
--- /dev/null
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
+                                       unsigned int flags)
+{
+       return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
+static int signal_received;
+
+static void set_signal_received_on_sigusr1(int sig)
+{
+       if (sig == SIGUSR1)
+               signal_received = 1;
+}
+
+/*
+ * A straightforward test of whether pidfd_send_signal() works is to send
+ * a signal to ourselves.
+ */
+static int test_pidfd_send_signal_simple_success(void)
+{
+       int pidfd, ret;
+       const char *test_name = "pidfd_send_signal send SIGUSR1";
+
+       pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
+       if (pidfd < 0)
+               ksft_exit_fail_msg(
+                       "%s test: Failed to open process file descriptor\n",
+                       test_name);
+
+       signal(SIGUSR1, set_signal_received_on_sigusr1);
+
+       ret = sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0);
+       close(pidfd);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s test: Failed to send signal\n",
+                                  test_name);
+
+       if (signal_received != 1)
+               ksft_exit_fail_msg("%s test: Failed to receive signal\n",
+                                  test_name);
+
+       signal_received = 0;
+       ksft_test_result_pass("%s test: Sent signal\n", test_name);
+       return 0;
+}
+
+static int wait_for_pid(pid_t pid)
+{
+       int status, ret;
+
+again:
+       ret = waitpid(pid, &status, 0);
+       if (ret == -1) {
+               if (errno == EINTR)
+                       goto again;
+
+               return -1;
+       }
+
+       if (ret != pid)
+               goto again;
+
+       if (!WIFEXITED(status))
+               return -1;
+
+       return WEXITSTATUS(status);
+}
+
+static int test_pidfd_send_signal_exited_fail(void)
+{
+       int pidfd, ret, saved_errno;
+       char buf[256];
+       pid_t pid;
+       const char *test_name = "pidfd_send_signal signal exited process";
+
+       pid = fork();
+       if (pid < 0)
+               ksft_exit_fail_msg("%s test: Failed to create new process\n",
+                                  test_name);
+
+       if (pid == 0)
+               _exit(EXIT_SUCCESS);
+
+       snprintf(buf, sizeof(buf), "/proc/%d", pid);
+
+       pidfd = open(buf, O_DIRECTORY | O_CLOEXEC);
+
+       (void)wait_for_pid(pid);
+
+       if (pidfd < 0)
+               ksft_exit_fail_msg(
+                       "%s test: Failed to open process file descriptor\n",
+                       test_name);
+
+       ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
+       saved_errno = errno;
+       close(pidfd);
+       if (ret == 0)
+               ksft_exit_fail_msg(
+                       "%s test: Managed to send signal to process even though it should have failed\n",
+                       test_name);
+
+       if (saved_errno != ESRCH)
+               ksft_exit_fail_msg(
+                       "%s test: Expected to receive ESRCH as errno value but received %d instead\n",
+                       test_name, saved_errno);
+
+       ksft_test_result_pass("%s test: Failed to send signal as expected\n",
+                             test_name);
+       return 0;
+}
+
+/*
+ * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c.
+ * That means that, when the pid counter wraps around, any pid < 300 will
+ * be skipped.
+ * So we need to use a pid > 300 in order to test recycling.
+ */
+#define PID_RECYCLE 1000
+
+/*
+ * Maximum number of cycles we allow. This is equivalent to PID_MAX_DEFAULT.
+ * If users have set a higher limit, or we have cycled PIDFD_MAX_DEFAULT
+ * times, we skip the test so as not to go into an infinite loop or block
+ * for a long time.
+ */
+#define PIDFD_MAX_DEFAULT 0x8000
+
+/*
+ * Define a few custom error codes for the child process to clearly indicate
+ * what is happening. This way we can tell the difference between a system
+ * error, a test error, etc.
+ */
+#define PIDFD_PASS 0
+#define PIDFD_FAIL 1
+#define PIDFD_ERROR 2
+#define PIDFD_SKIP 3
+#define PIDFD_XFAIL 4
+
+static int test_pidfd_send_signal_recycled_pid_fail(void)
+{
+       int i, ret;
+       pid_t pid1;
+       const char *test_name = "pidfd_send_signal signal recycled pid";
+
+       ret = unshare(CLONE_NEWPID);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n",
+                                  test_name);
+
+       ret = unshare(CLONE_NEWNS);
+       if (ret < 0)
+               ksft_exit_fail_msg(
+                       "%s test: Failed to unshare mount namespace\n",
+                       test_name);
+
+       ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s test: Failed to remount / private\n",
+                                  test_name);
+
+       /* pid 1 in new pid namespace */
+       pid1 = fork();
+       if (pid1 < 0)
+               ksft_exit_fail_msg("%s test: Failed to create new process\n",
+                                  test_name);
+
+       if (pid1 == 0) {
+               char buf[256];
+               pid_t pid2;
+               int pidfd = -1;
+
+               (void)umount2("/proc", MNT_DETACH);
+               ret = mount("proc", "/proc", "proc", 0, NULL);
+               if (ret < 0)
+                       _exit(PIDFD_ERROR);
+
+               /* grab pid PID_RECYCLE */
+               for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) {
+                       pid2 = fork();
+                       if (pid2 < 0)
+                               _exit(PIDFD_ERROR);
+
+                       if (pid2 == 0)
+                               _exit(PIDFD_PASS);
+
+                       if (pid2 == PID_RECYCLE) {
+                               snprintf(buf, sizeof(buf), "/proc/%d", pid2);
+                               ksft_print_msg("pid to recycle is %d\n", pid2);
+                               pidfd = open(buf, O_DIRECTORY | O_CLOEXEC);
+                       }
+
+                       if (wait_for_pid(pid2))
+                               _exit(PIDFD_ERROR);
+
+                       if (pid2 >= PID_RECYCLE)
+                               break;
+               }
+
+               /*
+                * We want to be as predictable as we can, so if we haven't
+                * been able to grab pid PID_RECYCLE, skip the test.
+                */
+               if (pid2 != PID_RECYCLE) {
+                       /* skip test */
+                       close(pidfd);
+                       _exit(PIDFD_SKIP);
+               }
+
+               if (pidfd < 0)
+                       _exit(PIDFD_ERROR);
+
+               for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) {
+                       char c;
+                       int pipe_fds[2];
+                       pid_t recycled_pid;
+                       int child_ret = PIDFD_PASS;
+
+                       ret = pipe2(pipe_fds, O_CLOEXEC);
+                       if (ret < 0)
+                               _exit(PIDFD_ERROR);
+
+                       recycled_pid = fork();
+                       if (recycled_pid < 0)
+                               _exit(PIDFD_ERROR);
+
+                       if (recycled_pid == 0) {
+                               close(pipe_fds[1]);
+                               (void)read(pipe_fds[0], &c, 1);
+                               close(pipe_fds[0]);
+
+                               _exit(PIDFD_PASS);
+                       }
+
+                       /*
+                        * Stop the child so we can inspect whether we have
+                        * recycled pid PID_RECYCLE.
+                        */
+                       close(pipe_fds[0]);
+                       ret = kill(recycled_pid, SIGSTOP);
+                       close(pipe_fds[1]);
+                       if (ret) {
+                               (void)wait_for_pid(recycled_pid);
+                               _exit(PIDFD_ERROR);
+                       }
+
+                       /*
+                        * We have recycled the pid. Try to signal it. This
+                        * needs to fail since this is a different process than
+                        * the one the pidfd refers to.
+                        */
+                       if (recycled_pid == PID_RECYCLE) {
+                               ret = sys_pidfd_send_signal(pidfd, SIGCONT,
+                                                           NULL, 0);
+                               if (ret && errno == ESRCH)
+                                       child_ret = PIDFD_XFAIL;
+                               else
+                                       child_ret = PIDFD_FAIL;
+                       }
+
+                       /* let the process move on */
+                       ret = kill(recycled_pid, SIGCONT);
+                       if (ret)
+                               (void)kill(recycled_pid, SIGKILL);
+
+                       if (wait_for_pid(recycled_pid))
+                               _exit(PIDFD_ERROR);
+
+                       switch (child_ret) {
+                       case PIDFD_FAIL:
+                               /* fallthrough */
+                       case PIDFD_XFAIL:
+                               _exit(child_ret);
+                       case PIDFD_PASS:
+                               break;
+                       default:
+                               /* not reached */
+                               _exit(PIDFD_ERROR);
+                       }
+
+                       /*
+                        * If the user set a custom pid_max limit, the pid
+                        * could be in the millions. Skip the test in that
+                        * case.
+                        */
+                       if (recycled_pid > PIDFD_MAX_DEFAULT)
+                               _exit(PIDFD_SKIP);
+               }
+
+               /* failed to recycle pid */
+               _exit(PIDFD_SKIP);
+       }
+
+       ret = wait_for_pid(pid1);
+       switch (ret) {
+       case PIDFD_FAIL:
+               ksft_exit_fail_msg(
+                       "%s test: Managed to signal recycled pid %d\n",
+                       test_name, PID_RECYCLE);
+       case PIDFD_PASS:
+               ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
+                                  test_name, PID_RECYCLE);
+       case PIDFD_SKIP:
+               ksft_print_msg("%s test: Skipping test\n", test_name);
+               ret = 0;
+               break;
+       case PIDFD_XFAIL:
+               ksft_test_result_pass(
+                       "%s test: Failed to signal recycled pid as expected\n",
+                       test_name);
+               ret = 0;
+               break;
+       default /* PIDFD_ERROR */:
+               ksft_exit_fail_msg("%s test: Error while running tests\n",
+                                  test_name);
+       }
+
+       return ret;
+}
+
+static int test_pidfd_send_signal_syscall_support(void)
+{
+       int pidfd, ret;
+       const char *test_name = "pidfd_send_signal check for support";
+
+       pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
+       if (pidfd < 0)
+               ksft_exit_fail_msg(
+                       "%s test: Failed to open process file descriptor\n",
+                       test_name);
+
+       ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
+       if (ret < 0) {
+               /*
+                * pidfd_send_signal() will currently return ENOSYS when
+                * CONFIG_PROC_FS is not set.
+                */
+               if (errno == ENOSYS)
+                       ksft_exit_skip(
+                               "%s test: pidfd_send_signal() syscall not supported (Ensure that CONFIG_PROC_FS=y is set)\n",
+                               test_name);
+
+               ksft_exit_fail_msg("%s test: Failed to send signal\n",
+                                  test_name);
+       }
+
+       close(pidfd);
+       ksft_test_result_pass(
+               "%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n",
+               test_name);
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       ksft_print_header();
+
+       test_pidfd_send_signal_syscall_support();
+       test_pidfd_send_signal_simple_success();
+       test_pidfd_send_signal_exited_fail();
+       test_pidfd_send_signal_recycled_pid_fail();
+
+       return ksft_exit_pass();
+}
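Outside the harness, the minimal user-space call sequence the tests above exercise looks like this (a standalone sketch; it assumes __NR_pidfd_send_signal is provided by the installed kernel headers, as in pidfd_test.c):

#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <syscall.h>
#include <unistd.h>

/* Send 'sig' to ourselves through a /proc/self pidfd; returns 0 or -1. */
static int signal_self_via_pidfd(int sig)
{
	int pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
	int ret;

	if (pidfd < 0)
		return -1;
	ret = syscall(__NR_pidfd_send_signal, pidfd, sig, NULL, 0);
	close(pidfd);
	return ret;
}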
index b07ac4614e1c644827935c0d0330bc4c56623529..3417f2dbc3667c372d4ed838a9f95f3826555650 100644 (file)
@@ -25,6 +25,7 @@
 
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
+#include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 
 #include <kvm/arm_vgic.h>
@@ -34,7 +35,9 @@
 
 static struct timecounter *timecounter;
 static unsigned int host_vtimer_irq;
+static unsigned int host_ptimer_irq;
 static u32 host_vtimer_irq_flags;
+static u32 host_ptimer_irq_flags;
 
 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
 
@@ -52,12 +55,34 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
                                 struct arch_timer_context *timer_ctx);
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
+static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
+                               struct arch_timer_context *timer,
+                               enum kvm_arch_timer_regs treg,
+                               u64 val);
+static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
+                             struct arch_timer_context *timer,
+                             enum kvm_arch_timer_regs treg);
 
 u64 kvm_phys_timer_read(void)
 {
        return timecounter->cc->read(timecounter->cc);
 }
 
+static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+{
+       if (has_vhe()) {
+               map->direct_vtimer = vcpu_vtimer(vcpu);
+               map->direct_ptimer = vcpu_ptimer(vcpu);
+               map->emul_ptimer = NULL;
+       } else {
+               map->direct_vtimer = vcpu_vtimer(vcpu);
+               map->direct_ptimer = NULL;
+               map->emul_ptimer = vcpu_ptimer(vcpu);
+       }
+
+       trace_kvm_get_timer_map(vcpu->vcpu_id, map);
+}
+
 static inline bool userspace_irqchip(struct kvm *kvm)
 {
        return static_branch_unlikely(&userspace_irqchip_in_use) &&
@@ -78,20 +103,27 @@ static void soft_timer_cancel(struct hrtimer *hrt)
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
-       struct arch_timer_context *vtimer;
+       struct arch_timer_context *ctx;
+       struct timer_map map;
 
        /*
         * We may see a timer interrupt after vcpu_put() has been called which
         * sets the CPU's vcpu pointer to NULL, because even though the timer
-        * has been disabled in vtimer_save_state(), the hardware interrupt
+        * has been disabled in timer_save_state(), the hardware interrupt
         * signal may not have been retired from the interrupt controller yet.
         */
        if (!vcpu)
                return IRQ_HANDLED;
 
-       vtimer = vcpu_vtimer(vcpu);
-       if (kvm_timer_should_fire(vtimer))
-               kvm_timer_update_irq(vcpu, true, vtimer);
+       get_timer_map(vcpu, &map);
+
+       if (irq == host_vtimer_irq)
+               ctx = map.direct_vtimer;
+       else
+               ctx = map.direct_ptimer;
+
+       if (kvm_timer_should_fire(ctx))
+               kvm_timer_update_irq(vcpu, true, ctx);
 
        if (userspace_irqchip(vcpu->kvm) &&
            !static_branch_unlikely(&has_gic_active_state))
@@ -122,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
 
 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
 {
-       return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+       WARN_ON(timer_ctx && timer_ctx->loaded);
+       return timer_ctx &&
+              !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
                (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
 }
 
@@ -132,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
  */
 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
 {
-       u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       u64 min_delta = ULLONG_MAX;
+       int i;
 
-       if (kvm_timer_irq_can_fire(vtimer))
-               min_virt = kvm_timer_compute_delta(vtimer);
+       for (i = 0; i < NR_KVM_TIMERS; i++) {
+               struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
 
-       if (kvm_timer_irq_can_fire(ptimer))
-               min_phys = kvm_timer_compute_delta(ptimer);
+               WARN(ctx->loaded, "timer %d loaded\n", i);
+               if (kvm_timer_irq_can_fire(ctx))
+                       min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
+       }
 
        /* If none of timers can fire, then return 0 */
-       if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
+       if (min_delta == ULLONG_MAX)
                return 0;
 
-       return min(min_virt, min_phys);
+       return min_delta;
 }
 
 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
@@ -173,41 +208,58 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
        return HRTIMER_NORESTART;
 }
 
-static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
+static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
 {
-       struct arch_timer_context *ptimer;
-       struct arch_timer_cpu *timer;
+       struct arch_timer_context *ctx;
        struct kvm_vcpu *vcpu;
        u64 ns;
 
-       timer = container_of(hrt, struct arch_timer_cpu, phys_timer);
-       vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
-       ptimer = vcpu_ptimer(vcpu);
+       ctx = container_of(hrt, struct arch_timer_context, hrtimer);
+       vcpu = ctx->vcpu;
+
+       trace_kvm_timer_hrtimer_expire(ctx);
 
        /*
         * Check that the timer has really expired from the guest's
         * PoV (NTP on the host may have forced it to expire
         * early). If not ready, schedule for a later time.
         */
-       ns = kvm_timer_compute_delta(ptimer);
+       ns = kvm_timer_compute_delta(ctx);
        if (unlikely(ns)) {
                hrtimer_forward_now(hrt, ns_to_ktime(ns));
                return HRTIMER_RESTART;
        }
 
-       kvm_timer_update_irq(vcpu, true, ptimer);
+       kvm_timer_update_irq(vcpu, true, ctx);
        return HRTIMER_NORESTART;
 }
 
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
+       enum kvm_arch_timers index;
        u64 cval, now;
 
+       if (!timer_ctx)
+               return false;
+
+       index = arch_timer_ctx_index(timer_ctx);
+
        if (timer_ctx->loaded) {
-               u32 cnt_ctl;
+               u32 cnt_ctl = 0;
+
+               switch (index) {
+               case TIMER_VTIMER:
+                       cnt_ctl = read_sysreg_el0(cntv_ctl);
+                       break;
+               case TIMER_PTIMER:
+                       cnt_ctl = read_sysreg_el0(cntp_ctl);
+                       break;
+               case NR_KVM_TIMERS:
+                       /* unreachable; silences GCC's enum-switch warning */
+                       cnt_ctl = 0;
+                       break;
+               }
 
-               /* Only the virtual timer can be loaded so far */
-               cnt_ctl = read_sysreg_el0(cntv_ctl);
                return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
                        (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
                       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
@@ -224,13 +276,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 
 bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       struct timer_map map;
 
-       if (kvm_timer_should_fire(vtimer))
-               return true;
+       get_timer_map(vcpu, &map);
 
-       return kvm_timer_should_fire(ptimer);
+       return kvm_timer_should_fire(map.direct_vtimer) ||
+              kvm_timer_should_fire(map.direct_ptimer) ||
+              kvm_timer_should_fire(map.emul_ptimer);
 }
 
 /*
@@ -269,77 +321,70 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
        }
 }
 
-/* Schedule the background timer for the emulated timer. */
-static void phys_timer_emulate(struct kvm_vcpu *vcpu)
+static void timer_emulate(struct arch_timer_context *ctx)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       bool should_fire = kvm_timer_should_fire(ctx);
+
+       trace_kvm_timer_emulate(ctx, should_fire);
+
+       if (should_fire) {
+               kvm_timer_update_irq(ctx->vcpu, true, ctx);
+               return;
+       }
 
        /*
         * If the timer can fire now, we don't need to have a soft timer
         * scheduled for the future.  If the timer cannot fire at all,
         * then we also don't need a soft timer.
         */
-       if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
-               soft_timer_cancel(&timer->phys_timer);
+       if (!kvm_timer_irq_can_fire(ctx)) {
+               soft_timer_cancel(&ctx->hrtimer);
                return;
        }
 
-       soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer));
+       soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 }
 
-/*
- * Check if there was a change in the timer state, so that we should either
- * raise or lower the line level to the GIC or schedule a background timer to
- * emulate the physical timer.
- */
-static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+static void timer_save_state(struct arch_timer_context *ctx)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-       bool level;
+       struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+       enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
+       unsigned long flags;
 
-       if (unlikely(!timer->enabled))
+       if (!timer->enabled)
                return;
 
-       /*
-        * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
-        * of its lifecycle is offloaded to the hardware, and we therefore may
-        * not have lowered the irq.level value before having to signal a new
-        * interrupt, but have to signal an interrupt every time the level is
-        * asserted.
-        */
-       level = kvm_timer_should_fire(vtimer);
-       kvm_timer_update_irq(vcpu, level, vtimer);
+       local_irq_save(flags);
 
-       phys_timer_emulate(vcpu);
+       if (!ctx->loaded)
+               goto out;
 
-       if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
-               kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-}
+       switch (index) {
+       case TIMER_VTIMER:
+               ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
+               ctx->cnt_cval = read_sysreg_el0(cntv_cval);
 
-static void vtimer_save_state(struct kvm_vcpu *vcpu)
-{
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       unsigned long flags;
+               /* Disable the timer */
+               write_sysreg_el0(0, cntv_ctl);
+               isb();
 
-       local_irq_save(flags);
+               break;
+       case TIMER_PTIMER:
+               ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
+               ctx->cnt_cval = read_sysreg_el0(cntp_cval);
 
-       if (!vtimer->loaded)
-               goto out;
+               /* Disable the timer */
+               write_sysreg_el0(0, cntp_ctl);
+               isb();
 
-       if (timer->enabled) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+               break;
+       case NR_KVM_TIMERS:
+               BUG();
        }
 
-       /* Disable the virtual timer */
-       write_sysreg_el0(0, cntv_ctl);
-       isb();
+       trace_kvm_timer_save_state(ctx);
 
-       vtimer->loaded = false;
+       ctx->loaded = false;
 out:
        local_irq_restore(flags);
 }
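timer_save_state() and its restore twin below run with interrupts disabled and gate on ctx->loaded, so a racing ISR or a repeated vcpu_put/vcpu_load never saves or restores the same context twice. The soft-timer helpers used here and in timer_emulate() are thin hrtimer wrappers; roughly (a sketch of helpers defined earlier in arch_timer.c; the exact mode flag is an assumption for this kernel version):

    static void soft_timer_start(struct hrtimer *hrt, u64 ns)
    {
            hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
                          HRTIMER_MODE_ABS);
    }

    static void soft_timer_cancel(struct hrtimer *hrt)
    {
            hrtimer_cancel(hrt);
    }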
@@ -349,67 +394,72 @@ out:
  * thread is removed from its waitqueue and made runnable when there's a timer
  * interrupt to handle.
  */
-void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-
-       vtimer_save_state(vcpu);
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+       struct timer_map map;
 
-       /*
-        * No need to schedule a background timer if any guest timer has
-        * already expired, because kvm_vcpu_block will return before putting
-        * the thread to sleep.
-        */
-       if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
-               return;
+       get_timer_map(vcpu, &map);
 
        /*
-        * If both timers are not capable of raising interrupts (disabled or
+        * If no timers are capable of raising interrupts (disabled or
         * masked), then there's no more work for us to do.
         */
-       if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
+       if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
+           !kvm_timer_irq_can_fire(map.direct_ptimer) &&
+           !kvm_timer_irq_can_fire(map.emul_ptimer))
                return;
 
        /*
-        * The guest timers have not yet expired, schedule a background timer.
+        * At least one guest timer will expire. Schedule a background timer.
         * Set the earliest expiration time among the guest timers.
         */
        soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void vtimer_restore_state(struct kvm_vcpu *vcpu)
+static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+
+       soft_timer_cancel(&timer->bg_timer);
+}
+
+static void timer_restore_state(struct arch_timer_context *ctx)
+{
+       struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+       enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
        unsigned long flags;
 
+       if (!timer->enabled)
+               return;
+
        local_irq_save(flags);
 
-       if (vtimer->loaded)
+       if (ctx->loaded)
                goto out;
 
-       if (timer->enabled) {
-               write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
+       switch (index) {
+       case TIMER_VTIMER:
+               write_sysreg_el0(ctx->cnt_cval, cntv_cval);
                isb();
-               write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
+               write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
+               break;
+       case TIMER_PTIMER:
+               write_sysreg_el0(ctx->cnt_cval, cntp_cval);
+               isb();
+               write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
+               break;
+       case NR_KVM_TIMERS:
+               BUG();
        }
 
-       vtimer->loaded = true;
+       trace_kvm_timer_restore_state(ctx);
+
+       ctx->loaded = true;
 out:
        local_irq_restore(flags);
 }
 
-void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
-{
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-       vtimer_restore_state(vcpu);
-
-       soft_timer_cancel(&timer->bg_timer);
-}
-
 static void set_cntvoff(u64 cntvoff)
 {
        u32 low = lower_32_bits(cntvoff);
@@ -425,23 +475,32 @@ static void set_cntvoff(u64 cntvoff)
        kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
 {
        int r;
-       r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+       r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
        WARN_ON(r);
 }
 
-static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 {
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       bool phys_active;
+       struct kvm_vcpu *vcpu = ctx->vcpu;
+       bool phys_active = false;
+
+       /*
+        * Update the timer output so that it is likely to match the
+        * state we're about to restore. If the timer expires between
+        * this point and the register restoration, we'll take the
+        * interrupt anyway.
+        */
+       kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
 
        if (irqchip_in_kernel(vcpu->kvm))
-               phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-       else
-               phys_active = vtimer->irq.level;
-       set_vtimer_irq_phys_active(vcpu, phys_active);
+               phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
+
+       phys_active |= ctx->irq.level;
+
+       set_timer_irq_phys_active(ctx, phys_active);
 }
 
 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
@@ -466,28 +525,32 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+       struct timer_map map;
 
        if (unlikely(!timer->enabled))
                return;
 
-       if (static_branch_likely(&has_gic_active_state))
-               kvm_timer_vcpu_load_gic(vcpu);
-       else
+       get_timer_map(vcpu, &map);
+
+       if (static_branch_likely(&has_gic_active_state)) {
+               kvm_timer_vcpu_load_gic(map.direct_vtimer);
+               if (map.direct_ptimer)
+                       kvm_timer_vcpu_load_gic(map.direct_ptimer);
+       } else {
                kvm_timer_vcpu_load_nogic(vcpu);
+       }
 
-       set_cntvoff(vtimer->cntvoff);
+       set_cntvoff(map.direct_vtimer->cntvoff);
 
-       vtimer_restore_state(vcpu);
+       kvm_timer_unblocking(vcpu);
 
-       /* Set the background timer for the physical timer emulation. */
-       phys_timer_emulate(vcpu);
+       timer_restore_state(map.direct_vtimer);
+       if (map.direct_ptimer)
+               timer_restore_state(map.direct_ptimer);
 
-       /* If the timer fired while we weren't running, inject it now */
-       if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
-               kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
+       if (map.emul_ptimer)
+               timer_emulate(map.emul_ptimer);
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -509,15 +572,20 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+       struct timer_map map;
 
        if (unlikely(!timer->enabled))
                return;
 
-       vtimer_save_state(vcpu);
+       get_timer_map(vcpu, &map);
+
+       timer_save_state(map.direct_vtimer);
+       if (map.direct_ptimer)
+               timer_save_state(map.direct_ptimer);
 
        /*
-        * Cancel the physical timer emulation, because the only case where we
+        * Cancel soft timer emulation, because the only case where we
         * need it after a vcpu_put is in the context of a sleeping VCPU, and
         * in that case we already factor in the deadline for the physical
         * timer when scheduling the bg_timer.
@@ -525,7 +593,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
         * In any case, we re-schedule the hrtimer for the physical timer when
         * coming back to the VCPU thread in kvm_timer_vcpu_load().
         */
-       soft_timer_cancel(&timer->phys_timer);
+       if (map.emul_ptimer)
+               soft_timer_cancel(&map.emul_ptimer->hrtimer);
+
+       if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+               kvm_timer_blocking(vcpu);
 
        /*
         * The kernel may decide to run userspace after calling vcpu_put, so
@@ -534,8 +606,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
         * counter of non-VHE case. For VHE, the virtual counter uses a fixed
         * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
         */
-       if (!has_vhe())
-               set_cntvoff(0);
+       set_cntvoff(0);
 }
 
 /*
@@ -550,7 +621,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
        if (!kvm_timer_should_fire(vtimer)) {
                kvm_timer_update_irq(vcpu, false, vtimer);
                if (static_branch_likely(&has_gic_active_state))
-                       set_vtimer_irq_phys_active(vcpu, false);
+                       set_timer_irq_phys_active(vtimer, false);
                else
                        enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
        }
@@ -558,7 +629,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
        if (unlikely(!timer->enabled))
                return;
@@ -569,9 +640,10 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+       struct timer_map map;
+
+       get_timer_map(vcpu, &map);
 
        /*
         * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -579,12 +651,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
         * resets the timer to be disabled and unmasked and is compliant with
         * the ARMv7 architecture.
         */
-       vtimer->cnt_ctl = 0;
-       ptimer->cnt_ctl = 0;
-       kvm_timer_update_state(vcpu);
+       vcpu_vtimer(vcpu)->cnt_ctl = 0;
+       vcpu_ptimer(vcpu)->cnt_ctl = 0;
 
-       if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
-               kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+       if (timer->enabled) {
+               kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
+               kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
+
+               if (irqchip_in_kernel(vcpu->kvm)) {
+                       kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
+                       if (map.direct_ptimer)
+                               kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
+               }
+       }
+
+       if (map.emul_ptimer)
+               soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
        return 0;
 }
@@ -610,56 +692,76 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
 
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
        /* Synchronize cntvoff across all vtimers of a VM. */
        update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
-       vcpu_ptimer(vcpu)->cntvoff = 0;
+       ptimer->cntvoff = 0;
 
        hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        timer->bg_timer.function = kvm_bg_timer_expire;
 
-       hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-       timer->phys_timer.function = kvm_phys_timer_expire;
+       hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       vtimer->hrtimer.function = kvm_hrtimer_expire;
+       ptimer->hrtimer.function = kvm_hrtimer_expire;
 
        vtimer->irq.irq = default_vtimer_irq.irq;
        ptimer->irq.irq = default_ptimer_irq.irq;
+
+       vtimer->host_timer_irq = host_vtimer_irq;
+       ptimer->host_timer_irq = host_ptimer_irq;
+
+       vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
+       ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
+
+       vtimer->vcpu = vcpu;
+       ptimer->vcpu = vcpu;
 }
 
 static void kvm_timer_init_interrupt(void *info)
 {
        enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
+       enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 {
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       struct arch_timer_context *timer;
+       bool level;
 
        switch (regid) {
        case KVM_REG_ARM_TIMER_CTL:
-               vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+               timer = vcpu_vtimer(vcpu);
+               kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
                break;
        case KVM_REG_ARM_TIMER_CNT:
+               timer = vcpu_vtimer(vcpu);
                update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
                break;
        case KVM_REG_ARM_TIMER_CVAL:
-               vtimer->cnt_cval = value;
+               timer = vcpu_vtimer(vcpu);
+               kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
                break;
        case KVM_REG_ARM_PTIMER_CTL:
-               ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+               timer = vcpu_ptimer(vcpu);
+               kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
                break;
        case KVM_REG_ARM_PTIMER_CVAL:
-               ptimer->cnt_cval = value;
+               timer = vcpu_ptimer(vcpu);
+               kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
                break;
 
        default:
                return -1;
        }
 
-       kvm_timer_update_state(vcpu);
+       level = kvm_timer_should_fire(timer);
+       kvm_timer_update_irq(vcpu, level, timer);
+       timer_emulate(timer);
+
        return 0;
 }
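Note that KVM_REG_ARM_TIMER_CNT is handled by moving the per-VM offset rather than by storing a counter value: the guest reads CNTVCT = CNTPCT - CNTVOFF, so making it observe a given count means solving for the offset. A worked illustration with hypothetical numbers (cntvoff_for_guest_count is an illustrative helper, not kernel code):

    static u64 cntvoff_for_guest_count(u64 cntpct, u64 value)
    {
            return cntpct - value;  /* guest then reads CNTPCT - CNTVOFF == value */
    }
    /* cntvoff_for_guest_count(1000000, 250) == 999750,
       and the guest sees 1000000 - 999750 == 250. */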
 
@@ -679,26 +781,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
 
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
 {
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
        switch (regid) {
        case KVM_REG_ARM_TIMER_CTL:
-               return read_timer_ctl(vtimer);
+               return kvm_arm_timer_read(vcpu,
+                                         vcpu_vtimer(vcpu), TIMER_REG_CTL);
        case KVM_REG_ARM_TIMER_CNT:
-               return kvm_phys_timer_read() - vtimer->cntvoff;
+               return kvm_arm_timer_read(vcpu,
+                                         vcpu_vtimer(vcpu), TIMER_REG_CNT);
        case KVM_REG_ARM_TIMER_CVAL:
-               return vtimer->cnt_cval;
+               return kvm_arm_timer_read(vcpu,
+                                         vcpu_vtimer(vcpu), TIMER_REG_CVAL);
        case KVM_REG_ARM_PTIMER_CTL:
-               return read_timer_ctl(ptimer);
-       case KVM_REG_ARM_PTIMER_CVAL:
-               return ptimer->cnt_cval;
+               return kvm_arm_timer_read(vcpu,
+                                         vcpu_ptimer(vcpu), TIMER_REG_CTL);
        case KVM_REG_ARM_PTIMER_CNT:
-               return kvm_phys_timer_read();
+               return kvm_arm_timer_read(vcpu,
+                                         vcpu_ptimer(vcpu), TIMER_REG_CNT);
+       case KVM_REG_ARM_PTIMER_CVAL:
+               return kvm_arm_timer_read(vcpu,
+                                         vcpu_ptimer(vcpu), TIMER_REG_CVAL);
        }
        return (u64)-1;
 }
 
+static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
+                             struct arch_timer_context *timer,
+                             enum kvm_arch_timer_regs treg)
+{
+       u64 val;
+
+       switch (treg) {
+       case TIMER_REG_TVAL:
+               val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
+               break;
+
+       case TIMER_REG_CTL:
+               val = read_timer_ctl(timer);
+               break;
+
+       case TIMER_REG_CVAL:
+               val = timer->cnt_cval;
+               break;
+
+       case TIMER_REG_CNT:
+               val = kvm_phys_timer_read() - timer->cntvoff;
+               break;
+
+       default:
+               BUG();
+       }
+
+       return val;
+}
+
+u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
+                             enum kvm_arch_timers tmr,
+                             enum kvm_arch_timer_regs treg)
+{
+       u64 val;
+
+       preempt_disable();
+       kvm_timer_vcpu_put(vcpu);
+
+       val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
+
+       kvm_timer_vcpu_load(vcpu);
+       preempt_enable();
+
+       return val;
+}
+
+static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
+                               struct arch_timer_context *timer,
+                               enum kvm_arch_timer_regs treg,
+                               u64 val)
+{
+       switch (treg) {
+       case TIMER_REG_TVAL:
+               timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val;
+               break;
+
+       case TIMER_REG_CTL:
+               timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
+               break;
+
+       case TIMER_REG_CVAL:
+               timer->cnt_cval = val;
+               break;
+
+       default:
+               BUG();
+       }
+}
+
+void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
+                               enum kvm_arch_timers tmr,
+                               enum kvm_arch_timer_regs treg,
+                               u64 val)
+{
+       preempt_disable();
+       kvm_timer_vcpu_put(vcpu);
+
+       kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
+
+       kvm_timer_vcpu_load(vcpu);
+       preempt_enable();
+}
+
 static int kvm_timer_starting_cpu(unsigned int cpu)
 {
        kvm_timer_init_interrupt(NULL);
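The only arithmetic in the read/write helpers above is for TVAL, which the architecture defines as the distance from the current (offset) count to CVAL. A standalone restatement of the relations the two TIMER_REG_TVAL cases implement (tval_read/cval_write are illustrative helpers, not kernel functions):

    /* CNT  = CNTPCT - CNTVOFF  (the ptimer's cntvoff is fixed at 0)
     * TVAL = CVAL - CNT        (read)
     * CVAL = CNT + TVAL        (write)
     */
    static u64 tval_read(u64 cnt_cval, u64 cntpct, u64 cntvoff)
    {
            return cnt_cval - (cntpct - cntvoff);
    }

    static u64 cval_write(u64 tval, u64 cntpct, u64 cntvoff)
    {
            return (cntpct - cntvoff) + tval;
    }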
@@ -724,6 +913,8 @@ int kvm_timer_hyp_init(bool has_gic)
                return -ENODEV;
        }
 
+       /* First, do the virtual EL1 timer irq */
+
        if (info->virtual_irq <= 0) {
                kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
                        info->virtual_irq);
@@ -734,15 +925,15 @@ int kvm_timer_hyp_init(bool has_gic)
        host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
        if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
            host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
-               kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+               kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
                        host_vtimer_irq);
                host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
        }
 
        err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
-                                "kvm guest timer", kvm_get_running_vcpus());
+                                "kvm guest vtimer", kvm_get_running_vcpus());
        if (err) {
-               kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
+               kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
                        host_vtimer_irq, err);
                return err;
        }
@@ -760,6 +951,43 @@ int kvm_timer_hyp_init(bool has_gic)
 
        kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
 
+       /* Now let's do the physical EL1 timer irq */
+
+       if (info->physical_irq > 0) {
+               host_ptimer_irq = info->physical_irq;
+               host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
+               if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
+                   host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
+                       kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
+                               host_ptimer_irq);
+                       host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
+               }
+
+               err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
+                                        "kvm guest ptimer", kvm_get_running_vcpus());
+               if (err) {
+                       kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
+                               host_ptimer_irq, err);
+                       return err;
+               }
+
+               if (has_gic) {
+                       err = irq_set_vcpu_affinity(host_ptimer_irq,
+                                                   kvm_get_running_vcpus());
+                       if (err) {
+                               kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+                               goto out_free_irq;
+                       }
+               }
+
+               kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
+       } else if (has_vhe()) {
+               kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
+                       info->physical_irq);
+               err = -ENODEV;
+               goto out_free_irq;
+       }
+
        cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
                          "kvm/arm/timer:starting", kvm_timer_starting_cpu,
                          kvm_timer_dying_cpu);
@@ -771,7 +999,7 @@ out_free_irq:
 
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
        soft_timer_cancel(&timer->bg_timer);
 }
@@ -807,16 +1035,18 @@ bool kvm_arch_timer_get_input_level(int vintid)
 
        if (vintid == vcpu_vtimer(vcpu)->irq.irq)
                timer = vcpu_vtimer(vcpu);
+       else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
+               timer = vcpu_ptimer(vcpu);
        else
-               BUG(); /* We only map the vtimer so far */
+               BUG();
 
        return kvm_timer_should_fire(timer);
 }
 
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+       struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+       struct timer_map map;
        int ret;
 
        if (timer->enabled)
@@ -834,19 +1064,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
                return -EINVAL;
        }
 
-       ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
+       get_timer_map(vcpu, &map);
+
+       ret = kvm_vgic_map_phys_irq(vcpu,
+                                   map.direct_vtimer->host_timer_irq,
+                                   map.direct_vtimer->irq.irq,
                                    kvm_arch_timer_get_input_level);
        if (ret)
                return ret;
 
+       if (map.direct_ptimer) {
+               ret = kvm_vgic_map_phys_irq(vcpu,
+                                           map.direct_ptimer->host_timer_irq,
+                                           map.direct_ptimer->irq.irq,
+                                           kvm_arch_timer_get_input_level);
+       }
+
+       if (ret)
+               return ret;
+
 no_vgic:
        timer->enabled = 1;
        return 0;
 }
 
 /*
- * On VHE system, we only need to configure trap on physical timer and counter
- * accesses in EL0 and EL1 once, not for every world switch.
+ * On a VHE system, we only need to configure the EL2 timer trap register once,
+ * not for every world switch.
  * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
  * and this makes those bits have no effect for the host kernel execution.
  */
@@ -857,11 +1101,11 @@ void kvm_timer_init_vhe(void)
        u64 val;
 
        /*
-        * Disallow physical timer access for the guest.
-        * Physical counter access is allowed.
+        * VHE systems allow the guest direct access to the EL1 physical
+        * timer/counter.
         */
        val = read_sysreg(cnthctl_el2);
-       val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
+       val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
        val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
        write_sysreg(val, cnthctl_el2);
 }
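Setting (rather than clearing) CNTHCTL_EL1PCEN is what hands the EL1 physical timer to a VHE guest; EL1PCTEN already granted counter reads. The shift exists because the trap bits sit at different positions once HCR_EL2.E2H is set; the positions below come from the architecture, not from this diff:

    /* CNTHCTL_EL2 trap bits (illustration; bit set => EL1/EL0 access allowed):
     *   nVHE: EL1PCTEN = bit 0,  EL1PCEN = bit 1    (cnthctl_shift == 0)
     *   VHE:  EL1PCTEN = bit 10, EL1PCEN = bit 11   (cnthctl_shift == 10)
     */
    #define CNTHCTL_EL1PCTEN        (1 << 0)        /* physical counter reads  */
    #define CNTHCTL_EL1PCEN         (1 << 1)        /* physical timer accesses */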
index 9c486fad3f9f8289e2d8989928a34459bb2dd105..99c37384ba7bd3766db4fb863d6baea908210c75 100644 (file)
@@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
 /* The VMID used in the VTTBR */
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u32 kvm_next_vmid;
-static unsigned int kvm_vmid_bits __read_mostly;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
 static bool vgic_present;
@@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm_vgic_early_init(kvm);
 
        /* Mark the initial VMID generation invalid */
-       kvm->arch.vmid_gen = 0;
+       kvm->arch.vmid.vmid_gen = 0;
 
        /* The maximum number of VCPUs is limited by the host's GIC model */
        kvm->arch.max_vcpus = vgic_present ?
@@ -336,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
-       kvm_timer_schedule(vcpu);
        kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-       kvm_timer_unschedule(vcpu);
        kvm_vgic_v4_disable_doorbell(vcpu);
 }
 
@@ -472,37 +469,31 @@ void force_vm_exit(const cpumask_t *mask)
 
 /**
  * need_new_vmid_gen - check that the VMID is still valid
- * @kvm: The VM's VMID to check
+ * @vmid: The VMID to check
  *
  * return true if there is a new generation of VMIDs being used
  *
- * The hardware supports only 256 values with the value zero reserved for the
- * host, so we check if an assigned value belongs to a previous generation,
- * which which requires us to assign a new value. If we're the first to use a
- * VMID for the new generation, we must flush necessary caches and TLBs on all
- * CPUs.
+ * The hardware supports a limited set of values with the value zero reserved
+ * for the host, so we check if an assigned value belongs to a previous
+ * generation, which requires us to assign a new value. If we're the
+ * first to use a VMID for the new generation, we must flush necessary caches
+ * and TLBs on all CPUs.
  */
-static bool need_new_vmid_gen(struct kvm *kvm)
+static bool need_new_vmid_gen(struct kvm_vmid *vmid)
 {
        u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
        smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
-       return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
+       return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
 }
 
 /**
- * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
- * @kvm        The guest that we are about to run
- *
- * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
- * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
- * caches and TLBs.
+ * update_vmid - Update the vmid with a valid VMID for the current generation
+ * @kvm: The guest that struct vmid belongs to
+ * @vmid: The stage-2 VMID information struct
  */
-static void update_vttbr(struct kvm *kvm)
+static void update_vmid(struct kvm_vmid *vmid)
 {
-       phys_addr_t pgd_phys;
-       u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
-
-       if (!need_new_vmid_gen(kvm))
+       if (!need_new_vmid_gen(vmid))
                return;
 
        spin_lock(&kvm_vmid_lock);
@@ -512,7 +503,7 @@ static void update_vttbr(struct kvm *kvm)
         * already allocated a valid vmid for this vm, then this vcpu should
         * use the same vmid.
         */
-       if (!need_new_vmid_gen(kvm)) {
+       if (!need_new_vmid_gen(vmid)) {
                spin_unlock(&kvm_vmid_lock);
                return;
        }
@@ -536,18 +527,12 @@ static void update_vttbr(struct kvm *kvm)
                kvm_call_hyp(__kvm_flush_vm_context);
        }
 
-       kvm->arch.vmid = kvm_next_vmid;
+       vmid->vmid = kvm_next_vmid;
        kvm_next_vmid++;
-       kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
-
-       /* update vttbr to be used with the new vmid */
-       pgd_phys = virt_to_phys(kvm->arch.pgd);
-       BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
-       vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
-       kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
+       kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
 
        smp_wmb();
-       WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen));
+       WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
 
        spin_unlock(&kvm_vmid_lock);
 }
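The common case of this scheme is the lock-free generation compare; the slow path above only runs after a wrap has invalidated every VMID. A worked sketch of one wrap, assuming kvm_get_vmid_bits() == 8:

    /* Illustration only. VMID 0 is reserved for the host. */
    kvm_next_vmid = 255;                  /* last usable VMID of this generation */
    vmid->vmid = kvm_next_vmid++;
    kvm_next_vmid &= (1 << 8) - 1;        /* wraps to 0 */
    /*
     * The next allocator sees kvm_next_vmid == 0, bumps kvm_vmid_gen,
     * flushes TLBs via __kvm_flush_vm_context and restarts at 1, so
     * every VM takes the slow path exactly once per generation.
     */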
@@ -700,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                 */
                cond_resched();
 
-               update_vttbr(vcpu->kvm);
+               update_vmid(&vcpu->kvm->arch.vmid);
 
                check_vcpu_requests(vcpu);
 
@@ -749,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                 */
                smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-               if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+               if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
                    kvm_request_pending(vcpu)) {
                        vcpu->mode = OUTSIDE_GUEST_MODE;
                        isb(); /* Ensure work in x_flush_hwstate is committed */
@@ -775,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                        ret = kvm_vcpu_run_vhe(vcpu);
                        kvm_arm_vhe_guest_exit();
                } else {
-                       ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
+                       ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
                }
 
                vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -1427,10 +1412,6 @@ static inline void hyp_cpu_pm_exit(void)
 
 static int init_common_resources(void)
 {
-       /* set size of VMID supported by CPU */
-       kvm_vmid_bits = kvm_get_vmid_bits();
-       kvm_info("%d-bit VMID\n", kvm_vmid_bits);
-
        kvm_set_ipa_limit();
 
        return 0;
@@ -1571,6 +1552,7 @@ static int init_hyp_mode(void)
                kvm_cpu_context_t *cpu_ctxt;
 
                cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
+               kvm_init_host_cpu_context(cpu_ctxt, cpu);
                err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
 
                if (err) {
@@ -1581,7 +1563,7 @@ static int init_hyp_mode(void)
 
        err = hyp_map_aux_data();
        if (err)
-               kvm_err("Cannot map host auxilary data: %d\n", err);
+               kvm_err("Cannot map host auxiliary data: %d\n", err);
 
        return 0;
 
index 9652c453480f55eda8b76515aa5501d11674fd1d..264d92da32403810ea316912f9cba05eea040215 100644 (file)
@@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
                int i;
                u32 elrsr;
 
-               elrsr = read_gicreg(ICH_ELSR_EL2);
+               elrsr = read_gicreg(ICH_ELRSR_EL2);
 
                write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
 
index e9d28a7ca6734462d68aa924fec04ab86d1bdccb..ffd7acdceac7397d126bfb70f010981c1489ae6c 100644 (file)
@@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+       phys_addr_t pgd_phys;
        pgd_t *pgd;
 
        if (kvm->arch.pgd != NULL) {
@@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
        if (!pgd)
                return -ENOMEM;
 
+       pgd_phys = virt_to_phys(pgd);
+       if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
+               return -EINVAL;
+
        kvm->arch.pgd = pgd;
+       kvm->arch.pgd_phys = pgd_phys;
        return 0;
 }
 
@@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
                unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
                pgd = READ_ONCE(kvm->arch.pgd);
                kvm->arch.pgd = NULL;
+               kvm->arch.pgd_phys = 0;
        }
        spin_unlock(&kvm->mmu_lock);
 
@@ -1396,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
        return false;
 }
 
-static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
-{
-       if (kvm_vcpu_trap_is_iabt(vcpu))
-               return false;
-
-       return kvm_vcpu_dabt_iswrite(vcpu);
-}
-
 /**
  * stage2_wp_ptes - write protect PMD range
  * @pmd:       pointer to pmd entry
@@ -1598,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address,
 static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
                                               unsigned long hva)
 {
-       gpa_t gpa_start, gpa_end;
+       gpa_t gpa_start;
        hva_t uaddr_start, uaddr_end;
        size_t size;
 
        size = memslot->npages * PAGE_SIZE;
 
        gpa_start = memslot->base_gfn << PAGE_SHIFT;
-       gpa_end = gpa_start + size;
 
        uaddr_start = memslot->userspace_addr;
        uaddr_end = uaddr_start + size;
@@ -2353,7 +2351,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
        return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 {
 }
 
index 3828beab93f26bdbdba3adcd50860690c543a963..204d210d01c29a3282e16ec7c6ed21d25dc315c3 100644 (file)
@@ -2,6 +2,7 @@
 #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_KVM_H
 
+#include <kvm/arm_arch_timer.h>
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
@@ -262,10 +263,114 @@ TRACE_EVENT(kvm_timer_update_irq,
                  __entry->vcpu_id, __entry->irq, __entry->level)
 );
 
+TRACE_EVENT(kvm_get_timer_map,
+       TP_PROTO(unsigned long vcpu_id, struct timer_map *map),
+       TP_ARGS(vcpu_id, map),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,          vcpu_id )
+               __field(        int,                    direct_vtimer   )
+               __field(        int,                    direct_ptimer   )
+               __field(        int,                    emul_ptimer     )
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id                = vcpu_id;
+               __entry->direct_vtimer          = arch_timer_ctx_index(map->direct_vtimer);
+               __entry->direct_ptimer =
+                       (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
+               __entry->emul_ptimer =
+                       (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
+       ),
+
+       TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
+                 __entry->vcpu_id,
+                 __entry->direct_vtimer,
+                 __entry->direct_ptimer,
+                 __entry->emul_ptimer)
+);
+
+TRACE_EVENT(kvm_timer_save_state,
+       TP_PROTO(struct arch_timer_context *ctx),
+       TP_ARGS(ctx),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,          ctl             )
+               __field(        unsigned long long,     cval            )
+               __field(        int,                    timer_idx       )
+       ),
+
+       TP_fast_assign(
+               __entry->ctl                    = ctx->cnt_ctl;
+               __entry->cval                   = ctx->cnt_cval;
+               __entry->timer_idx              = arch_timer_ctx_index(ctx);
+       ),
+
+       TP_printk("   CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
+                 __entry->ctl,
+                 __entry->cval,
+                 __entry->timer_idx)
+);
+
+TRACE_EVENT(kvm_timer_restore_state,
+       TP_PROTO(struct arch_timer_context *ctx),
+       TP_ARGS(ctx),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,          ctl             )
+               __field(        unsigned long long,     cval            )
+               __field(        int,                    timer_idx       )
+       ),
+
+       TP_fast_assign(
+               __entry->ctl                    = ctx->cnt_ctl;
+               __entry->cval                   = ctx->cnt_cval;
+               __entry->timer_idx              = arch_timer_ctx_index(ctx);
+       ),
+
+       TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
+                 __entry->ctl,
+                 __entry->cval,
+                 __entry->timer_idx)
+);
+
+TRACE_EVENT(kvm_timer_hrtimer_expire,
+       TP_PROTO(struct arch_timer_context *ctx),
+       TP_ARGS(ctx),
+
+       TP_STRUCT__entry(
+               __field(        int,                    timer_idx       )
+       ),
+
+       TP_fast_assign(
+               __entry->timer_idx              = arch_timer_ctx_index(ctx);
+       ),
+
+       TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx)
+);
+
+TRACE_EVENT(kvm_timer_emulate,
+       TP_PROTO(struct arch_timer_context *ctx, bool should_fire),
+       TP_ARGS(ctx, should_fire),
+
+       TP_STRUCT__entry(
+               __field(        int,                    timer_idx       )
+               __field(        bool,                   should_fire     )
+       ),
+
+       TP_fast_assign(
+               __entry->timer_idx              = arch_timer_ctx_index(ctx);
+               __entry->should_fire            = should_fire;
+       ),
+
+       TP_printk("arch_timer_ctx_index: %d (should_fire: %d)",
+                 __entry->timer_idx, __entry->should_fire)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
+#define TRACE_INCLUDE_PATH ../../virt/kvm/arm
 #undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE trace
 
index 4ee0aeb9a9058165bce669c92217cbe73fce1fe6..408a78eb6a97b13d48ff576d81a113e0c3da49b9 100644 (file)
@@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
  */
 int vgic_v3_probe(const struct gic_kvm_info *info)
 {
-       u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+       u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2);
        int ret;
 
        /*
@@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
        if (likely(cpu_if->vgic_sre))
-               cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
+               cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
 
        kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
 
index 6855cce3e528793fd47b90f5f9811ea5617c693a..5294abb3f1788cbe07fa4835e18cdcf95e2ecd49 100644 (file)
@@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
        if (zone->pio != 1 && zone->pio != 0)
                return -EINVAL;
 
-       dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
+       dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),
+                     GFP_KERNEL_ACCOUNT);
        if (!dev)
                return -ENOMEM;
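This hunk and the ones that follow move KVM's long-lived, userspace-triggered allocations from GFP_KERNEL to GFP_KERNEL_ACCOUNT, so that they are charged to the kmem cgroup of the task issuing the ioctl. The flag is simply GFP_KERNEL with accounting enabled:

    /* include/linux/gfp.h */
    #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)

With __GFP_ACCOUNT set, a VM that creates many irqfds, ioeventfds, buses or memslots runs into its own memcg limit instead of consuming unaccounted kernel memory.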
 
index b20b751286fc612214c59c95e787c9fb0fac50b7..4325250afd728447630a2decb1333965d5fd2cf8 100644 (file)
@@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
        if (!kvm_arch_intc_initialized(kvm))
                return -EAGAIN;
 
-       irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+       irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
        if (!irqfd)
                return -ENOMEM;
 
@@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
                }
 
                if (!irqfd->resampler) {
-                       resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
+                       resampler = kzalloc(sizeof(*resampler),
+                                           GFP_KERNEL_ACCOUNT);
                        if (!resampler) {
                                ret = -ENOMEM;
                                mutex_unlock(&kvm->irqfds.resampler_lock);
@@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);
 
-       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
        if (!p) {
                ret = -ENOMEM;
                goto fail;
index b1286c4e0712259fac5d66b9bbc3aaf388d3d3f9..3547b0d8c91ea2c84e0869b769e9947829fe4286 100644 (file)
@@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
        nr_rt_entries += 1;
 
        new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
-                     GFP_KERNEL);
+                     GFP_KERNEL_ACCOUNT);
 
        if (!new)
                return -ENOMEM;
@@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
        for (i = 0; i < nr; ++i) {
                r = -ENOMEM;
-               e = kzalloc(sizeof(*e), GFP_KERNEL);
+               e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT);
                if (!e)
                        goto out;
 
index d237d3350a99c8055999544500f5a0e8fcd3ed93..f25aa98a94df430b6064c31e89ff1d614d8846b8 100644 (file)
@@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2;
 module_param(halt_poll_ns_grow, uint, 0644);
 EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
 
+/* The start value to grow halt_poll_ns from */
+unsigned int halt_poll_ns_grow_start = 10000; /* 10us */
+module_param(halt_poll_ns_grow_start, uint, 0644);
+EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);
+
 /* Default resets per-vcpu halt_poll_ns . */
 unsigned int halt_poll_ns_shrink;
 module_param(halt_poll_ns_shrink, uint, 0644);
@@ -525,7 +530,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
        int i;
        struct kvm_memslots *slots;
 
-       slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+       slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
        if (!slots)
                return NULL;
 
@@ -601,12 +606,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 
        kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
                                         sizeof(*kvm->debugfs_stat_data),
-                                        GFP_KERNEL);
+                                        GFP_KERNEL_ACCOUNT);
        if (!kvm->debugfs_stat_data)
                return -ENOMEM;
 
        for (p = debugfs_entries; p->name; p++) {
-               stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL);
+               stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
                if (!stat_data)
                        return -ENOMEM;
 
@@ -656,12 +661,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
                struct kvm_memslots *slots = kvm_alloc_memslots();
                if (!slots)
                        goto out_err_no_srcu;
-               /*
-                * Generations must be different for each address space.
-                * Init kvm generation close to the maximum to easily test the
-                * code of handling generation number wrap-around.
-                */
-               slots->generation = i * 2 - 150;
+               /* Generations must be different for each address space. */
+               slots->generation = i;
                rcu_assign_pointer(kvm->memslots[i], slots);
        }
 
@@ -671,7 +672,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
                goto out_err_no_irq_srcu;
        for (i = 0; i < KVM_NR_BUSES; i++) {
                rcu_assign_pointer(kvm->buses[i],
-                       kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
+                       kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
                if (!kvm->buses[i])
                        goto out_err;
        }
@@ -789,7 +790,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
        unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
-       memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL);
+       memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT);
        if (!memslot->dirty_bitmap)
                return -ENOMEM;
 
@@ -874,31 +875,34 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
                int as_id, struct kvm_memslots *slots)
 {
        struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
+       u64 gen = old_memslots->generation;
 
-       /*
-        * Set the low bit in the generation, which disables SPTE caching
-        * until the end of synchronize_srcu_expedited.
-        */
-       WARN_ON(old_memslots->generation & 1);
-       slots->generation = old_memslots->generation + 1;
+       WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+       slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
 
        rcu_assign_pointer(kvm->memslots[as_id], slots);
        synchronize_srcu_expedited(&kvm->srcu);
 
        /*
-        * Increment the new memslot generation a second time. This prevents
-        * vm exits that race with memslot updates from caching a memslot
-        * generation that will (potentially) be valid forever.
-        *
+        * Increment the new memslot generation a second time, dropping the
+        * update in-progress flag and incrementing the generation based on
+        * the number of address spaces.  This provides a unique and easily
+        * identifiable generation number while the memslots are in flux.
+        */
+       gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
+
+       /*
         * Generations must be unique even across address spaces.  We do not need
         * a global counter for that, instead the generation space is evenly split
         * across address spaces.  For example, with two address spaces, address
-        * space 0 will use generations 0, 4, 8, ... while * address space 1 will
-        * use generations 2, 6, 10, 14, ...
+        * space 0 will use generations 0, 2, 4, ... while address space 1 will
+        * use generations 1, 3, 5, ...
         */
-       slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1;
+       gen += KVM_ADDRESS_SPACE_NUM;
+
+       kvm_arch_memslots_updated(kvm, gen);
 
-       kvm_arch_memslots_updated(kvm, slots);
+       slots->generation = gen;
 
        return old_memslots;
 }
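The generation word is now split: the top bit flags an update in progress (so arch code can decline to cache anything keyed on the generation), and the low bits stripe monotonically across address spaces. One update, sketched under the assumption that the flag is BIT_ULL(63) as this series defines it, with KVM_ADDRESS_SPACE_NUM == 2:

    u64 gen = old_memslots->generation;        /* say 4, for as_id 0 */
    slots->generation = gen | BIT_ULL(63);     /* readers: update in progress */
    /* ... rcu_assign_pointer() + synchronize_srcu_expedited() ... */
    gen = slots->generation & ~BIT_ULL(63);    /* back to 4 */
    gen += KVM_ADDRESS_SPACE_NUM;              /* 6; as_id 1 yields 5, 7, ... */
    slots->generation = gen;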
@@ -1018,7 +1022,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
                        goto out_free;
        }
 
-       slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+       slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
        if (!slots)
                goto out_free;
        memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
@@ -1201,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
                        mask = xchg(&dirty_bitmap[i], 0);
                        dirty_bitmap_buffer[i] = mask;
 
-                       if (mask) {
-                               offset = i * BITS_PER_LONG;
-                               kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
-                                                                       offset, mask);
-                       }
+                       offset = i * BITS_PER_LONG;
+                       kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+                                                               offset, mask);
                }
                spin_unlock(&kvm->mmu_lock);
        }
@@ -2185,20 +2187,23 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
 
 static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 {
-       unsigned int old, val, grow;
+       unsigned int old, val, grow, grow_start;
 
        old = val = vcpu->halt_poll_ns;
+       grow_start = READ_ONCE(halt_poll_ns_grow_start);
        grow = READ_ONCE(halt_poll_ns_grow);
-       /* 10us base */
-       if (val == 0 && grow)
-               val = 10000;
-       else
-               val *= grow;
+       if (!grow)
+               goto out;
+
+       val *= grow;
+       if (val < grow_start)
+               val = grow_start;
 
        if (val > halt_poll_ns)
                val = halt_poll_ns;
 
        vcpu->halt_poll_ns = val;
+out:
        trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
 }
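With grow_start split out of the function, the growth sequence can be read off directly: a vCPU that keeps being woken starts polling at halt_poll_ns_grow_start and multiplies from there, capped by the global halt_poll_ns. A standalone restatement of the rule (grow() is an illustrative helper; defaults assumed: grow_start = 10000 ns, grow = 2):

    static unsigned int grow(unsigned int val, unsigned int factor,
                             unsigned int start, unsigned int cap)
    {
            if (!factor)
                    return val;      /* growth disabled */
            val *= factor;           /* 0 stays 0 ... */
            if (val < start)
                    val = start;     /* ... so the first grow lands on start */
            return val > cap ? cap : val;
    }
    /* grow(0, 2, 10000, 500000) == 10000, then 20000, 40000, ... up to cap. */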
 
@@ -2683,7 +2688,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                struct kvm_regs *kvm_regs;
 
                r = -ENOMEM;
-               kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+               kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
                if (!kvm_regs)
                        goto out;
                r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
@@ -2711,7 +2716,8 @@ out_free1:
                break;
        }
        case KVM_GET_SREGS: {
-               kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+               kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
+                                   GFP_KERNEL_ACCOUNT);
                r = -ENOMEM;
                if (!kvm_sregs)
                        goto out;
@@ -2803,7 +2809,7 @@ out_free1:
                break;
        }
        case KVM_GET_FPU: {
-               fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+               fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
                r = -ENOMEM;
                if (!fpu)
                        goto out;
@@ -2980,7 +2986,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
        if (test)
                return 0;
 
-       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT);
        if (!dev)
                return -ENOMEM;
 
@@ -3625,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
        r = __kvm_io_bus_write(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
+EXPORT_SYMBOL_GPL(kvm_io_bus_write);
 
 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
 int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
@@ -3675,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
 
        return -EOPNOTSUPP;
 }
-EXPORT_SYMBOL_GPL(kvm_io_bus_write);
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
 int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
@@ -3697,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
        return r < 0 ? r : 0;
 }
 
-
 /* Caller must hold slots_lock. */
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                            int len, struct kvm_io_device *dev)
@@ -3714,8 +3719,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
        if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
                return -ENOSPC;
 
-       new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
-                         sizeof(struct kvm_io_range)), GFP_KERNEL);
+       new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1),
+                         GFP_KERNEL_ACCOUNT);
        if (!new_bus)
                return -ENOMEM;
 
@@ -3760,8 +3765,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
        if (i == bus->dev_count)
                return;
 
-       new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
-                         sizeof(struct kvm_io_range)), GFP_KERNEL);
+       new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
+                         GFP_KERNEL_ACCOUNT);
        if (!new_bus)  {
                pr_err("kvm: failed to shrink bus, removing it completely\n");
                goto broken;
@@ -4029,7 +4034,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
        active = kvm_active_vms;
        spin_unlock(&kvm_lock);
 
-       env = kzalloc(sizeof(*env), GFP_KERNEL);
+       env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
        if (!env)
                return;
 
@@ -4045,7 +4050,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
        add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
        if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
-               char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
+               char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
 
                if (p) {
                        tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
index d99850c462a1879e0a530c387ad4f756d0e64227..524cbd20379fb51dad80eef5e1489e4b3361080f 100644 (file)
@@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
                        }
                }
 
-               kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+               kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
                if (!kvg) {
                        mutex_unlock(&kv->lock);
                        kvm_vfio_group_put_external_user(vfio_group);
@@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
                if (tmp->ops == &kvm_vfio_ops)
                        return -EBUSY;
 
-       kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+       kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT);
        if (!kv)
                return -ENOMEM;