Merge branch 'akpm' (patches from Andrew)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Jul 2015 00:47:51 +0000 (17:47 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Jul 2015 00:47:51 +0000 (17:47 -0700)
Merge third patchbomb from Andrew Morton:

 - the rest of MM

 - scripts/gdb updates

 - ipc/ updates

 - lib/ updates

 - MAINTAINERS updates

 - various other misc things

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (67 commits)
  genalloc: rename of_get_named_gen_pool() to of_gen_pool_get()
  genalloc: rename dev_get_gen_pool() to gen_pool_get()
  x86: opt into HAVE_COPY_THREAD_TLS, for both 32-bit and 64-bit
  MAINTAINERS: add zpool
  MAINTAINERS: BCACHE: Kent Overstreet has changed email address
  MAINTAINERS: move Jens Osterkamp to CREDITS
  MAINTAINERS: remove unused nbd.h pattern
  MAINTAINERS: update brcm gpio filename pattern
  MAINTAINERS: update brcm dts pattern
  MAINTAINERS: update sound soc intel patterns
  MAINTAINERS: remove website for paride
  MAINTAINERS: update Emulex ocrdma email addresses
  bcache: use kvfree() in various places
  libcxgbi: use kvfree() in cxgbi_free_big_mem()
  target: use kvfree() in session alloc and free
  IB/ehca: use kvfree() in ipz_queue_{cd}tor()
  drm/nouveau/gem: use kvfree() in u_free()
  drm: use kvfree() in drm_free_large()
  cxgb4: use kvfree() in t4_free_mem()
  cxgb3: use kvfree() in cxgb_free_mem()
  ...
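
The two genalloc renames at the top of the list change only the function
names, not the semantics; a minimal before/after sketch, assuming the
4.2-era signatures (the "sram" property name is illustrative only):

	/* before */
	struct gen_pool *p = dev_get_gen_pool(dev);
	struct gen_pool *q = of_get_named_gen_pool(np, "sram", 0);

	/* after */
	struct gen_pool *p = gen_pool_get(dev);
	struct gen_pool *q = of_gen_pool_get(np, "sram", 0);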

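The batch of "use kvfree()" conversions all collapse the same open-coded
free path; a minimal sketch of the pattern each of those commits applies
(the helper name is illustrative, not taken from any one driver):

	/* before: every caller re-derives which allocator was used */
	static void big_free(void *p)
	{
		if (is_vmalloc_addr(p))
			vfree(p);
		else
			kfree(p);
	}

	/* after: kvfree() does the is_vmalloc_addr() check itself */
	static void big_free(void *p)
	{
		kvfree(p);
	}
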
462 files changed:
Documentation/devicetree/bindings/arc/archs-idu-intc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arc/archs-intc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arc/axs101.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arc/axs103.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
Documentation/filesystems/xfs.txt
Kbuild
MAINTAINERS
arch/arc/Kconfig
arch/arc/Makefile
arch/arc/boot/dts/Makefile
arch/arc/boot/dts/angel4.dts [deleted file]
arch/arc/boot/dts/axc001.dtsi [new file with mode: 0644]
arch/arc/boot/dts/axc003.dtsi [new file with mode: 0644]
arch/arc/boot/dts/axc003_idu.dtsi [new file with mode: 0644]
arch/arc/boot/dts/axs101.dts [new file with mode: 0644]
arch/arc/boot/dts/axs103.dts [new file with mode: 0644]
arch/arc/boot/dts/axs103_idu.dts [new file with mode: 0644]
arch/arc/boot/dts/axs10x_mb.dtsi [new file with mode: 0644]
arch/arc/boot/dts/nsim_700.dts [new file with mode: 0644]
arch/arc/boot/dts/nsim_hs.dts [new file with mode: 0644]
arch/arc/boot/dts/nsim_hs_idu.dts [new file with mode: 0644]
arch/arc/boot/dts/nsimosci_hs.dts [new file with mode: 0644]
arch/arc/boot/dts/nsimosci_hs_idu.dts [new file with mode: 0644]
arch/arc/boot/dts/vdk_axc003.dtsi [new file with mode: 0644]
arch/arc/boot/dts/vdk_axc003_idu.dtsi [new file with mode: 0644]
arch/arc/boot/dts/vdk_axs10x_mb.dtsi [new file with mode: 0644]
arch/arc/boot/dts/vdk_hs38.dts [new file with mode: 0644]
arch/arc/boot/dts/vdk_hs38_smp.dts [new file with mode: 0644]
arch/arc/configs/axs101_defconfig [new file with mode: 0644]
arch/arc/configs/axs103_defconfig [new file with mode: 0644]
arch/arc/configs/axs103_smp_defconfig [new file with mode: 0644]
arch/arc/configs/nsim_700_defconfig
arch/arc/configs/nsim_hs_defconfig [new file with mode: 0644]
arch/arc/configs/nsim_hs_smp_defconfig [new file with mode: 0644]
arch/arc/configs/nsimosci_defconfig
arch/arc/configs/nsimosci_hs_defconfig [new file with mode: 0644]
arch/arc/configs/nsimosci_hs_smp_defconfig [new file with mode: 0644]
arch/arc/configs/tb10x_defconfig
arch/arc/configs/vdk_hs38_defconfig [new file with mode: 0644]
arch/arc/configs/vdk_hs38_smp_defconfig [new file with mode: 0644]
arch/arc/include/asm/Kbuild
arch/arc/include/asm/arcregs.h
arch/arc/include/asm/atomic.h
arch/arc/include/asm/barrier.h [new file with mode: 0644]
arch/arc/include/asm/bitops.h
arch/arc/include/asm/cache.h
arch/arc/include/asm/cacheflush.h
arch/arc/include/asm/cmpxchg.h
arch/arc/include/asm/delay.h
arch/arc/include/asm/dma-mapping.h
arch/arc/include/asm/elf.h
arch/arc/include/asm/entry-arcv2.h [new file with mode: 0644]
arch/arc/include/asm/entry-compact.h [new file with mode: 0644]
arch/arc/include/asm/entry.h
arch/arc/include/asm/io.h
arch/arc/include/asm/irq.h
arch/arc/include/asm/irqflags-arcv2.h [new file with mode: 0644]
arch/arc/include/asm/irqflags-compact.h [new file with mode: 0644]
arch/arc/include/asm/irqflags.h
arch/arc/include/asm/mcip.h [new file with mode: 0644]
arch/arc/include/asm/mmu.h
arch/arc/include/asm/pgtable.h
arch/arc/include/asm/processor.h
arch/arc/include/asm/ptrace.h
arch/arc/include/asm/spinlock.h
arch/arc/include/asm/thread_info.h
arch/arc/include/asm/uaccess.h
arch/arc/include/uapi/asm/page.h
arch/arc/kernel/Makefile
arch/arc/kernel/asm-offsets.c
arch/arc/kernel/devtree.c
arch/arc/kernel/entry-arcv2.S [new file with mode: 0644]
arch/arc/kernel/entry-compact.S [new file with mode: 0644]
arch/arc/kernel/entry.S
arch/arc/kernel/head.S
arch/arc/kernel/intc-arcv2.c [new file with mode: 0644]
arch/arc/kernel/intc-compact.c [new file with mode: 0644]
arch/arc/kernel/irq.c
arch/arc/kernel/mcip.c [new file with mode: 0644]
arch/arc/kernel/perf_event.c
arch/arc/kernel/process.c
arch/arc/kernel/ptrace.c
arch/arc/kernel/setup.c
arch/arc/kernel/signal.c
arch/arc/kernel/smp.c
arch/arc/kernel/stacktrace.c
arch/arc/kernel/time.c
arch/arc/kernel/troubleshoot.c
arch/arc/lib/Makefile
arch/arc/lib/memcmp.S
arch/arc/lib/memcpy-archs.S [new file with mode: 0644]
arch/arc/lib/memset-archs.S [new file with mode: 0644]
arch/arc/lib/strcmp-archs.S [new file with mode: 0644]
arch/arc/mm/Makefile
arch/arc/mm/cache.c [new file with mode: 0644]
arch/arc/mm/cache_arc700.c [deleted file]
arch/arc/mm/dma.c
arch/arc/mm/tlb.c
arch/arc/mm/tlbex.S
arch/arc/plat-arcfpga/Kconfig [deleted file]
arch/arc/plat-arcfpga/Makefile [deleted file]
arch/arc/plat-arcfpga/include/plat/smp.h [deleted file]
arch/arc/plat-arcfpga/platform.c [deleted file]
arch/arc/plat-arcfpga/smp.c [deleted file]
arch/arc/plat-axs10x/Kconfig [new file with mode: 0644]
arch/arc/plat-axs10x/Makefile [new file with mode: 0644]
arch/arc/plat-axs10x/axs10x.c [new file with mode: 0644]
arch/arc/plat-sim/Kconfig [new file with mode: 0644]
arch/arc/plat-sim/Makefile [new file with mode: 0644]
arch/arc/plat-sim/platform.c [new file with mode: 0644]
arch/arm/boot/dts/armada-370-xp.dtsi
arch/arm/boot/dts/armada-370.dtsi
arch/arm/boot/dts/armada-xp-mv78260.dtsi
arch/arm/boot/dts/armada-xp-mv78460.dtsi
arch/arm/boot/dts/armada-xp.dtsi
arch/arm/include/asm/xen/hypervisor.h
arch/arm/include/asm/xen/page.h
arch/arm/kernel/setup.c
arch/arm/mach-lpc32xx/irq.c
arch/arm/xen/enlighten.c
arch/arm/xen/mm.c
arch/arm/xen/p2m.c
arch/arm64/kernel/setup.c
arch/avr32/mach-at32ap/extint.c
arch/m68k/mac/psc.c
arch/mips/ath25/ar2315.c
arch/mips/ath25/ar5312.c
arch/mips/pci/pci-ar2315.c
arch/mips/ralink/irq.c
arch/mn10300/kernel/irq.c
arch/s390/configs/default_defconfig
arch/s390/configs/gcov_defconfig
arch/s390/configs/performance_defconfig
arch/s390/defconfig
arch/s390/include/asm/cpu.h
arch/s390/include/asm/ipl.h
arch/s390/include/asm/sclp.h
arch/s390/include/asm/smp.h
arch/s390/kernel/base.S
arch/s390/kernel/crash_dump.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/net/bpf_jit_comp.c
arch/tile/Kconfig
arch/tile/include/asm/irq.h
arch/tile/include/asm/processor.h
arch/tile/include/asm/spinlock_32.h
arch/tile/include/asm/spinlock_64.h
arch/tile/include/asm/stack.h
arch/tile/include/asm/thread_info.h
arch/tile/include/asm/traps.h
arch/tile/include/asm/uaccess.h
arch/tile/include/asm/word-at-a-time.h [new file with mode: 0644]
arch/tile/include/hv/hypervisor.h
arch/tile/kernel/entry.S
arch/tile/kernel/hvglue.S
arch/tile/kernel/hvglue_trace.c
arch/tile/kernel/intvec_64.S
arch/tile/kernel/process.c
arch/tile/kernel/setup.c
arch/tile/kernel/stack.c
arch/tile/kernel/traps.c
arch/tile/kernel/vdso/vgettimeofday.c
arch/tile/lib/exports.c
arch/tile/lib/spinlock_32.c
arch/tile/lib/spinlock_64.c
arch/tile/lib/usercopy_32.S
arch/tile/lib/usercopy_64.S
arch/tile/mm/fault.c
arch/um/drivers/hostaudio_kern.c
arch/x86/configs/xen.config [new file with mode: 0644]
arch/x86/crypto/aesni-intel_glue.c
arch/x86/kvm/mmu_audit.c
arch/x86/platform/uv/uv_nmi.c
block/scsi_ioctl.c
drivers/acpi/resource.c
drivers/base/property.c
drivers/block/null_blk.c
drivers/block/nvme-core.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/char/ipmi/ipmi_watchdog.c
drivers/clocksource/exynos_mct.c
drivers/cpuidle/cpuidle-powernv.c
drivers/crypto/qat/qat_common/adf_accel_engine.c
drivers/crypto/qat/qat_common/adf_transport.c
drivers/dma/dmatest.c
drivers/gpio/gpio-bcm-kona.c
drivers/gpio/gpio-dwapb.c
drivers/gpio/gpio-msic.c
drivers/gpio/gpiolib.c
drivers/ide/ide.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/input/misc/ati_remote2.c
drivers/input/mouse/psmouse-base.c
drivers/iommu/amd_iommu.c
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/iommu.c
drivers/leds/leds-gpio.c
drivers/mfd/asic3.c
drivers/misc/lis3lv02d/lis3lv02d.c
drivers/mtd/ubi/block.c
drivers/net/ethernet/amd/xgbe/xgbe-desc.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
drivers/net/ethernet/broadcom/genet/bcmgenet.h
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/cisco/enic/vnic_rq.h
drivers/net/ethernet/freescale/Kconfig
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/icplus/ipg.c
drivers/net/ethernet/icplus/ipg.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/igb/e1000_82575.c
drivers/net/ethernet/intel/igb/e1000_defines.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/intf.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
drivers/net/ethernet/renesas/ravb_ptp.c
drivers/net/ethernet/sis/sis900.h
drivers/net/ethernet/stmicro/stmmac/mmc_core.c
drivers/net/ethernet/via/Kconfig
drivers/net/phy/bcm7xxx.c
drivers/net/phy/mdio-bcm-unimac.c
drivers/net/phy/phy_device.c
drivers/net/phy/vitesse.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/wireless/ath/wil6210/main.c
drivers/net/wireless/libertas_tf/if_usb.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netfront.c
drivers/pci/host/pci-keystone.c
drivers/pci/xen-pcifront.c
drivers/pinctrl/mediatek/pinctrl-mtk-common.c
drivers/pinctrl/pinctrl-adi2.c
drivers/pinctrl/pinctrl-st.c
drivers/pinctrl/samsung/pinctrl-exynos.c
drivers/pinctrl/samsung/pinctrl-s3c24xx.c
drivers/pinctrl/samsung/pinctrl-s3c64xx.c
drivers/pinctrl/sunxi/pinctrl-sunxi.c
drivers/power/test_power.c
drivers/s390/char/sclp_cmd.c
drivers/s390/char/sclp_early.c
drivers/s390/char/zcore.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/zcrypt_cex4.c
drivers/sh/intc/core.c
drivers/sh/intc/virq.c
drivers/thermal/intel_powerclamp.c
drivers/tty/hvc/hvc_iucv.c
drivers/tty/hvc/hvc_tile.c
drivers/tty/hvc/hvc_xen.c
drivers/tty/sysrq.c
drivers/usb/atm/ueagle-atm.c
drivers/video/fbdev/uvesafb.c
drivers/video/fbdev/vt8623fb.c
drivers/virtio/virtio_mmio.c
drivers/xen/events/events_base.c
drivers/xen/events/events_fifo.c
drivers/xen/gntdev.c
drivers/xen/grant-table.c
drivers/xen/manage.c
drivers/xen/tmem.c
drivers/xen/xenbus/xenbus_client.c
drivers/xen/xenbus/xenbus_probe.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/backref.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.h [new file with mode: 0644]
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-defrag.c
fs/btrfs/tree-log.c
fs/btrfs/ulist.c
fs/btrfs/ulist.h
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/cifs/Kconfig
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/ioctl.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smbfsctl.h
fs/dax.c
fs/ext2/file.c
fs/ext4/file.c
fs/ext4/inode.c
fs/nfs/super.c
fs/seq_file.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_fs.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_ialloc_btree.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/libxfs/xfs_shared.h
fs/xfs/libxfs/xfs_trans_resv.h
fs/xfs/libxfs/xfs_trans_space.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_attr_inactive.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_dquot.c
fs/xfs/xfs_error.c
fs/xfs/xfs_error.h
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_file.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_qm_syscalls.c
fs/xfs/xfs_quota.h
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_dquot.c
fs/xfs/xfs_trans_priv.h
include/acpi/acpi_bus.h
include/acpi/video.h
include/asm-generic/barrier.h
include/linux/acpi.h
include/linux/compiler.h
include/linux/fs.h
include/linux/irq.h
include/linux/irqdesc.h
include/linux/irqnr.h
include/linux/kernel.h
include/linux/module.h
include/linux/moduleparam.h
include/linux/of.h
include/linux/rbtree.h
include/linux/rbtree_augmented.h
include/linux/rbtree_latch.h [new file with mode: 0644]
include/linux/rcupdate.h
include/linux/seqlock.h
include/net/ax25.h
include/net/sock.h
include/trace/events/btrfs.h
include/uapi/linux/in.h
include/uapi/linux/libc-compat.h
init/Kconfig
kernel/configs/xen.config [new file with mode: 0644]
kernel/jump_label.c
kernel/module.c
kernel/params.c
kernel/power/Kconfig
kernel/power/hibernate.c
kernel/time/Makefile
kernel/time/timekeeping.c
kernel/time/timer.c
kernel/workqueue.c
lib/bug.c
lib/kobject.c
lib/rbtree.c
mm/slab_common.c
net/ax25/af_ax25.c
net/ax25/ax25_in.c
net/core/flow_dissector.c
net/core/sock.c
net/dsa/slave.c
net/ipv4/fib_semantics.c
net/mac80211/rate.c
net/sched/cls_flower.c
net/sctp/output.c
net/sctp/socket.c
net/sunrpc/auth.c
net/sunrpc/xprtsock.c
net/tipc/bcast.c
net/tipc/link.c
net/tipc/link.h
scripts/kconfig/Makefile
scripts/sortextable.c
security/apparmor/lsm.c
security/integrity/ima/ima_crypto.c
sound/core/ctljack.c
sound/core/init.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_via.c

diff --git a/Documentation/devicetree/bindings/arc/archs-idu-intc.txt b/Documentation/devicetree/bindings/arc/archs-idu-intc.txt
new file mode 100644 (file)
index 0000000..0dcb7c7
--- /dev/null
@@ -0,0 +1,46 @@
+* ARC-HS Interrupt Distribution Unit
+
+  This optional 2nd-level interrupt controller can be used in SMP configurations
+  for dynamic IRQ routing and load balancing of common/external IRQs towards the core intc.
+
+Properties:
+
+- compatible: "snps,archs-idu-intc"
+- interrupt-controller: This is an interrupt controller.
+- interrupt-parent: <reference to parent core intc>
+- #interrupt-cells: Must be <2>.
+- interrupts: <...> specifies the upstream core irqs
+
+  The first cell specifies the "common" IRQ from the peripheral to the IDU.
+  The second cell specifies the IRQ distribution mode to cores:
+     0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+
+  The intc is accessed via the special ARC AUX register interface, hence the
+  "reg" property is not specified.
+
+Example:
+       core_intc: core-interrupt-controller {
+               compatible = "snps,archs-intc";
+               interrupt-controller;
+               #interrupt-cells = <1>;
+       };
+
+       idu_intc: idu-interrupt-controller {
+               compatible = "snps,archs-idu-intc";
+               interrupt-controller;
+               interrupt-parent = <&core_intc>;
+
+               /*
+                * <hwirq  distribution>
+                * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+                */
+               #interrupt-cells = <2>;
+
+               /* upstream core irqs: downstream these are "COMMON" irq 0,1..  */
+               interrupts = <24 25 26 27 28 29 30 31>;
+       };
+
+       some_device: serial@c0fc1000 {
+               interrupt-parent = <&idu_intc>;
+               interrupts = <0 0>;     /* upstream idu IRQ #24, Round Robin */
+       };
diff --git a/Documentation/devicetree/bindings/arc/archs-intc.txt b/Documentation/devicetree/bindings/arc/archs-intc.txt
new file mode 100644 (file)
index 0000000..69f326d
--- /dev/null
@@ -0,0 +1,22 @@
+* ARC-HS incore Interrupt Controller (Provided by cores implementing ARCv2 ISA)
+
+Properties:
+
+- compatible: "snps,archs-intc"
+- interrupt-controller: This is an interrupt controller.
+- #interrupt-cells: Must be <1>.
+
+  The single-cell "interrupts" property of a device specifies an IRQ number
+  between 16 and 256.
+
+  The intc is accessed via the special ARC AUX register interface, hence the
+  "reg" property is not specified.
+
+Example:
+
+       intc: interrupt-controller {
+               compatible = "snps,archs-intc";
+               interrupt-controller;
+               #interrupt-cells = <1>;
+               interrupts = <16 17 18 19 20 21 22 23 24 25>;
+       };
diff --git a/Documentation/devicetree/bindings/arc/axs101.txt b/Documentation/devicetree/bindings/arc/axs101.txt
new file mode 100644 (file)
index 0000000..48290d5
--- /dev/null
@@ -0,0 +1,7 @@
+Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+SDP Main Board with an AXC001 CPU Card hosting an ARC700 core in silicon
+
+Required root node properties:
+    - compatible = "snps,axs101", "snps,arc-sdp";
diff --git a/Documentation/devicetree/bindings/arc/axs103.txt b/Documentation/devicetree/bindings/arc/axs103.txt
new file mode 100644 (file)
index 0000000..6eea862
--- /dev/null
@@ -0,0 +1,8 @@
+Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+SDP Main Board with an AXC003 FPGA Card which can contain various flavours of
+HS38x cores.
+
+Required root node properties:
+    - compatible = "snps,axs103", "snps,arc-sdp";
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index 750d577e8083ee3f96c8bf823c986c162d4ac5b3..f5a8ca29aff06e84d49e3c75caf125b35c660055 100644 (file)
@@ -1,7 +1,7 @@
 * Marvell Armada 370 / Armada XP Ethernet Controller (NETA)
 
 Required properties:
-- compatible: should be "marvell,armada-370-neta".
+- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta".
 - reg: address and length of the register set for the device.
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 5a5a05582b583c42468764fc5a80bf8c126072e6..8146e9fd5ffc09fa104158c6d27ca16274db5838 100644 (file)
@@ -236,10 +236,10 @@ Removed Mount Options
 
   Name                         Removed
   ----                         -------
-  delaylog/nodelaylog          v3.20
-  ihashsize                    v3.20
-  irixsgid                     v3.20
-  osyncisdsync/osyncisosync    v3.20
+  delaylog/nodelaylog          v4.0
+  ihashsize                    v4.0
+  irixsgid                     v4.0
+  osyncisdsync/osyncisosync    v4.0
 
 
 sysctls
@@ -346,5 +346,5 @@ Removed Sysctls
 
   Name                         Removed
   ----                         -------
-  fs.xfs.xfsbufd_centisec      v3.20
-  fs.xfs.age_buffer_centisecs  v3.20
+  fs.xfs.xfsbufd_centisec      v4.0
+  fs.xfs.age_buffer_centisecs  v4.0
diff --git a/Kbuild b/Kbuild
index df99a5f53beb880482871e99453bf04ef2f0fb06..f55cefd9bf29a2fa2746f7039f2481dfdceadfc7 100644 (file)
--- a/Kbuild
+++ b/Kbuild
@@ -52,7 +52,6 @@ $(obj)/$(bounds-file): kernel/bounds.s FORCE
 
 timeconst-file := include/generated/timeconst.h
 
-#always  += $(timeconst-file)
 targets += $(timeconst-file)
 
 quiet_cmd_gentimeconst = GEN     $@
diff --git a/MAINTAINERS b/MAINTAINERS
index af61ea8d21626c8d42e6e386f20f63f618eb8ed6..058b0fbc52ff041f544f3f4e955159eae9c7efa1 100644 (file)
@@ -9828,6 +9828,13 @@ F:       arch/arc/
 F:     Documentation/devicetree/bindings/arc/
 F:     drivers/tty/serial/arc_uart.c
 
+SYNOPSYS ARC SDP platform support
+M:     Alexey Brodkin <abrodkin@synopsys.com>
+S:     Supported
+F:     arch/arc/plat-axs10x
+F:     arch/arc/boot/dts/ax*
+F:     Documentation/devicetree/bindings/arc/axs10*
+
 SYSTEM CONFIGURATION (SYSCON)
 M:     Lee Jones <lee.jones@linaro.org>
 M:     Arnd Bergmann <arnd@arndb.de>
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index df94ac1f75b6ac517784364cdeff6781eead9a6b..e7cee0a5c56dfa80222d8286a63342b10c07bc42 100644 (file)
@@ -81,17 +81,37 @@ menu "ARC Architecture Configuration"
 
 menu "ARC Platform/SoC/Board"
 
-source "arch/arc/plat-arcfpga/Kconfig"
+source "arch/arc/plat-sim/Kconfig"
 source "arch/arc/plat-tb10x/Kconfig"
+source "arch/arc/plat-axs10x/Kconfig"
 #New platform adds here
 
 endmenu
 
+choice
+       prompt "ARC Instruction Set"
+       default ISA_ARCOMPACT
+
+config ISA_ARCOMPACT
+       bool "ARCompact ISA"
+       help
+         The original ARC ISA of ARC600/700 cores
+
+config ISA_ARCV2
+       bool "ARC ISA v2"
+       help
+         ISA for the Next Generation ARC-HS cores
+
+endchoice
+
 menu "ARC CPU Configuration"
 
 choice
        prompt "ARC Core"
-       default ARC_CPU_770
+       default ARC_CPU_770 if ISA_ARCOMPACT
+       default ARC_CPU_HS if ISA_ARCV2
+
+if ISA_ARCOMPACT
 
 config ARC_CPU_750D
        bool "ARC750D"
@@ -100,7 +120,7 @@ config ARC_CPU_750D
 
 config ARC_CPU_770
        bool "ARC770"
-       select ARC_CPU_REL_4_10
+       select ARC_HAS_SWAPE
        help
          Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
          This core has a bunch of cool new features:
@@ -109,6 +129,27 @@ config ARC_CPU_770
          -Caches: New Prog Model, Region Flush
          -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
 
+endif  #ISA_ARCOMPACT
+
+config ARC_CPU_HS
+       bool "ARC-HS"
+       depends on ISA_ARCV2
+       help
+         Support for ARC HS38x cores based on the ARCv2 ISA.
+         The notable features are:
+           - SMP configurations of up to 4 cores with coherency
+           - Optional L2 Cache and IO-Coherency
+           - Revised Interrupt Architecture (multiple priorities, reg banks,
+               auto stack switch, auto regfile save/restore)
+           - MMUv4 (PIPT dcache, Huge Pages)
+           - Instructions for
+               * 64bit load/store: LDD, STD
+               * Hardware assisted divide/remainder: DIV, REM
+               * Function prologue/epilogue: ENTER_S, LEAVE_S
+               * IRQ enable/disable: CLRI, SETI
+               * pop count: FFS, FLS
+               * SETcc, BMSKN, XBFU...
+
 endchoice
 
 config CPU_BIG_ENDIAN
@@ -117,17 +158,13 @@ config CPU_BIG_ENDIAN
        help
          Build kernel for Big Endian Mode of ARC CPU
 
-# If a platform can't work with 0x8000_0000 based dma_addr_t
-config ARC_PLAT_NEEDS_CPU_TO_DMA
-       bool
-
 config SMP
-       bool "Symmetric Multi-Processing (Incomplete)"
+       bool "Symmetric Multi-Processing"
        default n
+       select ARC_HAS_COH_CACHES if ISA_ARCV2
+       select ARC_MCIP if ISA_ARCV2
        help
-         This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, say N. If you have a system with more
-         than one CPU, say Y.
+         This enables support for systems with more than one CPU.
 
 if SMP
 
@@ -137,13 +174,20 @@ config ARC_HAS_COH_CACHES
 config ARC_HAS_REENTRANT_IRQ_LV2
        def_bool n
 
-endif
+config ARC_MCIP
+       bool "ARConnect Multicore IP (MCIP) Support "
+       depends on ISA_ARCV2
+       help
+         This IP block enables SMP in ARC-HS38 cores.
+         It provides for cross-core interrupts, multi-core debug
+         hardware semaphores, shared memory,....
 
 config NR_CPUS
        int "Maximum number of CPUs (2-4096)"
        range 2 4096
-       depends on SMP
-       default "2"
+       default "4"
+
+endif  #SMP
 
 menuconfig ARC_CACHE
        bool "Enable Cache Support"
@@ -185,7 +229,7 @@ config ARC_CACHE_PAGES
 
 config ARC_CACHE_VIPT_ALIASING
        bool "Support VIPT Aliasing D$"
-       depends on ARC_HAS_DCACHE
+       depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
        default n
 
 endif  #ARC_CACHE
@@ -226,9 +270,10 @@ config ARC_HAS_HW_MPY
          Multiplier. Otherwise a software multiply lib is used
 
 choice
-       prompt "ARC700 MMU Version"
+       prompt "MMU Version"
        default ARC_MMU_V3 if ARC_CPU_770
        default ARC_MMU_V2 if ARC_CPU_750D
+       default ARC_MMU_V4 if ARC_CPU_HS
 
 config ARC_MMU_V1
        bool "MMU v1"
@@ -249,6 +294,10 @@ config ARC_MMU_V3
          Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
          Shared Address Spaces (SASID)
 
+config ARC_MMU_V4
+       bool "MMU v4"
+       depends on ISA_ARCV2
+
 endchoice
 
 
@@ -271,6 +320,8 @@ config ARC_PAGE_SIZE_4K
 
 endchoice
 
+if ISA_ARCOMPACT
+
 config ARC_COMPACT_IRQ_LEVELS
        bool "ARCompact IRQ Priorities: High(2)/Low(1)"
        default n
@@ -290,7 +341,7 @@ config ARC_IRQ5_LV2
 config ARC_IRQ6_LV2
        bool
 
-endif
+endif  #ARC_COMPACT_IRQ_LEVELS
 
 config ARC_FPU_SAVE_RESTORE
        bool "Enable FPU state persistence across context switch"
@@ -303,32 +354,53 @@ config ARC_FPU_SAVE_RESTORE
          based on actual usage of FPU by a task. Thus our implementation does
          this for all tasks in system.
 
+endif  #ISA_ARCOMPACT
+
 config ARC_CANT_LLSC
        def_bool n
 
-menuconfig ARC_CPU_REL_4_10
-       bool "Enable support for Rel 4.10 features"
-       default n
-       help
-         -ARC770 (and dependent features) enabled
-         -ARC750 also shares some of the new features with 770
-
 config ARC_HAS_LLSC
        bool "Insn: LLOCK/SCOND (efficient atomic ops)"
        default y
-       depends on ARC_CPU_770 && !ARC_CANT_LLSC
+       depends on !ARC_CPU_750D && !ARC_CANT_LLSC
 
 config ARC_HAS_SWAPE
        bool "Insn: SWAPE (endian-swap)"
        default y
-       depends on ARC_CPU_REL_4_10
 
-config ARC_HAS_RTSC
-       bool "Insn: RTSC (64-bit r/o cycle counter)"
+if ISA_ARCV2
+
+config ARC_HAS_LL64
+       bool "Insn: 64bit LDD/STD"
+       help
+         Enable gcc to generate 64-bit load/store instructions.
+         The ISA mandates even/odd registers to allow encoding of two
+         dest operands with 2 possible source operands.
        default y
-       depends on ARC_CPU_REL_4_10
+
+config ARC_HAS_RTC
+       bool "Local 64-bit r/o cycle counter"
+       default n
        depends on !SMP
 
+config ARC_HAS_GRTC
+       bool "SMP synchronized 64-bit cycle counter"
+       default y
+       depends on SMP
+
+config ARC_NUMBER_OF_INTERRUPTS
+       int "Number of interrupts"
+       range 8 240
+       default 32
+       help
+         This defines the number of interrupts on the ARCv2HS core.
+         It affects the size of the vector table.
+         The initial 8 IRQs are fixed (Timer, ICI etc) and although configurable
+         in hardware, it keeps things simple for Linux to assume they are always
+         present.
+
+endif  # ISA_ARCV2
+
 endmenu   # "ARC CPU Configuration"
 
 config LINUX_LINK_BASE
@@ -354,8 +426,10 @@ config ARC_CURR_IN_REG
 
 config ARC_EMUL_UNALIGNED
        bool "Emulate unaligned memory access (userspace only)"
+       default n
        select SYSCTL_ARCH_UNALIGN_NO_WARN
        select SYSCTL_ARCH_UNALIGN_ALLOW
+       depends on ISA_ARCOMPACT
        help
          This enables misaligned 16 & 32 bit memory access from user space.
          Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide
@@ -378,9 +452,10 @@ menuconfig ARC_DBG
        bool "ARC debugging"
        default y
 
+if ARC_DBG
+
 config ARC_DW2_UNWIND
        bool "Enable DWARF specific kernel stack unwind"
-       depends on ARC_DBG
        default y
        select KALLSYMS
        help
@@ -394,18 +469,38 @@ config ARC_DW2_UNWIND
 
 config ARC_DBG_TLB_PARANOIA
        bool "Paranoia Checks in Low Level TLB Handlers"
-       depends on ARC_DBG
        default n
 
 config ARC_DBG_TLB_MISS_COUNT
        bool "Profile TLB Misses"
        default n
        select DEBUG_FS
-       depends on ARC_DBG
        help
          Counts number of I and D TLB Misses and exports them via Debugfs
          The counters can be cleared via Debugfs as well
 
+if SMP
+
+config ARC_IPI_DBG
+       bool "Debug Inter Core interrupts"
+       default n
+
+endif  # SMP
+
+endif  # ARC_DBG
+
+config ARC_UBOOT_SUPPORT
+       bool "Support uboot arg Handling"
+       default n
+       help
+         ARC Linux by default checks for uboot provided args as pointers to
+         external cmdline or DTB. This however breaks in absence of uboot,
+         when booting from Metaware debugger directly, as the registers are
+         not zeroed out on reset by mdb and/or ARCv2 based cores. The bogus
+         registers look like uboot args to kernel which then chokes.
+         So only enable the uboot arg checking/processing if users are sure
+         of uboot being in play.
+
 config ARC_BUILTIN_DTB_NAME
        string "Built in DTB"
        help
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index db72fec0e160fc8e67fe17c5faf9c6ef54c8bcdb..6107062c01115dbea8a56e02bce254a8ba5b91af 100644 (file)
@@ -9,12 +9,14 @@
 UTS_MACHINE := arc
 
 ifeq ($(CROSS_COMPILE),)
-CROSS_COMPILE := arc-linux-uclibc-
+CROSS_COMPILE := arc-linux-
 endif
 
 KBUILD_DEFCONFIG := nsim_700_defconfig
 
-cflags-y       += -mA7 -fno-common -pipe -fno-builtin -D__linux__
+cflags-y       += -fno-common -pipe -fno-builtin -D__linux__
+cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
+cflags-$(CONFIG_ISA_ARCV2)     += -mcpu=archs
 
 ifdef CONFIG_ARC_CURR_IN_REG
# For a global register definition, make sure it gets passed to every file
@@ -33,7 +35,11 @@ cflags-$(atleast_gcc44)                      += -fsection-anchors
 
 cflags-$(CONFIG_ARC_HAS_LLSC)          += -mlock
 cflags-$(CONFIG_ARC_HAS_SWAPE)         += -mswape
-cflags-$(CONFIG_ARC_HAS_RTSC)          += -mrtsc
+
+ifndef CONFIG_ARC_HAS_LL64
+cflags-$(CONFIG_ISA_ARCV2)             += -mno-ll64
+endif
+
 cflags-$(CONFIG_ARC_DW2_UNWIND)                += -fasynchronous-unwind-tables
 
 # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
@@ -81,8 +87,9 @@ core-y                += arch/arc/
 # w/o this dtb won't embed into kernel binary
 core-y         += arch/arc/boot/dts/
 
-core-$(CONFIG_ARC_PLAT_FPGA_LEGACY)    += arch/arc/plat-arcfpga/
-core-$(CONFIG_ARC_PLAT_TB10X)          += arch/arc/plat-tb10x/
+core-$(CONFIG_ARC_PLAT_SIM)    += arch/arc/plat-sim/
+core-$(CONFIG_ARC_PLAT_TB10X)  += arch/arc/plat-tb10x/
+core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/
 
 drivers-$(CONFIG_OPROFILE)     += arch/arc/oprofile/
 
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index faf240e29ec2cebc1b6f7762b0c0556de37f1a8f..b0e3f19bbd07e32cb57c91f803302603ebe429fc 100644 (file)
@@ -1,5 +1,5 @@
 # Built-in dtb
-builtindtb-y           := angel4
+builtindtb-y           := nsim_700
 
 ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"")
        builtindtb-y    := $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME))
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts
deleted file mode 100644 (file)
index 3b076fb..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-/dts-v1/;
-
-/include/ "skeleton.dtsi"
-
-/ {
-       compatible = "snps,arc-angel4";
-       clock-frequency = <80000000>;   /* 80 MHZ */
-       #address-cells = <1>;
-       #size-cells = <1>;
-       interrupt-parent = <&intc>;
-
-       chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
-       };
-
-       aliases {
-               serial0 = &arcuart0;
-       };
-
-       fpga {
-               compatible = "simple-bus";
-               #address-cells = <1>;
-               #size-cells = <1>;
-
-               /* child and parent address space 1:1 mapped */
-               ranges;
-
-               intc: interrupt-controller {
-                       compatible = "snps,arc700-intc";
-                       interrupt-controller;
-                       #interrupt-cells = <1>;
-               };
-
-               arcuart0: serial@c0fc1000 {
-                       compatible = "snps,arc-uart";
-                       reg = <0xc0fc1000 0x100>;
-                       interrupts = <5>;
-                       clock-frequency = <80000000>;
-                       current-speed = <115200>;
-                       status = "okay";
-               };
-
-               ethernet@c0fc2000 {
-                       compatible = "snps,arc-emac";
-                       reg = <0xc0fc2000 0x3c>;
-                       interrupts = <6>;
-                       mac-address = [ 00 11 22 33 44 55 ];
-                       clock-frequency = <80000000>;
-                       max-speed = <100>;
-                       phy = <&phy0>;
-
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-                       phy0: ethernet-phy@0 {
-                               reg = <1>;
-                       };
-               };
-
-               arcpmu0: pmu {
-                       compatible = "snps,arc700-pct";
-               };
-       };
-};
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
new file mode 100644 (file)
index 0000000..a5e2726
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC001 770D/EM6/AS221 CPU card
+ * Note that this file only supports the 770D CPU
+ */
+
+/ {
+       compatible = "snps,arc";
+       clock-frequency = <750000000>;  /* 750 MHZ */
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       cpu_card {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               ranges = <0x00000000 0xf0000000 0x10000000>;
+
+               cpu_intc: arc700-intc@cpu {
+                       compatible = "snps,arc700-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               /*
+                * this GPIO block ORs all interrupts on CPU card (creg,..)
+                * to uplink only 1 IRQ to ARC core intc
+                */
+               dw-apb-gpio@0x2000 {
+                       compatible = "snps,dw-apb-gpio";
+                       reg = < 0x2000 0x80 >;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       ictl_intc: gpio-controller@0 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <30>;
+                               reg = <0>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                               interrupt-parent = <&cpu_intc>;
+                               interrupts = <15>;
+                       };
+               };
+
+               debug_uart: dw-apb-uart@0x5000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x5000 0x100>;
+                       clock-frequency = <33333000>;
+                       interrupt-parent = <&ictl_intc>;
+                       interrupts = <19 4>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               arcpmu0: pmu {
+                       compatible = "snps,arc700-pct";
+               };
+       };
+
+       /*
+        * This INTC is actually connected to DW APB GPIO
+        * which acts as a wire between MB INTC and CPU INTC.
+        * GPIO INTC is configured in platform init code
+        * and here we mimic direct connection from MB INTC to
+        * CPU INTC, thus we set "interrupts = <7>" instead of
+        * "interrupts = <12>"
+        *
+        * This intc actually resides on the MB, but we move it here to
+        * avoid duplicating the MB dtsi file, given that the IRQs from
+        * this intc to the cpu intc differ between axs101 and axs103
+        */
+       mb_intc: dw-apb-ictl@0xe0012000 {
+               #interrupt-cells = <1>;
+               compatible = "snps,dw-apb-ictl";
+               reg = < 0xe0012000 0x200 >;
+               interrupt-controller;
+               interrupt-parent = <&cpu_intc>;
+               interrupts = < 7 >;
+       };
+
+       memory {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0x80000000 0x40000000>;
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>;  /* 512MiB */
+       };
+};
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
new file mode 100644 (file)
index 0000000..15c8d62
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x UP configuration
+ */
+
+/ {
+       compatible = "snps,arc";
+       clock-frequency = <75000000>;
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       cpu_card {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               ranges = <0x00000000 0xf0000000 0x10000000>;
+
+               cpu_intc: archs-intc@cpu {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               /*
+                * this GPIO block ORs all interrupts on CPU card (creg,..)
+                * to uplink only 1 IRQ to ARC core intc
+                */
+               dw-apb-gpio@0x2000 {
+                       compatible = "snps,dw-apb-gpio";
+                       reg = < 0x2000 0x80 >;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       ictl_intc: gpio-controller@0 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <30>;
+                               reg = <0>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                               interrupt-parent = <&cpu_intc>;
+                               interrupts = <25>;
+                       };
+               };
+
+               debug_uart: dw-apb-uart@0x5000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x5000 0x100>;
+                       clock-frequency = <33333000>;
+                       interrupt-parent = <&ictl_intc>;
+                       interrupts = <2 4>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               arcpct0: pct {
+                       compatible = "snps,archs-pct";
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&cpu_intc>;
+                       interrupts = <20>;
+               };
+       };
+
+       /*
+        * This INTC is actually connected to DW APB GPIO
+        * which acts as a wire between MB INTC and CPU INTC.
+        * GPIO INTC is configured in platform init code
+        * and here we mimic direct connection from MB INTC to
+        * CPU INTC, thus we set "interrupts = <7>" instead of
+        * "interrupts = <12>"
+        *
+        * This intc actually resides on the MB, but we move it here to
+        * avoid duplicating the MB dtsi file, given that the IRQs from
+        * this intc to the cpu intc differ between axs101 and axs103
+        */
+       mb_intc: dw-apb-ictl@0xe0012000 {
+               #interrupt-cells = <1>;
+               compatible = "snps,dw-apb-ictl";
+               reg = < 0xe0012000 0x200 >;
+               interrupt-controller;
+               interrupt-parent = <&cpu_intc>;
+               interrupts = < 24 >;
+       };
+
+       memory {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0x80000000 0x40000000>;
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>;  /* 512MiB */
+       };
+};
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
new file mode 100644 (file)
index 0000000..199d428
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x2 (Dual Core) with IDU intc
+ */
+
+/ {
+       compatible = "snps,arc";
+       clock-frequency = <75000000>;
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       cpu_card {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               ranges = <0x00000000 0xf0000000 0x10000000>;
+
+               cpu_intc: archs-intc@cpu {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               idu_intc: idu-interrupt-controller {
+                       compatible = "snps,archs-idu-intc";
+                       interrupt-controller;
+                       interrupt-parent = <&cpu_intc>;
+
+                       /*
+                        * <hwirq  distribution>
+                        * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+                        */
+                       #interrupt-cells = <2>;
+
+                       /*
+                        * upstream irqs to core intc - downstream these are
+                        * "COMMON" irq 0,1..
+                        */
+                       interrupts = <24 25>;
+               };
+
+               /*
+                * this GPIO block ORs all interrupts on CPU card (creg,..)
+                * to uplink only 1 IRQ to ARC core intc
+                */
+               dw-apb-gpio@0x2000 {
+                       compatible = "snps,dw-apb-gpio";
+                       reg = < 0x2000 0x80 >;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       ictl_intc: gpio-controller@0 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <30>;
+                               reg = <0>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                               interrupt-parent = <&idu_intc>;
+
+                               /*
+                                * cmn irq 1 -> cpu irq 25
+                                * Distribute to cpu0 only
+                                */
+                               interrupts = <1 1>;
+                       };
+               };
+
+               debug_uart: dw-apb-uart@0x5000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x5000 0x100>;
+                       clock-frequency = <33333000>;
+                       interrupt-parent = <&ictl_intc>;
+                       interrupts = <2 4>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               arcpct0: pct {
+                       compatible = "snps,archs-pct";
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&cpu_intc>;
+                       interrupts = <20>;
+               };
+       };
+
+       /*
+        * This INTC is actually connected to DW APB GPIO
+        * which acts as a wire between MB INTC and CPU INTC.
+        * GPIO INTC is configured in platform init code
+        * and here we mimic direct connection from MB INTC to
+        * CPU INTC, thus we set "interrupts = <0 1>" instead of
+        * "interrupts = <12>"
+        *
+        * This intc actually resides on the MB, but we move it here to
+        * avoid duplicating the MB dtsi file, given that the IRQs from
+        * this intc to the cpu intc differ between axs101 and axs103
+        */
+       mb_intc: dw-apb-ictl@0xe0012000 {
+               #interrupt-cells = <1>;
+               compatible = "snps,dw-apb-ictl";
+               reg = < 0xe0012000 0x200 >;
+               interrupt-controller;
+               interrupt-parent = <&idu_intc>;
+               interrupts = <0 1>;     /* cmn irq 0 -> cpu irq 24
+                                          distribute to cpu0 only */
+       };
+
+       memory {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0x80000000 0x40000000>;
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>;  /* 512MiB */
+       };
+};
diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts
new file mode 100644 (file)
index 0000000..3f9b058
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC AXS101 S/W development platform
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "axc001.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+       compatible = "snps,axs101", "snps,arc-sdp";
+
+       chosen {
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+       };
+};
diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts
new file mode 100644 (file)
index 0000000..e6d0e31
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for AXS103 SDP with AXS10X Main Board and
+ * AXC003 FPGA Card (with UP bitfile)
+ */
+/dts-v1/;
+
+/include/ "axc003.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+       compatible = "snps,axs103", "snps,arc-sdp";
+
+       chosen {
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1";
+       };
+};
diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts
new file mode 100644 (file)
index 0000000..f999fef
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for AXS103 SDP with AXS10X Main Board and
+ * AXC003 FPGA Card (with SMP bitfile)
+ */
+/dts-v1/;
+
+/include/ "axc003_idu.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+       compatible = "snps,axs103", "snps,arc-sdp";
+
+       chosen {
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1";
+       };
+};
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
new file mode 100644 (file)
index 0000000..f3db321
--- /dev/null
@@ -0,0 +1,224 @@
+/*
+ * Support for peripherals on the AXS10x mainboard
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+       axs10x_mb {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0xe0000000 0x10000000>;
+               interrupt-parent = <&mb_intc>;
+
+               clocks {
+                       i2cclk: i2cclk {
+                               compatible = "fixed-clock";
+                               clock-frequency = <50000000>;
+                               #clock-cells = <0>;
+                       };
+
+                       apbclk: apbclk {
+                               compatible = "fixed-clock";
+                               clock-frequency = <50000000>;
+                               #clock-cells = <0>;
+                       };
+
+                       mmcclk: mmcclk {
+                               compatible = "fixed-clock";
+                               clock-frequency = <50000000>;
+                               #clock-cells = <0>;
+                       };
+               };
+
+               ethernet@0x18000 {
+                       #interrupt-cells = <1>;
+                       compatible = "snps,dwmac";
+                       reg = < 0x18000 0x2000 >;
+                       interrupts = < 4 >;
+                       interrupt-names = "macirq";
+                       phy-mode = "rgmii";
+                       snps,pbl = < 32 >;
+                       clocks = <&apbclk>;
+                       clock-names = "stmmaceth";
+               };
+
+               ehci@0x40000 {
+                       compatible = "generic-ehci";
+                       reg = < 0x40000 0x100 >;
+                       interrupts = < 8 >;
+               };
+
+               ohci@0x60000 {
+                       compatible = "generic-ohci";
+                       reg = < 0x60000 0x100 >;
+                       interrupts = < 8 >;
+               };
+
+               /*
+                * According to the DW Mobile Storage databook, the "Hold
+                * Register" must be used if the card is enumerated in SDR12
+                * or SDR25 modes.
+                *
+                * Utilization of the "Hold Register" is already implemented
+                * via dw_mci_pltfm_prepare_command(), which in turn gets
+                * used through the dw_mci_drv_data->prepare_command call-back.
+                * This call-back is used on the Altera SoCFPGA platform, so
+                * we may reuse it by declaring ourselves compatible with their
+                * "altr,socfpga-dw-mshc".
+                *
+                * Most probably "Hold Register" utilization is a platform-
+                * independent requirement, which means a single unified
+                * "snps,dw-mshc" should be enough for all users of DW MMC once
+                * dw_mci_pltfm_prepare_command() is used in generic platform
+                * code.
+                */
+               mmc@0x15000 {
+                       compatible = "altr,socfpga-dw-mshc";
+                       reg = < 0x15000 0x400 >;
+                       num-slots = < 1 >;
+                       fifo-depth = < 16 >;
+                       card-detect-delay = < 200 >;
+                       clocks = <&apbclk>, <&mmcclk>;
+                       clock-names = "biu", "ciu";
+                       interrupts = < 7 >;
+                       bus-width = < 4 >;
+               };
+
+               uart@0x20000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x20000 0x100>;
+                       clock-frequency = <33333333>;
+                       interrupts = <17>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               uart@0x21000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x21000 0x100>;
+                       clock-frequency = <33333333>;
+                       interrupts = <18>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               /* UART muxed with USB data port (ttyS3) */
+               uart@0x22000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x22000 0x100>;
+                       clock-frequency = <33333333>;
+                       interrupts = <19>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               i2c@0x1d000 {
+                       compatible = "snps,designware-i2c";
+                       reg = <0x1d000 0x100>;
+                       clock-frequency = <400000>;
+                       clocks = <&i2cclk>;
+                       interrupts = <14>;
+               };
+
+               i2c@0x1e000 {
+                       compatible = "snps,designware-i2c";
+                       reg = <0x1e000 0x100>;
+                       clock-frequency = <400000>;
+                       clocks = <&i2cclk>;
+                       interrupts = <15>;
+               };
+
+               i2c@0x1f000 {
+                       compatible = "snps,designware-i2c";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg = <0x1f000 0x100>;
+                       clock-frequency = <400000>;
+                       clocks = <&i2cclk>;
+                       interrupts = <16>;
+
+                       eeprom@0x54{
+                               compatible = "24c01";
+                               reg = <0x54>;
+                               pagesize = <0x8>;
+                       };
+
+                       eeprom@0x57{
+                               compatible = "24c04";
+                               reg = <0x57>;
+                               pagesize = <0x8>;
+                       };
+               };
+
+               gpio0:gpio@13000 {
+                       compatible = "snps,dw-apb-gpio";
+                       reg = <0x13000 0x1000>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       gpio0_banka: gpio-controller@0 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <32>;
+                               reg = <0>;
+                       };
+
+                       gpio0_bankb: gpio-controller@1 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <8>;
+                               reg = <1>;
+                       };
+
+                       gpio0_bankc: gpio-controller@2 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <8>;
+                               reg = <2>;
+                       };
+               };
+
+               gpio1: gpio@14000 {
+                       compatible = "snps,dw-apb-gpio";
+                       reg = <0x14000 0x1000>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       gpio1_banka: gpio-controller@0 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <30>;
+                               reg = <0>;
+                       };
+
+                       gpio1_bankb: gpio-controller@1 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <10>;
+                               reg = <1>;
+                       };
+
+                       gpio1_bankc: gpio-controller@2 {
+                               compatible = "snps,dw-apb-gpio-port";
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               snps,nr-gpios = <8>;
+                               reg = <2>;
+                       };
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
new file mode 100644 (file)
index 0000000..105a001
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+       compatible = "snps,nsim";
+       clock-frequency = <80000000>;   /* 80 MHz */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&intc>;
+
+       chosen {
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+       };
+
+       aliases {
+               serial0 = &arcuart0;
+       };
+
+       fpga {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               /* child and parent address space 1:1 mapped */
+               ranges;
+
+               intc: interrupt-controller {
+                       compatible = "snps,arc700-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               arcuart0: serial@c0fc1000 {
+                       compatible = "snps,arc-uart";
+                       reg = <0xc0fc1000 0x100>;
+                       interrupts = <5>;
+                       clock-frequency = <80000000>;
+                       current-speed = <115200>;
+                       status = "okay";
+               };
+
+               ethernet@c0fc2000 {
+                       compatible = "snps,arc-emac";
+                       reg = <0xc0fc2000 0x3c>;
+                       interrupts = <6>;
+                       mac-address = [ 00 11 22 33 44 55 ];
+                       clock-frequency = <80000000>;
+                       max-speed = <100>;
+                       phy = <&phy0>;
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       phy0: ethernet-phy@0 {
+                               reg = <1>;
+                       };
+               };
+
+               arcpmu0: pmu {
+                       compatible = "snps,arc700-pct";
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
new file mode 100644 (file)
index 0000000..911f069
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+       compatible = "snps,nsim_hs";
+       interrupt-parent = <&core_intc>;
+
+       chosen {
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+       };
+
+       aliases {
+               serial0 = &arcuart0;
+       };
+
+       fpga {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               /* child and parent address space 1:1 mapped */
+               ranges;
+
+               core_intc: core-interrupt-controller {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               arcuart0: serial@c0fc1000 {
+                       compatible = "snps,arc-uart";
+                       reg = <0xc0fc1000 0x100>;
+                       interrupts = <24>;
+                       clock-frequency = <80000000>;
+                       current-speed = <115200>;
+                       status = "okay";
+               };
+
+               arcpct0: pct {
+                       compatible = "snps,archs-pct";
+                       #interrupt-cells = <1>;
+                       interrupts = <20>;
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
new file mode 100644 (file)
index 0000000..46ab319
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+       compatible = "snps,nsim_hs";
+       interrupt-parent = <&core_intc>;
+
+       chosen {
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+       };
+
+       aliases {
+               serial0 = &arcuart0;
+       };
+
+       fpga {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               /* child and parent address space 1:1 mapped */
+               ranges;
+
+               core_intc: core-interrupt-controller {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               idu_intc: idu-interrupt-controller {
+                       compatible = "snps,archs-idu-intc";
+                       interrupt-controller;
+                       interrupt-parent = <&core_intc>;
+
+                       /*
+                        * <hwirq  distribution>
+                        * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
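+                        * e.g. <1 2> selects common irq 1, routed to cpu1 only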
+                        */
+                       #interrupt-cells = <2>;
+
+                       /*
+                        * upstream irqs to core intc - downstream these are
+                        * "COMMON" irq 0,1..
+                        */
+                       interrupts = <24 25 26 27 28 29 30 31>;
+               };
+
+               arcuart0: serial@c0fc1000 {
+                       compatible = "snps,arc-uart";
+                       reg = <0xc0fc1000 0x100>;
+                       interrupt-parent = <&idu_intc>;
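+                       /* cmn irq 0 -> core irq 24; RR distributed to all cpus */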
+                       interrupts = <0 0>;
+                       clock-frequency = <80000000>;
+                       current-speed = <115200>;
+                       status = "okay";
+               };
+
+               arcpct0: pct {
+                       compatible = "snps,archs-pct";
+                       #interrupt-cells = <1>;
+                       interrupts = <20>;
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts
new file mode 100644 (file)
index 0000000..d64a96f
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+       compatible = "snps,nsimosci_hs";
+       clock-frequency = <20000000>;   /* 20 MHz */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&core_intc>;
+
+       chosen {
+               /* this is for console on PGU */
+               /* bootargs = "console=tty0 consoleblank=0"; */
+               /* this is for console on serial */
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+       };
+
+       aliases {
+               serial0 = &uart0;
+       };
+
+       fpga {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               /* child and parent address space 1:1 mapped */
+               ranges;
+
+               core_intc: core-interrupt-controller {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               uart0: serial@f0000000 {
+                       compatible = "ns8250";
+                       reg = <0xf0000000 0x2000>;
+                       interrupts = <24>;
+                       clock-frequency = <3686400>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+                       no-loopback-test = <1>;
+               };
+
+               pgu0: pgu@f9000000 {
+                       compatible = "snps,arcpgufb";
+                       reg = <0xf9000000 0x400>;
+               };
+
+               ps2: ps2@f9000400 {
+                       compatible = "snps,arc_ps2";
+                       reg = <0xf9000400 0x14>;
+                       interrupts = <27>;
+                       interrupt-names = "arc_ps2_irq";
+               };
+
+               eth0: ethernet@f0003000 {
+                       compatible = "snps,oscilan";
+                       reg = <0xf0003000 0x44>;
+                       interrupts = <25>, <26>;
+                       interrupt-names = "rx", "tx";
+               };
+
+               arcpct0: pct {
+                       compatible = "snps,archs-pct";
+                       #interrupt-cells = <1>;
+                       interrupts = <20>;
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts
new file mode 100644 (file)
index 0000000..f6bf0ca
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+       compatible = "snps,nsimosci_hs";
+       clock-frequency = <5000000>;    /* 5 MHz */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&core_intc>;
+
+       chosen {
+               /* this is for console on serial */
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+       };
+
+       aliases {
+               serial0 = &uart0;
+       };
+
+       fpga {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               /* child and parent address space 1:1 mapped */
+               ranges;
+
+               core_intc: core-interrupt-controller {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+/*                     interrupts = <16 17 18 19 20 21 22 23 24 25>; */
+               };
+
+               idu_intc: idu-interrupt-controller {
+                       compatible = "snps,archs-idu-intc";
+                       interrupt-controller;
+                       interrupt-parent = <&core_intc>;
+
+                       /*
+                        * <hwirq  distribution>
+                        * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+                        */
+                       #interrupt-cells = <2>;
+
+                       /*
+                        * upstream irqs to core intc - downstream these are
+                        * "COMMON" irq 0,1..
+                        */
+                       interrupts = <24 25 26 27 28 29 30 31>;
+               };
+
+               uart0: serial@f0000000 {
+                       compatible = "ns8250";
+                       reg = <0xf0000000 0x2000>;
+                       interrupt-parent = <&idu_intc>;
+                       interrupts = <0 0>; /* cmn irq 0 -> cpu irq 24;
+                                              RR distributed to all cpus */
+                       clock-frequency = <3686400>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+                       no-loopback-test = <1>;
+               };
+
+               pgu0: pgu@f9000000 {
+                       compatible = "snps,arcpgufb";
+                       reg = <0xf9000000 0x400>;
+               };
+
+               ps2: ps2@f9000400 {
+                       compatible = "snps,arc_ps2";
+                       reg = <0xf9000400 0x14>;
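+                       /* common irq 3 (-> core irq 27); RR distributed to all cpus */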
+                       interrupts = <3 0>;
+                       interrupt-parent = <&idu_intc>;
+                       interrupt-names = "arc_ps2_irq";
+               };
+
+               eth0: ethernet@f0003000 {
+                       compatible = "snps,oscilan";
+                       reg = <0xf0003000 0x44>;
+                       interrupt-parent = <&idu_intc>;
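+                       /* rx = common irq 1, tx = common irq 2;
+                          distribution 2 directs both to cpu1 only */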
+                       interrupts = <1 2>, <2 2>;
+                       interrupt-names = "rx", "tx";
+               };
+
+               arcpct0: pct {
+                       compatible = "snps,archs-pct";
+                       #interrupt-cells = <1>;
+                       interrupts = <20>;
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
new file mode 100644 (file)
index 0000000..9393fd9
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013, 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x UP configuration (VDK version)
+ */
+
+/ {
+       compatible = "snps,arc";
+       clock-frequency = <50000000>;
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       cpu_card {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               ranges = <0x00000000 0xf0000000 0x10000000>;
+
+               cpu_intc: archs-intc@cpu {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               debug_uart: dw-apb-uart@0x5000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x5000 0x100>;
+                       clock-frequency = <2403200>;
+                       interrupt-parent = <&cpu_intc>;
+                       interrupts = <19>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+       };
+
+       mb_intc: dw-apb-ictl@0xe0012000 {
+               #interrupt-cells = <1>;
+               compatible = "snps,dw-apb-ictl";
+               reg = < 0xe0012000 0x200 >;
+               interrupt-controller;
+               interrupt-parent = <&cpu_intc>;
+               interrupts = < 18 >;
+       };
+
+       memory {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0x80000000 0x40000000>;
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>;  /* 512MiB */
+       };
+};
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
new file mode 100644 (file)
index 0000000..9bee8ed
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card:
+ * HS38x2 (Dual Core) with IDU intc (VDK version)
+ */
+
+/ {
+       compatible = "snps,arc";
+       clock-frequency = <50000000>;
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       cpu_card {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               ranges = <0x00000000 0xf0000000 0x10000000>;
+
+               cpu_intc: archs-intc@cpu {
+                       compatible = "snps,archs-intc";
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               idu_intc: idu-interrupt-controller {
+                       compatible = "snps,archs-idu-intc";
+                       interrupt-controller;
+                       interrupt-parent = <&cpu_intc>;
+
+                       /*
+                        * <hwirq  distribution>
+                        * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+                        */
+                       #interrupt-cells = <2>;
+
+                       interrupts = <24 25 26 27>;
+               };
+
+               debug_uart: dw-apb-uart@0x5000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x5000 0x100>;
+                       clock-frequency = <2403200>;
+                       interrupt-parent = <&idu_intc>;
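+                       /* common irq 2; RR distributed across cpus */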
+                       interrupts = <2 0>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+       };
+
+       mb_intc: dw-apb-ictl@0xe0012000 {
+               #interrupt-cells = <1>;
+               compatible = "snps,dw-apb-ictl";
+               reg = < 0xe0012000 0x200 >;
+               interrupt-controller;
+               interrupt-parent = <&idu_intc>;
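+               /* common irq 0; RR distributed across cpus */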
+               interrupts = < 0 0 >;
+       };
+
+       memory {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0x80000000 0x40000000>;
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>;  /* 512MiB */
+       };
+};
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
new file mode 100644 (file)
index 0000000..45cd665
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Support for peripherals on the AXS10x mainboard (VDK version)
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+       axs10x_mb_vdk {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0x00000000 0xe0000000 0x10000000>;
+               interrupt-parent = <&mb_intc>;
+
+               clocks {
+                       apbclk: apbclk {
+                               compatible = "fixed-clock";
+                               clock-frequency = <50000000>;
+                               #clock-cells = <0>;
+                       };
+
+               };
+
+               ethernet@0x18000 {
+                       #interrupt-cells = <1>;
+                       compatible = "snps,dwmac";
+                       reg = < 0x18000 0x2000 >;
+                       interrupts = < 4 >;
+                       interrupt-names = "macirq";
+                       phy-mode = "rgmii";
+                       snps,phy-addr = < 0 >;  // VDK model phy address is 0
+                       snps,pbl = < 32 >;
+                       clocks = <&apbclk>;
+                       clock-names = "stmmaceth";
+               };
+
+               ehci@0x40000 {
+                       compatible = "generic-ehci";
+                       reg = < 0x40000 0x100 >;
+                       interrupts = < 8 >;
+               };
+
+               uart@0x20000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x20000 0x100>;
+                       clock-frequency = <2403200>;
+                       interrupts = <17>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               uart@0x21000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x21000 0x100>;
+                       clock-frequency = <2403200>;
+                       interrupts = <18>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+               uart@0x22000 {
+                       compatible = "snps,dw-apb-uart";
+                       reg = <0x22000 0x100>;
+                       clock-frequency = <2403200>;
+                       interrupts = <19>;
+                       baud = <115200>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+               };
+
+/* PGU output directly sent to virtual LCD screen; HDMI controller not modelled */
+               pgu@0x17000 {
+                       compatible = "snps,arcpgufb";
+                       reg = <0x17000 0x400>;
+                       clock-frequency = <51000000>; /* PGU clock is initiated in the init function */
+                       /* interrupts = <5>;   PGU interrupts not used, this vector is used for ps2 below */
+               };
+
+/* VDK has an additional PS/2 keyboard/mouse interface integrated in the LCD screen model */
+               ps2: ps2@17400 {
+                       compatible = "snps,arc_ps2";
+                       reg = <0x17400 0x14>;
+                       interrupts = <5>;
+                       interrupt-names = "arc_ps2_irq";
+               };
+       };
+};
diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts
new file mode 100644 (file)
index 0000000..5d803dd
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC HS38 Virtual Development Kit (VDK)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "vdk_axc003.dtsi"
+/include/ "vdk_axs10x_mb.dtsi"
+
+/ {
+       compatible = "snps,axs103";
+
+       chosen {
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+       };
+};
diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts
new file mode 100644 (file)
index 0000000..031a5bc
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC HS38 Virtual Development Kit, SMP version (VDK)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "vdk_axc003_idu.dtsi"
+/include/ "vdk_axs10x_mb.dtsi"
+
+/ {
+       compatible = "snps,axs103";
+
+       chosen {
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+       };
+};
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
new file mode 100644 (file)
index 0000000..562dac6
--- /dev/null
@@ -0,0 +1,111 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS101=y
+CONFIG_ARC_CACHE_LINE_SHIFT=5
+CONFIG_ARC_BUILTIN_DTB_NAME="axs101"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
new file mode 100644 (file)
index 0000000..83a6d8d
--- /dev/null
@@ -0,0 +1,117 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="axs103"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_AXS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
new file mode 100644 (file)
index 0000000..f1e1c84
--- /dev/null
@@ -0,0 +1,118 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_AXS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
index ef4d3bc7b6c05fdaa414c3c0312bfb829ab769e9..138f9d8879570a8b329415d9238c710fa6e5b032 100644 (file)
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
@@ -22,9 +22,8 @@ CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
 # CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
new file mode 100644 (file)
index 0000000..f761a7c
--- /dev/null
@@ -0,0 +1,64 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_DEBUG_PREEMPT is not set
+CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
new file mode 100644 (file)
index 0000000..dc6f74f
--- /dev/null
@@ -0,0 +1,63 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BOARD_ML509=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_XZ_DEC=y
index d2ac4e56ba1dd6955c43044aa0ec8da7e062c785..31e1d95764ff91dc10fe80d936a5613e6f713cc4 100644 (file)
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
@@ -23,8 +23,7 @@ CONFIG_MODULES=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_PLAT_SIM=y
 CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
 # CONFIG_COMPACTION is not set
 CONFIG_NET=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
new file mode 100644 (file)
index 0000000..3fef0a2
--- /dev/null
@@ -0,0 +1,73 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_NET_OSCI_LAN=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_MOUSE_PS2_ALPS is not set
+# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
+# CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_TRACKPOINT is not set
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
new file mode 100644 (file)
index 0000000..5178483
--- /dev/null
@@ -0,0 +1,93 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BOARD_ML509=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_HAS_LL64=y
+# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NET_OSCI_LAN=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_ARC_PS2=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FTRACE=y
index 6be6492442d640d3aba09b42239650995aa6e5be..3b4dc9cebcf15234f3d42a4efd2e40b9c5bd4150 100644 (file)
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="tb10x"
 CONFIG_SYSVIPC=y
@@ -26,7 +26,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLOCK is not set
 CONFIG_ARC_PLAT_TB10X=y
 CONFIG_ARC_CACHE_LINE_SHIFT=5
-# CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_STACK_NONEXEC=y
 CONFIG_HZ=250
 CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
new file mode 100644 (file)
index 0000000..ef35ef3
--- /dev/null
@@ -0,0 +1,102 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38"
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_SERIAL=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
new file mode 100644 (file)
index 0000000..634509e
--- /dev/null
@@ -0,0 +1,104 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+# CONFIG_ARC_HAS_GRTC is not set
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_SERIAL=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
index 769b312c1abb5406c34f52a2db5201a490be8a0b..1a80cc91a03ba323f8418dfb705098a00f031671 100644 (file)
@@ -1,5 +1,4 @@
 generic-y += auxvec.h
-generic-y += barrier.h
 generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
index e2b1b1211b0d4ddbb527ed7239eaf05759dfae27..070f58827a5c12c2e19469ff4280f7c69e0f36a3 100644 (file)
@@ -16,6 +16,8 @@
 #define ARC_REG_PERIBASE_BCR   0x69
 #define ARC_REG_FP_BCR         0x6B    /* ARCompact: Single-Precision FPU */
 #define ARC_REG_DPFP_BCR       0x6C    /* ARCompact: Dbl Precision FPU */
+#define ARC_REG_FP_V2_BCR      0xc8    /* ARCv2 FPU */
+#define ARC_REG_SLC_BCR                0xce
 #define ARC_REG_DCCM_BCR       0x74    /* DCCM Present + SZ */
 #define ARC_REG_TIMERS_BCR     0x75
 #define ARC_REG_AP_BCR         0x76
@@ -31,6 +33,7 @@
 #define ARC_REG_BPU_BCR                0xc0
 #define ARC_REG_ISA_CFG_BCR    0xc1
 #define ARC_REG_RTT_BCR                0xF2
+#define ARC_REG_IRQ_BCR                0xF3
 #define ARC_REG_SMART_BCR      0xFF
 
 /* status32 Bits Positions */
@@ -51,6 +54,7 @@
  * [15: 8] = Exception Cause Code
  * [ 7: 0] = Exception Parameters (for certain types only)
  */
+#ifdef CONFIG_ISA_ARCOMPACT
 #define ECR_V_MEM_ERR                  0x01
 #define ECR_V_INSN_ERR                 0x02
 #define ECR_V_MACH_CHK                 0x20
 #define ECR_V_ITLB_MISS                        0x21
 #define ECR_V_DTLB_MISS                        0x22
 #define ECR_V_PROTV                    0x23
 #define ECR_V_TRAP                     0x25
+#else
+#define ECR_V_MEM_ERR                  0x01
+#define ECR_V_INSN_ERR                 0x02
+#define ECR_V_MACH_CHK                 0x03
+#define ECR_V_ITLB_MISS                        0x04
+#define ECR_V_DTLB_MISS                        0x05
+#define ECR_V_PROTV                    0x06
+#define ECR_V_TRAP                     0x09
+#endif
 
 /* DTLB Miss and Protection Violation Cause Codes */
 
@@ -76,9 +89,6 @@
 #define ECR_C_BIT_DTLB_LD_MISS         8
 #define ECR_C_BIT_DTLB_ST_MISS         9
 
-/* Dummy ECR values for Interrupts */
-#define event_IRQ1             0x0031abcd
-#define event_IRQ2             0x0032abcd
 
 /* Auxiliary registers */
 #define AUX_IDENTITY           4
@@ -204,9 +214,11 @@ struct bcr_identity {
 
 struct bcr_isa {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int pad1:23, atomic1:1, ver:8;
+       unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1,
+                    pad1:11, atomic1:1, ver:8;
 #else
-       unsigned int ver:8, atomic1:1, pad1:23;
+       unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1,
+                    ldd:1, pad2:4, div_rem:4;
 #endif
 };
 
@@ -269,11 +281,19 @@ struct bcr_fp_arcompact {
 #endif
 };
 
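+/*
+ * ARCv2 FPU build config (ARC_REG_FP_V2_BCR): sp/dp presumably flag
+ * single- and double-precision support respectively.
+ */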
+struct bcr_fp_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8;
+#else
+       unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15;
+#endif
+};
+
 struct bcr_timer {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8;
+       unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8;
 #else
-       unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15;
+       unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15;
 #endif
 };
 
@@ -285,6 +305,14 @@ struct bcr_bpu_arcompact {
 #endif
 };
 
+struct bcr_bpu_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8;
+#else
+       unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6;
+#endif
+};
+
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int pad:24, ver:8;
@@ -299,11 +327,12 @@ struct bcr_generic {
  */
 
 struct cpuinfo_arc_mmu {
-       unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb;
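+       /* page sizes presumably kept as KB (pg_sz_k) / MB (s_pg_sz_m) so
+        * the values fit the narrower bitfields */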
+       unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6;
+       unsigned int num_tlb:16, sets:12, ways:4;
 };
 
 struct cpuinfo_arc_cache {
-       unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6;
+       unsigned int sz_k:14, line_len:8, assoc:4, ver:4, alias:1, vipt:1;
 };
 
 struct cpuinfo_arc_bpu {
@@ -315,14 +344,13 @@ struct cpuinfo_arc_ccm {
 };
 
 struct cpuinfo_arc {
-       struct cpuinfo_arc_cache icache, dcache;
+       struct cpuinfo_arc_cache icache, dcache, slc;
        struct cpuinfo_arc_mmu mmu;
        struct cpuinfo_arc_bpu bpu;
        struct bcr_identity core;
        struct bcr_isa isa;
        struct bcr_timer timers;
        unsigned int vec_base;
-       unsigned int uncached_base;
        struct cpuinfo_arc_ccm iccm, dccm;
        struct {
                unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
@@ -336,6 +364,22 @@ struct cpuinfo_arc {
 
 extern struct cpuinfo_arc cpuinfo_arc700[];
 
+static inline int is_isa_arcv2(void)
+{
+       return IS_ENABLED(CONFIG_ISA_ARCV2);
+}
+
+static inline int is_isa_arcompact(void)
+{
+       return IS_ENABLED(CONFIG_ISA_ARCOMPACT);
+}
+
+#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7)
+#error "Toolchain not configured for ARCompact builds"
+#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS)
+#error "Toolchain not configured for ARCv2 builds"
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_ARC_ARCREGS_H */
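
Since IS_ENABLED() folds to a compile-time 0/1, the is_isa_arcv2()/is_isa_arcompact() helpers above let common code branch on the ISA without #ifdef; the untaken side is discarded by the compiler. A sketch (function name is illustrative):

/* Sketch: ISA-specific path selection without preprocessor conditionals */
static void announce_isa(void)
{
	if (is_isa_arcv2())
		pr_info("ARCv2 core (HS38 class)\n");
	else
		pr_info("ARCompact core (ARC700 class)\n");
}
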
index 9917a45fc430d042a4f59006abf84ceedad1bca7..03484cb4d16d2eb4fada0095ee427726c23bd2e1 100644 (file)
 
 #define atomic_set(v, i) (((v)->counter) = (i))
 
+#ifdef CONFIG_ISA_ARCV2
+#define PREFETCHW      "       prefetchw   [%1]        \n"
+#else
+#define PREFETCHW
+#endif
+
 #define ATOMIC_OP(op, c_op, asm_op)                                    \
 static inline void atomic_##op(int i, atomic_t *v)                     \
 {                                                                      \
        unsigned int temp;                                              \
                                                                        \
        __asm__ __volatile__(                                           \
-       "1:     llock   %0, [%1]        \n"                             \
+       "1:                             \n"                             \
+       PREFETCHW                                                       \
+       "       llock   %0, [%1]        \n"                             \
        "       " #asm_op " %0, %0, %2  \n"                             \
        "       scond   %0, [%1]        \n"                             \
        "       bnz     1b              \n"                             \
@@ -43,8 +51,16 @@ static inline int atomic_##op##_return(int i, atomic_t *v)           \
 {                                                                      \
        unsigned int temp;                                              \
                                                                        \
+       /*                                                              \
+        * Explicit full memory barrier needed before/after as          \
+        * LLOCK/SCOND themselves don't provide any such semantics      \
+        */                                                             \
+       smp_mb();                                                       \
+                                                                       \
        __asm__ __volatile__(                                           \
-       "1:     llock   %0, [%1]        \n"                             \
+       "1:                             \n"                             \
+       PREFETCHW                                                       \
+       "       llock   %0, [%1]        \n"                             \
        "       " #asm_op " %0, %0, %2  \n"                             \
        "       scond   %0, [%1]        \n"                             \
        "       bnz     1b              \n"                             \
@@ -52,6 +68,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v)            \
        : "r"(&v->counter), "ir"(i)                                     \
        : "cc");                                                        \
                                                                        \
+       smp_mb();                                                       \
+                                                                       \
        return temp;                                                    \
 }
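
For reference, a sketch of what the non-return ATOMIC_OP(add, +=, add) above expands to on a CONFIG_ISA_ARCV2 LLSC build, with PREFETCHW pasted ahead of LLOCK (constraints mirrored from the return variant shown here):

/* Sketch of the expanded atomic_add() under CONFIG_ISA_ARCV2 */
static inline void atomic_add(int i, atomic_t *v)
{
	unsigned int temp;

	__asm__ __volatile__(
	"1:				\n"
	"	prefetchw   [%1]	\n"	/* PREFETCHW: pull line in for writing */
	"	llock   %0, [%1]	\n"	/* load-locked v->counter */
	"	add     %0, %0, %2	\n"	/* #asm_op */
	"	scond   %0, [%1]	\n"	/* store-conditional */
	"	bnz     1b		\n"	/* retry if scond failed */
	: "=&r"(temp)
	: "r"(&v->counter), "ir"(i)
	: "cc");
}
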
 
@@ -105,6 +123,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        unsigned long flags;                                            \
        unsigned long temp;                                             \
                                                                        \
+       /*                                                              \
+        * spin lock/unlock provides the needed smp_mb() before/after   \
+        */                                                             \
        atomic_ops_lock(flags);                                         \
        temp = v->counter;                                              \
        temp c_op i;                                                    \
@@ -142,9 +163,19 @@ ATOMIC_OP(and, &=, and)
 #define __atomic_add_unless(v, a, u)                                   \
 ({                                                                     \
        int c, old;                                                     \
+                                                                       \
+       /*                                                              \
+        * Explicit full memory barrier needed before/after as          \
+        * LLOCK/SCOND themselves don't provide any such semantics      \
+        */                                                             \
+       smp_mb();                                                       \
+                                                                       \
        c = atomic_read(v);                                             \
        while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
                c = old;                                                \
+                                                                       \
+       smp_mb();                                                       \
+                                                                       \
        c;                                                              \
 })
 
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..a720998
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#ifdef CONFIG_ISA_ARCV2
+
+/*
+ * ARCv2 based HS38 cores are in-order issue, but still weakly ordered
+ * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ...
+ *
+ * Explicit barrier provided by DMB instruction
+ *  - Operand supports fine grained load/store/load+store semantics
+ *  - Ensures that selected memory operation issued before it will complete
+ *    before any subsequent memory operation of same type
+ *  - DMB guarantees SMP as well as local barrier semantics
+ *    (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e.
+ *    UP: barrier(), SMP: smp_*mb == *mb)
+ *  - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed
+ *    in the general case. Plus it only provides a full barrier.
+ */
+
+#define mb()   asm volatile("dmb 3\n" : : : "memory")
+#define rmb()  asm volatile("dmb 1\n" : : : "memory")
+#define wmb()  asm volatile("dmb 2\n" : : : "memory")
+
+#endif
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+/*
+ * ARCompact based cores (ARC700) only have SYNC instruction which is super
+ * heavy weight as it flushes the pipeline as well.
+ * There are no real SMP implementations of such cores.
+ */
+
+#define mb()   asm volatile("sync\n" : : : "memory")
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif
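
A sketch of the message-passing pattern these barriers enable on an SMP HS38: dmb 2 orders the data store before the flag store, dmb 1 orders the flag load before the data load. READ_ONCE/WRITE_ONCE are assumed only for compiler-level ordering of the plain accesses:

/* Sketch: producer/consumer pairing of smp_wmb()/smp_rmb() */
static int data, flag;

static void producer(void)
{
	WRITE_ONCE(data, 42);
	smp_wmb();			/* dmb 2 on ARCv2 SMP */
	WRITE_ONCE(flag, 1);
}

static int consumer(void)
{
	while (!READ_ONCE(flag))
		cpu_relax();
	smp_rmb();			/* dmb 1 on ARCv2 SMP */
	return READ_ONCE(data);
}
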
index 4051e9525939fd9050b06b048bec8e1a47307d7b..99fe118d3730bc050263e5be7dd3423ab659d46c 100644 (file)
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <asm/barrier.h>
+#ifndef CONFIG_ARC_HAS_LLSC
+#include <asm/smp.h>
+#endif
 
-/*
- * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns.
- * The Kconfig glue ensures that in SMP, this is only set if the container
- * SoC/platform has cross-core coherent LLOCK/SCOND
- */
 #if defined(CONFIG_ARC_HAS_LLSC)
 
-static inline void set_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned int temp;
-
-       m += nr >> 5;
-
-       /*
-        * ARC ISA micro-optimization:
-        *
-        * Instructions dealing with bitpos only consider lower 5 bits (0-31)
-        * e.g (x << 33) is handled like (x << 1) by ASL instruction
-        *  (mem pointer still needs adjustment to point to next word)
-        *
-        * Hence the masking to clamp @nr arg can be elided in general.
-        *
-        * However if @nr is a constant (above assumed it in a register),
-        * and greater than 31, gcc can optimize away (x << 33) to 0,
-        * as overflow, given the 32-bit ISA. Thus masking needs to be done
-        * for constant @nr, but no code is generated due to const prop.
-        */
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       __asm__ __volatile__(
-       "1:     llock   %0, [%1]        \n"
-       "       bset    %0, %0, %2      \n"
-       "       scond   %0, [%1]        \n"
-       "       bnz     1b      \n"
-       : "=&r"(temp)
-       : "r"(m), "ir"(nr)
-       : "cc");
-}
-
-static inline void clear_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned int temp;
-
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       __asm__ __volatile__(
-       "1:     llock   %0, [%1]        \n"
-       "       bclr    %0, %0, %2      \n"
-       "       scond   %0, [%1]        \n"
-       "       bnz     1b      \n"
-       : "=&r"(temp)
-       : "r"(m), "ir"(nr)
-       : "cc");
-}
-
-static inline void change_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned int temp;
-
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
+/*
+ * Hardware assisted Atomic-R-M-W
+ */
 
-       __asm__ __volatile__(
-       "1:     llock   %0, [%1]        \n"
-       "       bxor    %0, %0, %2      \n"
-       "       scond   %0, [%1]        \n"
-       "       bnz     1b              \n"
-       : "=&r"(temp)
-       : "r"(m), "ir"(nr)
-       : "cc");
+#define BIT_OP(op, c_op, asm_op)                                       \
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{                                                                      \
+       unsigned int temp;                                              \
+                                                                       \
+       m += nr >> 5;                                                   \
+                                                                       \
+       /*                                                              \
+        * ARC ISA micro-optimization:                                  \
+        *                                                              \
+        * Instructions dealing with bitpos only consider lower 5 bits  \
+        * e.g (x << 33) is handled like (x << 1) by ASL instruction    \
+        *  (mem pointer still needs adjustment to point to next word)  \
+        *                                                              \
+        * Hence the masking to clamp @nr arg can be elided in general. \
+        *                                                              \
+        * However if @nr is a constant (above assumed in a register),  \
+        * and greater than 31, gcc can optimize away (x << 33) to 0,   \
+        * as overflow, given the 32-bit ISA. Thus masking needs to be  \
+        * done for const @nr, but no code is generated due to gcc      \
+        * const prop.                                                  \
+        */                                                             \
+       if (__builtin_constant_p(nr))                                   \
+               nr &= 0x1f;                                             \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock       %0, [%1]            \n"                     \
+       "       " #asm_op " %0, %0, %2  \n"                             \
+       "       scond       %0, [%1]            \n"                     \
+       "       bnz         1b                  \n"                     \
+       : "=&r"(temp)   /* Early clobber, to prevent reg reuse */       \
+       : "r"(m),       /* Not "m": llock only supports reg direct addr mode */ \
+         "ir"(nr)                                                      \
+       : "cc");                                                        \
 }
 
 /*
@@ -108,75 +75,38 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *m)
  * Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally
  * and the old value of bit is returned
  */
-static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old, temp;
-
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       __asm__ __volatile__(
-       "1:     llock   %0, [%2]        \n"
-       "       bset    %1, %0, %3      \n"
-       "       scond   %1, [%2]        \n"
-       "       bnz     1b              \n"
-       : "=&r"(old), "=&r"(temp)
-       : "r"(m), "ir"(nr)
-       : "cc");
-
-       return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned int old, temp;
-
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       __asm__ __volatile__(
-       "1:     llock   %0, [%2]        \n"
-       "       bclr    %1, %0, %3      \n"
-       "       scond   %1, [%2]        \n"
-       "       bnz     1b              \n"
-       : "=&r"(old), "=&r"(temp)
-       : "r"(m), "ir"(nr)
-       : "cc");
-
-       return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned int old, temp;
-
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       __asm__ __volatile__(
-       "1:     llock   %0, [%2]        \n"
-       "       bxor    %1, %0, %3      \n"
-       "       scond   %1, [%2]        \n"
-       "       bnz     1b              \n"
-       : "=&r"(old), "=&r"(temp)
-       : "r"(m), "ir"(nr)
-       : "cc");
-
-       return (old & (1 << nr)) != 0;
+#define TEST_N_BIT_OP(op, c_op, asm_op)                                        \
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{                                                                      \
+       unsigned long old, temp;                                        \
+                                                                       \
+       m += nr >> 5;                                                   \
+                                                                       \
+       if (__builtin_constant_p(nr))                                   \
+               nr &= 0x1f;                                             \
+                                                                       \
+       /*                                                              \
+        * Explicit full memory barrier needed before/after as          \
+        * LLOCK/SCOND themselves don't provide any such semantics      \
+        */                                                             \
+       smp_mb();                                                       \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock       %0, [%2]    \n"                             \
+       "       " #asm_op " %1, %0, %3  \n"                             \
+       "       scond       %1, [%2]    \n"                             \
+       "       bnz         1b          \n"                             \
+       : "=&r"(old), "=&r"(temp)                                       \
+       : "r"(m), "ir"(nr)                                              \
+       : "cc");                                                        \
+                                                                       \
+       smp_mb();                                                       \
+                                                                       \
+       return (old & (1 << nr)) != 0;                                  \
 }
 
 #else  /* !CONFIG_ARC_HAS_LLSC */
 
-#include <asm/smp.h>
-
 /*
  * Non hardware assisted Atomic-R-M-W
  * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
@@ -193,108 +123,43 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
  *             at compile time)
  */
 
-static inline void set_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long temp, flags;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       bitops_lock(flags);
-
-       temp = *m;
-       *m = temp | (1UL << nr);
-
-       bitops_unlock(flags);
-}
-
-static inline void clear_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long temp, flags;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       bitops_lock(flags);
-
-       temp = *m;
-       *m = temp & ~(1UL << nr);
-
-       bitops_unlock(flags);
-}
-
-static inline void change_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long temp, flags;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       bitops_lock(flags);
-
-       temp = *m;
-       *m = temp ^ (1UL << nr);
-
-       bitops_unlock(flags);
-}
-
-static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old, flags;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       bitops_lock(flags);
-
-       old = *m;
-       *m = old | (1 << nr);
-
-       bitops_unlock(flags);
-
-       return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old, flags;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       bitops_lock(flags);
-
-       old = *m;
-       *m = old & ~(1 << nr);
-
-       bitops_unlock(flags);
-
-       return (old & (1 << nr)) != 0;
+#define BIT_OP(op, c_op, asm_op)                                       \
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{                                                                      \
+       unsigned long temp, flags;                                      \
+       m += nr >> 5;                                                   \
+                                                                       \
+       if (__builtin_constant_p(nr))                                   \
+               nr &= 0x1f;                                             \
+                                                                       \
+       /*                                                              \
+        * spin lock/unlock provide the needed smp_mb() before/after    \
+        */                                                             \
+       bitops_lock(flags);                                             \
+                                                                       \
+       temp = *m;                                                      \
+       *m = temp c_op (1UL << nr);                                     \
+                                                                       \
+       bitops_unlock(flags);                                           \
 }
 
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old, flags;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       bitops_lock(flags);
-
-       old = *m;
-       *m = old ^ (1 << nr);
-
-       bitops_unlock(flags);
-
-       return (old & (1 << nr)) != 0;
+#define TEST_N_BIT_OP(op, c_op, asm_op)                                        \
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{                                                                      \
+       unsigned long old, flags;                                       \
+       m += nr >> 5;                                                   \
+                                                                       \
+       if (__builtin_constant_p(nr))                                   \
+               nr &= 0x1f;                                             \
+                                                                       \
+       bitops_lock(flags);                                             \
+                                                                       \
+       old = *m;                                                       \
+       *m = old c_op (1 << nr);                                        \
+                                                                       \
+       bitops_unlock(flags);                                           \
+                                                                       \
+       return (old & (1 << nr)) != 0;                                  \
 }
 
 #endif /* CONFIG_ARC_HAS_LLSC */
@@ -303,86 +168,51 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
  * Non atomic variants
  **************************************/
 
-static inline void __set_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long temp;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       temp = *m;
-       *m = temp | (1UL << nr);
-}
-
-static inline void __clear_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long temp;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       temp = *m;
-       *m = temp & ~(1UL << nr);
-}
-
-static inline void __change_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long temp;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       temp = *m;
-       *m = temp ^ (1UL << nr);
-}
-
-static inline int
-__test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       old = *m;
-       *m = old | (1 << nr);
-
-       return (old & (1 << nr)) != 0;
+#define __BIT_OP(op, c_op, asm_op)                                     \
+static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m)   \
+{                                                                      \
+       unsigned long temp;                                             \
+       m += nr >> 5;                                                   \
+                                                                       \
+       if (__builtin_constant_p(nr))                                   \
+               nr &= 0x1f;                                             \
+                                                                       \
+       temp = *m;                                                      \
+       *m = temp c_op (1UL << nr);                                     \
 }
 
-static inline int
-__test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       old = *m;
-       *m = old & ~(1 << nr);
-
-       return (old & (1 << nr)) != 0;
+#define __TEST_N_BIT_OP(op, c_op, asm_op)                              \
+static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{                                                                      \
+       unsigned long old;                                              \
+       m += nr >> 5;                                                   \
+                                                                       \
+       if (__builtin_constant_p(nr))                                   \
+               nr &= 0x1f;                                             \
+                                                                       \
+       old = *m;                                                       \
+       *m = old c_op (1 << nr);                                        \
+                                                                       \
+       return (old & (1 << nr)) != 0;                                  \
 }
 
-static inline int
-__test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
-       unsigned long old;
-       m += nr >> 5;
-
-       if (__builtin_constant_p(nr))
-               nr &= 0x1f;
-
-       old = *m;
-       *m = old ^ (1 << nr);
-
-       return (old & (1 << nr)) != 0;
-}
+#define BIT_OPS(op, c_op, asm_op)                                      \
+                                                                       \
+       /* set_bit(), clear_bit(), change_bit() */                      \
+       BIT_OP(op, c_op, asm_op)                                        \
+                                                                       \
+       /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
+       TEST_N_BIT_OP(op, c_op, asm_op)                                 \
+                                                                       \
+       /* __set_bit(), __clear_bit(), __change_bit() */                \
+       __BIT_OP(op, c_op, asm_op)                                      \
+                                                                       \
+       /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
+       __TEST_N_BIT_OP(op, c_op, asm_op)
+
+BIT_OPS(set, |, bset)
+BIT_OPS(clear, & ~, bclr)
+BIT_OPS(change, ^, bxor)
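
For reference, a sketch of the non-atomic function that __BIT_OP stamps out for the first instantiation above (c_op = |), matching the open-coded version it replaces:

/* Sketch of the generated __set_bit() from BIT_OPS(set, |, bset) */
static inline void __set_bit(unsigned long nr, volatile unsigned long *m)
{
	unsigned long temp;

	m += nr >> 5;			/* advance to the word holding bit @nr */

	if (__builtin_constant_p(nr))
		nr &= 0x1f;		/* clamp const @nr, per the BIT_OP comment */

	temp = *m;
	*m = temp | (1UL << nr);	/* c_op applied here */
}
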
 
 /*
  * This routine doesn't need to be atomic.
@@ -402,6 +232,8 @@ test_bit(unsigned int nr, const volatile unsigned long *addr)
        return ((mask & *addr) != 0);
 }
 
+#ifdef CONFIG_ISA_ARCOMPACT
+
 /*
  * Count the number of zeros, starting from MSB
  * Helper for fls( ) friends
@@ -494,6 +326,75 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word)
        return ffs(word) - 1;
 }
 
+#else  /* CONFIG_ISA_ARCV2 */
+
+/*
+ * fls = Find Last Set in word
+ * @result: [1-32]
+ * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
+ */
+static inline __attribute__ ((const)) int fls(unsigned long x)
+{
+       int n;
+
+       asm volatile(
+       "       fls.f   %0, %1          \n"  /* 0:31; 0(Z) if src 0 */
+       "       add.nz  %0, %0, 1       \n"  /* 0:31 -> 1:32 */
+       : "=r"(n)       /* Early clobber not needed */
+       : "r"(x)
+       : "cc");
+
+       return n;
+}
+
+/*
+ * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set
+ */
+static inline __attribute__ ((const)) int __fls(unsigned long x)
+{
+       /* FLS insn has exactly same semantics as the API */
+       return  __builtin_arc_fls(x);
+}
+
+/*
+ * ffs = Find First Set in word (LSB to MSB)
+ * @result: [1-32], 0 if all 0's
+ */
+static inline __attribute__ ((const)) int ffs(unsigned long x)
+{
+       int n;
+
+       asm volatile(
+       "       ffs.f   %0, %1          \n"  /* 0:31; 31(Z) if src 0 */
+       "       add.nz  %0, %0, 1       \n"  /* 0:31 -> 1:32 */
+       "       mov.z   %0, 0           \n"  /* 31(Z)-> 0 */
+       : "=r"(n)       /* Early clobber not needed */
+       : "r"(x)
+       : "cc");
+
+       return n;
+}
+
+/*
+ * __ffs: Similar to ffs, but zero based (0-31)
+ */
+static inline __attribute__ ((const)) int __ffs(unsigned long x)
+{
+       int n;
+
+       asm volatile(
+       "       ffs.f   %0, %1          \n"  /* 0:31; 31(Z) if src 0 */
+       "       mov.z   %0, 0           \n"  /* 31(Z)-> 0 */
+       : "=r"(n)
+       : "r"(x)
+       : "cc");
+
+       return n;
+       return n;
+
+#endif /* CONFIG_ISA_ARCOMPACT */
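
Both the ARCompact and ARCv2 variants must honor the same contract; a sketch of the expected values, with BUG_ON used purely as executable documentation (selftest name is illustrative):

/* Sketch: bit-search API contract, identical for either ISA */
static void __init bitsearch_selftest(void)
{
	BUG_ON(fls(0) != 0);
	BUG_ON(fls(1) != 1);
	BUG_ON(fls(0x80000000UL) != 32);

	BUG_ON(ffs(0) != 0);
	BUG_ON(ffs(0x80000000UL) != 32);

	BUG_ON(__fls(0x80000000UL) != 31);	/* zero-based */
	BUG_ON(__ffs(2) != 1);			/* zero-based */
}
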
+
 /*
  * ffz = Find First Zero in word.
  * @return:[0-31], 32 if all 1's
index 7861255da32d64aa62e03d6fa881f84d9abf3b2f..d67345d3e2d444b1d357c004cbb1e33508d93060 100644 (file)
@@ -60,7 +60,7 @@ extern void read_decode_cache_bcr(void);
 #define ARC_REG_IC_IVIC                0x10
 #define ARC_REG_IC_CTRL                0x11
 #define ARC_REG_IC_IVIL                0x19
-#if defined(CONFIG_ARC_MMU_V3)
+#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
 #define ARC_REG_IC_PTAG                0x1E
 #endif
 
@@ -74,12 +74,24 @@ extern void read_decode_cache_bcr(void);
 #define ARC_REG_DC_IVDL                0x4A
 #define ARC_REG_DC_FLSH                0x4B
 #define ARC_REG_DC_FLDL                0x4C
-#if defined(CONFIG_ARC_MMU_V3)
 #define ARC_REG_DC_PTAG                0x5C
-#endif
 
 /* Bit val in DC_CTRL */
 #define DC_CTRL_INV_MODE_FLUSH  0x40
 #define DC_CTRL_FLUSH_STATUS    0x100
 
+/* System-level cache (L2 cache) related Auxiliary registers */
+#define ARC_REG_SLC_CFG                0x901
+#define ARC_REG_SLC_CTRL       0x903
+#define ARC_REG_SLC_FLUSH      0x904
+#define ARC_REG_SLC_INVALIDATE 0x905
+#define ARC_REG_SLC_RGN_START  0x914
+#define ARC_REG_SLC_RGN_END    0x916
+
+/* Bit val in SLC_CONTROL */
+#define SLC_CTRL_IM            0x040
+#define SLC_CTRL_DISABLE       0x001
+#define SLC_CTRL_BUSY          0x100
+#define SLC_CTRL_RGN_OP_INV    0x200
+
 #endif /* _ASM_CACHE_H */
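
A hedged sketch of how these region registers are typically driven; the real helper lives in arch/arc/mm/cache.c, and the kick-off-on-RGN_START-write semantics assumed here should be checked against it:

/* Sketch, assuming read_aux_reg()/write_aux_reg() from arcregs.h */
static void slc_region_flush(unsigned long start, unsigned long sz)
{
	unsigned int ctrl = read_aux_reg(ARC_REG_SLC_CTRL);

	ctrl &= ~SLC_CTRL_RGN_OP_INV;	/* flush, not invalidate */
	write_aux_reg(ARC_REG_SLC_CTRL, ctrl);

	write_aux_reg(ARC_REG_SLC_RGN_END, start + sz);
	write_aux_reg(ARC_REG_SLC_RGN_START, start);	/* op assumed to start here */

	while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY)
		;	/* spin until the region op retires */
}
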
index 6abc4972bc93bbba015c3ebe92ea40926c7413a8..0992d3dbcc65f66e4e97925703ec9dc113a7b9a4 100644 (file)
@@ -34,9 +34,7 @@ void flush_cache_all(void);
 void flush_icache_range(unsigned long start, unsigned long end);
 void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
 void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr);
-#define __flush_dcache_page(p, v)      \
-               ___flush_dcache_page((unsigned long)p, (unsigned long)v)
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
index 03cd6894855d614237851515d1cafb6d2a56908b..44fd531f4d7b93a9df7bff6dec976af5e571506c 100644 (file)
@@ -10,6 +10,8 @@
 #define __ASM_ARC_CMPXCHG_H
 
 #include <linux/types.h>
+
+#include <asm/barrier.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_ARC_HAS_LLSC
@@ -19,16 +21,25 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 {
        unsigned long prev;
 
+       /*
+        * Explicit full memory barrier needed before/after as
+        * LLOCK/SCOND themselves don't provide any such semantics
+        */
+       smp_mb();
+
        __asm__ __volatile__(
        "1:     llock   %0, [%1]        \n"
        "       brne    %0, %2, 2f      \n"
        "       scond   %3, [%1]        \n"
        "       bnz     1b              \n"
        "2:                             \n"
-       : "=&r"(prev)
-       : "r"(ptr), "ir"(expected),
-         "r"(new) /* can't be "ir". scond can't take limm for "b" */
-       : "cc");
+       : "=&r"(prev)   /* Early clobber, to prevent reg reuse */
+       : "r"(ptr),     /* Not "m": llock only supports reg direct addr mode */
+         "ir"(expected),
+         "r"(new)      /* can't be "ir". scond can't take LIMM for "b" */
+       : "cc", "memory"); /* so that gcc knows memory is being written here */
+
+       smp_mb();
 
        return prev;
 }
@@ -42,6 +53,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
        int prev;
        volatile unsigned long *p = ptr;
 
+       /*
+        * spin lock/unlock provide the needed smp_mb() before/after
+        */
        atomic_ops_lock(flags);
        prev = *p;
        if (prev == expected)
@@ -77,12 +91,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
        switch (size) {
        case 4:
+               smp_mb();
+
                __asm__ __volatile__(
                "       ex  %0, [%1]    \n"
                : "+r"(val)
                : "r"(ptr)
                : "memory");
 
+               smp_mb();
+
                return val;
        }
        return __xchg_bad_pointer();
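
With the smp_mb() pairs added above, cmpxchg() is a full barrier as generic callers expect. A sketch of the canonical retry loop built on it (helper name is illustrative):

/* Sketch: bounded atomic increment via the cmpxchg() retry idiom */
static int atomic_inc_below(atomic_t *v, int limit)
{
	int cur = atomic_read(v);

	while (cur < limit) {
		int old = atomic_cmpxchg(v, cur, cur + 1);
		if (old == cur)
			return 1;	/* our update won the race */
		cur = old;		/* raced; retry with the fresh value */
	}
	return 0;			/* already at/above limit */
}
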
index 43de302569815073bb4d4f23cb98b60a5e0c0552..08e7e2a16ac176a597ceb21c3b0f399b6ad98ea6 100644 (file)
 static inline void __delay(unsigned long loops)
 {
        __asm__ __volatile__(
-       "1:     sub.f %0, %0, 1 \n"
-       "       jpnz 1b         \n"
-       : "+r"(loops)
-       :
-       : "cc");
+       "       lp  1f  \n"
+       "       nop     \n"
+       "1:             \n"
+       : "+l"(loops));
 }
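
The rewritten __delay arms an ARC zero-overhead loop: "lp 1f" loads LP_COUNT from the "+l"-constrained operand, so the single nop body repeats loops times with no per-iteration sub/branch. Functionally it is just (model name is illustrative):

/* Sketch: C model of the ZOL-based __delay */
static inline void delay_model(unsigned long loops)
{
	while (loops--)
		barrier();	/* stand-in for the single nop body */
}
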
 
 extern void __bad_udelay(void);
index f787894613addf07030feaa4560726a6fd8cc9f9..2d28ba939d8edc71c693442b4464076f840ea5ff 100644 (file)
 #include <asm-generic/dma-coherent.h>
 #include <asm/cacheflush.h>
 
-#ifndef CONFIG_ARC_PLAT_NEEDS_CPU_TO_DMA
-/*
- * dma_map_* API take cpu addresses, which is kernel logical address in the
- * untranslated address space (0x8000_0000) based. The dma address (bus addr)
- * ideally needs to be 0x0000_0000 based hence these glue routines.
- * However given that intermediate bus bridges can ignore the high bit, we can
- * do with these routines being no-ops.
- * If a platform/device comes up which sriclty requires 0 based bus addr
- * (e.g. AHB-PCI bridge on Angel4 board), then it can provide it's own versions
- */
-#define plat_dma_addr_to_kernel(dev, addr) ((unsigned long)(addr))
-#define plat_kernel_addr_to_dma(dev, ptr) ((dma_addr_t)(ptr))
-
-#else
-#include <plat/dma_addr.h>
-#endif
-
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
                            dma_addr_t *dma_handle, gfp_t gfp);
 
@@ -94,7 +77,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size,
               enum dma_data_direction dir)
 {
        _dma_cache_sync((unsigned long)cpu_addr, size, dir);
-       return plat_kernel_addr_to_dma(dev, cpu_addr);
+       return (dma_addr_t)cpu_addr;
 }
 
 static inline void
@@ -147,16 +130,14 @@ static inline void
 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
                        size_t size, enum dma_data_direction dir)
 {
-       _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size,
-                       DMA_FROM_DEVICE);
+       _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE);
 }
 
 static inline void
 dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
                           size_t size, enum dma_data_direction dir)
 {
-       _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size,
-                       DMA_TO_DEVICE);
+       _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE);
 }
 
 static inline void
@@ -164,8 +145,7 @@ dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
                              unsigned long offset, size_t size,
                              enum dma_data_direction direction)
 {
-       _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset,
-                       size, DMA_FROM_DEVICE);
+       _dma_cache_sync(dma_handle + offset, size, DMA_FROM_DEVICE);
 }
 
 static inline void
@@ -173,8 +153,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
                                 unsigned long offset, size_t size,
                                 enum dma_data_direction direction)
 {
-       _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset,
-                       size, DMA_TO_DEVICE);
+       _dma_cache_sync(dma_handle + offset, size, DMA_TO_DEVICE);
 }
 
 static inline void
index a262828576839d9d48e5e2be18ae65b9fad815e5..51a99e25fe338251b23fe00bf2b1849b6a7252a2 100644 (file)
 /* These ELF defines belong to uapi but libc elf.h already defines them */
 #define EM_ARCOMPACT           93
 
+#define EM_ARCV2               195     /* ARCv2 Cores */
+
+#define EM_ARC_INUSE           (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \
+                                       EM_ARCOMPACT : EM_ARCV2)
+
 /* ARC Relocations (kernel Modules only) */
 #define  R_ARC_32              0x4
 #define  R_ARC_32_ME           0x1B
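
EM_ARC_INUSE gives the ELF loader a single machine id to accept, so an ARCompact kernel rejects ARCv2 binaries and vice versa. A sketch of the intended check, shaped per the generic elf_check_arch contract:

/* Sketch: per-ISA ELF machine check built on EM_ARC_INUSE */
#define elf_check_arch(x)	((x)->e_machine == EM_ARC_INUSE)
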
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
new file mode 100644 (file)
index 0000000..b5ff87e
--- /dev/null
@@ -0,0 +1,190 @@
+
+#ifndef __ASM_ARC_ENTRY_ARCV2_H
+#define __ASM_ARC_ENTRY_ARCV2_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-arcv2.h>
+#include <asm/thread_info.h>   /* For THREAD_SIZE */
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_PROLOGUE      called_from
+
+       ; Before jumping to Interrupt Vector, hardware micro-ops did following:
+       ;   1. SP auto-switched to kernel mode stack
+       ;   2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0)
+       ;   3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32
+       ;
+       ; Now manually save: r12, sp, fp, gp, r25
+
+       PUSH    r12
+
+       ; Saving pt_regs->sp correctly requires some extra work due to the way
+       ; Auto stack switch works
+       ;  - U mode: retrieve it from AUX_USER_SP
+       ;  - K mode: add the offset from current SP where H/w starts auto push
+       ;
+       ; Utilize the fact that Z bit is set if Intr taken in U mode
+       mov.nz  r9, sp
+       add.nz  r9, r9, SZ_PT_REGS - PT_sp - 4
+       bnz     1f
+
+       lr      r9, [AUX_USER_SP]
+1:
+       PUSH    r9      ; SP
+
+       PUSH    fp
+       PUSH    gp
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       PUSH    r25                     ; user_r25
+       GET_CURR_TASK_ON_CPU    r25
+#else
+       sub     sp, sp, 4
+#endif
+
+.ifnc \called_from, exception
+       sub     sp, sp, 12      ; BTA/ECR/orig_r0 placeholder per pt_regs
+.endif
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE      called_from
+
+.ifnc \called_from, exception
+       add     sp, sp, 12      ; skip BTA/ECR/orig_r0 placeholders
+.endif
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       POP     r25
+#else
+       add     sp, sp, 4
+#endif
+
+       POP     gp
+       POP     fp
+
+       ; Don't touch AUX_USER_SP if returning to K mode (Z bit set)
+       ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE)
+       add.z   sp, sp, 4
+       bz      1f
+
+       POPAX   AUX_USER_SP
+1:
+       POP     r12
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+       ; Before jumping to Exception Vector, hardware micro-ops did following:
+       ;   1. SP auto-switched to kernel mode stack
+       ;   2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0)
+       ;
+       ; Now manually save the complete reg file
+
+       PUSH    r9              ; freeup a register: slot of erstatus
+
+       PUSHAX  eret
+       sub     sp, sp, 12      ; skip JLI, LDI, EI
+       PUSH    lp_count
+       PUSHAX  lp_start
+       PUSHAX  lp_end
+       PUSH    blink
+
+       PUSH    r11
+       PUSH    r10
+
+       ld.as   r9,  [sp, 10]   ; load stashed r9 (status32 stack slot)
+       lr      r10, [erstatus]
+       st.as   r10, [sp, 10]   ; save status32 at its right stack slot
+
+       PUSH    r9
+       PUSH    r8
+       PUSH    r7
+       PUSH    r6
+       PUSH    r5
+       PUSH    r4
+       PUSH    r3
+       PUSH    r2
+       PUSH    r1
+       PUSH    r0
+
+       ; -- for interrupts, regs above are auto-saved by h/w in that order --
+       ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25)
+       ;
+       ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE)
+       ; Although H/w exception micro-ops do set Z flag for U mode (just like
+       ; for interrupts), it could get clobbered in case we soft land here from
+       ; a TLB Miss exception handler (tlbex.S)
+
+       and     r10, r10, STATUS_U_MASK
+       xor.f   0, r10, STATUS_U_MASK
+
+       INTERRUPT_PROLOGUE  exception
+
+       PUSHAX  erbta
+       PUSHAX  ecr             ; r9 contains ECR, expected by EV_Trap
+
+       PUSH    r0              ; orig_r0
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+
+       ; Assumes r0 has PT_status32
+       btst   r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE
+
+       add     sp, sp, 8       ; orig_r0/ECR don't need restoring
+       POPAX   erbta
+
+       INTERRUPT_EPILOGUE  exception
+
+       POP     r0
+       POP     r1
+       POP     r2
+       POP     r3
+       POP     r4
+       POP     r5
+       POP     r6
+       POP     r7
+       POP     r8
+       POP     r9
+       POP     r10
+       POP     r11
+
+       POP     blink
+       POPAX   lp_end
+       POPAX   lp_start
+
+       POP     r9
+       mov     lp_count, r9
+
+       add     sp, sp, 12      ; skip JLI, LDI, EI
+       POPAX   eret
+       POPAX   erstatus
+
+       ld.as   r9, [sp, -12]   ; reload r9 which got clobbered
+.endm
+
+.macro FAKE_RET_FROM_EXCPN
+       lr      r9, [status32]
+       bic     r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
+       or      r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+       kflag   r9
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP  reg
+       bmskn \reg, sp, THREAD_SHIFT - 1
+.endm
+
+/* Get CPU-ID of this core */
+.macro  GET_CPU_ID  reg
+       lr  \reg, [identity]
+       xbfu \reg, \reg, 0xE8   /* 00111    01000 */
+                               /* M = 8-1  N = 8 */
+.endm
+
+#endif
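
Per the comment above, the xbfu operand 0xE8 encodes width-1 = 7 in its upper field and start = 8 in its lower field, i.e. extract IDENTITY[15:8], the same field the ARCompact version isolates with lsr/bmsk. In C terms (AUX_IDENTITY is 4 per arcregs.h; helper name is illustrative):

/* Sketch: C equivalent of GET_CPU_ID's bitfield extract */
static inline unsigned int arc_cpu_id(void)
{
	return (read_aux_reg(4) >> 8) & 0xff;	/* IDENTITY[15:8] = cpu id */
}
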
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
new file mode 100644 (file)
index 0000000..415443c
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ *  Stack switching code can no longer reliably rely on the fact that
+ *  if we are NOT in user mode, stack is switched to kernel mode.
+ *  e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
+ *  its prologue including stack switching from user mode
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ *  -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
+ *   Normally CPU does this automatically, however when doing FAKE rtie,
+ *   we also need to explicitly do this. The problem in macros
+ *   FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ *   was being "CLEARED" rather than "SET". Actually "SET" clears ZOL context
+ *
+ * Vineetg: May 5th 2008
+ *  -Modified CALLEE_REG save/restore macros to handle the fact that
+ *      r25 contains the kernel current task ptr
+ *  - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
+ *  - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
+ *      address Write back load ld.ab instead of separate ld/add instn
+ *
+ * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
+ */
+
+#ifndef __ASM_ARC_ENTRY_COMPACT_H
+#define __ASM_ARC_ENTRY_COMPACT_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-compact.h>
+#include <asm/thread_info.h>   /* For THREAD_SIZE */
+
+/*--------------------------------------------------------------
+ * Switch to Kernel Mode stack if SP points to User Mode stack
+ *
+ * Entry   : r9 contains pre-IRQ/exception/trap status32
+ * Exit    : SP set to K mode stack
+ *           SP at the time of entry (K/U) saved @ pt_regs->sp
+ * Clobbers: r9
+ *-------------------------------------------------------------*/
+
+.macro SWITCH_TO_KERNEL_STK
+
+       /* User Mode when this happened ? Yes: Proceed to switch stack */
+       bbit1   r9, STATUS_U_BIT, 88f
+
+       /* OK we were already in kernel mode when this event happened, thus can
+        * assume SP is kernel mode SP. _NO_ need to do any stack switching
+        */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+       /* However....
+        * If Level 2 Interrupts enabled, we may end up with a corner case:
+        * 1. User Task executing
+        * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
+        * 3. But before it could switch SP from USER to KERNEL stack
+        *      a L2 IRQ "Interrupts" L1
+        * That way although L2 IRQ happened in Kernel mode, stack is still
+        * not switched.
+        * To handle this, we may need to switch stack even if in kernel mode
+        * provided SP has values in range of USER mode stack ( < 0x7000_0000 )
+        */
+       brlo sp, VMALLOC_START, 88f
+
+       /* TODO: vineetg:
+        * We need to be a bit more cautious here. What if a kernel bug in
+        * L1 ISR, caused SP to go whacko (some small value which looks like
+        * USER stk) and then we take L2 ISR.
+        * Above brlo alone would treat it as a valid L1-L2 scenario
+        * instead of shouting aloud
+        * The only feasible way is to make sure this L2 happened in
+        * L1 prologue ONLY i.e. ilink2 is less than a pre-set marker in
+        * L1 ISR before it switches stack
+        */
+
+#endif
+
+    /*------Intr/Excp happened in kernel mode, SP already setup ------ */
+       /* save it nevertheless @ pt_regs->sp for uniformity */
+
+       b.d     66f
+       st      sp, [sp, PT_sp - SZ_PT_REGS]
+
+88: /*------Intr/Excp happened in user mode, "switch" stack ------ */
+
+       GET_CURR_TASK_ON_CPU   r9
+
+       /* With current tsk in r9, get it's kernel mode stack base */
+       GET_TSK_STACK_BASE  r9, r9
+
+       /* save U mode SP @ pt_regs->sp */
+       st      sp, [r9, PT_sp - SZ_PT_REGS]
+
+       /* final SP switch */
+       mov     sp, r9
+66:
+.endm
+
+/*------------------------------------------------------------
+ * "FAKE" a rtie to return from CPU Exception context
+ * This is to re-enable Exceptions within exception
+ * Look at EV_ProtV to see how this is actually used
+ *-------------------------------------------------------------*/
+
+.macro FAKE_RET_FROM_EXCPN
+
+       ld  r9, [sp, PT_status32]
+       bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK)
+       bset  r9, r9, STATUS_L_BIT
+       sr  r9, [erstatus]
+       mov r9, 55f
+       sr  r9, [eret]
+
+       rtie
+55:
+.endm
+
+/*--------------------------------------------------------------
+ * For early Exception/ISR Prologue, a core reg is temporarily needed to
+ * code the rest of prolog (stack switching). This is done by stashing
+ * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
+ *
+ * Before saving the full regfile - this reg is restored back, only
+ * to be saved again on kernel mode stack, as part of pt_regs.
+ *-------------------------------------------------------------*/
+.macro PROLOG_FREEUP_REG       reg, mem
+#ifdef CONFIG_SMP
+       sr  \reg, [ARC_REG_SCRATCH_DATA0]
+#else
+       st  \reg, [\mem]
+#endif
+.endm
+
+.macro PROLOG_RESTORE_REG      reg, mem
+#ifdef CONFIG_SMP
+       lr  \reg, [ARC_REG_SCRATCH_DATA0]
+#else
+       ld  \reg, [\mem]
+#endif
+.endm
+
+/*--------------------------------------------------------------
+ * Exception Entry prologue
+ * -Switches stack to K mode (if not already)
+ * -Saves the register file
+ *
+ * After this it is safe to call the "C" handlers
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+       /* Need at least 1 reg to code the early exception prologue */
+       PROLOG_FREEUP_REG r9, @ex_saved_reg1
+
+       /* U/K mode at time of exception (stack not switched if already K) */
+       lr  r9, [erstatus]
+
+       /* ARC700 doesn't provide auto-stack switching */
+       SWITCH_TO_KERNEL_STK
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       /* Treat r25 as scratch reg (save on stack) and load with "current" */
+       PUSH    r25
+       GET_CURR_TASK_ON_CPU   r25
+#else
+       sub     sp, sp, 4
+#endif
+
+       st.a    r0, [sp, -8]    /* orig_r0 needed for syscall (skip ECR slot) */
+       sub     sp, sp, 4       /* skip pt_regs->sp, already saved above */
+
+       /* Restore r9 used to code the early prologue */
+       PROLOG_RESTORE_REG  r9, @ex_saved_reg1
+
+       /* now we are ready to save the regfile */
+       SAVE_R0_TO_R12
+       PUSH    gp
+       PUSH    fp
+       PUSH    blink
+       PUSHAX  eret
+       PUSHAX  erstatus
+       PUSH    lp_count
+       PUSHAX  lp_end
+       PUSHAX  lp_start
+       PUSHAX  erbta
+
+       lr      r9, [ecr]
+       st      r9, [sp, PT_event]    /* EV_Trap expects r9 to have ECR */
+.endm
+
+/*--------------------------------------------------------------
+ * Restore all registers used by system call or Exceptions
+ * SP should always be pointing to the next free stack element
+ * when entering this macro.
+ *
+ * NOTE:
+ *
+ * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
+ * for memory load operations. If used in that way interrupts are deffered
+ * by hardware and that is not good.
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+       POPAX   erbta
+       POPAX   lp_start
+       POPAX   lp_end
+
+       POP     r9
+       mov     lp_count, r9    ;LD to lp_count is not allowed
+
+       POPAX   erstatus
+       POPAX   eret
+       POP     blink
+       POP     fp
+       POP     gp
+       RESTORE_R12_TO_R0
+
+       ld  sp, [sp] /* restore original sp */
+       /* orig_r0, ECR, user_r25 skipped automatically */
+.endm
+
+/* Dummy ECR values for Interrupts */
+#define event_IRQ1             0x0031abcd
+#define event_IRQ2             0x0032abcd
+
+.macro INTERRUPT_PROLOGUE  LVL
+
+       /* free up r9 as scratchpad */
+       PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg
+
+       /* Which mode (user/kernel) was the system in when intr occurred */
+       lr  r9, [status32_l\LVL\()]
+
+       SWITCH_TO_KERNEL_STK
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       /* Treat r25 as scratch reg (save on stack) and load with "current" */
+       PUSH    r25
+       GET_CURR_TASK_ON_CPU   r25
+#else
+       sub     sp, sp, 4
+#endif
+
+       PUSH    0x003\LVL\()abcd    /* Dummy ECR */
+       sub     sp, sp, 8           /* skip orig_r0 (not needed)
+                                      skip pt_regs->sp, already saved above */
+
+       /* Restore r9 used to code the early prologue */
+       PROLOG_RESTORE_REG  r9, @int\LVL\()_saved_reg
+
+       SAVE_R0_TO_R12
+       PUSH    gp
+       PUSH    fp
+       PUSH    blink
+       PUSH    ilink\LVL\()
+       PUSHAX  status32_l\LVL\()
+       PUSH    lp_count
+       PUSHAX  lp_end
+       PUSHAX  lp_start
+       PUSHAX  bta_l\LVL\()
+.endm
+
+/*--------------------------------------------------------------
+ * Restore all registers used by interrupt handlers.
+ *
+ * NOTE:
+ *
+ * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
+ * for memory load operations. If used in that way interrupts are deferred
+ * by hardware and that is not good.
+ *-------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE  LVL
+       POPAX   bta_l\LVL\()
+       POPAX   lp_start
+       POPAX   lp_end
+
+       POP     r9
+       mov     lp_count, r9    ;LD to lp_count is not allowed
+
+       POPAX   status32_l\LVL\()
+       POP     ilink\LVL\()
+       POP     blink
+       POP     fp
+       POP     gp
+       RESTORE_R12_TO_R0
+
+       ld  sp, [sp] /* restore original sp */
+       /* orig_r0, ECR, user_r25 skipped automatically */
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP  reg
+       bic \reg, sp, (THREAD_SIZE - 1)
+.endm
+
+/* Get CPU-ID of this core */
+.macro  GET_CPU_ID  reg
+       lr  \reg, [identity]
+       lsr \reg, \reg, 8
+       bmsk \reg, \reg, 7
+.endm
+
+#endif  /* __ASM_ARC_ENTRY_COMPACT_H */
index 884081099f800fd6b4ba133bc1f1746084845c49..ad7860c5ce153c731264f770067e653b70568fd5 100644 (file)
@@ -1,45 +1,27 @@
 /*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- *  Stack switching code can no longer reliably rely on the fact that
- *  if we are NOT in user mode, stack is switched to kernel mode.
- *  e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
- *  it's prologue including stack switching from user mode
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- *  -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
- *   Normally CPU does this automatically, however when doing FAKE rtie,
- *   we also need to explicitly do this. The problem in macros
- *   FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- *   was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context
- *
- * Vineetg: May 5th 2008
- *  -Modified CALLEE_REG save/restore macros to handle the fact that
- *      r25 contains the kernel current task ptr
- *  - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
- *  - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
- *      address Write back load ld.ab instead of seperate ld/add instn
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
  */
 
 #ifndef __ASM_ARC_ENTRY_H
 #define __ASM_ARC_ENTRY_H
 
-#ifdef __ASSEMBLY__
 #include <asm/unistd.h>                /* For NR_syscalls definition */
-#include <asm/asm-offsets.h>
 #include <asm/arcregs.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>     /* For VMALLOC_START */
-#include <asm/thread_info.h>   /* For THREAD_SIZE */
 #include <asm/mmu.h>
 
+#ifdef CONFIG_ISA_ARCOMPACT
+#include <asm/entry-compact.h> /* ISA specific bits */
+#else
+#include <asm/entry-arcv2.h>
+#endif
+
 /* Note on the LD/ST addr modes with addr reg wback
  *
  * LD.a same as LD.aw
        POP     r13
 .endm
 
-#define OFF_USER_R25_FROM_R24  (SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4
-
 /*--------------------------------------------------------------
  * Collect User Mode callee regs as struct callee_regs - needed by
  * fork/do_signal/unaligned-access-emulation.
  *-------------------------------------------------------------*/
 .macro SAVE_CALLEE_SAVED_USER
 
+       mov     r12, sp         ; save SP as ref to pt_regs
        SAVE_R13_TO_R24
 
 #ifdef CONFIG_ARC_CURR_IN_REG
-       ; Retrieve orig r25 and save it on stack
-       ld.as   r12, [sp, OFF_USER_R25_FROM_R24]
-       st.a    r12, [sp, -4]
+       ; Retrieve orig r25 and save it with rest of callee_regs
+       ld.as   r12, [r12, PT_user_r25]
+       PUSH    r12
 #else
        PUSH    r25
 #endif
 .macro RESTORE_CALLEE_SAVED_USER
 
 #ifdef CONFIG_ARC_CURR_IN_REG
-       ld.ab   r12, [sp, 4]
-       st.as   r12, [sp, OFF_USER_R25_FROM_R24]
+       POP     r12
 #else
        POP     r25
 #endif
        RESTORE_R24_TO_R13
+
+       ; SP is back to start of pt_regs
+#ifdef CONFIG_ARC_CURR_IN_REG
+       st.as   r12, [sp, PT_user_r25]
+#endif
 .endm
 
 /*--------------------------------------------------------------
 
 .endm
 
-/*--------------------------------------------------------------
- * Switch to Kernel Mode stack if SP points to User Mode stack
- *
- * Entry   : r9 contains pre-IRQ/exception/trap status32
- * Exit    : SP is set to kernel mode stack pointer
- *           If CURR_IN_REG, r25 set to "current" task pointer
- * Clobbers: r9
- *-------------------------------------------------------------*/
-
-.macro SWITCH_TO_KERNEL_STK
-
-       /* User Mode when this happened ? Yes: Proceed to switch stack */
-       bbit1   r9, STATUS_U_BIT, 88f
-
-       /* OK we were already in kernel mode when this event happened, thus can
-        * assume SP is kernel mode SP. _NO_ need to do any stack switching
-        */
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-       /* However....
-        * If Level 2 Interrupts enabled, we may end up with a corner case:
-        * 1. User Task executing
-        * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
-        * 3. But before it could switch SP from USER to KERNEL stack
-        *      a L2 IRQ "Interrupts" L1
-        * That way although L2 IRQ happened in Kernel mode, stack is still
-        * not switched.
-        * To handle this, we may need to switch stack even if in kernel mode
-        * provided SP has values in range of USER mode stack ( < 0x7000_0000 )
-        */
-       brlo sp, VMALLOC_START, 88f
-
-       /* TODO: vineetg:
-        * We need to be a bit more cautious here. What if a kernel bug in
-        * L1 ISR, caused SP to go whacko (some small value which looks like
-        * USER stk) and then we take L2 ISR.
-        * Above brlo alone would treat it as a valid L1-L2 scenario
-        * instead of shouting aloud
-        * The only feasible way is to make sure this L2 happened in
-        * L1 prologue ONLY i.e. ilink2 is less than a pre-set marker in
-        * L1 ISR before it switches stack
-        */
-
-#endif
-
-       /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack
-        * safe-keeping not really needed, but it keeps the epilogue code
-        * (SP restore) simpler/uniform.
-        */
-       b.d     66f
-       mov     r9, sp
-
-88: /*------Intr/Excp happened in user mode, "switch" stack ------ */
-
-       GET_CURR_TASK_ON_CPU   r9
-
-       /* With current tsk in r9, get it's kernel mode stack base */
-       GET_TSK_STACK_BASE  r9, r9
-
-66:
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /*
-        * Treat r25 as scratch reg, save it on stack first
-        * Load it with current task pointer
-        */
-       st      r25, [r9, -4]
-       GET_CURR_TASK_ON_CPU   r25
-#endif
-
-       /* Save Pre Intr/Exception User SP on kernel stack */
-       st.a    sp, [r9, -16]   ; Make room for orig_r0, ECR, user_r25
-
-       /* CAUTION:
-        * SP should be set at the very end when we are done with everything
-        * In case of 2 levels of interrupt we depend on value of SP to assume
-        * that everything else is done (loading r25 etc)
-        */
-
-       /* set SP to point to kernel mode stack */
-       mov sp, r9
-
-       /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */
-
-.endm
-
-/*------------------------------------------------------------
- * "FAKE" a rtie to return from CPU Exception context
- * This is to re-enable Exceptions within exception
- * Look at EV_ProtV to see how this is actually used
- *-------------------------------------------------------------*/
-
-.macro FAKE_RET_FROM_EXCPN  reg
-
-       ld  \reg, [sp, PT_status32]
-       bic  \reg, \reg, (STATUS_U_MASK|STATUS_DE_MASK)
-       bset \reg, \reg, STATUS_L_BIT
-       sr  \reg, [erstatus]
-       mov \reg, 55f
-       sr  \reg, [eret]
-
-       rtie
-55:
-.endm
-
-/*
- * @reg [OUT] &thread_info of "current"
- */
-.macro GET_CURR_THR_INFO_FROM_SP  reg
-       bic \reg, sp, (THREAD_SIZE - 1)
-.endm
-
 /*
  * @reg [OUT] thread_info->flags of "current"
  */
        ld  \reg, [\reg, THREAD_INFO_FLAGS]
 .endm
 
-/*--------------------------------------------------------------
- * For early Exception Prologue, a core reg is temporarily needed to
- * code the rest of prolog (stack switching). This is done by stashing
- * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
- *
- * Before saving the full regfile - this reg is restored back, only
- * to be saved again on kernel mode stack, as part of pt_regs.
- *-------------------------------------------------------------*/
-.macro EXCPN_PROLOG_FREEUP_REG reg
-#ifdef CONFIG_SMP
-       sr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
-       st  \reg, [@ex_saved_reg1]
-#endif
-.endm
-
-.macro EXCPN_PROLOG_RESTORE_REG        reg
-#ifdef CONFIG_SMP
-       lr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
-       ld  \reg, [@ex_saved_reg1]
-#endif
-.endm
-
-/*--------------------------------------------------------------
- * Exception Entry prologue
- * -Switches stack to K mode (if not already)
- * -Saves the register file
- *
- * After this it is safe to call the "C" handlers
- *-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
-
-       /* Need at least 1 reg to code the early exception prologue */
-       EXCPN_PROLOG_FREEUP_REG r9
-
-       /* U/K mode at time of exception (stack not switched if already K) */
-       lr  r9, [erstatus]
-
-       /* ARC700 doesn't provide auto-stack switching */
-       SWITCH_TO_KERNEL_STK
-
-       /* save the regfile */
-       SAVE_ALL_SYS
-.endm
-
-/*--------------------------------------------------------------
- * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
- * Requires SP to be already switched to kernel mode Stack
- * sp points to the next free element on the stack at exit of this macro.
- * Registers are pushed / popped in the order defined in struct ptregs
- * in asm/ptrace.h
- * Note that syscalls are implemented via TRAP which is also an exception
- * from CPU's point of view
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_SYS
-
-       lr      r9, [ecr]
-       st      r9, [sp, 8]    /* ECR */
-       st      r0, [sp, 4]    /* orig_r0, needed only for sys calls */
-
-       /* Restore r9 used to code the early prologue */
-       EXCPN_PROLOG_RESTORE_REG  r9
-
-       SAVE_R0_TO_R12
-       PUSH    gp
-       PUSH    fp
-       PUSH    blink
-       PUSHAX  eret
-       PUSHAX  erstatus
-       PUSH    lp_count
-       PUSHAX  lp_end
-       PUSHAX  lp_start
-       PUSHAX  erbta
-.endm
-
-/*--------------------------------------------------------------
- * Restore all registers used by system call or Exceptions
- * SP should always be pointing to the next free stack element
- * when entering this macro.
- *
- * NOTE:
- *
- * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deferred
- * by hardware and that is not good.
- *-------------------------------------------------------------*/
-.macro RESTORE_ALL_SYS
-       POPAX   erbta
-       POPAX   lp_start
-       POPAX   lp_end
-
-       POP     r9
-       mov     lp_count, r9    ;LD to lp_count is not allowed
-
-       POPAX   erstatus
-       POPAX   eret
-       POP     blink
-       POP     fp
-       POP     gp
-       RESTORE_R12_TO_R0
-
-       ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
-/*--------------------------------------------------------------
- * Save all registers used by interrupt handlers.
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_INT1
-
-       /* restore original r9 to be saved as part of reg-file */
-#ifdef CONFIG_SMP
-       lr  r9, [ARC_REG_SCRATCH_DATA0]
-#else
-       ld  r9, [@int1_saved_reg]
-#endif
-
-       /* now we are ready to save the remaining context :) */
-       st      event_IRQ1, [sp, 8]    /* Dummy ECR */
-       st      0, [sp, 4]    /* orig_r0 , N/A for IRQ */
-
-       SAVE_R0_TO_R12
-       PUSH    gp
-       PUSH    fp
-       PUSH    blink
-       PUSH    ilink1
-       PUSHAX  status32_l1
-       PUSH    lp_count
-       PUSHAX  lp_end
-       PUSHAX  lp_start
-       PUSHAX  bta_l1
-.endm
-
-.macro SAVE_ALL_INT2
-
-       /* TODO-vineetg: SMP we can't use global nor can we use
-       *   SCRATCH0 as we do for int1 because while int1 is using
-       *   it, int2 can come
-       */
-       /* restore original r9, saved in sys_saved_r9 */
-       ld  r9, [@int2_saved_reg]
-
-       /* now we are ready to save the remaining context :) */
-       st      event_IRQ2, [sp, 8]    /* Dummy ECR */
-       st      0, [sp, 4]    /* orig_r0 , N/A for IRQ */
-
-       SAVE_R0_TO_R12
-       PUSH    gp
-       PUSH    fp
-       PUSH    blink
-       PUSH    ilink2
-       PUSHAX  status32_l2
-       PUSH    lp_count
-       PUSHAX  lp_end
-       PUSHAX  lp_start
-       PUSHAX  bta_l2
-.endm
-
-/*--------------------------------------------------------------
- * Restore all registers used by interrupt handlers.
- *
- * NOTE:
- *
- * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deferred
- * by hardware and that is not good.
- *-------------------------------------------------------------*/
-
-.macro RESTORE_ALL_INT1
-       POPAX   bta_l1
-       POPAX   lp_start
-       POPAX   lp_end
-
-       POP     r9
-       mov     lp_count, r9    ;LD to lp_count is not allowed
-
-       POPAX   status32_l1
-       POP     ilink1
-       POP     blink
-       POP     fp
-       POP     gp
-       RESTORE_R12_TO_R0
-
-       ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-.macro RESTORE_ALL_INT2
-       POPAX   bta_l2
-       POPAX   lp_start
-       POPAX   lp_end
-
-       POP     r9
-       mov     lp_count, r9    ;LD to lp_count is not allowed
-
-       POPAX   status32_l2
-       POP     ilink2
-       POP     blink
-       POP     fp
-       POP     gp
-       RESTORE_R12_TO_R0
-
-       ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
-/* Get CPU-ID of this core */
-.macro  GET_CPU_ID  reg
-       lr  \reg, [identity]
-       lsr \reg, \reg, 8
-       bmsk \reg, \reg, 7
-.endm
-
 #ifdef CONFIG_SMP
 
 /*-------------------------------------------------
 
 #endif /* CONFIG_ARC_CURR_IN_REG */
 
-#endif  /* __ASSEMBLY__ */
-
 #endif  /* __ASM_ARC_ENTRY_H */
index 7cc4ced5dbf4e4894c6b7d594a9810df74ab26d0..694ece8a024372bef7fd24b3ccb1e9dc323b4afe 100644 (file)
@@ -99,9 +99,45 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 
 }
 
-#define readb_relaxed readb
-#define readw_relaxed readw
-#define readl_relaxed readl
+#ifdef CONFIG_ISA_ARCV2
+#include <asm/barrier.h>
+#define __iormb()              rmb()
+#define __iowmb()              wmb()
+#else
+#define __iormb()              do { } while (0)
+#define __iowmb()              do { } while (0)
+#endif
+
+/*
+ * MMIO can also get buffered/optimized in the micro-arch, so barriers are needed.
+ * Based on ARM model for the typical use case
+ *
+ *     <ST [DMA buffer]>
+ *     <writel MMIO "go" reg>
+ *  or:
+ *     <readl MMIO "status" reg>
+ *     <LD [DMA buffer]>
+ *
+ * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com
+ */
+#define readb(c)               ({ u8  __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c)               ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c)               ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+
+#define writeb(v,c)            ({ __iowmb(); writeb_relaxed(v,c); })
+#define writew(v,c)            ({ __iowmb(); writew_relaxed(v,c); })
+#define writel(v,c)            ({ __iowmb(); writel_relaxed(v,c); })
+
+/*
+ * Relaxed API for drivers which can handle any ordering themselves
+ */
+#define readb_relaxed(c)       __raw_readb(c)
+#define readw_relaxed(c)       __raw_readw(c)
+#define readl_relaxed(c)       __raw_readl(c)
+
+#define writeb_relaxed(v,c)    __raw_writeb(v,c)
+#define writew_relaxed(v,c)    __raw_writew(v,c)
+#define writel_relaxed(v,c)    __raw_writel(v,c)
 
 #include <asm-generic/io.h>
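
A hedged driver-side sketch of the use case the comment above describes; the
device, its descriptor struct and the 0x10 "go" register offset are made up
for illustration:

	/* writel() issues __iowmb() before the MMIO store, so the descriptor
	 * write below reaches memory before the doorbell reaches the device.
	 * readl() pairs the other way: __iormb() after the load, before any
	 * dependent LD from the DMA buffer.
	 */
	struct toy_desc { u32 len; u32 busaddr; };

	static void toy_dev_kick(void __iomem *regs, struct toy_desc *d)
	{
		d->len = 64;			 /* plain store to DMA memory */
		writel(d->busaddr, regs + 0x10); /* barrier, then MMIO write */
	}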
 
index f38652fb2ed772566b0a47cf51a01183a329004c..bc51036373261c6068292830adae89a80b5d0c9c 100644 (file)
 #define NR_IRQS                128 /* allow some CPU external IRQ handling */
 
 /* Platform Independent IRQs */
+#ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
+#else
+#define TIMER0_IRQ      16
+#define TIMER1_IRQ      17
+#define IPI_IRQ         19
+#endif
 
 #include <linux/interrupt.h>
 #include <asm-generic/irq.h>
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
new file mode 100644 (file)
index 0000000..ad481c2
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_IRQFLAGS_ARCV2_H
+#define __ASM_IRQFLAGS_ARCV2_H
+
+#include <asm/arcregs.h>
+
+/* status32 Bits */
+#define STATUS_AD_BIT  19   /* Disable Align chk: core supports non-aligned */
+#define STATUS_IE_BIT  31
+
+#define STATUS_AD_MASK         (1<<STATUS_AD_BIT)
+#define STATUS_IE_MASK         (1<<STATUS_IE_BIT)
+
+#define AUX_USER_SP            0x00D
+#define AUX_IRQ_CTRL           0x00E
+#define AUX_IRQ_ACT            0x043   /* Active Intr across all levels */
+#define AUX_IRQ_LVL_PEND       0x200   /* Pending Intr across all levels */
+#define AUX_IRQ_PRIORITY       0x206
+#define ICAUSE                 0x40a
+#define AUX_IRQ_SELECT         0x40b
+#define AUX_IRQ_ENABLE         0x40c
+
+/* Was Intr taken in User Mode */
+#define AUX_IRQ_ACT_BIT_U      31
+
+/* 0 is highest level, but taken by FIRQs, if present in design */
+#define ARCV2_IRQ_DEF_PRIO             0
+
+/* seed value for status register */
+#define ISA_INIT_STATUS_BITS   (STATUS_IE_MASK | STATUS_AD_MASK | \
+                                       (ARCV2_IRQ_DEF_PRIO << 1))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Save IRQ state and disable IRQs
+ */
+static inline long arch_local_irq_save(void)
+{
+       unsigned long flags;
+
+       __asm__ __volatile__("  clri %0 \n" : "=r" (flags) : : "memory");
+
+       return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+       __asm__ __volatile__("  seti %0 \n" : : "r" (flags) : "memory");
+}
+
+/*
+ * Unconditionally Enable IRQs
+ */
+static inline void arch_local_irq_enable(void)
+{
+       unsigned int irqact = read_aux_reg(AUX_IRQ_ACT);
+
+       if (irqact & 0xffff)
+               write_aux_reg(AUX_IRQ_ACT, irqact & ~0xffff);
+
+       __asm__ __volatile__("  seti    \n" : : : "memory");
+}
+
+/*
+ * Unconditionally Disable IRQs
+ */
+static inline void arch_local_irq_disable(void)
+{
+       __asm__ __volatile__("  clri    \n" : : : "memory");
+}
+
+/*
+ * save IRQ state
+ */
+static inline long arch_local_save_flags(void)
+{
+       unsigned long temp;
+
+       __asm__ __volatile__(
+       "       lr  %0, [status32]      \n"
+       : "=&r"(temp)
+       :
+       : "memory");
+
+       return temp;
+}
+
+/*
+ * Query IRQ state
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+       return !(flags & (STATUS_IE_MASK));
+}
+
+static inline int arch_irqs_disabled(void)
+{
+       return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#else
+
+.macro IRQ_DISABLE  scratch
+       clri
+.endm
+
+.macro IRQ_ENABLE  scratch
+       seti
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
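
A minimal usage sketch of the API above, as the generic irqflags layer would
drive it (assuming the usual local_irq_save()/local_irq_restore() wrappers
resolve to these):

	unsigned long flags;

	flags = arch_local_irq_save();	/* clri: disable, return old state */
	/* ... per-CPU critical section ... */
	arch_local_irq_restore(flags);	/* seti <flags>: restore old state */

Note that arch_local_irq_enable() additionally clears any ACTIVE bits in
AUX_IRQ_ACT before seti, so no stale in-service interrupt level is left behind.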
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
new file mode 100644 (file)
index 0000000..aa80557
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_IRQFLAGS_ARCOMPACT_H
+#define __ASM_IRQFLAGS_ARCOMPACT_H
+
+/* vineetg: March 2010 : local_irq_save( ) optimisation
+ *  -Remove explicit mov of current status32 into reg, that is not needed
+ *  -Use BIC  insn instead of INVERTED + AND
+ *  -Conditionally disable interrupts (if they are not enabled, don't disable)
+*/
+
+#include <asm/arcregs.h>
+
+/* status32 Reg bits related to Interrupt Handling */
+#define STATUS_E1_BIT          1       /* Int 1 enable */
+#define STATUS_E2_BIT          2       /* Int 2 enable */
+#define STATUS_A1_BIT          3       /* Int 1 active */
+#define STATUS_A2_BIT          4       /* Int 2 active */
+
+#define STATUS_E1_MASK         (1<<STATUS_E1_BIT)
+#define STATUS_E2_MASK         (1<<STATUS_E2_BIT)
+#define STATUS_A1_MASK         (1<<STATUS_A1_BIT)
+#define STATUS_A2_MASK         (1<<STATUS_A2_BIT)
+#define STATUS_IE_MASK         (STATUS_E1_MASK | STATUS_E2_MASK)
+
+/* Other Interrupt Handling related Aux regs */
+#define AUX_IRQ_LEV            0x200   /* IRQ Priority: L1 or L2 */
+#define AUX_IRQ_HINT           0x201   /* For generating Soft Interrupts */
+#define AUX_IRQ_LV12           0x43    /* interrupt level register */
+
+#define AUX_IENABLE            0x40c
+#define AUX_ITRIGGER           0x40d
+#define AUX_IPULSE             0x415
+
+#define ISA_INIT_STATUS_BITS   STATUS_IE_MASK
+
+#ifndef __ASSEMBLY__
+
+/******************************************************************
+ * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ *     Orig Bug + Rejected solution    : https://lkml.org/lkml/2013/3/29/67
+ *     Reasoning                       : https://lkml.org/lkml/2013/4/8/15
+ *
+ ******************************************************************/
+
+/*
+ * Save IRQ state and disable IRQs
+ */
+static inline long arch_local_irq_save(void)
+{
+       unsigned long temp, flags;
+
+       __asm__ __volatile__(
+       "       lr  %1, [status32]      \n"
+       "       bic %0, %1, %2          \n"
+       "       and.f 0, %1, %2 \n"
+       "       flag.nz %0              \n"
+       : "=r"(temp), "=r"(flags)
+       : "n"((STATUS_E1_MASK | STATUS_E2_MASK))
+       : "memory", "cc");
+
+       return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+
+       __asm__ __volatile__(
+       "       flag %0                 \n"
+       :
+       : "r"(flags)
+       : "memory");
+}
+
+/*
+ * Unconditionally Enable IRQs
+ */
+extern void arch_local_irq_enable(void);
+
+/*
+ * Unconditionally Disable IRQs
+ */
+static inline void arch_local_irq_disable(void)
+{
+       unsigned long temp;
+
+       __asm__ __volatile__(
+       "       lr  %0, [status32]      \n"
+       "       and %0, %0, %1          \n"
+       "       flag %0                 \n"
+       : "=&r"(temp)
+       : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))
+       : "memory");
+}
+
+/*
+ * save IRQ state
+ */
+static inline long arch_local_save_flags(void)
+{
+       unsigned long temp;
+
+       __asm__ __volatile__(
+       "       lr  %0, [status32]      \n"
+       : "=&r"(temp)
+       :
+       : "memory");
+
+       return temp;
+}
+
+/*
+ * Query IRQ state
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+       return !(flags & (STATUS_E1_MASK
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+                       | STATUS_E2_MASK
+#endif
+               ));
+}
+
+static inline int arch_irqs_disabled(void)
+{
+       return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#else
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+.macro TRACE_ASM_IRQ_DISABLE
+       bl      trace_hardirqs_off
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+       bl      trace_hardirqs_on
+.endm
+
+#else
+
+.macro TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif
+
+.macro IRQ_DISABLE  scratch
+       lr      \scratch, [status32]
+       bic     \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+       flag    \scratch
+       TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro IRQ_ENABLE  scratch
+       lr      \scratch, [status32]
+       or      \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+       flag    \scratch
+       TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
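
The and.f/flag.nz pair in arch_local_irq_save() above amounts to this C logic
(a semantic sketch; read_status32()/write_status32() are hypothetical helpers
standing in for the lr/flag instructions):

	static inline long sketch_irq_save(void)
	{
		unsigned long flags = read_status32();
		unsigned long temp = flags & ~(STATUS_E1_MASK | STATUS_E2_MASK);

		if (flags & (STATUS_E1_MASK | STATUS_E2_MASK))
			write_status32(temp);	/* only write if E1/E2 was set */

		return flags;
	}

i.e. STATUS32 is written back only when interrupts were actually enabled,
skipping the write when they were already off.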
index 27ecc6975a5845dee5960197bded8edeff261cbc..59bc6a64f75da77c9cf1972ba35625d74651d51e 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify
 #ifndef __ASM_ARC_IRQFLAGS_H
 #define __ASM_ARC_IRQFLAGS_H
 
-/* vineetg: March 2010 : local_irq_save( ) optimisation
- *  -Remove explicit mov of current status32 into reg, that is not needed
- *  -Use BIC  insn instead of INVERTED + AND
- *  -Conditionally disable interrupts (if they are not enabled, don't disable)
-*/
-
-#include <asm/arcregs.h>
-
-/* status32 Reg bits related to Interrupt Handling */
-#define STATUS_E1_BIT          1       /* Int 1 enable */
-#define STATUS_E2_BIT          2       /* Int 2 enable */
-#define STATUS_A1_BIT          3       /* Int 1 active */
-#define STATUS_A2_BIT          4       /* Int 2 active */
-
-#define STATUS_E1_MASK         (1<<STATUS_E1_BIT)
-#define STATUS_E2_MASK         (1<<STATUS_E2_BIT)
-#define STATUS_A1_MASK         (1<<STATUS_A1_BIT)
-#define STATUS_A2_MASK         (1<<STATUS_A2_BIT)
-
-/* Other Interrupt Handling related Aux regs */
-#define AUX_IRQ_LEV            0x200   /* IRQ Priority: L1 or L2 */
-#define AUX_IRQ_HINT           0x201   /* For generating Soft Interrupts */
-#define AUX_IRQ_LV12           0x43    /* interrupt level register */
-
-#define AUX_IENABLE            0x40c
-#define AUX_ITRIGGER           0x40d
-#define AUX_IPULSE             0x415
-
-#ifndef __ASSEMBLY__
-
-/******************************************************************
- * IRQ Control Macros
- *
- * All of them have "memory" clobber (compiler barrier) which is needed to
- * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
- * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
- *
- * Noted at the time of Abilis Timer List corruption
- *     Orig Bug + Rejected solution    : https://lkml.org/lkml/2013/3/29/67
- *     Reasoning                       : https://lkml.org/lkml/2013/4/8/15
- *
- ******************************************************************/
-
-/*
- * Save IRQ state and disable IRQs
- */
-static inline long arch_local_irq_save(void)
-{
-       unsigned long temp, flags;
-
-       __asm__ __volatile__(
-       "       lr  %1, [status32]      \n"
-       "       bic %0, %1, %2          \n"
-       "       and.f 0, %1, %2 \n"
-       "       flag.nz %0              \n"
-       : "=r"(temp), "=r"(flags)
-       : "n"((STATUS_E1_MASK | STATUS_E2_MASK))
-       : "memory", "cc");
-
-       return flags;
-}
-
-/*
- * restore saved IRQ state
- */
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-
-       __asm__ __volatile__(
-       "       flag %0                 \n"
-       :
-       : "r"(flags)
-       : "memory");
-}
-
-/*
- * Unconditionally Enable IRQs
- */
-extern void arch_local_irq_enable(void);
-
-/*
- * Unconditionally Disable IRQs
- */
-static inline void arch_local_irq_disable(void)
-{
-       unsigned long temp;
-
-       __asm__ __volatile__(
-       "       lr  %0, [status32]      \n"
-       "       and %0, %0, %1          \n"
-       "       flag %0                 \n"
-       : "=&r"(temp)
-       : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))
-       : "memory");
-}
-
-/*
- * save IRQ state
- */
-static inline long arch_local_save_flags(void)
-{
-       unsigned long temp;
-
-       __asm__ __volatile__(
-       "       lr  %0, [status32]      \n"
-       : "=&r"(temp)
-       :
-       : "memory");
-
-       return temp;
-}
-
-/*
- * Query IRQ state
- */
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
-       return !(flags & (STATUS_E1_MASK
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-                       | STATUS_E2_MASK
-#endif
-               ));
-}
-
-static inline int arch_irqs_disabled(void)
-{
-       return arch_irqs_disabled_flags(arch_local_save_flags());
-}
-
-#else
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-.macro TRACE_ASM_IRQ_DISABLE
-       bl      trace_hardirqs_off
-.endm
-
-.macro TRACE_ASM_IRQ_ENABLE
-       bl      trace_hardirqs_on
-.endm
-
+#ifdef CONFIG_ISA_ARCOMPACT
+#include <asm/irqflags-compact.h>
 #else
-
-.macro TRACE_ASM_IRQ_DISABLE
-.endm
-
-.macro TRACE_ASM_IRQ_ENABLE
-.endm
-
+#include <asm/irqflags-arcv2.h>
 #endif
 
-.macro IRQ_DISABLE  scratch
-       lr      \scratch, [status32]
-       bic     \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
-       flag    \scratch
-       TRACE_ASM_IRQ_DISABLE
-.endm
-
-.macro IRQ_ENABLE  scratch
-       lr      \scratch, [status32]
-       or      \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
-       flag    \scratch
-       TRACE_ASM_IRQ_ENABLE
-.endm
-
-#endif /* __ASSEMBLY__ */
-
 #endif
diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h
new file mode 100644 (file)
index 0000000..52c11f0
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * ARConnect IP Support (Multi core enabler: Cross core IPI, RTC ...)
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MCIP_H
+#define __ASM_MCIP_H
+
+#ifdef CONFIG_ISA_ARCV2
+
+#include <asm/arcregs.h>
+
+#define ARC_REG_MCIP_BCR       0x0d0
+#define ARC_REG_MCIP_CMD       0x600
+#define ARC_REG_MCIP_WDATA     0x601
+#define ARC_REG_MCIP_READBACK  0x602
+
+struct mcip_cmd {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:8, param:16, cmd:8;
+#else
+       unsigned int cmd:8, param:16, pad:8;
+#endif
+
+#define CMD_INTRPT_GENERATE_IRQ                0x01
+#define CMD_INTRPT_GENERATE_ACK                0x02
+#define CMD_INTRPT_READ_STATUS         0x03
+#define CMD_INTRPT_CHECK_SOURCE                0x04
+
+/* Semaphore Commands */
+#define CMD_SEMA_CLAIM_AND_READ                0x11
+#define CMD_SEMA_RELEASE               0x12
+
+#define CMD_DEBUG_SET_MASK             0x34
+#define CMD_DEBUG_SET_SELECT           0x36
+
+#define CMD_GRTC_READ_LO               0x42
+#define CMD_GRTC_READ_HI               0x43
+
+#define CMD_IDU_ENABLE                 0x71
+#define CMD_IDU_DISABLE                        0x72
+#define CMD_IDU_SET_MODE               0x74
+#define CMD_IDU_SET_DEST               0x76
+#define CMD_IDU_SET_MASK               0x7C
+
+#define IDU_M_TRIG_LEVEL               0x0
+#define IDU_M_TRIG_EDGE                        0x1
+
+#define IDU_M_DISTRI_RR                        0x0
+#define IDU_M_DISTRI_DEST              0x2
+};
+
+/*
+ * MCIP programming model
+ *
+ * - Simple commands write {cmd:8,param:16} to MCIP_CMD aux reg
+ *   (param could be irq, common_irq, core_id ...)
+ * - More involved commands set up MCIP_WDATA with cmd-specific data
+ *   before invoking the simple command
+ */
+static inline void __mcip_cmd(unsigned int cmd, unsigned int param)
+{
+       struct mcip_cmd buf;
+
+       buf.pad = 0;
+       buf.cmd = cmd;
+       buf.param = param;
+
+       WRITE_AUX(ARC_REG_MCIP_CMD, buf);
+}
+
+/*
+ * Setup additional data for a cmd
+ * Callers need to lock to ensure atomicity
+ */
+static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param,
+                                  unsigned int data)
+{
+       write_aux_reg(ARC_REG_MCIP_WDATA, data);
+
+       __mcip_cmd(cmd, param);
+}
+
+extern void mcip_init_early_smp(void);
+extern void mcip_init_smp(unsigned int cpu);
+
+#endif
+
+#endif
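
A hedged sketch of how callers are expected to drive these helpers, e.g. for a
cross-core interrupt or an IDU destination setup (the real users live in
mcip.c; this is illustrative only):

	static void sketch_send_ipi(int cpu)
	{
		/* simple command: param carries the target core id */
		__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
	}

	static void sketch_idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask)
	{
		/* involved command: caller must lock so WDATA + CMD stay atomic */
		__mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask);
	}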
index 8c84ae98c33767937ce52f33c8cab6daddabe278..0f9c3eb5327e4494f4a310e62e194c4457c08bea 100644 (file)
 #define CONFIG_ARC_MMU_VER 2
 #elif defined(CONFIG_ARC_MMU_V3)
 #define CONFIG_ARC_MMU_VER 3
+#elif defined(CONFIG_ARC_MMU_V4)
+#define CONFIG_ARC_MMU_VER 4
 #endif
 
 /* MMU Management regs */
 #define ARC_REG_MMU_BCR                0x06f
+#if (CONFIG_ARC_MMU_VER < 4)
 #define ARC_REG_TLBPD0         0x405
 #define ARC_REG_TLBPD1         0x406
 #define ARC_REG_TLBINDEX       0x407
 #define ARC_REG_TLBCOMMAND     0x408
 #define ARC_REG_PID            0x409
 #define ARC_REG_SCRATCH_DATA0  0x418
+#else
+#define ARC_REG_TLBPD0         0x460
+#define ARC_REG_TLBPD1         0x461
+#define ARC_REG_TLBINDEX       0x464
+#define ARC_REG_TLBCOMMAND     0x465
+#define ARC_REG_PID            0x468
+#define ARC_REG_SCRATCH_DATA0  0x46c
+#endif
 
 /* Bits in MMU PID register */
-#define MMU_ENABLE             (1 << 31)       /* Enable MMU for process */
+#define __TLB_ENABLE           (1 << 31)
+#define __PROG_ENABLE          (1 << 30)
+#define MMU_ENABLE             (__TLB_ENABLE | __PROG_ENABLE)
 
 /* Error code if probe fails */
 #define TLB_LKUP_ERR           0x80000000
 
+#if (CONFIG_ARC_MMU_VER < 4)
 #define TLB_DUP_ERR    (TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR    (TLB_LKUP_ERR | 0x40000000)
+#endif
 
 /* TLB Commands */
 #define TLBWrite    0x1
 #define TLBIVUTLB   0x6                /* explicitly inv uTLBs */
 #endif
 
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define TLBInsertEntry 0x7
+#define TLBDeleteEntry 0x8
+#endif
+
 #ifndef __ASSEMBLY__
 
 typedef struct {
index 9615fe1701c60af212b1d20e0d626120956462e1..1281718802f7c8e4d3f71bdf50b3affd57fd66d6 100644 (file)
 #define _PAGE_READ          (1<<3)     /* Page has user read perm (H) */
 #define _PAGE_ACCESSED      (1<<4)     /* Page is accessed (S) */
 #define _PAGE_MODIFIED      (1<<5)     /* Page modified (dirty) (S) */
+
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define _PAGE_WTHRU         (1<<7)     /* Page cache mode write-thru (H) */
+#endif
+
 #define _PAGE_GLOBAL        (1<<8)     /* Page is global (H) */
 #define _PAGE_PRESENT       (1<<9)     /* TLB entry is valid (H) */
+
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define _PAGE_SZ            (1<<10)    /* Page Size indicator (H) */
+#endif
+
 #define _PAGE_SHARED_CODE   (1<<11)    /* Shared Code page with cmn vaddr
                                           usable for shared TLB entries (H) */
 #endif
index 52312cb5dbe21490b48e21343ab8f82b7eecfc0e..ee682d8e0213c5c6c2fac2d70f39dff23b15854a 100644 (file)
@@ -77,7 +77,7 @@ struct task_struct;
  */
 #define TSK_K_ESP(tsk)         (tsk->thread.ksp)
 
-#define TSK_K_REG(tsk, off)    (*((unsigned int *)(TSK_K_ESP(tsk) + \
+#define TSK_K_REG(tsk, off)    (*((unsigned long *)(TSK_K_ESP(tsk) + \
                                        sizeof(struct callee_regs) + off)))
 
 #define TSK_K_BLINK(tsk)       TSK_K_REG(tsk, 4)
@@ -100,29 +100,26 @@ extern unsigned int get_wchan(struct task_struct *p);
 
 #endif /* !__ASSEMBLY__ */
 
-/* Kernel's Virtual memory area.
- * Unlike other architectures (MIPS, sh, cris) ARC 700 does not have a
- * "kernel translated" region (like KSEG2 in MIPS). So we use an upper part
- * of the translated bottom 2GB for kernel virtual memory and protect
- * these pages from user accesses by disabling Ru, Eu and Wu.
+/*
+ * System Memory Map on ARC
+ *
+ * ---------------------------- (lower 2G, Translated) -------------------------
+ * 0x0000_0000         0x5FFF_FFFF     (user vaddr: TASK_SIZE)
+ * 0x6000_0000         0x6FFF_FFFF     (reserved gutter between U/K)
+ * 0x7000_0000         0x7FFF_FFFF     (kvaddr: vmalloc/modules/pkmap..)
+ *
+ * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) -----------------------
+ * 0x8000_0000         0xBFFF_FFFF     (kernel direct mapped)
+ * 0xC000_0000         0xFFFF_FFFF     (peripheral uncached space)
+ * -----------------------------------------------------------------------------
  */
-#define VMALLOC_SIZE   (0x10000000)    /* 256M */
-#define VMALLOC_START  (PAGE_OFFSET - VMALLOC_SIZE)
-#define VMALLOC_END    (PAGE_OFFSET)
+#define VMALLOC_START  0x70000000
+#define VMALLOC_SIZE   (PAGE_OFFSET - VMALLOC_START)
+#define VMALLOC_END    (VMALLOC_START + VMALLOC_SIZE)
 
-/* Most of the architectures seem to be keeping some kind of padding between
- * userspace TASK_SIZE and PAGE_OFFSET. i.e TASK_SIZE != PAGE_OFFSET.
- */
 #define USER_KERNEL_GUTTER    0x10000000
 
-/* User address space:
- * On ARC700, CPU allows the entire lower half of 32 bit address space to be
- * translated. Thus potentially 2G (0:0x7FFF_FFFF) could be User vaddr space.
- * However we steal 256M for kernel addr (0x7000_0000:0x7FFF_FFFF) and another
- * 256M (0x6000_0000:0x6FFF_FFFF) is gutter between user/kernel spaces
- * Thus total User vaddr space is (0:0x5FFF_FFFF)
- */
-#define TASK_SIZE      (PAGE_OFFSET - VMALLOC_SIZE - USER_KERNEL_GUTTER)
+#define TASK_SIZE      (VMALLOC_START - USER_KERNEL_GUTTER)
 
 #define STACK_TOP       TASK_SIZE
 #define STACK_TOP_MAX   STACK_TOP
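
Plugging the default PAGE_OFFSET of 0x8000_0000 into the definitions above, as
a quick arithmetic check against the map in the comment:

	VMALLOC_START = 0x7000_0000
	VMALLOC_SIZE  = PAGE_OFFSET - VMALLOC_START        = 0x1000_0000 (256M)
	VMALLOC_END   = VMALLOC_START + VMALLOC_SIZE       = 0x8000_0000
	TASK_SIZE     = VMALLOC_START - USER_KERNEL_GUTTER = 0x6000_0000 (user 0..1.5G)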
index 1bfeec2c0558c2f6f91142105bee0c6ff70c7a75..91755972b9a25222c37a36e6b76dfd758cdbe771 100644 (file)
@@ -16,6 +16,7 @@
 
 /* THE pt_regs: Defines how regs are saved during entry into kernel */
 
+#ifdef CONFIG_ISA_ARCOMPACT
 struct pt_regs {
 
        /* Real registers */
@@ -56,6 +57,48 @@ struct pt_regs {
 
        long user_r25;
 };
+#else
+
+struct pt_regs {
+
+       long orig_r0;
+
+       union {
+               struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+                       unsigned long state:8, ecr_vec:8,
+                                     ecr_cause:8, ecr_param:8;
+#else
+                       unsigned long ecr_param:8, ecr_cause:8,
+                                     ecr_vec:8, state:8;
+#endif
+               };
+               unsigned long event;
+       };
+
+       long bta;       /* bta_l1, bta_l2, erbta */
+
+       long user_r25;
+
+       long r26;       /* gp */
+       long fp;
+       long sp;        /* user/kernel sp depending on where we came from  */
+
+       long r12;
+
+       /*------- Below list auto saved by h/w -----------*/
+       long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
+
+       long blink;
+       long lp_end, lp_start, lp_count;
+
+       long ei, ldi, jli;
+
+       long ret;
+       long status32;
+};
+
+#endif
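
A hedged sketch of consuming the ECR union above: a handler can compare the
packed 'event' word or the decoded bitfields (ECR_V_TRAP stands in for
whatever vector constant arcregs.h defines for the ISA at hand):

	static bool sketch_entry_was_trap(struct pt_regs *regs)
	{
		/* 'event' aliases the packed {param, cause, vec, state} fields */
		return regs->ecr_vec == ECR_V_TRAP;
	}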
 
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
index b6a8c2dfbe6e42cd51def893784f0780bc67264e..e1651df6a93d5bc8ab0af3a833c7c6ffd23acacc 100644 (file)
@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
        unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+       /*
+        * This smp_mb() is technically superfluous, we only need the one
+        * after the lock for providing the ACQUIRE semantics.
+        * However doing the "right" thing was regressing hackbench
+        * so keeping this, pending further investigation
+        */
+       smp_mb();
+
        __asm__ __volatile__(
        "1:     ex  %0, [%1]            \n"
        "       breq  %0, %2, 1b        \n"
        : "+&r" (tmp)
        : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
        : "memory");
+
+       /*
+        * ACQUIRE barrier to ensure load/store after taking the lock
+        * don't "bleed-up" out of the critical section (leak-in is allowed)
+        * http://www.spinics.net/lists/kernel/msg2010409.html
+        *
+        * ARCv2 only has load-load, store-store and all-all barrier
+        * thus need the full all-all barrier
+        */
+       smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
        unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
 
+       smp_mb();
+
        __asm__ __volatile__(
        "1:     ex  %0, [%1]            \n"
        : "+r" (tmp)
        : "r"(&(lock->slock))
        : "memory");
 
+       smp_mb();
+
        return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
        unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
 
+       /*
+        * RELEASE barrier: given the instructions avail on ARCv2, full barrier
+        * is the only option
+        */
+       smp_mb();
+
        __asm__ __volatile__(
        "       ex  %0, [%1]            \n"
        : "+r" (tmp)
        : "r"(&(lock->slock))
        : "memory");
 
+       /*
+        * superfluous, but keeping for now - see pairing version in
+        * arch_spin_lock above
+        */
        smp_mb();
 }
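
The reordering these full barriers forbid, as a two-CPU sketch (EX is the
atomic exchange used above; ARCv2 has no lighter acquire/release barrier,
hence smp_mb() on both sides of lock and unlock):

	/*
	 *   CPU0                          CPU1
	 *   arch_spin_lock(&l)
	 *   x = shared;  <-- without the trailing smp_mb() this load could
	 *                    float up above the EX and read pre-lock data
	 *   arch_spin_unlock(&l)          arch_spin_lock(&l)
	 *                                 shared = 1;
	 */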
 
index aca0d5a45c7b84c2bdc0f95da1549ad7d3f1af68..3af67455659af49d9f9d53b67ac26024cf3b77b9 100644 (file)
@@ -25,6 +25,7 @@
 #endif
 
 #define THREAD_SIZE     (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT   (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
 #ifndef __ASSEMBLY__
 
index 30c9baffa96f1f3a5cab5d6ec6fe83b9f4e86318..d1da6032b715a7fea35d71fcedeeeb8cdfe590e4 100644 (file)
@@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
 static inline long
 __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 {
-       long res = count;
+       long res = 0;
        char val;
-       unsigned int hw_count;
 
        if (count == 0)
                return 0;
 
        __asm__ __volatile__(
-       "       lp 2f           \n"
+       "       lp      3f                      \n"
        "1:     ldb.ab  %3, [%2, 1]             \n"
-       "       breq.d  %3, 0, 2f               \n"
+       "       breq.d  %3, 0, 3f               \n"
        "       stb.ab  %3, [%1, 1]             \n"
-       "2:     sub %0, %6, %4                  \n"
-       "3:     ;nop                            \n"
+       "       add     %0, %0, 1       # Num of NON NULL bytes copied  \n"
+       "3:                                                             \n"
        "       .section .fixup, \"ax\"         \n"
        "       .align 4                        \n"
-       "4:     mov %0, %5                      \n"
+       "4:     mov %0, %4              # sets @res as -EFAULT  \n"
        "       j   3b                          \n"
        "       .previous                       \n"
        "       .section __ex_table, \"a\"      \n"
        "       .align 4                        \n"
        "       .word   1b, 4b                  \n"
        "       .previous                       \n"
-       : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count)
-       : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */
+       : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
+       : "g"(-EFAULT), "l"(count)
        : "memory");
 
        return res;
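
Return-value semantics after this rewrite, as a caller sees them (a sketch;
user_ptr is illustrative):

	char buf[16];
	long n = __arc_strncpy_from_user(buf, user_ptr, sizeof(buf));
	/*
	 * n >= 0       : count of non-NUL bytes copied (res bumped per byte)
	 * n == -EFAULT : faulted reading user memory (fixup path sets res)
	 */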
index e5d41e08240c561f4299d2b3ce6c6e30d7d3a236..9d129a2a1351951465b22fd020f568abe1843b7a 100644 (file)
@@ -30,7 +30,7 @@
 #define PAGE_OFFSET    (0x80000000)
 #else
 #define PAGE_SIZE      (1UL << PAGE_SHIFT)     /* Default 8K */
-#define PAGE_OFFSET    (0x80000000UL)  /* Kernel starts at 2G onwards */
+#define PAGE_OFFSET    (0x80000000UL)          /* Kernel starts at 2G onwards */
 #endif
 
 #define PAGE_MASK      (~(PAGE_SIZE-1))
index 113f2033da9f096a45588a40b00193c8305cf693..e7f3625a19b51dc5f117e13fc71b2930bbb4b52d 100644 (file)
@@ -8,12 +8,14 @@
 # Pass UTS_MACHINE for user_regset definition
 CFLAGS_ptrace.o                += -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
-obj-y  := arcksyms.o setup.o irq.o time.o reset.o ptrace.o entry.o process.o
+obj-y  := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o
 obj-y  += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o
-obj-y  += devtree.o
+obj-$(CONFIG_ISA_ARCOMPACT)            += entry-compact.o intc-compact.o
+obj-$(CONFIG_ISA_ARCV2)                        += entry-arcv2.o intc-arcv2.o
 
 obj-$(CONFIG_MODULES)                  += arcksyms.o module.o
 obj-$(CONFIG_SMP)                      += smp.o
+obj-$(CONFIG_ARC_MCIP)                 += mcip.o
 obj-$(CONFIG_ARC_DW2_UNWIND)           += unwind.o
 obj-$(CONFIG_KPROBES)                  += kprobes.o
 obj-$(CONFIG_ARC_EMUL_UNALIGNED)       += unaligned.o
index 6c3aa0edb9b5bc1914d006b2a22b766f71adba9c..ecaf34e9235c20dba4729d79321bef0d3b462a37 100644 (file)
@@ -37,6 +37,8 @@ int main(void)
 
        DEFINE(TASK_ACT_MM, offsetof(struct task_struct, active_mm));
        DEFINE(TASK_TGID, offsetof(struct task_struct, tgid));
+       DEFINE(TASK_PID, offsetof(struct task_struct, pid));
+       DEFINE(TASK_COMM, offsetof(struct task_struct, comm));
 
        DEFINE(MM_CTXT, offsetof(struct mm_struct, context));
        DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
@@ -56,8 +58,11 @@ int main(void)
        DEFINE(PT_r5, offsetof(struct pt_regs, r5));
        DEFINE(PT_r6, offsetof(struct pt_regs, r6));
        DEFINE(PT_r7, offsetof(struct pt_regs, r7));
+       DEFINE(PT_ret, offsetof(struct pt_regs, ret));
 
        DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
        DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+       DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
        return 0;
 }
index e32b54abff51fdc6b16894ce466fd16d2063bd7f..7e844fd8213fda0c0e41f6dc436c8a348f2afd52 100644 (file)
@@ -32,6 +32,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
 
        if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
                arc_base_baud = core_clk/3;
+       else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
+               arc_base_baud = 33333333;       /* Fixed 33MHz clk (AXS10x) */
        else
                arc_base_baud = core_clk;
 }
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
new file mode 100644 (file)
index 0000000..bd7105d
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>   /* ARC_{ENTRY,EXIT} */
+#include <asm/entry.h>       /* SAVE_ALL_{INT1,INT2,TRAP...} */
+#include <asm/errno.h>
+#include <asm/arcregs.h>
+#include <asm/irqflags.h>
+
+       .cpu HS
+
+#define VECTOR .word
+
+;############################ Vector Table #################################
+
+       .section .vector,"a",@progbits
+       .align 4
+
+# Initial 16 slots are Exception Vectors
+VECTOR stext                   ; Restart Vector (jump to entry point)
+VECTOR mem_service             ; Mem exception
+VECTOR instr_service           ; Instrn Error
+VECTOR EV_MachineCheck         ; Fatal Machine check
+VECTOR EV_TLBMissI             ; Instruction TLB miss
+VECTOR EV_TLBMissD             ; Data TLB miss
+VECTOR EV_TLBProtV             ; Protection Violation
+VECTOR EV_PrivilegeV           ; Privilege Violation
+VECTOR EV_SWI                  ; Software Breakpoint
+VECTOR EV_Trap                 ; Trap exception
+VECTOR EV_Extension            ; Extn Instruction Exception
+VECTOR EV_DivZero              ; Divide by Zero
+VECTOR EV_DCError              ; Data Cache Error
+VECTOR EV_Misaligned           ; Misaligned Data Access
+VECTOR reserved                ; Reserved slots
+VECTOR reserved                ; Reserved slots
+
+# Begin Interrupt Vectors
+VECTOR handle_interrupt        ; (16) Timer0
+VECTOR handle_interrupt        ; unused (Timer1)
+VECTOR handle_interrupt        ; unused (WDT)
+VECTOR handle_interrupt        ; (19) ICI (inter core interrupt)
+VECTOR handle_interrupt
+VECTOR handle_interrupt
+VECTOR handle_interrupt
+VECTOR handle_interrupt        ; (23) End of fixed IRQs
+
+.rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
+       VECTOR  handle_interrupt
+.endr
+
+       .section .text, "ax",@progbits
+
+res_service:           ; processor restart
+       flag    0x1     ; not implemented
+       nop
+       nop
+
+reserved:              ; processor restart
+       rtie            ; jump to processor initializations
+
+;##################### Interrupt Handling ##############################
+
+ENTRY(handle_interrupt)
+
+       INTERRUPT_PROLOGUE  irq
+
+       clri            ; To make status32.IE agree with CPU internal state
+
+       lr  r0, [ICAUSE]
+
+       mov   blink, ret_from_exception
+
+       b.d  arch_do_IRQ
+       mov r1, sp
+
+END(handle_interrupt)
+
+;################### Non TLB Exception Handling #############################
+
+ENTRY(EV_SWI)
+       flag 1
+END(EV_SWI)
+
+ENTRY(EV_DivZero)
+       flag 1
+END(EV_DivZero)
+
+ENTRY(EV_DCError)
+       flag 1
+END(EV_DCError)
+
+ENTRY(EV_Misaligned)
+
+       EXCEPTION_PROLOGUE
+
+       lr  r0, [efa]   ; Faulting Data address
+       mov r1, sp
+
+       FAKE_RET_FROM_EXCPN
+
+       SAVE_CALLEE_SAVED_USER
+       mov r2, sp              ; callee_regs
+
+       bl  do_misaligned_access
+
+       ; TBD: optimize - do this only if a callee reg was involved
+       ; either a dst of emulated LD/ST or src with address-writeback
+       RESTORE_CALLEE_SAVED_USER
+
+       b   ret_from_exception
+END(EV_Misaligned)
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+       EXCEPTION_PROLOGUE
+
+       lr  r0, [efa]   ; Faulting Data address
+       mov r1, sp      ; pt_regs
+
+       FAKE_RET_FROM_EXCPN
+
+       mov blink, ret_from_exception
+       b   do_page_fault
+
+END(EV_TLBProtV)
+
+; From Linux standpoint Slow Path I/D TLB Miss is the same as ProtV as they
+; need to call do_page_fault().
+; ECR in pt_regs provides whether access was R/W/X
+
+.global        call_do_page_fault
+.set call_do_page_fault, EV_TLBProtV
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; Both entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+       ld      r0, [sp, PT_status32]   ; U/K mode at time of entry
+       lr      r10, [AUX_IRQ_ACT]
+
+       bmsk    r11, r10, 15    ; AUX_IRQ_ACT.ACTIVE
+       breq    r11, 0, .Lexcept_ret    ; No intr active, ret from Exception
+
+;####### Return from Intr #######
+
+debug_marker_l1:
+       bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot
+
+.Lisr_ret_fast_path:
+       ; Handle special case #1: (Entry via Exception, Return via IRQ)
+       ;
+       ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig
+       ; task now returning to U mode (riding the Intr)
+       ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP
+       ; won't be switched to correct U mode value (from AUX_SP)
+       ; So force AUX_IRQ_ACT.U for such a case
+
+       btst    r0, STATUS_U_BIT                ; Z flag set if K (Z clear for U)
+       bset.nz r11, r11, AUX_IRQ_ACT_BIT_U     ; NZ means U
+       sr      r11, [AUX_IRQ_ACT]
+
+       INTERRUPT_EPILOGUE  irq
+       rtie
+
+;####### Return from Exception / pure kernel mode #######
+
+.Lexcept_ret:  ; Expects r0 has PT_status32
+
+debug_marker_syscall:
+       EXCEPTION_EPILOGUE
+       rtie
+
+;####### Return from Intr to insn in delay slot #######
+
+; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ)
+;
+; Intr returning to a Delay Slot (DS) insn
+; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig
+; entry was via Exception in DS which got preempted in kernel).
+;
+; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling
+.Lintr_ret_to_delay_slot:
+debug_marker_ds:
+
+       ld      r2, [@intr_to_DE_cnt]
+       add     r2, r2, 1
+       st      r2, [@intr_to_DE_cnt]
+
+       ld      r2, [sp, PT_ret]
+       ld      r3, [sp, PT_status32]
+
+       bic     r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
+       st      r0, [sp, PT_status32]
+
+       mov     r1, .Lintr_ret_to_delay_slot_2
+       st      r1, [sp, PT_ret]
+
+       st      r2, [sp, 0]
+       st      r3, [sp, 4]
+
+       b       .Lisr_ret_fast_path
+
+.Lintr_ret_to_delay_slot_2:
+       sub     sp, sp, SZ_PT_REGS
+       st      r9, [sp, -4]
+
+       ld      r9, [sp, 0]
+       sr      r9, [eret]
+
+       ld      r9, [sp, 4]
+       sr      r9, [erstatus]
+
+       ld      r9, [sp, 8]
+       sr      r9, [erbta]
+
+       ld      r9, [sp, -4]
+       add     sp, sp, SZ_PT_REGS
+       rtie
+
+END(ret_from_exception)
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
new file mode 100644 (file)
index 0000000..15d457b
--- /dev/null
@@ -0,0 +1,393 @@
+/*
+ * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -Userspace unaligned access emulation
+ *
+ * vineetg: Feb 2011 (ptrace low level code fixes)
+ *  -traced syscall return code (r0) was not saved into pt_regs for restoring
+ *   into user reg-file when traced task rets to user space.
+ *  -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
+ *   were not invoking post-syscall trace hook (jumping directly into
+ *   ret_from_system_call)
+ *
+ * vineetg: Nov 2010:
+ *  -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
+ *  -To maintain the slot size of 8 bytes/vector, added nop, which is
+ *   not executed at runtime.
+ *
+ * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
+ *  -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
+ *  -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
+ *   need ptregs anymore
+ *
+ * Vineetg: Oct 2009
+ *  -In a rare scenario, Process gets a Priv-V exception and gets scheduled
+ *   out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
+ *   active (AE bit enabled).  This causes a double fault for a subseq valid
+ *   exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
+ *   Instr Error could also cause similar scenario, so same there as well.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ *  -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
+ *   Normally CPU does this automatically, however when doing FAKE rtie,
+ *   we need to explicitly do this. The problem in macros
+ *   FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ *   was being "CLEARED" rather than "SET". Since it is Loop INHIBIT Bit,
+ *   setting it and not clearing it clears ZOL context
+ *
+ * Vineetg: May 16th, 2008
+ *  - r25 now contains the Current Task when in kernel
+ *
+ * Vineetg: Dec 22, 2007
+ *    Minor Surgery of Low Level ISR to make it SMP safe
+ *    - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
+ *    - _current_task is made an array of NR_CPUS
+ *    - Access of _current_task wrapped inside a macro so that if hardware
+ *       team agrees for a dedicated reg, no other code is touched
+ *
+ * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h>     /* {ENTRY,EXIT} */
+#include <asm/entry.h>
+#include <asm/irqflags.h>
+
+       .cpu A7
+
+;############################ Vector Table #################################
+
+.macro VECTOR  lbl
+#if 1   /* Just in case, build breaks */
+       j   \lbl
+#else
+       b   \lbl
+       nop
+#endif
+.endm
+
+       .section .vector, "ax",@progbits
+       .align 4
+
+/* Each entry in the vector table must occupy 2 words. Since it is a jump
+ * across sections (.vector to .text) we are guaranteed that 'j somewhere'
+ * will use the 'j limm' form of the instruction as long as somewhere is in
+ * a section other than .vector.
+ */
+
+; ********* Critical System Events **********************
+VECTOR   res_service             ; 0x0, Restart Vector  (0x0)
+VECTOR   mem_service             ; 0x8, Mem exception   (0x1)
+VECTOR   instr_service           ; 0x10, Instrn Error   (0x2)
+
+; ******************** Device ISRs **********************
+#ifdef CONFIG_ARC_IRQ3_LV2
+VECTOR   handle_interrupt_level2
+#else
+VECTOR   handle_interrupt_level1
+#endif
+
+VECTOR   handle_interrupt_level1
+
+#ifdef CONFIG_ARC_IRQ5_LV2
+VECTOR   handle_interrupt_level2
+#else
+VECTOR   handle_interrupt_level1
+#endif
+
+#ifdef CONFIG_ARC_IRQ6_LV2
+VECTOR   handle_interrupt_level2
+#else
+VECTOR   handle_interrupt_level1
+#endif
+
+.rept   25
+VECTOR   handle_interrupt_level1 ; Other devices
+.endr
+
+/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
+
+; ******************** Exceptions **********************
+VECTOR   EV_MachineCheck         ; 0x100, Fatal Machine check   (0x20)
+VECTOR   EV_TLBMissI             ; 0x108, Instruction TLB miss  (0x21)
+VECTOR   EV_TLBMissD             ; 0x110, Data TLB miss         (0x22)
+VECTOR   EV_TLBProtV             ; 0x118, Protection Violation  (0x23)
+                                ;         or Misaligned Access
+VECTOR   EV_PrivilegeV           ; 0x120, Privilege Violation   (0x24)
+VECTOR   EV_Trap                 ; 0x128, Trap exception        (0x25)
+VECTOR   EV_Extension            ; 0x130, Extn Instruction Excp (0x26)
+
+.rept   24
+VECTOR   reserved                ; Reserved Exceptions
+.endr
+
+
+;##################### Scratch Mem for IRQ stack switching #############
+
+ARCFP_DATA int1_saved_reg
+       .align 32
+       .type   int1_saved_reg, @object
+       .size   int1_saved_reg, 4
+int1_saved_reg:
+       .zero 4
+
+/* Each Interrupt level needs its own scratch */
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+ARCFP_DATA int2_saved_reg
+       .type   int2_saved_reg, @object
+       .size   int2_saved_reg, 4
+int2_saved_reg:
+       .zero 4
+
+#endif
+
+; ---------------------------------------------
+       .section .text, "ax",@progbits
+
+res_service:           ; processor restart
+       flag    0x1     ; not implemented
+       nop
+       nop
+
+reserved:              ; processor restart
+       rtie            ; jump to processor initializations
+
+;##################### Interrupt Handling ##############################
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+; ---------------------------------------------
+;  Level 2 ISR: Can interrupt a Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level2)
+
+       INTERRUPT_PROLOGUE 2
+
+       ;------------------------------------------------------
+       ; if L2 IRQ interrupted a L1 ISR, disable preemption
+       ;------------------------------------------------------
+
+       ld r9, [sp, PT_status32]        ; get status32_l2 (saved in pt_regs)
+       bbit0 r9, STATUS_A1_BIT, 1f     ; L1 not active when L2 IRQ, so normal
+
+       ; A1 is set in status32_l2
+       ; bump thread_info->preempt_count (Disable preemption)
+       GET_CURR_THR_INFO_FROM_SP   r10
+       ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+       add     r9, r9, 1
+       st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+1:
+       ;------------------------------------------------------
+       ; setup params for Linux common ISR and invoke it
+       ;------------------------------------------------------
+       lr  r0, [icause2]
+       and r0, r0, 0x1f
+
+       bl.d  @arch_do_IRQ
+       mov r1, sp
+
+       mov r8,0x2
+       sr r8, [AUX_IRQ_LV12]       ; clear bit in Sticky Status Reg
+
+       b   ret_from_exception
+
+END(handle_interrupt_level2)
+
+#endif
+
+; ---------------------------------------------
+;  Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level1)
+
+       INTERRUPT_PROLOGUE 1
+
+       lr  r0, [icause1]
+       and r0, r0, 0x1f
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+       ; icause1 needs to be read early, before calling tracing, which
+       ; can clobber scratch regs, hence use of stack to stash it
+       push r0
+       TRACE_ASM_IRQ_DISABLE
+       pop  r0
+#endif
+
+       bl.d  @arch_do_IRQ
+       mov r1, sp
+
+       mov r8,0x1
+       sr r8, [AUX_IRQ_LV12]       ; clear bit in Sticky Status Reg
+
+       b   ret_from_exception
+END(handle_interrupt_level1)
+
+;################### Non TLB Exception Handling #############################
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+       EXCEPTION_PROLOGUE
+
+       lr  r2, [ecr]
+       lr  r0, [efa]   ; Faulting Data address (not part of pt_regs saved above)
+
+       ; Exception auto-disables further Intr/exceptions.
+       ; Re-enable them by pretending to return from exception
+       ; (so rest of handler executes in pure K mode)
+
+       FAKE_RET_FROM_EXCPN
+
+       mov   r1, sp    ; Handle to pt_regs
+
+       ;------ (5) Type of Protection Violation? ----------
+       ;
+       ; ProtV Hardware Exception is triggered for Access Faults of 2 types
+       ;   -Access Violation   : 00_23_(00|01|02|03)_00
+       ;                                x  r  w  r+w
+       ;   -Unaligned Access   : 00_23_04_00
+       ;
+       bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+
+       ;========= (6a) Access Violation Processing ========
+       bl  do_page_fault
+       b   ret_from_exception
+
+       ;========== (6b) Non aligned access ============
+4:
+
+       SAVE_CALLEE_SAVED_USER
+       mov r2, sp              ; callee_regs
+
+       bl  do_misaligned_access
+
+       ; TBD: optimize - do this only if a callee reg was involved
+       ; either a dst of emulated LD/ST or src with address-writeback
+       RESTORE_CALLEE_SAVED_USER
+
+       b   ret_from_exception
+
+END(EV_TLBProtV)
+
+; Wrapper for Linux page fault handler called from EV_TLBMiss*
+; Very similar to ProtV handler case (6a) above, but avoids the extra checks
+; for Misaligned access
+;
+ENTRY(call_do_page_fault)
+
+       EXCEPTION_PROLOGUE
+       lr  r0, [efa]   ; Faulting Data address
+       mov   r1, sp
+       FAKE_RET_FROM_EXCPN
+
+       mov blink, ret_from_exception
+       b  do_page_fault
+
+END(call_do_page_fault)
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; Both entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+       TRACE_ASM_IRQ_ENABLE
+
+       lr      r10, [status32]
+
+       ; Restore REG File. In case multiple Events outstanding,
+       ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None
+       ; Note that we use realtime STATUS32 (not pt_regs->status32) to
+       ; decide that.
+
+       ; if Returning from Exception
+       btst   r10, STATUS_AE_BIT
+       bnz    .Lexcep_ret
+
+       ; Not Exception so maybe Interrupts (Level 1 or 2)
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+       ; Level 2 interrupt return Path - from hardware standpoint
+       bbit0  r10, STATUS_A2_BIT, not_level2_interrupt
+
+       ;------------------------------------------------------------------
+       ; However the context returning might not have taken L2 intr itself
+       ; e.g. Task 'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
+       ; Special considerations needed for the context which took L2 intr
+
+       ld   r9, [sp, PT_event]        ; Ensure this is L2 intr context
+       brne r9, event_IRQ2, 149f
+
+       ;------------------------------------------------------------------
+       ; if L2 IRQ interrupted an L1 ISR,  we'd disabled preemption earlier
+       ; so that sched doesn't move to new task, causing L1 to be delayed
+       ; non-deterministically. Now that we've achieved that, let's reset
+       ; things to what they were, before returning from L2 context
+       ;----------------------------------------------------------------
+
+       ld r9, [sp, PT_status32]       ; get status32_l2 (saved in pt_regs)
+       bbit0 r9, STATUS_A1_BIT, 149f  ; L1 not active when L2 IRQ, so normal
+
+       ; decrement thread_info->preempt_count (re-enable preemption)
+       GET_CURR_THR_INFO_FROM_SP   r10
+       ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+       ; paranoid check, given A1 was active when A2 happened, preempt count
+       ; must not be 0 because we would have incremented it.
+       ; If this does happen we simply HALT as it means a BUG !!!
+       cmp     r9, 0
+       bnz     2f
+       flag 1
+
+2:
+       sub     r9, r9, 1
+       st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+149:
+       ;return from level 2
+       INTERRUPT_EPILOGUE 2
+debug_marker_l2:
+       rtie
+
+not_level2_interrupt:
+
+#endif
+
+       bbit0  r10, STATUS_A1_BIT, .Lpure_k_mode_ret
+
+       ;return from level 1
+       INTERRUPT_EPILOGUE 1
+debug_marker_l1:
+       rtie
+
+.Lexcep_ret:
+.Lpure_k_mode_ret:
+
+       ;this case is for syscalls or Exceptions or pure kernel mode
+
+       EXCEPTION_EPILOGUE
+debug_marker_syscall:
+       rtie
+
+END(ret_from_exception)
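The preempt_count handshake between the two handlers above (bump on L2-over-L1 entry, checked decrement in .Lrestore_regs) is easier to follow in C. A minimal sketch under assumed names -- STATUS_A1 stands in for the STATUS_A1_BIT mask and thread_info_sketch for thread_info; the real code does this to thread_info->preempt_count straight from assembly:

#include <assert.h>

struct thread_info_sketch { int preempt_count; };

#define STATUS_A1 (1u << 3)	/* assumed position of STATUS_A1_BIT */

/* mirror of the INTERRUPT_PROLOGUE 2 path: L2 IRQ caught an active L1 ISR */
static void l2_irq_enter(struct thread_info_sketch *ti, unsigned int status32_l2)
{
	if (status32_l2 & STATUS_A1)
		ti->preempt_count++;	/* pin current task until L1 finishes */
}

/* mirror of the .Lrestore_regs L2 leg, including the 'flag 1' paranoia */
static void l2_irq_return(struct thread_info_sketch *ti, unsigned int status32_l2)
{
	if (status32_l2 & STATUS_A1) {
		assert(ti->preempt_count > 0);	/* we bumped it on entry */
		ti->preempt_count--;		/* re-enable preemption */
	}
}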
index d868289c5a26f74e780ee2e59ff6174987d68d68..f7a82fd4d6018b29c4e4297af9b839ca1a154fab 100644 (file)
@@ -1,60 +1,13 @@
 /*
- * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * Common Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * (included from entry-<isa>.S)
  *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * vineetg: May 2011
- *  -Userspace unaligned access emulation
- *
- * vineetg: Feb 2011 (ptrace low level code fixes)
- *  -traced syscall return code (r0) was not saved into pt_regs for restoring
- *   into user reg-file when traced task rets to user space.
- *  -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
- *   were not invoking post-syscall trace hook (jumping directly into
- *   ret_from_system_call)
- *
- * vineetg: Nov 2010:
- *  -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
- *  -To maintain the slot size of 8 bytes/vector, added nop, which is
- *   not executed at runtime.
- *
- * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
- *  -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
- *  -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
- *   need ptregs anymore
- *
- * Vineetg: Oct 2009
- *  -In a rare scenario, Process gets a Priv-V exception and gets scheduled
- *   out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
- *   active (AE bit enabled).  This causes a double fault for a subseq valid
- *   exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
- *   Instr Error could also cause similar scenario, so same there as well.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- *  -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
- *   Normally CPU does this automatically, however when doing FAKE rtie,
- *   we need to explicitly do this. The problem in macros
- *   FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- *   was being "CLEARED" rather than "SET". Since it is Loop INHIBIT Bit,
- *   setting it and not clearing it clears ZOL context
- *
- * Vineetg: May 16th, 2008
- *  - r25 now contains the Current Task when in kernel
- *
- * Vineetg: Dec 22, 2007
- *    Minor Surgery of Low Level ISR to make it SMP safe
- *    - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
- *    - _current_task is made an array of NR_CPUS
- *    - Access of _current_task wrapped inside a macro so that if hardware
- *       team agrees for a dedicated reg, no other code is touched
- *
- * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
  */
 
 /*------------------------------------------------------------------
  *  Global Pointer (gp)                 r26
  *  Frame Pointer (fp)                  r27
  *  Stack Pointer (sp)                  r28
- *  Interrupt link register (ilink1)    r29
- *  Interrupt link register (ilink2)    r30
  *  Branch link register (blink)        r31
  *------------------------------------------------------------------
  */
 
-       .cpu A7
-
-;############################ Vector Table #################################
-
-.macro VECTOR  lbl
-#if 1   /* Just in case, build breaks */
-       j   \lbl
-#else
-       b   \lbl
-       nop
-#endif
-.endm
-
-       .section .vector, "ax",@progbits
-       .align 4
-
-/* Each entry in the vector table must occupy 2 words. Since it is a jump
- * across sections (.vector to .text) we are guaranteed that 'j somewhere'
- * will use the 'j limm' form of the instruction as long as somewhere is in
- * a section other than .vector.
- */
-
-; ********* Critical System Events **********************
-VECTOR   res_service             ; 0x0, Restart Vector  (0x0)
-VECTOR   mem_service             ; 0x8, Mem exception   (0x1)
-VECTOR   instr_service           ; 0x10, Instrn Error   (0x2)
-
-; ******************** Device ISRs **********************
-#ifdef CONFIG_ARC_IRQ3_LV2
-VECTOR   handle_interrupt_level2
-#else
-VECTOR   handle_interrupt_level1
-#endif
-
-VECTOR   handle_interrupt_level1
-
-#ifdef CONFIG_ARC_IRQ5_LV2
-VECTOR   handle_interrupt_level2
-#else
-VECTOR   handle_interrupt_level1
-#endif
-
-#ifdef CONFIG_ARC_IRQ6_LV2
-VECTOR   handle_interrupt_level2
-#else
-VECTOR   handle_interrupt_level1
-#endif
-
-.rept   25
-VECTOR   handle_interrupt_level1 ; Other devices
-.endr
-
-/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
-
-; ******************** Exceptions **********************
-VECTOR   EV_MachineCheck         ; 0x100, Fatal Machine check   (0x20)
-VECTOR   EV_TLBMissI             ; 0x108, Instruction TLB miss (0x21)
-VECTOR   EV_TLBMissD             ; 0x110, Data TLB miss         (0x22)
-VECTOR   EV_TLBProtV             ; 0x118, Protection Violation  (0x23)
-                                ;         or Misaligned Access
-VECTOR   EV_PrivilegeV           ; 0x120, Privilege Violation   (0x24)
-VECTOR   EV_Trap                 ; 0x128, Trap exception        (0x25)
-VECTOR   EV_Extension            ; 0x130, Extn Instruction Excp (0x26)
-
-.rept   24
-VECTOR   reserved                ; Reserved Exceptions
-.endr
-
-#include <linux/linkage.h>   /* {ENTRY,EXIT} */
-#include <asm/entry.h>       /* SAVE_ALL_{INT1,INT2,SYS...} */
-#include <asm/errno.h>
-#include <asm/arcregs.h>
-#include <asm/irqflags.h>
-
-;##################### Scratch Mem for IRQ stack switching #############
-
-ARCFP_DATA int1_saved_reg
-       .align 32
-       .type   int1_saved_reg, @object
-       .size   int1_saved_reg, 4
-int1_saved_reg:
-       .zero 4
-
-/* Each Interrupt level needs its own scratch */
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-
-ARCFP_DATA int2_saved_reg
-       .type   int2_saved_reg, @object
-       .size   int2_saved_reg, 4
-int2_saved_reg:
-       .zero 4
-
-#endif
-
-; ---------------------------------------------
-       .section .text, "ax",@progbits
-
-res_service:           ; processor restart
-       flag    0x1     ; not implemented
-       nop
-       nop
-
-reserved:              ; processor restart
-       rtie            ; jump to processor initializations
-
-;##################### Interrupt Handling ##############################
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-; ---------------------------------------------
-;  Level 2 ISR: Can interrupt a Level 1 ISR
-; ---------------------------------------------
-ENTRY(handle_interrupt_level2)
+;################### Special Sys Call Wrappers ##########################
 
-       ; TODO-vineetg for SMP this won't work
-       ; free up r9 as scratchpad
-       st  r9, [@int2_saved_reg]
+ENTRY(sys_clone_wrapper)
+       SAVE_CALLEE_SAVED_USER
+       bl  @sys_clone
+       DISCARD_CALLEE_SAVED_USER
 
-       ;Which mode (user/kernel) was the system in when intr occurred
-       lr  r9, [status32_l2]
+       GET_CURR_THR_INFO_FLAGS   r10
+       btst r10, TIF_SYSCALL_TRACE
+       bnz  tracesys_exit
 
-       SWITCH_TO_KERNEL_STK
-       SAVE_ALL_INT2
+       b ret_from_system_call
+END(sys_clone_wrapper)
 
-       ;------------------------------------------------------
-       ; if L2 IRQ interrupted a L1 ISR, disable preemption
-       ;------------------------------------------------------
+ENTRY(ret_from_fork)
+       ; when the forked child comes here from the __switch_to function
+       ; r0 has the last task pointer.
+       ; put last task in scheduler queue
+       bl   @schedule_tail
 
-       ld r9, [sp, PT_status32]        ; get statu32_l2 (saved in pt_regs)
-       bbit0 r9, STATUS_A1_BIT, 1f     ; L1 not active when L2 IRQ, so normal
+       ld   r9, [sp, PT_status32]
+       brne r9, 0, 1f
 
-       ; A1 is set in status32_l2
-       ; bump thread_info->preempt_count (Disable preemption)
-       GET_CURR_THR_INFO_FROM_SP   r10
-       ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-       add     r9, r9, 1
-       st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+       jl.d [r14]              ; kernel thread entry point
+       mov  r0, r13            ; (see PF_KTHREAD block in copy_thread)
 
 1:
-       ;------------------------------------------------------
-       ; setup params for Linux common ISR and invoke it
-       ;------------------------------------------------------
-       lr  r0, [icause2]
-       and r0, r0, 0x1f
-
-       bl.d  @arch_do_IRQ
-       mov r1, sp
-
-       mov r8,0x2
-       sr r8, [AUX_IRQ_LV12]       ; clear bit in Sticky Status Reg
-
-       b   ret_from_exception
-
-END(handle_interrupt_level2)
-
-#endif
-
-; ---------------------------------------------
-;  Level 1 ISR
-; ---------------------------------------------
-ENTRY(handle_interrupt_level1)
-
-       /* free up r9 as scratchpad */
-#ifdef CONFIG_SMP
-       sr  r9, [ARC_REG_SCRATCH_DATA0]
-#else
-       st   r9, [@int1_saved_reg]
-#endif
-
-       ;Which mode (user/kernel) was the system in when intr occurred
-       lr  r9, [status32_l1]
-
-       SWITCH_TO_KERNEL_STK
-       SAVE_ALL_INT1
+       ; Return to user space
+       ; 1. Any forked task (Reach here via BRne above)
+       ; 2. First ever init task (Reach here via return from JL above)
+       ;    This is the historic "kernel_execve" use-case, to return to init
+       ;    user mode, in a roundabout way since that is always done from
+       ;    a kernel thread which is executed via JL above but always returns
+       ;    out whenever kernel_execve (now inline do_fork()) is involved
+       b    ret_from_exception
+END(ret_from_fork)
 
-       lr  r0, [icause1]
-       and r0, r0, 0x1f
+#ifdef CONFIG_ARC_DW2_UNWIND
+; Workaround for bug 94179 (STAR ):
+; Despite -fasynchronous-unwind-tables, the linker is not marking the dwarf2
+; unwinder section (.debug_frame) loadable. So we force it here.
+; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
+; would not work after a clean build due to kernel build system dependencies.
+.section .debug_frame, "wa",@progbits
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-       ; icause1 needs to be read early, before calling tracing, which
-       ; can clobber scratch regs, hence use of stack to stash it
-       push r0
-       TRACE_ASM_IRQ_DISABLE
-       pop  r0
+; Reset to .text as this file is included in entry-<isa>.S
+.section .text, "ax",@progbits
 #endif
 
-       bl.d  @arch_do_IRQ
-       mov r1, sp
-
-       mov r8,0x1
-       sr r8, [AUX_IRQ_LV12]       ; clear bit in Sticky Status Reg
-
-       b   ret_from_exception
-END(handle_interrupt_level1)
-
 ;################### Non TLB Exception Handling #############################
 
 ; ---------------------------------------------
@@ -280,7 +86,7 @@ ENTRY(instr_service)
        lr  r0, [efa]
        mov r1, sp
 
-       FAKE_RET_FROM_EXCPN r9
+       FAKE_RET_FROM_EXCPN
 
        bl  do_insterror_or_kprobe
        b   ret_from_exception
@@ -297,7 +103,7 @@ ENTRY(mem_service)
        lr  r0, [efa]
        mov r1, sp
 
-       FAKE_RET_FROM_EXCPN r9
+       FAKE_RET_FROM_EXCPN
 
        bl  do_memory_error
        b   ret_from_exception
@@ -333,60 +139,6 @@ ENTRY(EV_MachineCheck)
 
 END(EV_MachineCheck)
 
-; ---------------------------------------------
-; Protection Violation Exception Handler
-; ---------------------------------------------
-
-ENTRY(EV_TLBProtV)
-
-       EXCEPTION_PROLOGUE
-
-       ;---------(3) Save some more regs-----------------
-       ;  vineetg: Mar 6th: Random Seg Fault issue #1
-       ;  ecr and efa were not saved in case an Intr sneaks in
-       ;  after fake rtie
-
-       lr  r2, [ecr]
-       lr  r0, [efa]   ; Faulting Data address
-
-       ; --------(4) Return from CPU Exception Mode ---------
-       ;  Fake a rtie, but rtie to next label
-       ;  That way, subsequently, do_page_fault ( ) executes in pure kernel
-       ;  mode with further Exceptions enabled
-
-       FAKE_RET_FROM_EXCPN r9
-
-       mov   r1, sp
-
-       ;------ (5) Type of Protection Violation? ----------
-       ;
-       ; ProtV Hardware Exception is triggered for Access Faults of 2 types
-       ;   -Access Violaton    : 00_23_(00|01|02|03)_00
-       ;                                x  r  w  r+w
-       ;   -Unaligned Access   : 00_23_04_00
-       ;
-       bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
-
-       ;========= (6a) Access Violation Processing ========
-       bl  do_page_fault
-       b   ret_from_exception
-
-       ;========== (6b) Non aligned access ============
-4:
-
-       SAVE_CALLEE_SAVED_USER
-       mov r2, sp              ; callee_regs
-
-       bl  do_misaligned_access
-
-       ; TBD: optimize - do this only if a callee reg was involved
-       ; either a dst of emulated LD/ST or src with address-writeback
-       RESTORE_CALLEE_SAVED_USER
-
-       b   ret_from_exception
-
-END(EV_TLBProtV)
-
 ; ---------------------------------------------
 ; Privilege Violation Exception Handler
 ; ---------------------------------------------
@@ -397,7 +149,7 @@ ENTRY(EV_PrivilegeV)
        lr  r0, [efa]
        mov r1, sp
 
-       FAKE_RET_FROM_EXCPN r9
+       FAKE_RET_FROM_EXCPN
 
        bl  do_privilege_fault
        b   ret_from_exception
@@ -413,14 +165,17 @@ ENTRY(EV_Extension)
        lr  r0, [efa]
        mov r1, sp
 
-       FAKE_RET_FROM_EXCPN r9
+       FAKE_RET_FROM_EXCPN
 
        bl  do_extension_fault
        b   ret_from_exception
 END(EV_Extension)
 
-;######################### System Call Tracing #########################
+;################ Trap Handling (Syscall, Breakpoint) ##################
 
+; ---------------------------------------------
+; syscall Tracing
+; ---------------------------------------------
 tracesys:
        ; save EFA in case tracer wants the PC of traced task
        ; using ERET won't work since next-PC has already committed
@@ -463,10 +218,9 @@ tracesys_exit:
       b   ret_from_exception ; NOT ret_from_system_call as it saves r0, which
       ; we'd already done before calling the post hook above
 
-;################### Break Point TRAP ##########################
-
-       ; ======= (5b) Trap is due to Break-Point =========
-
+; ---------------------------------------------
+; Breakpoint TRAP
+; ---------------------------------------------
 trap_with_param:
 
        ; stop_pc info by gdb needs this info
@@ -475,7 +229,7 @@ trap_with_param:
 
        ; Now that we have read EFA, it is safe to do "fake" rtie
        ;   and get out of CPU exception mode
-       FAKE_RET_FROM_EXCPN r11
+       FAKE_RET_FROM_EXCPN
 
        ; Save callee regs in case gdb wants to have a look
        ; SP will grow up by size of CALLEE Reg-File
@@ -494,37 +248,33 @@ trap_with_param:
 
        b   ret_from_exception
 
-;##################### Trap Handling ##############################
-;
-; EV_Trap caused by TRAP_S and TRAP0 instructions.
-;------------------------------------------------------------------
-;   (1) System Calls
-;       :parameters in r0-r7.
-;       :r8 has the system call number
-;   (2) Break Points
-;------------------------------------------------------------------
+; ---------------------------------------------
+; syscall TRAP
+; ABI: (r0-r7) up to 8 args, (r8) syscall number
+; ---------------------------------------------
 
 ENTRY(EV_Trap)
 
        EXCEPTION_PROLOGUE
 
-       ;------- (4) What caused the Trap --------------
-       lr     r12, [ecr]
-       bmsk.f 0, r12, 7
+       ;============ TRAP 1   :breakpoints
+       ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR)
+       bmsk.f 0, r9, 7
        bnz    trap_with_param
 
-       ; ======= (5a) Trap is due to System Call ========
+       ;============ TRAP  (no param): syscall top level
 
-       ; Before doing anything, return from CPU Exception Mode
-       FAKE_RET_FROM_EXCPN r11
+       ; First return from Exception to pure K mode (Exception/IRQs re-enabled)
+       FAKE_RET_FROM_EXCPN
 
-       ; If syscall tracing ongoing, invoke pre-pos-hooks
+       ; If syscall tracing ongoing, invoke pre-post-hooks
        GET_CURR_THR_INFO_FLAGS   r10
        btst r10, TIF_SYSCALL_TRACE
        bnz tracesys  ; this never comes back
 
-       ;============ This is normal System Call case ==========
-       ; Sys-call num shd not exceed the total system calls avail
+       ;============ Normal syscall case
+
+       ; syscall num shd not exceed the total system calls avail
        cmp     r8,  NR_syscalls
        mov.hi  r0, -ENOSYS
        bhi     ret_from_system_call
@@ -565,7 +315,7 @@ resume_user_mode_begin:
        ; Fast Path return to user mode if no pending work
        GET_CURR_THR_INFO_FLAGS   r9
        and.f  0,  r9, _TIF_WORK_MASK
-       bz     restore_regs
+       bz     .Lrestore_regs
 
        ; --- (Slow Path #1) task preemption ---
        bbit0  r9, TIF_NEED_RESCHED, .Lchk_pend_signals
@@ -624,11 +374,11 @@ resume_kernel_mode:
        ; Can't preempt if preemption disabled
        GET_CURR_THR_INFO_FROM_SP   r10
        ld  r8, [r10, THREAD_INFO_PREEMPT_COUNT]
-       brne  r8, 0, restore_regs
+       brne  r8, 0, .Lrestore_regs
 
        ; check if this task's NEED_RESCHED flag set
        ld  r9, [r10, THREAD_INFO_FLAGS]
-       bbit0  r9, TIF_NEED_RESCHED, restore_regs
+       bbit0  r9, TIF_NEED_RESCHED, .Lrestore_regs
 
        ; Invoke PREEMPTION
        bl      preempt_schedule_irq
@@ -636,142 +386,7 @@ resume_kernel_mode:
        ; preempt_schedule_irq() always returns with IRQ disabled
 #endif
 
-       ; fall through
-
-;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
-;
-; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
-; IRQ shd definitely not happen between now and rtie
-; All 2 entry points to here already disable interrupts
-
-restore_regs :
-
-       TRACE_ASM_IRQ_ENABLE
-
-       lr      r10, [status32]
-
-       ; Restore REG File. In case multiple Events outstanding,
-       ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
-       ; Note that we use realtime STATUS32 (not pt_regs->status32) to
-       ; decide that.
-
-       ; if Returning from Exception
-       bbit0  r10, STATUS_AE_BIT, not_exception
-       RESTORE_ALL_SYS
-       rtie
-
-       ; Not Exception so maybe Interrupts (Level 1 or 2)
-
-not_exception:
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-
-       ; Level 2 interrupt return Path - from hardware standpoint
-       bbit0  r10, STATUS_A2_BIT, not_level2_interrupt
-
-       ;------------------------------------------------------------------
-       ; However the context returning might not have taken L2 intr itself
-       ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
-       ; Special considerations needed for the context which took L2 intr
-
-       ld   r9, [sp, PT_event]        ; Ensure this is L2 intr context
-       brne r9, event_IRQ2, 149f
-
-       ;------------------------------------------------------------------
-       ; if L2 IRQ interrupted an L1 ISR,  we'd disabled preemption earlier
-       ; so that sched doesn't move to new task, causing L1 to be delayed
-       ; undeterministically. Now that we've achieved that, let's reset
-       ; things to what they were, before returning from L2 context
-       ;----------------------------------------------------------------
-
-       ld r9, [sp, PT_status32]       ; get statu32_l2 (saved in pt_regs)
-       bbit0 r9, STATUS_A1_BIT, 149f  ; L1 not active when L2 IRQ, so normal
-
-       ; decrement thread_info->preempt_count (re-enable preemption)
-       GET_CURR_THR_INFO_FROM_SP   r10
-       ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
-       ; paranoid check, given A1 was active when A2 happened, preempt count
-       ; must not be 0 because we would have incremented it.
-       ; If this does happen we simply HALT as it means a BUG !!!
-       cmp     r9, 0
-       bnz     2f
-       flag 1
-
-2:
-       sub     r9, r9, 1
-       st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
-149:
-       ;return from level 2
-       RESTORE_ALL_INT2
-debug_marker_l2:
-       rtie
-
-not_level2_interrupt:
-
-#endif
-
-       bbit0  r10, STATUS_A1_BIT, not_level1_interrupt
+       b       .Lrestore_regs
 
-       ;return from level 1
+##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-<isa>.S
 
-       RESTORE_ALL_INT1
-debug_marker_l1:
-       rtie
-
-not_level1_interrupt:
-
-       ;this case is for syscalls or Exceptions (with fake rtie)
-
-       RESTORE_ALL_SYS
-debug_marker_syscall:
-       rtie
-
-END(ret_from_exception)
-
-ENTRY(ret_from_fork)
-       ; when the forked child comes here from the __switch_to function
-       ; r0 has the last task pointer.
-       ; put last task in scheduler queue
-       bl   @schedule_tail
-
-       ld   r9, [sp, PT_status32]
-       brne r9, 0, 1f
-
-       jl.d [r14]              ; kernel thread entry point
-       mov  r0, r13            ; (see PF_KTHREAD block in copy_thread)
-
-1:
-       ; Return to user space
-       ; 1. Any forked task (Reach here via BRne above)
-       ; 2. First ever init task (Reach here via return from JL above)
-       ;    This is the historic "kernel_execve" use-case, to return to init
-       ;    user mode, in a round about way since that is always done from
-       ;    a kernel thread which is executed via JL above but always returns
-       ;    out whenever kernel_execve (now inline do_fork()) is involved
-       b    ret_from_exception
-END(ret_from_fork)
-
-;################### Special Sys Call Wrappers ##########################
-
-ENTRY(sys_clone_wrapper)
-       SAVE_CALLEE_SAVED_USER
-       bl  @sys_clone
-       DISCARD_CALLEE_SAVED_USER
-
-       GET_CURR_THR_INFO_FLAGS   r10
-       btst r10, TIF_SYSCALL_TRACE
-       bnz  tracesys_exit
-
-       b ret_from_system_call
-END(sys_clone_wrapper)
-
-#ifdef CONFIG_ARC_DW2_UNWIND
-; Workaround for bug 94179 (STAR ):
-; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder
-; section (.debug_frame) as loadable. So we force it here.
-; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
-; would not work after a clean build due to kernel build system dependencies.
-.section .debug_frame, "wa",@progbits
-#endif
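Rendered in C, the EV_Trap syscall leg above is just a bound-checked indirect call. A sketch with assumed declarations (the dispatch wrapper and arg array are illustrative; sys_call_table and NR_syscalls are the kernel's names but their definitions are not in this diff):

#include <errno.h>

#define NR_syscalls 300		/* illustrative value */

typedef long (*syscall_fn)(long, long, long, long, long, long, long, long);
extern syscall_fn sys_call_table[NR_syscalls];

long dispatch_syscall(unsigned int nr, const long a[8])
{
	/* cmp r8, NR_syscalls; mov.hi r0, -ENOSYS; bhi ret_from_system_call */
	if (nr >= NR_syscalls)
		return -ENOSYS;

	/* ABI above: up to 8 args in r0-r7, syscall number in r8 */
	return sys_call_table[nr](a[0], a[1], a[2], a[3],
				  a[4], a[5], a[6], a[7]);
}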
index b0e8666fdccc755ac11763a3bf0b6594f4ed628a..812f95e6ae6946d56550cbd80a0baaa8a6e94cc0 100644 (file)
@@ -49,8 +49,6 @@
 1:
 .endm
 
-       .cpu A7
-
        .section .init.text, "ax",@progbits
        .type stext, @function
        .globl stext
@@ -83,6 +81,7 @@ stext:
        st.ab   0, [r5, 4]
 1:
 
+#ifdef CONFIG_ARC_UBOOT_SUPPORT
        ; Uboot - kernel ABI
        ;    r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
       ;    r1 = magic number (board identity, unused as of now)
@@ -90,6 +89,7 @@ stext:
        ; These are handled later in setup_arch()
        st      r0, [@uboot_tag]
        st      r2, [@uboot_arg]
+#endif
 
        ; setup "current" tsk and optionally cache it in dedicated r25
        mov     r9, @init_task
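The uboot_tag/uboot_arg pair stored above is only consumed later in setup_arch(); a hedged sketch of what that consumer looks like, with hypothetical helper names for the two payload kinds:

/* U-Boot -> kernel ABI per the comment above: r0 picks the payload, r2 carries it */
enum uboot_payload { UBOOT_NONE = 0, UBOOT_CMDLINE = 1, UBOOT_DTB = 2 };

extern int uboot_tag;			/* saved r0 */
extern unsigned long uboot_arg;		/* saved r2 */

extern void append_cmdline(const char *cmdline);	/* hypothetical helper */
extern void set_dtb_root(void *dtb);			/* hypothetical helper */

static void handle_uboot_args(void)	/* hypothetical name */
{
	switch (uboot_tag) {
	case UBOOT_CMDLINE:
		append_cmdline((const char *)uboot_arg);
		break;
	case UBOOT_DTB:
		set_dtb_root((void *)uboot_arg);
		break;
	default:
		break;	/* no bootloader interaction */
	}
}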
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
new file mode 100644 (file)
index 0000000..6208c63
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
+#include <asm/irq.h>
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ * -Platform Independent (must for any ARC Core)
+ * -Needed for each CPU (hence not foldable into init_IRQ)
+ */
+void arc_init_IRQ(void)
+{
+       unsigned int tmp;
+
+       struct aux_irq_ctrl {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int res3:18, save_idx_regs:1, res2:1,
+                            save_u_to_u:1, save_lp_regs:1, save_blink:1,
+                            res:4, save_nr_gpr_pairs:5;
+#else
+               unsigned int save_nr_gpr_pairs:5, res:4,
+                            save_blink:1, save_lp_regs:1, save_u_to_u:1,
+                            res2:1, save_idx_regs:1, res3:18;
+#endif
+       } ictrl;
+
+       *(unsigned int *)&ictrl = 0;
+
+       ictrl.save_nr_gpr_pairs = 6;    /* r0 to r11 (r12 saved manually) */
+       ictrl.save_blink = 1;
+       ictrl.save_lp_regs = 1;         /* LP_COUNT, LP_START, LP_END */
+       ictrl.save_u_to_u = 0;          /* user ctxt saved on kernel stack */
+       ictrl.save_idx_regs = 1;        /* JLI, LDI, EI */
+
+       WRITE_AUX(AUX_IRQ_CTRL, ictrl);
+
+       /* setup status32, don't enable intr yet as kernel doesn't want */
+       tmp = read_aux_reg(0xa);
+       tmp |= ISA_INIT_STATUS_BITS;
+       tmp &= ~STATUS_IE_MASK;
+       asm volatile("flag %0   \n"::"r"(tmp));
+
+       /*
+        * ARCv2 core intc provides multiple interrupt priorities (up to 16).
+        * Typical builds though have only two levels (0-high, 1-low)
+        * Linux by default uses lower prio 1 for most irqs, reserving 0 for
+        * NMI style interrupts in future (say perf)
+        *
+        * Read the intc BCR to confirm that Linux default priority is avail
+        * in h/w
+        *
+        * Note:
+        *  IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level
+        *  is 0 based.
+        */
+       tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24) & 0xF;
+       if (ARCV2_IRQ_DEF_PRIO > tmp)
+               panic("Linux default irq prio incorrect\n");
+}
+
+static void arcv2_irq_mask(struct irq_data *data)
+{
+       write_aux_reg(AUX_IRQ_SELECT, data->irq);
+       write_aux_reg(AUX_IRQ_ENABLE, 0);
+}
+
+static void arcv2_irq_unmask(struct irq_data *data)
+{
+       write_aux_reg(AUX_IRQ_SELECT, data->irq);
+       write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+void arcv2_irq_enable(struct irq_data *data)
+{
+       /* set default priority */
+       write_aux_reg(AUX_IRQ_SELECT, data->irq);
+       write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+
+       /*
+        * hw auto enables (linux unmask) all by default
+        * So no need to do IRQ_ENABLE here
+        * XXX: However OSCI LAN needs it
+        */
+       write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+static struct irq_chip arcv2_irq_chip = {
+       .name           = "ARCv2 core Intc",
+       .irq_mask       = arcv2_irq_mask,
+       .irq_unmask     = arcv2_irq_unmask,
+       .irq_enable     = arcv2_irq_enable
+};
+
+static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
+                        irq_hw_number_t hw)
+{
+       if (irq == TIMER0_IRQ || irq == IPI_IRQ)
+               irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
+       else
+               irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+
+       return 0;
+}
+
+static const struct irq_domain_ops arcv2_irq_ops = {
+       .xlate = irq_domain_xlate_onecell,
+       .map = arcv2_irq_map,
+};
+
+static struct irq_domain *root_domain;
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+       if (parent)
+               panic("DeviceTree incore intc not a root irq controller\n");
+
+       root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+                                           &arcv2_irq_ops, NULL);
+
+       if (!root_domain)
+               panic("root irq domain not avail\n");
+
+       /* with this we don't need to export root_domain */
+       irq_set_default_host(root_domain);
+
+       return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ);
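Two details of the ARCv2 intc above are worth a sketch: the mask/unmask/enable ops all use the same select-then-write idiom on a non-atomic register pair, and the IRQ_BCR check decodes N-1 priority levels from bits [27:24]. Helper names below are illustrative, not part of the patch:

/* select-then-write idiom; callers are assumed to hold the descriptor
 * lock or run with IRQs off, since the register pair is not atomic */
static void arcv2_irq_line_write(unsigned int irq, unsigned int reg,
				 unsigned int val)
{
	write_aux_reg(AUX_IRQ_SELECT, irq);	/* pick the line */
	write_aux_reg(reg, val);		/* program its per-line reg */
}

/* IRQ_BCR[27:24] holds N-1 for N priority levels */
static inline unsigned int irq_bcr_nr_prio_levels(unsigned int bcr)
{
	return ((bcr >> 24) & 0xF) + 1;	/* a 2-level build reads back 1 */
}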
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
new file mode 100644 (file)
index 0000000..fcdddb6
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
+#include <asm/irq.h>
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Platform independent, needed for each CPU (not foldable into init_IRQ)
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ *
+ * what it does?
+ * -Optionally, setup the High priority Interrupts as Level 2 IRQs
+ */
+void arc_init_IRQ(void)
+{
+       int level_mask = 0;
+
+       /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
+       level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
+       level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
+       level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
+
+       /*
+        * Write to the register even if no LV2 IRQs are configured, to reset
+        * it in case the bootloader had mucked with it
+        */
+       write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+       if (level_mask)
+               pr_info("Level-2 interrupts bitset %x\n", level_mask);
+}
+
+/*
+ * ARC700 core includes a simple on-chip intc supporting
+ * -per IRQ enable/disable
+ * -2 levels of interrupts (high/low)
+ * -all interrupts being level triggered
+ *
+ * To reduce platform code, we assume all IRQs directly hooked-up into intc.
+ * Platforms with external intc, hence cascaded IRQs, are free to over-ride
+ * below, per IRQ.
+ */
+
+static void arc_irq_mask(struct irq_data *data)
+{
+       unsigned int ienb;
+
+       ienb = read_aux_reg(AUX_IENABLE);
+       ienb &= ~(1 << data->irq);
+       write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static void arc_irq_unmask(struct irq_data *data)
+{
+       unsigned int ienb;
+
+       ienb = read_aux_reg(AUX_IENABLE);
+       ienb |= (1 << data->irq);
+       write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static struct irq_chip onchip_intc = {
+       .name           = "ARC In-core Intc",
+       .irq_mask       = arc_irq_mask,
+       .irq_unmask     = arc_irq_unmask,
+};
+
+static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
+                              irq_hw_number_t hw)
+{
+       /*
+        * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core
+        *      code doesn't own it (like TIMER0). ISS IDU / ezchip define it
+        *      in platform header which can't be included here as it goes
+        *      against multi-platform image philosophy
+        */
+       if (irq == TIMER0_IRQ)
+               irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
+       else
+               irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
+
+       return 0;
+}
+
+static const struct irq_domain_ops arc_intc_domain_ops = {
+       .xlate = irq_domain_xlate_onecell,
+       .map = arc_intc_domain_map,
+};
+
+static struct irq_domain *root_domain;
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+       if (parent)
+               panic("DeviceTree incore intc not a root irq controller\n");
+
+       root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+                                           &arc_intc_domain_ops, NULL);
+
+       if (!root_domain)
+               panic("root irq domain not avail\n");
+
+       /* with this we don't need to export root_domain */
+       irq_set_default_host(root_domain);
+
+       return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
+
+/*
+ * arch_local_irq_enable - Enable interrupts.
+ *
+ * 1. Explicitly called to re-enable interrupts
+ * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
+ *    which may be in a hard ISR itself
+ *
+ * Semantics of this function change depending on where it is called from:
+ *
+ * -If called from hard-ISR, it must not invert interrupt priorities
+ *  e.g. suppose TIMER is high priority (Level 2) IRQ
+ *    Timer hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
+ *    Here local_irq_enable( ) shd not re-enable lower priority interrupts
+ * -If called from soft-ISR, it must re-enable all interrupts
+ *    soft ISRs are low priority jobs which can be very slow, thus all IRQs
+ *    must be enabled while they run.
+ *    Now hardware context wise we may still be in L2 ISR (not done rtie)
+ *    still we must re-enable both L1 and L2 IRQs
+ *  Another twist is prev scenario with flow being
+ *     L1 ISR ==> interrupted by L2 ISR  ==> L2 soft ISR
+ *     here we must not re-enable L1 as prev L1 Interrupt's h/w context will get
+ *     over-written (this is a deficiency in the ARC700 Interrupt mechanism)
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS   /* Complex version for 2 IRQ levels */
+
+void arch_local_irq_enable(void)
+{
+
+       unsigned long flags = arch_local_save_flags();
+
+       /* Allow both L1 and L2 at the onset */
+       flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+
+       /* Called from hard ISR (between irq_enter and irq_exit) */
+       if (in_irq()) {
+
+               /* If in L2 ISR, don't re-enable any further IRQs as this can
+                * cause IRQ priorities to get upside down. e.g. it could allow
+                * L1 be taken while in L2 hard ISR which is wrong not only in
+                * theory, it can also cause the dreaded L1-L2-L1 scenario
+                */
+               if (flags & STATUS_A2_MASK)
+                       flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
+
+               /* Even if in L1 ISR, allow higher prio L2 IRQs */
+               else if (flags & STATUS_A1_MASK)
+                       flags &= ~(STATUS_E1_MASK);
+       }
+
+       /* called from soft IRQ, ideally we want to re-enable all levels */
+
+       else if (in_softirq()) {
+
+               /* However if this is case of L1 interrupted by L2,
+                * re-enabling both may cause a wacky L1-L2-L1 scenario
+                * because ARC700 allows level 1 to interrupt an active L2 ISR
+                * Thus we disable both
+                * However some code, executing in soft ISR wants some IRQs
+                * to be enabled so we re-enable L2 only
+                *
+                * How do we determine L1 was interrupted by L2?
+                *  -A2 is set (means in L2 ISR)
+                *  -A1 is set in this ISR's pt_regs->status32, which is the
+                *      saved copy of status32_l2 when the L2 ISR happened
+                */
+               struct pt_regs *pt = get_irq_regs();
+
+               if ((flags & STATUS_A2_MASK) && pt &&
+                   (pt->status32 & STATUS_A1_MASK)) {
+                       /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
+                       flags &= ~(STATUS_E1_MASK);
+               }
+       }
+
+       arch_local_irq_restore(flags);
+}
+
+#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
+
+/*
+ * Simpler version for only 1 level of interrupt
+ * Here we only Worry about Level 1 Bits
+ */
+void arch_local_irq_enable(void)
+{
+       unsigned long flags;
+
+       /*
+        * The ARC IDE driver tries to re-enable interrupts from hard-isr
+        * context which is simply wrong
+        */
+       if (in_irq()) {
+               WARN_ONCE(1, "IRQ enabled from hard-isr");
+               return;
+       }
+
+       flags = arch_local_save_flags();
+       flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+       arch_local_irq_restore(flags);
+}
+#endif
+EXPORT_SYMBOL(arch_local_irq_enable);
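The decision arch_local_irq_enable() makes above is easier to audit as a table. Bit positions below are assumptions based on how the masks are used here (see asm/arcregs.h for the authoritative values):

#define STATUS_E1 (1u << 1)	/* L1 interrupts enabled */
#define STATUS_E2 (1u << 2)	/* L2 interrupts enabled */
#define STATUS_A1 (1u << 3)	/* L1 ISR active */
#define STATUS_A2 (1u << 4)	/* L2 ISR active */

/*
 * Context at call time                E2 re-enabled   E1 re-enabled
 * ----------------------------------  -------------   -------------
 * hard-ISR, A2 set (inside L2 ISR)    no              no
 * hard-ISR, A1 set (inside L1 ISR)    yes             no
 * soft-ISR, L2 preempted an L1 ISR    yes             no
 * soft-ISR otherwise                  yes             yes
 * plain process context               yes             yes
 */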
index 620ec2fe32a94f855c6e3db0a04fa25c41d5d1c2..2989a7bcf8a863709734d7f5343bb16c789089f2 100644 (file)
@@ -8,115 +8,9 @@
  */
 
 #include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
-#include <asm/sections.h>
-#include <asm/irq.h>
 #include <asm/mach_desc.h>
 
-/*
- * Early Hardware specific Interrupt setup
- * -Platform independent, needed for each CPU (not foldable into init_IRQ)
- * -Called very early (start_kernel -> setup_arch -> setup_processor)
- *
- * what it does ?
- * -Optionally, setup the High priority Interrupts as Level 2 IRQs
- */
-void arc_init_IRQ(void)
-{
-       int level_mask = 0;
-
-       /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
-       level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
-       level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
-       level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
-
-       /*
-        * Write to register, even if no LV2 IRQs configured to reset it
-        * in case bootloader had mucked with it
-        */
-       write_aux_reg(AUX_IRQ_LEV, level_mask);
-
-       if (level_mask)
-               pr_info("Level-2 interrupts bitset %x\n", level_mask);
-}
-
-/*
- * ARC700 core includes a simple on-chip intc supporting
- * -per IRQ enable/disable
- * -2 levels of interrupts (high/low)
- * -all interrupts being level triggered
- *
- * To reduce platform code, we assume all IRQs directly hooked-up into intc.
- * Platforms with external intc, hence cascaded IRQs, are free to over-ride
- * below, per IRQ.
- */
-
-static void arc_irq_mask(struct irq_data *data)
-{
-       unsigned int ienb;
-
-       ienb = read_aux_reg(AUX_IENABLE);
-       ienb &= ~(1 << data->irq);
-       write_aux_reg(AUX_IENABLE, ienb);
-}
-
-static void arc_irq_unmask(struct irq_data *data)
-{
-       unsigned int ienb;
-
-       ienb = read_aux_reg(AUX_IENABLE);
-       ienb |= (1 << data->irq);
-       write_aux_reg(AUX_IENABLE, ienb);
-}
-
-static struct irq_chip onchip_intc = {
-       .name           = "ARC In-core Intc",
-       .irq_mask       = arc_irq_mask,
-       .irq_unmask     = arc_irq_unmask,
-};
-
-static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
-                               irq_hw_number_t hw)
-{
-       if (irq == TIMER0_IRQ)
-               irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
-       else
-               irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
-
-       return 0;
-}
-
-static const struct irq_domain_ops arc_intc_domain_ops = {
-       .xlate = irq_domain_xlate_onecell,
-       .map = arc_intc_domain_map,
-};
-
-static struct irq_domain *root_domain;
-
-static int __init
-init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
-{
-       if (parent)
-               panic("DeviceTree incore intc not a root irq controller\n");
-
-       root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
-                                           &arc_intc_domain_ops, NULL);
-
-       if (!root_domain)
-               panic("root irq domain not avail\n");
-
-       /* with this we don't need to export root_domain */
-       irq_set_default_host(root_domain);
-
-       return 0;
-}
-
-IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
-
 /*
  * Late Interrupt system init called from start_kernel for Boot CPU only
  *
@@ -178,107 +72,3 @@ void arc_request_percpu_irq(int irq, int cpu,
 
        enable_percpu_irq(irq, 0);
 }
-
-/*
- * arch_local_irq_enable - Enable interrupts.
- *
- * 1. Explicitly called to re-enable interrupts
- * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
- *    which maybe in hard ISR itself
- *
- * Semantics of this function change depending on where it is called from:
- *
- * -If called from hard-ISR, it must not invert interrupt priorities
- *  e.g. suppose TIMER is high priority (Level 2) IRQ
- *    Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
- *    Here local_irq_enable( ) shd not re-enable lower priority interrupts
- * -If called from soft-ISR, it must re-enable all interrupts
- *    soft ISR are low prioity jobs which can be very slow, thus all IRQs
- *    must be enabled while they run.
- *    Now hardware context wise we may still be in L2 ISR (not done rtie)
- *    still we must re-enable both L1 and L2 IRQs
- *  Another twist is prev scenario with flow being
- *     L1 ISR ==> interrupted by L2 ISR  ==> L2 soft ISR
- *     here we must not re-enable Ll as prev Ll Interrupt's h/w context will get
- *     over-written (this is deficiency in ARC700 Interrupt mechanism)
- */
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS   /* Complex version for 2 IRQ levels */
-
-void arch_local_irq_enable(void)
-{
-
-       unsigned long flags;
-       flags = arch_local_save_flags();
-
-       /* Allow both L1 and L2 at the onset */
-       flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-
-       /* Called from hard ISR (between irq_enter and irq_exit) */
-       if (in_irq()) {
-
-               /* If in L2 ISR, don't re-enable any further IRQs as this can
-                * cause IRQ priorities to get upside down. e.g. it could allow
-                * L1 be taken while in L2 hard ISR which is wrong not only in
-                * theory, it can also cause the dreaded L1-L2-L1 scenario
-                */
-               if (flags & STATUS_A2_MASK)
-                       flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
-
-               /* Even if in L1 ISR, allowe Higher prio L2 IRQs */
-               else if (flags & STATUS_A1_MASK)
-                       flags &= ~(STATUS_E1_MASK);
-       }
-
-       /* called from soft IRQ, ideally we want to re-enable all levels */
-
-       else if (in_softirq()) {
-
-               /* However if this is case of L1 interrupted by L2,
-                * re-enabling both may cause whaco L1-L2-L1 scenario
-                * because ARC700 allows level 1 to interrupt an active L2 ISR
-                * Thus we disable both
-                * However some code, executing in soft ISR wants some IRQs
-                * to be enabled so we re-enable L2 only
-                *
-                * How do we determine L1 intr by L2
-                *  -A2 is set (means in L2 ISR)
-                *  -E1 is set in this ISR's pt_regs->status32 which is
-                *      saved copy of status32_l2 when l2 ISR happened
-                */
-               struct pt_regs *pt = get_irq_regs();
-               if ((flags & STATUS_A2_MASK) && pt &&
-                   (pt->status32 & STATUS_A1_MASK)) {
-                       /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
-                       flags &= ~(STATUS_E1_MASK);
-               }
-       }
-
-       arch_local_irq_restore(flags);
-}
-
-#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
-
-/*
- * Simpler version for only 1 level of interrupt
- * Here we only Worry about Level 1 Bits
- */
-void arch_local_irq_enable(void)
-{
-       unsigned long flags;
-
-       /*
-        * ARC IDE Drivers tries to re-enable interrupts from hard-isr
-        * context which is simply wrong
-        */
-       if (in_irq()) {
-               WARN_ONCE(1, "IRQ enabled from hard-isr");
-               return;
-       }
-
-       flags = arch_local_save_flags();
-       flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-       arch_local_irq_restore(flags);
-}
-#endif
-EXPORT_SYMBOL(arch_local_irq_enable);
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
new file mode 100644 (file)
index 0000000..30284e8
--- /dev/null
@@ -0,0 +1,341 @@
+/*
+ * ARC ARConnect (MultiCore IP) support (formerly known as MCIP)
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <asm/mcip.h>
+
+static char smp_cpuinfo_buf[128];
+static int idu_detected;
+
+static DEFINE_RAW_SPINLOCK(mcip_lock);
+
+/*
+ * Any SMP specific init any CPU does when it comes up.
+ * Here we setup the CPU to enable Inter-Processor-Interrupts
+ * Called for each CPU
+ * -Master      : init_IRQ()
+ * -Other(s)    : start_kernel_secondary()
+ */
+void mcip_init_smp(unsigned int cpu)
+{
+       smp_ipi_irq_setup(cpu, IPI_IRQ);
+}
+
+static void mcip_ipi_send(int cpu)
+{
+       unsigned long flags;
+       int ipi_was_pending;
+
+       /*
+        * NOTE: We must spin here if the other cpu hasn't yet
+        * serviced a previous message. This can burn lots
+        * of time, but we MUST follow this protocol or
+        * ipi messages can be lost!!!
+        * Also, we must release the lock in this loop because
+        * the other side may get to this same loop and not
+        * be able to ack -- thus causing deadlock.
+        */
+
+       do {
+               raw_spin_lock_irqsave(&mcip_lock, flags);
+               __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+               ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+               if (ipi_was_pending == 0)
+                       break; /* break out but keep lock */
+               raw_spin_unlock_irqrestore(&mcip_lock, flags);
+       } while (1);
+
+       __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+#ifdef CONFIG_ARC_IPI_DBG
+       if (ipi_was_pending)
+               pr_info("IPI ACK delayed from cpu %d\n", cpu);
+#endif
+}
+
+static void mcip_ipi_clear(int irq)
+{
+       unsigned int cpu, c;
+       unsigned long flags;
+       unsigned int __maybe_unused copy;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+
+       /* Who sent the IPI */
+       __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
+
+       copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK);       /* 1,2,4,8... */
+
+       /*
+        * In rare case, multiple concurrent IPIs sent to same target can
+        * possibly be coalesced by MCIP into 1 asserted IRQ, so @cpu can be
+        * "vectored" (multiple bits set) as opposed to typical single bit
+        */
+       do {
+               c = __ffs(cpu);                 /* 0,1,2,3 */
+               __mcip_cmd(CMD_INTRPT_GENERATE_ACK, c);
+               cpu &= ~(1U << c);
+       } while (cpu);
+
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+#ifdef CONFIG_ARC_IPI_DBG
+       if (c != __ffs(copy))
+               pr_info("IPIs from %x coalesced to %x\n",
+                       copy, raw_smp_processor_id());
+#endif
+}
+
+volatile int wake_flag;
+
+static void mcip_wakeup_cpu(int cpu, unsigned long pc)
+{
+       BUG_ON(cpu == 0);
+       wake_flag = cpu;
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+       while (wake_flag != cpu)
+               ;
+
+       wake_flag = 0;
+       __asm__ __volatile__("j @first_lines_of_secondary       \n");
+}
+
+struct plat_smp_ops plat_smp_ops = {
+       .info           = smp_cpuinfo_buf,
+       .cpu_kick       = mcip_wakeup_cpu,
+       .ipi_send       = mcip_ipi_send,
+       .ipi_clear      = mcip_ipi_clear,
+};
+
+void mcip_init_early_smp(void)
+{
+#define IS_AVAIL1(var, str)    ((var) ? str : "")
+
+       struct mcip_bcr {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad3:8,
+                            idu:1, llm:1, num_cores:6,
+                            iocoh:1,  grtc:1, dbg:1, pad2:1,
+                            msg:1, sem:1, ipi:1, pad:1,
+                            ver:8;
+#else
+               unsigned int ver:8,
+                            pad:1, ipi:1, sem:1, msg:1,
+                            pad2:1, dbg:1, grtc:1, iocoh:1,
+                            num_cores:6, llm:1, idu:1,
+                            pad3:8;
+#endif
+       } mp;
+
+       READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+       sprintf(smp_cpuinfo_buf,
+               "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+               mp.ver, mp.num_cores,
+               IS_AVAIL1(mp.ipi, "IPI "),
+               IS_AVAIL1(mp.idu, "IDU "),
+               IS_AVAIL1(mp.dbg, "DEBUG "),
+               IS_AVAIL1(mp.grtc, "GRTC"));
+
+       idu_detected = mp.idu;
+
+       if (mp.dbg) {
+               __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
+               __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
+       }
+
+       if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc)
+               panic("kernel trying to use non-existent GRTC\n");
+}
+
+/***************************************************************************
+ * ARCv2 Interrupt Distribution Unit (IDU)
+ *
+ * Connects external "COMMON" IRQs to core intc, providing:
+ *  -dynamic routing (IRQ affinity)
+ *  -load balancing (Round Robin interrupt distribution)
+ *  -1:N distribution
+ *
+ * It physically resides in the MCIP hw block
+ */
+
+#include <linux/irqchip.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include "../../drivers/irqchip/irqchip.h"
+
+/*
+ * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core)
+ */
+static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask)
+{
+       __mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask);
+}
+
+static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl,
+                          unsigned int distr)
+{
+       union {
+               unsigned int word;
+               struct {
+                       unsigned int distr:2, pad:2, lvl:1, pad2:27;
+               };
+       } data;
+
+       data.distr = distr;
+       data.lvl = lvl;
+       __mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word);
+}
+
+static void idu_irq_mask(struct irq_data *data)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+       __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1);
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void idu_irq_unmask(struct irq_data *data)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+       __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 0);
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static int
+idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f)
+{
+       return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip idu_irq_chip = {
+       .name                   = "MCIP IDU Intc",
+       .irq_mask               = idu_irq_mask,
+       .irq_unmask             = idu_irq_unmask,
+#ifdef CONFIG_SMP
+       .irq_set_affinity       = idu_irq_set_affinity,
+#endif
+
+};
+
+static int idu_first_irq;
+
+static void idu_cascade_isr(unsigned int core_irq, struct irq_desc *desc)
+{
+       struct irq_domain *domain = irq_desc_get_handler_data(desc);
+       unsigned int idu_irq;
+
+       idu_irq = core_irq - idu_first_irq;
+       generic_handle_irq(irq_find_mapping(domain, idu_irq));
+}
+
+static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
+{
+       irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
+       irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+
+       return 0;
+}
+
+static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
+                        const u32 *intspec, unsigned int intsize,
+                        irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+       irq_hw_number_t hwirq = *out_hwirq = intspec[0];
+       int distri = intspec[1];
+       unsigned long flags;
+
+       *out_type = IRQ_TYPE_NONE;
+
+       /* XXX: validate distribution scheme against online cpu mask */
+       if (distri == 0) {
+               /* 0 - Round Robin to all cpus, otherwise 1 bit per core */
+               raw_spin_lock_irqsave(&mcip_lock, flags);
+               idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
+               idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+               raw_spin_unlock_irqrestore(&mcip_lock, flags);
+       } else {
+               /*
+                * DEST based distribution for Level Triggered intr can only
+                * have 1 CPU, so generalize it to always contain 1 cpu
+                */
+               int cpu = ffs(distri);
+
+               if (cpu != fls(distri))
+                       pr_warn("IDU irq %lx distri mode set to cpu %x\n",
+                               hwirq, cpu);
+
+               raw_spin_lock_irqsave(&mcip_lock, flags);
+               idu_set_dest(hwirq, cpu);
+               idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
+               raw_spin_unlock_irqrestore(&mcip_lock, flags);
+       }
+
+       return 0;
+}
+
+static const struct irq_domain_ops idu_irq_ops = {
+       .xlate  = idu_irq_xlate,
+       .map    = idu_irq_map,
+};
+
+/*
+ * [16, 23]: Statically assigned always private-per-core (Timers, WDT, IPI)
+ * [24, 23+C]: If C > 0 then "C" common IRQs
+ * [24+C, N]: Not statically assigned, private-per-core
+ */
+
+
+static int __init
+idu_of_init(struct device_node *intc, struct device_node *parent)
+{
+       struct irq_domain *domain;
+       /* Read IDU BCR to confirm nr_irqs */
+       int nr_irqs = of_irq_count(intc);
+       int i, irq;
+
+       if (!idu_detected)
+               panic("IDU not detected, but DeviceTree using it");
+
+       pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs);
+
+       domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL);
+
+       /* Parent interrupts (core-intc) are already mapped */
+
+       for (i = 0; i < nr_irqs; i++) {
+               /*
+                * Parent uplink IRQs (towards core intc) 24,25,... were
+                * already mapped above; we re-parse them here only to get
+                * the parent virq so the IDU cascade handler can be
+                * installed as the first-level isr
+                */
+               irq = irq_of_parse_and_map(intc, i);
+               if (!i)
+                       idu_first_irq = irq;
+
+               irq_set_handler_data(irq, domain);
+               irq_set_chained_handler(irq, idu_cascade_isr);
+       }
+
+       __mcip_cmd(CMD_IDU_ENABLE, 0);
+
+       return 0;
+}
+IRQCHIP_DECLARE(arcv2_idu_intc, "snps,archs-idu-intc", idu_of_init);
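
The xlate above treats the second interrupt-specifier cell as a distribution
bitmap: 0 means round-robin across online CPUs, anything else is one bit per
core, reduced to a single CPU for level-triggered DEST mode. A minimal sketch
of that reduction (stand-alone C, not part of the patch; ffs()/fls() modeled
with GCC builtins):

    #include <stdio.h>

    static int ffs32(unsigned int x) { return __builtin_ffs(x); }  /* 1-based */
    static int fls32(unsigned int x) { return x ? 32 - __builtin_clz(x) : 0; }

    int main(void)
    {
        unsigned int distri = 0x6;      /* specifier asked for two cores */
        int cpu = ffs32(distri);        /* lowest requested core wins */

        if (cpu != fls32(distri))       /* more than one bit was set */
            printf("level IRQ can't fan out, pinned to cpu %d\n", cpu);
        return 0;
    }
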
index fd2ec50102f201254b1e5e51b4bbc7fa93fba95a..1287388c258ace8ef57f1030a46acb65f0fbb87e 100644 (file)
@@ -266,10 +266,9 @@ static int arc_pmu_add(struct perf_event *event, int flags)
 
 static int arc_pmu_device_probe(struct platform_device *pdev)
 {
-       struct arc_pmu *arc_pmu;
        struct arc_reg_pct_build pct_bcr;
        struct arc_reg_cc_build cc_bcr;
-       int i, j, ret;
+       int i, j;
 
        union cc_name {
                struct {
@@ -336,9 +335,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
        /* ARC 700 PMU does not support sampling events */
        arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
-       ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
-
-       return ret;
+       return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
 }
 
 #ifdef CONFIG_OF
index e095c557afdddc3aefce744c97d5d7bb20a6f2e6..44092456776f8e9cd929b36e5083fbc4074dac9b 100644 (file)
@@ -44,7 +44,11 @@ SYSCALL_DEFINE0(arc_gettls)
 void arch_cpu_idle(void)
 {
        /* sleep, but enable all interrupts before committing */
-       __asm__("sleep 0x3");
+       if (is_isa_arcompact()) {
+               __asm__("sleep 0x3");
+       } else {
+               __asm__("sleep 0x10");
+       }
 }
 
 asmlinkage void ret_from_fork(void);
@@ -166,8 +170,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
         * [L] ZOL loop inhibited to begin with - cleared by a LP insn
         * Interrupts enabled
         */
-       regs->status32 = STATUS_U_MASK | STATUS_L_MASK |
-                        STATUS_E1_MASK | STATUS_E2_MASK;
+       regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
 
        /* bogus seed values for debugging */
        regs->lp_start = 0x10;
@@ -197,8 +200,11 @@ int elf_check_arch(const struct elf32_hdr *x)
 {
        unsigned int eflags;
 
-       if (x->e_machine != EM_ARCOMPACT)
+       if (x->e_machine != EM_ARC_INUSE) {
+               pr_err("ELF not built for %s ISA\n",
+                       is_isa_arcompact() ? "ARCompact" : "ARCv2");
                return 0;
+       }
 
        eflags = x->e_flags;
        if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) {
index 13b3ffb27a384f8c214bfe110943ba020b0c297f..4442204fe238e656886384c5133d4af85492dabb 100644 (file)
@@ -47,10 +47,47 @@ static int genregs_get(struct task_struct *target,
                        offsetof(struct user_regs_struct, LOC) + 4);
 
        REG_O_ZERO(pad);
-       REG_O_CHUNK(scratch, callee, ptregs);
+       REG_O_ONE(scratch.bta, &ptregs->bta);
+       REG_O_ONE(scratch.lp_start, &ptregs->lp_start);
+       REG_O_ONE(scratch.lp_end, &ptregs->lp_end);
+       REG_O_ONE(scratch.lp_count, &ptregs->lp_count);
+       REG_O_ONE(scratch.status32, &ptregs->status32);
+       REG_O_ONE(scratch.ret, &ptregs->ret);
+       REG_O_ONE(scratch.blink, &ptregs->blink);
+       REG_O_ONE(scratch.fp, &ptregs->fp);
+       REG_O_ONE(scratch.gp, &ptregs->r26);
+       REG_O_ONE(scratch.r12, &ptregs->r12);
+       REG_O_ONE(scratch.r11, &ptregs->r11);
+       REG_O_ONE(scratch.r10, &ptregs->r10);
+       REG_O_ONE(scratch.r9, &ptregs->r9);
+       REG_O_ONE(scratch.r8, &ptregs->r8);
+       REG_O_ONE(scratch.r7, &ptregs->r7);
+       REG_O_ONE(scratch.r6, &ptregs->r6);
+       REG_O_ONE(scratch.r5, &ptregs->r5);
+       REG_O_ONE(scratch.r4, &ptregs->r4);
+       REG_O_ONE(scratch.r3, &ptregs->r3);
+       REG_O_ONE(scratch.r2, &ptregs->r2);
+       REG_O_ONE(scratch.r1, &ptregs->r1);
+       REG_O_ONE(scratch.r0, &ptregs->r0);
+       REG_O_ONE(scratch.sp, &ptregs->sp);
+
        REG_O_ZERO(pad2);
-       REG_O_CHUNK(callee, efa, cregs);
-       REG_O_CHUNK(efa, stop_pc, &target->thread.fault_address);
+
+       REG_O_ONE(callee.r25, &cregs->r25);
+       REG_O_ONE(callee.r24, &cregs->r24);
+       REG_O_ONE(callee.r23, &cregs->r23);
+       REG_O_ONE(callee.r22, &cregs->r22);
+       REG_O_ONE(callee.r21, &cregs->r21);
+       REG_O_ONE(callee.r20, &cregs->r20);
+       REG_O_ONE(callee.r19, &cregs->r19);
+       REG_O_ONE(callee.r18, &cregs->r18);
+       REG_O_ONE(callee.r17, &cregs->r17);
+       REG_O_ONE(callee.r16, &cregs->r16);
+       REG_O_ONE(callee.r15, &cregs->r15);
+       REG_O_ONE(callee.r14, &cregs->r14);
+       REG_O_ONE(callee.r13, &cregs->r13);
+
+       REG_O_ONE(efa, &target->thread.fault_address);
 
        if (!ret) {
                if (in_brkpt_trap(ptregs)) {
@@ -97,12 +134,51 @@ static int genregs_set(struct task_struct *target,
                        offsetof(struct user_regs_struct, LOC) + 4);
 
        REG_IGNORE_ONE(pad);
-       /* TBD: disallow updates to STATUS32 etc*/
-       REG_IN_CHUNK(scratch, pad2, ptregs);    /* pt_regs[bta..sp] */
+
+       REG_IN_ONE(scratch.bta, &ptregs->bta);
+       REG_IN_ONE(scratch.lp_start, &ptregs->lp_start);
+       REG_IN_ONE(scratch.lp_end, &ptregs->lp_end);
+       REG_IN_ONE(scratch.lp_count, &ptregs->lp_count);
+
+       REG_IGNORE_ONE(scratch.status32);
+
+       REG_IN_ONE(scratch.ret, &ptregs->ret);
+       REG_IN_ONE(scratch.blink, &ptregs->blink);
+       REG_IN_ONE(scratch.fp, &ptregs->fp);
+       REG_IN_ONE(scratch.gp, &ptregs->r26);
+       REG_IN_ONE(scratch.r12, &ptregs->r12);
+       REG_IN_ONE(scratch.r11, &ptregs->r11);
+       REG_IN_ONE(scratch.r10, &ptregs->r10);
+       REG_IN_ONE(scratch.r9, &ptregs->r9);
+       REG_IN_ONE(scratch.r8, &ptregs->r8);
+       REG_IN_ONE(scratch.r7, &ptregs->r7);
+       REG_IN_ONE(scratch.r6, &ptregs->r6);
+       REG_IN_ONE(scratch.r5, &ptregs->r5);
+       REG_IN_ONE(scratch.r4, &ptregs->r4);
+       REG_IN_ONE(scratch.r3, &ptregs->r3);
+       REG_IN_ONE(scratch.r2, &ptregs->r2);
+       REG_IN_ONE(scratch.r1, &ptregs->r1);
+       REG_IN_ONE(scratch.r0, &ptregs->r0);
+       REG_IN_ONE(scratch.sp, &ptregs->sp);
+
        REG_IGNORE_ONE(pad2);
-       REG_IN_CHUNK(callee, efa, cregs);       /* callee_regs[r25..r13] */
+
+       REG_IN_ONE(callee.r25, &cregs->r25);
+       REG_IN_ONE(callee.r24, &cregs->r24);
+       REG_IN_ONE(callee.r23, &cregs->r23);
+       REG_IN_ONE(callee.r22, &cregs->r22);
+       REG_IN_ONE(callee.r21, &cregs->r21);
+       REG_IN_ONE(callee.r20, &cregs->r20);
+       REG_IN_ONE(callee.r19, &cregs->r19);
+       REG_IN_ONE(callee.r18, &cregs->r18);
+       REG_IN_ONE(callee.r17, &cregs->r17);
+       REG_IN_ONE(callee.r16, &cregs->r16);
+       REG_IN_ONE(callee.r15, &cregs->r15);
+       REG_IN_ONE(callee.r14, &cregs->r14);
+       REG_IN_ONE(callee.r13, &cregs->r13);
+
        REG_IGNORE_ONE(efa);                    /* efa update invalid */
-       REG_IGNORE_ONE(stop_pc);                        /* PC updated via @ret */
+       REG_IGNORE_ONE(stop_pc);                /* PC updated via @ret */
 
        return ret;
 }
@@ -124,7 +200,7 @@ static const struct user_regset arc_regsets[] = {
 
 static const struct user_regset_view user_arc_view = {
        .name           = UTS_MACHINE,
-       .e_machine      = EM_ARCOMPACT,
+       .e_machine      = EM_ARC_INUSE,
        .regsets        = arc_regsets,
        .n              = ARRAY_SIZE(arc_regsets)
 };
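
The switch from REG_O_CHUNK/REG_IN_CHUNK block copies to per-register
REG_O_ONE/REG_IN_ONE moves is presumably what lets one fixed user_regs_struct
layout serve both ISAs while the in-kernel pt_regs layout differs between
them. A hedged illustration of why field-wise copying survives reordering
where a block copy would not (struct layouts invented):

    struct kregs { long r1, r0, sp; };  /* kernel layout, ISA specific */
    struct uregs { long sp, r0, r1; };  /* fixed user-visible layout   */

    /* memcpy(&u, &k, sizeof(u)) would silently scramble the fields;
     * naming each member keeps the copy correct under any reordering */
    static void export_regs(struct uregs *u, const struct kregs *k)
    {
        u->sp = k->sp;
        u->r0 = k->r0;
        u->r1 = k->r1;
    }
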
index 1d167c6df8caae8a48d59b2aa08539e9f20b51d6..a3d186211ed367bcf852718682a62f24b555f3de 100644 (file)
@@ -30,6 +30,8 @@
 
 #define FIX_PTR(x)  __asm__ __volatile__(";" : "+r"(x))
 
+unsigned int intr_to_DE_cnt;
+
 /* Part of U-boot ABI: see head.S */
 int __initdata uboot_tag;
 char __initdata *uboot_arg;
@@ -54,7 +56,7 @@ static void read_arc_build_cfg_regs(void)
        cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
        READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
-       cpu->uncached_base = uncached_space.start << 24;
+       BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE);
 
        READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
 
@@ -96,7 +98,7 @@ static void read_arc_build_cfg_regs(void)
        read_decode_mmu_bcr();
        read_decode_cache_bcr();
 
-       {
+       if (is_isa_arcompact()) {
                struct bcr_fp_arcompact sp, dp;
                struct bcr_bpu_arcompact bpu;
 
@@ -112,6 +114,19 @@ static void read_arc_build_cfg_regs(void)
                        cpu->bpu.num_cache = 256 << (bpu.ent - 1);
                        cpu->bpu.num_pred = 256 << (bpu.ent - 1);
                }
+       } else {
+               struct bcr_fp_arcv2 spdp;
+               struct bcr_bpu_arcv2 bpu;
+
+               READ_BCR(ARC_REG_FP_V2_BCR, spdp);
+               cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
+               cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
+
+               READ_BCR(ARC_REG_BPU_BCR, bpu);
+               cpu->bpu.ver = bpu.ver;
+               cpu->bpu.full = bpu.ft;
+               cpu->bpu.num_cache = 256 << bpu.bce;
+               cpu->bpu.num_pred = 2048 << bpu.pte;
        }
 
        READ_BCR(ARC_REG_AP_BCR, bcr);
@@ -131,6 +146,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
        { {0x30, "ARC 700"      }, 0x33},
        { {0x34, "ARC 700 R4.10"}, 0x34},
        { {0x35, "ARC 700 R4.11"}, 0x35},
+       { {0x50, "ARC HS38"     }, 0x51},
        { {0x00, NULL           } }
 };
 
@@ -149,13 +165,17 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 
        FIX_PTR(cpu);
 
-       {
+       if (is_isa_arcompact()) {
                isa_nm = "ARCompact";
                be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
 
                atomic = cpu->isa.atomic1;
                if (!cpu->isa.ver)      /* ISA BCR absent, use Kconfig info */
                        atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+       } else {
+               isa_nm = "ARCv2";
+               be = cpu->isa.be;
+               atomic = cpu->isa.atomic;
        }
 
        n += scnprintf(buf + n, len - n,
@@ -183,16 +203,34 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
        n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
                       IS_AVAIL1(cpu->timers.t0, "Timer0 "),
                       IS_AVAIL1(cpu->timers.t1, "Timer1 "),
-                      IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC));
+                      IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ",
+                                CONFIG_ARC_HAS_RTC));
 
-       n += i = scnprintf(buf + n, len - n, "%s%s",
-                          IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC));
+       n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
+                          IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+                          IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+                          IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
 
        if (i)
                n += scnprintf(buf + n, len - n, "\n\t\t: ");
 
+       if (cpu->extn_mpy.ver) {
+               if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */
+                       n += scnprintf(buf + n, len - n, "mpy ");
+               } else {
+                       int opt = 2;    /* stock MPY/MPYH */
+
+                       if (cpu->extn_mpy.dsp)  /* OPT 7-9 */
+                               opt = cpu->extn_mpy.dsp + 6;
+
+                       n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
+               }
+               n += scnprintf(buf + n, len - n, "%s",
+                              IS_USED(CONFIG_ARC_HAS_HW_MPY));
+       }
+
        n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
-                      IS_AVAIL1(cpu->extn_mpy.ver, "mpy "),
+                      IS_AVAIL1(cpu->isa.div_rem, "div_rem "),
                       IS_AVAIL1(cpu->extn.norm, "norm "),
                       IS_AVAIL1(cpu->extn.barrel, "barrel-shift "),
                       IS_AVAIL1(cpu->extn.swap, "swap "),
@@ -219,7 +257,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 
        n += scnprintf(buf + n, len - n,
                       "Vector Table\t: %#x\nUncached Base\t: %#x\n",
-                      cpu->vec_base, cpu->uncached_base);
+                      cpu->vec_base, ARC_UNCACHED_ADDR_SPACE);
 
        if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
                n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
@@ -254,8 +292,8 @@ static void arc_chk_core_config(void)
        if (!cpu->timers.t1)
                panic("Timer1 is not present!\n");
 
-       if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc)
-               panic("RTSC is not present\n");
+       if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc)
+               panic("RTC is not present\n");
 
 #ifdef CONFIG_ARC_HAS_DCCM
        /*
@@ -323,13 +361,16 @@ static inline int is_kernel(unsigned long addr)
 
 void __init setup_arch(char **cmdline_p)
 {
+#ifdef CONFIG_ARC_UBOOT_SUPPORT
        /* make sure that uboot passed pointer to cmdline/dtb is valid */
        if (uboot_tag && is_kernel((unsigned long)uboot_arg))
                panic("Invalid uboot arg\n");
 
        /* See if u-boot passed an external Device Tree blob */
        machine_desc = setup_machine_fdt(uboot_arg);    /* uboot_tag == 2 */
-       if (!machine_desc) {
+       if (!machine_desc)
+#endif
+       {
                /* No, so try the embedded one */
                machine_desc = setup_machine_fdt(__dtb_start);
                if (!machine_desc)
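
The READ_BCR() calls above all follow one pattern: overlay a packed bitfield
struct on a 32-bit Build Configuration Register so fields can be decoded by
name. A stand-alone model (field widths and the register value are invented
for illustration; little-endian bit order, as in the patch's #ifdef arms):

    #include <stdint.h>
    #include <stdio.h>

    union bcr_fp {                          /* hypothetical field layout */
        struct { uint32_t ver:8, sp:1, dp:1, pad:22; };
        uint32_t word;
    };

    int main(void)
    {
        union bcr_fp fp = { .word = 0x302 };    /* pretend aux-reg read */

        printf("fpu ver=%u sp=%u dp=%u\n", fp.ver, fp.sp, fp.dp);
        return 0;
    }
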
index 2251fb4bbfd76c4e8ab67477302d1e215017e73d..004b7f0bc76cc58c6988547df1bb8705cd36004d 100644 (file)
@@ -67,7 +67,33 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
               sigset_t *set)
 {
        int err;
-       err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs,
+       struct user_regs_struct uregs;
+
+       uregs.scratch.bta       = regs->bta;
+       uregs.scratch.lp_start  = regs->lp_start;
+       uregs.scratch.lp_end    = regs->lp_end;
+       uregs.scratch.lp_count  = regs->lp_count;
+       uregs.scratch.status32  = regs->status32;
+       uregs.scratch.ret       = regs->ret;
+       uregs.scratch.blink     = regs->blink;
+       uregs.scratch.fp        = regs->fp;
+       uregs.scratch.gp        = regs->r26;
+       uregs.scratch.r12       = regs->r12;
+       uregs.scratch.r11       = regs->r11;
+       uregs.scratch.r10       = regs->r10;
+       uregs.scratch.r9        = regs->r9;
+       uregs.scratch.r8        = regs->r8;
+       uregs.scratch.r7        = regs->r7;
+       uregs.scratch.r6        = regs->r6;
+       uregs.scratch.r5        = regs->r5;
+       uregs.scratch.r4        = regs->r4;
+       uregs.scratch.r3        = regs->r3;
+       uregs.scratch.r2        = regs->r2;
+       uregs.scratch.r1        = regs->r1;
+       uregs.scratch.r0        = regs->r0;
+       uregs.scratch.sp        = regs->sp;
+
+       err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
                             sizeof(sf->uc.uc_mcontext.regs.scratch));
        err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
@@ -78,14 +104,40 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
 {
        sigset_t set;
        int err;
+       struct user_regs_struct uregs;
 
        err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
        if (!err)
                set_current_blocked(&set);
 
-       err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch),
+       err |= __copy_from_user(&uregs.scratch,
+                               &(sf->uc.uc_mcontext.regs.scratch),
                                sizeof(sf->uc.uc_mcontext.regs.scratch));
 
+       regs->bta       = uregs.scratch.bta;
+       regs->lp_start  = uregs.scratch.lp_start;
+       regs->lp_end    = uregs.scratch.lp_end;
+       regs->lp_count  = uregs.scratch.lp_count;
+       regs->status32  = uregs.scratch.status32;
+       regs->ret       = uregs.scratch.ret;
+       regs->blink     = uregs.scratch.blink;
+       regs->fp        = uregs.scratch.fp;
+       regs->r26       = uregs.scratch.gp;
+       regs->r12       = uregs.scratch.r12;
+       regs->r11       = uregs.scratch.r11;
+       regs->r10       = uregs.scratch.r10;
+       regs->r9        = uregs.scratch.r9;
+       regs->r8        = uregs.scratch.r8;
+       regs->r7        = uregs.scratch.r7;
+       regs->r6        = uregs.scratch.r6;
+       regs->r5        = uregs.scratch.r5;
+       regs->r4        = uregs.scratch.r4;
+       regs->r3        = uregs.scratch.r3;
+       regs->r2        = uregs.scratch.r2;
+       regs->r1        = uregs.scratch.r1;
+       regs->r0        = uregs.scratch.r0;
+       regs->sp        = uregs.scratch.sp;
+
        return err;
 }
 
@@ -284,7 +336,7 @@ static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs)
                 * their orig user space value when we ret from kernel
                 */
                regs->r0 = regs->orig_r0;
-               regs->ret -= 4;
+               regs->ret -= is_isa_arcv2() ? 2 : 4;
                break;
        }
 }
@@ -325,10 +377,10 @@ void do_signal(struct pt_regs *regs)
                if (regs->r0 == -ERESTARTNOHAND ||
                    regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) {
                        regs->r0 = regs->orig_r0;
-                       regs->ret -= 4;
+                       regs->ret -= is_isa_arcv2() ? 2 : 4;
                } else if (regs->r0 == -ERESTART_RESTARTBLOCK) {
                        regs->r8 = __NR_restart_syscall;
-                       regs->ret -= 4;
+                       regs->ret -= is_isa_arcv2() ? 2 : 4;
                }
                syscall_wont_restart(regs);     /* No more restarts */
        }
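
All three "regs->ret -= is_isa_arcv2() ? 2 : 4" hunks encode the same fact:
restarting a syscall means winding the saved return address back over the trap
instruction, presumably a 16-bit trap_s encoding on ARCv2 versus a 32-bit trap
on ARCompact. Schematically (a sketch, not the kernel's code):

    /* rewind the saved PC over the trapping instruction */
    #define TRAP_INSN_SZ(arcv2)    ((arcv2) ? 2 : 4)

    static void rewind_for_restart(unsigned long *ret, int is_arcv2)
    {
        *ret -= TRAP_INSN_SZ(is_arcv2);
    }
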
index 6a400b1b0b62e07b9e0d7b14d549e8a57652d681..be13d12420bad642c5141a58fdc82d5798204b59 100644 (file)
@@ -31,7 +31,7 @@ arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 #endif
 
-struct plat_smp_ops  plat_smp_ops;
+struct plat_smp_ops  __weak plat_smp_ops;
 
 /* XXX: per cpu ? Only needed once in early secondary boot */
 struct task_struct *secondary_idle_tsk;
@@ -182,7 +182,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 /*
  * not supported here
  */
-int __init setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
 {
        return -EINVAL;
 }
@@ -278,8 +278,10 @@ static void ipi_cpu_stop(void)
        machine_halt();
 }
 
-static inline void __do_IPI(unsigned long msg)
+static inline int __do_IPI(unsigned long msg)
 {
+       int rc = 0;
+
        switch (msg) {
        case IPI_RESCHEDULE:
                scheduler_ipi();
@@ -294,8 +296,10 @@ static inline void __do_IPI(unsigned long msg)
                break;
 
        default:
-               pr_warn("IPI with unexpected msg %ld\n", msg);
+               rc = 1;
        }
+
+       return rc;
 }
 
 /*
@@ -305,6 +309,7 @@ static inline void __do_IPI(unsigned long msg)
 irqreturn_t do_IPI(int irq, void *dev_id)
 {
        unsigned long pending;
+       unsigned long __maybe_unused copy;
 
        pr_debug("IPI [%ld] received on cpu %d\n",
                 *this_cpu_ptr(&ipi_data), smp_processor_id());
@@ -316,11 +321,18 @@ irqreturn_t do_IPI(int irq, void *dev_id)
         * "dequeue" the msg corresponding to this IPI (and possibly other
         * piggybacked msg from elided IPIs: see ipi_send_msg_one() above)
         */
-       pending = xchg(this_cpu_ptr(&ipi_data), 0);
+       copy = pending = xchg(this_cpu_ptr(&ipi_data), 0);
 
        do {
                unsigned long msg = __ffs(pending);
-               __do_IPI(msg);
+               int rc;
+
+               rc = __do_IPI(msg);
+#ifdef CONFIG_ARC_IPI_DBG
+               /* IPI received but no valid @msg */
+               if (rc)
+                       pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
+#endif
                pending &= ~(1U << msg);
        } while (pending);
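
do_IPI() drains every piggybacked message from one atomically fetched word:
xchg() grabs and clears the pending bitmask, then each set bit is handled
lowest-first. The same dequeue loop as a stand-alone sketch (__ffs() modeled
with a GCC builtin):

    #include <stdio.h>

    int main(void)
    {
        unsigned long pending = 0x5;    /* pretend xchg(&ipi_data, 0) result */

        while (pending) {
            unsigned long msg = __builtin_ctzl(pending);  /* ~ __ffs() */

            printf("handling IPI msg %lu\n", msg);
            pending &= ~(1UL << msg);
        }
        return 0;
    }
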
 
index 92320d6f737cf5149d0968f5da3cd73a47af9848..001de4ce711eae2b095451a52f68e77db3d81ef4 100644 (file)
@@ -122,19 +122,17 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
        while (1) {
                address = UNW_PC(&frame_info);
 
-               if (address && __kernel_text_address(address)) {
-                       if (consumer_fn(address, arg) == -1)
-                               break;
-               }
+               if (!address || !__kernel_text_address(address))
+                       break;
 
-               ret = arc_unwind(&frame_info);
+               if (consumer_fn(address, arg) == -1)
+                       break;
 
-               if (ret == 0) {
-                       frame_info.regs.r63 = frame_info.regs.r31;
-                       continue;
-               } else {
+               ret = arc_unwind(&frame_info);
+               if (ret)
                        break;
-               }
+
+               frame_info.regs.r63 = frame_info.regs.r31;
        }
 
        return address;         /* return the last address it saw */
index dbe74f418019bf7e498ed95bcdc08149a752ff51..3364d2bbc515471bba6478b8b34a417251ffde56 100644 (file)
@@ -26,6 +26,7 @@
  * while TIMER1 for free running (clocksource)
  *
  * Newer ARC700 cores have 64bit clk fetching RTSC insn, preferred over TIMER1
+ * which however is currently broken
  */
 
 #include <linux/spinlock.h>
@@ -44,6 +45,8 @@
 #include <asm/clk.h>
 #include <asm/mach_desc.h>
 
+#include <asm/mcip.h>
+
 /* Timer related Aux registers */
 #define ARC_REG_TIMER0_LIMIT   0x23    /* timer 0 limit */
 #define ARC_REG_TIMER0_CTRL    0x22    /* timer 0 control */
 
 /********** Clock Source Device *********/
 
-#ifdef CONFIG_ARC_HAS_RTSC
+#ifdef CONFIG_ARC_HAS_GRTC
 
-int arc_counter_setup(void)
+static int arc_counter_setup(void)
 {
-       /*
-        * For SMP this needs to be 0. However Kconfig glue doesn't
-        * enable this option for SMP configs
-        */
        return 1;
 }
 
@@ -75,45 +74,84 @@ static cycle_t arc_counter_read(struct clocksource *cs)
        unsigned long flags;
        union {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-               struct { u32 high, low; };
+               struct { u32 h, l; };
 #else
-               struct { u32 low, high; };
+               struct { u32 l, h; };
 #endif
                cycle_t  full;
        } stamp;
 
-       flags = arch_local_irq_save();
+       local_irq_save(flags);
 
-       __asm__ __volatile(
-       "       .extCoreRegister tsch, 58,  r, cannot_shortcut  \n"
-       "       rtsc %0, 0      \n"
-       "       mov  %1, 0      \n"
-       : "=r" (stamp.low), "=r" (stamp.high));
+       __mcip_cmd(CMD_GRTC_READ_LO, 0);
+       stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+       __mcip_cmd(CMD_GRTC_READ_HI, 0);
+       stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK);
 
-       arch_local_irq_restore(flags);
+       local_irq_restore(flags);
 
        return stamp.full;
 }
 
 static struct clocksource arc_counter = {
-       .name   = "ARC RTSC",
-       .rating = 300,
+       .name   = "ARConnect GRTC",
+       .rating = 400,
        .read   = arc_counter_read,
-       .mask   = CLOCKSOURCE_MASK(32),
+       .mask   = CLOCKSOURCE_MASK(64),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-#else /* !CONFIG_ARC_HAS_RTSC */
+#else
+
+#ifdef CONFIG_ARC_HAS_RTC
+
+#define AUX_RTC_CTRL   0x103
+#define AUX_RTC_LOW    0x104
+#define AUX_RTC_HIGH   0x105
 
-static bool is_usable_as_clocksource(void)
+int arc_counter_setup(void)
 {
-#ifdef CONFIG_SMP
-       return 0;
+       write_aux_reg(AUX_RTC_CTRL, 1);
+
+       /* Not usable in SMP */
+       return !IS_ENABLED(CONFIG_SMP);
+}
+
+static cycle_t arc_counter_read(struct clocksource *cs)
+{
+       unsigned long status;
+       union {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               struct { u32 high, low; };
 #else
-       return 1;
+               struct { u32 low, high; };
 #endif
+               cycle_t  full;
+       } stamp;
+
+       __asm__ __volatile(
+       "1:                                             \n"
+       "       lr              %0, [AUX_RTC_LOW]       \n"
+       "       lr              %1, [AUX_RTC_HIGH]      \n"
+       "       lr              %2, [AUX_RTC_CTRL]      \n"
+       "       bbit0.nt        %2, 31, 1b              \n"
+       : "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
+
+       return stamp.full;
 }
 
+static struct clocksource arc_counter = {
+       .name   = "ARCv2 RTC",
+       .rating = 350,
+       .read   = arc_counter_read,
+       .mask   = CLOCKSOURCE_MASK(64),
+       .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+#else /* !CONFIG_ARC_HAS_RTC */
+
 /*
  * set 32bit TIMER1 to keep counting monotonically and wraparound
  */
@@ -123,7 +161,8 @@ int arc_counter_setup(void)
        write_aux_reg(ARC_REG_TIMER1_CNT, 0);
        write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH);
 
-       return is_usable_as_clocksource();
+       /* Not usable in SMP */
+       return !IS_ENABLED(CONFIG_SMP);
 }
 
 static cycle_t arc_counter_read(struct clocksource *cs)
@@ -139,6 +178,7 @@ static struct clocksource arc_counter = {
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+#endif
 #endif
 
 /********** Clock Event Device *********/
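
The retry loop in the ARCv2 RTC reader exists because the two 32-bit halves
can straddle a carry from LOW into HIGH; per the code, bit 31 of AUX_RTC_CTRL
reports whether the pair formed a consistent snapshot, and bbit0.nt loops
until it did. The same idea in C shape (read_aux() is a hypothetical stand-in
for an aux-register read):

    extern unsigned int read_aux(unsigned int reg);    /* sketch only */

    static unsigned long long rtc_read64(void)
    {
        unsigned int lo, hi, status;

        do {
            lo     = read_aux(0x104);    /* AUX_RTC_LOW  */
            hi     = read_aux(0x105);    /* AUX_RTC_HIGH */
            status = read_aux(0x103);    /* AUX_RTC_CTRL */
        } while (!(status & (1u << 31)));    /* retry till snapshot valid */

        return ((unsigned long long)hi << 32) | lo;
    }
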
index e00a01879025ea4d55be8d404b80cf2d2e53c22b..e0cf998932123fae4df27f8c98712f57551d8849 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/proc_fs.h>
 #include <linux/file.h>
 #include <asm/arcregs.h>
+#include <asm/irqflags.h>
 
 /*
  * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
@@ -34,7 +35,10 @@ static noinline void print_reg_file(long *reg_rev, int start_num)
                        n += scnprintf(buf + n, len - n, "\n");
 
                /* because pt_regs has regs reversed: r12..r0, r25..r13 */
-               reg_rev--;
+               if (is_isa_arcv2() && start_num == 0)
+                       reg_rev++;
+               else
+                       reg_rev--;
        }
 
        if (start_num != 0)
@@ -152,6 +156,15 @@ static void show_ecr_verbose(struct pt_regs *regs)
                                ((cause_code == 0x02) ? "Write" : "EX"));
        } else if (vec == ECR_V_INSN_ERR) {
                pr_cont("Illegal Insn\n");
+#ifdef CONFIG_ISA_ARCV2
+       } else if (vec == ECR_V_MEM_ERR) {
+               if (cause_code == 0x00)
+                       pr_cont("Bus Error from Insn Mem\n");
+               else if (cause_code == 0x10)
+                       pr_cont("Bus Error from Data Mem\n");
+               else
+                       pr_cont("Bus Error, check PRM\n");
+#endif
        } else {
                pr_cont("Check Programmer's Manual\n");
        }
@@ -185,12 +198,20 @@ void show_regs(struct pt_regs *regs)
 
        pr_info("[STAT32]: 0x%08lx", regs->status32);
 
-#define STS_BIT(r, bit)        r->status32 & STATUS_##bit##_MASK ? #bit : ""
-       if (!user_mode(regs))
-               pr_cont(" : %2s %2s %2s %2s %2s\n",
-                       STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1),
-                       STS_BIT(regs, E2), STS_BIT(regs, E1));
+#define STS_BIT(r, bit)        r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
 
+#ifdef CONFIG_ISA_ARCOMPACT
+       pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+                       (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+                       STS_BIT(regs, DE), STS_BIT(regs, AE),
+                       STS_BIT(regs, A2), STS_BIT(regs, A1),
+                       STS_BIT(regs, E2), STS_BIT(regs, E1));
+#else
+       pr_cont(" : %2s%2s%2s%2s\n",
+                       STS_BIT(regs, IE),
+                       (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+                       STS_BIT(regs, DE), STS_BIT(regs, AE));
+#endif
        pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
                regs->bta, regs->sp, regs->fp);
        pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
index db46e200baba298cb7d1af1c59f0e26128755395..b1656d15609750910512c9e00799c8d736f665b2 100644 (file)
@@ -5,5 +5,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-lib-y  := strchr-700.o strcmp.o strcpy-700.o strlen.o
-lib-y  += memcmp.o memcpy-700.o memset.o
+lib-y  := strchr-700.o strcpy-700.o strlen.o memcmp.o
+
+lib-$(CONFIG_ISA_ARCOMPACT)    += memcpy-700.o memset.o strcmp.o
+lib-$(CONFIG_ISA_ARCV2)                += memcpy-archs.o memset-archs.o strcmp-archs.o
index 978bf8314dfb47397b9732235428a5f74fce0447..a4015e7d9ab7aa6862b41ab6138b7d64c715693e 100644 (file)
@@ -24,14 +24,32 @@ ENTRY(memcmp)
        ld      r4,[r0,0]
        ld      r5,[r1,0]
        lsr.f   lp_count,r3,3
+#ifdef CONFIG_ISA_ARCV2
+       /* In ARCv2 a branch can't be the last instruction in a zero overhead
+        * loop.
+        * So we move the branch to the start of the loop, duplicate it
+        * after the end, and set up r12 so that the branch isn't taken
+        * initially.
+        */
+       mov_s   r12,WORD2
+       lpne    .Loop_end
+       brne    WORD2,r12,.Lodd
+       ld      WORD2,[r0,4]
+#else
        lpne    .Loop_end
        ld_s    WORD2,[r0,4]
+#endif
        ld_s    r12,[r1,4]
        brne    r4,r5,.Leven
        ld.a    r4,[r0,8]
        ld.a    r5,[r1,8]
+#ifdef CONFIG_ISA_ARCV2
+.Loop_end:
+       brne    WORD2,r12,.Lodd
+#else
        brne    WORD2,r12,.Lodd
 .Loop_end:
+#endif
        asl_s   SHIFT,SHIFT,3
        bhs_s   .Last_cmp
        brne    r4,r5,.Leven
@@ -89,7 +107,6 @@ ENTRY(memcmp)
        bset.cs r0,r0,31
 .Lodd:
        cmp_s   WORD2,r12
-
        mov_s   r0,1
        j_s.d   [blink]
        bset.cs r0,r0,31
@@ -100,14 +117,25 @@ ENTRY(memcmp)
        ldb     r4,[r0,0]
        ldb     r5,[r1,0]
        lsr.f   lp_count,r3
+#ifdef CONFIG_ISA_ARCV2
+       mov     r12,r3
        lpne    .Lbyte_end
+       brne    r3,r12,.Lbyte_odd
+#else
+       lpne    .Lbyte_end
+#endif
        ldb_s   r3,[r0,1]
        ldb     r12,[r1,1]
        brne    r4,r5,.Lbyte_even
        ldb.a   r4,[r0,2]
        ldb.a   r5,[r1,2]
+#ifdef CONFIG_ISA_ARCV2
+.Lbyte_end:
+       brne    r3,r12,.Lbyte_odd
+#else
        brne    r3,r12,.Lbyte_odd
 .Lbyte_end:
+#endif
        bcc     .Lbyte_even
        brne    r4,r5,.Lbyte_even
        ldb_s   r3,[r0,1]
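
The CONFIG_ISA_ARCV2 blocks above implement exactly what the first comment
describes: a branch may not be the last instruction of an ARCv2 zero-overhead
loop, so the exit test is rotated to the loop top (primed so it cannot fire on
the first pass) and duplicated once after the loop. A toy C version of the
rotation, for shape only:

    #include <stdio.h>

    int main(void)
    {
        int a[] = { 1, 2, 3 }, b[] = { 1, 2, 4 };
        int n = 3, i = 0, x = 0, y = 0;    /* y primed equal to x */

        while (n--) {
            if (x != y)        /* exit test moved to the loop top */
                break;
            x = a[i];
            y = b[i];
            i++;
        }
        if (x != y)            /* test duplicated once past the loop */
            printf("words differ at index %d\n", i - 1);
        return 0;
    }
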
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S
new file mode 100644 (file)
index 0000000..1b2b3ac
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+# define SHIFT_1(RX,RY,IMM)    asl     RX, RY, IMM     ; <<
+# define SHIFT_2(RX,RY,IMM)    lsr     RX, RY, IMM     ; >>
+# define MERGE_1(RX,RY,IMM)    asl     RX, RY, IMM
+# define MERGE_2(RX,RY,IMM)
+# define EXTRACT_1(RX,RY,IMM)  and     RX, RY, 0xFFFF
+# define EXTRACT_2(RX,RY,IMM)  lsr     RX, RY, IMM
+#else
+# define SHIFT_1(RX,RY,IMM)    lsr     RX, RY, IMM     ; >>
+# define SHIFT_2(RX,RY,IMM)    asl     RX, RY, IMM     ; <<
+# define MERGE_1(RX,RY,IMM)    asl     RX, RY, IMM     ; <<
+# define MERGE_2(RX,RY,IMM)    asl     RX, RY, IMM     ; <<
+# define EXTRACT_1(RX,RY,IMM)  lsr     RX, RY, IMM
+# define EXTRACT_2(RX,RY,IMM)  lsr     RX, RY, 0x08
+#endif
+
+#ifdef CONFIG_ARC_HAS_LL64
+# define PREFETCH_READ(RX)     prefetch    [RX, 56]
+# define PREFETCH_WRITE(RX)    prefetchw   [RX, 64]
+# define LOADX(DST,RX)         ldd.ab  DST, [RX, 8]
+# define STOREX(SRC,RX)                std.ab  SRC, [RX, 8]
+# define ZOLSHFT               5
+# define ZOLAND                        0x1F
+#else
+# define PREFETCH_READ(RX)     prefetch    [RX, 28]
+# define PREFETCH_WRITE(RX)    prefetchw   [RX, 32]
+# define LOADX(DST,RX)         ld.ab   DST, [RX, 4]
+# define STOREX(SRC,RX)                st.ab   SRC, [RX, 4]
+# define ZOLSHFT               4
+# define ZOLAND                        0xF
+#endif
+
+ENTRY(memcpy)
+       prefetch [r1]           ; Prefetch the read location
+       prefetchw [r0]          ; Prefetch the write location
+       mov.f   0, r2
+;;; if size is zero
+       jz.d    [blink]
+       mov     r3, r0          ; don't clobber ret val
+
+;;; if size <= 8
+       cmp     r2, 8
+       bls.d   @smallchunk
+       mov.f   lp_count, r2
+
+       and.f   r4, r0, 0x03
+       rsub    lp_count, r4, 4
+       lpnz    @aligndestination
+       ;; LOOP BEGIN
+       ldb.ab  r5, [r1,1]
+       sub     r2, r2, 1
+       stb.ab  r5, [r3,1]
+aligndestination:
+
+;;; Check the alignment of the source
+       and.f   r4, r1, 0x03
+       bnz.d   @sourceunaligned
+
+;;; CASE 0: Both source and destination are 32bit aligned
+;;; Convert len to Dwords, unfold x4
+       lsr.f   lp_count, r2, ZOLSHFT
+       lpnz    @copy32_64bytes
+       ;; LOOP START
+       LOADX (r6, r1)
+       PREFETCH_READ (r1)
+       PREFETCH_WRITE (r3)
+       LOADX (r8, r1)
+       LOADX (r10, r1)
+       LOADX (r4, r1)
+       STOREX (r6, r3)
+       STOREX (r8, r3)
+       STOREX (r10, r3)
+       STOREX (r4, r3)
+copy32_64bytes:
+
+       and.f   lp_count, r2, ZOLAND ;Last remaining 31 bytes
+smallchunk:
+       lpnz    @copyremainingbytes
+       ;; LOOP START
+       ldb.ab  r5, [r1,1]
+       stb.ab  r5, [r3,1]
+copyremainingbytes:
+
+       j       [blink]
+;;; END CASE 0
+
+sourceunaligned:
+       cmp     r4, 2
+       beq.d   @unalignedOffby2
+       sub     r2, r2, 1
+
+       bhi.d   @unalignedOffby3
+       ldb.ab  r5, [r1, 1]
+
+;;; CASE 1: The source is unaligned, off by 1
+       ;; Hence we need to read 1 byte for 16bit alignment
+       ;; and 2 bytes more to reach 32bit alignment
+       ldh.ab  r6, [r1, 2]
+       sub     r2, r2, 2
+       ;; Convert to words, unfold x2
+       lsr.f   lp_count, r2, 3
+       MERGE_1 (r6, r6, 8)
+       MERGE_2 (r5, r5, 24)
+       or      r5, r5, r6
+
+       ;; Both src and dst are aligned
+       lpnz    @copy8bytes_1
+       ;; LOOP START
+       ld.ab   r6, [r1, 4]
+       prefetch [r1, 28]       ;Prefetch the next read location
+       ld.ab   r8, [r1,4]
+       prefetchw [r3, 32]      ;Prefetch the next write location
+
+       SHIFT_1 (r7, r6, 24)
+       or      r7, r7, r5
+       SHIFT_2 (r5, r6, 8)
+
+       SHIFT_1 (r9, r8, 24)
+       or      r9, r9, r5
+       SHIFT_2 (r5, r8, 8)
+
+       st.ab   r7, [r3, 4]
+       st.ab   r9, [r3, 4]
+copy8bytes_1:
+
+       ;; Write back the remaining 16bits
+       EXTRACT_1 (r6, r5, 16)
+       sth.ab  r6, [r3, 2]
+       ;; Write back the remaining 8bits
+       EXTRACT_2 (r5, r5, 16)
+       stb.ab  r5, [r3, 1]
+
+       and.f   lp_count, r2, 0x07 ;Last 8bytes
+       lpnz    @copybytewise_1
+       ;; LOOP START
+       ldb.ab  r6, [r1,1]
+       stb.ab  r6, [r3,1]
+copybytewise_1:
+       j       [blink]
+
+unalignedOffby2:
+;;; CASE 2: The source is unaligned, off by 2
+       ldh.ab  r5, [r1, 2]
+       sub     r2, r2, 1
+
+       ;; Both src and dst are aligned
+       ;; Convert to words, unfold x2
+       lsr.f   lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+       asl.nz  r5, r5, 16
+#endif
+       lpnz    @copy8bytes_2
+       ;; LOOP START
+       ld.ab   r6, [r1, 4]
+       prefetch [r1, 28]       ;Prefetch the next read location
+       ld.ab   r8, [r1,4]
+       prefetchw [r3, 32]      ;Prefetch the next write location
+
+       SHIFT_1 (r7, r6, 16)
+       or      r7, r7, r5
+       SHIFT_2 (r5, r6, 16)
+
+       SHIFT_1 (r9, r8, 16)
+       or      r9, r9, r5
+       SHIFT_2 (r5, r8, 16)
+
+       st.ab   r7, [r3, 4]
+       st.ab   r9, [r3, 4]
+copy8bytes_2:
+
+#ifdef __BIG_ENDIAN__
+       lsr.nz  r5, r5, 16
+#endif
+       sth.ab  r5, [r3, 2]
+
+       and.f   lp_count, r2, 0x07 ;Last 8bytes
+       lpnz    @copybytewise_2
+       ;; LOOP START
+       ldb.ab  r6, [r1,1]
+       stb.ab  r6, [r3,1]
+copybytewise_2:
+       j       [blink]
+
+unalignedOffby3:
+;;; CASE 3: The source is unaligned, off by 3
+;;; Hence, we need to read 1 byte to achieve the 32bit alignment
+
+       ;; Both src and dst are aligned
+       ;; Convert to words, unfold x2
+       lsr.f   lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+       asl.ne  r5, r5, 24
+#endif
+       lpnz    @copy8bytes_3
+       ;; LOOP START
+       ld.ab   r6, [r1, 4]
+       prefetch [r1, 28]       ;Prefetch the next read location
+       ld.ab   r8, [r1,4]
+       prefetchw [r3, 32]      ;Prefetch the next write location
+
+       SHIFT_1 (r7, r6, 8)
+       or      r7, r7, r5
+       SHIFT_2 (r5, r6, 24)
+
+       SHIFT_1 (r9, r8, 8)
+       or      r9, r9, r5
+       SHIFT_2 (r5, r8, 24)
+
+       st.ab   r7, [r3, 4]
+       st.ab   r9, [r3, 4]
+copy8bytes_3:
+
+#ifdef __BIG_ENDIAN__
+       lsr.nz  r5, r5, 24
+#endif
+       stb.ab  r5, [r3, 1]
+
+       and.f   lp_count, r2, 0x07 ;Last 8bytes
+       lpnz    @copybytewise_3
+       ;; LOOP START
+       ldb.ab  r6, [r1,1]
+       stb.ab  r6, [r3,1]
+copybytewise_3:
+       j       [blink]
+
+END(memcpy)
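
The SHIFT_1/SHIFT_2 pairs above are the classic shift-and-merge scheme for a
source that is off word alignment: every aligned 32-bit load completes the
current output word and carries its remaining bytes into the next one. A
little-endian C model of the off-by-1 case (stand-alone sketch; names
invented):

    #include <stdint.h>

    /* dst is aligned; src_al points at the first aligned source word;
     * carry already holds the 3 leading bytes read to reach alignment */
    static void copy_words_off1(uint32_t *dst, const uint32_t *src_al,
                                uint32_t carry, int words)
    {
        while (words--) {
            uint32_t w = *src_al++;

            *dst++ = carry | (w << 24);    /* SHIFT_1: asl 24 */
            carry  = w >> 8;               /* SHIFT_2: lsr 8  */
        }
        /* the 3 bytes left in carry are stored by the EXTRACT steps */
    }
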
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
new file mode 100644 (file)
index 0000000..92d573c
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#undef PREALLOC_NOT_AVAIL
+
+#ifdef PREALLOC_NOT_AVAIL
+#define PREWRITE(A,B)  prefetchw [(A),(B)]
+#else
+#define PREWRITE(A,B)  prealloc [(A),(B)]
+#endif
+
+ENTRY(memset)
+       prefetchw [r0]          ; Prefetch the write location
+       mov.f   0, r2
+;;; if size is zero
+       jz.d    [blink]
+       mov     r3, r0          ; don't clobber ret val
+
+;;; if length <= 8
+       brls.d.nt       r2, 8, .Lsmallchunk
+       mov.f   lp_count,r2
+
+       and.f   r4, r0, 0x03
+       rsub    lp_count, r4, 4
+       lpnz    @.Laligndestination
+       ;; LOOP BEGIN
+       stb.ab  r1, [r3,1]
+       sub     r2, r2, 1
+.Laligndestination:
+
+;;; Destination is aligned
+       and     r1, r1, 0xFF
+       asl     r4, r1, 8
+       or      r4, r4, r1
+       asl     r5, r4, 16
+       or      r5, r5, r4
+       mov     r4, r5
+
+       sub3    lp_count, r2, 8
+       cmp     r2, 64
+       bmsk.hi r2, r2, 5
+       mov.ls  lp_count, 0
+       add3.hi r2, r2, 8
+
+;;; Convert len to Dwords, unfold x8
+       lsr.f   lp_count, lp_count, 6
+       lpnz    @.Lset64bytes
+       ;; LOOP START
+       PREWRITE(r3, 64)        ;Prefetch the next write location
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+.Lset64bytes:
+
+       lsr.f   lp_count, r2, 5 ;Last remaining  max 124 bytes
+       lpnz    .Lset32bytes
+       ;; LOOP START
+       prefetchw   [r3, 32]    ;Prefetch the next write location
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+       std.ab  r4, [r3, 8]
+.Lset32bytes:
+
+       and.f   lp_count, r2, 0x1F ;Last remaining 31 bytes
+.Lsmallchunk:
+       lpnz    .Lcopy3bytes
+       ;; LOOP START
+       stb.ab  r1, [r3, 1]
+.Lcopy3bytes:
+
+       j       [blink]
+
+END(memset)
+
+ENTRY(memzero)
+       ; adjust bzero args to memset args
+       mov     r2, r1
+       b.d     memset          ;tail call so need to tinker with blink
+       mov     r1, 0
+END(memzero)
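
The asl/or sequence after .Laligndestination widens the fill byte into a
repeating 32-bit pattern (paired up again for the 64-bit std.ab stores). The
same replication in C:

    #include <stdint.h>

    static uint32_t spread_byte(int c)
    {
        uint32_t b = (uint32_t)c & 0xFF;

        b |= b << 8;     /* asl r4, r1, 8  ; or r4, r4, r1 */
        b |= b << 16;    /* asl r5, r4, 16 ; or r5, r5, r4 */
        return b;        /* e.g. 0xAB -> 0xABABABAB */
    }
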
diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S
new file mode 100644 (file)
index 0000000..4f338ee
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(strcmp)
+       or      r2, r0, r1
+       bmsk_s  r2, r2, 1
+       brne    r2, 0, @.Lcharloop
+
+;;; s1 and s2 are word aligned
+       ld.ab   r2, [r0, 4]
+
+       mov_s   r12, 0x01010101
+       ror     r11, r12
+       .align  4
+.LwordLoop:
+       ld.ab   r3, [r1, 4]
+       ;; Detect NULL char in str1
+       sub     r4, r2, r12
+       ld.ab   r5, [r0, 4]
+       bic     r4, r4, r2
+       and     r4, r4, r11
+       brne.d.nt       r4, 0, .LfoundNULL
+       ;; Check if the read locations are the same
+       cmp     r2, r3
+       beq.d   .LwordLoop
+       mov.eq  r2, r5
+
+       ;; A mismatch was found, work out the return value
+#ifdef __LITTLE_ENDIAN__
+       swape   r3, r3
+       mov_s   r0, 1
+       swape   r2, r2
+#else
+       mov_s   r0, 1
+#endif
+       cmp_s   r2, r3
+       j_s.d   [blink]
+       bset.lo r0, r0, 31
+
+       .align 4
+.LfoundNULL:
+#ifdef __BIG_ENDIAN__
+       swape   r4, r4
+       swape   r2, r2
+       swape   r3, r3
+#endif
+       ;; Find null byte
+       ffs     r0, r4
+       bmsk    r2, r2, r0
+       bmsk    r3, r3, r0
+       swape   r2, r2
+       swape   r3, r3
+       ;; make the return value
+       sub.f   r0, r2, r3
+       mov.hi  r0, 1
+       j_s.d   [blink]
+       bset.lo r0, r0, 31
+
+       .align 4
+.Lcharloop:
+       ldb.ab  r2, [r0, 1]
+       ldb.ab  r3, [r1, 1]
+       nop
+       breq    r2, 0, .Lcmpend
+       breq    r2, r3, .Lcharloop
+
+       .align 4
+.Lcmpend:
+       j_s.d   [blink]
+       sub     r0, r2, r3
+END(strcmp)
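
The sub/bic/and triplet in .LwordLoop is the standard carry-trick NUL scan:
with r12 = 0x01010101 and r11 = ror(r12) = 0x80808080, the result is nonzero
exactly when some byte of the word is zero. In C:

    #include <stdint.h>

    static int word_has_nul(uint32_t x)
    {
        return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
    }
    /* word_has_nul(0x61620063) != 0, word_has_nul(0x61626364) == 0 */
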
index ac95cc239c1e47d3d2f5a133b1d66936c6af0b91..7beb941556c3f73567b8174b6dc1cd15c2ef2d49 100644 (file)
@@ -7,4 +7,4 @@
 #
 
 obj-y  := extable.o ioremap.o dma.o fault.o init.o
-obj-y  += tlb.o tlbex.o cache_arc700.o mmap.o
+obj-y  += tlb.o tlbex.o cache.o mmap.o
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
new file mode 100644 (file)
index 0000000..b29d62e
--- /dev/null
@@ -0,0 +1,843 @@
+/*
+ * ARC Cache Management
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/cache.h>
+#include <linux/mmu_context.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/setup.h>
+
+static int l2_line_sz;
+
+void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
+                              unsigned long sz, const int cacheop);
+
+char *arc_cache_mumbojumbo(int c, char *buf, int len)
+{
+       int n = 0;
+       struct cpuinfo_arc_cache *p;
+
+#define PR_CACHE(p, cfg, str)                                          \
+       if (!(p)->ver)                                                  \
+               n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");     \
+       else                                                            \
+               n += scnprintf(buf + n, len - n,                        \
+                       str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",  \
+                       (p)->sz_k, (p)->assoc, (p)->line_len,           \
+                       (p)->vipt ? "VIPT" : "PIPT",                    \
+                       (p)->alias ? " aliasing" : "",                  \
+                       IS_ENABLED(cfg) ? "" : " (not used)");
+
+       PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
+       PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
+
+       p = &cpuinfo_arc700[c].slc;
+       if (p->ver)
+               n += scnprintf(buf + n, len - n,
+                       "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+
+       return buf;
+}
+
+/*
+ * Read the Cache Build Configuration Registers, decode them and save into
+ * the cpuinfo structure for later use.
+ * No validation done here, simply read/convert the BCRs
+ */
+void read_decode_cache_bcr(void)
+{
+       struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc;
+       unsigned int cpu = smp_processor_id();
+       struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+               unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+       } ibcr, dbcr;
+
+       struct bcr_generic sbcr;
+
+       struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+               unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+       } slc_cfg;
+
+       p_ic = &cpuinfo_arc700[cpu].icache;
+       READ_BCR(ARC_REG_IC_BCR, ibcr);
+
+       if (!ibcr.ver)
+               goto dc_chk;
+
+       if (ibcr.ver <= 3) {
+               BUG_ON(ibcr.config != 3);
+               p_ic->assoc = 2;                /* Fixed to 2w set assoc */
+       } else if (ibcr.ver >= 4) {
+               p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+       }
+
+       p_ic->line_len = 8 << ibcr.line_len;
+       p_ic->sz_k = 1 << (ibcr.sz - 1);
+       p_ic->ver = ibcr.ver;
+       p_ic->vipt = 1;
+       p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+
+dc_chk:
+       p_dc = &cpuinfo_arc700[cpu].dcache;
+       READ_BCR(ARC_REG_DC_BCR, dbcr);
+
+       if (!dbcr.ver)
+               goto slc_chk;
+
+       if (dbcr.ver <= 3) {
+               BUG_ON(dbcr.config != 2);
+               p_dc->assoc = 4;                /* Fixed to 4w set assoc */
+               p_dc->vipt = 1;
+               p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
+       } else if (dbcr.ver >= 4) {
+               p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
+               p_dc->vipt = 0;
+               p_dc->alias = 0;                /* PIPT so can't VIPT alias */
+       }
+
+       p_dc->line_len = 16 << dbcr.line_len;
+       p_dc->sz_k = 1 << (dbcr.sz - 1);
+       p_dc->ver = dbcr.ver;
+
+slc_chk:
+       if (!is_isa_arcv2())
+               return;
+
+       p_slc = &cpuinfo_arc700[cpu].slc;
+       READ_BCR(ARC_REG_SLC_BCR, sbcr);
+       if (sbcr.ver) {
+               READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+               p_slc->ver = sbcr.ver;
+               p_slc->sz_k = 128 << slc_cfg.sz;
+               l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+       }
+}
+
+/*
+ * Line Operation on {I,D}-Cache
+ */
+
+#define OP_INV         0x1
+#define OP_FLUSH       0x2
+#define OP_FLUSH_N_INV 0x3
+#define OP_INV_IC      0x4
+
+/*
+ *             I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
+ *
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * In fact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
+ *
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
+ *
+ * ------------------
+ * MMU v1/v2 (Fixed Page Size 8k)
+ * ------------------
+ * The solution was to provide CDU with these additional vaddr bits. These
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
+ * H/w folks chose [17:13] to be a future-safe range, and moreover these 5 bits
+ * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
+ * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
+ * represent the offset within cache-line. The advantage of using this "clumsy"
+ * interface for additional info was that no new reg was needed in CDU programming
+ * model.
+ *
+ * 17:13 represented the max num of bits passable, actual bits needed were
+ * fewer, based on the num-of-aliases possible.
+ * -for 2 alias possibility, only bit 13 needed (32K cache)
+ * -for 4 alias possibility, bits 14:13 needed (64K cache)
+ *
+ * ------------------
+ * MMU v3
+ * ------------------
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
+ * only support 8k (default), 16k and 4k.
+ * However from a hardware perspective, smaller page sizes aggravate aliasing,
+ * meaning more vaddr bits are needed to disambiguate the cache-line op;
+ * the existing scheme of piggybacking won't work for certain configurations.
+ * Two new registers IC_PTAG and DC_PTAG were introduced.
+ * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ */
+
+static inline
+void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr,
+                         unsigned long sz, const int op)
+{
+       unsigned int aux_cmd;
+       int num_lines;
+       const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+       if (op == OP_INV_IC) {
+               aux_cmd = ARC_REG_IC_IVIL;
+       } else {
+               /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+               aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+       }
+
+       /* Ensure we properly floor/ceil the non-line aligned/sized requests
+        * and have @paddr - aligned to cache line and integral @num_lines.
+        * This however can be avoided for page sized since:
+        *  -@paddr will be cache-line aligned already (being page aligned)
+        *  -@sz will be integral multiple of line size (being page sized).
+        */
+       if (!full_page) {
+               sz += paddr & ~CACHE_LINE_MASK;
+               paddr &= CACHE_LINE_MASK;
+               vaddr &= CACHE_LINE_MASK;
+       }
+
+       num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+       /* MMUv2 and before: paddr contains stuffed vaddrs bits */
+       paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+
+       while (num_lines-- > 0) {
+               write_aux_reg(aux_cmd, paddr);
+               paddr += L1_CACHE_BYTES;
+       }
+}
+
+static inline
+void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr,
+                         unsigned long sz, const int op)
+{
+       unsigned int aux_cmd, aux_tag;
+       int num_lines;
+       const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+       if (op == OP_INV_IC) {
+               aux_cmd = ARC_REG_IC_IVIL;
+               aux_tag = ARC_REG_IC_PTAG;
+       } else {
+               aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+               aux_tag = ARC_REG_DC_PTAG;
+       }
+
+       /* Ensure we properly floor/ceil the non-line aligned/sized requests
+        * and have @paddr - aligned to cache line and integral @num_lines.
+        * This however can be avoided for page sized since:
+        *  -@paddr will be cache-line aligned already (being page aligned)
+        *  -@sz will be integral multiple of line size (being page sized).
+        */
+       if (!full_page) {
+               sz += paddr & ~CACHE_LINE_MASK;
+               paddr &= CACHE_LINE_MASK;
+               vaddr &= CACHE_LINE_MASK;
+       }
+       num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+       /*
+        * MMUv3, cache ops require paddr in PTAG reg
+        * if V-P const for loop, PTAG can be written once outside loop
+        */
+       if (full_page)
+               write_aux_reg(aux_tag, paddr);
+
+       while (num_lines-- > 0) {
+               if (!full_page) {
+                       write_aux_reg(aux_tag, paddr);
+                       paddr += L1_CACHE_BYTES;
+               }
+
+               write_aux_reg(aux_cmd, vaddr);
+               vaddr += L1_CACHE_BYTES;
+       }
+}
+
+/*
+ * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
+ * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ *
+ * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
+ * specified in PTAG (similar to MMU v3)
+ */
+static inline
+void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr,
+                         unsigned long sz, const int cacheop)
+{
+       unsigned int aux_cmd;
+       int num_lines;
+       const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+       if (cacheop == OP_INV_IC) {
+               aux_cmd = ARC_REG_IC_IVIL;
+       } else {
+               /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+               aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+       }
+
+       /* Ensure we properly floor/ceil the non-line aligned/sized requests
+        * and have @paddr - aligned to cache line and integral @num_lines.
+        * This however can be avoided for page sized since:
+        *  -@paddr will be cache-line aligned already (being page aligned)
+        *  -@sz will be integral multiple of line size (being page sized).
+        */
+       if (!full_page_op) {
+               sz += paddr & ~CACHE_LINE_MASK;
+               paddr &= CACHE_LINE_MASK;
+       }
+
+       num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+       while (num_lines-- > 0) {
+               write_aux_reg(aux_cmd, paddr);
+               paddr += L1_CACHE_BYTES;
+       }
+}
+
+#if (CONFIG_ARC_MMU_VER < 3)
+#define __cache_line_loop      __cache_line_loop_v2
+#elif (CONFIG_ARC_MMU_VER == 3)
+#define __cache_line_loop      __cache_line_loop_v3
+#elif (CONFIG_ARC_MMU_VER > 3)
+#define __cache_line_loop      __cache_line_loop_v4
+#endif
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+
+/***************************************************************
+ * Machine specific helpers for Entire D-Cache or Per Line ops
+ */
+
+static inline void __before_dc_op(const int op)
+{
+       if (op == OP_FLUSH_N_INV) {
+               /* Dcache provides 2 cmd: FLUSH or INV
+                * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
+                * flush-n-inv is achieved by INV cmd but with IM=1
+                * So toggle INV sub-mode depending on op request and default
+                */
+               const unsigned int ctl = ARC_REG_DC_CTRL;
+               write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH);
+       }
+}
+
+static inline void __after_dc_op(const int op)
+{
+       if (op & OP_FLUSH) {
+               const unsigned int ctl = ARC_REG_DC_CTRL;
+               unsigned int reg;
+
+               /* flush / flush-n-inv both wait */
+               while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS)
+                       ;
+
+               /* Switch back to default Invalidate mode */
+               if (op == OP_FLUSH_N_INV)
+                       write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH);
+       }
+}
+
+/*
+ * Operation on Entire D-Cache
+ * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
+ * Note that constant propagation ensures all the checks are gone
+ * in generated code
+ */
+static inline void __dc_entire_op(const int op)
+{
+       int aux;
+
+       __before_dc_op(op);
+
+       if (op & OP_INV)        /* Inv or flush-n-inv use same cmd reg */
+               aux = ARC_REG_DC_IVDC;
+       else
+               aux = ARC_REG_DC_FLSH;
+
+       write_aux_reg(aux, 0x1);
+
+       __after_dc_op(op);
+}
+
+/* For kernel mappings cache operation: index is same as paddr */
+#define __dc_line_op_k(p, sz, op)      __dc_line_op(p, p, sz, op)
+
+/*
+ * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
+ */
+static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
+                               unsigned long sz, const int op)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+
+       __before_dc_op(op);
+
+       __cache_line_loop(paddr, vaddr, sz, op);
+
+       __after_dc_op(op);
+
+       local_irq_restore(flags);
+}
+
+#else
+
+#define __dc_entire_op(op)
+#define __dc_line_op(paddr, vaddr, sz, op)
+#define __dc_line_op_k(paddr, sz, op)
+
+#endif /* CONFIG_ARC_HAS_DCACHE */
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+
+static inline void __ic_entire_inv(void)
+{
+       write_aux_reg(ARC_REG_IC_IVIC, 1);
+       read_aux_reg(ARC_REG_IC_CTRL);  /* blocks */
+}
+
+static inline void
+__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
+                         unsigned long sz)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
+       local_irq_restore(flags);
+}
+
+#ifndef CONFIG_SMP
+
+#define __ic_line_inv_vaddr(p, v, s)   __ic_line_inv_vaddr_local(p, v, s)
+
+#else
+
+struct ic_inv_args {
+       unsigned long paddr, vaddr;
+       int sz;
+};
+
+static void __ic_line_inv_vaddr_helper(void *info)
+{
+       struct ic_inv_args *ic_inv = info;
+
+       __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
+}
+
+static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+                               unsigned long sz)
+{
+       struct ic_inv_args ic_inv = {
+               .paddr = paddr,
+               .vaddr = vaddr,
+               .sz    = sz
+       };
+
+       on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
+}
+
+#endif /* CONFIG_SMP */
+
+#else  /* !CONFIG_ARC_HAS_ICACHE */
+
+#define __ic_entire_inv()
+#define __ic_line_inv_vaddr(pstart, vstart, sz)
+
+#endif /* CONFIG_ARC_HAS_ICACHE */
+
+noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+       unsigned long flags;
+       unsigned int ctrl;
+
+       local_irq_save(flags);
+
+       /*
+        * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
+        *  - b'000 (default) is Flush,
+        *  - b'001 is Invalidate if CTRL.IM == 0
+        *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
+        */
+       ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+       /* Don't rely on default value of IM bit */
+       if (!(op & OP_FLUSH))           /* i.e. OP_INV */
+               ctrl &= ~SLC_CTRL_IM;   /* clear IM: Disable flush before Inv */
+       else
+               ctrl |= SLC_CTRL_IM;
+
+       if (op & OP_INV)
+               ctrl |= SLC_CTRL_RGN_OP_INV;    /* Inv or flush-n-inv */
+       else
+               ctrl &= ~SLC_CTRL_RGN_OP_INV;
+
+       write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+       /*
+        * Lower bits are ignored, no need to clip
+        * END needs to be set up before START (the latter triggers the operation)
+        * END can't be the same as START, so add (l2_line_sz - 1) to sz
+        */
+       write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
+       write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
+
+       while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY)
+               ;
+
+       local_irq_restore(flags);
+#endif
+}
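
The END register computation deserves a worked example, since the (l2_line_sz - 1) pad is what keeps END strictly above START even for sub-line requests. With an assumed 128-byte L2 line:

    /* sketch: flushing a single byte at 0x8000_0040, l2_line_sz = 128
     * (the line size here is an assumption for illustration) */
    unsigned long paddr = 0x80000040, sz = 1, l2_line_sz = 128;
    unsigned long end   = paddr + sz + l2_line_sz - 1;   /* 0x800000c0 */
    unsigned long start = paddr;                         /* 0x80000040 */
    /* END > START holds even though sz < one line; the hardware ignores
     * the intra-line low bits of both registers, so at worst one extra
     * line is flushed. */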
+
+static inline int need_slc_flush(void)
+{
+       return is_isa_arcv2() && l2_line_sz;
+}
+
+/***********************************************************
+ * Exported APIs
+ */
+
+/*
+ * Handle cache congruency of kernel and userspace mappings of a page when the
+ * kernel writes to or reads from it
+ *
+ * The idea is to defer flushing of the kernel mapping after a WRITE, possible if:
+ *  -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
+ *  -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
+ *  -In SMP, if hardware caches are coherent
+ *
+ * There's a corollary case, where kernel READs from a userspace mapped page.
+ * If the U-mapping is not congruent to the K-mapping, the former needs flushing.
+ */
+void flush_dcache_page(struct page *page)
+{
+       struct address_space *mapping;
+
+       if (!cache_is_vipt_aliasing()) {
+               clear_bit(PG_dc_clean, &page->flags);
+               return;
+       }
+
+       /* don't handle anon pages here */
+       mapping = page_mapping(page);
+       if (!mapping)
+               return;
+
+       /*
+        * pagecache page, file not yet mapped to userspace
+        * Make a note that K-mapping is dirty
+        */
+       if (!mapping_mapped(mapping)) {
+               clear_bit(PG_dc_clean, &page->flags);
+       } else if (page_mapped(page)) {
+
+               /* kernel reading from page with U-mapping */
+               unsigned long paddr = (unsigned long)page_address(page);
+               unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
+
+               if (addr_not_cache_congruent(paddr, vaddr))
+                       __flush_dcache_page(paddr, vaddr);
+       }
+}
+EXPORT_SYMBOL(flush_dcache_page);
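
The congruency test itself is not part of this diff (it lives in asm/cacheflush.h); a minimal sketch of the idea, assuming the cache "colour" is simply the lowest index bit above the page offset:

    /* sketch only -- the in-tree helper may differ */
    #define CACHE_COLOR(addr)  (((unsigned long)(addr) >> PAGE_SHIFT) & 1)

    #define addr_not_cache_congruent(a1, a2)                       \
            (cache_is_vipt_aliasing() ?                            \
                    CACHE_COLOR(a1) != CACHE_COLOR(a2) : 0)

Two mappings of the same page only need explicit syncing when their colours differ, i.e. when they index different cache sets despite naming the same physical line.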
+
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+
+       if (need_slc_flush())
+               slc_op(start, sz, OP_FLUSH_N_INV);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_INV);
+
+       if (need_slc_flush())
+               slc_op(start, sz, OP_INV);
+}
+EXPORT_SYMBOL(dma_cache_inv);
+
+void dma_cache_wback(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_FLUSH);
+
+       if (need_slc_flush())
+               slc_op(start, sz, OP_FLUSH);
+}
+EXPORT_SYMBOL(dma_cache_wback);
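
The three dma_cache_* exports share one shape: the L1 op first, then the same op pushed out to the SLC when an ARCv2 L2 is present. A possible common helper (not part of this series) would look like:

    /* sketch of the shared shape; keeping the three thin wrappers instead
     * lets @op stay compile-time constant, so the checks in
     * __before_dc_op()/__after_dc_op() fold away entirely */
    static void __dma_cache_op(unsigned long start, unsigned long sz, const int op)
    {
            __dc_line_op_k(start, sz, op);

            if (need_slc_flush())
                    slc_op(start, sz, op);
    }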
+
+/*
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
+ * This is called on insmod, with kernel virtual address for CODE of
+ * the module. ARC cache maintenance ops require PHY address thus we
+ * need to convert vmalloc addr to PHY addr
+ */
+void flush_icache_range(unsigned long kstart, unsigned long kend)
+{
+       unsigned int tot_sz;
+
+       WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
+
+       /* Shortcut for bigger flush ranges.
+        * Here we don't care if this was kernel virtual or phy addr
+        */
+       tot_sz = kend - kstart;
+       if (tot_sz > PAGE_SIZE) {
+               flush_cache_all();
+               return;
+       }
+
+       /* Case: Kernel Phy addr (0x8000_0000 onwards) */
+       if (likely(kstart > PAGE_OFFSET)) {
+               /*
+                * The 2nd arg despite being paddr will be used to index icache
+                * This is OK since no alternate virtual mappings will exist
+                * given the callers for this case: kprobe/kgdb in built-in
+                * kernel code only.
+                */
+               __sync_icache_dcache(kstart, kstart, kend - kstart);
+               return;
+       }
+
+       /*
+        * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
+        * (1) ARC Cache Maintenance ops only take Phy addr, hence special
+        *     handling of kernel vaddr.
+        *
+        * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
+        *     it still needs to handle a 2-page scenario, where the range
+        *     straddles 2 virtual pages, hence the loop
+        */
+       while (tot_sz > 0) {
+               unsigned int off, sz;
+               unsigned long phy, pfn;
+
+               off = kstart % PAGE_SIZE;
+               pfn = vmalloc_to_pfn((void *)kstart);
+               phy = (pfn << PAGE_SHIFT) + off;
+               sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
+               __sync_icache_dcache(phy, kstart, sz);
+               kstart += sz;
+               tot_sz -= sz;
+       }
+}
+EXPORT_SYMBOL(flush_icache_range);
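
A typical caller shape, e.g. after patching instructions in a freshly loaded module (the variables here are purely illustrative):

    /* make newly written code at @addr visible to the I-cache;
     * @addr and @len are hypothetical caller variables */
    flush_icache_range((unsigned long)addr, (unsigned long)addr + len);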
+
+/*
+ * General purpose helper to make I and D cache lines consistent.
+ * @paddr is phy addr of region
+ * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
+ *    However in one instance, when called by kprobe (for a breakpoint in
+ *    builtin kernel code), @vaddr will be paddr only, meaning the CDU operation
+ *    will use a paddr to index the cache (despite VIPT). This is fine since a
+ *    builtin kernel page will not have any virtual mappings.
+ *    A kprobe on a loadable module will be a kernel vaddr.
+ */
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
+{
+       __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
+       __ic_line_inv_vaddr(paddr, vaddr, len);
+}
+
+/* wrapper to eliminate alignment checks in the flush loop at compile time */
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
+{
+       __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
+}
+
+/*
+ * wrapper to clear out kernel or userspace mappings of a page
+ * For kernel mappings @vaddr == @paddr
+ */
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr)
+{
+       __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
+}
+
+noinline void flush_cache_all(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+
+       __ic_entire_inv();
+       __dc_entire_op(OP_FLUSH_N_INV);
+
+       local_irq_restore(flags);
+}
+
+#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+       flush_cache_all();
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
+                     unsigned long pfn)
+{
+       unsigned int paddr = pfn << PAGE_SHIFT;
+
+       u_vaddr &= PAGE_MASK;
+
+       __flush_dcache_page(paddr, u_vaddr);
+
+       if (vma->vm_flags & VM_EXEC)
+               __inv_icache_page(paddr, u_vaddr);
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+                      unsigned long end)
+{
+       flush_cache_all();
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+                    unsigned long u_vaddr)
+{
+       /* TBD: do we really need to clear the kernel mapping? */
+       __flush_dcache_page((unsigned long)page_address(page), u_vaddr);
+       __flush_dcache_page((unsigned long)page_address(page),
+                           (unsigned long)page_address(page));
+}
+
+#endif
+
+void copy_user_highpage(struct page *to, struct page *from,
+       unsigned long u_vaddr, struct vm_area_struct *vma)
+{
+       unsigned long kfrom = (unsigned long)page_address(from);
+       unsigned long kto = (unsigned long)page_address(to);
+       int clean_src_k_mappings = 0;
+
+       /*
+        * If the SRC page was already mapped in userspace AND its U-mapping is
+        * not congruent with the K-mapping, sync the former to the physical page
+        * so that the K-mapping in the memcpy below sees the right data
+        *
+        * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
+        * equally valid for SRC page as well
+        */
+       if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
+               __flush_dcache_page(kfrom, u_vaddr);
+               clean_src_k_mappings = 1;
+       }
+
+       copy_page((void *)kto, (void *)kfrom);
+
+       /*
+        * Mark DST page K-mapping as dirty for a later finalization by
+        * update_mmu_cache(). The finalization could have been done here as
+        * well (given that both vaddr/paddr are available), but
+        * update_mmu_cache() already has code to do that for other non-copied
+        * user pages (e.g. read faults which wire in the pagecache page
+        * directly).
+        */
+       clear_bit(PG_dc_clean, &to->flags);
+
+       /*
+        * if SRC was already usermapped and non-congruent to the kernel
+        * mapping, sync the kernel mapping back to the physical page
+        */
+       if (clean_src_k_mappings) {
+               __flush_dcache_page(kfrom, kfrom);
+               set_bit(PG_dc_clean, &from->flags);
+       } else {
+               clear_bit(PG_dc_clean, &from->flags);
+       }
+}
+
+void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
+{
+       clear_page(to);
+       clear_bit(PG_dc_clean, &page->flags);
+}
+
+/**********************************************************************
+ * Explicit Cache flush request from user space via syscall
+ * Needed for JITs which generate code on the fly
+ */
+SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
+{
+       /* TBD: optimize this */
+       flush_cache_all();
+       return 0;
+}
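
From user space, a JIT would invoke this via the raw syscall interface; a sketch, assuming __NR_cacheflush is exposed by the ARC uapi headers and that a zero flags value is acceptable:

    #include <unistd.h>
    #include <sys/syscall.h>

    /* flush freshly generated code before jumping to it (sketch) */
    static void jit_make_visible(void *code, unsigned int len)
    {
            syscall(__NR_cacheflush, (unsigned long)code, len, 0);
    }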
+
+void arc_cache_init(void)
+{
+       unsigned int __maybe_unused cpu = smp_processor_id();
+       char str[256];
+
+       printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
+       if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
+               struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+
+               if (!ic->ver)
+                       panic("cache support enabled but non-existent cache\n");
+
+               if (ic->line_len != L1_CACHE_BYTES)
+                       panic("ICache line [%d] != kernel Config [%d]",
+                             ic->line_len, L1_CACHE_BYTES);
+
+               if (ic->ver != CONFIG_ARC_MMU_VER)
+                       panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
+                             ic->ver, CONFIG_ARC_MMU_VER);
+
+               /*
+                * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
+                * pair to provide vaddr/paddr respectively, just as in MMU v3
+                */
+               if (is_isa_arcv2() && ic->alias)
+                       _cache_line_loop_ic_fn = __cache_line_loop_v3;
+               else
+                       _cache_line_loop_ic_fn = __cache_line_loop;
+       }
+
+       if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
+               struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+
+               if (!dc->ver)
+                       panic("cache support enabled but non-existent cache\n");
+
+               if (dc->line_len != L1_CACHE_BYTES)
+                       panic("DCache line [%d] != kernel Config [%d]",
+                             dc->line_len, L1_CACHE_BYTES);
+
+               /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
+               if (is_isa_arcompact()) {
+                       int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
+
+                       if (dc->alias && !handled)
+                               panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+                       else if (!dc->alias && handled)
+                               panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+               }
+       }
+}
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
deleted file mode 100644 (file)
index 12b2100..0000000
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- * ARC700 VIPT Cache Management
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
- *   -flush_cache_dup_mm (fork)
- *   -likewise for flush_cache_mm (exit/execve)
- *   -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
- *
- * vineetg: Apr 2011
- *  -Now that MMU can support larger pg sz (16K), the determination of
- *   aliasing should not be based on assumption of 8k pg
- *
- * vineetg: Mar 2011
- *  -optimised version of flush_icache_range( ) for making I/D coherent
- *   when vaddr is available (agnostic of num of aliases)
- *
- * vineetg: Mar 2011
- *  -Added documentation about I-cache aliasing on ARC700 and the way it
- *   was handled up until MMU V2.
- *  -Spotted a three year old bug when killing the 4 aliases, which needs
- *   bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
- *                        instead of paddr | {0x00, 0x01, 0x10, 0x11}
- *   (Rajesh you owe me one now)
- *
- * vineetg: Dec 2010
- *  -Off-by-one error when computing num_of_lines to flush
- *   This broke signal handling with bionic which uses synthetic sigret stub
- *
- * vineetg: Mar 2010
- *  -GCC can't generate ZOL for core cache flush loops.
- *   Convert them into iteration-count based loops as opposed to
- *   while (start < end) types
- *
- * Vineetg: July 2009
- *  -In I-cache flush routine we used to check for aliasing for every line INV.
- *   Instead we now set up routines per cache geometry and invoke them
- *   via function pointers.
- *
- * Vineetg: Jan 2009
- *  -Cache Line flush routines used to flush an extra line beyond end addr
- *   because check was while (end >= start) instead of (end > start)
- *     =Some call sites had to work around by doing -1, -4 etc to end param
- *     =Some callers didn't care. This was especially bad in case of INV
- *      routines which would discard valid data (cause of the horrible ext2
- *      bug in ARC IDE driver)
- *
- * vineetg: June 11th 2008: Fixed flush_icache_range( )
- *  -Since ARC700 caches are not coherent (I$ doesn't snoop D$) both need
- *   to be flushed, which it was not doing.
- *  -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
- *   however ARC cache maintenance OPs require PHY addr. Thus need to do
- *   vmalloc_to_phy.
- *  -Also added optimisation there, that for range > PAGE SIZE we flush the
- *   entire cache in one shot rather than line by line. For e.g. a module
- *   with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
- *   while cache is only 16 or 32k.
- */
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/mmu_context.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <linux/pagemap.h>
-#include <asm/cacheflush.h>
-#include <asm/cachectl.h>
-#include <asm/setup.h>
-
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
-       int n = 0;
-
-#define PR_CACHE(p, cfg, str)                                          \
-       if (!(p)->ver)                                                  \
-               n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");     \
-       else                                                            \
-               n += scnprintf(buf + n, len - n,                        \
-                       str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",  \
-                       (p)->sz_k, (p)->assoc, (p)->line_len,           \
-                       (p)->vipt ? "VIPT" : "PIPT",                    \
-                       (p)->alias ? " aliasing" : "",                  \
-                       IS_ENABLED(cfg) ? "" : " (not used)");
-
-       PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
-       PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
-       return buf;
-}
-
-/*
- * Read the Cache Build Configuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-void read_decode_cache_bcr(void)
-{
-       struct cpuinfo_arc_cache *p_ic, *p_dc;
-       unsigned int cpu = smp_processor_id();
-       struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-               unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-       } ibcr, dbcr;
-
-       p_ic = &cpuinfo_arc700[cpu].icache;
-       READ_BCR(ARC_REG_IC_BCR, ibcr);
-
-       if (!ibcr.ver)
-               goto dc_chk;
-
-       BUG_ON(ibcr.config != 3);
-       p_ic->assoc = 2;                /* Fixed to 2w set assoc */
-       p_ic->line_len = 8 << ibcr.line_len;
-       p_ic->sz_k = 1 << (ibcr.sz - 1);
-       p_ic->ver = ibcr.ver;
-       p_ic->vipt = 1;
-       p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
-
-dc_chk:
-       p_dc = &cpuinfo_arc700[cpu].dcache;
-       READ_BCR(ARC_REG_DC_BCR, dbcr);
-
-       if (!dbcr.ver)
-               return;
-
-       BUG_ON(dbcr.config != 2);
-       p_dc->assoc = 4;                /* Fixed to 4w set assoc */
-       p_dc->line_len = 16 << dbcr.line_len;
-       p_dc->sz_k = 1 << (dbcr.sz - 1);
-       p_dc->ver = dbcr.ver;
-       p_dc->vipt = 1;
-       p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
-}
-
-/*
- * 1. Validate the Cache Geometry (compile time config matches hardware)
- * 2. If I-cache suffers from aliasing, set up workarounds (different flush rtn)
- *    (aliasing D-cache configurations are not supported YET)
- * 3. Enable the Caches, set up default flush mode for D-Cache
- * 4. Calculate the SHMLBA used by user space
- */
-void arc_cache_init(void)
-{
-       unsigned int __maybe_unused cpu = smp_processor_id();
-       char str[256];
-
-       printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
-
-       if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
-               struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
-
-               if (!ic->ver)
-                       panic("cache support enabled but non-existent cache\n");
-
-               if (ic->line_len != L1_CACHE_BYTES)
-                       panic("ICache line [%d] != kernel Config [%d]",
-                             ic->line_len, L1_CACHE_BYTES);
-
-               if (ic->ver != CONFIG_ARC_MMU_VER)
-                       panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
-                             ic->ver, CONFIG_ARC_MMU_VER);
-       }
-
-       if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
-               struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
-               int handled;
-
-               if (!dc->ver)
-                       panic("cache support enabled but non-existent cache\n");
-
-               if (dc->line_len != L1_CACHE_BYTES)
-                       panic("DCache line [%d] != kernel Config [%d]",
-                             dc->line_len, L1_CACHE_BYTES);
-
-               /* check for D-Cache aliasing */
-               handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-
-               if (dc->alias && !handled)
-                       panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
-               else if (!dc->alias && handled)
-                       panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
-       }
-}
-
-#define OP_INV         0x1
-#define OP_FLUSH       0x2
-#define OP_FLUSH_N_INV 0x3
-#define OP_INV_IC      0x4
-
-/*
- * Common Helper for Line Operations on {I,D}-Cache
- */
-static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
-                                    unsigned long sz, const int cacheop)
-{
-       unsigned int aux_cmd, aux_tag;
-       int num_lines;
-       const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
-
-       if (cacheop == OP_INV_IC) {
-               aux_cmd = ARC_REG_IC_IVIL;
-#if (CONFIG_ARC_MMU_VER > 2)
-               aux_tag = ARC_REG_IC_PTAG;
-#endif
-       }
-       else {
-               /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
-               aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-#if (CONFIG_ARC_MMU_VER > 2)
-               aux_tag = ARC_REG_DC_PTAG;
-#endif
-       }
-
-       /* Ensure we properly floor/ceil the non-line aligned/sized requests
-        * and have @paddr - aligned to cache line and integral @num_lines.
-        * This however can be avoided for page sized since:
-        *  -@paddr will be cache-line aligned already (being page aligned)
-        *  -@sz will be integral multiple of line size (being page sized).
-        */
-       if (!full_page_op) {
-               sz += paddr & ~CACHE_LINE_MASK;
-               paddr &= CACHE_LINE_MASK;
-               vaddr &= CACHE_LINE_MASK;
-       }
-
-       num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-       /* MMUv2 and before: paddr contains stuffed vaddrs bits */
-       paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#else
-       /* if V-P const for loop, PTAG can be written once outside loop */
-       if (full_page_op)
-               write_aux_reg(aux_tag, paddr);
-#endif
-
-       while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-               /* MMUv3, cache ops require paddr separately */
-               if (!full_page_op) {
-                       write_aux_reg(aux_tag, paddr);
-                       paddr += L1_CACHE_BYTES;
-               }
-
-               write_aux_reg(aux_cmd, vaddr);
-               vaddr += L1_CACHE_BYTES;
-#else
-               write_aux_reg(aux_cmd, paddr);
-               paddr += L1_CACHE_BYTES;
-#endif
-       }
-}
-
-#ifdef CONFIG_ARC_HAS_DCACHE
-
-/***************************************************************
- * Machine specific helpers for Entire D-Cache or Per Line ops
- */
-
-static inline unsigned int __before_dc_op(const int op)
-{
-       unsigned int reg = reg;
-
-       if (op == OP_FLUSH_N_INV) {
-               /* Dcache provides 2 cmds: FLUSH or INV.
-                * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE.
-                * flush-n-inv is achieved by the INV cmd with IM=1,
-                * so toggle the INV sub-mode per the requested op vs. the default.
-                */
-               reg = read_aux_reg(ARC_REG_DC_CTRL);
-               write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH);
-       }
-
-       return reg;
-}
-
-static inline void __after_dc_op(const int op, unsigned int reg)
-{
-       if (op & OP_FLUSH)      /* flush / flush-n-inv both wait */
-               while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
-
-       /* Switch back to default Invalidate mode */
-       if (op == OP_FLUSH_N_INV)
-               write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
-}
-
-/*
- * Operation on Entire D-Cache
- * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
- * Note that constant propagation ensures all the checks are gone
- * in generated code
- */
-static inline void __dc_entire_op(const int cacheop)
-{
-       unsigned int ctrl_reg;
-       int aux;
-
-       ctrl_reg = __before_dc_op(cacheop);
-
-       if (cacheop & OP_INV)   /* Inv or flush-n-inv use same cmd reg */
-               aux = ARC_REG_DC_IVDC;
-       else
-               aux = ARC_REG_DC_FLSH;
-
-       write_aux_reg(aux, 0x1);
-
-       __after_dc_op(cacheop, ctrl_reg);
-}
-
-/* For kernel mappings cache operation: index is same as paddr */
-#define __dc_line_op_k(p, sz, op)      __dc_line_op(p, p, sz, op)
-
-/*
- * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
- */
-static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
-                               unsigned long sz, const int cacheop)
-{
-       unsigned long flags;
-       unsigned int ctrl_reg;
-
-       local_irq_save(flags);
-
-       ctrl_reg = __before_dc_op(cacheop);
-
-       __cache_line_loop(paddr, vaddr, sz, cacheop);
-
-       __after_dc_op(cacheop, ctrl_reg);
-
-       local_irq_restore(flags);
-}
-
-#else
-
-#define __dc_entire_op(cacheop)
-#define __dc_line_op(paddr, vaddr, sz, cacheop)
-#define __dc_line_op_k(paddr, sz, cacheop)
-
-#endif /* CONFIG_ARC_HAS_DCACHE */
-
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-
-/*
- *             I-Cache Aliasing in ARC700 VIPT caches
- *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * In fact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
- *
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
- *
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additional vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreover these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The advantage of using this "clumsy"
- * interface for additional info was that no new reg was needed in the CDU
- * programming model.
- *
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from a hardware perspective, smaller page sizes aggravate aliasing,
- * meaning more vaddr bits are needed to disambiguate the cache-line-op;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG were introduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
- */
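
The stuffing described above is visible verbatim in __cache_line_loop() below; a worked example with the standard 8k page (PAGE_SHIFT = 13, example vaddr invented):

    /* vaddr = 0x2000_6000: (vaddr >> 13) & 0x1F == 0x03, so the CDU is
     * handed paddr | 0x03 -- vaddr bits [17:13] ride in paddr bits [4:0],
     * which line ops ignore anyway */
    paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;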
-
-/***********************************************************
- * Machine specific helper for per line I-Cache invalidate.
- */
-
-static inline void __ic_entire_inv(void)
-{
-       write_aux_reg(ARC_REG_IC_IVIC, 1);
-       read_aux_reg(ARC_REG_IC_CTRL);  /* blocks */
-}
-
-static inline void
-__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
-                         unsigned long sz)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
-       local_irq_restore(flags);
-}
-
-#ifndef CONFIG_SMP
-
-#define __ic_line_inv_vaddr(p, v, s)   __ic_line_inv_vaddr_local(p, v, s)
-
-#else
-
-struct ic_inv_args {
-       unsigned long paddr, vaddr;
-       int sz;
-};
-
-static void __ic_line_inv_vaddr_helper(void *info)
-{
-        struct ic_inv_args *ic_inv = info;
-
-        __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
-}
-
-static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
-                               unsigned long sz)
-{
-       struct ic_inv_args ic_inv = {
-               .paddr = paddr,
-               .vaddr = vaddr,
-               .sz    = sz
-       };
-
-       on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
-}
-
-#endif /* CONFIG_SMP */
-
-#else  /* !CONFIG_ARC_HAS_ICACHE */
-
-#define __ic_entire_inv()
-#define __ic_line_inv_vaddr(pstart, vstart, sz)
-
-#endif /* CONFIG_ARC_HAS_ICACHE */
-
-
-/***********************************************************
- * Exported APIs
- */
-
-/*
- * Handle cache congruency of kernel and userspace mappings of page when kernel
- * writes-to/reads-from
- *
- * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
- *  -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
- *  -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
- *  -In SMP, if hardware caches are coherent
- *
- * There's a corollary case, where kernel READs from a userspace mapped page.
- * If the U-mapping is not congruent to the K-mapping, the former needs flushing.
- */
-void flush_dcache_page(struct page *page)
-{
-       struct address_space *mapping;
-
-       if (!cache_is_vipt_aliasing()) {
-               clear_bit(PG_dc_clean, &page->flags);
-               return;
-       }
-
-       /* don't handle anon pages here */
-       mapping = page_mapping(page);
-       if (!mapping)
-               return;
-
-       /*
-        * pagecache page, file not yet mapped to userspace
-        * Make a note that K-mapping is dirty
-        */
-       if (!mapping_mapped(mapping)) {
-               clear_bit(PG_dc_clean, &page->flags);
-       } else if (page_mapped(page)) {
-
-               /* kernel reading from page with U-mapping */
-               void *paddr = page_address(page);
-               unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
-
-               if (addr_not_cache_congruent(paddr, vaddr))
-                       __flush_dcache_page(paddr, vaddr);
-       }
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-
-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
-{
-       __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
-}
-EXPORT_SYMBOL(dma_cache_wback_inv);
-
-void dma_cache_inv(unsigned long start, unsigned long sz)
-{
-       __dc_line_op_k(start, sz, OP_INV);
-}
-EXPORT_SYMBOL(dma_cache_inv);
-
-void dma_cache_wback(unsigned long start, unsigned long sz)
-{
-       __dc_line_op_k(start, sz, OP_FLUSH);
-}
-EXPORT_SYMBOL(dma_cache_wback);
-
-/*
- * This is API for making I/D Caches consistent when modifying
- * kernel code (loadable modules, kprobes, kgdb...)
- * This is called on insmod, with kernel virtual address for CODE of
- * the module. ARC cache maintenance ops require PHY address thus we
- * need to convert vmalloc addr to PHY addr
- */
-void flush_icache_range(unsigned long kstart, unsigned long kend)
-{
-       unsigned int tot_sz;
-
-       WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
-
-       /* Shortcut for bigger flush ranges.
-        * Here we don't care if this was kernel virtual or phy addr
-        */
-       tot_sz = kend - kstart;
-       if (tot_sz > PAGE_SIZE) {
-               flush_cache_all();
-               return;
-       }
-
-       /* Case: Kernel Phy addr (0x8000_0000 onwards) */
-       if (likely(kstart > PAGE_OFFSET)) {
-               /*
-                * The 2nd arg despite being paddr will be used to index icache
-                * This is OK since no alternate virtual mappings will exist
-                * given the callers for this case: kprobe/kgdb in built-in
-                * kernel code only.
-                */
-               __sync_icache_dcache(kstart, kstart, kend - kstart);
-               return;
-       }
-
-       /*
-        * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
-        * (1) ARC Cache Maintenance ops only take Phy addr, hence special
-        *     handling of kernel vaddr.
-        *
-        * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
-        *     it still needs to handle  a 2 page scenario, where the range
-        *     straddles across 2 virtual pages and hence need for loop
-        */
-       while (tot_sz > 0) {
-               unsigned int off, sz;
-               unsigned long phy, pfn;
-
-               off = kstart % PAGE_SIZE;
-               pfn = vmalloc_to_pfn((void *)kstart);
-               phy = (pfn << PAGE_SHIFT) + off;
-               sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
-               __sync_icache_dcache(phy, kstart, sz);
-               kstart += sz;
-               tot_sz -= sz;
-       }
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-/*
- * General purpose helper to make I and D cache lines consistent.
- * @paddr is phy addr of region
- * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
- *    However in one instance, when called by kprobe (for a breakpoint in
- *    builtin kernel code), @vaddr will be paddr only, meaning the CDU operation
- *    will use a paddr to index the cache (despite VIPT). This is fine since a
- *    builtin kernel page will not have any virtual mappings.
- *    A kprobe on a loadable module will be a kernel vaddr.
- */
-void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
-{
-       __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
-       __ic_line_inv_vaddr(paddr, vaddr, len);
-}
-
-/* wrapper to compile time eliminate alignment checks in flush loop */
-void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
-{
-       __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
-}
-
-/*
- * wrapper to clear out kernel or userspace mappings of a page
- * For kernel mappings @vaddr == @paddr
- */
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
-{
-       __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
-}
-
-noinline void flush_cache_all(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       __ic_entire_inv();
-       __dc_entire_op(OP_FLUSH_N_INV);
-
-       local_irq_restore(flags);
-
-}
-
-#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
-
-void flush_cache_mm(struct mm_struct *mm)
-{
-       flush_cache_all();
-}
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
-                     unsigned long pfn)
-{
-       unsigned int paddr = pfn << PAGE_SHIFT;
-
-       u_vaddr &= PAGE_MASK;
-
-       ___flush_dcache_page(paddr, u_vaddr);
-
-       if (vma->vm_flags & VM_EXEC)
-               __inv_icache_page(paddr, u_vaddr);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-                      unsigned long end)
-{
-       flush_cache_all();
-}
-
-void flush_anon_page(struct vm_area_struct *vma, struct page *page,
-                    unsigned long u_vaddr)
-{
-       /* TBD: do we really need to clear the kernel mapping? */
-       __flush_dcache_page(page_address(page), u_vaddr);
-       __flush_dcache_page(page_address(page), page_address(page));
-
-}
-
-#endif
-
-void copy_user_highpage(struct page *to, struct page *from,
-       unsigned long u_vaddr, struct vm_area_struct *vma)
-{
-       void *kfrom = page_address(from);
-       void *kto = page_address(to);
-       int clean_src_k_mappings = 0;
-
-       /*
-        * If SRC page was already mapped in userspace AND its U-mapping is
-        * not congruent with K-mapping, sync former to physical page so that
-        * K-mapping in memcpy below, sees the right data
-        *
-        * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
-        * equally valid for SRC page as well
-        */
-       if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
-               __flush_dcache_page(kfrom, u_vaddr);
-               clean_src_k_mappings = 1;
-       }
-
-       copy_page(kto, kfrom);
-
-       /*
-        * Mark DST page K-mapping as dirty for a later finalization by
-        * update_mmu_cache(). Although the finalization could have been done
-        * here as well (given that both vaddr/paddr are available).
-        * But update_mmu_cache() already has code to do that for other
-        * non copied user pages (e.g. read faults which wire in pagecache page
-        * directly).
-        */
-       clear_bit(PG_dc_clean, &to->flags);
-
-       /*
-        * if SRC was already usermapped and non-congruent to kernel mapping
-        * sync the kernel mapping back to physical page
-        */
-       if (clean_src_k_mappings) {
-               __flush_dcache_page(kfrom, kfrom);
-               set_bit(PG_dc_clean, &from->flags);
-       } else {
-               clear_bit(PG_dc_clean, &from->flags);
-       }
-}
-
-void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
-{
-       clear_page(to);
-       clear_bit(PG_dc_clean, &page->flags);
-}
-
-
-/**********************************************************************
- * Explicit Cache flush request from user space via syscall
- * Needed for JITs which generate code on the fly
- */
-SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
-{
-       /* TBD: optimize this */
-       flush_cache_all();
-       return 0;
-}
index 12cc6485b2185c4eae5399adbda4598da619a1dd..74a637a1cfc48b2c5d4f0047a0b25bc51d238e16 100644 (file)
@@ -14,8 +14,6 @@
  * Cache bit off in the TLB entry.
  *
  * The default DMA address == Phy address which is 0x8000_0000 based.
- * A platform/device can make it zero based, by over-riding
- * plat_{dma,kernel}_addr_to_{kernel,dma}
  */
 
 #include <linux/dma-mapping.h>
@@ -37,7 +35,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
                return NULL;
 
        /* This is bus address, platform dependent */
-       *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+       *dma_handle = (dma_addr_t)paddr;
 
        return paddr;
 }
@@ -46,8 +44,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent);
 void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
                          dma_addr_t dma_handle)
 {
-       free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
-                        size);
+       free_pages_exact((void *)dma_handle, size);
 }
 EXPORT_SYMBOL(dma_free_noncoherent);
 
@@ -67,7 +64,19 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
                memset(kvaddr, 0, size);
 
        /* This is bus address, platform dependent */
-       *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+       *dma_handle = (dma_addr_t)paddr;
+
+       /*
+        * Evict any existing L1 and/or L2 lines for the backing page
+        * in case it was used earlier as a normal "cached" page.
+        * Yeah this bit us - STAR 9000898266
+        *
+        * Although core does call flush_cache_vmap(), it gets kvaddr hence
+        * can't be used to efficiently flush L1 and/or L2 which need paddr
+        * Currently flush_cache_vmap nukes the L1 cache completely which
+        * will be optimized as a separate commit
+        */
+       dma_cache_wback_inv((unsigned long)paddr, size);
 
        return kvaddr;
 }
@@ -78,8 +87,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
 {
        iounmap((void __force __iomem *)kvaddr);
 
-       free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
-                        size);
+       free_pages_exact((void *)dma_handle, size);
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
index 7f47d2a56f44374e00939e6742ed6717c452c58a..2c7ce8bb74758c127673582426f214a0ee0d0af7 100644 (file)
@@ -113,6 +113,8 @@ static inline void __tlb_entry_erase(void)
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
+#if (CONFIG_ARC_MMU_VER < 4)
+
 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
 {
        unsigned int idx;
@@ -210,6 +212,28 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
+#else  /* CONFIG_ARC_MMU_VER >= 4) */
+
+static void utlb_invalidate(void)
+{
+       /* No need since uTLB is always in sync with JTLB */
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+       write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
+       write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
+}
+
+static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+{
+       write_aux_reg(ARC_REG_TLBPD0, pd0);
+       write_aux_reg(ARC_REG_TLBPD1, pd1);
+       write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
+}
+
+#endif
+
 /*
  * Un-conditionally (without lookup) erase the entire MMU contents
  */
@@ -582,23 +606,42 @@ void read_decode_mmu_bcr(void)
 #endif
        } *mmu3;
 
+       struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+                    n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+       /*           DTLB      ITLB      JES        JE         JA      */
+       unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+                    pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+       } *mmu4;
+
        tmp = read_aux_reg(ARC_REG_MMU_BCR);
        mmu->ver = (tmp >> 24);
 
        if (mmu->ver <= 2) {
                mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-               mmu->pg_sz = PAGE_SIZE;
+               mmu->pg_sz_k = TO_KB(PAGE_SIZE);
                mmu->sets = 1 << mmu2->sets;
                mmu->ways = 1 << mmu2->ways;
                mmu->u_dtlb = mmu2->u_dtlb;
                mmu->u_itlb = mmu2->u_itlb;
-       } else {
+       } else if (mmu->ver == 3) {
                mmu3 = (struct bcr_mmu_3 *)&tmp;
-               mmu->pg_sz = 512 << mmu3->pg_sz;
+               mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
                mmu->sets = 1 << mmu3->sets;
                mmu->ways = 1 << mmu3->ways;
                mmu->u_dtlb = mmu3->u_dtlb;
                mmu->u_itlb = mmu3->u_itlb;
+       } else {
+               mmu4 = (struct bcr_mmu_4 *)&tmp;
+               mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
+               mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
+               mmu->sets = 64 << mmu4->n_entry;
+               mmu->ways = mmu4->n_ways * 2;
+               mmu->u_dtlb = mmu4->u_dtlb * 4;
+               mmu->u_itlb = mmu4->u_itlb * 4;
        }
 
        mmu->num_tlb = mmu->sets * mmu->ways;
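
To make the v4 decode concrete, one hypothetical BCR reading (field values invented for illustration):

    /* say sz0 = 4, n_entry = 2, n_ways = 1, u_dtlb = 2:
     * pg_sz_k = 1 << (4 - 1) = 8    -> 8K base pages
     * sets    = 64 << 2      = 256
     * ways    = 1 * 2        = 2
     * num_tlb = 256 * 2      = 512 JTLB entries
     * u_dtlb  = 2 * 4        = 8 micro-DTLB entries
     */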
@@ -608,10 +651,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 {
        int n = 0;
        struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
+       char super_pg[64] = "";
+
+       if (p_mmu->s_pg_sz_m)
+               scnprintf(super_pg, 64, "%dM Super Page%s, ",
+                         p_mmu->s_pg_sz_m, " (not used)");
 
        n += scnprintf(buf + n, len - n,
-                     "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
-                      p_mmu->ver, TO_KB(p_mmu->pg_sz),
+                     "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+                      p_mmu->ver, p_mmu->pg_sz_k, super_pg,
                       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
                       p_mmu->u_dtlb, p_mmu->u_itlb,
                       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
@@ -639,7 +687,7 @@ void arc_mmu_init(void)
                      mmu->ver, CONFIG_ARC_MMU_VER);
        }
 
-       if (mmu->pg_sz != PAGE_SIZE)
+       if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
                panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
        /* Enable the MMU */
index d572f1c2c72470e4b8f321787a8499e02da518e0..f6f4c3cb505d1341a8c24b1172a6a89f71e6fc26 100644 (file)
@@ -35,8 +35,6 @@
  * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
  */
 
-       .cpu A7
-
 #include <linux/linkage.h>
 #include <asm/entry.h>
 #include <asm/mmu.h>
@@ -46,6 +44,7 @@
 #include <asm/processor.h>
 #include <asm/tlb-mmu1.h>
 
+#ifdef CONFIG_ISA_ARCOMPACT
 ;-----------------------------------------------------------------
 ; ARC700 Exception Handling doesn't auto-switch stack and it only provides
 ; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
@@ -123,6 +122,24 @@ ex_saved_reg1:
 #endif
 .endm
 
+#else  /* ARCv2 */
+
+.macro TLBMISS_FREEUP_REGS
+       PUSH  r0
+       PUSH  r1
+       PUSH  r2
+       PUSH  r3
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+       POP   r3
+       POP   r2
+       POP   r1
+       POP   r0
+.endm
+
+#endif
+
 ;============================================================================
 ;  Troubleshooting Stuff
 ;============================================================================
@@ -241,6 +258,7 @@ ex_saved_reg1:
 ; Commit the TLB entry into MMU
 
 .macro COMMIT_ENTRY_TO_MMU
+#if (CONFIG_ARC_MMU_VER < 4)
 
        /* Get free TLB slot: Set = computed from vaddr, way = random */
        sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -251,6 +269,10 @@ ex_saved_reg1:
 #else
        sr TLBWrite, [ARC_REG_TLBCOMMAND]
 #endif
+
+#else
+       sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
+#endif
 .endm
 
 
@@ -291,6 +313,7 @@ ENTRY(EV_TLBMissI)
        CONV_PTE_TO_TLB
        COMMIT_ENTRY_TO_MMU
        TLBMISS_RESTORE_REGS
+EV_TLBMissI_fast_ret:  ; additional label for VDK OS-kit instrumentation
        rtie
 
 END(EV_TLBMissI)
@@ -356,6 +379,7 @@ ENTRY(EV_TLBMissD)
 
        COMMIT_ENTRY_TO_MMU
        TLBMISS_RESTORE_REGS
+EV_TLBMissD_fast_ret:  ; additional label for VDK OS-kit instrumentation
        rtie
 
 ;-------- Common routine to call Linux Page Fault Handler -----------
@@ -366,19 +390,5 @@ do_slow_path_pf:
 
        ; Slow path TLB Miss handled as a regular ARC Exception
        ; (stack switching / save the complete reg-file).
-       EXCEPTION_PROLOGUE
-
-       ; ------- setup args for Linux Page fault Handler ---------
-       mov_s r1, sp
-       lr    r0, [efa]
-
-       ; We don't want exceptions to be disabled while the fault is handled.
-       ; Now that we have saved the context we return from exception, hence
-       ; exceptions get re-enabled
-
-       FAKE_RET_FROM_EXCPN  r9
-
-       bl  do_page_fault
-       b   ret_from_exception
-
+       b  call_do_page_fault
 END(EV_TLBMissD)
diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig
deleted file mode 100644 (file)
index 217593a..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#
-# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-
-menuconfig ARC_PLAT_FPGA_LEGACY
-       bool "\"Legacy\" ARC FPGA dev Boards"
-       select ARC_HAS_COH_CACHES if SMP
-       help
-         Support for ARC development boards, provided by Synopsys.
-         These are based on FPGA or ISS. e.g.
-         - ARCAngel4
-         - ML509
-         - MetaWare ISS
-
-if ARC_PLAT_FPGA_LEGACY
-
-config ISS_SMP_EXTN
-       bool "ARC SMP Extensions (ISS Models only)"
-       default n
-       depends on SMP
-       help
-         SMP Extensions to ARC700, in a "simulation only" Model, supported in
-         ARC ISS (Instruction Set Simulator).
-         The SMP extensions include:
-         -IDU (Interrupt Distribution Unit)
-         -XTL (To enable CPU start/stop/set-PC for another CPU)
-         It doesn't provide coherent Caches and/or Atomic Ops (LLOCK/SCOND)
-
-endif
diff --git a/arch/arc/plat-arcfpga/Makefile b/arch/arc/plat-arcfpga/Makefile
deleted file mode 100644 (file)
index 66fd0ec..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-
-KBUILD_CFLAGS  += -Iarch/arc/plat-arcfpga/include
-
-obj-y := platform.o
-obj-$(CONFIG_ISS_SMP_EXTN)             += smp.o
diff --git a/arch/arc/plat-arcfpga/include/plat/smp.h b/arch/arc/plat-arcfpga/include/plat/smp.h
deleted file mode 100644 (file)
index c09eb4c..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Rajeshwar Ranga: Interrupt Distribution Unit API's
- */
-
-#ifndef __PLAT_ARCFPGA_SMP_H
-#define __PLAT_ARCFPGA_SMP_H
-
-#ifdef CONFIG_SMP
-
-#include <linux/types.h>
-#include <asm/arcregs.h>
-
-#define ARC_AUX_IDU_REG_CMD            0x2000
-#define ARC_AUX_IDU_REG_PARAM          0x2001
-
-#define ARC_AUX_XTL_REG_CMD            0x2002
-#define ARC_AUX_XTL_REG_PARAM          0x2003
-
-#define ARC_REG_MP_BCR                 0x2021
-
-#define ARC_XTL_CMD_WRITE_PC           0x04
-#define ARC_XTL_CMD_CLEAR_HALT         0x02
-
-/*
- * Build Configuration Register which identifies the sub-components
- */
-struct bcr_mp {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int mp_arch:16, pad:5, sdu:1, idu:1, scu:1, ver:8;
-#else
-       unsigned int ver:8, scu:1, idu:1, sdu:1, pad:5, mp_arch:16;
-#endif
-};
-
-/* IDU supports 256 common interrupts */
-#define NR_IDU_IRQS                    256
-
-/*
- * The Aux Regs layout is same bit-by-bit in both BE/LE modes.
- * However when cast as a bitfield encoded "C" struct, gcc treats it as
- * memory, generating different code for BE/LE, requiring structure adj (see
- * include/asm/arcregs.h)
- *
- * However when manually "carving" the value for an Aux reg, no special
- * handling of BE is needed because of the property described above
- */
-#define IDU_SET_COMMAND(irq, cmd)                      \
-do {                                                   \
-       uint32_t __val;                                 \
-       __val = (((irq & 0xFF) << 8) | (cmd & 0xFF));   \
-       write_aux_reg(ARC_AUX_IDU_REG_CMD, __val);      \
-} while (0)
-
-#define IDU_SET_PARAM(par)  write_aux_reg(ARC_AUX_IDU_REG_PARAM, par)
-#define IDU_GET_PARAM()     read_aux_reg(ARC_AUX_IDU_REG_PARAM)
-
-/* IDU Commands */
-#define IDU_DISABLE                    0x00
-#define IDU_ENABLE                     0x01
-#define IDU_IRQ_CLEAR                  0x02
-#define IDU_IRQ_ASSERT                 0x03
-#define IDU_IRQ_WMODE                  0x04
-#define IDU_IRQ_STATUS                 0x05
-#define IDU_IRQ_ACK                    0x06
-#define IDU_IRQ_PEND                   0x07
-#define IDU_IRQ_RMODE                  0x08
-#define IDU_IRQ_WBITMASK               0x09
-#define IDU_IRQ_RBITMASK               0x0A
-
-#define idu_enable()           IDU_SET_COMMAND(0, IDU_ENABLE)
-#define idu_disable()          IDU_SET_COMMAND(0, IDU_DISABLE)
-
-#define idu_irq_assert(irq)    IDU_SET_COMMAND((irq), IDU_IRQ_ASSERT)
-#define idu_irq_clear(irq)     IDU_SET_COMMAND((irq), IDU_IRQ_CLEAR)
-
-/* IDU Interrupt Mode - Destination Encoding */
-#define IDU_IRQ_MOD_DISABLE            0x00
-#define IDU_IRQ_MOD_ROUND_RECP         0x01
-#define IDU_IRQ_MOD_TCPU_FIRSTRECP     0x02
-#define IDU_IRQ_MOD_TCPU_ALLRECP       0x03
-
-/* IDU Interrupt Mode  - Triggering Mode */
-#define IDU_IRQ_MODE_LEVEL_TRIG                0x00
-#define IDU_IRQ_MODE_PULSE_TRIG                0x01
-
-#define IDU_IRQ_MODE_PARAM(dest_mode, trig_mode)   \
-       (((trig_mode & 0x01) << 15) | (dest_mode & 0xFF))
-
-struct idu_irq_config {
-       uint8_t irq;
-       uint8_t dest_mode;
-       uint8_t trig_mode;
-};
-
-struct idu_irq_status {
-       uint8_t irq;
-       bool enabled;
-       bool status;
-       bool ack;
-       bool pend;
-       uint8_t next_rr;
-};
-
-extern void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask);
-extern void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode);
-
-extern void iss_model_init_smp(unsigned int cpu);
-extern void iss_model_init_early_smp(void);
-
-#endif /* CONFIG_SMP */
-
-#endif
diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c
deleted file mode 100644 (file)
index afc8825..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * ARC FPGA Platform support code
- *
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <asm/mach_desc.h>
-#include <plat/smp.h>
-
-/*----------------------- Machine Descriptions ------------------------------
- *
- * Machine description is simply a set of platform/board specific callbacks
- * This is not directly related to DeviceTree based dynamic device creation,
- * however as part of early device tree scan, we also select the right
- * callback set, by matching the DT compatible name.
- */
-
-static const char *legacy_fpga_compat[] __initconst = {
-       "snps,arc-angel4",
-       "snps,arc-ml509",
-       NULL,
-};
-
-MACHINE_START(LEGACY_FPGA, "legacy_fpga")
-       .dt_compat      = legacy_fpga_compat,
-#ifdef CONFIG_ISS_SMP_EXTN
-       .init_early     = iss_model_init_early_smp,
-       .init_smp       = iss_model_init_smp,
-#endif
-MACHINE_END
-
-static const char *simulation_compat[] __initconst = {
-       "snps,nsim",
-       "snps,nsimosci",
-       NULL,
-};
-
-MACHINE_START(SIMULATION, "simulation")
-       .dt_compat      = simulation_compat,
-MACHINE_END
diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c
deleted file mode 100644 (file)
index 64797ba..0000000
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * ARC700 Simulation-only Extensions for SMP
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Vineet Gupta    - 2012 : split off arch common and plat specific SMP
- *  Rajeshwar Ranga - 2007 : Interrupt Distribution Unit API's
- */
-
-#include <linux/smp.h>
-#include <linux/irq.h>
-#include <plat/smp.h>
-
-#define IDU_INTERRUPT_0 16
-
-static char smp_cpuinfo_buf[128];
-
-/*
- *-------------------------------------------------------------------
- * Platform specific callbacks expected by arch SMP code
- *-------------------------------------------------------------------
- */
-
-/*
- * Master kick starting another CPU
- */
-static void iss_model_smp_wakeup_cpu(int cpu, unsigned long pc)
-{
-       /* setup the start PC */
-       write_aux_reg(ARC_AUX_XTL_REG_PARAM, pc);
-
-       /* Trigger WRITE_PC cmd for this cpu */
-       write_aux_reg(ARC_AUX_XTL_REG_CMD,
-                       (ARC_XTL_CMD_WRITE_PC | (cpu << 8)));
-
-       /* Take the cpu out of Halt */
-       write_aux_reg(ARC_AUX_XTL_REG_CMD,
-                       (ARC_XTL_CMD_CLEAR_HALT | (cpu << 8)));
-
-}
-
-static inline int get_hw_config_num_irq(void)
-{
-       uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
-
-       switch (val & 0x03) {
-       case 0:
-               return 16;
-       case 1:
-               return 32;
-       case 2:
-               return 8;
-       default:
-               return 0;
-       }
-
-       return 0;
-}
-
-/*
- * Any SMP-specific init a CPU does when it comes up.
- * Here we set up the CPU to enable Inter-Processor-Interrupts.
- * Called for each CPU
- * -Master      : init_IRQ()
- * -Other(s)    : start_kernel_secondary()
- */
-void iss_model_init_smp(unsigned int cpu)
-{
-       /* Check if CPU is configured for more than 16 interrupts */
-       if (NR_IRQS <= 16 || get_hw_config_num_irq() <= 16)
-               panic("[arcfpga] IRQ system can't support IDU IPI\n");
-
-       idu_disable();
-
-       /****************************************************************
-        * IDU provides a set of Common IRQs, each of which can be dynamically
-        * attached to (1|many|all) CPUs.
-        * The Common IRQs [0-15] are mapped as CPU pvt [16-31]
-        *
-        * Here we use a simple 1:1 mapping:
-        * A CPU 'x' is wired to Common IRQ 'x'.
-        * So an IDU ASSERT on IRQ 'x' will trigger an Interrupt on CPU 'x', which
-        * makes up for our simple IPI plumbing.
-        *
-        * TBD: Have a dedicated multicast IRQ for sending IPIs to all CPUs
-        *      w/o having to do one-at-a-time
-        ******************************************************************/
-
-       /*
-        * Claim an IRQ which would trigger IPI on this CPU.
-        * In IDU parlance it involves setting up a cpu bitmask for the IRQ
-        * The bitmap here contains only 1 CPU (self).
-        */
-       idu_irq_set_tgtcpu(cpu, 0x1 << cpu);
-
-       /* Set the IRQ destination to use the bitmask above */
-       idu_irq_set_mode(cpu, 7, /* XXX: IDU_IRQ_MOD_TCPU_ALLRECP: ISS bug */
-                        IDU_IRQ_MODE_PULSE_TRIG);
-
-       idu_enable();
-
-       /* Attach the arch-common IPI ISR to our IDU IRQ */
-       smp_ipi_irq_setup(cpu, IDU_INTERRUPT_0 + cpu);
-}
-
-static void iss_model_ipi_send(int cpu)
-{
-       idu_irq_assert(cpu);
-}
-
-static void iss_model_ipi_clear(int irq)
-{
-       idu_irq_clear(IDU_INTERRUPT_0 + smp_processor_id());
-}
-
-void iss_model_init_early_smp(void)
-{
-#define IS_AVAIL1(var, str)    ((var) ? str : "")
-
-       struct bcr_mp mp;
-
-       READ_BCR(ARC_REG_MP_BCR, mp);
-
-       sprintf(smp_cpuinfo_buf, "Extn [ISS-SMP]: v%d, arch(%d) %s %s %s\n",
-               mp.ver, mp.mp_arch, IS_AVAIL1(mp.scu, "SCU"),
-               IS_AVAIL1(mp.idu, "IDU"), IS_AVAIL1(mp.sdu, "SDU"));
-
-       plat_smp_ops.info = smp_cpuinfo_buf;
-
-       plat_smp_ops.cpu_kick = iss_model_smp_wakeup_cpu;
-       plat_smp_ops.ipi_send = iss_model_ipi_send;
-       plat_smp_ops.ipi_clear = iss_model_ipi_clear;
-}
-
-/*
- *-------------------------------------------------------------------
- * Low level Platform IPI Providers
- *-------------------------------------------------------------------
- */
-
-/* Set the Mode for the Common IRQ */
-void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode)
-{
-       uint32_t par = IDU_IRQ_MODE_PARAM(dest_mode, trig_mode);
-
-       IDU_SET_PARAM(par);
-       IDU_SET_COMMAND(irq, IDU_IRQ_WMODE);
-}
-
-/* Set the target cpu Bitmask for Common IRQ */
-void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask)
-{
-       IDU_SET_PARAM(mask);
-       IDU_SET_COMMAND(irq, IDU_IRQ_WBITMASK);
-}
-
-/* Get the Interrupt Acknowledged status for IRQ (as CPU Bitmask) */
-bool idu_irq_get_ack(uint8_t irq)
-{
-       uint32_t val;
-
-       IDU_SET_COMMAND(irq, IDU_IRQ_ACK);
-       val = IDU_GET_PARAM();
-
-       return val & (1 << irq);
-}
-
-/*
- * Get the Interrupt Pending status for IRQ (as CPU Bitmask)
- * -Pending means CPU has not yet noticed the IRQ (e.g. disabled)
- * -After the Interrupt has been taken, the IPI explicitly needs to be
- *  cleared, to be acknowledged.
- */
-bool idu_irq_get_pend(uint8_t irq)
-{
-       uint32_t val;
-
-       IDU_SET_COMMAND(irq, IDU_IRQ_PEND);
-       val = IDU_GET_PARAM();
-
-       return val & (1 << irq);
-}
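
The deleted smp.c wires IPIs 1:1: CPU 'x' claims Common IRQ 'x' with a self-only target bitmask, and Common IRQ 'x' surfaces as private IRQ 16+x (IDU_INTERRUPT_0). A standalone sketch of that mapping (not from the commit; the 4-CPU count is assumed for illustration):

    #include <stdio.h>

    int main(void)
    {
            unsigned int cpu, ncpus = 4;    /* assumed CPU count */

            for (cpu = 0; cpu < ncpus; cpu++)
                    printf("cpu %u: common irq %u, tgt mask 0x%x, pvt irq %u\n",
                           cpu, cpu, 1u << cpu, 16 + cpu);
            return 0;
    }
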
diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig
new file mode 100644 (file)
index 0000000..d475f9d
--- /dev/null
@@ -0,0 +1,46 @@
+#
+# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_PLAT_AXS10X
+       bool "Synopsys ARC AXS10x Software Development Platforms"
+       select DW_APB_ICTL
+       select GPIO_DWAPB
+       select OF_GPIO
+       select GENERIC_IRQ_CHIP
+       select ARCH_REQUIRE_GPIOLIB
+       help
+         Support for the ARC AXS10x Software Development Platforms.
+
+         The AXS10x Platforms consist of a mainboard with peripherals,
+         on which several daughter cards can be placed. The daughter cards
+         typically contain a CPU and memory.
+
+if ARC_PLAT_AXS10X
+
+config AXS101
+       depends on ISA_ARCOMPACT
+       bool "AXS101 with AXC001 CPU Card (ARC 770D/EM6/AS221)"
+       help
+         This adds support for the 770D/EM6/AS221 CPU Card. Only the ARC
+         770D is supported in Linux.
+
+         The AXS101 Platform consists of an AXS10x mainboard with
+         this daughtercard. Please use the axs101.dts device tree
+         with this configuration.
+
+config AXS103
+       bool "AXS103 with AXC003 CPU Card (ARC HS38x)"
+       depends on ISA_ARCV2
+       help
+         This adds support for the HS38x CPU Card.
+
+         The AXS103 Platform consists of an AXS10x mainboard with
+         this daughtercard. Please use the axs103.dts device tree
+         with this configuration.
+
+endif
diff --git a/arch/arc/plat-axs10x/Makefile b/arch/arc/plat-axs10x/Makefile
new file mode 100644 (file)
index 0000000..d4748f2
--- /dev/null
@@ -0,0 +1,9 @@
+#
+# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_ARC_PLAT_AXS10X) += axs10x.o
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
new file mode 100644 (file)
index 0000000..99f7da5
--- /dev/null
@@ -0,0 +1,484 @@
+/*
+ * AXS101/AXS103 Software Development Platform
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/of_platform.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/clk.h>
+#include <asm/io.h>
+#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
+#define AXS_MB_CGU             0xE0010000
+#define AXS_MB_CREG            0xE0011000
+
+#define CREG_MB_IRQ_MUX                (AXS_MB_CREG + 0x214)
+#define CREG_MB_SW_RESET       (AXS_MB_CREG + 0x220)
+#define CREG_MB_VER            (AXS_MB_CREG + 0x230)
+#define CREG_MB_CONFIG         (AXS_MB_CREG + 0x234)
+
+#define AXC001_CREG            0xF0001000
+#define AXC001_GPIO_INTC       0xF0003000
+
+static void __init axs10x_enable_gpio_intc_wire(void)
+{
+       /*
+        * Peripherals on CPU Card and Mother Board are wired to cpu intc via
+        * intermediate DW APB GPIO blocks (mainly for debouncing)
+        *
+        *         ---------------------
+        *        |  snps,arc700-intc |
+        *        ---------------------
+        *          | #7          | #15
+        * -------------------   -------------------
+        * | snps,dw-apb-gpio |  | snps,dw-apb-gpio |
+        * -------------------   -------------------
+        *        |                         |
+        *        |                 [ Debug UART on cpu card ]
+        *        |
+        * ------------------------
+        * | snps,dw-apb-intc (MB)|
+        * ------------------------
+        *  |      |       |      |
+        * [eth] [uart]        [... other perip on Main Board]
+        *
+        * The current implementation of the "irq-dw-apb-ictl" driver doesn't
+        * work well with stacked INTCs. In particular, problems arise if its
+        * master INTC is not yet instantiated. See discussion here -
+        * https://lkml.org/lkml/2015/3/4/755
+        *
+        * So set up the first GPIO block as a passive pass-through and hide
+        * it from the DT hardware topology - connect the MB intc directly to
+        * the cpu intc. The GPIO "wire" nevertheless needs to be initialized
+        * (which is done here).
+        *
+        * A side advantage is that peripheral interrupt handling avoids one
+        * nested intc ISR hop.
+        */
+#define GPIO_INTEN             (AXC001_GPIO_INTC + 0x30)
+#define GPIO_INTMASK           (AXC001_GPIO_INTC + 0x34)
+#define GPIO_INTTYPE_LEVEL     (AXC001_GPIO_INTC + 0x38)
+#define GPIO_INT_POLARITY      (AXC001_GPIO_INTC + 0x3c)
+#define MB_TO_GPIO_IRQ         12
+
+       iowrite32(~(1 << MB_TO_GPIO_IRQ), (void __iomem *) GPIO_INTMASK);
+       iowrite32(0, (void __iomem *) GPIO_INTTYPE_LEVEL);
+       iowrite32(~0, (void __iomem *) GPIO_INT_POLARITY);
+       iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN);
+}
+
+static inline void __init
+write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg)
+{
+       unsigned int loops = 128 * 1024, ctr;
+
+       iowrite32(value, reg);
+
+       ctr = loops;
+       while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */
+               cpu_relax();
+
+       ctr = loops;
+       while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */
+               cpu_relax();
+}
+
+static void __init axs10x_print_board_ver(unsigned int creg, const char *str)
+{
+       union ver {
+               struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+                       unsigned int pad:11, y:12, m:4, d:5;
+#else
+                       unsigned int d:5, m:4, y:12, pad:11;
+#endif
+               };
+               unsigned int val;
+       } board;
+
+       board.val = ioread32((void __iomem *)creg);
+       pr_info("AXS: %s FPGA Date: %u-%u-%u\n", str, board.d, board.m,
+               board.y);
+}
+
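
The union in axs10x_print_board_ver() above splits the version CREG into day (5 bits), month (4 bits) and year (12 bits) fields. A user-space sketch of the little-endian decode (not from the commit; the raw register value is fabricated):

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical CREG value encoding 4-6-2015:
             * d = bits [4:0], m = bits [8:5], y = bits [20:9]
             */
            unsigned int val = (2015u << 9) | (6u << 5) | 4u;

            printf("FPGA Date: %u-%u-%u\n",
                   val & 0x1f, (val >> 5) & 0xf, (val >> 9) & 0xfff);
            return 0;   /* prints "FPGA Date: 4-6-2015" */
    }
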
+static void __init axs10x_early_init(void)
+{
+       int mb_rev;
+       char mb[32];
+
+       /* Determine motherboard version */
+       if (ioread32((void __iomem *) CREG_MB_CONFIG) & (1 << 28))
+               mb_rev = 3;     /* HT-3 (rev3.0) */
+       else
+               mb_rev = 2;     /* HT-2 (rev2.0) */
+
+       axs10x_enable_gpio_intc_wire();
+
+       scnprintf(mb, 32, "MainBoard v%d", mb_rev);
+       axs10x_print_board_ver(CREG_MB_VER, mb);
+}
+
+#ifdef CONFIG_AXS101
+
+#define CREG_CPU_ADDR_770      (AXC001_CREG + 0x20)
+#define CREG_CPU_ADDR_TUNN     (AXC001_CREG + 0x60)
+#define CREG_CPU_ADDR_770_UPD  (AXC001_CREG + 0x34)
+#define CREG_CPU_ADDR_TUNN_UPD (AXC001_CREG + 0x74)
+
+#define CREG_CPU_ARC770_IRQ_MUX        (AXC001_CREG + 0x114)
+#define CREG_CPU_GPIO_UART_MUX (AXC001_CREG + 0x120)
+
+/*
+ * Set up System Memory Map for ARC cpu / peripherals controllers
+ *
+ * Each AXI master has a 4GB memory map specified as 16 apertures of 256MB, each
+ * of which maps to a corresponding 256MB aperture in Target slave memory map.
+ *
+ * e.g. ARC cpu AXI Master's aperture 8 (0x8000_0000) is mapped to aperture 0
+ * (0x0000_0000) of DDR Port 0 (slave #1)
+ *
+ * Access from the cpu to MB controllers such as GMAC is set up using the
+ * AXI Tunnel, which has masters/slaves on both ends.
+ * e.g. aperture 14 (0xE000_0000) of ARC cpu is mapped to aperture 14
+ * (0xE000_0000) of CPU Card AXI Tunnel slave (slave #3) which is mapped to
+ * MB AXI Tunnel Master, which also has a mem map setup
+ *
+ * In the reverse direction, MB AXI Masters (e.g. GMAC) mem map is setup
+ * to map to MB AXI Tunnel slave which connects to CPU Card AXI Tunnel Master
+ */
+struct aperture {
+       unsigned int slave_sel:4, slave_off:4, pad:24;
+};
+
+/* CPU Card target slaves */
+#define AXC001_SLV_NONE                        0
+#define AXC001_SLV_DDR_PORT0           1
+#define AXC001_SLV_SRAM                        2
+#define AXC001_SLV_AXI_TUNNEL          3
+#define AXC001_SLV_AXI2APB             6
+#define AXC001_SLV_DDR_PORT1           7
+
+/* MB AXI Target slaves */
+#define AXS_MB_SLV_NONE                        0
+#define AXS_MB_SLV_AXI_TUNNEL_CPU      1
+#define AXS_MB_SLV_AXI_TUNNEL_HAPS     2
+#define AXS_MB_SLV_SRAM                        3
+#define AXS_MB_SLV_CONTROL             4
+
+/* MB AXI masters */
+#define AXS_MB_MST_TUNNEL_CPU          0
+#define AXS_MB_MST_USB_OHCI            10
+
+/*
+ * memmap for ARC core on CPU Card
+ */
+static const struct aperture axc001_memmap[16] = {
+       {AXC001_SLV_AXI_TUNNEL,         0x0},
+       {AXC001_SLV_AXI_TUNNEL,         0x1},
+       {AXC001_SLV_SRAM,               0x0}, /* 0x2000_0000: Local SRAM */
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_DDR_PORT0,          0x0}, /* 0x8000_0000: DDR   0..256M */
+       {AXC001_SLV_DDR_PORT0,          0x1}, /* 0x9000_0000: DDR 256..512M */
+       {AXC001_SLV_DDR_PORT0,          0x2},
+       {AXC001_SLV_DDR_PORT0,          0x3},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_AXI_TUNNEL,         0xD},
+       {AXC001_SLV_AXI_TUNNEL,         0xE}, /* MB: CREG, CGU... */
+       {AXC001_SLV_AXI2APB,            0x0}, /* CPU Card local CREG, CGU... */
+};
+
+/*
+ * memmap for CPU Card AXI Tunnel Master (for access by MB controllers)
+ * GMAC (MB) -> MB AXI Tunnel slave -> CPU Card AXI Tunnel Master -> DDR
+ */
+static const struct aperture axc001_axi_tunnel_memmap[16] = {
+       {AXC001_SLV_AXI_TUNNEL,         0x0},
+       {AXC001_SLV_AXI_TUNNEL,         0x1},
+       {AXC001_SLV_SRAM,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_DDR_PORT1,          0x0},
+       {AXC001_SLV_DDR_PORT1,          0x1},
+       {AXC001_SLV_DDR_PORT1,          0x2},
+       {AXC001_SLV_DDR_PORT1,          0x3},
+       {AXC001_SLV_NONE,               0x0},
+       {AXC001_SLV_AXI_TUNNEL,         0xD},
+       {AXC001_SLV_AXI_TUNNEL,         0xE},
+       {AXC001_SLV_AXI2APB,            0x0},
+};
+
+/*
+ * memmap for MB AXI Masters
+ * Same mem map for all perip controllers as well as MB AXI Tunnel Master
+ */
+static const struct aperture axs_mb_memmap[16] = {
+       {AXS_MB_SLV_SRAM,               0x0},
+       {AXS_MB_SLV_SRAM,               0x0},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_AXI_TUNNEL_CPU,     0x8},   /* DDR on CPU Card */
+       {AXS_MB_SLV_AXI_TUNNEL_CPU,     0x9},   /* DDR on CPU Card */
+       {AXS_MB_SLV_AXI_TUNNEL_CPU,     0xA},
+       {AXS_MB_SLV_AXI_TUNNEL_CPU,     0xB},
+       {AXS_MB_SLV_NONE,               0x0},
+       {AXS_MB_SLV_AXI_TUNNEL_HAPS,    0xD},
+       {AXS_MB_SLV_CONTROL,            0x0},   /* MB Local CREG, CGU... */
+       {AXS_MB_SLV_AXI_TUNNEL_CPU,     0xF},
+};
+
+static noinline void __init
+axs101_set_memmap(void __iomem *base, const struct aperture map[16])
+{
+       unsigned int slave_select, slave_offset;
+       int i;
+
+       slave_select = slave_offset = 0;
+       for (i = 0; i < 8; i++) {
+               slave_select |= map[i].slave_sel << (i << 2);
+               slave_offset |= map[i].slave_off << (i << 2);
+       }
+
+       iowrite32(slave_select, base + 0x0);    /* SLV0 */
+       iowrite32(slave_offset, base + 0x8);    /* OFFSET0 */
+
+       slave_select = slave_offset = 0;
+       for (i = 0; i < 8; i++) {
+               slave_select |= map[i+8].slave_sel << (i << 2);
+               slave_offset |= map[i+8].slave_off << (i << 2);
+       }
+
+       iowrite32(slave_select, base + 0x4);    /* SLV1 */
+       iowrite32(slave_offset, base + 0xC);    /* OFFSET1 */
+}
+
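
axs101_set_memmap() above packs eight 4-bit aperture fields per 32-bit register, one nibble per aperture. A standalone restatement of the packing (a sketch, not from the commit), fed with the first eight slave selects of axc001_memmap (tunnel, tunnel, SRAM, then five unused):

    #include <stdio.h>

    int main(void)
    {
            /* slave_sel of axc001_memmap[0..7]: 3, 3, 2, 0, 0, 0, 0, 0 */
            unsigned int sel[8] = { 3, 3, 2, 0, 0, 0, 0, 0 };
            unsigned int slv0 = 0;
            int i;

            for (i = 0; i < 8; i++)
                    slv0 |= sel[i] << (i << 2); /* nibble i at bits 4i+3..4i */

            printf("SLV0 = 0x%08x\n", slv0);    /* prints "SLV0 = 0x00000233" */
            return 0;
    }
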
+static void __init axs101_early_init(void)
+{
+       int i;
+
+       /* ARC 770D memory view */
+       axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_770, axc001_memmap);
+       iowrite32(1, (void __iomem *) CREG_CPU_ADDR_770_UPD);
+
+       /* AXI tunnel memory map (incoming traffic from MB into CPU Card) */
+       axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_TUNN,
+                             axc001_axi_tunnel_memmap);
+       iowrite32(1, (void __iomem *) CREG_CPU_ADDR_TUNN_UPD);
+
+       /* MB peripherals memory map */
+       for (i = AXS_MB_MST_TUNNEL_CPU; i <= AXS_MB_MST_USB_OHCI; i++)
+               axs101_set_memmap((void __iomem *) AXS_MB_CREG + (i << 4),
+                                     axs_mb_memmap);
+
+       iowrite32(0x3ff, (void __iomem *) AXS_MB_CREG + 0x100); /* Update */
+
+       /* GPIO pins 18 and 19 are used as UART rx and tx, respectively. */
+       iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX);
+
+       /* Set up the MB interrupt system: mux interrupts to GPIO7 */
+       iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX);
+
+       /* reset ethernet and ULPI interfaces */
+       iowrite32(0x18, (void __iomem *) CREG_MB_SW_RESET);
+
+       /* map GPIO 14:10 to ARC 9:5 (IRQ mux change for MB v2 onwards) */
+       iowrite32(0x52, (void __iomem *) CREG_CPU_ARC770_IRQ_MUX);
+
+       axs10x_early_init();
+}
+
+#endif /* CONFIG_AXS101 */
+
+#ifdef CONFIG_AXS103
+
+#define AXC003_CGU     0xF0000000
+#define AXC003_CREG    0xF0001000
+#define AXC003_MST_AXI_TUNNEL  0
+#define AXC003_MST_HS38                1
+
+#define CREG_CPU_AXI_M0_IRQ_MUX        (AXC003_CREG + 0x440)
+#define CREG_CPU_GPIO_UART_MUX (AXC003_CREG + 0x480)
+#define CREG_CPU_TUN_IO_CTRL   (AXC003_CREG + 0x494)
+
+
+union pll_reg {
+       struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6;
+#else
+               unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17;
+#endif
+       };
+       unsigned int val;
+};
+
+static unsigned int __init axs103_get_freq(void)
+{
+       union pll_reg idiv, fbdiv, odiv;
+       unsigned int f = 33333333;
+
+       idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0);
+       fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4);
+       odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8);
+
+       if (idiv.bypass != 1)
+               f = f / (idiv.low + idiv.high);
+
+       if (fbdiv.bypass != 1)
+               f = f * (fbdiv.low + fbdiv.high);
+
+       if (odiv.bypass != 1)
+               f = f / (odiv.low + odiv.high);
+
+       f = (f + 500000) / 1000000; /* Rounding */
+       return f;
+}
+
+static inline unsigned int __init encode_div(unsigned int id, int upd)
+{
+       union pll_reg div;
+
+       div.val = 0;
+
+       div.noupd = !upd;
+       div.bypass = id == 1 ? 1 : 0;
+       div.edge = (id%2 == 0) ? 0 : 1;  /* 0 = rising */
+       div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1;
+       div.high = id >> 1;
+
+       return div.val;
+}
+
+static noinline void __init
+axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
+{
+       write_cgu_reg(encode_div(id, 0),
+                     (void __iomem *)AXC003_CGU + 0x80 + 0,
+                     (void __iomem *)AXC003_CGU + 0x110);
+
+       write_cgu_reg(encode_div(fd, 0),
+                     (void __iomem *)AXC003_CGU + 0x80 + 4,
+                     (void __iomem *)AXC003_CGU + 0x110);
+
+       write_cgu_reg(encode_div(od, 1),
+                     (void __iomem *)AXC003_CGU + 0x80 + 8,
+                     (void __iomem *)AXC003_CGU + 0x110);
+}
+
+static void __init axs103_early_init(void)
+{
+       switch (arc_get_core_freq()/1000000) {
+       case 33:
+               axs103_set_freq(1, 1, 1);
+               break;
+       case 50:
+               axs103_set_freq(1, 30, 20);
+               break;
+       case 75:
+               axs103_set_freq(2, 45, 10);
+               break;
+       case 90:
+               axs103_set_freq(2, 54, 10);
+               break;
+       case 100:
+               axs103_set_freq(1, 30, 10);
+               break;
+       case 125:
+               axs103_set_freq(2, 45,  6);
+               break;
+       default:
+               /*
+                * In this case, the core frequency derived from the DT
+                * "clock-frequency" property might not match the board value.
+                * Hence update it to match the board value.
+                */
+               arc_set_core_freq(axs103_get_freq() * 1000000);
+               break;
+       }
+
+       pr_info("Freq is %dMHz\n", axs103_get_freq());
+
+       /* Memory maps are already configured by the pre-bootloader */
+
+       /* set GPIO mux to UART */
+       iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX);
+
+       iowrite32((0x00100000U | 0x000C0000U | 0x00003322U),
+                 (void __iomem *) CREG_CPU_TUN_IO_CTRL);
+
+       /* Set up the AXS_MB interrupt system */
+       iowrite32(12, (void __iomem *) (CREG_CPU_AXI_M0_IRQ_MUX
+                                        + (AXC003_MST_HS38 << 2)));
+
+       /* connect ICTL - Main Board with GPIO line */
+       iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX);
+
+       axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card");
+
+       axs10x_early_init();
+
+#ifdef CONFIG_ARC_MCIP
+       /* No hardware init, just fill in the SMP ops callbacks */
+       mcip_init_early_smp();
+#endif
+}
+#endif
+
+#ifdef CONFIG_AXS101
+
+static const char *axs101_compat[] __initconst = {
+       "snps,axs101",
+       NULL,
+};
+
+MACHINE_START(AXS101, "axs101")
+       .dt_compat      = axs101_compat,
+       .init_early     = axs101_early_init,
+MACHINE_END
+
+#endif /* CONFIG_AXS101 */
+
+#ifdef CONFIG_AXS103
+
+static const char *axs103_compat[] __initconst = {
+       "snps,axs103",
+       NULL,
+};
+
+MACHINE_START(AXS103, "axs103")
+       .dt_compat      = axs103_compat,
+       .init_early     = axs103_early_init,
+#ifdef CONFIG_ARC_MCIP
+       .init_smp       = mcip_init_smp,
+#endif
+MACHINE_END
+
+/*
+ * For the VDK OS-kit, to get the offset to pid and command fields
+ */
+char coware_swa_pid_offset[TASK_PID];
+char coware_swa_comm_offset[TASK_COMM];
+
+#endif /* CONFIG_AXS103 */
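
The PLL math above: axs103_get_freq() computes 33.33 MHz / idiv * fbdiv / odiv, and encode_div() splits a divider value id into low/high halves so that low + high == id (id == 1 is encoded as bypass). A standalone check of the 50 MHz table entry, axs103_set_freq(1, 30, 20) (a sketch, not from the commit):

    #include <stdio.h>

    /* mirrors axs103_get_freq(): 33.33 MHz reference, bypass when div == 1 */
    static unsigned int pll_out_hz(unsigned int id, unsigned int fd,
                                   unsigned int od)
    {
            unsigned int f = 33333333;

            if (id != 1)
                    f /= id;                /* input divider */
            if (fd != 1)
                    f *= fd;                /* feedback multiplier */
            if (od != 1)
                    f /= od;                /* output divider */
            return f;
    }

    int main(void)
    {
            unsigned int f = pll_out_hz(1, 30, 20);

            printf("%u MHz\n", (f + 500000) / 1000000); /* prints "50 MHz" */
            return 0;
    }
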
diff --git a/arch/arc/plat-sim/Kconfig b/arch/arc/plat-sim/Kconfig
new file mode 100644 (file)
index 0000000..18e39fc
--- /dev/null
@@ -0,0 +1,14 @@
+#
+# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_PLAT_SIM
+       bool "ARC nSIM based simulation virtual platforms"
+       select ARC_HAS_COH_CACHES if SMP
+       help
+         Support for nSIM based ARC simulation platforms.
+         This includes the standalone nSIM (UART only) as well as the
+         SystemC OSCI VP.
diff --git a/arch/arc/plat-sim/Makefile b/arch/arc/plat-sim/Makefile
new file mode 100644 (file)
index 0000000..00b1a95
--- /dev/null
@@ -0,0 +1,9 @@
+#
+# Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-y := platform.o
diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c
new file mode 100644 (file)
index 0000000..d9e35b4
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * ARC simulation Platform support code
+ *
+ * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
+/*----------------------- Machine Descriptions ------------------------------
+ *
+ * A machine description is simply a set of platform/board specific callbacks.
+ * This is not directly related to DeviceTree based dynamic device creation;
+ * however, as part of the early device tree scan, we also select the right
+ * callback set by matching the DT compatible name.
+ */
+
+static const char *simulation_compat[] __initconst = {
+       "snps,nsim",
+       "snps,nsim_hs",
+       "snps,nsimosci",
+       "snps,nsimosci_hs",
+       NULL,
+};
+
+MACHINE_START(SIMULATION, "simulation")
+       .dt_compat      = simulation_compat,
+#ifdef CONFIG_ARC_MCIP
+       .init_early     = mcip_init_early_smp,
+       .init_smp       = mcip_init_smp,
+#endif
+MACHINE_END
index 7f0252c580e4bd0b32a2e2eb5de440bacbfd3b98..a718866ba52d8e827653c7cbcb3341778f480b83 100644 (file)
                        };
 
                        eth0: ethernet@70000 {
-                               compatible = "marvell,armada-370-neta";
                                reg = <0x70000 0x4000>;
                                interrupts = <8>;
                                clocks = <&gateclk 4>;
                        };
 
                        eth1: ethernet@74000 {
-                               compatible = "marvell,armada-370-neta";
                                reg = <0x74000 0x4000>;
                                interrupts = <10>;
                                clocks = <&gateclk 3>;
index 3f036bd635f4207ac2ffe87e809155a51debaa64..53a1a5abe14739d5c71a64b9689147fa7f0c37cb 100644 (file)
                                        dmacap,memset;
                                };
                        };
+
+                       ethernet@70000 {
+                               compatible = "marvell,armada-370-neta";
+                       };
+
+                       ethernet@74000 {
+                               compatible = "marvell,armada-370-neta";
+                       };
                };
        };
 };
index 8479fdc9e9c2468e072c3592528a263610c2acc1..c5fdc99f0dbebb47f88e135a4013fdfb9d732772 100644 (file)
                        };
 
                        eth3: ethernet@34000 {
-                               compatible = "marvell,armada-370-neta";
+                               compatible = "marvell,armada-xp-neta";
                                reg = <0x34000 0x4000>;
                                interrupts = <14>;
                                clocks = <&gateclk 1>;
index 661d54c815802d1bb1d2e1fa31cb255d90caf12e..0e24f1a38540e30ccc257972537c8982074f4c75 100644 (file)
                        };
 
                        eth3: ethernet@34000 {
-                               compatible = "marvell,armada-370-neta";
+                               compatible = "marvell,armada-xp-neta";
                                reg = <0x34000 0x4000>;
                                interrupts = <14>;
                                clocks = <&gateclk 1>;
index e78ce4ab6b75b03d4fd0785388b1d4d47c4ef1cd..0854d4493da7a8518c241dabe06e91ce22eb5fe9 100644 (file)
                        };
 
                        eth2: ethernet@30000 {
-                               compatible = "marvell,armada-370-neta";
+                               compatible = "marvell,armada-xp-neta";
                                reg = <0x30000 0x4000>;
                                interrupts = <12>;
                                clocks = <&gateclk 2>;
                                };
                        };
 
+                       ethernet@70000 {
+                               compatible = "marvell,armada-xp-neta";
+                       };
+
+                       ethernet@74000 {
+                               compatible = "marvell,armada-xp-neta";
+                       };
+
                        xor@f0900 {
                                compatible = "marvell,orion-xor";
                                reg = <0xF0900 0x100
index 1317ee40f4dfd6051daa9efaa77e52dfcee37fe5..04ff8e7b37dfd914e8319b0727aa3e8682efa0dc 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _ASM_ARM_XEN_HYPERVISOR_H
 #define _ASM_ARM_XEN_HYPERVISOR_H
 
+#include <linux/init.h>
+
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
@@ -18,4 +20,10 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 
 extern struct dma_map_ops *xen_dma_ops;
 
+#ifdef CONFIG_XEN
+void __init xen_early_init(void);
+#else
+static inline void xen_early_init(void) { return; }
+#endif
+
 #endif /* _ASM_ARM_XEN_HYPERVISOR_H */
index 0b579b2f4e0e6490bb396c301cd2adf433e3b1ad..1bee8ca124945cdde1226e3cbff5503b9132c526 100644 (file)
@@ -12,7 +12,6 @@
 #include <xen/interface/grant_table.h>
 
 #define phys_to_machine_mapping_valid(pfn) (1)
-#define mfn_to_virt(m)                 (__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 #define pte_mfn            pte_pfn
 #define mfn_pte            pfn_pte
index e6d8c7658ffda7d4e5a03d600d9d1ed20e6ac57f..36c18b73c1f4631f3882ba492dbeb1f99e9b7242 100644 (file)
@@ -46,6 +46,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
 #include <asm/tlbflush.h>
+#include <asm/xen/hypervisor.h>
 
 #include <asm/prom.h>
 #include <asm/mach/arch.h>
@@ -972,6 +973,7 @@ void __init setup_arch(char **cmdline_p)
 
        arm_dt_init_cpu_maps();
        psci_init();
+       xen_early_init();
 #ifdef CONFIG_SMP
        if (is_smp()) {
                if (!mdesc->smp_init || !mdesc->smp_init()) {
index 9ecb8f9c4ef507788e6187fef5f3371815d3c982..d4f7dc87042b28c565ff47721492d4876f655447 100644 (file)
@@ -283,25 +283,25 @@ static int lpc32xx_set_irq_type(struct irq_data *d, unsigned int type)
        case IRQ_TYPE_EDGE_RISING:
                /* Rising edge sensitive */
                __lpc32xx_set_irq_type(d->hwirq, 1, 1);
-               __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+               __irq_set_handler_locked(d->irq, handle_edge_irq);
                break;
 
        case IRQ_TYPE_EDGE_FALLING:
                /* Falling edge sensitive */
                __lpc32xx_set_irq_type(d->hwirq, 0, 1);
-               __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+               __irq_set_handler_locked(d->irq, handle_edge_irq);
                break;
 
        case IRQ_TYPE_LEVEL_LOW:
                /* Low level sensitive */
                __lpc32xx_set_irq_type(d->hwirq, 0, 0);
-               __irq_set_handler_locked(d->hwirq, handle_level_irq);
+               __irq_set_handler_locked(d->irq, handle_level_irq);
                break;
 
        case IRQ_TYPE_LEVEL_HIGH:
                /* High level sensitive */
                __lpc32xx_set_irq_type(d->hwirq, 1, 0);
-               __irq_set_handler_locked(d->hwirq, handle_level_irq);
+               __irq_set_handler_locked(d->irq, handle_level_irq);
                break;
 
        /* Other modes are not supported */
index 7d0f07020c809598c8a5ea292093d3676c083548..6c09cc440a2b24c4c0acfbd3027c74f9a1dc9efe 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpufreq.h>
 #include <linux/cpu.h>
+#include <linux/console.h>
 
 #include <linux/mm.h>
 
@@ -51,7 +52,9 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
 int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
 EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
-static __read_mostly int xen_events_irq = -1;
+static __read_mostly unsigned int xen_events_irq;
+
+static __initdata struct device_node *xen_node;
 
 int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
@@ -150,40 +153,28 @@ static irqreturn_t xen_arm_callback(int irq, void *arg)
  * documentation of the Xen Device Tree format.
  */
 #define GRANT_TABLE_PHYSADDR 0
-static int __init xen_guest_init(void)
+void __init xen_early_init(void)
 {
-       struct xen_add_to_physmap xatp;
-       static struct shared_info *shared_info_page = 0;
-       struct device_node *node;
        int len;
        const char *s = NULL;
        const char *version = NULL;
        const char *xen_prefix = "xen,xen-";
-       struct resource res;
-       phys_addr_t grant_frames;
 
-       node = of_find_compatible_node(NULL, NULL, "xen,xen");
-       if (!node) {
+       xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
+       if (!xen_node) {
                pr_debug("No Xen support\n");
-               return 0;
+               return;
        }
-       s = of_get_property(node, "compatible", &len);
+       s = of_get_property(xen_node, "compatible", &len);
        if (strlen(xen_prefix) + 3  < len &&
                        !strncmp(xen_prefix, s, strlen(xen_prefix)))
                version = s + strlen(xen_prefix);
        if (version == NULL) {
                pr_debug("Xen version not found\n");
-               return 0;
+               return;
        }
-       if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
-               return 0;
-       grant_frames = res.start;
-       xen_events_irq = irq_of_parse_and_map(node, 0);
-       pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n",
-                       version, xen_events_irq, &grant_frames);
 
-       if (xen_events_irq < 0)
-               return -ENODEV;
+       pr_info("Xen %s support found\n", version);
 
        xen_domain_type = XEN_HVM_DOMAIN;
 
@@ -194,9 +185,34 @@ static int __init xen_guest_init(void)
        else
                xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
 
-       if (!shared_info_page)
-               shared_info_page = (struct shared_info *)
-                       get_zeroed_page(GFP_KERNEL);
+       if (!console_set_on_cmdline && !xen_initial_domain())
+               add_preferred_console("hvc", 0, NULL);
+}
+
+static int __init xen_guest_init(void)
+{
+       struct xen_add_to_physmap xatp;
+       struct shared_info *shared_info_page = NULL;
+       struct resource res;
+       phys_addr_t grant_frames;
+
+       if (!xen_domain())
+               return 0;
+
+       if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
+               pr_err("Xen grant table base address not found\n");
+               return -ENODEV;
+       }
+       grant_frames = res.start;
+
+       xen_events_irq = irq_of_parse_and_map(xen_node, 0);
+       if (!xen_events_irq) {
+               pr_err("Xen event channel interrupt not found\n");
+               return -ENODEV;
+       }
+
+       shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
+
        if (!shared_info_page) {
                pr_err("not enough memory\n");
                return -ENOMEM;
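
The hunks above split Xen guest bring-up in two: xen_early_init(), called from setup_arch(), only probes the DT and records the domain type, while the later xen_guest_init() initcall bails out unless that probe succeeded before mapping the grant table and binding the event-channel IRQ. A toy user-space model of the two-phase pattern (names and flow condensed for illustration; not the kernel code):

    #include <stdio.h>
    #include <stdbool.h>

    static bool xen_domain_detected;        /* stands in for xen_domain_type */

    static void early_init_model(bool dt_has_xen_node)
    {
            xen_domain_detected = dt_has_xen_node;  /* record the DT probe */
    }

    static int guest_init_model(void)
    {
            if (!xen_domain_detected)
                    return 0;               /* not a Xen guest: nothing to do */
            printf("map grant table, bind event-channel IRQ\n");
            return 0;
    }

    int main(void)
    {
            early_init_model(true);
            return guest_init_model();
    }
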
index 498325074a06fa911a5cfa9f37137c7936f8b9c4..03e75fef15b8254483929f1332de033a1eeeac05 100644 (file)
 #include <xen/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/memory.h>
+#include <xen/page.h>
 #include <xen/swiotlb-xen.h>
 
 #include <asm/cacheflush.h>
-#include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
index cb7a14c5cd69e6814df607cea20afe9342e0e8d2..887596c67b129a024ffd2fd0ce2db26d4762f89e 100644 (file)
 
 #include <xen/xen.h>
 #include <xen/interface/memory.h>
+#include <xen/page.h>
 #include <xen/swiotlb-xen.h>
 
 #include <asm/cacheflush.h>
-#include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
index ffd3970721bf6497db2f622bcd3ed31fd3007de1..f3067d4d4e35711680376372395667efdc45ad07 100644 (file)
@@ -64,6 +64,7 @@
 #include <asm/psci.h>
 #include <asm/efi.h>
 #include <asm/virt.h>
+#include <asm/xen/hypervisor.h>
 
 unsigned long elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -401,6 +402,7 @@ void __init setup_arch(char **cmdline_p)
        } else {
                psci_acpi_init();
        }
+       xen_early_init();
 
        cpu_read_bootcpu_ops();
 #ifdef CONFIG_SMP
index cfb298d6630582b6924824a6b7bd4aed4663d526..2d48b6a461664e5fd03d775cc4ba922dd56dacf7 100644 (file)
@@ -231,8 +231,7 @@ static int __init eic_probe(struct platform_device *pdev)
                irq_set_chip_data(eic->first_irq + i, eic);
        }
 
-       irq_set_chained_handler(int_irq, demux_eic_irq);
-       irq_set_handler_data(int_irq, eic);
+       irq_set_chained_handler_and_data(int_irq, demux_eic_irq, eic);
 
        if (pdev->id == 0) {
                nmi_eic = eic;
index 835fa04511c85ad0d6e71ba8ee1f3a2dc26fcc1b..272dde481d170e815d740d0880ae613c3567cca8 100644 (file)
@@ -148,14 +148,10 @@ static void psc_irq(unsigned int irq, struct irq_desc *desc)
 
 void __init psc_register_interrupts(void)
 {
-       irq_set_chained_handler(IRQ_AUTO_3, psc_irq);
-       irq_set_handler_data(IRQ_AUTO_3, (void *)0x30);
-       irq_set_chained_handler(IRQ_AUTO_4, psc_irq);
-       irq_set_handler_data(IRQ_AUTO_4, (void *)0x40);
-       irq_set_chained_handler(IRQ_AUTO_5, psc_irq);
-       irq_set_handler_data(IRQ_AUTO_5, (void *)0x50);
-       irq_set_chained_handler(IRQ_AUTO_6, psc_irq);
-       irq_set_handler_data(IRQ_AUTO_6, (void *)0x60);
+       irq_set_chained_handler_and_data(IRQ_AUTO_3, psc_irq, (void *)0x30);
+       irq_set_chained_handler_and_data(IRQ_AUTO_4, psc_irq, (void *)0x40);
+       irq_set_chained_handler_and_data(IRQ_AUTO_5, psc_irq, (void *)0x50);
+       irq_set_chained_handler_and_data(IRQ_AUTO_6, psc_irq, (void *)0x60);
 }
 
 void psc_irq_enable(int irq) {
index 8742e1cee4928f9d522ec329bb60f113ad67a89d..ec9a371f1e62c49b823bdac3b86d101804796da3 100644 (file)
@@ -161,8 +161,8 @@ void __init ar2315_arch_init_irq(void)
        irq = irq_create_mapping(domain, AR2315_MISC_IRQ_AHB);
        setup_irq(irq, &ar2315_ahb_err_interrupt);
 
-       irq_set_chained_handler(AR2315_IRQ_MISC, ar2315_misc_irq_handler);
-       irq_set_handler_data(AR2315_IRQ_MISC, domain);
+       irq_set_chained_handler_and_data(AR2315_IRQ_MISC,
+                                        ar2315_misc_irq_handler, domain);
 
        ar2315_misc_irq_domain = domain;
 }
index 094b938fd603d0da938d4e24aae8533f1413159b..e63e38fa488033499cdf005df028d15e9b616139 100644 (file)
@@ -156,8 +156,8 @@ void __init ar5312_arch_init_irq(void)
        irq = irq_create_mapping(domain, AR5312_MISC_IRQ_AHB_PROC);
        setup_irq(irq, &ar5312_ahb_err_interrupt);
 
-       irq_set_chained_handler(AR5312_IRQ_MISC, ar5312_misc_irq_handler);
-       irq_set_handler_data(AR5312_IRQ_MISC, domain);
+       irq_set_chained_handler_and_data(AR5312_IRQ_MISC,
+                                        ar5312_misc_irq_handler, domain);
 
        ar5312_misc_irq_domain = domain;
 }
index dadb30306a0a10457782801eaec567544816c7d7..f8d0acb4f973635ff323585ce4e5a2166c081a3a 100644 (file)
@@ -384,8 +384,8 @@ static void ar2315_pci_irq_init(struct ar2315_pci_ctrl *apc)
 
        apc->irq_ext = irq_create_mapping(apc->domain, AR2315_PCI_IRQ_EXT);
 
-       irq_set_chained_handler(apc->irq, ar2315_pci_irq_handler);
-       irq_set_handler_data(apc->irq, apc);
+       irq_set_chained_handler_and_data(apc->irq, ar2315_pci_irq_handler,
+                                        apc);
 
        /* Clear any pending Abort or external Interrupts
         * and enable interrupt processing */
index da301e0a2f1f42825898616f1348e3e429228f93..53707aacc0f86cb13134546de07ccaf71d76321d 100644 (file)
@@ -184,8 +184,7 @@ static int __init intc_of_init(struct device_node *node,
 
        rt_intc_w32(INTC_INT_GLOBAL, INTC_REG_ENABLE);
 
-       irq_set_chained_handler(irq, ralink_intc_irq_handler);
-       irq_set_handler_data(irq, domain);
+       irq_set_chained_handler_and_data(irq, ralink_intc_irq_handler, domain);
 
        /* tell the kernel which irq is used for performance monitoring */
        rt_perfcount_irq = irq_create_mapping(domain, 9);
index 6ab3b73efcf8d4304d0b832d4d398f94a0f332d5..480de70f405980c1828f84b6a0eeba5c75203b73 100644 (file)
@@ -320,11 +320,11 @@ void migrate_irqs(void)
                if (irqd_is_per_cpu(data))
                        continue;
 
-               if (cpumask_test_cpu(self, &data->affinity) &&
+               if (cpumask_test_cpu(self, data->affinity) &&
                    !cpumask_intersects(&irq_affinity[irq], cpu_online_mask)) {
                        int cpu_id;
                        cpu_id = cpumask_first(cpu_online_mask);
-                       cpumask_set_cpu(cpu_id, &data->affinity);
+                       cpumask_set_cpu(cpu_id, data->affinity);
                }
                /* We need to operate irq_affinity_online atomically. */
                arch_local_cli_save(flags);
@@ -335,7 +335,7 @@ void migrate_irqs(void)
                        GxICR(irq) = x & GxICR_LEVEL;
                        tmp = GxICR(irq);
 
-                       new = cpumask_any_and(&data->affinity,
+                       new = cpumask_any_and(data->affinity,
                                              cpu_online_mask);
                        irq_affinity_online[irq] = new;
 
index 64707750c78071e1d0168de0f26e357d368d9507..940cbddd9237c833839d244ba59d4f69d608f46b 100644 (file)
@@ -17,13 +17,15 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
 CONFIG_CGROUP_PERF=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=m
@@ -44,6 +46,7 @@ CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_CFQ_GROUP_IOSCHED=y
 CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
@@ -242,9 +245,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
 # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -264,8 +267,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -353,7 +356,6 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_VIRTIO_BLK=y
@@ -458,7 +460,6 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_POSIX_ACL=y
@@ -544,7 +545,6 @@ CONFIG_FRAME_WARN=1024
 CONFIG_READABLE_ASM=y
 CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_PAGEALLOC=y
 CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_SELFTEST=y
@@ -558,6 +558,7 @@ CONFIG_SLUB_STATS=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
 CONFIG_DEBUG_SHIRQ=y
@@ -575,7 +576,6 @@ CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
 CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_PROVE_RCU=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 CONFIG_NOTIFIER_ERROR_INJECTION=m
@@ -611,7 +611,6 @@ CONFIG_TEST_BPF=m
 # CONFIG_STRICT_DEVMEM is not set
 CONFIG_S390_PTDUMP=y
 CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
index 5c3097272cd8c8eec82d252a9210dcf15cf3c36a..d793fec91797ba8d3b27cea97a14f85118dced51 100644 (file)
@@ -17,11 +17,13 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
 CONFIG_CGROUP_PERF=y
 CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=m
@@ -240,9 +242,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
 # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -262,8 +264,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -350,7 +352,6 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_VIRTIO_BLK=y
@@ -455,7 +456,6 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_POSIX_ACL=y
@@ -538,7 +538,7 @@ CONFIG_DEBUG_INFO=y
 CONFIG_FRAME_WARN=1024
 CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_TIMER_STATS=y
@@ -558,7 +558,6 @@ CONFIG_ATOMIC64_SELFTEST=y
 # CONFIG_STRICT_DEVMEM is not set
 CONFIG_S390_PTDUMP=y
 CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
index bda70f1ffd2c59346839d3b293c07eac0f021853..38a77e9c8aa685b09f9aee9b433a85faafd8b431 100644 (file)
@@ -17,11 +17,13 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
 CONFIG_CGROUP_PERF=y
 CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=m
@@ -42,9 +44,10 @@ CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_CFQ_GROUP_IOSCHED=y
 CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
-CONFIG_NR_CPUS=256
+CONFIG_NR_CPUS=512
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
@@ -238,9 +241,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
 # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -260,8 +263,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -348,7 +351,6 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_VIRTIO_BLK=y
@@ -453,7 +455,6 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_POSIX_ACL=y
@@ -536,7 +537,7 @@ CONFIG_DEBUG_INFO=y
 CONFIG_FRAME_WARN=1024
 CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
@@ -553,7 +554,6 @@ CONFIG_ATOMIC64_SELFTEST=y
 # CONFIG_STRICT_DEVMEM is not set
 CONFIG_S390_PTDUMP=y
 CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
index 83ef702d2403686bb00c24251bdf1806fd0ad057..9256b48e7e4395a44d5f6dbf58ace934b93310f3 100644 (file)
@@ -8,7 +8,6 @@ CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_CGROUPS=y
@@ -31,9 +30,11 @@ CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_NR_CPUS=256
 CONFIG_HZ_100=y
@@ -41,7 +42,6 @@ CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA=y
 CONFIG_CRASH_DUMP=y
 CONFIG_BINFMT_MISC=m
 CONFIG_HIBERNATION=y
@@ -125,6 +125,7 @@ CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
 CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_PROVE_LOCKING=y
@@ -135,12 +136,16 @@ CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_PI_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_PROVE_RCU=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_CPU_STALL_INFO is not set
 CONFIG_RCU_TRACE=y
 CONFIG_LATENCYTOP=y
 CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
 CONFIG_KPROBES_SANITY_TEST=y
 # CONFIG_STRICT_DEVMEM is not set
 CONFIG_S390_PTDUMP=y
@@ -187,6 +192,7 @@ CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_ZCRYPT=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
index f5a8e2fcde0c56a3ff17208e10c90cea2f5757b4..91541000378e4dca6e7dfa6d3b604e9c63082dc0 100644 (file)
@@ -8,8 +8,6 @@
 #ifndef _ASM_S390_CPU_H
 #define _ASM_S390_CPU_H
 
-#define MAX_CPU_ADDRESS 255
-
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
index ece606c2ee8674111b81bf9c5a6342e8ae5db421..39ae6a3597478117e1c6328382e034a59b527c77 100644 (file)
@@ -94,7 +94,6 @@ struct dump_save_areas {
 };
 
 extern struct dump_save_areas dump_save_areas;
-struct save_area_ext *dump_save_area_create(int cpu);
 
 extern void do_reipl(void);
 extern void do_halt(void);
index c891f41b27532f5fdfd646cffbd3f8f15171311a..f6ff06077631c8ebf4ede37bced142df25dbc7de 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/cpu.h>
 
 #define SCLP_CHP_INFO_MASK_SIZE                32
+#define SCLP_MAX_CORES                 256
 
 struct sclp_chp_info {
        u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
@@ -26,7 +27,7 @@ struct sclp_ipl_info {
        char loadparm[LOADPARM_LEN];
 };
 
-struct sclp_cpu_entry {
+struct sclp_core_entry {
        u8 core_id;
        u8 reserved0[2];
        u8 : 3;
@@ -38,12 +39,11 @@ struct sclp_cpu_entry {
        u8 reserved1;
 } __attribute__((packed));
 
-struct sclp_cpu_info {
+struct sclp_core_info {
        unsigned int configured;
        unsigned int standby;
        unsigned int combined;
-       int has_cpu_type;
-       struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
+       struct sclp_core_entry core[SCLP_MAX_CORES];
 };
 
 struct sclp_info {
@@ -51,7 +51,7 @@ struct sclp_info {
        unsigned char has_vt220 : 1;
        unsigned char has_siif : 1;
        unsigned char has_sigpif : 1;
-       unsigned char has_cpu_type : 1;
+       unsigned char has_core_type : 1;
        unsigned char has_sprp : 1;
        unsigned int ibc;
        unsigned int mtid;
@@ -60,15 +60,15 @@ struct sclp_info {
        unsigned long long rzm;
        unsigned long long rnmax;
        unsigned long long hamax;
-       unsigned int max_cpu;
+       unsigned int max_cores;
        unsigned long hsa_size;
        unsigned long long facilities;
 };
 extern struct sclp_info sclp;
 
-int sclp_get_cpu_info(struct sclp_cpu_info *info);
-int sclp_cpu_configure(u8 cpu);
-int sclp_cpu_deconfigure(u8 cpu);
+int sclp_get_core_info(struct sclp_core_info *info);
+int sclp_core_configure(u8 core);
+int sclp_core_deconfigure(u8 core);
 int sclp_sdias_blk_count(void);
 int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
 int sclp_chp_configure(struct chp_id chpid);
index b3bd0282dd9870c93f1ddd66472afc4170b356a1..5df26b11cf47c79ca360d5dbc0952399ba2dbe19 100644 (file)
@@ -29,6 +29,7 @@ extern void smp_call_ipl_cpu(void (*func)(void *), void *);
 
 extern int smp_find_processor_id(u16 address);
 extern int smp_store_status(int cpu);
+extern void smp_save_dump_cpus(void);
 extern int smp_vcpu_scheduled(int cpu);
 extern void smp_yield_cpu(int cpu);
 extern void smp_cpu_set_polarization(int cpu, int val);
@@ -54,6 +55,7 @@ static inline int smp_store_status(int cpu) { return 0; }
 static inline int smp_vcpu_scheduled(int cpu) { return 1; }
 static inline void smp_yield_cpu(int cpu) { }
 static inline void smp_fill_possible_mask(void) { }
+static inline void smp_save_dump_cpus(void) { }
 
 #endif /* CONFIG_SMP */
 
index daed3fde42ecb6bf1bd189244b6a5493f3e75e0b..326f717df587c749116a6e9853a5b246188108a8 100644 (file)
@@ -78,15 +78,20 @@ s390_base_pgm_handler_fn:
 #
 # Calls diag 308 subcode 1 and continues execution
 #
-# The following conditions must be ensured before calling this function:
-# * Prefix register = 0
-# * Lowcore protection is disabled
-#
 ENTRY(diag308_reset)
        larl    %r4,.Lctlregs           # Save control registers
        stctg   %c0,%c15,0(%r4)
+       lg      %r2,0(%r4)              # Disable lowcore protection
+       nilh    %r2,0xefff
+       larl    %r4,.Lctlreg0
+       stg     %r2,0(%r4)
+       lctlg   %c0,%c0,0(%r4)
        larl    %r4,.Lfpctl             # Floating point control register
        stfpc   0(%r4)
+       larl    %r4,.Lprefix            # Save prefix register
+       stpx    0(%r4)
+       larl    %r4,.Lprefix_zero       # Set prefix register to 0
+       spx     0(%r4)
        larl    %r4,.Lcontinue_psw      # Save PSW flags
        epsw    %r2,%r3
        stm     %r2,%r3,0(%r4)
@@ -106,6 +111,8 @@ ENTRY(diag308_reset)
        lctlg   %c0,%c15,0(%r4)
        larl    %r4,.Lfpctl             # Restore floating point ctl register
        lfpc    0(%r4)
+       larl    %r4,.Lprefix            # Restore prefix register
+       spx     0(%r4)
        larl    %r4,.Lcontinue_psw      # Restore PSW flags
        lpswe   0(%r4)
 .Lcontinue:
@@ -122,10 +129,16 @@ ENTRY(diag308_reset)
 
        .section .bss
 .align 8
+.Lctlreg0:
+       .quad   0
 .Lctlregs:
        .rept   16
        .quad   0
        .endr
 .Lfpctl:
        .long   0
+.Lprefix:
+       .long   0
+.Lprefix_zero:
+       .long   0
        .previous
index 7a75ad4594e3e721bd91daa1a80fb95fdcd47a9a..0c6c01eb36130b0885c88fae9e8e3c740a5500c9 100644 (file)
@@ -44,31 +44,6 @@ static struct memblock_type oldmem_type = {
 
 struct dump_save_areas dump_save_areas;
 
-/*
- * Allocate and add a save area for a CPU
- */
-struct save_area_ext *dump_save_area_create(int cpu)
-{
-       struct save_area_ext **save_areas, *save_area;
-
-       save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
-       if (!save_area)
-               return NULL;
-       if (cpu + 1 > dump_save_areas.count) {
-               dump_save_areas.count = cpu + 1;
-               save_areas = krealloc(dump_save_areas.areas,
-                                     dump_save_areas.count * sizeof(void *),
-                                     GFP_KERNEL | __GFP_ZERO);
-               if (!save_areas) {
-                       kfree(save_area);
-                       return NULL;
-               }
-               dump_save_areas.areas = save_areas;
-       }
-       dump_save_areas.areas[cpu] = save_area;
-       return save_area;
-}
-
 /*
  * Return physical address for virtual address
  */
@@ -416,7 +391,7 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
        ptr += len;
        /* Copy lower halves of SIMD registers 0-15 */
        for (i = 0; i < 16; i++) {
-               memcpy(ptr, &vx_regs[i], 8);
+               memcpy(ptr, &vx_regs[i].u[2], 8);
                ptr += 8;
        }
        return ptr;
index e6a1578fc00095929db02b51d4894fe9ad59802c..afe05bfb7e008a723388ff6c39f2d83d5693ad65 100644 (file)
@@ -1572,7 +1572,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
 }
 
 #define param_check_sfb_size(name, p) __param_check(name, p, void)
-static struct kernel_param_ops param_ops_sfb_size = {
+static const struct kernel_param_ops param_ops_sfb_size = {
        .set = param_set_sfb_size,
        .get = param_get_sfb_size,
 };
index 73941bf4235088a5bd576b63bebac4076bbd42bf..f7f027caaaaacb33b279f76b33f3525374b6ae34 100644 (file)
@@ -868,6 +868,11 @@ void __init setup_arch(char **cmdline_p)
 
        check_initrd();
        reserve_crashkernel();
+       /*
+        * Be aware that smp_save_dump_cpus() triggers a system reset.
+        * Therefore CPU and device initialization should be done afterwards.
+        */
+       smp_save_dump_cpus();
 
        setup_resources();
        setup_vmcoreinfo();
index 0d9d59d4710e9878d99edd3d59bdfbbd67b93022..6f54c175f5c9012b5cc00f28d3d793c2bfc56ec5 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/crash_dump.h>
+#include <linux/memblock.h>
 #include <asm/asm-offsets.h>
 #include <asm/switch_to.h>
 #include <asm/facility.h>
@@ -69,7 +70,7 @@ struct pcpu {
        u16 address;                    /* physical cpu address */
 };
 
-static u8 boot_cpu_type;
+static u8 boot_core_type;
 static struct pcpu pcpu_devices[NR_CPUS];
 
 unsigned int smp_cpu_mt_shift;
@@ -531,15 +532,12 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
 
 #ifdef CONFIG_CRASH_DUMP
 
-static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
+static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
+                                 int is_boot_cpu)
 {
-       void *lc = pcpu_devices[0].lowcore;
-       struct save_area_ext *sa_ext;
+       void *lc = (void *)(unsigned long) store_prefix();
        unsigned long vx_sa;
 
-       sa_ext = dump_save_area_create(cpu);
-       if (!sa_ext)
-               panic("could not allocate memory for save area\n");
        if (is_boot_cpu) {
                /* Copy the registers of the boot CPU. */
                copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
@@ -554,14 +552,33 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
        if (!MACHINE_HAS_VX)
                return;
        /* Get the VX registers */
-       vx_sa = __get_free_page(GFP_KERNEL);
+       vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
        if (!vx_sa)
                panic("could not allocate memory for VX save area\n");
        __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
        memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
-       free_page(vx_sa);
+       memblock_free(vx_sa, PAGE_SIZE);
 }
 
+int smp_store_status(int cpu)
+{
+       unsigned long vx_sa;
+       struct pcpu *pcpu;
+
+       pcpu = pcpu_devices + cpu;
+       if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+                             0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+               return -EIO;
+       if (!MACHINE_HAS_VX)
+               return 0;
+       vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+       __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+                         vx_sa, NULL);
+       return 0;
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
 /*
  * Collect CPU state of the previous, crashed system.
  * There are four cases:
@@ -589,10 +606,12 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
  *    old system. The ELF sections are picked up by the crash_dump code
  *    via elfcorehdr_addr.
  */
-static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+void __init smp_save_dump_cpus(void)
 {
-       unsigned int cpu, address, i, j;
-       int is_boot_cpu;
+#ifdef CONFIG_CRASH_DUMP
+       int addr, cpu, boot_cpu_addr, max_cpu_addr;
+       struct save_area_ext *sa_ext;
+       bool is_boot_cpu;
 
        if (is_kdump_kernel())
                /* Previous system stored the CPU states. Nothing to do. */
@@ -602,42 +621,36 @@ static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
                return;
        /* Set multi-threading state to the previous system. */
        pcpu_set_smt(sclp.mtid_prev);
-       /* Collect CPU states. */
-       cpu = 0;
-       for (i = 0; i < info->configured; i++) {
-               /* Skip CPUs with different CPU type. */
-               if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+       max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+       for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+               if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+                   SIGP_CC_NOT_OPERATIONAL)
                        continue;
-               for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
-                       address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
-                       is_boot_cpu = (address == pcpu_devices[0].address);
-                       if (is_boot_cpu && !OLDMEM_BASE)
-                               /* Skip boot CPU for standard zfcp dump. */
-                               continue;
-                       /* Get state for this CPu. */
-                       __smp_store_cpu_state(cpu, address, is_boot_cpu);
-               }
+               cpu += 1;
        }
-}
-
-int smp_store_status(int cpu)
-{
-       unsigned long vx_sa;
-       struct pcpu *pcpu;
-
-       pcpu = pcpu_devices + cpu;
-       if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
-                             0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
-               return -EIO;
-       if (!MACHINE_HAS_VX)
-               return 0;
-       vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
-       __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
-                         vx_sa, NULL);
-       return 0;
-}
-
+       dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8);
+       dump_save_areas.count = cpu;
+       boot_cpu_addr = stap();
+       for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+               if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+                   SIGP_CC_NOT_OPERATIONAL)
+                       continue;
+               sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8);
+               dump_save_areas.areas[cpu] = sa_ext;
+               if (!sa_ext)
+                       panic("could not allocate memory for save area\n");
+               is_boot_cpu = (addr == boot_cpu_addr);
+               cpu += 1;
+               if (is_boot_cpu && !OLDMEM_BASE)
+                       /* Skip boot CPU for standard zfcp dump. */
+                       continue;
+               /* Get state for this CPU. */
+               __smp_store_cpu_state(sa_ext, addr, is_boot_cpu);
+       }
+       diag308_reset();
+       pcpu_set_smt(0);
 #endif /* CONFIG_CRASH_DUMP */
+}
 
 void smp_cpu_set_polarization(int cpu, int val)
 {
@@ -649,21 +662,22 @@ int smp_cpu_get_polarization(int cpu)
        return pcpu_devices[cpu].polarization;
 }
 
-static struct sclp_cpu_info *smp_get_cpu_info(void)
+static struct sclp_core_info *smp_get_core_info(void)
 {
        static int use_sigp_detection;
-       struct sclp_cpu_info *info;
+       struct sclp_core_info *info;
        int address;
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+       if (info && (use_sigp_detection || sclp_get_core_info(info))) {
                use_sigp_detection = 1;
-               for (address = 0; address <= MAX_CPU_ADDRESS;
+               for (address = 0;
+                    address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
                     address += (1U << smp_cpu_mt_shift)) {
                        if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
                            SIGP_CC_NOT_OPERATIONAL)
                                continue;
-                       info->cpu[info->configured].core_id =
+                       info->core[info->configured].core_id =
                                address >> smp_cpu_mt_shift;
                        info->configured++;
                }
@@ -674,7 +688,7 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
 
 static int smp_add_present_cpu(int cpu);
 
-static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
 {
        struct pcpu *pcpu;
        cpumask_t avail;
@@ -685,9 +699,9 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
        cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
        cpu = cpumask_first(&avail);
        for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
-               if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+               if (sclp.has_core_type && info->core[i].type != boot_core_type)
                        continue;
-               address = info->cpu[i].core_id << smp_cpu_mt_shift;
+               address = info->core[i].core_id << smp_cpu_mt_shift;
                for (j = 0; j <= smp_cpu_mtid; j++) {
                        if (pcpu_find_address(cpu_present_mask, address + j))
                                continue;
@@ -713,41 +727,37 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
 static void __init smp_detect_cpus(void)
 {
        unsigned int cpu, mtid, c_cpus, s_cpus;
-       struct sclp_cpu_info *info;
+       struct sclp_core_info *info;
        u16 address;
 
        /* Get CPU information */
-       info = smp_get_cpu_info();
+       info = smp_get_core_info();
        if (!info)
                panic("smp_detect_cpus failed to allocate memory\n");
 
        /* Find boot CPU type */
-       if (info->has_cpu_type) {
+       if (sclp.has_core_type) {
                address = stap();
                for (cpu = 0; cpu < info->combined; cpu++)
-                       if (info->cpu[cpu].core_id == address) {
+                       if (info->core[cpu].core_id == address) {
                                /* The boot cpu dictates the cpu type. */
-                               boot_cpu_type = info->cpu[cpu].type;
+                               boot_core_type = info->core[cpu].type;
                                break;
                        }
                if (cpu >= info->combined)
                        panic("Could not find boot CPU type");
        }
 
-#ifdef CONFIG_CRASH_DUMP
-       /* Collect CPU state of previous system */
-       smp_store_cpu_states(info);
-#endif
-
        /* Set multi-threading state for the current system */
-       mtid = boot_cpu_type ? sclp.mtid : sclp.mtid_cp;
+       mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
        mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
        pcpu_set_smt(mtid);
 
        /* Print number of CPUs */
        c_cpus = s_cpus = 0;
        for (cpu = 0; cpu < info->combined; cpu++) {
-               if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+               if (sclp.has_core_type &&
+                   info->core[cpu].type != boot_core_type)
                        continue;
                if (cpu < info->configured)
                        c_cpus += smp_cpu_mtid + 1;
@@ -884,7 +894,7 @@ void __init smp_fill_possible_mask(void)
 
        sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
        sclp_max = min(smp_max_threads, sclp_max);
-       sclp_max = sclp.max_cpu * sclp_max ?: nr_cpu_ids;
+       sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids;
        possible = setup_possible_cpus ?: nr_cpu_ids;
        possible = min(possible, sclp_max);
        for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
@@ -977,7 +987,7 @@ static ssize_t cpu_configure_store(struct device *dev,
        case 0:
                if (pcpu->state != CPU_STATE_CONFIGURED)
                        break;
-               rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+               rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
                if (rc)
                        break;
                for (i = 0; i <= smp_cpu_mtid; i++) {
@@ -992,7 +1002,7 @@ static ssize_t cpu_configure_store(struct device *dev,
        case 1:
                if (pcpu->state != CPU_STATE_STANDBY)
                        break;
-               rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
+               rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
                if (rc)
                        break;
                for (i = 0; i <= smp_cpu_mtid; i++) {
@@ -1107,10 +1117,10 @@ out:
 
 int __ref smp_rescan_cpus(void)
 {
-       struct sclp_cpu_info *info;
+       struct sclp_core_info *info;
        int nr;
 
-       info = smp_get_cpu_info();
+       info = smp_get_core_info();
        if (!info)
                return -ENOMEM;
        get_online_cpus();
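
The smp_save_dump_cpus() rewrite above runs early from setup_arch(), before the buddy allocator is up, hence memblock_alloc() instead of kmalloc()/__get_free_page(). It sizes the save-area array with a two-pass sweep: count the operational CPU addresses via SIGP SENSE, allocate, then store each CPU's state. A condensed sketch of the idiom, where cpu_is_operational() and store_state() are illustrative stand-ins for the __pcpu_sigp_relax(..., SIGP_SENSE, ...) test and __smp_store_cpu_state():

    void **areas;
    int addr, cpu = 0;

    for (addr = 0; addr <= max_cpu_addr; addr++)          /* pass 1: count */
            if (cpu_is_operational(addr))
                    cpu++;
    areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8);
    for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) /* pass 2: store */
            if (cpu_is_operational(addr))
                    store_state(areas[cpu++], addr);
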
index d3766dd67e23266b6670d90eedc9b02c26304e7b..fee782acc2ee51f6a3aae4b28152ec76981c8350 100644 (file)
@@ -250,7 +250,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 ({                                                             \
        /* Branch instruction needs 6 bytes */                  \
        int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
-       _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask);      \
+       _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);   \
        REG_SET_SEEN(b1);                                       \
        REG_SET_SEEN(b2);                                       \
 })
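
The masking above matters because rel is a signed int: a backward branch yields a negative displacement whose sign extension would OR stray one-bits over the opcode and register fields already placed in the upper bits of the 6-byte instruction word. A worked example with illustrative values:

    u32 op1  = 0xec000000;   /* illustrative opcode half            */
    u32 regs = 0x12;         /* illustrative reg(b1, b2) result     */
    int rel  = -2;           /* backward branch: 0xfffffffe as u32  */

    u32 bad  = op1 | regs << 16 | rel;            /* sign bits clobber the
                                                     opcode/register fields */
    u32 good = op1 | regs << 16 | (rel & 0xffff); /* only bits 0..15 are
                                                     set: 0xfffe, correct   */
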
index 59cf0b911898c6b800d375eb4c45d8f356e3547d..9def1f52d03a4f86e0cdfba6d90c0b83ec670c18 100644 (file)
@@ -24,11 +24,14 @@ config TILE
        select MODULES_USE_ELF_RELA
        select HAVE_ARCH_TRACEHOOK
        select HAVE_SYSCALL_TRACEPOINTS
+       select USER_STACKTRACE_SUPPORT
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select HAVE_DEBUG_STACKOVERFLOW
        select ARCH_WANT_FRAME_POINTERS
        select HAVE_CONTEXT_TRACKING
        select EDAC_SUPPORT
+       select GENERIC_STRNCPY_FROM_USER
+       select GENERIC_STRNLEN_USER
 
 # FIXME: investigate whether we need/want these options.
 #      select HAVE_IOREMAP_PROT
@@ -125,8 +128,10 @@ config HVC_TILE
        select HVC_IRQ if TILEGX
        def_bool y
 
+# Building with ARCH=tilegx (or ARCH=tile) implies using the
+# 64-bit TILE-Gx toolchain, so force CONFIG_TILEGX on.
 config TILEGX
-       bool "Building for TILE-Gx (64-bit) processor"
+       def_bool ARCH != "tilepro"
        select SPARSE_IRQ
        select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
        select HAVE_FUNCTION_TRACER
index 1fe86911838b272d80fc0c7232a9f56b21f0e949..84a924034bdbf816f5bb41442251ce13f23f30b5 100644 (file)
@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
 
 void setup_irq_regs(void);
 
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
 #endif /* _ASM_TILE_IRQ_H */
index dd4f9f17e30aa8c5887607c5f5f933f3ab00666e..139dfdee013404dfd9e15263d01d431f40ab2332 100644 (file)
@@ -111,8 +111,6 @@ struct thread_struct {
        unsigned long long interrupt_mask;
        /* User interrupt-control 0 state */
        unsigned long intctrl_0;
-       /* Is this task currently doing a backtrace? */
-       bool in_backtrace;
        /* Any other miscellaneous processor state bits */
        unsigned long proc_status;
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
index c0a77b38d39a73c8b7064b4b92a340a92b7f83e8..b14b1ba5bf9c328959bea3a14643fbb87e4ae7b1 100644 (file)
@@ -41,8 +41,12 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
         * to claim the lock is held, since it will be momentarily
         * if not already.  There's no need to wait for a "valid"
         * lock->next_ticket to become available.
+        * Use READ_ONCE() to ensure that calling this in a loop is OK.
         */
-       return lock->next_ticket != lock->current_ticket;
+       int curr = READ_ONCE(lock->current_ticket);
+       int next = READ_ONCE(lock->next_ticket);
+
+       return next != curr;
 }
 
 void arch_spin_lock(arch_spinlock_t *lock);
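
Without READ_ONCE() the compiler is free to hoist the ticket loads out of a caller's polling loop and spin forever on a stale value; with it, every call performs fresh loads. A sketch of the kind of caller this protects (the arch_spin_unlock_wait() change further below is the real in-tree beneficiary):

    /* hypothetical poller */
    static void wait_until_unlocked(arch_spinlock_t *lock)
    {
            while (arch_spin_is_locked(lock))
                    cpu_relax();    /* READ_ONCE() makes each test a fresh load */
    }
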
index 9a12b9c7e5d36d13ba94fd8bdd5f3d824996f233..b9718fb4e74a7703a0fbf3c37691b9c00a84aced 100644 (file)
@@ -18,6 +18,8 @@
 #ifndef _ASM_TILE_SPINLOCK_64_H
 #define _ASM_TILE_SPINLOCK_64_H
 
+#include <linux/compiler.h>
+
 /* Shifts and masks for the various fields in "lock". */
 #define __ARCH_SPIN_CURRENT_SHIFT      17
 #define __ARCH_SPIN_NEXT_MASK          0x7fff
@@ -44,7 +46,8 @@ static inline u32 arch_spin_next(u32 val)
 /* The lock is locked if a task would have to wait to get it. */
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-       u32 val = lock->lock;
+       /* Use READ_ONCE() to ensure that calling this in a loop is OK. */
+       u32 val = READ_ONCE(lock->lock);
        return arch_spin_current(val) != arch_spin_next(val);
 }
 
index 0e9d382a2d451482ff2d5f686cdecaa8b7f56a05..c3cb42615a9fa40dc19be4307a91f666a0f2ba21 100644 (file)
@@ -58,17 +58,14 @@ extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
 /* Advance to the next frame. */
 extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
 
+/* Dump just the contents of the pt_regs structure. */
+extern void tile_show_regs(struct pt_regs *);
+
 /*
  * Dump stack given complete register info. Use only from the
  * architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
  */
-extern void tile_show_stack(struct KBacktraceIterator *, int headers);
-
-/* Dump stack of current process, with registers to seed the backtrace. */
-extern void dump_stack_regs(struct pt_regs *);
-
-/* Helper method for assembly dump_stack(). */
-extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+extern void tile_show_stack(struct KBacktraceIterator *);
 
 #endif /* _ASM_TILE_STACK_H */
index f804c39a5e4d63998b9a9d75f9ba65fdc2bb0aff..dc1fb28d9636271962aa61250439a2b966e25379 100644 (file)
@@ -42,6 +42,7 @@ struct thread_info {
        unsigned long           unalign_jit_tmp[4]; /* temp r0..r3 storage */
        void __user             *unalign_jit_base; /* unalign fixup JIT base */
 #endif
+       bool in_backtrace;                      /* currently doing backtrace? */
 };
 
 /*
index 4b99a1c3aab2533c3bafe930cb6f92b5fc17117c..11c82270c1f5be730fa598314515c66764d66947 100644 (file)
@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
 /* kernel/messaging.c */
 void hv_message_intr(struct pt_regs *, int intnum);
 
+#define        TILE_NMI_DUMP_STACK     1       /* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
 /* kernel/irq.c */
 void tile_dev_intr(struct pt_regs *, int intnum);
 
index a33276bf5ca1dfd284bc50545e107800c93cf651..0a9c4265763bd1a26f5f38aa20460c475670cea5 100644 (file)
@@ -64,6 +64,13 @@ static inline int is_arch_mappable_range(unsigned long addr,
 #define is_arch_mappable_range(addr, size) 0
 #endif
 
+/*
+ * Note that using this definition ignores is_arch_mappable_range(),
+ * so on tilepro, code that uses user_addr_max() is constrained not
+ * to reference the tilepro user-interrupt region.
+ */
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
+
 /*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
@@ -471,62 +478,9 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
 #endif
 
 
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only.  This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-extern long strnlen_user_asm(const char __user *str, long n);
-static inline long __must_check strnlen_user(const char __user *str, long n)
-{
-       might_fault();
-       return strnlen_user_asm(str, n);
-}
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- * Caller must check the specified block with access_ok() before calling
- * this function.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
-static inline long __must_check __strncpy_from_user(
-       char *dst, const char __user *src, long count)
-{
-       might_fault();
-       return strncpy_from_user_asm(dst, src, count);
-}
-static inline long __must_check strncpy_from_user(
-       char *dst, const char __user *src, long count)
-{
-       if (access_ok(VERIFY_READ, src, 1))
-               return __strncpy_from_user(dst, src, count);
-       return -EFAULT;
-}
+extern long strnlen_user(const char __user *str, long n);
+extern long strlen_user(const char __user *str);
+extern long strncpy_from_user(char *dst, const char __user *src, long);
 
 /**
  * clear_user: - Zero a block of memory in user space.
diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h
new file mode 100644 (file)
index 0000000..9e5ce0d
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/byteorder.h>
+
+struct word_at_a_time { /* unused */ };
+#define WORD_AT_A_TIME_CONSTANTS {}
+
+/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */
+static inline unsigned long has_zero(unsigned long val, unsigned long *data,
+                                    const struct word_at_a_time *c)
+{
+#ifdef __tilegx__
+       unsigned long mask = __insn_v1cmpeqi(val, 0);
+#else /* tilepro */
+       unsigned long mask = __insn_seqib(val, 0);
+#endif
+       *data = mask;
+       return mask;
+}
+
+/* These operations are both nops. */
+#define prep_zero_mask(val, data, c) (data)
+#define create_zero_mask(data) (data)
+
+/* And this operation just depends on endianness. */
+static inline long find_zero(unsigned long mask)
+{
+#ifdef __BIG_ENDIAN
+       return __builtin_clzl(mask) >> 3;
+#else
+       return __builtin_ctzl(mask) >> 3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
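
These hooks are what the newly selected GENERIC_STRNCPY_FROM_USER/GENERIC_STRNLEN_USER code calls to scan a word at a time. A simplified sketch of how lib/strnlen_user.c consumes them; the load of src, the alignment handling, and the len bookkeeping are elided/illustrative:

    struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
    unsigned long data;
    unsigned long word = *(const unsigned long *)src;  /* illustrative load */

    if (has_zero(word, &data, &constants)) {
            data = prep_zero_mask(word, data, &constants);
            data = create_zero_mask(data);
            return len + find_zero(data);   /* byte offset of the NUL */
    }
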
index e0e6af4e783b077ca01968ae54af1a2a622834fb..f10b332b3b65d88f572ff2109ac45785ebc21eb0 100644 (file)
 /** hv_console_set_ipi */
 #define HV_DISPATCH_CONSOLE_SET_IPI               63
 
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI                      65
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END                          64
+#define _HV_DISPATCH_END                          66
 
 
 #ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
 #define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
 /** Device interrupt downcall interrupt vector */
 #define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL            64
+
+#define HV_NMI_FLAG_FORCE    0x1  /**< Force an NMI downcall regardless of
+               the ICS bit of the client. */
 
 #ifndef __ASSEMBLER__
 
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
 int hv_dev_poll_cancel(int devhdl);
 
 
+/** NMI information */
+typedef struct
+{
+  /** Result: negative error, or HV_NMI_RESULT_xxx. */
+  int result;
+
+  /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+  HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK        0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS  1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV   2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE    0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ *  This will cause the NMI to be issued on the remote tile regardless
+ *  of the state of the client interrupt mask.  However, if the remote
+ *  tile is in the hypervisor, it will not execute the NMI, and
+ *  HV_NMI_RESULT_FAIL_HV will be returned.  Similarly, if the remote
+ *  tile is in a client interrupt critical section at the time of the
+ *  NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ *  be returned.  In this second case, however, if HV_NMI_FLAG_FORCE
+ *  is set in flags, then the remote tile will enter its NMI interrupt
+ *  vector regardless.  Forcing the NMI vector during an interrupt
+ *  critical section will mean that the client can not safely continue
+ *  execution after handling the interrupt.
+ *
+ *  @param tile Tile to which the NMI request is sent.
+ *  @param info NMI information which is defined by and interpreted by the
+ *         supervisor, is passed to the specified tile, and is
+ *         stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ *         specified tile when entering the NMI handler routine.
+ *         Typically, this parameter stores the NMI type, or an aligned
+ *         VA plus some special bits, etc.
+ *  @param flags Flags (HV_NMI_FLAG_xxx).
+ *  @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
 /** Scatter-gather list for preada/pwritea calls. */
 typedef struct
 #if CHIP_VA_WIDTH() <= 32
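
A minimal hv_send_nmi() caller sketch (cpu is illustrative); the full retry loop and per-result handling appear in the arch/tile/kernel/process.c hunk below:

    HV_Coord tile = { .x = cpu_x(cpu), .y = cpu_y(cpu) };
    HV_NMI_Info info = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);

    if (info.result == HV_NMI_RESULT_FAIL_ICS)
            pr_warn("cpu %d in ICS at pc %#llx\n", cpu, info.pc);
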
index 3d9175992a203391bd2918b4c59532257207b702..670a3569450fd39eca59decc0f626565acb3d0b6 100644 (file)
@@ -27,13 +27,6 @@ STD_ENTRY(current_text_addr)
        { move r0, lr; jrp lr }
        STD_ENDPROC(current_text_addr)
 
-STD_ENTRY(dump_stack)
-       { move r2, lr; lnk r1 }
-       { move r4, r52; addli r1, r1, dump_stack - . }
-       { move r3, sp; j _dump_stack }
-       jrp lr   /* keep backtracer happy */
-       STD_ENDPROC(dump_stack)
-
 STD_ENTRY(KBacktraceIterator_init_current)
        { move r2, lr; lnk r1 }
        { move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
index 2ab4566223912399504d6860e1a20032add367e4..d78ee2ad610c640c5ac048e09078c3adb936f4e5 100644 (file)
@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
 gensym hv_get_ipi_pte, 0x700, 32
 gensym hv_set_pte_super_shift, 0x720, 32
 gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x840, 30656
index 85c74ad29312d835aab1dd96e6f7b4e6e8f6d058..add0d71395c63221060c9e34bf154efa203d5745 100644 (file)
@@ -75,6 +75,7 @@
 #define hv_get_ipi_pte _hv_get_ipi_pte
 #define hv_set_pte_super_shift _hv_set_pte_super_shift
 #define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
 #include <hv/hypervisor.h>
 #undef hv_init
 #undef hv_install_context
 #undef hv_get_ipi_pte
 #undef hv_set_pte_super_shift
 #undef hv_console_set_ipi
+#undef hv_send_nmi
 
 /*
  * Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
         HV_VirtAddr, tlb_va, unsigned long, tlb_length,
         unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
         HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+        __hv64, flags)
index 5b67efcecabd17603deb47cd622d94e9b710d470..800b91d3f9dc79f15b22a08e3bba21f89640268a 100644 (file)
@@ -515,6 +515,10 @@ intvec_\vecname:
        .ifc \c_routine, handle_perf_interrupt
        mfspr   r2, AUX_PERF_COUNT_STS
        .endif
+       .ifc \c_routine, do_nmi
+       mfspr   r2, SPR_SYSTEM_SAVE_K_2   /* nmi type */
+       .else
+       .endif
        .endif
        .endif
        .endif
@@ -1571,3 +1575,5 @@ intrpt_start:
 
        /* Synthetic interrupt delivered only by the simulator */
        int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+       /* Synthetic interrupt delivered by hv */
+       int_hand     INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
index b403c2e3e26344b7468a9d775c177bbcfd702118..a45213781ad0e677c36bde496120a8db1479aa3d 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/delay.h>
 #include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
@@ -132,7 +133,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                       (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
                callee_regs[0] = sp;   /* r30 = function */
                callee_regs[1] = arg;  /* r31 = arg */
-               childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
                p->thread.pc = (unsigned long) ret_from_kernel_thread;
                return 0;
        }
@@ -546,31 +546,141 @@ void exit_thread(void)
 #endif
 }
 
-void show_regs(struct pt_regs *regs)
+void tile_show_regs(struct pt_regs *regs)
 {
-       struct task_struct *tsk = validate_current();
        int i;
-
-       if (tsk != &corrupt_current)
-               show_regs_print_info(KERN_ERR);
 #ifdef __tilegx__
        for (i = 0; i < 17; i++)
-               pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+               pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
                       i, regs->regs[i], i+18, regs->regs[i+18],
                       i+36, regs->regs[i+36]);
-       pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n",
+       pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
               regs->regs[17], regs->regs[35], regs->tp);
-       pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr);
+       pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
 #else
        for (i = 0; i < 13; i++)
-               pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+               pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
+                      " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
                       i, regs->regs[i], i+14, regs->regs[i+14],
                       i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
-       pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n",
+       pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
               regs->regs[13], regs->tp, regs->sp, regs->lr);
 #endif
-       pr_err(" pc : " REGFMT " ex1: %ld     faultnum: %ld\n",
-              regs->pc, regs->ex1, regs->faultnum);
+       pr_err(" pc : "REGFMT" ex1: %ld     faultnum: %ld flags:%s%s%s%s\n",
+              regs->pc, regs->ex1, regs->faultnum,
+              is_compat_task() ? " compat" : "",
+              (regs->flags & PT_FLAGS_DISABLE_IRQ) ? " noirq" : "",
+              !(regs->flags & PT_FLAGS_CALLER_SAVES) ? " nocallersave" : "",
+              (regs->flags & PT_FLAGS_RESTORE_REGS) ? " restoreregs" : "");
+}
+
+void show_regs(struct pt_regs *regs)
+{
+       struct KBacktraceIterator kbt;
+
+       show_regs_print_info(KERN_DEFAULT);
+       tile_show_regs(regs);
+
+       KBacktraceIterator_init(&kbt, NULL, regs);
+       tile_show_stack(&kbt);
+}
+
+/* To ensure stack dumps on tiles occur one at a time. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* To ensure no new backtrace starts before all of the stack dumps are done. */
+static atomic_t backtrace_cpus;
+/* The cpu mask used to avoid reentrancy. */
+static struct cpumask backtrace_mask;
 
-       dump_stack_regs(regs);
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+       int is_idle = is_idle_task(current) && !in_interrupt();
+       int cpu;
+
+       nmi_enter();
+       cpu = smp_processor_id();
+       if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
+               goto done;
+
+       spin_lock(&backtrace_lock);
+       if (is_idle)
+               pr_info("CPU: %d idle\n", cpu);
+       else
+               show_regs(regs);
+       spin_unlock(&backtrace_lock);
+       atomic_dec(&backtrace_cpus);
+done:
+       nmi_exit();
+}
+
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+       struct cpumask mask;
+       HV_Coord tile;
+       unsigned int timeout;
+       int cpu;
+       int ongoing;
+       HV_NMI_Info info[NR_CPUS];
+
+       ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+       if (ongoing != 0) {
+               pr_err("Trying to do all-cpu backtrace.\n");
+               pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+                      ongoing);
+               if (self) {
+                       pr_err("Reporting the stack on this cpu only.\n");
+                       dump_stack();
+               }
+               return;
+       }
+
+       cpumask_copy(&mask, cpu_online_mask);
+       cpumask_clear_cpu(smp_processor_id(), &mask);
+       cpumask_copy(&backtrace_mask, &mask);
+
+       /* Backtrace for myself first. */
+       if (self)
+               dump_stack();
+
+       /* Tentatively dump stack on remote tiles via NMI. */
+       timeout = 100;
+       while (!cpumask_empty(&mask) && timeout) {
+               for_each_cpu(cpu, &mask) {
+                       tile.x = cpu_x(cpu);
+                       tile.y = cpu_y(cpu);
+                       info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+                       if (info[cpu].result == HV_NMI_RESULT_OK)
+                               cpumask_clear_cpu(cpu, &mask);
+               }
+
+               mdelay(10);
+               timeout--;
+       }
+
+       /* Warn about cpus stuck in ICS and decrement their counts here. */
+       if (!cpumask_empty(&mask)) {
+               for_each_cpu(cpu, &mask) {
+                       switch (info[cpu].result) {
+                       case HV_NMI_RESULT_FAIL_ICS:
+                               pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+                                       cpu, info[cpu].pc);
+                               break;
+                       case HV_NMI_RESULT_FAIL_HV:
+                               pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+                                       cpu);
+                               break;
+                       case HV_ENOSYS:
+                               pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+                               goto skip_for_each;
+                       default:  /* should not happen */
+                               pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+                                       cpu, info[cpu].result, info[cpu].pc);
+                               break;
+                       }
+               }
+skip_for_each:
+               atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
+       }
 }
+#endif /* __tilegx__ */
index d366675e4bf88ef10e18f53637abd314b69b3725..99c9ff87e0187502179f8012afa5bd54309d2eb9 100644 (file)
@@ -71,7 +71,7 @@ static unsigned long __initdata node_percpu[MAX_NUMNODES];
  * per-CPU stack and boot info.
  */
 DEFINE_PER_CPU(unsigned long, boot_sp) =
-       (unsigned long)init_stack + THREAD_SIZE;
+       (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA;
 
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
index c42dce50acd81c94b26285410a9ce6e70d2e6e98..35d34635e4f1305473f5cf1990d185216657c59d 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/mmzone.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/hardirq.h>
 #include <linux/string.h>
 #include <asm/backtrace.h>
 #include <asm/page.h>
@@ -109,7 +110,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
                if (kbt->verbose)
                        pr_err("  <%s while in user mode>\n", fault);
        } else {
-               if (kbt->verbose)
+               if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0))
                        pr_err("  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
                               p->pc, p->sp, p->ex1);
                return NULL;
@@ -119,10 +120,12 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
        return p;
 }
 
-/* Is the pc pointing to a sigreturn trampoline? */
-static int is_sigreturn(unsigned long pc)
+/* Is the iterator pointing to a sigreturn trampoline? */
+static int is_sigreturn(struct KBacktraceIterator *kbt)
 {
-       return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
+       return kbt->task->mm &&
+               (kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base +
+                               (ulong)&__vdso_rt_sigreturn));
 }
 
 /* Return a pt_regs pointer for a valid signal handler frame */
@@ -131,7 +134,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
 {
        BacktraceIterator *b = &kbt->it;
 
-       if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
+       if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET &&
            b->sp % sizeof(long) == 0) {
                int retval;
                pagefault_disable();
@@ -151,11 +154,6 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
        return NULL;
 }
 
-static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
-{
-       return is_sigreturn(kbt->it.pc);
-}
-
 static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
 {
        struct pt_regs *p;
@@ -178,7 +176,7 @@ static int KBacktraceIterator_next_item_inclusive(
 {
        for (;;) {
                do {
-                       if (!KBacktraceIterator_is_sigreturn(kbt))
+                       if (!is_sigreturn(kbt))
                                return KBT_ONGOING;
                } while (backtrace_next(&kbt->it));
 
@@ -357,51 +355,50 @@ static void describe_addr(struct KBacktraceIterator *kbt,
  */
 static bool start_backtrace(void)
 {
-       if (current->thread.in_backtrace) {
+       if (current_thread_info()->in_backtrace) {
                pr_err("Backtrace requested while in backtrace!\n");
                return false;
        }
-       current->thread.in_backtrace = true;
+       current_thread_info()->in_backtrace = true;
        return true;
 }
 
 static void end_backtrace(void)
 {
-       current->thread.in_backtrace = false;
+       current_thread_info()->in_backtrace = false;
 }
 
 /*
  * This method wraps the backtracer's more generic support.
  * It is only invoked from the architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
  */
-void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+void tile_show_stack(struct KBacktraceIterator *kbt)
 {
        int i;
        int have_mmap_sem = 0;
 
        if (!start_backtrace())
                return;
-       if (headers) {
-               /*
-                * Add a blank line since if we are called from panic(),
-                * then bust_spinlocks() spit out a space in front of us
-                * and it will mess up our KERN_ERR.
-                */
-               pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
-                      kbt->task->pid, kbt->task->tgid, kbt->task->comm,
-                      raw_smp_processor_id(), get_cycles());
-       }
        kbt->verbose = 1;
        i = 0;
        for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
                char namebuf[KSYM_NAME_LEN+100];
                unsigned long address = kbt->it.pc;
 
-               /* Try to acquire the mmap_sem as we pass into userspace. */
-               if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+               /*
+                * Try to acquire the mmap_sem as we pass into userspace.
+                * If we're in an interrupt context, don't even try, since
+                * it's not safe to call e.g. d_path() from an interrupt,
+                * since it uses spin locks without disabling interrupts.
+                * Note we test "kbt->task == current", not "kbt->is_current",
+                * since we're checking that "current" will work in d_path().
+                */
+               if (kbt->task == current && address < PAGE_OFFSET &&
+                   !have_mmap_sem && kbt->task->mm && !in_interrupt()) {
                        have_mmap_sem =
                                down_read_trylock(&kbt->task->mm->mmap_sem);
+               }
 
                describe_addr(kbt, address, have_mmap_sem,
                              namebuf, sizeof(namebuf));
@@ -416,24 +413,12 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
        }
        if (kbt->end == KBT_LOOP)
                pr_err("Stack dump stopped; next frame identical to this one\n");
-       if (headers)
-               pr_err("Stack dump complete\n");
        if (have_mmap_sem)
                up_read(&kbt->task->mm->mmap_sem);
        end_backtrace();
 }
 EXPORT_SYMBOL(tile_show_stack);
 
-
-/* This is called from show_regs() and _dump_stack() */
-void dump_stack_regs(struct pt_regs *regs)
-{
-       struct KBacktraceIterator kbt;
-       KBacktraceIterator_init(&kbt, NULL, regs);
-       tile_show_stack(&kbt, 1);
-}
-EXPORT_SYMBOL(dump_stack_regs);
-
 static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
                                       ulong pc, ulong lr, ulong sp, ulong r52)
 {
@@ -445,11 +430,15 @@ static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
        return regs;
 }
 
-/* This is called from dump_stack() and just converts to pt_regs */
+/* Deprecated function currently only used by kernel_double_fault(). */
 void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
+       struct KBacktraceIterator kbt;
        struct pt_regs regs;
-       dump_stack_regs(regs_to_pt_regs(&regs, pc, lr, sp, r52));
+
+       regs_to_pt_regs(&regs, pc, lr, sp, r52);
+       KBacktraceIterator_init(&kbt, NULL, &regs);
+       tile_show_stack(&kbt);
 }
 
 /* This is called from KBacktraceIterator_init_current() */
@@ -461,22 +450,30 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
                                regs_to_pt_regs(&regs, pc, lr, sp, r52));
 }
 
-/* This is called only from kernel/sched/core.c, with esp == NULL */
+/*
+ * Called from sched_show_task() with task != NULL, or dump_stack()
+ * with task == NULL.  The esp argument is always NULL.
+ */
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
        struct KBacktraceIterator kbt;
-       if (task == NULL || task == current)
+       if (task == NULL || task == current) {
                KBacktraceIterator_init_current(&kbt);
-       else
+               KBacktraceIterator_next(&kbt);  /* don't show first frame */
+       } else {
                KBacktraceIterator_init(&kbt, task, NULL);
-       tile_show_stack(&kbt, 0);
+       }
+       tile_show_stack(&kbt);
 }
 
 #ifdef CONFIG_STACKTRACE
 
 /* Support generic Linux stack API too */
 
-void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+static void save_stack_trace_common(struct task_struct *task,
+                                   struct pt_regs *regs,
+                                   bool user,
+                                   struct stack_trace *trace)
 {
        struct KBacktraceIterator kbt;
        int skip = trace->skip;
@@ -484,31 +481,57 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
 
        if (!start_backtrace())
                goto done;
-       if (task == NULL || task == current)
+       if (regs != NULL) {
+               KBacktraceIterator_init(&kbt, NULL, regs);
+       } else if (task == NULL || task == current) {
                KBacktraceIterator_init_current(&kbt);
-       else
+               skip++;  /* don't show KBacktraceIterator_init_current */
+       } else {
                KBacktraceIterator_init(&kbt, task, NULL);
+       }
        for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
                if (skip) {
                        --skip;
                        continue;
                }
-               if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
+               if (i >= trace->max_entries ||
+                   (!user && kbt.it.pc < PAGE_OFFSET))
                        break;
                trace->entries[i++] = kbt.it.pc;
        }
        end_backtrace();
 done:
+       if (i < trace->max_entries)
+               trace->entries[i++] = ULONG_MAX;
        trace->nr_entries = i;
 }
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+       save_stack_trace_common(task, NULL, false, trace);
+}
 EXPORT_SYMBOL(save_stack_trace_tsk);
 
 void save_stack_trace(struct stack_trace *trace)
 {
-       save_stack_trace_tsk(NULL, trace);
+       save_stack_trace_common(NULL, NULL, false, trace);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+       save_stack_trace_common(NULL, regs, false, trace);
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+       /* Trace user stack if we are not a kernel thread. */
+       if (current->mm)
+               save_stack_trace_common(NULL, task_pt_regs(current),
+                                       true, trace);
+       else if (trace->nr_entries < trace->max_entries)
+               trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 #endif
 
 /* In entry.S */
index 312fc134c1cb12dc610b7b8bfea3c856b9752674..0011a9ff05259c51280b63713c70d340df558b7a 100644 (file)
@@ -395,6 +395,21 @@ done:
        exception_exit(prev_state);
 }
 
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+       switch (reason) {
+       case TILE_NMI_DUMP_STACK:
+               do_nmi_dump_stack(regs);
+               break;
+       default:
+               panic("Unexpected do_nmi type %ld", reason);
+               return;
+       }
+}
+
+/* Deprecated function currently only used here. */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
        _dump_stack(dummy, pc, lr, sp, r52);
index 8bb21eda07d8d0e3eff6a9fac689725ff939d31a..e63310c49742ca3036f9ed50556ff382bc36722b 100644 (file)
@@ -67,7 +67,7 @@ static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
        u64 ns;
 
        do {
-               count = read_seqcount_begin(&vdso->tb_seq);
+               count = raw_read_seqcount_begin(&vdso->tb_seq);
                ts->tv_sec = vdso->wall_time_sec;
                ns = vdso->wall_time_snsec;
                ns += vgetsns(vdso);
@@ -86,7 +86,7 @@ static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
        u64 ns;
 
        do {
-               count = read_seqcount_begin(&vdso->tb_seq);
+               count = raw_read_seqcount_begin(&vdso->tb_seq);
                ts->tv_sec = vdso->monotonic_time_sec;
                ns = vdso->monotonic_time_snsec;
                ns += vgetsns(vdso);
@@ -105,7 +105,7 @@ static inline int do_realtime_coarse(struct vdso_data *vdso,
        unsigned count;
 
        do {
-               count = read_seqcount_begin(&vdso->tb_seq);
+               count = raw_read_seqcount_begin(&vdso->tb_seq);
                ts->tv_sec = vdso->wall_time_coarse_sec;
                ts->tv_nsec = vdso->wall_time_coarse_nsec;
        } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -119,7 +119,7 @@ static inline int do_monotonic_coarse(struct vdso_data *vdso,
        unsigned count;
 
        do {
-               count = read_seqcount_begin(&vdso->tb_seq);
+               count = raw_read_seqcount_begin(&vdso->tb_seq);
                ts->tv_sec = vdso->monotonic_time_coarse_sec;
                ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
        } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -137,7 +137,7 @@ struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
        /* The use of the timezone is obsolete, normally tz is NULL. */
        if (unlikely(tz != NULL)) {
                do {
-                       count = read_seqcount_begin(&vdso->tz_seq);
+                       count = raw_read_seqcount_begin(&vdso->tz_seq);
                        tz->tz_minuteswest = vdso->tz_minuteswest;
                        tz->tz_dsttime = vdso->tz_dsttime;
                } while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
index 82733c87d67ed0754fe6bef2b1063b5c22529b50..9d171ca4302c6761eab62b486aba39d86da2f873 100644 (file)
@@ -18,8 +18,6 @@
 
 /* arch/tile/lib/usercopy.S */
 #include <linux/uaccess.h>
-EXPORT_SYMBOL(strnlen_user_asm);
-EXPORT_SYMBOL(strncpy_from_user_asm);
 EXPORT_SYMBOL(clear_user_asm);
 EXPORT_SYMBOL(flush_user_asm);
 EXPORT_SYMBOL(finv_user_asm);
@@ -28,7 +26,6 @@ EXPORT_SYMBOL(finv_user_asm);
 #include <linux/kernel.h>
 #include <asm/processor.h>
 EXPORT_SYMBOL(current_text_addr);
-EXPORT_SYMBOL(dump_stack);
 
 /* arch/tile/kernel/head.S */
 EXPORT_SYMBOL(empty_zero_page);
index b34f79aada48efca7a1b350894f0dd7f4b98a7f4..88c2a53362e738110913134b840e1abe01df9fd9 100644 (file)
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
        u32 iterations = 0;
-       while (arch_spin_is_locked(lock))
+       int curr = READ_ONCE(lock->current_ticket);
+       int next = READ_ONCE(lock->next_ticket);
+
+       /* Return immediately if unlocked. */
+       if (next == curr)
+               return;
+
+       /* Wait until the current locker has released the lock. */
+       do {
                delay_backoff(iterations++);
+       } while (READ_ONCE(lock->current_ticket) == curr);
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
index d6fb9581e980c67c54c8f8f4aad238827e41c083..c8d1f94ff1fe00e13f30a6c0e3ae51563a3226fa 100644 (file)
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
        u32 iterations = 0;
-       while (arch_spin_is_locked(lock))
+       u32 val = READ_ONCE(lock->lock);
+       u32 curr = arch_spin_current(val);
+
+       /* Return immediately if unlocked. */
+       if (arch_spin_next(val) == curr)
+               return;
+
+       /* Wait until the current locker has released the lock. */
+       do {
                delay_backoff(iterations++);
+       } while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
index 1bc162224638b48f7ba16c27f0317749d399184e..db93ad5fae2560e873b54e5e5257d720b986c8f5 100644 (file)
 
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
-/*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
-       { bz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
-1:      { lb_u r4, r0; addi r1, r1, -1 }
-       bz r4, 2f
-       { bnzt r1, 1b; addi r0, r0, 1 }
-2:      { sub r0, r0, r3; jrp lr }
-       STD_ENDPROC(strnlen_user_asm)
-       .pushsection .fixup,"ax"
-strnlen_user_fault:
-       { move r0, zero; jrp lr }
-       ENDPROC(strnlen_user_fault)
-       .section __ex_table,"a"
-       .align 4
-       .word 1b, strnlen_user_fault
-       .popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2.  On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
-       { bz r2, 2f; move r3, r0 }
-1:     { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
-       { sb r0, r4; addi r0, r0, 1 }
-       bz r4, 2f
-       bnzt r2, 1b
-       { sub r0, r0, r3; jrp lr }
-2:     addi r0, r0, -1   /* don't count the trailing NUL */
-       { sub r0, r0, r3; jrp lr }
-       STD_ENDPROC(strncpy_from_user_asm)
-       .pushsection .fixup,"ax"
-strncpy_from_user_fault:
-       { movei r0, -EFAULT; jrp lr }
-       ENDPROC(strncpy_from_user_fault)
-       .section __ex_table,"a"
-       .align 4
-       .word 1b, strncpy_from_user_fault
-       .popsection
-
 /*
  * clear_user_asm takes the user target address in r0 and the
  * number of bytes to zero in r1.
index b3b31a3306f8099ab38dfec13c97885306a8fbc0..9322dc551e91d2bcacb7236d5b320e93a184c851 100644 (file)
 
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
-/*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
-       { beqz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
-1:      { ld1u r4, r0; addi r1, r1, -1 }
-       beqz r4, 2f
-       { bnezt r1, 1b; addi r0, r0, 1 }
-2:      { sub r0, r0, r3; jrp lr }
-       STD_ENDPROC(strnlen_user_asm)
-       .pushsection .fixup,"ax"
-strnlen_user_fault:
-       { move r0, zero; jrp lr }
-       ENDPROC(strnlen_user_fault)
-       .section __ex_table,"a"
-       .align 8
-       .quad 1b, strnlen_user_fault
-       .popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2.  On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
-       { beqz r2, 2f; move r3, r0 }
-1:     { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
-       { st1 r0, r4; addi r0, r0, 1 }
-       beqz r4, 2f
-       bnezt r2, 1b
-       { sub r0, r0, r3; jrp lr }
-2:     addi r0, r0, -1   /* don't count the trailing NUL */
-       { sub r0, r0, r3; jrp lr }
-       STD_ENDPROC(strncpy_from_user_asm)
-       .pushsection .fixup,"ax"
-strncpy_from_user_fault:
-       { movei r0, -EFAULT; jrp lr }
-       ENDPROC(strncpy_from_user_fault)
-       .section __ex_table,"a"
-       .align 8
-       .quad 1b, strncpy_from_user_fault
-       .popsection
-
 /*
  * clear_user_asm takes the user target address in r0 and the
  * number of bytes to zero in r1.
index 3f4f58d34a92b6029615756892c1fe207d6f6701..13eac59bf16a5efbd9f67f600c4bb2cb7b4f8b74 100644 (file)
@@ -699,11 +699,10 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
  * interrupt away appropriately and return immediately.  We can't do
  * page faults for user code while in kernel mode.
  */
-void do_page_fault(struct pt_regs *regs, int fault_num,
-                  unsigned long address, unsigned long write)
+static inline void __do_page_fault(struct pt_regs *regs, int fault_num,
+                                  unsigned long address, unsigned long write)
 {
        int is_page_fault;
-       enum ctx_state prev_state = exception_enter();
 
 #ifdef CONFIG_KPROBES
        /*
@@ -713,7 +712,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
         */
        if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
                       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
-               goto done;
+               return;
 #endif
 
 #ifdef __tilegx__
@@ -835,18 +834,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
                        async->is_fault = is_page_fault;
                        async->is_write = write;
                        async->address = address;
-                       goto done;
+                       return;
                }
        }
 #endif
 
        handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
 
-done:
+void do_page_fault(struct pt_regs *regs, int fault_num,
+                  unsigned long address, unsigned long write)
+{
+       enum ctx_state prev_state = exception_enter();
+       __do_page_fault(regs, fault_num, address, write);
        exception_exit(prev_state);
 }
 
-
 #if CHIP_HAS_TILE_DMA()
 /*
  * This routine effectively re-issues asynchronous page faults
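
The do_page_fault() split above follows the usual context-tracking pattern: hoist exception_enter()/exception_exit() into a thin wrapper so early returns in the body no longer need a goto to a common exit label. A minimal sketch of the shape, with hypothetical handler names:

    #include <linux/context_tracking.h>

    static inline void __demo_handler(struct pt_regs *regs)
    {
            /* any early-out path can simply return */
    }

    void demo_handler(struct pt_regs *regs)
    {
            enum ctx_state prev_state = exception_enter();

            __demo_handler(regs);
            exception_exit(prev_state);
    }
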
index 9b90fdc4b151d325e9179424b1b4a44878e357ec..f6b911cc3923a1958f9c393a94799a879c4aa8d9 100644 (file)
@@ -185,9 +185,9 @@ static int hostaudio_open(struct inode *inode, struct file *file)
        int ret;
 
 #ifdef DEBUG
-       kparam_block_sysfs_write(dsp);
+       kernel_param_lock(THIS_MODULE);
        printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
-       kparam_unblock_sysfs_write(dsp);
+       kernel_param_unlock(THIS_MODULE);
 #endif
 
        state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
@@ -199,11 +199,11 @@ static int hostaudio_open(struct inode *inode, struct file *file)
        if (file->f_mode & FMODE_WRITE)
                w = 1;
 
-       kparam_block_sysfs_write(dsp);
+       kernel_param_lock(THIS_MODULE);
        mutex_lock(&hostaudio_mutex);
        ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
        mutex_unlock(&hostaudio_mutex);
-       kparam_unblock_sysfs_write(dsp);
+       kernel_param_unlock(THIS_MODULE);
 
        if (ret < 0) {
                kfree(state);
@@ -260,17 +260,17 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
        if (file->f_mode & FMODE_WRITE)
                w = 1;
 
-       kparam_block_sysfs_write(mixer);
+       kernel_param_lock(THIS_MODULE);
        mutex_lock(&hostaudio_mutex);
        ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
        mutex_unlock(&hostaudio_mutex);
-       kparam_unblock_sysfs_write(mixer);
+       kernel_param_unlock(THIS_MODULE);
 
        if (ret < 0) {
-               kparam_block_sysfs_write(dsp);
+               kernel_param_lock(THIS_MODULE);
                printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
                       "err = %d\n", dsp, -ret);
-               kparam_unblock_sysfs_write(dsp);
+               kernel_param_unlock(THIS_MODULE);
                kfree(state);
                return ret;
        }
@@ -326,10 +326,10 @@ MODULE_LICENSE("GPL");
 
 static int __init hostaudio_init_module(void)
 {
-       __kernel_param_lock();
+       kernel_param_lock(THIS_MODULE);
        printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
               dsp, mixer);
-       __kernel_param_unlock();
+       kernel_param_unlock(THIS_MODULE);
 
        module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
        if (module_data.dev_audio < 0) {
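
The conversion above replaces the old per-parameter kparam_block_sysfs_write()/__kernel_param_lock() helpers with the per-module kernel_param_lock()/kernel_param_unlock() pair, which serializes against sysfs writes to any of the module's parameters. A sketch of the usage, assuming a hypothetical charp parameter:

    #include <linux/moduleparam.h>

    static char *path = "/dev/dsp";
    module_param(path, charp, 0644);

    static void demo_use_path(void)
    {
            kernel_param_lock(THIS_MODULE);  /* block concurrent sysfs writes */
            pr_info("using %s\n", path);     /* 'path' cannot change in here */
            kernel_param_unlock(THIS_MODULE);
    }
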
diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config
new file mode 100644 (file)
index 0000000..d9fc713
--- /dev/null
@@ -0,0 +1,28 @@
+# global x86-specific requirements
+# On 32-bit, HIGHMEM4G is not allowed
+CONFIG_HIGHMEM64G=y
+CONFIG_64BIT=y
+
+# These make it possible to enable some of the
+# less generic options below
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_X86_MCE=y
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_CPU_FREQ=y
+
+# x86 xen specific config options
+CONFIG_XEN_PVH=y
+CONFIG_XEN_MAX_DOMAIN_MEMORY=500
+CONFIG_XEN_SAVE_RESTORE=y
+# CONFIG_XEN_DEBUG_FS is not set
+CONFIG_XEN_MCE_LOG=y
+CONFIG_XEN_ACPI_PROCESSOR=m
+# x86 specific backend drivers
+CONFIG_XEN_PCIDEV_BACKEND=m
+# x86 specific frontend drivers
+CONFIG_XEN_PCIDEV_FRONTEND=m
+# depends on MEMORY_HOTPLUG, arm64 doesn't enable this yet,
+# move to generic config if it ever does.
+CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y
index 2bfc8a7c88c11e1d4f20d4d3c58e62063c76328f..dccad38b59a8d741fe5f442b558b63de2b4d931d 100644 (file)
@@ -1537,7 +1537,7 @@ static void __exit aesni_exit(void)
        crypto_fpu_exit();
 }
 
-module_init(aesni_init);
+late_initcall(aesni_init);
 module_exit(aesni_exit);
 
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
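
Switching aesni from module_init() to late_initcall() only matters in the built-in case: late_initcall() runs at the final initcall level, after device_initcall() (the level module_init() maps to), presumably so the AES-NI algorithms register only once the crypto core they depend on is fully up. Built as a module, both forms behave identically at load time. An illustration:

    #include <linux/init.h>
    #include <linux/module.h>

    static int __init demo_init(void)
    {
            pr_info("built-in: runs at initcall level 7\n");
            return 0;
    }

    /* module_init(demo_init) would run at level 6 (device initcalls) */
    late_initcall(demo_init);
    MODULE_LICENSE("GPL");
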
index a4f62e6f2db2fe74cb78337ebeab9307b873c738..03d518e499a6d5a6184b19ae86b70adfcb3955ba 100644 (file)
@@ -297,7 +297,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp)
        return 0;
 }
 
-static struct kernel_param_ops audit_param_ops = {
+static const struct kernel_param_ops audit_param_ops = {
        .set = mmu_audit_set,
        .get = param_get_bool,
 };
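
This and the similar hunks further down constify kernel_param_ops tables, now that the module params core accepts const ops. A sketch of a custom parameter wired up this way, with hypothetical names:

    #include <linux/moduleparam.h>

    static int level;

    static int level_set(const char *val, const struct kernel_param *kp)
    {
            /* room for validation before the stock int setter */
            return param_set_int(val, kp);
    }

    static const struct kernel_param_ops level_ops = {
            .set = level_set,
            .get = param_get_int,
    };
    module_param_cb(level, &level_ops, &level, 0644);
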
index 7488cafab9553de5a11d860b8b57d81bec2f866f..020c101c255fec8386ba36c13c82ac8ddaf715b3 100644 (file)
@@ -104,7 +104,7 @@ static int param_set_local64(const char *val, const struct kernel_param *kp)
        return 0;
 }
 
-static struct kernel_param_ops param_ops_local64 = {
+static const struct kernel_param_ops param_ops_local64 = {
        .get = param_get_local64,
        .set = param_set_local64,
 };
index 55b6f15dac900af77a5ad7038cd98f3133d816a8..dda653ce7b24cfb959f668bdb4a676900ed7637d 100644 (file)
@@ -326,8 +326,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
                        goto out_put_request;
        }
 
-       ret = -EFAULT;
-       if (blk_fill_sghdr_rq(q, rq, hdr, mode))
+       ret = blk_fill_sghdr_rq(q, rq, hdr, mode);
+       if (ret < 0)
                goto out_free_cdb;
 
        ret = 0;
index fcb7807ea8b73de79163bb99c20091f4b202da0d..10561ce16ed135165cdbc8e558cb2aaea263d13b 100644 (file)
@@ -660,8 +660,10 @@ static int add_region_before(u64 start, u64 end, u8 space_id,
                return -ENOMEM;
 
        error = request_range(start, end, space_id, flags, desc);
-       if (error)
+       if (error) {
+               kfree(reg);
                return error;
+       }
 
        reg->start = start;
        reg->end = end;
index e645852396ba44430d77273b0a513cb5e8deb78a..f3f6d167f3f1f015fec8e36ede66b7e8e274338f 100644 (file)
@@ -129,9 +129,9 @@ EXPORT_SYMBOL_GPL(device_property_present);
 bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
 {
        if (is_of_node(fwnode))
-               return of_property_read_bool(of_node(fwnode), propname);
+               return of_property_read_bool(to_of_node(fwnode), propname);
        else if (is_acpi_node(fwnode))
-               return !acpi_dev_prop_get(acpi_node(fwnode), propname, NULL);
+               return !acpi_dev_prop_get(to_acpi_node(fwnode), propname, NULL);
 
        return !!pset_prop_get(to_pset(fwnode), propname);
 }
@@ -286,10 +286,10 @@ EXPORT_SYMBOL_GPL(device_property_read_string);
 ({ \
        int _ret_; \
        if (is_of_node(_fwnode_)) \
-               _ret_ = OF_DEV_PROP_READ_ARRAY(of_node(_fwnode_), _propname_, \
+               _ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_, \
                                               _type_, _val_, _nval_); \
        else if (is_acpi_node(_fwnode_)) \
-               _ret_ = acpi_dev_prop_read(acpi_node(_fwnode_), _propname_, \
+               _ret_ = acpi_dev_prop_read(to_acpi_node(_fwnode_), _propname_, \
                                           _proptype_, _val_, _nval_); \
        else \
                _ret_ = pset_prop_read_array(to_pset(_fwnode_), _propname_, \
@@ -425,11 +425,11 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
 {
        if (is_of_node(fwnode))
                return val ?
-                       of_property_read_string_array(of_node(fwnode), propname,
-                                                     val, nval) :
-                       of_property_count_strings(of_node(fwnode), propname);
+                       of_property_read_string_array(to_of_node(fwnode),
+                                                     propname, val, nval) :
+                       of_property_count_strings(to_of_node(fwnode), propname);
        else if (is_acpi_node(fwnode))
-               return acpi_dev_prop_read(acpi_node(fwnode), propname,
+               return acpi_dev_prop_read(to_acpi_node(fwnode), propname,
                                          DEV_PROP_STRING, val, nval);
 
        return pset_prop_read_array(to_pset(fwnode), propname,
@@ -456,9 +456,9 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode,
                                const char *propname, const char **val)
 {
        if (is_of_node(fwnode))
-               return of_property_read_string(of_node(fwnode), propname, val);
+               return of_property_read_string(to_of_node(fwnode), propname, val);
        else if (is_acpi_node(fwnode))
-               return acpi_dev_prop_read(acpi_node(fwnode), propname,
+               return acpi_dev_prop_read(to_acpi_node(fwnode), propname,
                                          DEV_PROP_STRING, val, 1);
 
        return -ENXIO;
@@ -476,13 +476,13 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev,
        if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
                struct device_node *node;
 
-               node = of_get_next_available_child(dev->of_node, of_node(child));
+               node = of_get_next_available_child(dev->of_node, to_of_node(child));
                if (node)
                        return &node->fwnode;
        } else if (IS_ENABLED(CONFIG_ACPI)) {
                struct acpi_device *node;
 
-               node = acpi_get_next_child(dev, acpi_node(child));
+               node = acpi_get_next_child(dev, to_acpi_node(child));
                if (node)
                        return acpi_fwnode_handle(node);
        }
@@ -501,7 +501,7 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node);
 void fwnode_handle_put(struct fwnode_handle *fwnode)
 {
        if (is_of_node(fwnode))
-               of_node_put(of_node(fwnode));
+               of_node_put(to_of_node(fwnode));
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_put);
 
index 6f9b7534928e2a8db2d35732d6e3511a1c4455bc..69de41a87b74311b2b7478fb0226b8bc253c6ebc 100644 (file)
@@ -99,7 +99,7 @@ static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
        return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
 }
 
-static struct kernel_param_ops null_queue_mode_param_ops = {
+static const struct kernel_param_ops null_queue_mode_param_ops = {
        .set    = null_set_queue_mode,
        .get    = param_get_int,
 };
@@ -127,7 +127,7 @@ static int null_set_irqmode(const char *str, const struct kernel_param *kp)
                                        NULL_IRQ_TIMER);
 }
 
-static struct kernel_param_ops null_irqmode_param_ops = {
+static const struct kernel_param_ops null_irqmode_param_ops = {
        .set    = null_set_irqmode,
        .get    = param_get_int,
 };
index e5112714188fb46bcbf9760ee2c394fdeb733890..34338d7438f56895f76ac8b110d8e92a120b3202 100644 (file)
@@ -193,6 +193,13 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
        return 0;
 }
 
+static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+       struct nvme_queue *nvmeq = hctx->driver_data;
+
+       nvmeq->tags = NULL;
+}
+
 static int nvme_admin_init_request(void *data, struct request *req,
                                unsigned int hctx_idx, unsigned int rq_idx,
                                unsigned int numa_node)
@@ -606,7 +613,10 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
                        return;
                }
                if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-                       req->errors = status;
+                       if (cmd_rq->ctx == CMD_CTX_CANCELLED)
+                               req->errors = -EINTR;
+                       else
+                               req->errors = status;
                } else {
                        req->errors = nvme_error_status(status);
                }
@@ -1161,12 +1171,13 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 
 int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
 {
-       struct nvme_command c = {
-               .identify.opcode = nvme_admin_identify,
-               .identify.cns = cpu_to_le32(1),
-       };
+       struct nvme_command c = { };
        int error;
 
+       /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.cns = cpu_to_le32(1);
+
        *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
        if (!*id)
                return -ENOMEM;
@@ -1181,12 +1192,13 @@ int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
 int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
                struct nvme_id_ns **id)
 {
-       struct nvme_command c = {
-               .identify.opcode = nvme_admin_identify,
-               .identify.nsid = cpu_to_le32(nsid),
-       };
+       struct nvme_command c = { };
        int error;
 
+       /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.nsid = cpu_to_le32(nsid);
+
        *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
        if (!*id)
                return -ENOMEM;
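
The two identify hunks above (and the log-page hunk below) avoid designated initializers that reach through the command's anonymous union, which gcc-4.4-era compilers mishandle, by zero-initializing the struct and assigning fields afterwards. A reduced illustration with a hypothetical struct:

    struct cmd {
            union {                 /* anonymous union, as in nvme_command */
                    struct { unsigned char opcode; } identify;
                    unsigned char raw[64];
            };
    };

    static void demo(void)
    {
            /* old gcc can miscompile this form:
             *     struct cmd c = { .identify.opcode = 0x06 };
             * so assign member-wise instead: */
            struct cmd c = { };

            c.identify.opcode = 0x06;
    }
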
@@ -1230,14 +1242,14 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 
 int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
 {
-       struct nvme_command c = {
-               .common.opcode = nvme_admin_get_log_page,
-               .common.nsid = cpu_to_le32(0xFFFFFFFF),
-               .common.cdw10[0] = cpu_to_le32(
-                       (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-                        NVME_LOG_SMART),
-       };
+       struct nvme_command c = { };
        int error;
+
+       c.common.opcode = nvme_admin_get_log_page;
+       c.common.nsid = cpu_to_le32(0xFFFFFFFF);
+       c.common.cdw10[0] = cpu_to_le32(
+                       (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+                        NVME_LOG_SMART);
 
        *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
        if (!*log)
@@ -1606,6 +1618,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
        .queue_rq       = nvme_queue_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = nvme_admin_init_hctx,
+       .exit_hctx      = nvme_admin_exit_hctx,
        .init_request   = nvme_admin_init_request,
        .timeout        = nvme_timeout,
 };
@@ -1648,6 +1661,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
                }
                if (!blk_get_queue(dev->admin_q)) {
                        nvme_dev_remove_admin(dev);
+                       dev->admin_q = NULL;
                        return -ENODEV;
                }
        } else
@@ -2349,19 +2363,20 @@ static int nvme_dev_add(struct nvme_dev *dev)
        }
        kfree(ctrl);
 
-       dev->tagset.ops = &nvme_mq_ops;
-       dev->tagset.nr_hw_queues = dev->online_queues - 1;
-       dev->tagset.timeout = NVME_IO_TIMEOUT;
-       dev->tagset.numa_node = dev_to_node(dev->dev);
-       dev->tagset.queue_depth =
+       if (!dev->tagset.tags) {
+               dev->tagset.ops = &nvme_mq_ops;
+               dev->tagset.nr_hw_queues = dev->online_queues - 1;
+               dev->tagset.timeout = NVME_IO_TIMEOUT;
+               dev->tagset.numa_node = dev_to_node(dev->dev);
+               dev->tagset.queue_depth =
                                min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
-       dev->tagset.cmd_size = nvme_cmd_size(dev);
-       dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
-       dev->tagset.driver_data = dev;
-
-       if (blk_mq_alloc_tag_set(&dev->tagset))
-               return 0;
+               dev->tagset.cmd_size = nvme_cmd_size(dev);
+               dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
+               dev->tagset.driver_data = dev;
 
+               if (blk_mq_alloc_tag_set(&dev->tagset))
+                       return 0;
+       }
        schedule_work(&dev->scan_work);
        return 0;
 }
@@ -2734,8 +2749,10 @@ static void nvme_free_dev(struct kref *kref)
        put_device(dev->device);
        nvme_free_namespaces(dev);
        nvme_release_instance(dev);
-       blk_mq_free_tag_set(&dev->tagset);
-       blk_put_queue(dev->admin_q);
+       if (dev->tagset.tags)
+               blk_mq_free_tag_set(&dev->tagset);
+       if (dev->admin_q)
+               blk_put_queue(dev->admin_q);
        kfree(dev->queues);
        kfree(dev->entry);
        kfree(dev);
@@ -2866,6 +2883,9 @@ static int nvme_dev_start(struct nvme_dev *dev)
 
  free_tags:
        nvme_dev_remove_admin(dev);
+       blk_put_queue(dev->admin_q);
+       dev->admin_q = NULL;
+       dev->queues[0]->tags = NULL;
  disable:
        nvme_disable_queue(dev, 0);
        nvme_dev_list_remove(dev);
@@ -2907,25 +2927,43 @@ static int nvme_dev_resume(struct nvme_dev *dev)
                spin_unlock(&dev_list_lock);
        } else {
                nvme_unfreeze_queues(dev);
-               schedule_work(&dev->scan_work);
+               nvme_dev_add(dev);
                nvme_set_irq_hints(dev);
        }
        return 0;
 }
 
+static void nvme_dead_ctrl(struct nvme_dev *dev)
+{
+       dev_warn(dev->dev, "Device failed to resume\n");
+       kref_get(&dev->kref);
+       if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
+                                               dev->instance))) {
+               dev_err(dev->dev,
+                       "Failed to start controller remove task\n");
+               kref_put(&dev->kref, nvme_free_dev);
+       }
+}
+
 static void nvme_dev_reset(struct nvme_dev *dev)
 {
+       bool in_probe = work_busy(&dev->probe_work);
+
        nvme_dev_shutdown(dev);
-       if (nvme_dev_resume(dev)) {
-               dev_warn(dev->dev, "Device failed to resume\n");
-               kref_get(&dev->kref);
-               if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
-                                                       dev->instance))) {
-                       dev_err(dev->dev,
-                               "Failed to start controller remove task\n");
-                       kref_put(&dev->kref, nvme_free_dev);
-               }
+
+       /* Synchronize with device probe so the probe work sees the failure
+        * status and exits gracefully without scheduling another reset. */
+       flush_work(&dev->probe_work);
+
+       /* Fail this device if a reset occurred during probe, to avoid
+        * infinite initialization loops. */
+       if (in_probe) {
+               nvme_dead_ctrl(dev);
+               return;
        }
+       /* Schedule device resume asynchronously so the reset work is available
+        * to clean up errors that may occur during reinitialization. */
+       schedule_work(&dev->probe_work);
 }
 
 static void nvme_reset_failed_dev(struct work_struct *ws)
@@ -2957,6 +2995,7 @@ static int nvme_reset(struct nvme_dev *dev)
 
        if (!ret) {
                flush_work(&dev->reset_work);
+               flush_work(&dev->probe_work);
                return 0;
        }
 
@@ -3053,26 +3092,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 static void nvme_async_probe(struct work_struct *work)
 {
        struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
-       int result;
 
-       result = nvme_dev_start(dev);
-       if (result)
-               goto reset;
-
-       if (dev->online_queues > 1)
-               result = nvme_dev_add(dev);
-       if (result)
-               goto reset;
-
-       nvme_set_irq_hints(dev);
-       return;
- reset:
-       spin_lock(&dev_list_lock);
-       if (!work_busy(&dev->reset_work)) {
-               dev->reset_workfn = nvme_reset_failed_dev;
-               queue_work(nvme_workq, &dev->reset_work);
-       }
-       spin_unlock(&dev_list_lock);
+       if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
+               nvme_dead_ctrl(dev);
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -3104,8 +3126,8 @@ static void nvme_remove(struct pci_dev *pdev)
        flush_work(&dev->reset_work);
        flush_work(&dev->scan_work);
        device_remove_file(dev->device, &dev_attr_reset_controller);
-       nvme_dev_shutdown(dev);
        nvme_dev_remove(dev);
+       nvme_dev_shutdown(dev);
        nvme_dev_remove_admin(dev);
        device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
        nvme_free_queues(dev, 0);
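
Most of the nvme rework above is about ordering between the asynchronous probe work and the reset path: reset flushes the probe work so it observes the failure and returns, a reset that interrupted probe marks the controller dead instead of retrying forever, and otherwise re-initialization is handed back to the probe work. Condensed to a skeleton (hypothetical driver types):

    static void demo_reset(struct demo_dev *dev)
    {
            bool in_probe = work_busy(&dev->probe_work);

            demo_shutdown(dev);

            /* let an in-flight probe see the failure and bail out */
            flush_work(&dev->probe_work);

            if (in_probe) {
                    demo_mark_dead(dev);    /* break the init/reset loop */
                    return;
            }

            /* re-run init asynchronously; the reset work stays free to
             * handle errors hit during re-initialization */
            schedule_work(&dev->probe_work);
    }
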
index 713fc9ff11492766efcb7a4795b4a1750ceb9707..ced96777b677b9bcddd65bae004a7a51b5cf0dc3 100644 (file)
@@ -83,6 +83,13 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
                  "Maximum number of grants to map persistently");
 
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 /*
  * The LRU mechanism to clean the lists of persistent grants needs to
  * be executed periodically. The time interval between consecutive executions
@@ -729,7 +736,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
        struct grant_page **pages = req->segments;
        unsigned int invcount;
 
-       invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
+       invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
                                           req->unmap, req->unmap_pages);
 
        work->data = req;
@@ -915,7 +922,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
        int rc;
 
        rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
-                          pending_req->nr_pages,
+                          pending_req->nr_segs,
                           (pending_req->operation != BLKIF_OP_READ));
 
        return rc;
@@ -931,7 +938,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
        int indirect_grefs, rc, n, nseg, i;
        struct blkif_request_segment *segments = NULL;
 
-       nseg = pending_req->nr_pages;
+       nseg = pending_req->nr_segs;
        indirect_grefs = INDIRECT_PAGES(nseg);
        BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
@@ -1251,7 +1258,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        pending_req->id        = req->u.rw.id;
        pending_req->operation = req_operation;
        pending_req->status    = BLKIF_RSP_OKAY;
-       pending_req->nr_pages  = nseg;
+       pending_req->nr_segs   = nseg;
 
        if (req->operation != BLKIF_OP_INDIRECT) {
                preq.dev               = req->u.rw.handle;
@@ -1372,7 +1379,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
  fail_flush:
        xen_blkbk_unmap(blkif, pending_req->segments,
-                       pending_req->nr_pages);
+                       pending_req->nr_segs);
  fail_response:
        /* Haven't submitted any bio's yet. */
        make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
@@ -1438,6 +1445,12 @@ static int __init xen_blkif_init(void)
        if (!xen_domain())
                return -ENODEV;
 
+       if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+               pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+                       xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+               xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+       }
+
        rc = xen_blkif_interface_init();
        if (rc)
                goto failed_init;
index f620b5d3f77c0270585f5cc4d86ba71bb4320092..45a044a53d1e562db4e606623840d0c667aa56e3 100644 (file)
@@ -44,6 +44,7 @@
 #include <xen/interface/io/blkif.h>
 #include <xen/interface/io/protocols.h>
 
+extern unsigned int xen_blkif_max_ring_order;
 /*
  * This is the maximum number of segments that would be allowed in indirect
  * requests. This value will also be passed to the frontend.
@@ -248,7 +249,7 @@ struct backend_info;
 #define PERSISTENT_GNT_WAS_ACTIVE      1
 
 /* Number of requests that we can fit in a ring */
-#define XEN_BLKIF_REQS                 32
+#define XEN_BLKIF_REQS_PER_PAGE                32
 
 struct persistent_gnt {
        struct page *page;
@@ -320,6 +321,7 @@ struct xen_blkif {
        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
+       unsigned int nr_ring_pages;
 };
 
 struct seg_buf {
@@ -343,7 +345,7 @@ struct grant_page {
 struct pending_req {
        struct xen_blkif        *blkif;
        u64                     id;
-       int                     nr_pages;
+       int                     nr_segs;
        atomic_t                pendcnt;
        unsigned short          operation;
        int                     status;
index 6ab69ad61ee126c6f62a63f77674b13489de7f73..deb3f001791f159c5c7ebce19814de31e3106a5e 100644 (file)
@@ -25,6 +25,7 @@
 
 /* Enlarge the array size in order to fully show blkback name. */
 #define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
 
 struct backend_info {
        struct xenbus_device    *dev;
@@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
        struct xen_blkif *blkif;
-       struct pending_req *req, *n;
-       int i, j;
 
        BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
@@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 
        INIT_LIST_HEAD(&blkif->pending_free);
        INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-
-       for (i = 0; i < XEN_BLKIF_REQS; i++) {
-               req = kzalloc(sizeof(*req), GFP_KERNEL);
-               if (!req)
-                       goto fail;
-               list_add_tail(&req->free_list,
-                             &blkif->pending_free);
-               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
-                       req->segments[j] = kzalloc(sizeof(*req->segments[0]),
-                                                  GFP_KERNEL);
-                       if (!req->segments[j])
-                               goto fail;
-               }
-               for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
-                       req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
-                                                        GFP_KERNEL);
-                       if (!req->indirect_pages[j])
-                               goto fail;
-               }
-       }
        spin_lock_init(&blkif->pending_free_lock);
        init_waitqueue_head(&blkif->pending_free_wq);
        init_waitqueue_head(&blkif->shutdown_wq);
 
        return blkif;
-
-fail:
-       list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
-               list_del(&req->free_list);
-               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
-                       if (!req->segments[j])
-                               break;
-                       kfree(req->segments[j]);
-               }
-               for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
-                       if (!req->indirect_pages[j])
-                               break;
-                       kfree(req->indirect_pages[j]);
-               }
-               kfree(req);
-       }
-
-       kmem_cache_free(xen_blkif_cachep, blkif);
-
-       return ERR_PTR(-ENOMEM);
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+                        unsigned int nr_grefs, unsigned int evtchn)
 {
        int err;
 
@@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
        if (blkif->irq)
                return 0;
 
-       err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+       err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
                                     &blkif->blk_ring);
        if (err < 0)
                return err;
@@ -217,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
                break;
        }
        default:
@@ -312,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
                i++;
        }
 
-       WARN_ON(i != XEN_BLKIF_REQS);
+       WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
 
        kmem_cache_free(xen_blkif_cachep, blkif);
 }
@@ -597,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;
 
+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+                           xen_blkif_max_ring_order);
+       if (err)
+               pr_warn("%s: failed to write 'max-ring-page-order'\n", __func__);
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -860,22 +824,66 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
-       unsigned int evtchn;
+       unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int evtchn, nr_grefs, ring_page_order;
        unsigned int pers_grants;
        char protocol[64] = "";
-       int err;
+       struct pending_req *req, *n;
+       int err, i, j;
 
        pr_debug("%s %s\n", __func__, dev->otherend);
 
-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                         &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }
+       pr_info("event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                         &ring_page_order);
+       if (err != 1) {
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                 "%u", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+               nr_grefs = 1;
+               pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+                       ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_page_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+                       xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
+                                        dev->otherend, ring_page_order,
+                                        xen_blkif_max_ring_order);
+                       return err;
+               }
+
+               nr_grefs = 1 << ring_page_order;
+               for (i = 0; i < nr_grefs; i++) {
+                       char ring_ref_name[RINGREF_NAME_LEN];
+
+                       snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+                       err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+                                          "%u", &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+                               xenbus_dev_fatal(dev, err, "reading %s/%s",
+                                                dev->otherend, ring_ref_name);
+                               return err;
+                       }
+                       pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+               }
+       }
 
        be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
@@ -900,20 +908,55 @@ static int connect_ring(struct backend_info *be)
 
        be->blkif->vbd.feature_gnt_persistent = pers_grants;
        be->blkif->vbd.overflow_max_grants = 0;
+       be->blkif->nr_ring_pages = nr_grefs;
 
-       pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+       pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+               nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
                pers_grants ? "persistent grants" : "");
 
+       for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
+               req = kzalloc(sizeof(*req), GFP_KERNEL);
+               if (!req)
+                       goto fail;
+               list_add_tail(&req->free_list, &be->blkif->pending_free);
+               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+                       req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
+                       if (!req->segments[j])
+                               goto fail;
+               }
+               for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+                       req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+                                                        GFP_KERNEL);
+                       if (!req->indirect_pages[j])
+                               goto fail;
+               }
+       }
+
        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
                return err;
        }
 
        return 0;
+
+fail:
+       list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+               list_del(&req->free_list);
+               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+                       if (!req->segments[j])
+                               break;
+                       kfree(req->segments[j]);
+               }
+               for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+                       if (!req->indirect_pages[j])
+                               break;
+                       kfree(req->indirect_pages[j]);
+               }
+               kfree(req);
+       }
+       return -ENOMEM;
 }
 
 static const struct xenbus_device_id xen_blkbk_ids[] = {
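
Everything the backend sizes above derives from the negotiated page count: nr_ring_pages grant references map one contiguous ring of nr_ring_pages * PAGE_SIZE bytes, and the request pool grows to XEN_BLKIF_REQS_PER_PAGE entries per ring page. The arithmetic, as a sketch:

    static unsigned int demo_req_pool_size(unsigned int ring_page_order)
    {
            unsigned int nr_grefs = 1 << ring_page_order;   /* grant pages  */
            unsigned int ring_bytes = nr_grefs * PAGE_SIZE; /* the size fed
                                                               to BACK_RING_INIT
                                                               above         */
            (void)ring_bytes;
            return nr_grefs * XEN_BLKIF_REQS_PER_PAGE;      /* request pool */
    }
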
index 2c61cf8c6f61d1086ea60155e3b1c12b9bb706bf..6d89ed35d80c0caaf8bf57ba82c7e9f3a9194bb9 100644 (file)
@@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32;
 module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
 MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
 
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+static unsigned int xen_blkif_max_ring_order;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+
+#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages)
+#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+/*
+ * ring-ref%i with i = (-1UL) would take 11 characters, plus 'ring-ref'
+ * itself is 8, so 19 characters are enough. Define to 20 to stay
+ * consistent with the backend.
+ */
+#define RINGREF_NAME_LEN (20)
 
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -114,13 +128,14 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int nr_ring_pages;
        struct blkif_front_ring ring;
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        struct list_head grants;
        struct list_head indirect_pages;
        unsigned int persistent_gnts_c;
@@ -139,8 +154,6 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);
 
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 #define GRANT_INVALID_REF      0
 
 #define PARTS_PER_DISK         16
@@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_RING_SIZE(info));
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
                }
        }
 
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info); i++) {
                /*
                 * Clear persistent grants present in requests already
                 * on the shared ring
@@ -1033,12 +1046,15 @@ free_shadow:
        flush_work(&info->work);
 
        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < info->nr_ring_pages; i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+       free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
@@ -1058,12 +1074,6 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
 
        if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
-               /*
-                * Copy the data received from the backend into the bvec.
-                * Since bv_offset can be different than 0, and bv_len different
-                * than PAGE_SIZE, we have to keep track of the current offset,
-                * to be sure we are copying the data from the right shared page.
-                */
                for_each_sg(s->sg, sg, nseg, i) {
                        BUG_ON(sg->offset + sg->length > PAGE_SIZE);
                        shared_data = kmap_atomic(
@@ -1157,7 +1167,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                 * never have given to it (we stamp it up to BLK_RING_SIZE -
                 * look in get_id_from_freelist.
                 */
-               if (id >= BLK_RING_SIZE) {
+               if (id >= BLK_RING_SIZE(info)) {
                        WARN(1, "%s: response to %s has incorrect id (%ld)\n",
                             info->gd->disk_name, op_name(bret->operation), id);
                        /* We can't safely get the 'struct request' as
@@ -1245,26 +1255,30 @@ static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_info *info)
 {
        struct blkif_sring *sring;
-       grant_ref_t gref;
-       int err;
+       int err, i;
+       unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE;
+       grant_ref_t gref[XENBUS_MAX_RING_PAGES];
 
-       info->ring_ref = GRANT_INVALID_REF;
+       for (i = 0; i < info->nr_ring_pages; i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
 
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      get_order(ring_size));
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, ring_size);
 
-       err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
+       err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, get_order(ring_size));
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = gref;
+       for (i = 0; i < info->nr_ring_pages; i++)
+               info->ring_ref[i] = gref[i];
 
        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -1292,7 +1306,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
-       int err;
+       int err, i;
+       unsigned int max_page_order = 0;
+       unsigned int ring_page_order = 0;
+
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "max-ring-page-order", "%u", &max_page_order);
+       if (err != 1)
+               info->nr_ring_pages = 1;
+       else {
+               ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+               info->nr_ring_pages = 1 << ring_page_order;
+       }
 
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
@@ -1306,11 +1331,32 @@ again:
                goto destroy_blkring;
        }
 
-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (info->nr_ring_pages == 1) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%u", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", ring_page_order);
+               if (err) {
+                       message = "writing ring-page-order";
+                       goto abort_transaction;
+               }
+
+               for (i = 0; i < info->nr_ring_pages; i++) {
+                       char ring_ref_name[RINGREF_NAME_LEN];
+
+                       snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+                       err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
+                                           "%u", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
        }
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
@@ -1338,6 +1384,9 @@ again:
                goto destroy_blkring;
        }
 
+       for (i = 0; i < BLK_RING_SIZE(info); i++)
+               info->shadow[i].req.u.rw.id = i+1;
+       info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
        xenbus_switch_state(dev, XenbusStateInitialised);
 
        return 0;
@@ -1361,7 +1410,7 @@ again:
 static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
 {
-       int err, vdevice, i;
+       int err, vdevice;
        struct blkfront_info *info;
 
        /* FIXME: Use dynamic device id if this is not set. */
@@ -1422,21 +1471,10 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);
 
-       for (i = 0; i < BLK_RING_SIZE; i++)
-               info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);
 
-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }
 
@@ -1476,10 +1514,10 @@ static int blkif_recover(struct blkfront_info *info)
 
        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_RING_SIZE(info); i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
 
        rc = blkfront_setup_indirect(info);
        if (rc) {
@@ -1491,7 +1529,7 @@ static int blkif_recover(struct blkfront_info *info)
        blk_queue_max_segments(info->rq, segs);
        bio_list_init(&bio_list);
        INIT_LIST_HEAD(&requests);
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1697,7 +1735,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
                segs = info->max_indirect_segments;
        }
 
-       err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+       err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
        if (err)
                goto out_of_memory;
 
@@ -1707,7 +1745,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
                 * grants, we need to allocate a set of pages that can be
                 * used for mapping indirect grefs
                 */
-               int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
+               int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info);
 
                BUG_ON(!list_empty(&info->indirect_pages));
                for (i = 0; i < num; i++) {
@@ -1718,7 +1756,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
                }
        }
 
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info); i++) {
                info->shadow[i].grants_used = kzalloc(
                        sizeof(info->shadow[i].grants_used[0]) * segs,
                        GFP_NOIO);
@@ -1740,7 +1778,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
        return 0;
 
 out_of_memory:
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info); i++) {
                kfree(info->shadow[i].grants_used);
                info->shadow[i].grants_used = NULL;
                kfree(info->shadow[i].sg);
@@ -1906,8 +1944,15 @@ static void blkback_changed(struct xenbus_device *dev,
        dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
 
        switch (backend_state) {
-       case XenbusStateInitialising:
        case XenbusStateInitWait:
+               if (dev->state != XenbusStateInitialising)
+                       break;
+               if (talk_to_blkback(dev, info)) {
+                       kfree(info);
+                       dev_set_drvdata(&dev->dev, NULL);
+                       break;
+               }
+       case XenbusStateInitialising:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -2091,6 +2136,12 @@ static int __init xlblk_init(void)
        if (!xen_domain())
                return -ENODEV;
 
+       if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+               pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+                       xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+               xen_blkif_max_ring_order = 0;
+       }
+
        if (!xen_has_pv_disk_devices())
                return -ENODEV;
 
index 37b8be7cba95f61a8f8788fda52cd8ecd3291c42..0ac3bd1a5497c5bae41f1cc51ac8cdf667ed73b4 100644 (file)
@@ -208,7 +208,7 @@ static int set_param_timeout(const char *val, const struct kernel_param *kp)
        return rv;
 }
 
-static struct kernel_param_ops param_ops_timeout = {
+static const struct kernel_param_ops param_ops_timeout = {
        .set = set_param_timeout,
        .get = param_get_int,
 };
@@ -270,14 +270,14 @@ static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp)
        return 0;
 }
 
-static struct kernel_param_ops param_ops_wdog_ifnum = {
+static const struct kernel_param_ops param_ops_wdog_ifnum = {
        .set = set_param_wdog_ifnum,
        .get = param_get_int,
 };
 
 #define param_check_wdog_ifnum param_check_int
 
-static struct kernel_param_ops param_ops_str = {
+static const struct kernel_param_ops param_ops_str = {
        .set = set_param_str,
        .get = get_param_str,
 };
index 935b05936dbdd9588764b0c04bf84db32b425af0..9064ff743598a42515333f073cbd3d86193a2524 100644 (file)
@@ -462,15 +462,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt)
        exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
 
        if (mct_int_type == MCT_INT_SPI) {
-               evt->irq = mct_irqs[MCT_L0_IRQ + cpu];
-               if (request_irq(evt->irq, exynos4_mct_tick_isr,
-                               IRQF_TIMER | IRQF_NOBALANCING,
-                               evt->name, mevt)) {
-                       pr_err("exynos-mct: cannot register IRQ %d\n",
-                               evt->irq);
+
+               if (evt->irq == -1)
                        return -EIO;
-               }
-               irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu));
+
+               irq_force_affinity(evt->irq, cpumask_of(cpu));
+               enable_irq(evt->irq);
        } else {
                enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0);
        }
@@ -483,10 +480,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt)
 static void exynos4_local_timer_stop(struct clock_event_device *evt)
 {
        evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
-       if (mct_int_type == MCT_INT_SPI)
-               free_irq(evt->irq, this_cpu_ptr(&percpu_mct_tick));
-       else
+       if (mct_int_type == MCT_INT_SPI) {
+               if (evt->irq != -1)
+                       disable_irq_nosync(evt->irq);
+       } else {
                disable_percpu_irq(mct_irqs[MCT_L0_IRQ]);
+       }
 }
 
 static int exynos4_mct_cpu_notify(struct notifier_block *self,
@@ -518,7 +517,7 @@ static struct notifier_block exynos4_mct_cpu_nb = {
 
 static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base)
 {
-       int err;
+       int err, cpu;
        struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick);
        struct clk *mct_clk, *tick_clk;
 
@@ -545,7 +544,25 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem
                WARN(err, "MCT: can't request IRQ %d (%d)\n",
                     mct_irqs[MCT_L0_IRQ], err);
        } else {
-               irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0));
+               for_each_possible_cpu(cpu) {
+                       int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+                       struct mct_clock_event_device *pcpu_mevt =
+                               per_cpu_ptr(&percpu_mct_tick, cpu);
+
+                       pcpu_mevt->evt.irq = -1;
+
+                       irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
+                       if (request_irq(mct_irq,
+                                       exynos4_mct_tick_isr,
+                                       IRQF_TIMER | IRQF_NOBALANCING,
+                                       pcpu_mevt->name, pcpu_mevt)) {
+                               pr_err("exynos-mct: cannot register IRQ (cpu%d)\n",
+                                                                       cpu);
+
+                               continue;
+                       }
+                       pcpu_mevt->evt.irq = mct_irq;
+               }
        }
 
        err = register_cpu_notifier(&exynos4_mct_cpu_nb);
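
The MCT rework requests every per-CPU tick interrupt once at boot but leaves each line disabled via IRQ_NOAUTOEN, so the owning CPU can pin and enable only its own line during local timer setup, replacing the old request/free on every hotplug transition. The pattern, sketched with hypothetical names:

    /* boot time, once per line: request without enabling */
    static void demo_request_tick_irq(int irq, void *dev)
    {
            irq_set_status_flags(irq, IRQ_NOAUTOEN);
            if (request_irq(irq, demo_tick_isr,
                            IRQF_TIMER | IRQF_NOBALANCING, "demo-tick", dev))
                    pr_err("demo: cannot register IRQ %d\n", irq);
    }

    /* later, from the CPU bringing up its local tick */
    static void demo_enable_tick_irq(int irq, int cpu)
    {
            irq_force_affinity(irq, cpumask_of(cpu));
            enable_irq(irq);
    }
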
index 1e3ef5ec4784dcfc3b758a0d67cac25c0d33e4ba..845bafcfa7929fd66cbb5eb966b16e0f3320de7f 100644 (file)
@@ -67,6 +67,8 @@ static int nap_loop(struct cpuidle_device *dev,
        return index;
 }
 
+/* Register for fastsleep only in oneshot mode of broadcast */
+#ifdef CONFIG_TICK_ONESHOT
 static int fastsleep_loop(struct cpuidle_device *dev,
                                struct cpuidle_driver *drv,
                                int index)
@@ -90,7 +92,7 @@ static int fastsleep_loop(struct cpuidle_device *dev,
 
        return index;
 }
-
+#endif
 /*
  * States for dedicated partition case.
  */
@@ -216,7 +218,14 @@ static int powernv_add_idle_states(void)
                        powernv_states[nr_idle_states].flags = 0;
                        powernv_states[nr_idle_states].target_residency = 100;
                        powernv_states[nr_idle_states].enter = &nap_loop;
-               } else if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
+               }
+
+               /*
+                * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
+                * within this config dependency check.
+                */
+#ifdef CONFIG_TICK_ONESHOT
+               if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
                        flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
                        /* Add FASTSLEEP state */
                        strcpy(powernv_states[nr_idle_states].name, "FastSleep");
@@ -225,7 +234,7 @@ static int powernv_add_idle_states(void)
                        powernv_states[nr_idle_states].target_residency = 300000;
                        powernv_states[nr_idle_states].enter = &fastsleep_loop;
                }
-
+#endif
                powernv_states[nr_idle_states].exit_latency =
                                ((unsigned int)latency_ns[i]) / 1000;
 
index 7f8b66c915ed4bd7341961df49470e55bb767278..fdda8e7ae302511bec5c0e1c18d2c5c4b2b3d351 100644 (file)
@@ -88,10 +88,7 @@ void adf_ae_fw_release(struct adf_accel_dev *accel_dev)
 
        qat_uclo_del_uof_obj(loader_data->fw_loader);
        qat_hal_deinit(loader_data->fw_loader);
-
-       if (loader_data->uof_fw)
-               release_firmware(loader_data->uof_fw);
-
+       release_firmware(loader_data->uof_fw);
        loader_data->uof_fw = NULL;
        loader_data->fw_loader = NULL;
 }
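
release_firmware(), like kfree(), is a no-op on NULL, so the removed check was redundant; teardown paths can call both unconditionally. A hypothetical example:

    static void demo_teardown(struct demo_ctx *ctx)
    {
            release_firmware(ctx->fw);  /* fine even if ctx->fw == NULL */
            ctx->fw = NULL;
            kfree(ctx->buf);            /* kfree(NULL) is a no-op too */
            ctx->buf = NULL;
    }
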
index ccec327489daa5e6a64942b3d7d754da7eb36bed..db2926bff8a5bc843741b7cf56facb114ab853b7 100644 (file)
@@ -449,7 +449,7 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
 err:
        for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) {
                ring = &bank->rings[i];
-               if (hw_data->tx_rings_mask & (1 << i) && ring->inflights)
+               if (hw_data->tx_rings_mask & (1 << i))
                        kfree(ring->inflights);
        }
        return -ENOMEM;
index 220ee49633e49e88c041c8796a88a83370087e0f..b8576fd6bd0e544730bfc07a19ce92b21ab1aba6 100644 (file)
@@ -120,7 +120,7 @@ static struct dmatest_info {
 
 static int dmatest_run_set(const char *val, const struct kernel_param *kp);
 static int dmatest_run_get(char *val, const struct kernel_param *kp);
-static struct kernel_param_ops run_ops = {
+static const struct kernel_param_ops run_ops = {
        .set = dmatest_run_set,
        .get = dmatest_run_get,
 };
@@ -195,7 +195,7 @@ static int dmatest_wait_get(char *val, const struct kernel_param *kp)
        return param_get_bool(val, kp);
 }
 
-static struct kernel_param_ops wait_ops = {
+static const struct kernel_param_ops wait_ops = {
        .get = dmatest_wait_get,
        .set = param_set_bool,
 };
index 8333f878919c4de8e22c86e81f6fc5d2b67ad2e3..40343fa92c7b9c344f1f86a824bdb779ab8dd91e 100644 (file)
@@ -657,8 +657,9 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev)
        }
        for (i = 0; i < kona_gpio->num_bank; i++) {
                bank = &kona_gpio->banks[i];
-               irq_set_chained_handler(bank->irq, bcm_kona_gpio_irq_handler);
-               irq_set_handler_data(bank->irq, bank);
+               irq_set_chained_handler_and_data(bank->irq,
+                                                bcm_kona_gpio_irq_handler,
+                                                bank);
        }
 
        spin_lock_init(&kona_gpio->lock);
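
irq_set_chained_handler_and_data() installs the handler and its cookie in one operation under the irq descriptor lock, closing the window in the old two-call sequence where the chained handler could fire before irq_set_handler_data() had run. The shape of the conversion, with demux_handler and chip_data as illustrative names:

irq_set_chained_handler_and_data(parent_irq, demux_handler, chip_data);

/* replaces the racy pair:
 *      irq_set_chained_handler(parent_irq, demux_handler);
 *      irq_set_handler_data(parent_irq, chip_data);
 */
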
index 58faf04fce5da02067d6706378ed6c1ca1feef33..55fa9853a7f2207984c0e99d3e1cee2c49ad788d 100644 (file)
@@ -348,8 +348,8 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio,
        irq_gc->chip_types[1].handler = handle_edge_irq;
 
        if (!pp->irq_shared) {
-               irq_set_chained_handler(pp->irq, dwapb_irq_handler);
-               irq_set_handler_data(pp->irq, gpio);
+               irq_set_chained_handler_and_data(pp->irq, dwapb_irq_handler,
+                                                gpio);
        } else {
                /*
                 * Request a shared IRQ since where MFD would have devices
index 01acf0a8cdb1963c3d01f1ca591aaab76b55158e..7bcfb87a5fa6812a51465d5c510a4c8a61c5d7d4 100644 (file)
@@ -309,8 +309,7 @@ static int platform_msic_gpio_probe(struct platform_device *pdev)
                                         &msic_irqchip,
                                         handle_simple_irq);
        }
-       irq_set_chained_handler(mg->irq, msic_gpio_irq_handler);
-       irq_set_handler_data(mg->irq, mg);
+       irq_set_chained_handler_and_data(mg->irq, msic_gpio_irq_handler, mg);
 
        return 0;
 err:
index be42ab368a801ff0a3c5508b9bd53c3fbf6cf78d..bf4bd1d120c38a94fb997521c1712224a7712f8d 100644 (file)
@@ -2052,14 +2052,14 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
        if (is_of_node(fwnode)) {
                enum of_gpio_flags flags;
 
-               desc = of_get_named_gpiod_flags(of_node(fwnode), propname, 0,
+               desc = of_get_named_gpiod_flags(to_of_node(fwnode), propname, 0,
                                                &flags);
                if (!IS_ERR(desc))
                        active_low = flags & OF_GPIO_ACTIVE_LOW;
        } else if (is_acpi_node(fwnode)) {
                struct acpi_gpio_info info;
 
-               desc = acpi_get_gpiod_by_index(acpi_node(fwnode), propname, 0,
+               desc = acpi_get_gpiod_by_index(to_acpi_node(fwnode), propname, 0,
                                               &info);
                if (!IS_ERR(desc))
                        active_low = info.active_low;
index e29b02ca9e915d31735d35d9cf46438d38182e22..f086ef387475989867df3ab2b34f6b8232ba7fa5 100644 (file)
@@ -199,7 +199,7 @@ static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp)
        return 0;
 }
 
-static struct kernel_param_ops param_ops_ide_dev_mask = {
+static const struct kernel_param_ops param_ops_ide_dev_mask = {
        .set = ide_set_dev_param_mask
 };
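
The many kernel_param_ops hunks in this section are all the same change: the module-param core now takes const ops, so the tables can live in rodata. A sketch of the full wiring through module_param_cb(), with my_set, my_ops and my_value as illustrative names:

static int my_set(const char *val, const struct kernel_param *kp)
{
        /* validate here, then fall through to the stock parser */
        return param_set_int(val, kp);
}

static const struct kernel_param_ops my_ops = {
        .set = my_set,
        .get = param_get_int,
};

static int my_value;
module_param_cb(my_value, &my_ops, &my_value, 0644);
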
 
index eada8f758ad4089ec0e15a7469ccc50cb626a98f..267dc4f7550236e89fae58ff80c4a3953cff877f 100644 (file)
@@ -99,7 +99,7 @@ module_param(register_always, bool, 0444);
 MODULE_PARM_DESC(register_always,
                 "Use memory registration even for contiguous memory regions");
 
-static struct kernel_param_ops srp_tmo_ops;
+static const struct kernel_param_ops srp_tmo_ops;
 
 static int srp_reconnect_delay = 10;
 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
@@ -184,7 +184,7 @@ out:
        return res;
 }
 
-static struct kernel_param_ops srp_tmo_ops = {
+static const struct kernel_param_ops srp_tmo_ops = {
        .get = srp_tmo_get,
        .set = srp_tmo_set,
 };
index f63341f20b91aed210208d62bf50c17b9da261c5..cfd58e87da2620061f2d93e01244c9d052319991 100644 (file)
@@ -94,7 +94,7 @@ static int ati_remote2_get_mode_mask(char *buffer,
 
 static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;
 #define param_check_channel_mask(name, p) __param_check(name, p, unsigned int)
-static struct kernel_param_ops param_ops_channel_mask = {
+static const struct kernel_param_ops param_ops_channel_mask = {
        .set = ati_remote2_set_channel_mask,
        .get = ati_remote2_get_channel_mask,
 };
@@ -103,7 +103,7 @@ MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...<
 
 static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK;
 #define param_check_mode_mask(name, p) __param_check(name, p, unsigned int)
-static struct kernel_param_ops param_ops_mode_mask = {
+static const struct kernel_param_ops param_ops_mode_mask = {
        .set = ati_remote2_set_mode_mask,
        .get = ati_remote2_get_mode_mask,
 };
index 7c4ba43d253e7661088c8e241fca6ab02e1f68d3..ec34770361501254e8e2c417315f74e3e00e5a97 100644 (file)
@@ -47,7 +47,7 @@ MODULE_LICENSE("GPL");
 static unsigned int psmouse_max_proto = PSMOUSE_AUTO;
 static int psmouse_set_maxproto(const char *val, const struct kernel_param *);
 static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp);
-static struct kernel_param_ops param_ops_proto_abbrev = {
+static const struct kernel_param_ops param_ops_proto_abbrev = {
        .set = psmouse_set_maxproto,
        .get = psmouse_get_maxproto,
 };
index d3e5e9abe3b6cc36f4b488491ad38e24f0a4bf8b..a57e9b7498953bb9ebf947695caea46d73e2bfeb 100644 (file)
@@ -117,6 +117,7 @@ struct kmem_cache *amd_iommu_irq_cache;
 
 static void update_domain(struct protection_domain *domain);
 static int alloc_passthrough_domain(void);
+static int protection_domain_init(struct protection_domain *domain);
 
 /****************************************************************************
  *
@@ -1881,12 +1882,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
        if (!dma_dom)
                return NULL;
 
-       spin_lock_init(&dma_dom->domain.lock);
-
-       dma_dom->domain.id = domain_id_alloc();
-       if (dma_dom->domain.id == 0)
+       if (protection_domain_init(&dma_dom->domain))
                goto free_dma_dom;
-       INIT_LIST_HEAD(&dma_dom->domain.dev_list);
+
        dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
        dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
        dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -2916,6 +2914,18 @@ static void protection_domain_free(struct protection_domain *domain)
        kfree(domain);
 }
 
+static int protection_domain_init(struct protection_domain *domain)
+{
+       spin_lock_init(&domain->lock);
+       mutex_init(&domain->api_lock);
+       domain->id = domain_id_alloc();
+       if (!domain->id)
+               return -ENOMEM;
+       INIT_LIST_HEAD(&domain->dev_list);
+
+       return 0;
+}
+
 static struct protection_domain *protection_domain_alloc(void)
 {
        struct protection_domain *domain;
@@ -2924,12 +2934,8 @@ static struct protection_domain *protection_domain_alloc(void)
        if (!domain)
                return NULL;
 
-       spin_lock_init(&domain->lock);
-       mutex_init(&domain->api_lock);
-       domain->id = domain_id_alloc();
-       if (!domain->id)
+       if (protection_domain_init(domain))
                goto out_err;
-       INIT_LIST_HEAD(&domain->dev_list);
 
        add_domain_to_list(domain);
 
index f14130121298bfb5739cedab54480a737b3ba6ce..8e9ec81ce4bbd85473d6d6a35e7c3567569187ee 100644 (file)
@@ -1389,8 +1389,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-       if (smmu_domain->pgtbl_ops)
-               free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+       free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 
        /* Free the CD and ASID, if we allocated them */
        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
index dce041b1c1394be7057528a5b6fd23dcbbbf6253..4cd0c29cb585000c0e5899651948ad1dc2ffbf1f 100644 (file)
@@ -1566,7 +1566,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                return -ENODEV;
        }
 
-       if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) {
+       if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) {
                smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
                dev_notice(smmu->dev, "\taddress translation ops\n");
        }
index 49e7542510d15caac5622cdb01fdcf8b77bb80e8..f286090931cc874f6851eab4f279b5f9f44276d1 100644 (file)
@@ -847,13 +847,24 @@ static int add_iommu_group(struct device *dev, void *data)
 {
        struct iommu_callback_data *cb = data;
        const struct iommu_ops *ops = cb->ops;
+       int ret;
 
        if (!ops->add_device)
                return 0;
 
        WARN_ON(dev->iommu_group);
 
-       return ops->add_device(dev);
+       ret = ops->add_device(dev);
+
+       /*
+        * We ignore -ENODEV errors for now, as they just mean that the
+        * device is not translated by an IOMMU. We still care about
+        * other errors and fail to initialize when they happen.
+        */
+       if (ret == -ENODEV)
+               ret = 0;
+
+       return ret;
 }
 
 static int remove_iommu_group(struct device *dev, void *data)
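
Mapping -ENODEV to success, as the comment above explains, separates "this device simply isn't translated by an IOMMU" from real failures; only the latter should abort the bus walk. A sketch of the same tolerance in a device-iteration callback, with my_add_device standing in for an ->add_device implementation:

static int my_add_one(struct device *dev, void *data)
{
        int ret = my_add_device(dev);

        return ret == -ENODEV ? 0 : ret;        /* only real errors abort */
}
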
index 15eb3f86f670ffe43605615b81c994aa58b40b2c..d2d54d62afee6b701835c1e39777fb5921cfc698 100644 (file)
@@ -191,7 +191,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
                        goto err;
                }
 
-               np = of_node(child);
+               np = to_of_node(child);
 
                if (fwnode_property_present(child, "label")) {
                        fwnode_property_read_string(child, "label", &led.name);
index 977bd3a3eed01149d9d5ef670cfa175922250232..120df5c08741a1b0f2fe53ca592fae2d7fd94ee2 100644 (file)
@@ -417,9 +417,8 @@ static int __init asic3_irq_probe(struct platform_device *pdev)
        asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK),
                             ASIC3_INTMASK_GINTMASK);
 
-       irq_set_chained_handler(asic->irq_nr, asic3_irq_demux);
+       irq_set_chained_handler_and_data(asic->irq_nr, asic3_irq_demux, asic);
        irq_set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING);
-       irq_set_handler_data(asic->irq_nr, asic);
 
        return 0;
 }
index 4739689d23ad08aec40c4478d57e3fae3a118adb..fb8705fc3aca7c37e421892b377622759834131a 100644 (file)
@@ -115,7 +115,7 @@ static int param_set_axis(const char *val, const struct kernel_param *kp)
        return ret;
 }
 
-static struct kernel_param_ops param_ops_axis = {
+static const struct kernel_param_ops param_ops_axis = {
        .set = param_set_axis,
        .get = param_get_int,
 };
index 1a92d30689e76e5cd1ce1a22bff6bbac6a1bbbbb..ebf46ad2d513edf5422e10d998043a14b9714747 100644 (file)
@@ -162,7 +162,7 @@ static int __init ubiblock_set_param(const char *val,
        return 0;
 }
 
-static struct kernel_param_ops ubiblock_param_ops = {
+static const struct kernel_param_ops ubiblock_param_ops = {
        .set    = ubiblock_set_param,
 };
 module_param_cb(block, &ubiblock_param_ops, NULL, 0);
index dd03ad865cafb83b16389e4f1840a335ff3fe9d4..661cdaa7ea96c26ebc6142c955f12dd19296e49f 100644 (file)
@@ -268,7 +268,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
        int ret;
 
        /* Try to obtain pages, decreasing order if necessary */
-       gfp |= __GFP_COLD | __GFP_COMP;
+       gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
        while (order >= 0) {
                pages = alloc_pages(gfp, order);
                if (pages)
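
Because the loop already falls back to a smaller order, a high-order allocation failure here is expected and harmless; __GFP_NOWARN merely suppresses the allocation-failure backtrace it would otherwise print. A sketch of the complete decreasing-order pattern (the gfp base flag and starting order are illustrative):

struct page *pages = NULL;
gfp_t gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
int order = 3;          /* illustrative starting order */

while (order >= 0) {
        pages = alloc_pages(gfp, order);
        if (pages)
                break;
        order--;        /* expected failure: quietly retry smaller */
}
if (!pages)
        return -ENOMEM;
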
index 95153b234c7158c655d95b7882db729d966a36dc..299eb4315fe647ba8d67302649a2cf928a4d59d5 100644 (file)
@@ -948,7 +948,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
        struct resource *res;
        void __iomem *base_addr;
        u32 offset;
-       int ret;
+       int ret = 0;
 
        pdev = pdata->pdev;
        dev = &pdev->dev;
index 7a4aaa3c01b69d43b8f0bd7f1023150f203c7179..cd4ae76bbff2f8acda89154e65cf699e141553e5 100644 (file)
@@ -530,7 +530,6 @@ enum bnx2x_tpa_mode_t {
 
 struct bnx2x_alloc_pool {
        struct page     *page;
-       dma_addr_t      dma;
        unsigned int    offset;
 };
 
@@ -2418,10 +2417,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
                                 AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR | \
                                 AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR)
 
-#define HW_PRTY_ASSERT_SET_3 (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
-               AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
-               AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY | \
-               AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
+#define HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD \
+               (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
+                AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
+                AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY)
+
+#define HW_PRTY_ASSERT_SET_3 (HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD | \
+                             AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
 
 #define HW_PRTY_ASSERT_SET_4 (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | \
                              AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)
index e2a65334708d8d61703dd44d55ab3ef9d0dda67f..a90d7364334f9dfa3687dc813e068508a342861c 100644 (file)
@@ -563,23 +563,20 @@ static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp,
                        return -ENOMEM;
                }
 
-               pool->dma = dma_map_page(&bp->pdev->dev, pool->page, 0,
-                                        PAGE_SIZE, DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(&bp->pdev->dev,
-                                              pool->dma))) {
-                       __free_pages(pool->page, PAGES_PER_SGE_SHIFT);
-                       pool->page = NULL;
-                       BNX2X_ERR("Can't map sge\n");
-                       return -ENOMEM;
-               }
                pool->offset = 0;
        }
 
+       mapping = dma_map_page(&bp->pdev->dev, pool->page,
+                              pool->offset, SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
+               BNX2X_ERR("Can't map sge\n");
+               return -ENOMEM;
+       }
+
        get_page(pool->page);
        sw_buf->page = pool->page;
        sw_buf->offset = pool->offset;
 
-       mapping = pool->dma + sw_buf->offset;
        dma_unmap_addr_set(sw_buf, mapping, mapping);
 
        sge->addr_hi = cpu_to_le32(U64_HI(mapping));
@@ -648,9 +645,9 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
                        return err;
                }
 
-               dma_unmap_single(&bp->pdev->dev,
-                                dma_unmap_addr(&old_rx_pg, mapping),
-                                SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+               dma_unmap_page(&bp->pdev->dev,
+                              dma_unmap_addr(&old_rx_pg, mapping),
+                              SGE_PAGE_SIZE, DMA_FROM_DEVICE);
                /* Add one frag and update the appropriate fields in the skb */
                if (fp->mode == TPA_MODE_LRO)
                        skb_fill_page_desc(skb, j, old_rx_pg.page,
@@ -3421,8 +3418,13 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
                        u32 wnd_sum = 0;
 
                        /* Headers length */
-                       hlen = (int)(skb_transport_header(skb) - skb->data) +
-                               tcp_hdrlen(skb);
+                       if (xmit_type & XMIT_GSO_ENC)
+                               hlen = (int)(skb_inner_transport_header(skb) -
+                                            skb->data) +
+                                            inner_tcp_hdrlen(skb);
+                       else
+                               hlen = (int)(skb_transport_header(skb) -
+                                            skb->data) + tcp_hdrlen(skb);
 
                        /* Amount of data (w/o headers) on linear part of SKB */
                        first_bd_sz = skb_headlen(skb) - hlen;
index 2b30081ec26d128ec86c602eb5a097c77c664159..03b7404d5b9ba59c5470fe36ec0746d6b75f7eee 100644 (file)
@@ -807,8 +807,8 @@ static inline void bnx2x_free_rx_sge(struct bnx2x *bp,
        /* Since many fragments can share the same page, make sure to
         * only unmap and free the page once.
         */
-       dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
-                        SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+       dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
+                      SGE_PAGE_SIZE, DMA_FROM_DEVICE);
 
        put_page(page);
 
@@ -974,14 +974,6 @@ static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp,
        if (!pool->page)
                return;
 
-       /* Page was not fully fragmented.  Unmap unused space */
-       if (pool->offset < PAGE_SIZE) {
-               dma_addr_t dma = pool->dma + pool->offset;
-               int size = PAGE_SIZE - pool->offset;
-
-               dma_unmap_single(&bp->pdev->dev, dma, size, DMA_FROM_DEVICE);
-       }
-
        put_page(pool->page);
 
        pool->page = NULL;
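
The bnx2x hunks above also fix an API mismatch: the SGE pages are mapped with dma_map_page(), so they must be torn down with dma_unmap_page(); pairing a page mapping with dma_unmap_single() is wrong and can trip DMA-API debugging. The required symmetry, sketched with a generic dev and page:

mapping = dma_map_page(dev, page, offset, SGE_PAGE_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(dev, mapping))
        return -ENOMEM;

/* ... hardware fills the buffer ... */

dma_unmap_page(dev, mapping, SGE_PAGE_SIZE, DMA_FROM_DEVICE);
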
index 48ed005ba73fd3a9d9aa550871b647fdd0b59350..76b9052a961c517978494199d74398264583508c 100644 (file)
@@ -257,14 +257,15 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
        struct bnx2x *bp = netdev_priv(dev);
        int cfg_idx = bnx2x_get_link_cfg_idx(bp);
+       u32 media_type;
 
        /* Dual Media boards present all available port types */
        cmd->supported = bp->port.supported[cfg_idx] |
                (bp->port.supported[cfg_idx ^ 1] &
                 (SUPPORTED_TP | SUPPORTED_FIBRE));
        cmd->advertising = bp->port.advertising[cfg_idx];
-       if (bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type ==
-           ETH_PHY_SFP_1G_FIBER) {
+       media_type = bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type;
+       if (media_type == ETH_PHY_SFP_1G_FIBER) {
                cmd->supported &= ~(SUPPORTED_10000baseT_Full);
                cmd->advertising &= ~(ADVERTISED_10000baseT_Full);
        }
@@ -312,12 +313,26 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                        cmd->lp_advertising |= ADVERTISED_100baseT_Full;
                if (status & LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE)
                        cmd->lp_advertising |= ADVERTISED_1000baseT_Half;
-               if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE)
-                       cmd->lp_advertising |= ADVERTISED_1000baseT_Full;
+               if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE) {
+                       if (media_type == ETH_PHY_KR) {
+                               cmd->lp_advertising |=
+                                       ADVERTISED_1000baseKX_Full;
+                       } else {
+                               cmd->lp_advertising |=
+                                       ADVERTISED_1000baseT_Full;
+                       }
+               }
                if (status & LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE)
                        cmd->lp_advertising |= ADVERTISED_2500baseX_Full;
-               if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE)
-                       cmd->lp_advertising |= ADVERTISED_10000baseT_Full;
+               if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE) {
+                       if (media_type == ETH_PHY_KR) {
+                               cmd->lp_advertising |=
+                                       ADVERTISED_10000baseKR_Full;
+                       } else {
+                               cmd->lp_advertising |=
+                                       ADVERTISED_10000baseT_Full;
+                       }
+               }
                if (status & LINK_STATUS_LINK_PARTNER_20GXFD_CAPABLE)
                        cmd->lp_advertising |= ADVERTISED_20000baseKR2_Full;
        }
@@ -564,15 +579,20 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                                return -EINVAL;
                        }
 
-                       if (!(bp->port.supported[cfg_idx] &
-                             SUPPORTED_1000baseT_Full)) {
+                       if (bp->port.supported[cfg_idx] &
+                            SUPPORTED_1000baseT_Full) {
+                               advertising = (ADVERTISED_1000baseT_Full |
+                                              ADVERTISED_TP);
+
+                       } else if (bp->port.supported[cfg_idx] &
+                                  SUPPORTED_1000baseKX_Full) {
+                               advertising = ADVERTISED_1000baseKX_Full;
+                       } else {
                                DP(BNX2X_MSG_ETHTOOL,
                                   "1G full not supported\n");
                                return -EINVAL;
                        }
 
-                       advertising = (ADVERTISED_1000baseT_Full |
-                                      ADVERTISED_TP);
                        break;
 
                case SPEED_2500:
@@ -600,17 +620,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                                return -EINVAL;
                        }
                        phy_idx = bnx2x_get_cur_phy_idx(bp);
-                       if (!(bp->port.supported[cfg_idx]
-                             & SUPPORTED_10000baseT_Full) ||
-                           (bp->link_params.phy[phy_idx].media_type ==
+                       if ((bp->port.supported[cfg_idx] &
+                            SUPPORTED_10000baseT_Full) &&
+                           (bp->link_params.phy[phy_idx].media_type !=
                             ETH_PHY_SFP_1G_FIBER)) {
+                               advertising = (ADVERTISED_10000baseT_Full |
+                                              ADVERTISED_FIBRE);
+                       } else if (bp->port.supported[cfg_idx] &
+                              SUPPORTED_10000baseKR_Full) {
+                               advertising = (ADVERTISED_10000baseKR_Full |
+                                              ADVERTISED_FIBRE);
+                       } else {
                                DP(BNX2X_MSG_ETHTOOL,
                                   "10G full not supported\n");
                                return -EINVAL;
                        }
 
-                       advertising = (ADVERTISED_10000baseT_Full |
-                                      ADVERTISED_FIBRE);
                        break;
 
                default:
@@ -633,6 +658,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        bp->link_params.multi_phy_config = new_multi_phy_config;
        if (netif_running(dev)) {
                bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+               bnx2x_force_link_reset(bp);
                bnx2x_link_set(bp);
        }
 
@@ -1204,6 +1230,7 @@ static int bnx2x_acquire_nvram_lock(struct bnx2x *bp)
        if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) {
                DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM,
                   "cannot get access to nvram interface\n");
+               bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_NVRAM);
                return -EBUSY;
        }
 
@@ -1944,6 +1971,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev,
 
        if (netif_running(dev)) {
                bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+               bnx2x_force_link_reset(bp);
                bnx2x_link_set(bp);
        }
 
index 21a0d6afca4a53a24100289585f1e9b6d59ee497..a0b03c27e0a302c08fd1a78c5dbd2dd7606ae16a 100644 (file)
@@ -3392,9 +3392,9 @@ static void bnx2x_calc_ieee_aneg_adv(struct bnx2x_phy *phy,
        case BNX2X_FLOW_CTRL_AUTO:
                switch (params->req_fc_auto_adv) {
                case BNX2X_FLOW_CTRL_BOTH:
+               case BNX2X_FLOW_CTRL_RX:
                        *ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH;
                        break;
-               case BNX2X_FLOW_CTRL_RX:
                case BNX2X_FLOW_CTRL_TX:
                        *ieee_fc |=
                                MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
@@ -3488,14 +3488,21 @@ static void bnx2x_ext_phy_set_pause(struct link_params *params,
        bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV_PAUSE, val);
 }
 
-static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result)
-{                                              /*  LD      LP   */
+static void bnx2x_pause_resolve(struct bnx2x_phy *phy,
+                               struct link_params *params,
+                               struct link_vars *vars,
+                               u32 pause_result)
+{
+       struct bnx2x *bp = params->bp;
+                                               /*  LD      LP   */
        switch (pause_result) {                 /* ASYM P ASYM P */
        case 0xb:                               /*   1  0   1  1 */
+               DP(NETIF_MSG_LINK, "Flow Control: TX only\n");
                vars->flow_ctrl = BNX2X_FLOW_CTRL_TX;
                break;
 
        case 0xe:                               /*   1  1   1  0 */
+               DP(NETIF_MSG_LINK, "Flow Control: RX only\n");
                vars->flow_ctrl = BNX2X_FLOW_CTRL_RX;
                break;
 
@@ -3503,10 +3510,22 @@ static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result)
        case 0x7:                               /*   0  1   1  1 */
        case 0xd:                               /*   1  1   0  1 */
        case 0xf:                               /*   1  1   1  1 */
-               vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH;
+               /* If the user selected RX only, then even though
+                * we advertised both, we need to resolve to
+                * RX only.
+                */
+               if (params->req_fc_auto_adv == BNX2X_FLOW_CTRL_BOTH) {
+                       DP(NETIF_MSG_LINK, "Flow Control: RX & TX\n");
+                       vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH;
+               } else {
+                       DP(NETIF_MSG_LINK, "Flow Control: RX only\n");
+                       vars->flow_ctrl = BNX2X_FLOW_CTRL_RX;
+               }
                break;
 
        default:
+               DP(NETIF_MSG_LINK, "Flow Control: None\n");
+               vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
                break;
        }
        if (pause_result & (1<<0))
@@ -3567,7 +3586,7 @@ static void bnx2x_ext_phy_update_adv_fc(struct bnx2x_phy *phy,
        pause_result |= (lp_pause &
                         MDIO_AN_REG_ADV_PAUSE_MASK) >> 10;
        DP(NETIF_MSG_LINK, "Ext PHY pause result 0x%x\n", pause_result);
-       bnx2x_pause_resolve(vars, pause_result);
+       bnx2x_pause_resolve(phy, params, vars, pause_result);
 
 }
 
@@ -5396,7 +5415,7 @@ static void bnx2x_update_adv_fc(struct bnx2x_phy *phy,
                                 MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK)>>7;
                DP(NETIF_MSG_LINK, "pause_result CL37 0x%x\n", pause_result);
        }
-       bnx2x_pause_resolve(vars, pause_result);
+       bnx2x_pause_resolve(phy, params, vars, pause_result);
 
 }
 
@@ -7129,7 +7148,7 @@ static void bnx2x_8073_resolve_fc(struct bnx2x_phy *phy,
                pause_result |= (lp_pause &
                                 MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) >> 7;
 
-               bnx2x_pause_resolve(vars, pause_result);
+               bnx2x_pause_resolve(phy, params, vars, pause_result);
                DP(NETIF_MSG_LINK, "Ext PHY CL37 pause result 0x%x\n",
                           pause_result);
        }
@@ -11474,7 +11493,9 @@ static const struct bnx2x_phy phy_warpcore = {
                           SUPPORTED_100baseT_Half |
                           SUPPORTED_100baseT_Full |
                           SUPPORTED_1000baseT_Full |
+                          SUPPORTED_1000baseKX_Full |
                           SUPPORTED_10000baseT_Full |
+                          SUPPORTED_10000baseKR_Full |
                           SUPPORTED_20000baseKR2_Full |
                           SUPPORTED_20000baseMLD2_Full |
                           SUPPORTED_FIBRE |
@@ -11980,8 +12001,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
                        break;
                case PORT_HW_CFG_NET_SERDES_IF_KR:
                        phy->media_type = ETH_PHY_KR;
-                       phy->supported &= (SUPPORTED_1000baseT_Full |
-                                          SUPPORTED_10000baseT_Full |
+                       phy->supported &= (SUPPORTED_1000baseKX_Full |
+                                          SUPPORTED_10000baseKR_Full |
                                           SUPPORTED_FIBRE |
                                           SUPPORTED_Autoneg |
                                           SUPPORTED_Pause |
@@ -11999,8 +12020,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
                        phy->media_type = ETH_PHY_KR;
                        phy->flags |= FLAGS_WC_DUAL_MODE;
                        phy->supported &= (SUPPORTED_20000baseKR2_Full |
-                                          SUPPORTED_10000baseT_Full |
-                                          SUPPORTED_1000baseT_Full |
+                                          SUPPORTED_10000baseKR_Full |
+                                          SUPPORTED_1000baseKX_Full |
                                           SUPPORTED_Autoneg |
                                           SUPPORTED_FIBRE |
                                           SUPPORTED_Pause |
index 33501bcddc48eb1f6157a08e3e3d1e08dc087c25..c27af12314ed29ae19e73a9c00f56c062a5aa830 100644 (file)
@@ -2287,13 +2287,11 @@ static int bnx2x_set_spio(struct bnx2x *bp, int spio, u32 mode)
 void bnx2x_calc_fc_adv(struct bnx2x *bp)
 {
        u8 cfg_idx = bnx2x_get_link_cfg_idx(bp);
+
+       bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
+                                          ADVERTISED_Pause);
        switch (bp->link_vars.ieee_fc &
                MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) {
-       case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE:
-               bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
-                                                  ADVERTISED_Pause);
-               break;
-
        case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH:
                bp->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause |
                                                  ADVERTISED_Pause);
@@ -2304,8 +2302,6 @@ void bnx2x_calc_fc_adv(struct bnx2x *bp)
                break;
 
        default:
-               bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
-                                                  ADVERTISED_Pause);
                break;
        }
 }
@@ -2351,12 +2347,16 @@ int bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode)
                if (load_mode == LOAD_DIAG) {
                        struct link_params *lp = &bp->link_params;
                        lp->loopback_mode = LOOPBACK_XGXS;
-                       /* do PHY loopback at 10G speed, if possible */
-                       if (lp->req_line_speed[cfx_idx] < SPEED_10000) {
+                       /* Prefer doing PHY loopback at highest speed */
+                       if (lp->req_line_speed[cfx_idx] < SPEED_20000) {
                                if (lp->speed_cap_mask[cfx_idx] &
-                                   PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+                                   PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)
                                        lp->req_line_speed[cfx_idx] =
-                                       SPEED_10000;
+                                       SPEED_20000;
+                               else if (lp->speed_cap_mask[cfx_idx] &
+                                           PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+                                               lp->req_line_speed[cfx_idx] =
+                                               SPEED_10000;
                                else
                                        lp->req_line_speed[cfx_idx] =
                                        SPEED_1000;
@@ -4867,9 +4867,7 @@ static bool bnx2x_check_blocks_with_parity3(struct bnx2x *bp, u32 sig,
                                res = true;
                                break;
                        case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY:
-                               if (print)
-                                       _print_next_block((*par_num)++,
-                                                         "MCP SCPAD");
+                               (*par_num)++;
                                /* clear latched SCPAD PARITY from MCP */
                                REG_WR(bp, MISC_REG_AEU_CLR_LATCH_SIGNAL,
                                       1UL << 10);
@@ -4931,6 +4929,7 @@ static bool bnx2x_parity_attn(struct bnx2x *bp, bool *global, bool print,
            (sig[3] & HW_PRTY_ASSERT_SET_3) ||
            (sig[4] & HW_PRTY_ASSERT_SET_4)) {
                int par_num = 0;
+
                DP(NETIF_MSG_HW, "Was parity error: HW block parity attention:\n"
                                 "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n",
                          sig[0] & HW_PRTY_ASSERT_SET_0,
@@ -4938,9 +4937,18 @@ static bool bnx2x_parity_attn(struct bnx2x *bp, bool *global, bool print,
                          sig[2] & HW_PRTY_ASSERT_SET_2,
                          sig[3] & HW_PRTY_ASSERT_SET_3,
                          sig[4] & HW_PRTY_ASSERT_SET_4);
-               if (print)
-                       netdev_err(bp->dev,
-                                  "Parity errors detected in blocks: ");
+               if (print) {
+                       if (((sig[0] & HW_PRTY_ASSERT_SET_0) ||
+                            (sig[1] & HW_PRTY_ASSERT_SET_1) ||
+                            (sig[2] & HW_PRTY_ASSERT_SET_2) ||
+                            (sig[4] & HW_PRTY_ASSERT_SET_4)) ||
+                            (sig[3] & HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD)) {
+                               netdev_err(bp->dev,
+                                          "Parity errors detected in blocks: ");
+                       } else {
+                               print = false;
+                       }
+               }
                res |= bnx2x_check_blocks_with_parity0(bp,
                        sig[0] & HW_PRTY_ASSERT_SET_0, &par_num, print);
                res |= bnx2x_check_blocks_with_parity1(bp,
@@ -8431,7 +8439,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set)
                                         BNX2X_ETH_MAC, &ramrod_flags);
        } else { /* vf */
                return bnx2x_vfpf_config_mac(bp, bp->dev->dev_addr,
-                                            bp->fp->index, true);
+                                            bp->fp->index, set);
        }
 }
 
@@ -9323,7 +9331,8 @@ unload_error:
         * function stop ramrod is sent, since as part of this ramrod FW access
         * PTP registers.
         */
-       bnx2x_stop_ptp(bp);
+       if (bp->flags & PTP_SUPPORTED)
+               bnx2x_stop_ptp(bp);
 
        /* Disable HW interrupts, NAPI */
        bnx2x_netif_stop(bp, 1);
@@ -11147,6 +11156,12 @@ static void bnx2x_link_settings_requested(struct bnx2x *bp)
                                bp->port.advertising[idx] |=
                                        (ADVERTISED_1000baseT_Full |
                                         ADVERTISED_TP);
+                       } else if (bp->port.supported[idx] &
+                                  SUPPORTED_1000baseKX_Full) {
+                               bp->link_params.req_line_speed[idx] =
+                                       SPEED_1000;
+                               bp->port.advertising[idx] |=
+                                       ADVERTISED_1000baseKX_Full;
                        } else {
                                BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x  speed_cap_mask 0x%x\n",
                                    link_config,
@@ -11179,6 +11194,13 @@ static void bnx2x_link_settings_requested(struct bnx2x *bp)
                                bp->port.advertising[idx] |=
                                        (ADVERTISED_10000baseT_Full |
                                                ADVERTISED_FIBRE);
+                       } else if (bp->port.supported[idx] &
+                                  SUPPORTED_10000baseKR_Full) {
+                               bp->link_params.req_line_speed[idx] =
+                                       SPEED_10000;
+                               bp->port.advertising[idx] |=
+                                       (ADVERTISED_10000baseKR_Full |
+                                               ADVERTISED_FIBRE);
                        } else {
                                BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x  speed_cap_mask 0x%x\n",
                                    link_config,
index 07cdf9bbffef2ee85ff3d589f33c1a10aea405d5..4ad415ac8cfe4a56ffd00858d1f70d9f1ab01456 100644 (file)
@@ -424,7 +424,7 @@ static void __bnx2x_vlan_mac_h_exec_pending(struct bnx2x *bp,
        o->head_exe_request = false;
        o->saved_ramrod_flags = 0;
        rc = bnx2x_exe_queue_step(bp, &o->exe_queue, &ramrod_flags);
-       if (rc != 0) {
+       if ((rc != 0) && (rc != 1)) {
                BNX2X_ERR("execution of pending commands failed with rc %d\n",
                          rc);
 #ifdef BNX2X_STOP_ON_ERROR
index 6f2887a5e0be693d625b6328349a2ad3b66d19ba..6159deab8c9850a0231ef3b3f1fad6dfaa31a588 100644 (file)
@@ -594,6 +594,7 @@ struct bcmgenet_priv {
        wait_queue_head_t wq;
        struct phy_device *phydev;
        struct device_node *phy_dn;
+       struct device_node *mdio_dn;
        struct mii_bus *mii_bus;
        u16 gphy_rev;
        struct clk *clk_eee;
index 6bef04e2f7354b2a9cadeac7eda5ea66e2fd25da..adf23d2ac4888e89f63c4246e7c3b33eaf3d0fd0 100644 (file)
@@ -408,6 +408,52 @@ static int bcmgenet_mii_probe(struct net_device *dev)
        return 0;
 }
 
+/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
+ * their internal MDIO management controller, which makes the first read
+ * from or write to them fail.  We insert a dummy
+ * BMSR read here to make sure that phy_get_device() and get_phy_id() can
+ * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
+ * PHY device for this peripheral.
+ *
+ * Once the PHY driver is registered, we can work around subsequent reads from
+ * there (e.g: during system-wide power management).
+ *
+ * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
+ * therefore the right location to stick that workaround. Since we do not want
+ * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
+ * Device Tree scan to limit the search area.
+ */
+static int bcmgenet_mii_bus_reset(struct mii_bus *bus)
+{
+       struct net_device *dev = bus->priv;
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+       struct device_node *np = priv->mdio_dn;
+       struct device_node *child = NULL;
+       u32 read_mask = 0;
+       int addr = 0;
+
+       if (!np) {
+               read_mask = 1 << priv->phy_addr;
+       } else {
+               for_each_available_child_of_node(np, child) {
+                       addr = of_mdio_parse_addr(&dev->dev, child);
+                       if (addr < 0)
+                               continue;
+
+                       read_mask |= 1 << addr;
+               }
+       }
+
+       for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+               if (read_mask & 1 << addr) {
+                       dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr);
+                       mdiobus_read(bus, addr, MII_BMSR);
+               }
+       }
+
+       return 0;
+}
+
 static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
 {
        struct mii_bus *bus;
@@ -427,6 +473,7 @@ static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
        bus->parent = &priv->pdev->dev;
        bus->read = bcmgenet_mii_read;
        bus->write = bcmgenet_mii_write;
+       bus->reset = bcmgenet_mii_bus_reset;
        snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d",
                 priv->pdev->name, priv->pdev->id);
 
@@ -443,7 +490,6 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
 {
        struct device_node *dn = priv->pdev->dev.of_node;
        struct device *kdev = &priv->pdev->dev;
-       struct device_node *mdio_dn;
        char *compat;
        int ret;
 
@@ -451,14 +497,14 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
        if (!compat)
                return -ENOMEM;
 
-       mdio_dn = of_find_compatible_node(dn, NULL, compat);
+       priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
        kfree(compat);
-       if (!mdio_dn) {
+       if (!priv->mdio_dn) {
                dev_err(kdev, "unable to find MDIO bus node\n");
                return -ENODEV;
        }
 
-       ret = of_mdiobus_register(priv->mii_bus, mdio_dn);
+       ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn);
        if (ret) {
                dev_err(kdev, "failed to register MDIO bus\n");
                return ret;
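
The workaround above hangs off the mii_bus ->reset hook, which mdiobus_register() invokes before scanning, so the dummy BMSR reads happen exactly once per registration. A minimal sketch of such a hook, assuming a driver-private mask of addresses to touch (my_priv and phy_read_mask are illustrative):

static int my_mii_bus_reset(struct mii_bus *bus)
{
        struct my_priv *priv = bus->priv;
        int addr;

        for (addr = 0; addr < PHY_MAX_ADDR; addr++)
                if (priv->phy_read_mask & BIT(addr))
                        mdiobus_read(bus, addr, MII_BMSR); /* dummy read */
        return 0;
}

/* wired up before registration: bus->reset = my_mii_bus_reset; */
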
index 160f8077692ce487a7d8cb87aa41d62dc21c1ddc..29f33083178431ac3735094683663d1e4ab2b836 100644 (file)
@@ -434,8 +434,9 @@ static int lio_set_phys_id(struct net_device *netdev,
                        if (ret)
                                return ret;
 
-                       octnet_mdio45_access(lio, 1, LIO68XX_LED_BEACON_ADDR,
-                                            &lio->phy_beacon_val);
+                       ret = octnet_mdio45_access(lio, 1,
+                                                  LIO68XX_LED_BEACON_ADDR,
+                                                  &lio->phy_beacon_val);
                        if (ret)
                                return ret;
 
index 0d3106b464b29548ddca47eaebb56d08e879b552..f67641a2ff9eff652a7998f4c6d8f8fab873fc63 100644 (file)
@@ -650,14 +650,12 @@ void octeon_free_device_mem(struct octeon_device *oct)
 
        for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES; i++) {
                /* could check mask as well */
-               if (oct->droq[i])
-                       vfree(oct->droq[i]);
+               vfree(oct->droq[i]);
        }
 
        for (i = 0; i < MAX_OCTEON_INSTR_QUEUES; i++) {
                /* could check mask as well */
-               if (oct->instr_queue[i])
-                       vfree(oct->instr_queue[i]);
+               vfree(oct->instr_queue[i]);
        }
 
        i = oct->octeon_id;
@@ -1078,10 +1076,7 @@ octeon_unregister_dispatch_fn(struct octeon_device *oct, u16 opcode,
                oct->dispatch.count--;
 
        spin_unlock_bh(&oct->dispatch.lock);
-
-       if (dfree)
-               vfree(dfree);
-
+       vfree(dfree);
        return retval;
 }
 
index 94b502a0cf33e54b954768688c3e35c056ed1d55..4dba86eaa04559649b012cbeff8707c47a176927 100644 (file)
@@ -216,9 +216,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
        dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);
 
        octeon_droq_destroy_ring_buffers(oct, droq);
-
-       if (droq->recv_buf_list)
-               vfree(droq->recv_buf_list);
+       vfree(droq->recv_buf_list);
 
        if (droq->info_base_addr)
                cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
index 356796bf9b871e82f4e300206e24757072bfa870..a2a24652c8f32826882f82910b76d38c8df49593 100644 (file)
@@ -175,8 +175,7 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
                desc_size =
                    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
 
-       if (iq->request_list)
-               vfree(iq->request_list);
+       vfree(iq->request_list);
 
        if (iq->base_addr) {
                q_size = iq->max_count * desc_size;
index eadae1b412c652974dde24a9a76c5d74a8c3fa29..da2004e2a74176959ece42065a26267aa2924a2b 100644 (file)
@@ -1208,7 +1208,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
                napi_complete(napi);
                vnic_intr_unmask(&enic->intr[intr]);
        }
-       enic_poll_unlock_napi(&enic->rq[cq_rq]);
+       enic_poll_unlock_napi(&enic->rq[cq_rq], napi);
 
        return rq_work_done;
 }
@@ -1414,7 +1414,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
                 */
                enic_calc_int_moderation(enic, &enic->rq[rq]);
 
-       enic_poll_unlock_napi(&enic->rq[rq]);
+       enic_poll_unlock_napi(&enic->rq[rq], napi);
        if (work_done < work_to_do) {
 
                /* Some work done, but not enough to stay in polling,
index 8111d5202df2f38c26a8c241a7bb1c1e7cda8228..b9c82f143d7e099948c9bd5e540fee64eeb68b46 100644 (file)
@@ -21,6 +21,7 @@
 #define _VNIC_RQ_H_
 
 #include <linux/pci.h>
+#include <linux/netdevice.h>
 
 #include "vnic_dev.h"
 #include "vnic_cq.h"
@@ -75,6 +76,12 @@ struct vnic_rq_buf {
        uint64_t wr_id;
 };
 
+enum enic_poll_state {
+       ENIC_POLL_STATE_IDLE,
+       ENIC_POLL_STATE_NAPI,
+       ENIC_POLL_STATE_POLL
+};
+
 struct vnic_rq {
        unsigned int index;
        struct vnic_dev *vdev;
@@ -86,19 +93,7 @@ struct vnic_rq {
        void *os_buf_head;
        unsigned int pkts_outstanding;
 #ifdef CONFIG_NET_RX_BUSY_POLL
-#define ENIC_POLL_STATE_IDLE           0
-#define ENIC_POLL_STATE_NAPI           (1 << 0) /* NAPI owns this poll */
-#define ENIC_POLL_STATE_POLL           (1 << 1) /* poll owns this poll */
-#define ENIC_POLL_STATE_NAPI_YIELD     (1 << 2) /* NAPI yielded this poll */
-#define ENIC_POLL_STATE_POLL_YIELD     (1 << 3) /* poll yielded this poll */
-#define ENIC_POLL_YIELD                        (ENIC_POLL_STATE_NAPI_YIELD |   \
-                                        ENIC_POLL_STATE_POLL_YIELD)
-#define ENIC_POLL_LOCKED               (ENIC_POLL_STATE_NAPI |         \
-                                        ENIC_POLL_STATE_POLL)
-#define ENIC_POLL_USER_PEND            (ENIC_POLL_STATE_POLL |         \
-                                        ENIC_POLL_STATE_POLL_YIELD)
-       unsigned int bpoll_state;
-       spinlock_t bpoll_lock;
+       atomic_t bpoll_state;
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 };
 
@@ -215,76 +210,43 @@ static inline int vnic_rq_fill(struct vnic_rq *rq,
 #ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void enic_busy_poll_init_lock(struct vnic_rq *rq)
 {
-       spin_lock_init(&rq->bpoll_lock);
-       rq->bpoll_state = ENIC_POLL_STATE_IDLE;
+       atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
 }
 
 static inline bool enic_poll_lock_napi(struct vnic_rq *rq)
 {
-       bool rc = true;
-
-       spin_lock(&rq->bpoll_lock);
-       if (rq->bpoll_state & ENIC_POLL_LOCKED) {
-               WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI);
-               rq->bpoll_state |= ENIC_POLL_STATE_NAPI_YIELD;
-               rc = false;
-       } else {
-               rq->bpoll_state = ENIC_POLL_STATE_NAPI;
-       }
-       spin_unlock(&rq->bpoll_lock);
+       int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE,
+                               ENIC_POLL_STATE_NAPI);
 
-       return rc;
+       return (rc == ENIC_POLL_STATE_IDLE);
 }
 
-static inline bool enic_poll_unlock_napi(struct vnic_rq *rq)
+static inline void enic_poll_unlock_napi(struct vnic_rq *rq,
+                                        struct napi_struct *napi)
 {
-       bool rc = false;
-
-       spin_lock(&rq->bpoll_lock);
-       WARN_ON(rq->bpoll_state &
-               (ENIC_POLL_STATE_POLL | ENIC_POLL_STATE_NAPI_YIELD));
-       if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD)
-               rc = true;
-       rq->bpoll_state = ENIC_POLL_STATE_IDLE;
-       spin_unlock(&rq->bpoll_lock);
-
-       return rc;
+       WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_NAPI);
+       napi_gro_flush(napi, false);
+       atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
 }
 
 static inline bool enic_poll_lock_poll(struct vnic_rq *rq)
 {
-       bool rc = true;
-
-       spin_lock_bh(&rq->bpoll_lock);
-       if (rq->bpoll_state & ENIC_POLL_LOCKED) {
-               rq->bpoll_state |= ENIC_POLL_STATE_POLL_YIELD;
-               rc = false;
-       } else {
-               rq->bpoll_state |= ENIC_POLL_STATE_POLL;
-       }
-       spin_unlock_bh(&rq->bpoll_lock);
+       int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE,
+                               ENIC_POLL_STATE_POLL);
 
-       return rc;
+       return (rc == ENIC_POLL_STATE_IDLE);
 }
 
-static inline bool enic_poll_unlock_poll(struct vnic_rq *rq)
-{
-       bool rc = false;
 
-       spin_lock_bh(&rq->bpoll_lock);
-       WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI);
-       if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD)
-               rc = true;
-       rq->bpoll_state = ENIC_POLL_STATE_IDLE;
-       spin_unlock_bh(&rq->bpoll_lock);
-
-       return rc;
+static inline void enic_poll_unlock_poll(struct vnic_rq *rq)
+{
+       WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_POLL);
+       atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
 }
 
 static inline bool enic_poll_busy_polling(struct vnic_rq *rq)
 {
-       WARN_ON(!(rq->bpoll_state & ENIC_POLL_LOCKED));
-       return rq->bpoll_state & ENIC_POLL_USER_PEND;
+       return atomic_read(&rq->bpoll_state) & ENIC_POLL_STATE_POLL;
 }
 
 #else
@@ -298,7 +260,8 @@ static inline bool enic_poll_lock_napi(struct vnic_rq *rq)
        return true;
 }
 
-static inline bool enic_poll_unlock_napi(struct vnic_rq *rq)
+static inline bool enic_poll_unlock_napi(struct vnic_rq *rq,
+                                        struct napi_struct *napi)
 {
        return false;
 }
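
The enic conversion above replaces a spinlock plus flag word with a three-state machine driven by atomic_cmpxchg(): a context owns the ring only if it was the one to move the state from IDLE to its own tag, and unlock is a plain atomic_set() back to IDLE. The idiom in isolation, with illustrative names:

enum owner { IDLE, NAPI_OWNER, POLL_OWNER };

static inline bool try_own(atomic_t *state, int who)
{
        /* succeeds only for the caller that moves IDLE -> who */
        return atomic_cmpxchg(state, IDLE, who) == IDLE;
}

static inline void release_own(atomic_t *state, int who)
{
        WARN_ON(atomic_read(state) != who);     /* caller must own it */
        atomic_set(state, IDLE);
}
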
index b8de87b03046a13d2f1eff527137446de5081a41..ff76d4e9dc1ba5eab90413f82592a876092a8ddf 100644 (file)
@@ -83,12 +83,12 @@ config UGETH_TX_ON_DEMAND
 
 config GIANFAR
        tristate "Gianfar Ethernet"
-       depends on FSL_SOC
        select FSL_PQ_MDIO
        select PHYLIB
        select CRC32
        ---help---
          This driver supports the Gigabit TSEC on the MPC83xx, MPC85xx,
-         and MPC86xx family of chips, and the FEC on the 8540.
+         and MPC86xx family of chips, the eTSEC on LS1021A and the FEC
+         on the 8540.
 
 endif # NET_VENDOR_FREESCALE
index a86af8a7485dad1be3caf4a55b6d77c7c7b5c884..1eee73cccdf58deba85c810399930ffa55dfa03c 100644 (file)
@@ -428,6 +428,8 @@ struct bufdesc_ex {
 #define FEC_QUIRK_BUG_CAPTURE          (1 << 10)
 /* Controller has only one MDIO bus */
 #define FEC_QUIRK_SINGLE_MDIO          (1 << 11)
+/* Controller supports RACC register */
+#define FEC_QUIRK_HAS_RACC             (1 << 12)
 
 struct fec_enet_priv_tx_q {
        int index;
index e464aeaeed2cd9ece504a2e1494b2869dfadd138..1f89c59b43535f9b65e946c7468cb1fcb13a2022 100644 (file)
@@ -85,28 +85,30 @@ static struct platform_device_id fec_devtype[] = {
                .driver_data = 0,
        }, {
                .name = "imx25-fec",
-               .driver_data = FEC_QUIRK_USE_GASKET,
+               .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
        }, {
                .name = "imx27-fec",
-               .driver_data = 0,
+               .driver_data = FEC_QUIRK_HAS_RACC,
        }, {
                .name = "imx28-fec",
                .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
-                               FEC_QUIRK_SINGLE_MDIO,
+                               FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC,
        }, {
                .name = "imx6q-fec",
                .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
                                FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
-                               FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358,
+                               FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
+                               FEC_QUIRK_HAS_RACC,
        }, {
                .name = "mvf600-fec",
-               .driver_data = FEC_QUIRK_ENET_MAC,
+               .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC,
        }, {
                .name = "imx6sx-fec",
                .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
                                FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
                                FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
-                               FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE,
+                               FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+                               FEC_QUIRK_HAS_RACC,
        }, {
                /* sentinel */
        }
@@ -970,13 +972,15 @@ fec_restart(struct net_device *ndev)
        writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
 #if !defined(CONFIG_M5272)
-       /* set RX checksum */
-       val = readl(fep->hwp + FEC_RACC);
-       if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
-               val |= FEC_RACC_OPTIONS;
-       else
-               val &= ~FEC_RACC_OPTIONS;
-       writel(val, fep->hwp + FEC_RACC);
+       if (fep->quirks & FEC_QUIRK_HAS_RACC) {
+               /* set RX checksum */
+               val = readl(fep->hwp + FEC_RACC);
+               if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+                       val |= FEC_RACC_OPTIONS;
+               else
+                       val &= ~FEC_RACC_OPTIONS;
+               writel(val, fep->hwp + FEC_RACC);
+       }
 #endif
 
        /*
index ff2903652f4bbc5ffafcedcd5cb501f31b0436e9..c3b6af83f070e40f083d7580055242fc053c11a2 100644 (file)
@@ -1028,7 +1028,7 @@ static struct net_device_stats *ipg_nic_get_stats(struct net_device *dev)
 
        /* detailed rx_errors */
        sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) +
-               ipg_r16(IPG_FRAMETOOLONGERRRORS);
+               ipg_r16(IPG_FRAMETOOLONGERRORS);
        sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS);
 
        /* Unutilized IPG statistic registers. */
index a21e4f5702b57800271bfc8e5d340dd795dbfa58..de606281f97befcc63e1925875a0686f31faff26 100644 (file)
@@ -102,7 +102,7 @@ enum ipg_regs {
 #define        IPG_MCSTFRAMESRCVDOK            0xB8
 #define        IPG_BCSTFRAMESRCVDOK            0xBE
 #define        IPG_MACCONTROLFRAMESRCVD        0xC6
-#define        IPG_FRAMETOOLONGERRRORS         0xC8
+#define        IPG_FRAMETOOLONGERRORS          0xC8
 #define        IPG_INRANGELENGTHERRORS         0xCA
 #define        IPG_FRAMECHECKSEQERRORS         0xCC
 #define        IPG_FRAMESLOSTRXERRORS          0xCE
index b074b9a667b32cceae00965a0031632431e972fe..91a5a0ae9cd73932648492ce532b0e1260f1419c 100644 (file)
@@ -237,17 +237,19 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
        if (ret_val)
                return false;
 out:
-       if ((hw->mac.type == e1000_pch_lpt) ||
-           (hw->mac.type == e1000_pch_spt)) {
-               /* Unforce SMBus mode in PHY */
-               e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
-               phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
-               e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+       if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) {
+               /* Only unforce SMBus if ME is not active */
+               if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+                       /* Unforce SMBus mode in PHY */
+                       e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
+                       phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+                       e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
 
-               /* Unforce SMBus mode in MAC */
-               mac_reg = er32(CTRL_EXT);
-               mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
-               ew32(CTRL_EXT, mac_reg);
+                       /* Unforce SMBus mode in MAC */
+                       mac_reg = er32(CTRL_EXT);
+                       mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+                       ew32(CTRL_EXT, mac_reg);
+               }
        }
 
        return true;
@@ -1087,6 +1089,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        u32 mac_reg;
        s32 ret_val = 0;
        u16 phy_reg;
+       u16 oem_reg = 0;
 
        if ((hw->mac.type < e1000_pch_lpt) ||
            (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_LM) ||
@@ -1128,33 +1131,37 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        if (ret_val)
                goto out;
 
+       /* Force SMBus mode in PHY */
+       ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+       if (ret_val)
+               goto release;
+       phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+       e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+       /* Force SMBus mode in MAC */
+       mac_reg = er32(CTRL_EXT);
+       mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+       ew32(CTRL_EXT, mac_reg);
+
+       /* Si workaround for ULP entry flow on i217/rev6 h/w.  Enable
         * LPLU and disable Gig speed when entering ULP
         */
        if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) {
                ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS,
-                                                      &phy_reg);
+                                                      &oem_reg);
                if (ret_val)
                        goto release;
+
+               phy_reg = oem_reg;
                phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS;
+
                ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
                                                        phy_reg);
+
                if (ret_val)
                        goto release;
        }
 
-       /* Force SMBus mode in PHY */
-       ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
-       if (ret_val)
-               goto release;
-       phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
-       e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
-       /* Force SMBus mode in MAC */
-       mac_reg = er32(CTRL_EXT);
-       mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
-       ew32(CTRL_EXT, mac_reg);
-
        /* Set Inband ULP Exit, Reset to SMBus mode and
         * Disable SMBus Release on PERST# in PHY
         */
@@ -1166,10 +1173,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        if (to_sx) {
                if (er32(WUFC) & E1000_WUFC_LNKC)
                        phy_reg |= I218_ULP_CONFIG1_WOL_HOST;
+               else
+                       phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
 
                phy_reg |= I218_ULP_CONFIG1_STICKY_ULP;
+               phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT;
        } else {
                phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT;
+               phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP;
+               phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
        }
        e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
 
@@ -1181,6 +1193,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        /* Commit ULP changes in PHY by starting auto ULP configuration */
        phy_reg |= I218_ULP_CONFIG1_START;
        e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+       if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) &&
+           to_sx && (er32(STATUS) & E1000_STATUS_LU)) {
+               ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+                                                       oem_reg);
+               if (ret_val)
+                       goto release;
+       }
+
 release:
        hw->phy.ops.release(hw);
 out:
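
Beyond moving the SMBus forcing ahead of the OEM-bits write, the hunk above introduces a save/modify/restore shape: the pristine HV_OEM_BITS value is parked in oem_reg so it can be written back once the ULP sequence completes. A hedged sketch of that shape, all names hypothetical:

    #include <linux/types.h>

    struct my_hw;                           /* opaque handle, hypothetical */
    #define MY_OEM_BITS     25              /* hypothetical register */
    #define MY_LPLU         0x0004
    #define MY_GBE_DIS      0x0040

    extern int read_reg(struct my_hw *hw, u32 reg, u16 *val);
    extern int write_reg(struct my_hw *hw, u32 reg, u16 val);

    /* Apply temporary low-power bits, then restore the saved value. */
    static int apply_ulp_workaround(struct my_hw *hw)
    {
            u16 orig;
            int ret;

            ret = read_reg(hw, MY_OEM_BITS, &orig);
            if (ret)
                    return ret;

            ret = write_reg(hw, MY_OEM_BITS, orig | MY_LPLU | MY_GBE_DIS);
            if (ret)
                    return ret;

            /* ... ULP entry sequence runs here ... */

            return write_reg(hw, MY_OEM_BITS, orig);  /* undo the override */
    }
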
@@ -1379,16 +1400,20 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
        if (((hw->mac.type == e1000_pch2lan) ||
             (hw->mac.type == e1000_pch_lpt) ||
             (hw->mac.type == e1000_pch_spt)) && link) {
-               u32 reg;
+               u16 speed, duplex;
 
-               reg = er32(STATUS);
+               e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
                tipg_reg = er32(TIPG);
                tipg_reg &= ~E1000_TIPG_IPGT_MASK;
 
-               if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) {
+               if (duplex == HALF_DUPLEX && speed == SPEED_10) {
                        tipg_reg |= 0xFF;
                        /* Reduce Rx latency in analog PHY */
                        emi_val = 0;
+               } else if (hw->mac.type == e1000_pch_spt &&
+                          duplex == FULL_DUPLEX && speed != SPEED_1000) {
+                       tipg_reg |= 0xC;
+                       emi_val = 1;
                } else {
 
                        /* Roll back the default values */
@@ -1412,14 +1437,59 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 
                if (ret_val)
                        return ret_val;
+
+               if (hw->mac.type == e1000_pch_spt) {
+                       u16 data;
+                       u16 ptr_gap;
+
+                       if (speed == SPEED_1000) {
+                               ret_val = hw->phy.ops.acquire(hw);
+                               if (ret_val)
+                                       return ret_val;
+
+                               ret_val = e1e_rphy_locked(hw,
+                                                         PHY_REG(776, 20),
+                                                         &data);
+                               if (ret_val) {
+                                       hw->phy.ops.release(hw);
+                                       return ret_val;
+                               }
+
+                               ptr_gap = (data & (0x3FF << 2)) >> 2;
+                               if (ptr_gap < 0x18) {
+                                       data &= ~(0x3FF << 2);
+                                       data |= (0x18 << 2);
+                                       ret_val =
+                                           e1e_wphy_locked(hw,
+                                                           PHY_REG(776, 20),
+                                                           data);
+                               }
+                               hw->phy.ops.release(hw);
+                               if (ret_val)
+                                       return ret_val;
+                       }
+               }
+       }
+
+       /* I217 Packet Loss issue:
+        * ensure that FEXTNVM4 Beacon Duration is set correctly
+        * on power up.
+        * Set the Beacon Duration for I217 to 8 usec
+        */
+       if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) {
+               u32 mac_reg;
+
+               mac_reg = er32(FEXTNVM4);
+               mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+               mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
+               ew32(FEXTNVM4, mac_reg);
        }
 
        /* Work-around I218 hang issue */
        if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
            (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
            (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM3) ||
-           (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3) ||
-           (hw->mac.type == e1000_pch_spt)) {
+           (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) {
                ret_val = e1000_k1_workaround_lpt_lp(hw, link);
                if (ret_val)
                        return ret_val;
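
The PHY_REG(776, 20) fixup above is a bounded bit-field update: pull a 10-bit field out of bits 2..11 and rewrite it only when it sits below a floor (0x18 here). The mask arithmetic, as a standalone program:

    #include <stdint.h>
    #include <stdio.h>

    #define FIELD_MASK  (0x3FF << 2)   /* 10-bit field at bits 2..11 */
    #define FIELD_MIN   0x18

    /* Raise the embedded field to FIELD_MIN when it is below it. */
    static uint16_t clamp_field(uint16_t reg)
    {
            uint16_t field = (reg & FIELD_MASK) >> 2;

            if (field < FIELD_MIN) {
                    reg &= ~FIELD_MASK;
                    reg |= FIELD_MIN << 2;
            }
            return reg;
    }

    int main(void)
    {
            /* field 0x10 < 0x18, so it gets raised: 0x0041 -> 0x0061 */
            printf("0x%04x -> 0x%04x\n", 0x0041, clamp_field(0x0041));
            return 0;
    }
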
index e62b9dcb91fe51309ff7280a20fcbd4d37000548..89d788d8f263e5c362c10166dc76fa59f517e12c 100644 (file)
@@ -6354,13 +6354,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 }
 
 /**
- * e1000e_disable_aspm - Disable ASPM states
+ * __e1000e_disable_aspm - Disable ASPM states
  * @pdev: pointer to PCI device struct
  * @state: bit-mask of ASPM states to disable
+ * @locked: indication of whether this context holds pci_bus_sem locked.
  *
  * Some devices *must* have certain ASPM states disabled per hardware errata.
  **/
-static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state, int locked)
 {
        struct pci_dev *parent = pdev->bus->self;
        u16 aspm_dis_mask = 0;
@@ -6399,7 +6400,10 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
                 "L1" : "");
 
 #ifdef CONFIG_PCIEASPM
-       pci_disable_link_state_locked(pdev, state);
+       if (locked)
+               pci_disable_link_state_locked(pdev, state);
+       else
+               pci_disable_link_state(pdev, state);
 
        /* Double-check ASPM control.  If not disabled by the above, the
         * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
@@ -6422,6 +6426,32 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
                                           aspm_dis_mask);
 }
 
+/**
+ * e1000e_disable_aspm - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function acquires the pci_bus_sem!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+{
+       __e1000e_disable_aspm(pdev, state, 0);
+}
+
+/**
+ * e1000e_disable_aspm_locked - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function must be called with pci_bus_sem acquired!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state)
+{
+       __e1000e_disable_aspm(pdev, state, 1);
+}
+
 #ifdef CONFIG_PM
 static int __e1000_resume(struct pci_dev *pdev)
 {
@@ -6435,7 +6465,7 @@ static int __e1000_resume(struct pci_dev *pdev)
        if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1)
                aspm_disable_flag |= PCIE_LINK_STATE_L1;
        if (aspm_disable_flag)
-               e1000e_disable_aspm(pdev, aspm_disable_flag);
+               e1000e_disable_aspm_locked(pdev, aspm_disable_flag);
 
        pci_set_master(pdev);
 
index f54996f196293d8cf0c1942effe40c2e0e77b77e..395f32f226c08ac924e7d3e707ef7124b2744ec5 100644 (file)
@@ -484,6 +484,8 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
        if (!dev)
                return -ENOMEM;
 
+       /* warn if we are about to overwrite the pointer */
+       WARN_ON(tx_ring->tx_bi);
        bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
        tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
        if (!tx_ring->tx_bi)
@@ -644,6 +646,8 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring)
        struct device *dev = rx_ring->dev;
        int bi_size;
 
+       /* warn if we are about to overwrite the pointer */
+       WARN_ON(rx_ring->rx_bi);
        bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
        rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
        if (!rx_ring->rx_bi)
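
The WARN_ON() checks added above are cheap leak detectors: if tx_bi/rx_bi were already populated when the setup path runs again, the fresh allocation would silently orphan the old buffer. A minimal sketch of the guard:

    #include <linux/bug.h>
    #include <linux/errno.h>
    #include <linux/slab.h>

    struct my_ring {
            void *bufs;     /* per-descriptor bookkeeping, hypothetical */
            int count;
    };

    static int my_setup_ring(struct my_ring *ring)
    {
            /* warn if we are about to overwrite (and leak) a live allocation */
            WARN_ON(ring->bufs);

            ring->bufs = kcalloc(ring->count, 64, GFP_KERNEL);
            return ring->bufs ? 0 : -ENOMEM;
    }
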
index 1b98c25b3092ac4b753eb280a9e38bc3d1fe08bd..fea3b75a9a35fcdc58b9d5f5d0f6125dbf62e0cf 100644 (file)
@@ -264,7 +264,6 @@ extern const char i40evf_driver_version[];
 
 int i40evf_up(struct i40evf_adapter *adapter);
 void i40evf_down(struct i40evf_adapter *adapter);
-void i40evf_reinit_locked(struct i40evf_adapter *adapter);
 void i40evf_reset(struct i40evf_adapter *adapter);
 void i40evf_set_ethtool_ops(struct net_device *netdev);
 void i40evf_update_stats(struct i40evf_adapter *adapter);
index f4e77665bc54b9058c85c2c8e62add23fa49b9b8..2b53c870e7f113ca0695afab3636446e1015e4e8 100644 (file)
@@ -267,8 +267,10 @@ static int i40evf_set_ringparam(struct net_device *netdev,
        adapter->tx_desc_count = new_tx_count;
        adapter->rx_desc_count = new_rx_count;
 
-       if (netif_running(netdev))
-               i40evf_reinit_locked(adapter);
+       if (netif_running(netdev)) {
+               adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+               schedule_work(&adapter->reset_task);
+       }
 
        return 0;
 }
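
Routing the ring-size change through I40EVF_FLAG_RESET_NEEDED plus schedule_work() funnels every reinit through the one reset worker instead of a second teardown path. The request side of that pattern, sketched with hypothetical names:

    #include <linux/bitops.h>
    #include <linux/types.h>
    #include <linux/workqueue.h>

    #define MY_FLAG_RESET_NEEDED    BIT(0)

    struct my_adapter {
            u32 flags;
            struct work_struct reset_task;
    };

    /* Callers mark the need and kick the worker; only the worker owns
     * the actual teardown/bring-up sequence.
     */
    static void my_request_reset(struct my_adapter *adapter)
    {
            adapter->flags |= MY_FLAG_RESET_NEEDED;
            schedule_work(&adapter->reset_task);
    }
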
index 7c53aca4b5a6f0b8726c32bee935478d65e3cd47..4ab4ebba07a18e5b1b0539cf0c0b8a7122f6fdc2 100644 (file)
@@ -170,7 +170,8 @@ static void i40evf_tx_timeout(struct net_device *netdev)
        struct i40evf_adapter *adapter = netdev_priv(netdev);
 
        adapter->tx_timeout_count++;
-       if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
+       if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING |
+                               I40EVF_FLAG_RESET_NEEDED))) {
                adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
                schedule_work(&adapter->reset_task);
        }
@@ -1460,7 +1461,7 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
        for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
                lut = 0;
                for (j = 0; j < 4; j++) {
-                       if (cqueue == adapter->vsi_res->num_queue_pairs)
+                       if (cqueue == adapter->num_active_queues)
                                cqueue = 0;
                        lut |= ((cqueue) << (8 * j));
                        cqueue++;
@@ -1470,8 +1471,8 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
        i40e_flush(hw);
 }
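
The RSS fix above matters because each 32-bit LUT register packs four 8-bit queue indices, wrapping at the number of active (not merely advertised) queues. The packing, as a standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack four 8-bit queue indices per 32-bit LUT word, wrapping at
     * the number of active queues.
     */
    static void fill_lut(uint32_t *lut_regs, int nregs, int num_queues)
    {
            int i, j, q = 0;

            for (i = 0; i < nregs; i++) {
                    uint32_t lut = 0;

                    for (j = 0; j < 4; j++) {
                            if (q == num_queues)    /* wrap on active queues */
                                    q = 0;
                            lut |= (uint32_t)q << (8 * j);
                            q++;
                    }
                    lut_regs[i] = lut;
            }
    }

    int main(void)
    {
            uint32_t regs[2];

            fill_lut(regs, 2, 3);   /* 3 active queues */
            printf("0x%08x 0x%08x\n", regs[0], regs[1]);    /* 0x00020100 0x01000201 */
            return 0;
    }
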
 
-#define I40EVF_RESET_WAIT_MS 100
-#define I40EVF_RESET_WAIT_COUNT 200
+#define I40EVF_RESET_WAIT_MS 10
+#define I40EVF_RESET_WAIT_COUNT 500
 /**
  * i40evf_reset_task - Call-back task to handle hardware reset
  * @work: pointer to work_struct
@@ -1495,10 +1496,17 @@ static void i40evf_reset_task(struct work_struct *work)
                                &adapter->crit_section))
                usleep_range(500, 1000);
 
+       i40evf_misc_irq_disable(adapter);
        if (adapter->flags & I40EVF_FLAG_RESET_NEEDED) {
-               dev_info(&adapter->pdev->dev, "Requesting reset from PF\n");
+               adapter->flags &= ~I40EVF_FLAG_RESET_NEEDED;
+               /* Restart the AQ here. If we have been reset but didn't
+                * detect it, or if the PF had to reinit, our AQ will be hosed.
+                */
+               i40evf_shutdown_adminq(hw);
+               i40evf_init_adminq(hw);
                i40evf_request_reset(adapter);
        }
+       adapter->flags |= I40EVF_FLAG_RESET_PENDING;
 
        /* poll until we see the reset actually happen */
        for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
@@ -1507,10 +1515,10 @@ static void i40evf_reset_task(struct work_struct *work)
                if ((rstat_val != I40E_VFR_VFACTIVE) &&
                    (rstat_val != I40E_VFR_COMPLETED))
                        break;
-               msleep(I40EVF_RESET_WAIT_MS);
+               usleep_range(500, 1000);
        }
        if (i == I40EVF_RESET_WAIT_COUNT) {
-               adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+               dev_info(&adapter->pdev->dev, "Never saw reset\n");
                goto continue_reset; /* act like the reset happened */
        }
 
@@ -1518,11 +1526,12 @@ static void i40evf_reset_task(struct work_struct *work)
        for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
                rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
                            I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-               if ((rstat_val == I40E_VFR_VFACTIVE) ||
-                   (rstat_val == I40E_VFR_COMPLETED))
+               if (rstat_val == I40E_VFR_VFACTIVE)
                        break;
                msleep(I40EVF_RESET_WAIT_MS);
        }
+       /* extra wait to make sure minimum wait is met */
+       msleep(I40EVF_RESET_WAIT_MS);
        if (i == I40EVF_RESET_WAIT_COUNT) {
                struct i40evf_mac_filter *f, *ftmp;
                struct i40evf_vlan_filter *fv, *fvtmp;
@@ -1534,11 +1543,10 @@ static void i40evf_reset_task(struct work_struct *work)
 
                if (netif_running(adapter->netdev)) {
                        set_bit(__I40E_DOWN, &adapter->vsi.state);
-                       i40evf_irq_disable(adapter);
-                       i40evf_napi_disable_all(adapter);
-                       netif_tx_disable(netdev);
-                       netif_tx_stop_all_queues(netdev);
                        netif_carrier_off(netdev);
+                       netif_tx_disable(netdev);
+                       i40evf_napi_disable_all(adapter);
+                       i40evf_irq_disable(adapter);
                        i40evf_free_traffic_irqs(adapter);
                        i40evf_free_all_tx_resources(adapter);
                        i40evf_free_all_rx_resources(adapter);
@@ -1550,6 +1558,7 @@ static void i40evf_reset_task(struct work_struct *work)
                        list_del(&f->list);
                        kfree(f);
                }
+
                list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list,
                                         list) {
                        list_del(&fv->list);
@@ -1564,22 +1573,27 @@ static void i40evf_reset_task(struct work_struct *work)
                i40evf_shutdown_adminq(hw);
                adapter->netdev->flags &= ~IFF_UP;
                clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+               adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+               dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
                return; /* Do not attempt to reinit. It's dead, Jim. */
        }
 
 continue_reset:
-       adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-
-       i40evf_irq_disable(adapter);
-
        if (netif_running(adapter->netdev)) {
-               i40evf_napi_disable_all(adapter);
-               netif_tx_disable(netdev);
-               netif_tx_stop_all_queues(netdev);
                netif_carrier_off(netdev);
+               netif_tx_stop_all_queues(netdev);
+               i40evf_napi_disable_all(adapter);
        }
+       i40evf_irq_disable(adapter);
 
        adapter->state = __I40EVF_RESETTING;
+       adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+
+       /* free the Tx/Rx rings and descriptors, might be better to just
+        * re-use them sometime in the future
+        */
+       i40evf_free_all_rx_resources(adapter);
+       i40evf_free_all_tx_resources(adapter);
 
        /* kill and reinit the admin queue */
        if (i40evf_shutdown_adminq(hw))
@@ -1603,6 +1617,7 @@ continue_reset:
        adapter->aq_required = I40EVF_FLAG_AQ_ADD_MAC_FILTER;
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
        clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       i40evf_misc_irq_enable(adapter);
 
        mod_timer(&adapter->watchdog_timer, jiffies + 2);
 
@@ -1624,7 +1639,10 @@ continue_reset:
                        goto reset_err;
 
                i40evf_irq_enable(adapter, true);
+       } else {
+               adapter->state = __I40EVF_DOWN;
        }
+
        return;
 reset_err:
        dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
@@ -1667,6 +1685,11 @@ static void i40evf_adminq_task(struct work_struct *work)
                        memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
        } while (pending);
 
+       if ((adapter->flags &
+            (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED)) ||
+           adapter->state == __I40EVF_RESETTING)
+               goto freedom;
+
        /* check for error indications */
        val = rd32(hw, hw->aq.arq.len);
        oldval = val;
@@ -1702,6 +1725,7 @@ static void i40evf_adminq_task(struct work_struct *work)
        if (oldval != val)
                wr32(hw, hw->aq.asq.len, val);
 
+freedom:
        kfree(event.msg_buf);
 out:
        /* re-enable Admin queue interrupt cause */
@@ -1896,47 +1920,6 @@ static struct net_device_stats *i40evf_get_stats(struct net_device *netdev)
        return &adapter->net_stats;
 }
 
-/**
- * i40evf_reinit_locked - Software reinit
- * @adapter: board private structure
- *
- * Reinititalizes the ring structures in response to a software configuration
- * change. Roughly the same as close followed by open, but skips releasing
- * and reallocating the interrupts.
- **/
-void i40evf_reinit_locked(struct i40evf_adapter *adapter)
-{
-       struct net_device *netdev = adapter->netdev;
-       int err;
-
-       WARN_ON(in_interrupt());
-
-       i40evf_down(adapter);
-
-       /* allocate transmit descriptors */
-       err = i40evf_setup_all_tx_resources(adapter);
-       if (err)
-               goto err_reinit;
-
-       /* allocate receive descriptors */
-       err = i40evf_setup_all_rx_resources(adapter);
-       if (err)
-               goto err_reinit;
-
-       i40evf_configure(adapter);
-
-       err = i40evf_up_complete(adapter);
-       if (err)
-               goto err_reinit;
-
-       i40evf_irq_enable(adapter, true);
-       return;
-
-err_reinit:
-       dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-       i40evf_close(netdev);
-}
-
 /**
  * i40evf_change_mtu - Change the Maximum Transfer Unit
  * @netdev: network interface device structure
@@ -1952,9 +1935,10 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
        if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER))
                return -EINVAL;
 
-       /* must set new MTU before calling down or up */
        netdev->mtu = new_mtu;
-       i40evf_reinit_locked(adapter);
+       adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+       schedule_work(&adapter->reset_task);
+
        return 0;
 }
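
The reworked reset task polls the VF status register a bounded number of times, then sleeps once more so the minimum settle time is met even on an early exit. The polling shape, with hypothetical register helpers:

    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    #define MY_RESET_WAIT_MS        10
    #define MY_RESET_WAIT_COUNT     500
    #define MY_STATE_ACTIVE         1

    struct my_hw;                                   /* hypothetical */
    extern u32 read_reset_state(struct my_hw *hw);  /* hypothetical */

    /* Return 0 once the device reports active, -ETIMEDOUT otherwise. */
    static int wait_for_reset_done(struct my_hw *hw)
    {
            int i;

            for (i = 0; i < MY_RESET_WAIT_COUNT; i++) {
                    if (read_reset_state(hw) == MY_STATE_ACTIVE)
                            break;
                    msleep(MY_RESET_WAIT_MS);
            }
            /* extra wait so the minimum settle time is always met */
            msleep(MY_RESET_WAIT_MS);

            return i == MY_RESET_WAIT_COUNT ? -ETIMEDOUT : 0;
    }
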
 
index 0f69ef81751a3d8154db558cc8f3d11e882928a0..b0182dd313464ccceb85dd19c9489fcd7b3cd9c6 100644 (file)
@@ -1,5 +1,5 @@
 /* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
+ * Copyright(c) 2007-2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1900,8 +1900,8 @@ static void igb_clear_hw_cntrs_82575(struct e1000_hw *hw)
  *  igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable
  *  @hw: pointer to the HW structure
  *
- *  After rx enable if managability is enabled then there is likely some
- *  bad data at the start of the fifo and possibly in the DMA fifo.  This
+ *  After rx enable, if manageability is enabled, there is likely some
+ *  bad data at the start of the fifo and possibly in the DMA fifo. This
  *  function clears the fifos and flushes any packets that came in as rx was
  *  being enabled.
  **/
@@ -1910,6 +1910,11 @@ void igb_rx_fifo_flush_82575(struct e1000_hw *hw)
        u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
        int i, ms_wait;
 
+       /* disable IPv6 options as per hardware errata */
+       rfctl = rd32(E1000_RFCTL);
+       rfctl |= E1000_RFCTL_IPV6_EX_DIS;
+       wr32(E1000_RFCTL, rfctl);
+
        if (hw->mac.type != e1000_82575 ||
            !(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN))
                return;
@@ -1937,7 +1942,6 @@ void igb_rx_fifo_flush_82575(struct e1000_hw *hw)
         * incoming packets are rejected.  Set enable and wait 2ms so that
         * any packet that was coming in as RCTL.EN was set is flushed
         */
-       rfctl = rd32(E1000_RFCTL);
        wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
 
        rlpml = rd32(E1000_RLPML);
index 217f8138851bf3e229d6a0035b442e0cc3ae8175..f8684aa285be8cac987263db6676f9d1076b5f9b 100644 (file)
 #define E1000_RXCSUM_PCSD      0x00002000   /* packet checksum disabled */
 
 /* Header split receive */
-#define E1000_RFCTL_LEF        0x00040000
+#define E1000_RFCTL_IPV6_EX_DIS         0x00010000
+#define E1000_RFCTL_LEF                 0x00040000
 
 /* Collision related configuration parameters */
 #define E1000_COLLISION_THRESHOLD       15
index f287186192bb655ba2dc1a205fb251351d593e98..2f70a9b152bd1789349d9c4d995852e95be1e70d 100644 (file)
@@ -58,7 +58,7 @@
 
 #define MAJ 5
 #define MIN 2
-#define BUILD 15
+#define BUILD 18
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
 char igb_driver_name[] = "igb";
index 5bdf78231a4e78f09c360e4a2e5c0c7c5f82482b..370e20ed224c5c76eaca92954be5800d09d81ada 100644 (file)
@@ -310,6 +310,7 @@ struct mvneta_port {
        unsigned int link;
        unsigned int duplex;
        unsigned int speed;
+       unsigned int tx_csum_limit;
        int use_inband_status:1;
 };
 
@@ -2508,8 +2509,10 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
 
        dev->mtu = mtu;
 
-       if (!netif_running(dev))
+       if (!netif_running(dev)) {
+               netdev_update_features(dev);
                return 0;
+       }
 
        /* The interface is running, so we have to force a
         * reallocation of the queues
@@ -2538,9 +2541,26 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
        mvneta_start_dev(pp);
        mvneta_port_up(pp);
 
+       netdev_update_features(dev);
+
        return 0;
 }
 
+static netdev_features_t mvneta_fix_features(struct net_device *dev,
+                                            netdev_features_t features)
+{
+       struct mvneta_port *pp = netdev_priv(dev);
+
+       if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) {
+               features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+               netdev_info(dev,
+                           "Disable IP checksum for MTU greater than %dB\n",
+                           pp->tx_csum_limit);
+       }
+
+       return features;
+}
+
 /* Get mac address */
 static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr)
 {
@@ -2862,6 +2882,7 @@ static const struct net_device_ops mvneta_netdev_ops = {
        .ndo_set_rx_mode     = mvneta_set_rx_mode,
        .ndo_set_mac_address = mvneta_set_mac_addr,
        .ndo_change_mtu      = mvneta_change_mtu,
+       .ndo_fix_features    = mvneta_fix_features,
        .ndo_get_stats64     = mvneta_get_stats64,
        .ndo_do_ioctl        = mvneta_ioctl,
 };
@@ -3107,6 +3128,9 @@ static int mvneta_probe(struct platform_device *pdev)
                }
        }
 
+       if (of_device_is_compatible(dn, "marvell,armada-370-neta"))
+               pp->tx_csum_limit = 1600;
+
        pp->tx_ring_size = MVNETA_MAX_TXD;
        pp->rx_ring_size = MVNETA_MAX_RXD;
 
@@ -3185,6 +3209,7 @@ static int mvneta_remove(struct platform_device *pdev)
 
 static const struct of_device_id mvneta_match[] = {
        { .compatible = "marvell,armada-370-neta" },
+       { .compatible = "marvell,armada-xp-neta" },
        { }
 };
 MODULE_DEVICE_TABLE(of, mvneta_match);
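
mvneta picks the 1600-byte checksum limit with an explicit of_device_is_compatible() test. An equivalent pattern, not what this driver does, is to hang per-variant data off the of_device_id entries and fetch it via of_match_device(); sketched here with invented compatibles:

    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    struct my_variant {
            unsigned int tx_csum_limit;     /* 0 = no limit */
    };

    static const struct my_variant variant_a = { .tx_csum_limit = 1600 };
    static const struct my_variant variant_b = { .tx_csum_limit = 0 };

    static const struct of_device_id my_match[] = {
            { .compatible = "vendor,soc-a", .data = &variant_a },
            { .compatible = "vendor,soc-b", .data = &variant_b },
            { }
    };

    static unsigned int my_get_csum_limit(struct platform_device *pdev)
    {
            const struct of_device_id *match =
                    of_match_device(my_match, &pdev->dev);
            const struct my_variant *v = match ? match->data : NULL;

            return v ? v->tx_csum_limit : 0;
    }
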
index 77179d7ae4cc786c9bc4bb4ea39763522da54d64..e0de2fd1ce124d3d668659b89544d172164037f4 100644 (file)
@@ -1977,10 +1977,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
                        mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
        }
 
-       if (priv->base_tx_qpn) {
-               mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num);
-               priv->base_tx_qpn = 0;
-       }
 }
 
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
index 35f726c17e48c80bdadfc07ba6a43974619c6938..7a4f20bb7fcb4c2640ad8111f5a98ff95088075c 100644 (file)
@@ -718,7 +718,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
 }
 #endif
 static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
-                     int hwtstamp_rx_filter)
+                     netdev_features_t dev_features)
 {
        __wsum hw_checksum = 0;
 
@@ -726,14 +726,8 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
 
        hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
 
-       if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) &&
-           hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) {
-               /* next protocol non IPv4 or IPv6 */
-               if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
-                   != htons(ETH_P_IP) &&
-                   ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
-                   != htons(ETH_P_IPV6))
-                       return -1;
+       if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) &&
+           !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) {
                hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
                hdr += sizeof(struct vlan_hdr);
        }
@@ -896,7 +890,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
                        if (ip_summed == CHECKSUM_COMPLETE) {
                                void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
-                               if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) {
+                               if (check_csum(cqe, gro_skb, va,
+                                              dev->features)) {
                                        ip_summed = CHECKSUM_NONE;
                                        ring->csum_none++;
                                        ring->csum_complete--;
@@ -951,7 +946,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                }
 
                if (ip_summed == CHECKSUM_COMPLETE) {
-                       if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) {
+                       if (check_csum(cqe, skb, skb->data, dev->features)) {
                                ip_summed = CHECKSUM_NONE;
                                ring->csum_complete--;
                                ring->csum_none++;
index 7bed3a88579fa9db92d7e42ad7d43265bd8a3d41..c10d98f6ad967b13640b5d9b2fe033f377565ff0 100644 (file)
@@ -66,6 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        ring->size = size;
        ring->size_mask = size - 1;
        ring->stride = stride;
+       ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
 
        tmp = size * sizeof(struct mlx4_en_tx_info);
        ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
@@ -180,6 +181,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
                mlx4_bf_free(mdev->dev, &ring->bf);
        mlx4_qp_remove(mdev->dev, &ring->qp);
        mlx4_qp_free(mdev->dev, &ring->qp);
+       mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
        mlx4_en_unmap_buffer(&ring->wqres.buf);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
        kfree(ring->bounce_buf);
@@ -231,6 +233,11 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
                       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
 }
 
+static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
+{
+       return ring->prod - ring->cons > ring->full_size;
+}
+
 static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
                              struct mlx4_en_tx_ring *ring, int index,
                              u8 owner)
@@ -473,11 +480,10 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 
        netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
 
-       /*
-        * Wakeup Tx queue if this stopped, and at least 1 packet
-        * was completed
+       /* Wake up the Tx queue if it was stopped and the ring is not full.
         */
-       if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) {
+       if (netif_tx_queue_stopped(ring->tx_queue) &&
+           !mlx4_en_is_tx_ring_full(ring)) {
                netif_tx_wake_queue(ring->tx_queue);
                ring->wake_queue++;
        }
@@ -921,8 +927,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
        skb_tx_timestamp(skb);
 
        /* Check available TXBBs And 2K spare for prefetch */
-       stop_queue = (int)(ring->prod - ring_cons) >
-                     ring->size - HEADROOM - MAX_DESC_TXBBS;
+       stop_queue = mlx4_en_is_tx_ring_full(ring);
        if (unlikely(stop_queue)) {
                netif_tx_stop_queue(ring->tx_queue);
                ring->queue_stopped++;
@@ -991,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                smp_rmb();
 
                ring_cons = ACCESS_ONCE(ring->cons);
-               if (unlikely(((int)(ring->prod - ring_cons)) <=
-                            ring->size - HEADROOM - MAX_DESC_TXBBS)) {
+               if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
                        netif_tx_wake_queue(ring->tx_queue);
                        ring->wake_queue++;
                }
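
mlx4_en_is_tx_ring_full() leans on unsigned arithmetic: with free-running 32-bit prod/cons counters, prod - cons is the exact occupancy even across wraparound, so no masking is needed for the fullness test. A standalone check:

    #include <assert.h>
    #include <stdint.h>

    /* Occupancy of a ring with free-running unsigned indices. */
    static uint32_t ring_used(uint32_t prod, uint32_t cons)
    {
            return prod - cons;     /* well-defined modulo 2^32 */
    }

    int main(void)
    {
            /* consumer just before wrap, producer just after: 8 in flight */
            assert(ring_used(UINT32_MAX + 5u, UINT32_MAX - 3u) == 8);
            return 0;
    }
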
index 6fce58718837202bd82739dd8592b753ece7ef42..0d80aed5904371c2a2358a99618e7a2328b50c09 100644 (file)
@@ -93,8 +93,14 @@ int mlx4_register_interface(struct mlx4_interface *intf)
        mutex_lock(&intf_mutex);
 
        list_add_tail(&intf->list, &intf_list);
-       list_for_each_entry(priv, &dev_list, dev_list)
+       list_for_each_entry(priv, &dev_list, dev_list) {
+               if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) {
+                       mlx4_dbg(&priv->dev,
+                                "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol);
+                       intf->flags &= ~MLX4_INTFF_BONDING;
+               }
                mlx4_add_device(intf, priv);
+       }
 
        mutex_unlock(&intf_mutex);
 
index d5f9adb6a78491d37522caa4de869e4695f83f25..666d1669eb5233f9a8e6baf5773621159375af25 100644 (file)
@@ -279,6 +279,7 @@ struct mlx4_en_tx_ring {
        u32                     size; /* number of TXBBs */
        u32                     size_mask;
        u16                     stride;
+       u32                     full_size;
        u16                     cqn;    /* index of port CQ associated with this ring */
        u32                     buf_size;
        __be32                  doorbell_qpn;
@@ -580,7 +581,6 @@ struct mlx4_en_priv {
        int vids[128];
        bool wol;
        struct device *ddev;
-       int base_tx_qpn;
        struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
        struct hwtstamp_config hwtstamp_config;
        u32 counter_index;
index 2bae50292dcd814a2b8cb338da1bb0a6beac82f0..83651ac8ddb9d54ca8d7548dc3ca070c29bc5e59 100644 (file)
@@ -279,7 +279,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
 MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
 MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
 
-/* Careful: must be accessed under kparam_block_sysfs_write */
+/* Careful: must be accessed under kernel_param_lock() */
 static char *myri10ge_fw_name = NULL;
 module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
@@ -3427,7 +3427,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
                }
        }
 
-       kparam_block_sysfs_write(myri10ge_fw_name);
+       kernel_param_lock(THIS_MODULE);
        if (myri10ge_fw_name != NULL) {
                char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
                if (fw_name) {
@@ -3435,7 +3435,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
                        set_fw_name(mgp, fw_name, true);
                }
        }
-       kparam_unblock_sysfs_write(myri10ge_fw_name);
+       kernel_param_unlock(THIS_MODULE);
 
        if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
            myri10ge_fw_names[mgp->board_number] != NULL &&
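
kparam_block_sysfs_write() and its unblock partner were replaced tree-wide by kernel_param_lock()/kernel_param_unlock(), a plain per-module mutex around parameter access. The usage shape for snapshotting a writable charp parameter, under hypothetical names:

    #include <linux/module.h>
    #include <linux/moduleparam.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    static char *my_fw_name;                /* hypothetical parameter */
    module_param(my_fw_name, charp, 0644);

    /* Take a stable snapshot of a writable module parameter. */
    static char *my_fw_name_snapshot(void)
    {
            char *copy = NULL;

            kernel_param_lock(THIS_MODULE);
            if (my_fw_name)
                    copy = kstrdup(my_fw_name, GFP_KERNEL);
            kernel_param_unlock(THIS_MODULE);

            return copy;    /* caller kfree()s */
    }
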
index 42656da5050063ca382a6bb24375ec08ff672385..7a8ce920c49e709b067321ae91306153f4f24390 100644 (file)
@@ -116,8 +116,10 @@ static int ravb_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
        priv->ptp.current_addend = addend;
 
        gccr = ravb_read(ndev, GCCR);
-       if (gccr & GCCR_LTI)
+       if (gccr & GCCR_LTI) {
+               spin_unlock_irqrestore(&priv->lock, flags);
                return -EBUSY;
+       }
        ravb_write(ndev, addend & GTI_TIV, GTI);
        ravb_write(ndev, gccr | GCCR_LTI, GCCR);
 
index 1341f33e60843029e1e1ecd035fcbebc0fccc1cb..7d430d3229310a45e3fa67df26d5570dc7b8992c 100644 (file)
@@ -56,7 +56,7 @@ enum sis900_configuration_register_bits {
        EDB_MASTER_EN = 0x00002000
 };
 
-enum sis900_eeprom_access_reigster_bits {
+enum sis900_eeprom_access_register_bits {
        MDC  = 0x00000040, MDDIR = 0x00000020, MDIO = 0x00000010, /* 7016 specific */
        EECS = 0x00000008, EECLK = 0x00000004, EEDO = 0x00000002,
        EEDI = 0x00000001
@@ -73,7 +73,7 @@ enum sis900_interrupt_register_bits {
        RxERR  = 0x00000004, RxDESC = 0x00000002, RxOK  = 0x00000001
 };
 
-enum sis900_interrupt_enable_reigster_bits {
+enum sis900_interrupt_enable_register_bits {
        IE = 0x00000001
 };
 
index 08c483bd2ec7bd94d5434f9567c75f609ce27d35..3f20bb1fe570c086e53d0bb5d1ca8124d971fe5d 100644 (file)
@@ -73,7 +73,7 @@
 #define MMC_RX_OCTETCOUNT_G            0x00000188
 #define MMC_RX_BROADCASTFRAME_G                0x0000018c
 #define MMC_RX_MULTICASTFRAME_G                0x00000190
-#define MMC_RX_CRC_ERRROR              0x00000194
+#define MMC_RX_CRC_ERROR               0x00000194
 #define MMC_RX_ALIGN_ERROR             0x00000198
 #define MMC_RX_RUN_ERROR               0x0000019C
 #define MMC_RX_JABBER_ERROR            0x000001A0
@@ -196,7 +196,7 @@ void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc)
        mmc->mmc_rx_octetcount_g += readl(ioaddr + MMC_RX_OCTETCOUNT_G);
        mmc->mmc_rx_broadcastframe_g += readl(ioaddr + MMC_RX_BROADCASTFRAME_G);
        mmc->mmc_rx_multicastframe_g += readl(ioaddr + MMC_RX_MULTICASTFRAME_G);
-       mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERRROR);
+       mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERROR);
        mmc->mmc_rx_align_error += readl(ioaddr + MMC_RX_ALIGN_ERROR);
        mmc->mmc_rx_run_error += readl(ioaddr + MMC_RX_RUN_ERROR);
        mmc->mmc_rx_jabber_error += readl(ioaddr + MMC_RX_JABBER_ERROR);
index 8b0b1d6aca72c4a36c718862a064da6418dfc9f7..2f1264b882b9555f02e0b1cb50aa914d13c929fa 100644 (file)
@@ -18,6 +18,7 @@ if NET_VENDOR_VIA
 config VIA_RHINE
        tristate "VIA Rhine support"
        depends on (PCI || OF_IRQ)
+       depends on HAS_DMA
        select CRC32
        select MII
        ---help---
@@ -42,6 +43,7 @@ config VIA_RHINE_MMIO
 config VIA_VELOCITY
        tristate "VIA Velocity support"
        depends on (PCI || (OF_ADDRESS && OF_IRQ))
+       depends on HAS_DMA
        select CRC32
        select CRC_CCITT
        select MII
index 4dea85bfc545b86d5874031531a9ce4963facbe2..6b701b3ded749642b6cdf2309e7b25d2b373ed37 100644 (file)
@@ -246,6 +246,13 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
        pr_info_once("%s: %s PHY revision: 0x%02x, patch: %d\n",
                     dev_name(&phydev->dev), phydev->drv->name, rev, patch);
 
+       /* Dummy read of a register to work around an issue upon reset where
+        * the internal inverter may not let the first MDIO transaction pass
+        * the MDIO management controller, making such reads return 0xffff.
+        */
+       phy_read(phydev, MII_BMSR);
+
        switch (rev) {
        case 0xb0:
                ret = bcm7xxx_28nm_b0_afe_config_init(phydev);
index fc7abc50b4f17544741d1166960fb549b8554fbd..6a52a7f0fa0dc5cace471b118a9e989d8c2713ea 100644 (file)
@@ -120,6 +120,48 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
        return 0;
 }
 
+/* Workaround for integrated BCM7xxx Gigabit PHYs whose internal MDIO
+ * management controller makes the first read or write transaction fail.
+ * We insert a dummy BMSR read here so that phy_get_device() and
+ * get_phy_id() can correctly read the PHY MII_PHYSID1/2 registers and
+ * successfully register a PHY device for this peripheral.
+ *
+ * Once the PHY driver is registered, we can work around subsequent reads
+ * from there (e.g. during system-wide power management).
+ *
+ * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
+ * therefore the right location to stick that workaround. Since we do not want
+ * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
+ * Device Tree scan to limit the search area.
+ */
+static int unimac_mdio_reset(struct mii_bus *bus)
+{
+       struct device_node *np = bus->dev.of_node;
+       struct device_node *child;
+       u32 read_mask = 0;
+       int addr;
+
+       if (!np) {
+               read_mask = ~bus->phy_mask;
+       } else {
+               for_each_available_child_of_node(np, child) {
+                       addr = of_mdio_parse_addr(&bus->dev, child);
+                       if (addr < 0)
+                               continue;
+
+                       read_mask |= 1 << addr;
+               }
+       }
+
+       for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+               if (read_mask & 1 << addr)
+                       mdiobus_read(bus, addr, MII_BMSR);
+       }
+
+       return 0;
+}
+
 static int unimac_mdio_probe(struct platform_device *pdev)
 {
        struct unimac_mdio_priv *priv;
@@ -155,6 +197,7 @@ static int unimac_mdio_probe(struct platform_device *pdev)
        bus->parent = &pdev->dev;
        bus->read = unimac_mdio_read;
        bus->write = unimac_mdio_write;
+       bus->reset = unimac_mdio_reset;
        snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
 
        bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL);
index bdfe51fc3a6507154edfcaf8be3413884bcf702f..0302483de24066a64446699cb360b2fb2ad6a890 100644 (file)
@@ -230,7 +230,7 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
        for (i = 1;
             i < num_ids && c45_ids->devices_in_package == 0;
             i++) {
-               reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2;
+retry:         reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2;
                phy_reg = mdiobus_read(bus, addr, reg_addr);
                if (phy_reg < 0)
                        return -EIO;
@@ -242,12 +242,20 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
                        return -EIO;
                c45_ids->devices_in_package |= (phy_reg & 0xffff);
 
-               /* If mostly Fs, there is no device there,
-                * let's get out of here.
-                */
                if ((c45_ids->devices_in_package & 0x1fffffff) == 0x1fffffff) {
-                       *phy_id = 0xffffffff;
-                       return 0;
+                       if (i) {
+                               /* If mostly Fs, there may still be a
+                                * device here: some 10G PHYs report zero
+                                * Devices In package (e.g. Cortina
+                                * CS4315/CS4340), so retry from MMD 0.
+                                */
+                               i = 0;
+                               goto retry;
+                       } else {
+                               /* no device there, let's get out of here */
+                               *phy_id = 0xffffffff;
+                               return 0;
+                       }
                }
        }
 
@@ -796,10 +804,11 @@ static int genphy_config_advert(struct phy_device *phydev)
        if (phydev->supported & (SUPPORTED_1000baseT_Half |
                                 SUPPORTED_1000baseT_Full)) {
                adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
-               if (adv != oldadv)
-                       changed = 1;
        }
 
+       if (adv != oldadv)
+               changed = 1;
+
        err = phy_write(phydev, MII_CTRL1000, adv);
        if (err < 0)
                return err;
index 76cad712ddb2c7c6bc2794389380e4e0a862d5f5..17cad185169dd28fd3cae12e69d918371fe9d06f 100644 (file)
@@ -66,6 +66,7 @@
 #define PHY_ID_VSC8244                 0x000fc6c0
 #define PHY_ID_VSC8514                 0x00070670
 #define PHY_ID_VSC8574                 0x000704a0
+#define PHY_ID_VSC8641                 0x00070431
 #define PHY_ID_VSC8662                 0x00070660
 #define PHY_ID_VSC8221                 0x000fc550
 #define PHY_ID_VSC8211                 0x000fc4b0
@@ -271,6 +272,18 @@ static struct phy_driver vsc82xx_driver[] = {
        .ack_interrupt  = &vsc824x_ack_interrupt,
        .config_intr    = &vsc82xx_config_intr,
        .driver         = { .owner = THIS_MODULE,},
+}, {
+       .phy_id         = PHY_ID_VSC8641,
+       .name           = "Vitesse VSC8641",
+       .phy_id_mask    = 0x000ffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_init    = &vsc824x_config_init,
+       .config_aneg    = &vsc82x4_config_aneg,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc824x_ack_interrupt,
+       .config_intr    = &vsc82xx_config_intr,
+       .driver         = { .owner = THIS_MODULE,},
 }, {
        .phy_id         = PHY_ID_VSC8662,
        .name           = "Vitesse VSC8662",
@@ -318,6 +331,7 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = {
        { PHY_ID_VSC8244, 0x000fffc0 },
        { PHY_ID_VSC8514, 0x000ffff0 },
        { PHY_ID_VSC8574, 0x000ffff0 },
+       { PHY_ID_VSC8641, 0x000ffff0 },
        { PHY_ID_VSC8662, 0x000ffff0 },
        { PHY_ID_VSC8221, 0x000ffff0 },
        { PHY_ID_VSC8211, 0x000ffff0 },
index e9f1075f7d4c2055ccbf00c24b41c86a77a87558..2652245631d12f5016915721d5f93268cb4453c1 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.3.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.2.0-k"
 
 /* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01030500
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040200
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
index b9febab8916735f113b99fa2e08d81662b13390a..6ca6193ab8a6100ac6257e24745d4575e17648a9 100644 (file)
@@ -62,7 +62,7 @@ static int mtu_max_set(const char *val, const struct kernel_param *kp)
        return ret;
 }
 
-static struct kernel_param_ops mtu_max_ops = {
+static const struct kernel_param_ops mtu_max_ops = {
        .set = mtu_max_set,
        .get = param_get_uint,
 };
@@ -91,7 +91,7 @@ static int ring_order_set(const char *val, const struct kernel_param *kp)
        return 0;
 }
 
-static struct kernel_param_ops ring_order_ops = {
+static const struct kernel_param_ops ring_order_ops = {
        .set = ring_order_set,
        .get = param_get_uint,
 };
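
These kernel_param_ops tables can be const because module_param_cb() only ever reads them; constifying moves the structures into rodata. The registration shape, with a hypothetical parameter:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static unsigned int my_limit = 1500;    /* hypothetical parameter */

    static int my_limit_set(const char *val, const struct kernel_param *kp)
    {
            /* validate here, then defer to the stock uint parser */
            return param_set_uint(val, kp);
    }

    static const struct kernel_param_ops my_limit_ops = {
            .set = my_limit_set,
            .get = param_get_uint,
    };
    module_param_cb(my_limit, &my_limit_ops, &my_limit, 0644);
    MODULE_PARM_DESC(my_limit, "example limit (hypothetical)");
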
index 1a20cee5febea93aa350e1ff401e88e04aa2c4b7..799a2efe57937241223aafcf3793586f7304346f 100644 (file)
@@ -821,15 +821,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp)
 
        lbtf_deb_enter(LBTF_DEB_USB);
 
-       kparam_block_sysfs_write(fw_name);
+       kernel_param_lock(THIS_MODULE);
        ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev);
        if (ret < 0) {
                pr_err("request_firmware() failed with %#x\n", ret);
                pr_err("firmware %s not found\n", lbtf_fw_name);
-               kparam_unblock_sysfs_write(fw_name);
+               kernel_param_unlock(THIS_MODULE);
                goto done;
        }
-       kparam_unblock_sysfs_write(fw_name);
+       kernel_param_unlock(THIS_MODULE);
 
        if (check_fwfile_format(cardp->fw->data, cardp->fw->size))
                goto release_fw;
index 5485f91294e7182da2c31f85715570629cac62d9..880d0d63e872e5725d76fe998db0282c749f45d6 100644 (file)
@@ -44,9 +44,9 @@
 #include <xen/xen.h>
 #include <xen/events.h>
 #include <xen/interface/memory.h>
+#include <xen/page.h>
 
 #include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
 
 /* Provide an option to disable split event channels at load time as
  * event channels are a limited resource. Split event channels are
index 56d8afd11077de5d1f7b475bba37d1164c92bf41..f948c46d51329970c186b2886c267ffba2e807db 100644 (file)
@@ -45,7 +45,6 @@
 #include <linux/slab.h>
 #include <net/ip.h>
 
-#include <asm/xen/page.h>
 #include <xen/xen.h>
 #include <xen/xenbus.h>
 #include <xen/events.h>
@@ -1245,10 +1244,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
        np                   = netdev_priv(netdev);
        np->xbdev            = dev;
 
-       /* No need to use rtnl_lock() before the call below as it
-        * happens before register_netdev().
-        */
-       netif_set_real_num_tx_queues(netdev, 0);
        np->queues = NULL;
 
        err = -ENOMEM;
@@ -1900,9 +1895,6 @@ abort_transaction_no_dev_fatal:
        xennet_disconnect_backend(info);
        kfree(info->queues);
        info->queues = NULL;
-       rtnl_lock();
-       netif_set_real_num_tx_queues(info->netdev, 0);
-       rtnl_unlock();
  out:
        return err;
 }
index b75d684aefcd78a81947c269c81215f8cc5020d4..734da589cdfb94b4312c3a8cd13dbe06c9c5604c 100644 (file)
@@ -221,10 +221,9 @@ static void ks_pcie_setup_interrupts(struct keystone_pcie *ks_pcie)
        /* MSI IRQ */
        if (IS_ENABLED(CONFIG_PCI_MSI)) {
                for (i = 0; i < ks_pcie->num_msi_host_irqs; i++) {
-                       irq_set_chained_handler(ks_pcie->msi_host_irqs[i],
-                                               ks_pcie_msi_irq_handler);
-                       irq_set_handler_data(ks_pcie->msi_host_irqs[i],
-                                            ks_pcie);
+                       irq_set_chained_handler_and_data(ks_pcie->msi_host_irqs[i],
+                                                        ks_pcie_msi_irq_handler,
+                                                        ks_pcie);
                }
        }
 }
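
irq_set_chained_handler_and_data() installs the flow handler and its data in one descriptor-locked step, closing the window in the old two-call sequence where the chained handler could fire before its handler data was set. The conversion shape, names hypothetical and using the two-argument flow-handler signature of this kernel generation:

    #include <linux/irq.h>

    extern void my_demux_handler(unsigned int irq, struct irq_desc *desc);

    static void my_install_demux(unsigned int irq, void *priv)
    {
            /* Old, racy sequence:
             *      irq_set_chained_handler(irq, my_demux_handler);
             *      irq_set_handler_data(irq, priv);
             * New, atomic equivalent:
             */
            irq_set_chained_handler_and_data(irq, my_demux_handler, priv);
    }
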
index 240f388720857f0c1e3df0635d35fa71e6f05787..8b7a900cd28b25e5a8d52a10357768df1d6513aa 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/time.h>
+#include <linux/ktime.h>
 #include <xen/platform_pci.h>
 
 #include <asm/xen/swiotlb-xen.h>
@@ -115,7 +116,6 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
        evtchn_port_t port = pdev->evtchn;
        unsigned irq = pdev->irq;
        s64 ns, ns_timeout;
-       struct timeval tv;
 
        spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
 
@@ -132,8 +132,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
         * (in the latter case we end up continually re-executing poll() with a
         * timeout in the past). 1s difference gives plenty of slack for error.
         */
-       do_gettimeofday(&tv);
-       ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
+       ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC;
 
        xen_clear_irq_pending(irq);
 
@@ -141,8 +140,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
                        (unsigned long *)&pdev->sh_info->flags)) {
                xen_poll_irq_timeout(irq, jiffies + 3*HZ);
                xen_clear_irq_pending(irq);
-               do_gettimeofday(&tv);
-               ns = timeval_to_ns(&tv);
+               ns = ktime_get_ns();
                if (ns > ns_timeout) {
                        dev_err(&pdev->xdev->dev,
                                "pciback not responding!!!\n");
index c4fc77aa766eebae13550798dc1ea049b7a64a92..ad1ea1695b4ae46781f7329416f8a94e2791cd4b 100644 (file)
@@ -1351,8 +1351,7 @@ int mtk_pctrl_init(struct platform_device *pdev,
                set_irq_flags(virq, IRQF_VALID);
        };
 
-       irq_set_chained_handler(irq, mtk_eint_irq_handler);
-       irq_set_handler_data(irq, pctl);
+       irq_set_chained_handler_and_data(irq, mtk_eint_irq_handler, pctl);
        set_irq_flags(irq, IRQF_VALID);
        return 0;
 
index 873433da0f2ce192651aa9003946399d16e82a0f..c3c3d2345fc6b23b4364d952e75d8887772f7221 100644 (file)
@@ -865,8 +865,8 @@ static int adi_gpio_pint_probe(struct platform_device *pdev)
        pint->pint_map_port = adi_pint_map_port;
        platform_set_drvdata(pdev, pint);
 
-       irq_set_chained_handler(pint->irq, adi_gpio_handle_pint_irq);
-       irq_set_handler_data(pint->irq, pint);
+       irq_set_chained_handler_and_data(pint->irq, adi_gpio_handle_pint_irq,
+                                        pint);
 
        list_add_tail(&pint->node, &adi_pint_list);
 
index d34ac879af9ef302f7b80906ff75db23d6b21217..c262e5f35c2808d873678560f4abc89dd75a1fae 100644 (file)
@@ -1661,8 +1661,8 @@ static int st_pctl_probe_dt(struct platform_device *pdev,
                if (IS_ERR(info->irqmux_base))
                        return PTR_ERR(info->irqmux_base);
 
-               irq_set_chained_handler(irq, st_gpio_irqmux_handler);
-               irq_set_handler_data(irq, info);
+               irq_set_chained_handler_and_data(irq, st_gpio_irqmux_handler,
+                                                info);
 
        }
 
index 0b7afa50121a249d385fddd5d1ae9a906a0f322b..b18dabba03a480e1cde292bc4ecca6263cce7c55 100644 (file)
@@ -563,8 +563,8 @@ static int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d)
                return -ENOMEM;
        }
 
-       irq_set_chained_handler(irq, exynos_irq_demux_eint16_31);
-       irq_set_handler_data(irq, muxed_data);
+       irq_set_chained_handler_and_data(irq, exynos_irq_demux_eint16_31,
+                                        muxed_data);
 
        bank = d->pin_banks;
        idx = 0;
index f1993f42114c40ba565f3ae6a82a6c97f10ef1d7..01b43dbfb795b04eea99111a0d273ff70242e884 100644 (file)
@@ -514,8 +514,7 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d)
                }
 
                eint_data->parents[i] = irq;
-               irq_set_chained_handler(irq, handlers[i]);
-               irq_set_handler_data(irq, eint_data);
+               irq_set_chained_handler_and_data(irq, handlers[i], eint_data);
        }
 
        bank = d->pin_banks;
index 7756c1e9e76313565d23431fd807278fa70b0246..ec8cc3b476213c4161faada14acf20bc3d1a9725 100644 (file)
@@ -506,8 +506,7 @@ static int s3c64xx_eint_gpio_init(struct samsung_pinctrl_drv_data *d)
                data->domains[nr_domains++] = bank->irq_domain;
        }
 
-       irq_set_chained_handler(d->irq, s3c64xx_eint_gpio_irq);
-       irq_set_handler_data(d->irq, data);
+       irq_set_chained_handler_and_data(d->irq, s3c64xx_eint_gpio_irq, data);
 
        return 0;
 }
@@ -731,8 +730,9 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d)
                        return -ENXIO;
                }
 
-               irq_set_chained_handler(irq, s3c64xx_eint0_handlers[i]);
-               irq_set_handler_data(irq, data);
+               irq_set_chained_handler_and_data(irq,
+                                                s3c64xx_eint0_handlers[i],
+                                                data);
        }
 
        bank = d->pin_banks;
index d7857c72e627d9c65d8c21446d9f4392b1ccbb6c..f09573e132035a700d7b3f782b7c4aa50f70fc6d 100644 (file)
@@ -1005,9 +1005,9 @@ int sunxi_pinctrl_init(struct platform_device *pdev,
                writel(0xffffffff,
                        pctl->membase + sunxi_irq_status_reg_from_bank(i));
 
-               irq_set_chained_handler(pctl->irq[i],
-                                       sunxi_pinctrl_irq_handler);
-               irq_set_handler_data(pctl->irq[i], pctl);
+               irq_set_chained_handler_and_data(pctl->irq[i],
+                                                sunxi_pinctrl_irq_handler,
+                                                pctl);
        }
 
        dev_info(&pdev->dev, "initialized sunXi PIO driver\n");
index f986e0cca7acf68300796563b6a80ceebd3881e4..83c42ea88f2b252b0c77a65cfe1bab12135e9dca 100644 (file)
@@ -448,42 +448,42 @@ static int param_set_battery_voltage(const char *key,
 
 #define param_get_battery_voltage param_get_int
 
-static struct kernel_param_ops param_ops_ac_online = {
+static const struct kernel_param_ops param_ops_ac_online = {
        .set = param_set_ac_online,
        .get = param_get_ac_online,
 };
 
-static struct kernel_param_ops param_ops_usb_online = {
+static const struct kernel_param_ops param_ops_usb_online = {
        .set = param_set_usb_online,
        .get = param_get_usb_online,
 };
 
-static struct kernel_param_ops param_ops_battery_status = {
+static const struct kernel_param_ops param_ops_battery_status = {
        .set = param_set_battery_status,
        .get = param_get_battery_status,
 };
 
-static struct kernel_param_ops param_ops_battery_present = {
+static const struct kernel_param_ops param_ops_battery_present = {
        .set = param_set_battery_present,
        .get = param_get_battery_present,
 };
 
-static struct kernel_param_ops param_ops_battery_technology = {
+static const struct kernel_param_ops param_ops_battery_technology = {
        .set = param_set_battery_technology,
        .get = param_get_battery_technology,
 };
 
-static struct kernel_param_ops param_ops_battery_health = {
+static const struct kernel_param_ops param_ops_battery_health = {
        .set = param_set_battery_health,
        .get = param_get_battery_health,
 };
 
-static struct kernel_param_ops param_ops_battery_capacity = {
+static const struct kernel_param_ops param_ops_battery_capacity = {
        .set = param_set_battery_capacity,
        .get = param_get_battery_capacity,
 };
 
-static struct kernel_param_ops param_ops_battery_voltage = {
+static const struct kernel_param_ops param_ops_battery_voltage = {
        .set = param_set_battery_voltage,
        .get = param_get_battery_voltage,
 };
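
[Editor's note: these param_ops constifications ride on the moduleparam core change in this same window that made every struct kernel_param_ops pointer const, so the ops tables can live in .rodata. A hedged sketch of the same pattern for a custom parameter; demo and param_set_demo are made-up names:]

#include <linux/moduleparam.h>

static int demo;

/* validate before storing; here simply falls back to the stock int setter */
static int param_set_demo(const char *val, const struct kernel_param *kp)
{
	return param_set_int(val, kp);
}

static const struct kernel_param_ops param_ops_demo = {
	.set = param_set_demo,
	.get = param_get_int,
};
module_param_cb(demo, &param_ops_demo, &demo, 0644);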
index f74c040d5c10f5b21f505dfa7b0131c82ab02c83..e9485fbbb373e2191601e7ab862dfe8e039afdd9 100644 (file)
@@ -92,8 +92,8 @@ struct read_cpu_info_sccb {
        u8      reserved[4096 - 16];
 } __attribute__((packed, aligned(PAGE_SIZE)));
 
-static void sclp_fill_cpu_info(struct sclp_cpu_info *info,
-                              struct read_cpu_info_sccb *sccb)
+static void sclp_fill_core_info(struct sclp_core_info *info,
+                               struct read_cpu_info_sccb *sccb)
 {
        char *page = (char *) sccb;
 
@@ -101,12 +101,11 @@ static void sclp_fill_cpu_info(struct sclp_cpu_info *info,
        info->configured = sccb->nr_configured;
        info->standby = sccb->nr_standby;
        info->combined = sccb->nr_configured + sccb->nr_standby;
-       info->has_cpu_type = sclp.has_cpu_type;
-       memcpy(&info->cpu, page + sccb->offset_configured,
-              info->combined * sizeof(struct sclp_cpu_entry));
+       memcpy(&info->core, page + sccb->offset_configured,
+              info->combined * sizeof(struct sclp_core_entry));
 }
 
-int sclp_get_cpu_info(struct sclp_cpu_info *info)
+int sclp_get_core_info(struct sclp_core_info *info)
 {
        int rc;
        struct read_cpu_info_sccb *sccb;
@@ -127,7 +126,7 @@ int sclp_get_cpu_info(struct sclp_cpu_info *info)
                rc = -EIO;
                goto out;
        }
-       sclp_fill_cpu_info(info, sccb);
+       sclp_fill_core_info(info, sccb);
 out:
        free_page((unsigned long) sccb);
        return rc;
@@ -137,7 +136,7 @@ struct cpu_configure_sccb {
        struct sccb_header header;
 } __attribute__((packed, aligned(8)));
 
-static int do_cpu_configure(sclp_cmdw_t cmd)
+static int do_core_configure(sclp_cmdw_t cmd)
 {
        struct cpu_configure_sccb *sccb;
        int rc;
@@ -171,14 +170,14 @@ out:
        return rc;
 }
 
-int sclp_cpu_configure(u8 cpu)
+int sclp_core_configure(u8 core)
 {
-       return do_cpu_configure(SCLP_CMDW_CONFIGURE_CPU | cpu << 8);
+       return do_core_configure(SCLP_CMDW_CONFIGURE_CPU | core << 8);
 }
 
-int sclp_cpu_deconfigure(u8 cpu)
+int sclp_core_deconfigure(u8 core)
 {
-       return do_cpu_configure(SCLP_CMDW_DECONFIGURE_CPU | cpu << 8);
+       return do_core_configure(SCLP_CMDW_DECONFIGURE_CPU | core << 8);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
index d7f696d95597734568bb4cfb6f35f577a7bc6b30..aeed7969fd792ba35f89cbf3b2f26e357b20de16 100644 (file)
@@ -98,7 +98,7 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
 
 static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 {
-       struct sclp_cpu_entry *cpue;
+       struct sclp_core_entry *cpue;
        u16 boot_cpu_address, cpu;
 
        if (sclp_read_info_early(sccb))
@@ -106,7 +106,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 
        sclp.facilities = sccb->facilities;
        sclp.has_sprp = !!(sccb->fac84 & 0x02);
-       sclp.has_cpu_type = !!(sccb->fac84 & 0x01);
+       sclp.has_core_type = !!(sccb->fac84 & 0x01);
        if (sccb->fac85 & 0x02)
                S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
        sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
@@ -116,11 +116,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 
        if (!sccb->hcpua) {
                if (MACHINE_IS_VM)
-                       sclp.max_cpu = 64;
+                       sclp.max_cores = 64;
                else
-                       sclp.max_cpu = sccb->ncpurl;
+                       sclp.max_cores = sccb->ncpurl;
        } else {
-               sclp.max_cpu = sccb->hcpua + 1;
+               sclp.max_cores = sccb->hcpua + 1;
        }
 
        boot_cpu_address = stap();
index 9a3dd95029cc8a7ce8d5a43e8b5cd659ca035d95..823f41fc4bbd6762184737b73d55452967ca3e44 100644 (file)
@@ -154,7 +154,7 @@ static int __init init_cpu_info(enum arch_id arch)
 
        /* get info for boot cpu from lowcore, stored in the HSA */
 
-       sa_ext = dump_save_area_create(0);
+       sa_ext = dump_save_areas.areas[0];
        if (!sa_ext)
                return -ENOMEM;
        if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base,
index 3ba61141975914aa25ef42658471686756fdefb5..559a9dcdb15d274d4bad423bdbef6f01da769639 100644 (file)
@@ -60,7 +60,7 @@ static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags);
 static int ap_device_remove(struct device *dev);
 static int ap_device_probe(struct device *dev);
 static void ap_interrupt_handler(struct airq_struct *airq);
-static void ap_reset(struct ap_device *ap_dev);
+static void ap_reset(struct ap_device *ap_dev, unsigned long *flags);
 static void ap_config_timeout(unsigned long ptr);
 static int ap_select_domain(void);
 static void ap_query_configuration(void);
@@ -310,35 +310,26 @@ static inline int __ap_query_configuration(struct ap_config_info *config)
 static int ap_query_functions(ap_qid_t qid, unsigned int *functions)
 {
        struct ap_queue_status status;
-       int i;
+
        status = __ap_query_functions(qid, functions);
 
-       for (i = 0; i < AP_MAX_RESET; i++) {
-               if (ap_queue_status_invalid_test(&status))
-                       return -ENODEV;
+       if (ap_queue_status_invalid_test(&status))
+               return -ENODEV;
 
-               switch (status.response_code) {
-               case AP_RESPONSE_NORMAL:
-                       return 0;
-               case AP_RESPONSE_RESET_IN_PROGRESS:
-               case AP_RESPONSE_BUSY:
-                       break;
-               case AP_RESPONSE_Q_NOT_AVAIL:
-               case AP_RESPONSE_DECONFIGURED:
-               case AP_RESPONSE_CHECKSTOPPED:
-               case AP_RESPONSE_INVALID_ADDRESS:
-                       return -ENODEV;
-               case AP_RESPONSE_OTHERWISE_CHANGED:
-                       break;
-               default:
-                       break;
-               }
-               if (i < AP_MAX_RESET - 1) {
-                       udelay(5);
-                       status = __ap_query_functions(qid, functions);
-               }
+       switch (status.response_code) {
+       case AP_RESPONSE_NORMAL:
+               return 0;
+       case AP_RESPONSE_Q_NOT_AVAIL:
+       case AP_RESPONSE_DECONFIGURED:
+       case AP_RESPONSE_CHECKSTOPPED:
+       case AP_RESPONSE_INVALID_ADDRESS:
+               return -ENODEV;
+       case AP_RESPONSE_RESET_IN_PROGRESS:
+       case AP_RESPONSE_BUSY:
+       case AP_RESPONSE_OTHERWISE_CHANGED:
+       default:
+               return -EBUSY;
        }
-       return -EBUSY;
 }
 
 /**
@@ -350,47 +341,25 @@ static int ap_query_functions(ap_qid_t qid, unsigned int *functions)
  * on the return value it waits a while and tests the AP queue if interrupts
  * have been switched on using ap_test_queue().
  */
-static int ap_queue_enable_interruption(ap_qid_t qid, void *ind)
+static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind)
 {
        struct ap_queue_status status;
-       int t_depth, t_device_type, rc, i;
 
-       rc = -EBUSY;
-       status = ap_queue_interruption_control(qid, ind);
-
-       for (i = 0; i < AP_MAX_RESET; i++) {
-               switch (status.response_code) {
-               case AP_RESPONSE_NORMAL:
-                       if (status.int_enabled)
-                               return 0;
-                       break;
-               case AP_RESPONSE_RESET_IN_PROGRESS:
-               case AP_RESPONSE_BUSY:
-                       if (i < AP_MAX_RESET - 1) {
-                               udelay(5);
-                               status = ap_queue_interruption_control(qid,
-                                                                      ind);
-                               continue;
-                       }
-                       break;
-               case AP_RESPONSE_Q_NOT_AVAIL:
-               case AP_RESPONSE_DECONFIGURED:
-               case AP_RESPONSE_CHECKSTOPPED:
-               case AP_RESPONSE_INVALID_ADDRESS:
-                       return -ENODEV;
-               case AP_RESPONSE_OTHERWISE_CHANGED:
-                       if (status.int_enabled)
-                               return 0;
-                       break;
-               default:
-                       break;
-               }
-               if (i < AP_MAX_RESET - 1) {
-                       udelay(5);
-                       status = ap_test_queue(qid, &t_depth, &t_device_type);
-               }
+       status = ap_queue_interruption_control(ap_dev->qid, ind);
+       switch (status.response_code) {
+       case AP_RESPONSE_NORMAL:
+       case AP_RESPONSE_OTHERWISE_CHANGED:
+               return 0;
+       case AP_RESPONSE_Q_NOT_AVAIL:
+       case AP_RESPONSE_DECONFIGURED:
+       case AP_RESPONSE_CHECKSTOPPED:
+       case AP_RESPONSE_INVALID_ADDRESS:
+               return -ENODEV;
+       case AP_RESPONSE_RESET_IN_PROGRESS:
+       case AP_RESPONSE_BUSY:
+       default:
+               return -EBUSY;
        }
-       return rc;
 }
 
 /**
@@ -510,110 +479,95 @@ int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
 }
 EXPORT_SYMBOL(ap_recv);
 
+/**
+ * __ap_schedule_poll_timer(): Schedule poll timer.
+ *
+ * Set up the timer to run the poll tasklet
+ */
+static inline void __ap_schedule_poll_timer(void)
+{
+       ktime_t hr_time;
+
+       spin_lock_bh(&ap_poll_timer_lock);
+       if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
+               hr_time = ktime_set(0, poll_timeout);
+               hrtimer_forward_now(&ap_poll_timer, hr_time);
+               hrtimer_restart(&ap_poll_timer);
+       }
+       spin_unlock_bh(&ap_poll_timer_lock);
+}
+
+/**
+ * ap_schedule_poll_timer(): Schedule poll timer.
+ *
+ * Set up the timer to run the poll tasklet
+ */
+static inline void ap_schedule_poll_timer(void)
+{
+       if (ap_using_interrupts())
+               return;
+       __ap_schedule_poll_timer();
+}
+
 /**
  * ap_query_queue(): Check if an AP queue is available.
  * @qid: The AP queue number
  * @queue_depth: Pointer to queue depth value
  * @device_type: Pointer to device type value
- *
- * The test is repeated for AP_MAX_RESET times.
  */
 static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
 {
        struct ap_queue_status status;
-       int t_depth, t_device_type, rc, i;
+       int t_depth, t_device_type;
 
-       rc = -EBUSY;
-       for (i = 0; i < AP_MAX_RESET; i++) {
-               status = ap_test_queue(qid, &t_depth, &t_device_type);
-               switch (status.response_code) {
-               case AP_RESPONSE_NORMAL:
-                       *queue_depth = t_depth + 1;
-                       *device_type = t_device_type;
-                       rc = 0;
-                       break;
-               case AP_RESPONSE_Q_NOT_AVAIL:
-                       rc = -ENODEV;
-                       break;
-               case AP_RESPONSE_RESET_IN_PROGRESS:
-                       break;
-               case AP_RESPONSE_DECONFIGURED:
-                       rc = -ENODEV;
-                       break;
-               case AP_RESPONSE_CHECKSTOPPED:
-                       rc = -ENODEV;
-                       break;
-               case AP_RESPONSE_INVALID_ADDRESS:
-                       rc = -ENODEV;
-                       break;
-               case AP_RESPONSE_OTHERWISE_CHANGED:
-                       break;
-               case AP_RESPONSE_BUSY:
-                       break;
-               default:
-                       BUG();
-               }
-               if (rc != -EBUSY)
-                       break;
-               if (i < AP_MAX_RESET - 1)
-                       udelay(5);
+       status = ap_test_queue(qid, &t_depth, &t_device_type);
+       switch (status.response_code) {
+       case AP_RESPONSE_NORMAL:
+               *queue_depth = t_depth + 1;
+               *device_type = t_device_type;
+               return 0;
+       case AP_RESPONSE_Q_NOT_AVAIL:
+       case AP_RESPONSE_DECONFIGURED:
+       case AP_RESPONSE_CHECKSTOPPED:
+       case AP_RESPONSE_INVALID_ADDRESS:
+               return -ENODEV;
+       case AP_RESPONSE_RESET_IN_PROGRESS:
+       case AP_RESPONSE_OTHERWISE_CHANGED:
+       case AP_RESPONSE_BUSY:
+               return -EBUSY;
+       default:
+               BUG();
        }
-       return rc;
 }
 
 /**
  * ap_init_queue(): Reset an AP queue.
  * @qid: The AP queue number
  *
- * Reset an AP queue and wait for it to become available again.
+ * Submit the Reset command to an AP queue.
+ * Since the reset is asynchronous, set the state to 'RESET_IN_PROGRESS'
+ * and check later via ap_poll_queue() if the reset is done.
  */
-static int ap_init_queue(ap_qid_t qid)
+static int ap_init_queue(struct ap_device *ap_dev)
 {
        struct ap_queue_status status;
-       int rc, dummy, i;
 
-       rc = -ENODEV;
-       status = ap_reset_queue(qid);
-       for (i = 0; i < AP_MAX_RESET; i++) {
-               switch (status.response_code) {
-               case AP_RESPONSE_NORMAL:
-                       if (status.queue_empty)
-                               rc = 0;
-                       break;
-               case AP_RESPONSE_Q_NOT_AVAIL:
-               case AP_RESPONSE_DECONFIGURED:
-               case AP_RESPONSE_CHECKSTOPPED:
-                       i = AP_MAX_RESET;       /* return with -ENODEV */
-                       break;
-               case AP_RESPONSE_RESET_IN_PROGRESS:
-                       rc = -EBUSY;
-               case AP_RESPONSE_BUSY:
-               default:
-                       break;
-               }
-               if (rc != -ENODEV && rc != -EBUSY)
-                       break;
-               if (i < AP_MAX_RESET - 1) {
-                       /* Time we are waiting until we give up (0.7sec * 90).
-                        * Since the actual request (in progress) will not
-                        * interrupted immediately for the reset command,
-                        * we have to be patient. In worst case we have to
-                        * wait 60sec + reset time (some msec).
-                        */
-                       schedule_timeout(AP_RESET_TIMEOUT);
-                       status = ap_test_queue(qid, &dummy, &dummy);
-               }
-       }
-       if (rc == 0 && ap_using_interrupts()) {
-               rc = ap_queue_enable_interruption(qid, ap_airq.lsi_ptr);
-               /* If interruption mode is supported by the machine,
-               * but an AP can not be enabled for interruption then
-               * the AP will be discarded.    */
-               if (rc)
-                       pr_err("Registering adapter interrupts for "
-                              "AP %d failed\n", AP_QID_DEVICE(qid));
+       status = ap_reset_queue(ap_dev->qid);
+       switch (status.response_code) {
+       case AP_RESPONSE_NORMAL:
+               ap_dev->interrupt = AP_INTR_DISABLED;
+               ap_dev->reset = AP_RESET_IN_PROGRESS;
+               return 0;
+       case AP_RESPONSE_RESET_IN_PROGRESS:
+       case AP_RESPONSE_BUSY:
+               return -EBUSY;
+       case AP_RESPONSE_Q_NOT_AVAIL:
+       case AP_RESPONSE_DECONFIGURED:
+       case AP_RESPONSE_CHECKSTOPPED:
+       default:
+               return -ENODEV;
        }
-       return rc;
 }
 
 /**
@@ -729,10 +683,63 @@ static ssize_t ap_pendingq_count_show(struct device *dev,
 
 static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL);
 
+static ssize_t ap_reset_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       struct ap_device *ap_dev = to_ap_dev(dev);
+       int rc = 0;
+
+       spin_lock_bh(&ap_dev->lock);
+       switch (ap_dev->reset) {
+       case AP_RESET_IGNORE:
+               rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
+               break;
+       case AP_RESET_ARMED:
+               rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
+               break;
+       case AP_RESET_DO:
+               rc = snprintf(buf, PAGE_SIZE, "Reset Timer expired.\n");
+               break;
+       case AP_RESET_IN_PROGRESS:
+               rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n");
+               break;
+       default:
+               break;
+       }
+       spin_unlock_bh(&ap_dev->lock);
+       return rc;
+}
+
+static DEVICE_ATTR(reset, 0444, ap_reset_show, NULL);
+
+static ssize_t ap_interrupt_show(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       struct ap_device *ap_dev = to_ap_dev(dev);
+       int rc = 0;
+
+       spin_lock_bh(&ap_dev->lock);
+       switch (ap_dev->interrupt) {
+       case AP_INTR_DISABLED:
+               rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
+               break;
+       case AP_INTR_ENABLED:
+               rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
+               break;
+       case AP_INTR_IN_PROGRESS:
+               rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
+               break;
+       }
+       spin_unlock_bh(&ap_dev->lock);
+       return rc;
+}
+
+static DEVICE_ATTR(interrupt, 0444, ap_interrupt_show, NULL);
+
 static ssize_t ap_modalias_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type);
+       return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type);
 }
 
 static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
@@ -753,6 +760,8 @@ static struct attribute *ap_dev_attrs[] = {
        &dev_attr_request_count.attr,
        &dev_attr_requestq_count.attr,
        &dev_attr_pendingq_count.attr,
+       &dev_attr_reset.attr,
+       &dev_attr_interrupt.attr,
        &dev_attr_modalias.attr,
        &dev_attr_ap_functions.attr,
        NULL
@@ -926,6 +935,10 @@ static int ap_device_probe(struct device *dev)
                spin_lock_bh(&ap_device_list_lock);
                list_del_init(&ap_dev->list);
                spin_unlock_bh(&ap_device_list_lock);
+       } else {
+               if (ap_dev->reset == AP_RESET_IN_PROGRESS ||
+                       ap_dev->interrupt == AP_INTR_IN_PROGRESS)
+                       __ap_schedule_poll_timer();
        }
        return rc;
 }
@@ -1411,7 +1424,7 @@ static void ap_scan_bus(struct work_struct *unused)
        struct ap_device *ap_dev;
        struct device *dev;
        ap_qid_t qid;
-       int queue_depth, device_type;
+       int queue_depth = 0, device_type = 0;
        unsigned int device_functions;
        int rc, i;
 
@@ -1429,15 +1442,9 @@ static void ap_scan_bus(struct work_struct *unused)
                else
                        rc = -ENODEV;
                if (dev) {
-                       if (rc == -EBUSY) {
-                               set_current_state(TASK_UNINTERRUPTIBLE);
-                               schedule_timeout(AP_RESET_TIMEOUT);
-                               rc = ap_query_queue(qid, &queue_depth,
-                                                   &device_type);
-                       }
                        ap_dev = to_ap_dev(dev);
                        spin_lock_bh(&ap_dev->lock);
-                       if (rc || ap_dev->unregistered) {
+                       if (rc == -ENODEV || ap_dev->unregistered) {
                                spin_unlock_bh(&ap_dev->lock);
                                if (ap_dev->unregistered)
                                        i--;
@@ -1449,15 +1456,17 @@ static void ap_scan_bus(struct work_struct *unused)
                        put_device(dev);
                        continue;
                }
-               if (rc)
-                       continue;
-               rc = ap_init_queue(qid);
                if (rc)
                        continue;
                ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL);
                if (!ap_dev)
                        break;
                ap_dev->qid = qid;
+               rc = ap_init_queue(ap_dev);
+               if ((rc != 0) && (rc != -EBUSY)) {
+                       kfree(ap_dev);
+                       continue;
+               }
                ap_dev->queue_depth = queue_depth;
                ap_dev->unregistered = 1;
                spin_lock_init(&ap_dev->lock);
@@ -1519,36 +1528,6 @@ ap_config_timeout(unsigned long ptr)
        add_timer(&ap_config_timer);
 }
 
-/**
- * __ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void __ap_schedule_poll_timer(void)
-{
-       ktime_t hr_time;
-
-       spin_lock_bh(&ap_poll_timer_lock);
-       if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
-               hr_time = ktime_set(0, poll_timeout);
-               hrtimer_forward_now(&ap_poll_timer, hr_time);
-               hrtimer_restart(&ap_poll_timer);
-       }
-       spin_unlock_bh(&ap_poll_timer_lock);
-}
-
-/**
- * ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void ap_schedule_poll_timer(void)
-{
-       if (ap_using_interrupts())
-               return;
-       __ap_schedule_poll_timer();
-}
-
 /**
  * ap_poll_read(): Receive pending reply messages from an AP device.
  * @ap_dev: pointer to the AP device
@@ -1568,6 +1547,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
                           ap_dev->reply->message, ap_dev->reply->length);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
+               ap_dev->interrupt = status.int_enabled;
                atomic_dec(&ap_poll_requests);
                ap_decrease_queue_count(ap_dev);
                list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
@@ -1582,6 +1562,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
                        *flags |= 1;
                break;
        case AP_RESPONSE_NO_PENDING_REPLY:
+               ap_dev->interrupt = status.int_enabled;
                if (status.queue_empty) {
                        /* The card shouldn't forget requests but who knows. */
                        atomic_sub(ap_dev->queue_count, &ap_poll_requests);
@@ -1612,7 +1593,8 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
        struct ap_message *ap_msg;
 
        if (ap_dev->requestq_count <= 0 ||
-           ap_dev->queue_count >= ap_dev->queue_depth)
+           (ap_dev->queue_count >= ap_dev->queue_depth) ||
+           (ap_dev->reset == AP_RESET_IN_PROGRESS))
                return 0;
        /* Start the next request on the queue. */
        ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
@@ -1646,6 +1628,8 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
 
 /**
  * ap_poll_queue(): Poll AP device for pending replies and send new messages.
+ * Check if the queue has a pending reset. Once it has completed, re-enable
+ * interrupts; otherwise reschedule the poll_timer for another attempt.
  * @ap_dev: pointer to the bus device
  * @flags: pointer to control flags, bit 2^0 is set if another poll is
  *        required, bit 2^1 is set if the poll timer needs to get armed
@@ -1656,7 +1640,51 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
  */
 static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
 {
-       int rc;
+       int rc, depth, type;
+       struct ap_queue_status status;
+
+       if (ap_dev->reset == AP_RESET_IN_PROGRESS) {
+               status = ap_test_queue(ap_dev->qid, &depth, &type);
+               switch (status.response_code) {
+               case AP_RESPONSE_NORMAL:
+                       ap_dev->reset = AP_RESET_IGNORE;
+                       if (ap_using_interrupts()) {
+                               rc = ap_queue_enable_interruption(
+                                       ap_dev, ap_airq.lsi_ptr);
+                               if (!rc)
+                                       ap_dev->interrupt = AP_INTR_IN_PROGRESS;
+                               else if (rc == -ENODEV) {
+                                       pr_err("Registering adapter interrupts for "
+                                              "AP %d failed\n", AP_QID_DEVICE(ap_dev->qid));
+                                       return rc;
+                               }
+                       }
+                       /* fall through */
+               case AP_RESPONSE_BUSY:
+               case AP_RESPONSE_RESET_IN_PROGRESS:
+                       *flags |= AP_POLL_AFTER_TIMEOUT;
+                       break;
+               case AP_RESPONSE_Q_NOT_AVAIL:
+               case AP_RESPONSE_DECONFIGURED:
+               case AP_RESPONSE_CHECKSTOPPED:
+                       return -ENODEV;
+               default:
+                       break;
+               }
+       }
+
+       if ((ap_dev->reset != AP_RESET_IN_PROGRESS) &&
+               (ap_dev->interrupt == AP_INTR_IN_PROGRESS)) {
+               status = ap_test_queue(ap_dev->qid, &depth, &type);
+               if (ap_using_interrupts()) {
+                       if (status.int_enabled == 1)
+                               ap_dev->interrupt = AP_INTR_ENABLED;
+                       else
+                               *flags |= AP_POLL_AFTER_TIMEOUT;
+               } else {
+                       ap_dev->interrupt = AP_INTR_DISABLED;
+               }
+       }
 
        rc = ap_poll_read(ap_dev, flags);
        if (rc)
@@ -1676,7 +1704,8 @@ static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_ms
        struct ap_queue_status status;
 
        if (list_empty(&ap_dev->requestq) &&
-           ap_dev->queue_count < ap_dev->queue_depth) {
+           (ap_dev->queue_count < ap_dev->queue_depth) &&
+           (ap_dev->reset != AP_RESET_IN_PROGRESS)) {
                status = __ap_send(ap_dev->qid, ap_msg->psmid,
                                   ap_msg->message, ap_msg->length,
                                   ap_msg->special);
@@ -1789,21 +1818,20 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused)
  * Reset a not responding AP device and move all requests from the
  * pending queue to the request queue.
  */
-static void ap_reset(struct ap_device *ap_dev)
+static void ap_reset(struct ap_device *ap_dev, unsigned long *flags)
 {
        int rc;
 
-       ap_dev->reset = AP_RESET_IGNORE;
        atomic_sub(ap_dev->queue_count, &ap_poll_requests);
        ap_dev->queue_count = 0;
        list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
        ap_dev->requestq_count += ap_dev->pendingq_count;
        ap_dev->pendingq_count = 0;
-       rc = ap_init_queue(ap_dev->qid);
+       rc = ap_init_queue(ap_dev);
        if (rc == -ENODEV)
                ap_dev->unregistered = 1;
        else
-               __ap_schedule_poll_timer();
+               *flags |= AP_POLL_AFTER_TIMEOUT;
 }
 
 static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
@@ -1812,7 +1840,7 @@ static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
                if (ap_poll_queue(ap_dev, flags))
                        ap_dev->unregistered = 1;
                if (ap_dev->reset == AP_RESET_DO)
-                       ap_reset(ap_dev);
+                       ap_reset(ap_dev, flags);
        }
        return 0;
 }
@@ -1845,9 +1873,9 @@ static void ap_poll_all(unsigned long dummy)
                        spin_unlock(&ap_dev->lock);
                }
                spin_unlock(&ap_device_list_lock);
-       } while (flags & 1);
-       if (flags & 2)
-               ap_schedule_poll_timer();
+       } while (flags & AP_POLL_IMMEDIATELY);
+       if (flags & AP_POLL_AFTER_TIMEOUT)
+               __ap_schedule_poll_timer();
 }
 
 /**
index 2737d261a32400797e2f58fb8d3a7161cf9a01bc..00468c8d0781c59aa817e4d9bb7b6f4d6f15a2e6 100644 (file)
 
 #define AP_DEVICES 64          /* Number of AP devices. */
 #define AP_DOMAINS 256         /* Number of AP domains. */
-#define AP_MAX_RESET 90                /* Maximum number of resets. */
 #define AP_RESET_TIMEOUT (HZ*0.7)      /* Time in ticks for reset timeouts. */
 #define AP_CONFIG_TIME 30      /* Time in seconds between AP bus rescans. */
 #define AP_POLL_TIME 1         /* Time in ticks between receive polls. */
 
+#define AP_POLL_IMMEDIATELY    1 /* continue running poll tasklet */
+#define AP_POLL_AFTER_TIMEOUT  2 /* run poll tasklet again after timeout */
+
 extern int ap_domain_index;
 
 /**
@@ -135,6 +137,14 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_RESET_IGNORE        0       /* request timeout will be ignored */
 #define AP_RESET_ARMED 1       /* request timeout timer is active */
 #define AP_RESET_DO    2       /* AP reset required */
+#define AP_RESET_IN_PROGRESS   3       /* AP reset in progress */
+
+/*
+ * AP interrupt states
+ */
+#define AP_INTR_DISABLED       0       /* AP interrupt disabled */
+#define AP_INTR_ENABLED                1       /* AP interrupt enabled */
+#define AP_INTR_IN_PROGRESS    3       /* AP interrupt in progress */
 
 struct ap_device;
 struct ap_message;
@@ -168,6 +178,7 @@ struct ap_device {
        struct timer_list timeout;      /* Timer for request timeouts. */
        int reset;                      /* Reset required after req. timeout. */
 
+       int interrupt;                  /* indicate if interrupts are enabled */
        int queue_count;                /* # messages currently on AP queue. */
 
        struct list_head pendingq;      /* List of message sent to AP queue. */
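
[Editor's note: taken together, the ap_bus changes above replace the old synchronous udelay()/schedule_timeout() retry loops with a small state machine: RAPQ (reset) and AQIC (enable interruption) are submitted once, and completion is observed later from the poll path. A self-contained, hypothetical model of the transitions that ap_poll_queue() drives; hw_reset_done and hw_irq_enabled stand in for what ap_test_queue() reports:]

enum { R_IGNORE, R_IN_PROGRESS };		/* subset of AP_RESET_* */
enum { I_DISABLED, I_ENABLED, I_IN_PROGRESS };	/* mirrors AP_INTR_* */

struct demo_dev {
	int reset;
	int interrupt;
};

static void demo_start_reset(struct demo_dev *d)
{
	d->reset = R_IN_PROGRESS;	/* RAPQ submitted; no busy-wait */
	d->interrupt = I_DISABLED;
}

static void demo_poll(struct demo_dev *d, int hw_reset_done, int hw_irq_enabled)
{
	if (d->reset == R_IN_PROGRESS && hw_reset_done) {
		d->reset = R_IGNORE;
		d->interrupt = I_IN_PROGRESS;	/* AQIC submitted */
	}
	if (d->reset != R_IN_PROGRESS && d->interrupt == I_IN_PROGRESS &&
	    hw_irq_enabled)
		d->interrupt = I_ENABLED;
}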
index 71e698b8577286f74fce26c1e069f7cb7193ff30..bb3908818505e13e3bbc3c13ed66d15b65932498 100644 (file)
@@ -39,7 +39,7 @@
  * But the maximum time limit managed by the stomper code is set to 60sec.
  * Hence we have to wait at least that time period.
  */
-#define CEX4_CLEANUP_TIME      (61*HZ)
+#define CEX4_CLEANUP_TIME      (900*HZ)
 
 static struct ap_device_id zcrypt_cex4_ids[] = {
        { AP_DEVICE(AP_DEVICE_TYPE_CEX4)  },
index 81f22980b2def237fbefb4feac8386194188653e..156b790072b47263f5c9aec874e02694e6bb1c9b 100644 (file)
@@ -366,8 +366,9 @@ int __init register_intc_controller(struct intc_desc *desc)
 
                        /* redirect this interrupts to the first one */
                        irq_set_chip(irq2, &dummy_irq_chip);
-                       irq_set_chained_handler(irq2, intc_redirect_irq);
-                       irq_set_handler_data(irq2, (void *)irq);
+                       irq_set_chained_handler_and_data(irq2,
+                                                        intc_redirect_irq,
+                                                        (void *)irq);
                }
        }
 
index f30ac9354ff248f316fb96a26d534884faa8bf5d..f5f1b821241afc92e6854788262d13f665a0dd45 100644 (file)
@@ -243,8 +243,9 @@ restart:
                 */
                irq_set_nothread(irq);
 
-               irq_set_chained_handler(entry->pirq, intc_virq_handler);
+               /* Set handler data before installing the handler */
                add_virq_to_pirq(entry->pirq, irq);
+               irq_set_chained_handler(entry->pirq, intc_virq_handler);
 
                radix_tree_tag_clear(&d->tree, entry->enum_id,
                                     INTC_TAG_VIRQ_NEEDS_ALLOC);
index 2e6716104d3fa8c9aa45f3a39fdafe8c355d4ead..5820e851392798e9d550241dafc8a00ec76898e3 100644 (file)
@@ -119,7 +119,7 @@ exit:
        return ret;
 }
 
-static struct kernel_param_ops duration_ops = {
+static const struct kernel_param_ops duration_ops = {
        .set = duration_set,
        .get = param_get_int,
 };
@@ -167,7 +167,7 @@ exit_win:
        return ret;
 }
 
-static struct kernel_param_ops window_size_ops = {
+static const struct kernel_param_ops window_size_ops = {
        .set = window_size_set,
        .get = param_get_int,
 };
index f78a87b07872ffad69234d680953e1236f8dad81..bb809cf3661770e63e0a50ceb19017a0ea087d0f 100644 (file)
@@ -1345,7 +1345,7 @@ static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp)
 
 #define param_check_vmidfilter(name, p) __param_check(name, p, void)
 
-static struct kernel_param_ops param_ops_vmidfilter = {
+static const struct kernel_param_ops param_ops_vmidfilter = {
        .set = param_set_vmidfilter,
        .get = param_get_vmidfilter,
 };
index 3f6cd3102db5238ba4998c1b5ad64b831e9b6abf..9da1e842bbe93b6883299e625d0dc411f85abb22 100644 (file)
@@ -51,7 +51,8 @@ int tile_console_write(const char *buf, int count)
                              _SIM_CONTROL_OPERATOR_BITS));
                return 0;
        } else {
-               return hv_console_write((HV_VirtAddr)buf, count);
+               /* Translate a 0-byte write to -EAGAIN for hvc_console_print. */
+               return hv_console_write((HV_VirtAddr)buf, count) ?: -EAGAIN;
        }
 }
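
[Editor's note: the "?:" above is the GNU C conditional with an omitted middle operand: x ?: y evaluates x once and yields it if non-zero, else y. A zero-byte hypervisor write therefore becomes -EAGAIN, which the hvc core treats as "try again" rather than silent success. Longhand equivalent:]

int written = hv_console_write((HV_VirtAddr)buf, count);

return written ? written : -EAGAIN;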
 
index 7a3d146a5f0efc0dbc3b94e854adb4d81c85da4d..a9d837f83ce832539a442643f10ec4221d0fa117 100644 (file)
@@ -302,7 +302,7 @@ static int xen_initial_domain_console_init(void)
 static void xen_console_update_evtchn(struct xencons_info *info)
 {
        if (xen_hvm_domain()) {
-               uint64_t v;
+               uint64_t v = 0;
                int err;
 
                err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
index 2847108cc8ddd228633d49bf97c40bfde6e4e3f3..b5b427888b2453d88e59f7d002ab8d4e512d23e6 100644 (file)
@@ -988,7 +988,7 @@ static int sysrq_reset_seq_param_set(const char *buffer,
        return 0;
 }
 
-static struct kernel_param_ops param_ops_sysrq_reset_seq = {
+static const struct kernel_param_ops param_ops_sysrq_reset_seq = {
        .get    = param_get_ushort,
        .set    = sysrq_reset_seq_param_set,
 };
index 888998a7fe314c4ee3f25298f2c81f0d429365c4..a2ae88dbda784f257923a730caf20c2986b22798 100644 (file)
@@ -1599,7 +1599,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
        char file_arr[] = "CMVxy.bin";
        char *file;
 
-       kparam_block_sysfs_write(cmv_file);
+       kernel_param_lock(THIS_MODULE);
        /* set proper name corresponding modem version and line type */
        if (cmv_file[sc->modem_index] == NULL) {
                if (UEA_CHIP_VERSION(sc) == ADI930)
@@ -1618,7 +1618,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
        strlcat(cmv_name, file, UEA_FW_NAME_MAX);
        if (ver == 2)
                strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX);
-       kparam_unblock_sysfs_write(cmv_file);
+       kernel_param_unlock(THIS_MODULE);
 }
 
 static int request_cmvs_old(struct uea_softc *sc,
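
[Editor's note: kparam_block_sysfs_write() pinned one named parameter; its replacement kernel_param_lock()/kernel_param_unlock() takes the owning module's single param mutex, serializing against sysfs access to all of that module's parameters. A sketch of taking a stable snapshot of a writable charp parameter; fw_name is a made-up example:]

static char *fw_name = "default.bin";
module_param(fw_name, charp, 0644);

static void snapshot_fw_name(char *buf, size_t len)
{
	kernel_param_lock(THIS_MODULE);
	strlcpy(buf, fw_name, len);	/* fw_name cannot change under us */
	kernel_param_unlock(THIS_MODULE);
}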
index d32d1c4d1b99f81a1962bb92bf7cc6b3e1ad77ff..178ae93b7ebd6a89e946803832dd3ba032b0f9c8 100644 (file)
@@ -1977,7 +1977,7 @@ static int param_set_scroll(const char *val, const struct kernel_param *kp)
 
        return 0;
 }
-static struct kernel_param_ops param_ops_scroll = {
+static const struct kernel_param_ops param_ops_scroll = {
        .set = param_set_scroll,
 };
 #define param_check_scroll(name, p) __param_check(name, p, void)
index ea7f056ed5fe3c4bbc04c45d9e09043bf7f134ce..8bac309c24b99c7134737ac901933fb0ed0155b5 100644 (file)
@@ -754,9 +754,9 @@ static int vt8623_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
        /* Prepare startup mode */
 
-       kparam_block_sysfs_write(mode_option);
+       kernel_param_lock(THIS_MODULE);
        rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8);
-       kparam_unblock_sysfs_write(mode_option);
+       kernel_param_unlock(THIS_MODULE);
        if (! ((rc == 1) || (rc == 2))) {
                rc = -EINVAL;
                dev_err(info->device, "mode %s not found\n", mode_option);
index 7a5e60dea6c59267ef48efa42ac2091346616b57..10189b5b627f962cb9a8e9527aae829e27b0aec2 100644 (file)
@@ -691,7 +691,7 @@ static int vm_cmdline_get(char *buffer, const struct kernel_param *kp)
        return strlen(buffer) + 1;
 }
 
-static struct kernel_param_ops vm_cmdline_param_ops = {
+static const struct kernel_param_ops vm_cmdline_param_ops = {
        .set = vm_cmdline_set,
        .get = vm_cmdline_get,
 };
index 38387950490eb8dbd07c85434ac59e5129af2c1f..96093ae369a5613938962b4970decda66c19d342 100644 (file)
@@ -39,8 +39,8 @@
 #include <asm/irq.h>
 #include <asm/idle.h>
 #include <asm/io_apic.h>
-#include <asm/xen/page.h>
 #include <asm/xen/pci.h>
+#include <xen/page.h>
 #endif
 #include <asm/sync_bitops.h>
 #include <asm/xen/hypercall.h>
index 417415d738d0f454da2d28d2815bb221d160613f..ed673e1acd6159a3ca34dc10238fef8936e43249 100644 (file)
 #include <asm/sync_bitops.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
-#include <asm/xen/page.h>
 
 #include <xen/xen.h>
 #include <xen/xen-ops.h>
 #include <xen/events.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/event_channel.h>
+#include <xen/page.h>
 
 #include "events_internal.h"
 
index 89274850741b5e3ee457fa2bd19d6efaf16d1f5d..67b9163db7185402b0ff3811c5363c1a1022e2c7 100644 (file)
@@ -41,9 +41,9 @@
 #include <xen/balloon.h>
 #include <xen/gntdev.h>
 #include <xen/events.h>
+#include <xen/page.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
index b1c7170e5c9e1edf85049c4d43fc4d1b9f2b18f8..62f591f8763ccfd4519248566b6f1d49eb78ce64 100644 (file)
@@ -138,7 +138,6 @@ static struct gnttab_free_callback *gnttab_free_callback_list;
 static int gnttab_expand(unsigned int req_entries);
 
 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
-#define SPP (PAGE_SIZE / sizeof(grant_status_t))
 
 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
 {
index 9e6a85104a20820ff16ef748804eb8c6a98b4201..d10effee9b9eb16d46a0bfea9b108b5f22863be3 100644 (file)
 #include <xen/grant_table.h>
 #include <xen/events.h>
 #include <xen/hvc-console.h>
+#include <xen/page.h>
 #include <xen/xen-ops.h>
 
 #include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
 #include <asm/xen/hypervisor.h>
 
 enum shutdown_state {
index d88f36754bf7efcd67750cce72ec607564bc6fd4..239738f944badfa3f12f3d61581ef5cb4d6910d4 100644 (file)
@@ -17,8 +17,8 @@
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
+#include <xen/page.h>
 #include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
 #include <asm/xen/hypervisor.h>
 #include <xen/tmem.h>
 
@@ -389,7 +389,7 @@ static int __init xen_tmem_init(void)
        }
 #endif
 #ifdef CONFIG_CLEANCACHE
-       BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
+       BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
        if (tmem_enabled && cleancache) {
                int err;
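
[Editor's note: the switch from BUG_ON() to BUILD_BUG_ON() moves the check from boot time to build time: BUILD_BUG_ON() requires a compile-time constant condition and breaks the build when it is true, so a size mismatch between the two key types can never reach a running kernel. Illustration only:]

static void __init demo_size_checks(void)
{
	BUILD_BUG_ON(sizeof(u32) != 4);	/* constant and false: compiles away */
	/* BUILD_BUG_ON(sizeof(u64) != 4);  constant and true: build error   */
}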
 
index 96b2011d25f35f628288c523dea84c4150e8ce35..9ad327238ba931243967455b5790916dc6b184f1 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <asm/xen/hypervisor.h>
-#include <asm/xen/page.h>
+#include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/event_channel.h>
 #include <xen/balloon.h>
@@ -379,16 +379,16 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
        int i, j;
 
        for (i = 0; i < nr_pages; i++) {
-               unsigned long addr = (unsigned long)vaddr +
-                       (PAGE_SIZE * i);
                err = gnttab_grant_foreign_access(dev->otherend_id,
-                                                 virt_to_mfn(addr), 0);
+                                                 virt_to_mfn(vaddr), 0);
                if (err < 0) {
                        xenbus_dev_fatal(dev, err,
                                         "granting access to ring page");
                        goto fail;
                }
                grefs[i] = err;
+
+               vaddr = vaddr + PAGE_SIZE;
        }
 
        return 0;
index 5390a674b5e3a8d8ea62112343be83a34be58547..4308fb3cf7c2f717ffd446035f1c30b61f8dacf8 100644 (file)
@@ -742,7 +742,7 @@ static int xenbus_resume_cb(struct notifier_block *nb,
        int err = 0;
 
        if (xen_hvm_domain()) {
-               uint64_t v;
+               uint64_t v = 0;
 
                err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
                if (!err && v)
index b155d32db76633bc88d1904546f803d2272db179..4fe10f93db8a3e52ebbb5330e94b80ee92455e1d 100644 (file)
@@ -43,7 +43,7 @@ static inline struct bdev_inode *BDEV_I(struct inode *inode)
        return container_of(inode, struct bdev_inode, vfs_inode);
 }
 
-inline struct block_device *I_BDEV(struct inode *inode)
+struct block_device *I_BDEV(struct inode *inode)
 {
        return &BDEV_I(inode)->bdev;
 }
index df9932b00d08fdc666f63c84014fbd08889cf624..1ce06c849a86db84ca080a3d0fd0398a160b3b9a 100644 (file)
@@ -85,6 +85,7 @@ BTRFS_WORK_HELPER(extent_refs_helper);
 BTRFS_WORK_HELPER(scrub_helper);
 BTRFS_WORK_HELPER(scrubwrc_helper);
 BTRFS_WORK_HELPER(scrubnc_helper);
+BTRFS_WORK_HELPER(scrubparity_helper);
 
 static struct __btrfs_workqueue *
 __btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
index ec2ee477f8ba003ee7af4030da1b976891073dcf..b0b093b6afeca3654d44865a20d2b66305013f62 100644 (file)
@@ -64,6 +64,8 @@ BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
 BTRFS_WORK_HELPER_PROTO(scrub_helper);
 BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
 BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
 
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
                                              unsigned int flags,
index 614aaa1969bdfded3485ae9a72146269dd9101eb..802fabb30e1575338eec8ae53f157e9f2d7a6b56 100644 (file)
@@ -250,8 +250,12 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
         * the first item to check. But sometimes, we may enter it with
         * slot==nritems. In that case, go to the next leaf before we continue.
         */
-       if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
-               ret = btrfs_next_old_leaf(root, path, time_seq);
+       if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+               if (time_seq == (u64)-1)
+                       ret = btrfs_next_leaf(root, path);
+               else
+                       ret = btrfs_next_old_leaf(root, path, time_seq);
+       }
 
        while (!ret && count < total_refs) {
                eb = path->nodes[0];
@@ -291,7 +295,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                        eie = NULL;
                }
 next:
-               ret = btrfs_next_old_item(root, path, time_seq);
+               if (time_seq == (u64)-1)
+                       ret = btrfs_next_item(root, path);
+               else
+                       ret = btrfs_next_old_item(root, path, time_seq);
        }
 
        if (ret > 0)
@@ -334,6 +341,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
        if (path->search_commit_root)
                root_level = btrfs_header_level(root->commit_root);
+       else if (time_seq == (u64)-1)
+               root_level = btrfs_header_level(root->node);
        else
                root_level = btrfs_old_root_level(root, time_seq);
 
@@ -343,7 +352,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        }
 
        path->lowest_level = level;
-       ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+       if (time_seq == (u64)-1)
+               ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+                                       0, 0);
+       else
+               ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+                                           time_seq);
 
        /* root node has been locked, we can release @subvol_srcu safely here */
        srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -491,7 +505,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
                BUG_ON(!ref->wanted_disk_byte);
                eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
                                     0);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       return PTR_ERR(eb);
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        return -EIO;
                }
@@ -507,7 +523,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 }
 
 /*
- * merge two lists of backrefs and adjust counts accordingly
+ * merge backrefs and adjust counts accordingly
  *
  * mode = 1: merge identical keys, if key is set
  *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
@@ -535,9 +551,9 @@ static void __merge_refs(struct list_head *head, int mode)
 
                        ref2 = list_entry(pos2, struct __prelim_ref, list);
 
+                       if (!ref_for_same_block(ref1, ref2))
+                               continue;
                        if (mode == 1) {
-                               if (!ref_for_same_block(ref1, ref2))
-                                       continue;
                                if (!ref1->parent && ref2->parent) {
                                        xchg = ref1;
                                        ref1 = ref2;
@@ -572,8 +588,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                              struct list_head *prefs, u64 *total_refs,
                              u64 inum)
 {
+       struct btrfs_delayed_ref_node *node;
        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
-       struct rb_node *n = &head->node.rb_node;
        struct btrfs_key key;
        struct btrfs_key op_key = {0};
        int sgn;
@@ -583,12 +599,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
        spin_lock(&head->lock);
-       n = rb_first(&head->ref_root);
-       while (n) {
-               struct btrfs_delayed_ref_node *node;
-               node = rb_entry(n, struct btrfs_delayed_ref_node,
-                               rb_node);
-               n = rb_next(n);
+       list_for_each_entry(node, &head->ref_list, list) {
                if (node->seq > seq)
                        continue;
 
@@ -882,6 +893,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * NOTE: This can return values > 0
  *
+ * If time_seq is set to (u64)-1, it will not search delayed_refs, and will
+ * behave much like the trans == NULL case; the only difference is that it
+ * does not search the commit root.
+ * This special case exists for qgroup to search roots in commit_transaction().
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -920,6 +936,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
                path->skip_locking = 1;
        }
 
+       if (time_seq == (u64)-1)
+               path->skip_locking = 1;
+
        /*
         * grab both a lock on the path and a lock on the delayed ref head.
         * We need both to get a consistent picture of how the refs look
@@ -934,9 +953,10 @@ again:
        BUG_ON(ret == 0);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-       if (trans && likely(trans->type != __TRANS_DUMMY)) {
+       if (trans && likely(trans->type != __TRANS_DUMMY) &&
+           time_seq != (u64)-1) {
 #else
-       if (trans) {
+       if (trans && time_seq != (u64)-1) {
 #endif
                /*
                 * look if there are updates for this ref queued and lock the
@@ -1034,7 +1054,10 @@ again:
 
                                eb = read_tree_block(fs_info->extent_root,
                                                           ref->parent, 0);
-                               if (!eb || !extent_buffer_uptodate(eb)) {
+                               if (IS_ERR(eb)) {
+                                       ret = PTR_ERR(eb);
+                                       goto out;
+                               } else if (!extent_buffer_uptodate(eb)) {
                                        free_extent_buffer(eb);
                                        ret = -EIO;
                                        goto out;
index 0f11ebc92f02ac69ffe3302624852cf9e467656a..54114b4887ddd5d1cdb393df3a857dac88cac5ac 100644 (file)
@@ -1439,8 +1439,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                btrfs_tree_read_unlock(eb_root);
                free_extent_buffer(eb_root);
                old = read_tree_block(root, logical, 0);
-               if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
-                       free_extent_buffer(old);
+               if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
+                       if (!IS_ERR(old))
+                               free_extent_buffer(old);
                        btrfs_warn(root->fs_info,
                                "failed to read tree block %llu from get_old_root", logical);
                } else {
@@ -1685,7 +1686,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                if (!cur || !uptodate) {
                        if (!cur) {
                                cur = read_tree_block(root, blocknr, gen);
-                               if (!cur || !extent_buffer_uptodate(cur)) {
+                               if (IS_ERR(cur)) {
+                                       return PTR_ERR(cur);
+                               } else if (!extent_buffer_uptodate(cur)) {
                                        free_extent_buffer(cur);
                                        return -EIO;
                                }
@@ -1864,8 +1867,9 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
 
        eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
                             btrfs_node_ptr_generation(parent, slot));
-       if (eb && !extent_buffer_uptodate(eb)) {
-               free_extent_buffer(eb);
+       if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) {
+               if (!IS_ERR(eb))
+                       free_extent_buffer(eb);
                eb = NULL;
        }
 
@@ -2494,7 +2498,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 
        ret = -EAGAIN;
        tmp = read_tree_block(root, blocknr, 0);
-       if (tmp) {
+       if (!IS_ERR(tmp)) {
                /*
                 * If the read above didn't mark this buffer up to date,
                 * it will never end up being up to date.  Set ret to EIO now
index 6f364e1d8d3d1e9c08e246dbc1df443be49c33fd..80a9aefb0c46fc179119d8ad67eb9f8bff687ac1 100644 (file)
@@ -174,7 +174,7 @@ struct btrfs_ordered_sum;
 /* csum types */
 #define BTRFS_CSUM_TYPE_CRC32  0
 
-static int btrfs_csum_sizes[] = { 4, 0 };
+static int btrfs_csum_sizes[] = { 4 };
 
 /* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
@@ -1619,10 +1619,7 @@ struct btrfs_fs_info {
        struct task_struct *cleaner_kthread;
        int thread_pool_size;
 
-       struct kobject super_kobj;
        struct kobject *space_info_kobj;
-       struct kobject *device_dir_kobj;
-       struct completion kobj_unregister;
        int do_barriers;
        int closing;
        int log_root_recovering;
@@ -1698,6 +1695,7 @@ struct btrfs_fs_info {
        struct btrfs_workqueue *scrub_workers;
        struct btrfs_workqueue *scrub_wr_completion_workers;
        struct btrfs_workqueue *scrub_nocow_workers;
+       struct btrfs_workqueue *scrub_parity_workers;
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        u32 check_integrity_print_mask;
@@ -1735,7 +1733,7 @@ struct btrfs_fs_info {
        /* list of dirty qgroups to be written at next commit */
        struct list_head dirty_qgroups;
 
-       /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+       /* used by qgroup for an efficient tree traversal */
        u64 qgroup_seq;
 
        /* qgroup rescan items */
@@ -3458,6 +3456,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode);
 void btrfs_orphan_release_metadata(struct inode *inode);
@@ -3515,6 +3514,9 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int __get_raid_index(u64 flags);
 int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void check_system_chunk(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       const u64 type);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -4050,6 +4052,7 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 
 #ifdef CONFIG_BTRFS_ASSERT
 
+__cold
 static inline void assfail(char *expr, char *file, int line)
 {
        pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
@@ -4065,10 +4068,12 @@ static inline void assfail(char *expr, char *file, int line)
 
 #define btrfs_assert()
 __printf(5, 6)
+__cold
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                     unsigned int line, int errno, const char *fmt, ...);
 
 
+__cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, const char *function,
                               unsigned int line, int errno);
@@ -4111,11 +4116,17 @@ static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
  * Call btrfs_abort_transaction as early as possible when an error condition is
  * detected, that way the exact line number is reported.
  */
-
 #define btrfs_abort_transaction(trans, root, errno)            \
 do {                                                           \
-       __btrfs_abort_transaction(trans, root, __func__,        \
-                                 __LINE__, errno);             \
+       /* Report first abort since mount */                    \
+       if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,     \
+                       &((root)->fs_info->fs_state))) {        \
+               WARN(1, KERN_DEBUG                              \
+               "BTRFS: Transaction aborted (error %d)\n",      \
+               (errno));                                       \
+       }                                                       \
+       __btrfs_abort_transaction((trans), (root), __func__,    \
+                                 __LINE__, (errno));           \
 } while (0)
 
 #define btrfs_std_error(fs_info, errno)                                \
@@ -4132,6 +4143,7 @@ do {                                                              \
 } while (0)
 
 __printf(5, 6)
+__cold
 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                   unsigned int line, int errno, const char *fmt, ...);
 
index 8f8ed7d20bac5f4e058fc693d0fdc24fde7abeda..ac3e81da6d4edc8e33856840349cc88750771546 100644 (file)
@@ -22,6 +22,7 @@
 #include "ctree.h"
 #include "delayed-ref.h"
 #include "transaction.h"
+#include "qgroup.h"
 
 struct kmem_cache *btrfs_delayed_ref_head_cachep;
 struct kmem_cache *btrfs_delayed_tree_ref_cachep;
@@ -84,87 +85,6 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
        return 0;
 }
 
-/*
- * entries in the rb tree are ordered by the byte number of the extent,
- * type of the delayed backrefs and content of delayed backrefs.
- */
-static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-                     struct btrfs_delayed_ref_node *ref1,
-                     bool compare_seq)
-{
-       if (ref1->bytenr < ref2->bytenr)
-               return -1;
-       if (ref1->bytenr > ref2->bytenr)
-               return 1;
-       if (ref1->is_head && ref2->is_head)
-               return 0;
-       if (ref2->is_head)
-               return -1;
-       if (ref1->is_head)
-               return 1;
-       if (ref1->type < ref2->type)
-               return -1;
-       if (ref1->type > ref2->type)
-               return 1;
-       if (ref1->no_quota > ref2->no_quota)
-               return 1;
-       if (ref1->no_quota < ref2->no_quota)
-               return -1;
-       /* merging of sequenced refs is not allowed */
-       if (compare_seq) {
-               if (ref1->seq < ref2->seq)
-                       return -1;
-               if (ref1->seq > ref2->seq)
-                       return 1;
-       }
-       if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
-           ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
-               return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
-                                     btrfs_delayed_node_to_tree_ref(ref1),
-                                     ref1->type);
-       } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
-                  ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
-               return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
-                                     btrfs_delayed_node_to_data_ref(ref1));
-       }
-       BUG();
-       return 0;
-}
-
-/*
- * insert a new ref into the rbtree.  This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
-static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
-                                                 struct rb_node *node)
-{
-       struct rb_node **p = &root->rb_node;
-       struct rb_node *parent_node = NULL;
-       struct btrfs_delayed_ref_node *entry;
-       struct btrfs_delayed_ref_node *ins;
-       int cmp;
-
-       ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-       while (*p) {
-               parent_node = *p;
-               entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
-                                rb_node);
-
-               cmp = comp_entry(entry, ins, 1);
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else if (cmp > 0)
-                       p = &(*p)->rb_right;
-               else
-                       return entry;
-       }
-
-       rb_link_node(node, parent_node, p);
-       rb_insert_color(node, root);
-       return NULL;
-}
-
 /* insert a new ref to head ref rbtree */
 static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
                                                   struct rb_node *node)
@@ -268,7 +188,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                rb_erase(&head->href_node, &delayed_refs->href_root);
        } else {
                assert_spin_locked(&head->lock);
-               rb_erase(&ref->rb_node, &head->ref_root);
+               list_del(&ref->list);
        }
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
@@ -277,99 +197,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                trans->delayed_ref_updates--;
 }
 
-static int merge_ref(struct btrfs_trans_handle *trans,
-                    struct btrfs_delayed_ref_root *delayed_refs,
-                    struct btrfs_delayed_ref_head *head,
-                    struct btrfs_delayed_ref_node *ref, u64 seq)
-{
-       struct rb_node *node;
-       int mod = 0;
-       int done = 0;
-
-       node = rb_next(&ref->rb_node);
-       while (!done && node) {
-               struct btrfs_delayed_ref_node *next;
-
-               next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-               if (seq && next->seq >= seq)
-                       break;
-               if (comp_entry(ref, next, 0))
-                       continue;
-
-               if (ref->action == next->action) {
-                       mod = next->ref_mod;
-               } else {
-                       if (ref->ref_mod < next->ref_mod) {
-                               struct btrfs_delayed_ref_node *tmp;
-
-                               tmp = ref;
-                               ref = next;
-                               next = tmp;
-                               done = 1;
-                       }
-                       mod = -next->ref_mod;
-               }
-
-               drop_delayed_ref(trans, delayed_refs, head, next);
-               ref->ref_mod += mod;
-               if (ref->ref_mod == 0) {
-                       drop_delayed_ref(trans, delayed_refs, head, ref);
-                       done = 1;
-               } else {
-                       /*
-                        * You can't have multiples of the same ref on a tree
-                        * block.
-                        */
-                       WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                               ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               }
-       }
-       return done;
-}
-
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *fs_info,
-                             struct btrfs_delayed_ref_root *delayed_refs,
-                             struct btrfs_delayed_ref_head *head)
-{
-       struct rb_node *node;
-       u64 seq = 0;
-
-       assert_spin_locked(&head->lock);
-       /*
-        * We don't have too much refs to merge in the case of delayed data
-        * refs.
-        */
-       if (head->is_data)
-               return;
-
-       spin_lock(&fs_info->tree_mod_seq_lock);
-       if (!list_empty(&fs_info->tree_mod_seq_list)) {
-               struct seq_list *elem;
-
-               elem = list_first_entry(&fs_info->tree_mod_seq_list,
-                                       struct seq_list, list);
-               seq = elem->seq;
-       }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
-
-       node = rb_first(&head->ref_root);
-       while (node) {
-               struct btrfs_delayed_ref_node *ref;
-
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                              rb_node);
-               /* We can't merge refs that are outside of our seq count */
-               if (seq && ref->seq >= seq)
-                       break;
-               if (merge_ref(trans, delayed_refs, head, ref, seq))
-                       node = rb_first(&head->ref_root);
-               else
-                       node = rb_next(&ref->rb_node);
-       }
-}
-
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
@@ -443,45 +270,71 @@ again:
 }
 
 /*
- * helper function to update an extent delayed ref in the
- * rbtree.  existing and update must both have the same
- * bytenr and parent
+ * Helper to insert the ref_node at the list tail, or merge it with the tail.
  *
- * This may free existing if the update cancels out whatever
- * operation it was doing.
+ * Return 0 for insert.
+ * Return >0 for merge.
  */
-static noinline void
-update_existing_ref(struct btrfs_trans_handle *trans,
-                   struct btrfs_delayed_ref_root *delayed_refs,
-                   struct btrfs_delayed_ref_head *head,
-                   struct btrfs_delayed_ref_node *existing,
-                   struct btrfs_delayed_ref_node *update)
+static int
+add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
+                          struct btrfs_delayed_ref_root *root,
+                          struct btrfs_delayed_ref_head *href,
+                          struct btrfs_delayed_ref_node *ref)
 {
-       if (update->action != existing->action) {
-               /*
-                * this is effectively undoing either an add or a
-                * drop.  We decrement the ref_mod, and if it goes
-                * down to zero we just delete the entry without
-                * every changing the extent allocation tree.
-                */
-               existing->ref_mod--;
-               if (existing->ref_mod == 0)
-                       drop_delayed_ref(trans, delayed_refs, head, existing);
-               else
-                       WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                               existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
+       struct btrfs_delayed_ref_node *exist;
+       int mod;
+       int ret = 0;
+
+       spin_lock(&href->lock);
+       /* Check whether we can merge the tail node with ref */
+       if (list_empty(&href->ref_list))
+               goto add_tail;
+       exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
+                          list);
+       /* No need to compare bytenr or is_head */
+       if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
+           exist->seq != ref->seq)
+               goto add_tail;
+
+       if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
+            exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+           comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
+                          btrfs_delayed_node_to_tree_ref(ref),
+                          ref->type))
+               goto add_tail;
+       if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
+            exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
+           comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
+                          btrfs_delayed_node_to_data_ref(ref)))
+               goto add_tail;
+
+       /* Now we are sure we can merge */
+       ret = 1;
+       if (exist->action == ref->action) {
+               mod = ref->ref_mod;
        } else {
-               WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                       existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               /*
-                * the action on the existing ref matches
-                * the action on the ref we're trying to add.
-                * Bump the ref_mod by one so the backref that
-                * is eventually added/removed has the correct
-                * reference count
-                */
-               existing->ref_mod += update->ref_mod;
+               /* Need to change action */
+               if (exist->ref_mod < ref->ref_mod) {
+                       exist->action = ref->action;
+                       mod = -exist->ref_mod;
+                       exist->ref_mod = ref->ref_mod;
+               } else
+                       mod = -ref->ref_mod;
        }
+       exist->ref_mod += mod;
+
+       /* remove existing tail if its ref_mod is zero */
+       if (exist->ref_mod == 0)
+               drop_delayed_ref(trans, root, href, exist);
+       spin_unlock(&href->lock);
+       return ret;
+
+add_tail:
+       list_add_tail(&ref->list, &href->ref_list);
+       atomic_inc(&root->num_entries);
+       trans->delayed_ref_updates++;
+       spin_unlock(&href->lock);
+       return ret;
 }
 
 /*
@@ -568,12 +421,14 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 static noinline struct btrfs_delayed_ref_head *
 add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
-                    struct btrfs_delayed_ref_node *ref, u64 bytenr,
-                    u64 num_bytes, int action, int is_data)
+                    struct btrfs_delayed_ref_node *ref,
+                    struct btrfs_qgroup_extent_record *qrecord,
+                    u64 bytenr, u64 num_bytes, int action, int is_data)
 {
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *qexisting;
        int count_mod = 1;
        int must_insert_reserved = 0;
 
@@ -618,10 +473,22 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
-       head_ref->ref_root = RB_ROOT;
+       INIT_LIST_HEAD(&head_ref->ref_list);
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;
 
+       /* Record qgroup extent info if provided */
+       if (qrecord) {
+               qrecord->bytenr = bytenr;
+               qrecord->num_bytes = num_bytes;
+               qrecord->old_roots = NULL;
+
+               qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
+                                                            qrecord);
+               if (qexisting)
+                       kfree(qrecord);
+       }
+
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
 
@@ -659,10 +526,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, int level,
                     int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -693,21 +560,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_tree_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       /*
+        * XXX: memory should be freed at the same level it was allocated,
+        * but this bad practice exists elsewhere too. Follow it for now;
+        * needs cleanup.
+        */
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -721,10 +581,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
                     u64 offset, int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -758,21 +618,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_data_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -790,6 +639,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *record = NULL;
 
        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;
@@ -800,9 +650,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                return -ENOMEM;
 
        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
-       if (!head_ref) {
-               kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
-               return -ENOMEM;
+       if (!head_ref)
+               goto free_ref;
+
+       if (fs_info->quota_enabled && is_fstree(ref_root)) {
+               record = kmalloc(sizeof(*record), GFP_NOFS);
+               if (!record)
+                       goto free_head_ref;
        }
 
        head_ref->extent_op = extent_op;
@@ -814,7 +668,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, action, 0);
 
        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -823,6 +677,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        spin_unlock(&delayed_refs->lock);
 
        return 0;
+
+free_head_ref:
+       kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+free_ref:
+       kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
+       return -ENOMEM;
 }
 
 /*
@@ -839,6 +700,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *record = NULL;
 
        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;
@@ -854,6 +716,16 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                return -ENOMEM;
        }
 
+       if (fs_info->quota_enabled && is_fstree(ref_root)) {
+               record = kmalloc(sizeof(*record), GFP_NOFS);
+               if (!record) {
+                       kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+                       kmem_cache_free(btrfs_delayed_ref_head_cachep,
+                                       head_ref);
+                       return -ENOMEM;
+               }
+       }
+
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -863,7 +735,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, action, 1);
 
        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -891,9 +763,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
 
-       add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-                                  num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
-                                  extent_op->is_data);
+       add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
+                            num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+                            extent_op->is_data);
 
        spin_unlock(&delayed_refs->lock);
        return 0;
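
The rbtree-to-list conversion above centres on add_delayed_ref_tail_merge(): because refs are now kept and run in insertion order, a new ref only has to be compared against the current list tail. A simplified, self-contained userspace sketch of the merge arithmetic: a hand-rolled circular list replaces the kernel's list_head, and the payload comparisons (comp_tree_refs()/comp_data_refs()) and href->lock are omitted.

    #include <stdlib.h>

    enum action { ADD_REF, DROP_REF };

    struct ref {
            enum action action;
            int type;
            unsigned long long seq;
            int ref_mod;                    /* refs this node adds or drops */
            struct ref *prev, *next;
    };

    struct head {
            struct ref list;                /* circular list sentinel */
    };

    static void head_init(struct head *h)
    {
            h->list.prev = h->list.next = &h->list;
    }

    /* Returns 0 when 'ref' was linked at the tail, 1 when it was merged
     * (the caller then frees 'ref', mirroring kmem_cache_free()). */
    static int tail_merge(struct head *h, struct ref *ref)
    {
            struct ref *exist = h->list.prev;
            int mod;

            if (exist == &h->list || exist->type != ref->type ||
                exist->seq != ref->seq)
                    goto add_tail;

            if (exist->action == ref->action) {
                    mod = ref->ref_mod;
            } else if (exist->ref_mod < ref->ref_mod) {
                    exist->action = ref->action;    /* new action wins */
                    mod = -exist->ref_mod;
                    exist->ref_mod = ref->ref_mod;
            } else {
                    mod = -ref->ref_mod;
            }
            exist->ref_mod += mod;

            if (exist->ref_mod == 0) {      /* add and drop cancelled out */
                    exist->prev->next = exist->next;
                    exist->next->prev = exist->prev;
                    free(exist);
            }
            return 1;

    add_tail:
            ref->prev = h->list.prev;
            ref->next = &h->list;
            h->list.prev->next = ref;
            h->list.prev = ref;
            return 0;
    }

    int main(void)
    {
            struct head h;
            struct ref *r = calloc(1, sizeof(*r));

            if (!r)
                    return 1;
            head_init(&h);
            r->action = ADD_REF;
            r->ref_mod = 1;
            if (tail_merge(&h, r))
                    free(r);
            return 0;
    }

Opposite actions cancel through ref_mod: when an ADD meets a DROP the larger count wins, and a net count of zero unlinks the tail outright. That in-place cancellation is what makes the deleted merge_ref()/btrfs_merge_delayed_refs() pass redundant.
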
index 5eb0892396d017ba95645f7ab179a1c8a3696253..13fb5e6090fe0efc7f55dc9b09e9faac57a3c91b 100644 (file)
 #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
 #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
 
+/*
+ * XXX: Qu: I really hate the design that ref_head and tree/data ref share the
+ * same ref_node structure.
+ * Ref_head is at a higher logic level than tree/data ref, and the duplicated
+ * bytenr/num_bytes in ref_node are really a waste of memory; they should be
+ * referred to from ref_head.
+ * This gets more disgusting now that we use a list to store tree/data refs in
+ * ref_head. Must clean this mess up later.
+ */
 struct btrfs_delayed_ref_node {
+       /*
+        * ref_head uses an rb tree, stored in the delayed ref root's
+        * href_root and indexed by bytenr.
+        */
        struct rb_node rb_node;
 
+       /* data/tree refs use a list, stored in ref_head->ref_list. */
+       struct list_head list;
+
        /* the starting bytenr of the extent */
        u64 bytenr;
 
@@ -83,7 +99,7 @@ struct btrfs_delayed_ref_head {
        struct mutex mutex;
 
        spinlock_t lock;
-       struct rb_root ref_root;
+       struct list_head ref_list;
 
        struct rb_node href_node;
 
@@ -132,6 +148,9 @@ struct btrfs_delayed_ref_root {
        /* head ref rbtree */
        struct rb_root href_root;
 
+       /* dirty extent records */
+       struct rb_root dirty_extent_root;
+
        /* this spin lock protects the rbtree and the entries inside */
        spinlock_t lock;
 
@@ -156,6 +175,14 @@ struct btrfs_delayed_ref_root {
        int flushing;
 
        u64 run_delayed_start;
+
+       /*
+        * To make qgroup skip the given root.
+        * This is for snapshots, as btrfs_qgroup_inherit() will manually
+        * modify the counters for a snapshot and its source, so we should
+        * skip the snapshot in new_root/old_roots or it will be counted twice.
+        */
+       u64 qgroup_to_skip;
 };
 
 extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
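
The qgroup hooks above pre-allocate a btrfs_qgroup_extent_record per head ref, try to insert it into the new dirty_extent_root keyed by bytenr, and free the copy if the extent is already tracked. A sketch of that insert-or-free deduplication, with a plain unbalanced BST standing in for the kernel rb_root and every helper name hypothetical:

    #include <stdlib.h>

    struct qrecord {
            unsigned long long bytenr, num_bytes;
            struct qrecord *left, *right;
    };

    /* Returns the existing node for this bytenr, or NULL after inserting. */
    static struct qrecord *insert_dirty_extent(struct qrecord **root,
                                               struct qrecord *rec)
    {
            while (*root) {
                    if (rec->bytenr < (*root)->bytenr)
                            root = &(*root)->left;
                    else if (rec->bytenr > (*root)->bytenr)
                            root = &(*root)->right;
                    else
                            return *root;   /* already tracked */
            }
            rec->left = rec->right = NULL;
            *root = rec;
            return NULL;
    }

    static int record_extent(struct qrecord **root, unsigned long long bytenr,
                             unsigned long long num_bytes)
    {
            struct qrecord *rec = malloc(sizeof(*rec));

            if (!rec)
                    return -1;      /* kernel: fail before taking locks */
            rec->bytenr = bytenr;
            rec->num_bytes = num_bytes;
            if (insert_dirty_extent(root, rec))
                    free(rec);      /* extent already has a record */
            return 0;
    }

    int main(void)
    {
            struct qrecord *root = NULL;

            record_extent(&root, 4096, 16384);
            record_extent(&root, 4096, 16384);  /* duplicate: freed */
            return 0;
    }

Allocating up front keeps the actual insertion, done under delayed_refs->lock in the patch, free of GFP_NOFS allocations, at the cost of a throwaway kmalloc when the extent was already dirty.
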
index 0573848c73337f2f7848dfdd2fd05e31d5f26a56..862fbc206755511a6bd3a02eeaa525e992c545bc 100644 (file)
@@ -376,6 +376,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        WARN_ON(!tgt_device);
        dev_replace->tgtdev = tgt_device;
 
+       ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
+       if (ret)
+               btrfs_error(root->fs_info, ret, "kobj add dev failed");
+
        printk_in_rcu(KERN_INFO
                      "BTRFS: dev_replace from %s (devid %llu) to %s started\n",
                      src_device->missing ? "<missing disk>" :
@@ -583,8 +587,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        mutex_unlock(&uuid_mutex);
 
        /* replace the sysfs entry */
-       btrfs_kobj_rm_device(fs_info, src_device);
-       btrfs_kobj_add_device(fs_info, tgt_device);
+       btrfs_kobj_rm_device(fs_info->fs_devices, src_device);
        btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
 
        /* write back the superblocks */
index 0bccf18dc1dca062cd5cd6071dceedd0c3b333a4..3f43bfea3684a13e378fbdd3d364c0635e14daca 100644 (file)
@@ -1149,12 +1149,12 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 
        buf = btrfs_find_create_tree_block(root, bytenr);
        if (!buf)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
        if (ret) {
                free_extent_buffer(buf);
-               return NULL;
+               return ERR_PTR(ret);
        }
        return buf;
 
@@ -1509,20 +1509,19 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        generation = btrfs_root_generation(&root->root_item);
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     generation);
-       if (!root->node) {
-               ret = -ENOMEM;
+       if (IS_ERR(root->node)) {
+               ret = PTR_ERR(root->node);
                goto find_fail;
        } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
                ret = -EIO;
-               goto read_fail;
+               free_extent_buffer(root->node);
+               goto find_fail;
        }
        root->commit_root = btrfs_root_node(root);
 out:
        btrfs_free_path(path);
        return root;
 
-read_fail:
-       free_extent_buffer(root->node);
 find_fail:
        kfree(root);
 alloc_fail:
@@ -2320,8 +2319,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
 
        log_tree_root->node = read_tree_block(tree_root, bytenr,
                        fs_info->generation + 1);
-       if (!log_tree_root->node ||
-           !extent_buffer_uptodate(log_tree_root->node)) {
+       if (IS_ERR(log_tree_root->node)) {
+               printk(KERN_ERR "BTRFS: failed to read log tree\n");
+               ret = PTR_ERR(log_tree_root->node);
+               kfree(log_tree_root);
+               return ret;
+       } else if (!extent_buffer_uptodate(log_tree_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read log tree\n");
                free_extent_buffer(log_tree_root->node);
                kfree(log_tree_root);
@@ -2494,7 +2497,6 @@ int open_ctree(struct super_block *sb,
        seqlock_init(&fs_info->profiles_lock);
        init_rwsem(&fs_info->delayed_iput_sem);
 
-       init_completion(&fs_info->kobj_unregister);
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
@@ -2797,8 +2799,8 @@ int open_ctree(struct super_block *sb,
        chunk_root->node = read_tree_block(chunk_root,
                                           btrfs_super_chunk_root(disk_super),
                                           generation);
-       if (!chunk_root->node ||
-           !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+       if (IS_ERR(chunk_root->node) ||
+           !extent_buffer_uptodate(chunk_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
@@ -2834,8 +2836,8 @@ retry_root_backup:
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
                                          generation);
-       if (!tree_root->node ||
-           !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+       if (IS_ERR(tree_root->node) ||
+           !extent_buffer_uptodate(tree_root->node)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
 
@@ -2874,10 +2876,22 @@ retry_root_backup:
 
        btrfs_close_extra_devices(fs_devices, 1);
 
+       ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
+       if (ret) {
+               pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
+               goto fail_block_groups;
+       }
+
+       ret = btrfs_sysfs_add_device(fs_devices);
+       if (ret) {
+               pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
+               goto fail_fsdev_sysfs;
+       }
+
        ret = btrfs_sysfs_add_one(fs_info);
        if (ret) {
                pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
-               goto fail_block_groups;
+               goto fail_fsdev_sysfs;
        }
 
        ret = btrfs_init_space_info(fs_info);
@@ -3055,6 +3069,9 @@ fail_cleaner:
 fail_sysfs:
        btrfs_sysfs_remove_one(fs_info);
 
+fail_fsdev_sysfs:
+       btrfs_sysfs_remove_fsid(fs_info->fs_devices);
+
 fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
@@ -3725,6 +3742,7 @@ void close_ctree(struct btrfs_root *root)
        }
 
        btrfs_sysfs_remove_one(fs_info);
+       btrfs_sysfs_remove_fsid(fs_info->fs_devices);
 
        btrfs_free_fs_roots(fs_info);
 
@@ -4053,6 +4071,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
        while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
                struct btrfs_delayed_ref_head *head;
+               struct btrfs_delayed_ref_node *tmp;
                bool pin_bytes = false;
 
                head = rb_entry(node, struct btrfs_delayed_ref_head,
@@ -4068,11 +4087,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                        continue;
                }
                spin_lock(&head->lock);
-               while ((node = rb_first(&head->ref_root)) != NULL) {
-                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                                      rb_node);
+               list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
+                                                list) {
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &head->ref_root);
+                       list_del(&ref->list);
                        atomic_dec(&delayed_refs->num_entries);
                        btrfs_put_delayed_ref(ref);
                }
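
The disk-io.c hunks above move read_tree_block() from "NULL on any failure" to the kernel's ERR_PTR convention, so callers can tell -ENOMEM from -EIO and propagate the real error. A self-contained userspace rendition of the convention (ERR_PTR/PTR_ERR/IS_ERR are re-implemented here for illustration; in the kernel they come from linux/err.h):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            /* the top page of the address space never holds real objects */
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static void *read_block(int simulate_eio)
    {
            void *buf = malloc(4096);

            if (!buf)
                    return ERR_PTR(-ENOMEM);
            if (simulate_eio) {             /* e.g. checksum mismatch */
                    free(buf);
                    return ERR_PTR(-EIO);
            }
            return buf;
    }

    int main(void)
    {
            void *node = read_block(1);

            if (IS_ERR(node)) {
                    fprintf(stderr, "read failed: %ld\n", PTR_ERR(node));
                    return 1;
            }
            free(node);
            return 0;
    }

With distinct error codes in hand, btrfs_replay_log() and btrfs_read_tree_root() above can return the precise failure instead of collapsing everything into one errno.
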
index 0ec3acd14cbf5e1273f331231f09165710d80d91..38b76cc02f486db1e9d7f887860aba8137d61205 100644 (file)
@@ -79,11 +79,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                              u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               u64 bytenr, u64 num_bytes, u64 parent,
+                               struct btrfs_delayed_ref_node *node, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
-                               struct btrfs_delayed_extent_op *extra_op,
-                               int no_quota);
+                               struct btrfs_delayed_extent_op *extra_op);
 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
                                    struct extent_buffer *leaf,
                                    struct btrfs_extent_item *ei);
@@ -1967,10 +1966,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
-                                 u64 bytenr, u64 num_bytes,
+                                 struct btrfs_delayed_ref_node *node,
                                  u64 parent, u64 root_objectid,
                                  u64 owner, u64 offset, int refs_to_add,
-                                 int no_quota,
                                  struct btrfs_delayed_extent_op *extent_op)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1978,9 +1976,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_extent_item *item;
        struct btrfs_key key;
+       u64 bytenr = node->bytenr;
+       u64 num_bytes = node->num_bytes;
        u64 refs;
        int ret;
-       enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
+       int no_quota = node->no_quota;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -1996,26 +1996,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                           bytenr, num_bytes, parent,
                                           root_objectid, owner, offset,
                                           refs_to_add, extent_op);
-       if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
+       if ((ret < 0 && ret != -EAGAIN) || !ret)
                goto out;
-       /*
-        * Ok we were able to insert an inline extent and it appears to be a new
-        * reference, deal with the qgroup accounting.
-        */
-       if (!ret && !no_quota) {
-               ASSERT(root->fs_info->quota_enabled);
-               leaf = path->nodes[0];
-               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               item = btrfs_item_ptr(leaf, path->slots[0],
-                                     struct btrfs_extent_item);
-               if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
-                       type = BTRFS_QGROUP_OPER_ADD_SHARED;
-               btrfs_release_path(path);
-
-               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                             bytenr, num_bytes, type, 0);
-               goto out;
-       }
 
        /*
         * Ok we had -EAGAIN which means we didn't have space to insert and
@@ -2026,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, item);
-       if (refs)
-               type = BTRFS_QGROUP_OPER_ADD_SHARED;
        btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, item);
@@ -2035,13 +2015,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 
-       if (!no_quota) {
-               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                             bytenr, num_bytes, type, 0);
-               if (ret)
-                       goto out;
-       }
-
        path->reada = 1;
        path->leave_spinning = 1;
        /* now insert the actual backref */
@@ -2087,17 +2060,15 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
                                                 ref->objectid, ref->offset,
                                                 &ins, node->ref_mod);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
-               ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
-                                            node->num_bytes, parent,
+               ret = __btrfs_inc_extent_ref(trans, root, node, parent,
                                             ref_root, ref->objectid,
                                             ref->offset, node->ref_mod,
-                                            node->no_quota, extent_op);
+                                            extent_op);
        } else if (node->action == BTRFS_DROP_DELAYED_REF) {
-               ret = __btrfs_free_extent(trans, root, node->bytenr,
-                                         node->num_bytes, parent,
+               ret = __btrfs_free_extent(trans, root, node, parent,
                                          ref_root, ref->objectid,
                                          ref->offset, node->ref_mod,
-                                         extent_op, node->no_quota);
+                                         extent_op);
        } else {
                BUG();
        }
@@ -2255,15 +2226,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                                                ref->level, &ins,
                                                node->no_quota);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
-               ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
-                                            node->num_bytes, parent, ref_root,
-                                            ref->level, 0, 1, node->no_quota,
+               ret = __btrfs_inc_extent_ref(trans, root, node,
+                                            parent, ref_root,
+                                            ref->level, 0, 1,
                                             extent_op);
        } else if (node->action == BTRFS_DROP_DELAYED_REF) {
-               ret = __btrfs_free_extent(trans, root, node->bytenr,
-                                         node->num_bytes, parent, ref_root,
-                                         ref->level, 0, 1, extent_op,
-                                         node->no_quota);
+               ret = __btrfs_free_extent(trans, root, node,
+                                         parent, ref_root,
+                                         ref->level, 0, 1, extent_op);
        } else {
                BUG();
        }
@@ -2323,28 +2293,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static noinline struct btrfs_delayed_ref_node *
+static inline struct btrfs_delayed_ref_node *
 select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
-       struct rb_node *node;
-       struct btrfs_delayed_ref_node *ref, *last = NULL;;
+       if (list_empty(&head->ref_list))
+               return NULL;
 
-       /*
-        * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
-        * this prevents ref count from going down to zero when
-        * there still are pending delayed ref.
-        */
-       node = rb_first(&head->ref_root);
-       while (node) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                               rb_node);
-               if (ref->action == BTRFS_ADD_DELAYED_REF)
-                       return ref;
-               else if (last == NULL)
-                       last = ref;
-               node = rb_next(node);
-       }
-       return last;
+       return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
+                         list);
 }
 
 /*
@@ -2396,16 +2352,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        }
                }
 
-               /*
-                * We need to try and merge add/drops of the same ref since we
-                * can run into issues with relocate dropping the implicit ref
-                * and then it being added back again before the drop can
-                * finish.  If we merged anything we need to re-loop so we can
-                * get a good ref.
-                */
                spin_lock(&locked_ref->lock);
-               btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
-                                        locked_ref);
 
                /*
                 * locked_ref is the head node, so we have to go one
@@ -2482,7 +2429,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        spin_unlock(&locked_ref->lock);
                        spin_lock(&delayed_refs->lock);
                        spin_lock(&locked_ref->lock);
-                       if (rb_first(&locked_ref->ref_root) ||
+                       if (!list_empty(&locked_ref->ref_list) ||
                            locked_ref->extent_op) {
                                spin_unlock(&locked_ref->lock);
                                spin_unlock(&delayed_refs->lock);
@@ -2496,7 +2443,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                } else {
                        actual_count++;
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &locked_ref->ref_root);
+                       list_del(&ref->list);
                }
                atomic_dec(&delayed_refs->num_entries);
 
@@ -2864,9 +2811,6 @@ again:
                goto again;
        }
 out:
-       ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
-       if (ret)
-               return ret;
        assert_qgroups_uptodate(trans);
        return 0;
 }
@@ -2905,7 +2849,6 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_data_ref *data_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct rb_node *node;
        int ret = 0;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -2934,11 +2877,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        spin_unlock(&delayed_refs->lock);
 
        spin_lock(&head->lock);
-       node = rb_first(&head->ref_root);
-       while (node) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-
+       list_for_each_entry(ref, &head->ref_list, list) {
                /* If it's a shared ref we know a cross reference exists */
                if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
                        ret = 1;
@@ -3693,7 +3632,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                found->disk_total += total_bytes * factor;
                found->bytes_used += bytes_used;
                found->disk_used += bytes_used * factor;
-               found->full = 0;
+               if (total_bytes > 0)
+                       found->full = 0;
                spin_unlock(&found->lock);
                *space_info = found;
                return 0;
@@ -3721,7 +3661,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->bytes_reserved = 0;
        found->bytes_readonly = 0;
        found->bytes_may_use = 0;
-       found->full = 0;
+       if (total_bytes > 0)
+               found->full = 0;
+       else
+               found->full = 1;
        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
        found->chunk_alloc = 0;
        found->flush = 0;
@@ -3975,6 +3918,9 @@ commit_trans:
                    !atomic_read(&root->fs_info->open_ioctl_trans)) {
                        need_commit--;
 
+                       if (need_commit > 0)
+                               btrfs_wait_ordered_roots(fs_info, -1);
+
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
@@ -4088,7 +4034,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
        return 1;
 }
 
-static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
+static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
 {
        u64 num_dev;
 
@@ -4102,24 +4048,43 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
        else
                num_dev = 1;    /* DUP or single */
 
-       /* metadata for updaing devices and chunk tree */
-       return btrfs_calc_trans_metadata_size(root, num_dev + 1);
+       return num_dev;
 }
 
-static void check_system_chunk(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root, u64 type)
+/*
+ * Reserve, in the system space info, the metadata space necessary for
+ * updating the device items and adding or removing one chunk item, when
+ * allocating or removing a chunk.
+ */
+void check_system_chunk(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       u64 type)
 {
        struct btrfs_space_info *info;
        u64 left;
        u64 thresh;
+       int ret = 0;
+       u64 num_devs;
+
+       /*
+        * Needed because we can end up allocating a system chunk here, and
+        * the space reservation in the chunk block reserve must be atomic
+        * and race free.
+        */
+       ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
 
        info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
        spin_lock(&info->lock);
        left = info->total_bytes - info->bytes_used - info->bytes_pinned -
-               info->bytes_reserved - info->bytes_readonly;
+               info->bytes_reserved - info->bytes_readonly -
+               info->bytes_may_use;
        spin_unlock(&info->lock);
 
-       thresh = get_system_chunk_thresh(root, type);
+       num_devs = get_profile_num_devs(root, type);
+
+       /* num_devs device items to update and 1 chunk item to add or remove */
+       thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
+               btrfs_calc_trans_metadata_size(root, 1);
+
        if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
                btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
                        left, thresh, type);
@@ -4130,7 +4095,21 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
                u64 flags;
 
                flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
-               btrfs_alloc_chunk(trans, root, flags);
+               /*
+                * Ignore failure to create system chunk. We might end up not
+                * needing it, as we might not need to COW all nodes/leafs from
+                * the paths we visit in the chunk tree (they were already COWed
+                * or created in the current transaction for example).
+                */
+               ret = btrfs_alloc_chunk(trans, root, flags);
+       }
+
+       if (!ret) {
+               ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
+                                         &root->fs_info->chunk_block_rsv,
+                                         thresh, BTRFS_RESERVE_NO_FLUSH);
+               if (!ret)
+                       trans->chunk_bytes_reserved += thresh;
        }
 }
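
The reworked check_system_chunk() above sizes its reservation for the worst case: each of num_devs device item updates plus one chunk item insertion or deletion may COW a full path of tree blocks. A back-of-the-envelope model of that arithmetic; the formula and constants are illustrative, not the kernel's exact btrfs_calc_trans_metadata_size()/btrfs_calc_trunc_metadata_size() definitions:

    #include <stdio.h>

    #define BTRFS_MAX_LEVEL 8

    /* worst case: one COWed tree block per level, per item touched */
    static unsigned long long worst_case_cow_bytes(unsigned long long nodesize,
                                                   unsigned num_items)
    {
            return nodesize * BTRFS_MAX_LEVEL * num_items;
    }

    int main(void)
    {
            unsigned long long nodesize = 16384;    /* common node size */
            unsigned num_devs = 2;                  /* e.g. RAID1 profile */

            /* num_devs device items to update + 1 chunk item to add/remove */
            unsigned long long thresh =
                    worst_case_cow_bytes(nodesize, num_devs) +
                    worst_case_cow_bytes(nodesize, 1);

            printf("reserve %llu bytes in the system space info\n", thresh);
            return 0;
    }

On success the reserved amount is accounted in trans->chunk_bytes_reserved and handed back by the new btrfs_trans_release_chunk_metadata() once pending block groups have been created, which is why that release helper warns when trans->new_bgs is not empty.
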
 
@@ -5188,6 +5167,24 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
        trans->bytes_reserved = 0;
 }
 
+/*
+ * To be called after all the new block groups attached to the transaction
+ * handle have been created (btrfs_create_pending_block_groups()).
+ */
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_fs_info *fs_info = trans->root->fs_info;
+
+       if (!trans->chunk_bytes_reserved)
+               return;
+
+       WARN_ON_ONCE(!list_empty(&trans->new_bgs));
+
+       block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
+                               trans->chunk_bytes_reserved);
+       trans->chunk_bytes_reserved = 0;
+}
+
 /* Can only return 0 or -ENOSPC */
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode)
@@ -6092,11 +6089,10 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
 
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               u64 bytenr, u64 num_bytes, u64 parent,
+                               struct btrfs_delayed_ref_node *node, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
-                               struct btrfs_delayed_extent_op *extent_op,
-                               int no_quota)
+                               struct btrfs_delayed_extent_op *extent_op)
 {
        struct btrfs_key key;
        struct btrfs_path *path;
@@ -6110,10 +6106,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        int extent_slot = 0;
        int found_extent = 0;
        int num_to_del = 1;
+       int no_quota = node->no_quota;
        u32 item_size;
        u64 refs;
+       u64 bytenr = node->bytenr;
+       u64 num_bytes = node->num_bytes;
        int last_ref = 0;
-       enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);
 
@@ -6294,7 +6292,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        refs -= refs_to_drop;
 
        if (refs > 0) {
-               type = BTRFS_QGROUP_OPER_SUB_SHARED;
                if (extent_op)
                        __run_delayed_extent_op(extent_op, leaf, ei);
                /*
@@ -6356,18 +6353,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(path);
 
-       /* Deal with the quota accounting */
-       if (!ret && last_ref && !no_quota) {
-               int mod_seq = 0;
-
-               if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
-                   type == BTRFS_QGROUP_OPER_SUB_SHARED)
-                       mod_seq = 1;
-
-               ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
-                                             bytenr, num_bytes, type,
-                                             mod_seq);
-       }
 out:
        btrfs_free_path(path);
        return ret;
@@ -6393,7 +6378,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
                goto out_delayed_unlock;
 
        spin_lock(&head->lock);
-       if (rb_first(&head->ref_root))
+       if (!list_empty(&head->ref_list))
                goto out;
 
        if (head->extent_op) {
@@ -7303,13 +7288,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       /* Always set parent to 0 here since its exclusive anyway. */
-       ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                     ins->objectid, ins->offset,
-                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
-       if (ret)
-               return ret;
-
        ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -7391,14 +7369,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
-       if (!no_quota) {
-               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                             ins->objectid, num_bytes,
-                                             BTRFS_QGROUP_OPER_ADD_EXCL, 0);
-               if (ret)
-                       return ret;
-       }
-
        ret = update_block_group(trans, root, ins->objectid, root->nodesize,
                                 1);
        if (ret) { /* -ENOENT, logic error */
@@ -7755,12 +7725,18 @@ reada:
        wc->reada_slot = slot;
 }
 
+/*
+ * TODO: Modify the related functions to add the affected nodes/leaves to
+ * dirty_extent_root for later qgroup accounting.
+ *
+ * Currently, this function does nothing.
+ */
 static int account_leaf_items(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *eb)
 {
        int nr = btrfs_header_nritems(eb);
-       int i, extent_type, ret;
+       int i, extent_type;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        u64 bytenr, num_bytes;
@@ -7783,13 +7759,6 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
                        continue;
 
                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
-
-               ret = btrfs_qgroup_record_ref(trans, root->fs_info,
-                                             root->objectid,
-                                             bytenr, num_bytes,
-                                             BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
-               if (ret)
-                       return ret;
        }
        return 0;
 }
@@ -7858,6 +7827,8 @@ static int adjust_slots_upwards(struct btrfs_root *root,
 
 /*
  * root_eb is the subtree root and is locked before this function is called.
+ * TODO: Modify this function to add all extents (including completely shared
+ * nodes) to dirty_extent_root so they get accounted by qgroup.
  */
 static int account_shared_subtree(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
@@ -7920,7 +7891,11 @@ walk_down:
                        child_gen = btrfs_node_ptr_generation(eb, parent_slot);
 
                        eb = read_tree_block(root, child_bytenr, child_gen);
-                       if (!eb || !extent_buffer_uptodate(eb)) {
+                       if (IS_ERR(eb)) {
+                               ret = PTR_ERR(eb);
+                               goto out;
+                       } else if (!extent_buffer_uptodate(eb)) {
+                               free_extent_buffer(eb);
                                ret = -EIO;
                                goto out;
                        }
@@ -7931,16 +7906,6 @@ walk_down:
                        btrfs_tree_read_lock(eb);
                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
-
-                       ret = btrfs_qgroup_record_ref(trans, root->fs_info,
-                                               root->objectid,
-                                               child_bytenr,
-                                               root->nodesize,
-                                               BTRFS_QGROUP_OPER_SUB_SUBTREE,
-                                               0);
-                       if (ret)
-                               goto out;
-
                }
 
                if (level == 0) {
@@ -8151,7 +8116,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                if (reada && level == 1)
                        reada_walk_down(trans, root, wc, path);
                next = read_tree_block(root, bytenr, generation);
-               if (!next || !extent_buffer_uptodate(next)) {
+               if (IS_ERR(next)) {
+                       return PTR_ERR(next);
+               } else if (!extent_buffer_uptodate(next)) {
                        free_extent_buffer(next);
                        return -EIO;
                }
@@ -8533,24 +8500,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                goto out_end_trans;
                        }
 
-                       /*
-                        * Qgroup update accounting is run from
-                        * delayed ref handling. This usually works
-                        * out because delayed refs are normally the
-                        * only way qgroup updates are added. However,
-                        * we may have added updates during our tree
-                        * walk so run qgroups here to make sure we
-                        * don't lose any updates.
-                        */
-                       ret = btrfs_delayed_qgroup_accounting(trans,
-                                                             root->fs_info);
-                       if (ret)
-                               printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
-                                                  "running qgroup updates "
-                                                  "during snapshot delete. "
-                                                  "Quota is out of sync, "
-                                                  "rescan required.\n", ret);
-
                        btrfs_end_transaction_throttle(trans, tree_root);
                        if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
                                pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8604,14 +8553,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        }
        root_dropped = true;
 out_end_trans:
-       ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
-       if (ret)
-               printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
-                                  "running qgroup updates "
-                                  "during snapshot delete. "
-                                  "Quota is out of sync, "
-                                  "rescan required.\n", ret);
-
        btrfs_end_transaction_throttle(trans, tree_root);
 out_free:
        kfree(wc);
@@ -9562,6 +9503,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        free_excluded_extents(root, cache);
 
+       /*
+        * Call to ensure the corresponding space_info object is created and
+        * assigned to our block group, but don't update its counters just yet.
+        * We want our bg to be added to the rbtree with its ->space_info set.
+        */
+       ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+                               &cache->space_info);
+       if (ret) {
+               btrfs_remove_free_space_cache(cache);
+               btrfs_put_block_group(cache);
+               return ret;
+       }
+
        ret = btrfs_add_block_group_cache(root->fs_info, cache);
        if (ret) {
                btrfs_remove_free_space_cache(cache);
@@ -9569,6 +9523,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                return ret;
        }
 
+       /*
+        * Now that our block group has its ->space_info set and is inserted in
+        * the rbtree, update the space info's counters.
+        */
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
                                &cache->space_info);
        if (ret) {
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
new file mode 100644 (file)
index 0000000..e69de29
index c374e1e71e5f3e1b80713f39fc4338fb659a8c90..02d05817cbdfe8330add4bbefd424ea1058cb09d 100644 (file)
@@ -1277,7 +1277,12 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      unsigned bits, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
+       int wake = 0;
+
+       if (bits & EXTENT_LOCKED)
+               wake = 1;
+
+       return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
 }
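
The hunk above derives the wake flag from the bits being cleared: waiters sleep on locked ranges, so they only need waking when EXTENT_LOCKED itself is among the cleared bits. A minimal user-space model of that decision (the EXTENT_LOCKED value here is a hypothetical stand-in for the kernel's bit definition):

#include <stdio.h>

/* Hypothetical stand-in for the kernel's EXTENT_LOCKED bit. */
#define EXTENT_LOCKED (1U << 0)

/* Waiters sleep on locked ranges, so wake only when the lock bit
 * itself is being cleared. */
static int wake_for_bits(unsigned bits)
{
        return (bits & EXTENT_LOCKED) ? 1 : 0;
}

int main(void)
{
        printf("%d\n", wake_for_bits(EXTENT_LOCKED)); /* 1: wake waiters */
        printf("%d\n", wake_for_bits(1U << 3));       /* 0: nothing to wake */
        return 0;
}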
 
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
@@ -4490,6 +4495,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                }
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                       flags |= FIEMAP_EXTENT_UNWRITTEN;
 
                free_extent_map(em);
                em = NULL;
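
The added lines report preallocated extents as unwritten to fiemap callers, since prealloc space is allocated but holds no data yet. A small user-space sketch of the flag translation; the EXTENT_FLAG_* values are hypothetical stand-ins (the kernel tests bit numbers via test_bit()), while the FIEMAP_EXTENT_* values match linux/fiemap.h:

#include <stdio.h>

#define EXTENT_FLAG_COMPRESSED  (1U << 0)  /* hypothetical mask */
#define EXTENT_FLAG_PREALLOC    (1U << 1)  /* hypothetical mask */
#define FIEMAP_EXTENT_ENCODED   0x00000008U
#define FIEMAP_EXTENT_UNWRITTEN 0x00000800U

static unsigned fiemap_flags(unsigned em_flags)
{
        unsigned flags = 0;

        if (em_flags & EXTENT_FLAG_COMPRESSED)
                flags |= FIEMAP_EXTENT_ENCODED;
        if (em_flags & EXTENT_FLAG_PREALLOC)
                flags |= FIEMAP_EXTENT_UNWRITTEN;
        return flags;
}

int main(void)
{
        printf("0x%x\n", fiemap_flags(EXTENT_FLAG_PREALLOC)); /* 0x800 */
        return 0;
}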
index b072e17479aa8e26a32ede03fa8b5315ca1f9a7e..795d754327a7277de47d13e0f1426aaa5c7fd85c 100644 (file)
@@ -1868,6 +1868,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        struct btrfs_log_ctx ctx;
        int ret = 0;
        bool full_sync = 0;
+       const u64 len = end - start + 1;
 
        trace_btrfs_sync_file(file, datasync);
 
@@ -1896,7 +1897,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 * all extents are persisted and the respective file extent
                 * items are in the fs/subvol btree.
                 */
-               ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
+               ret = btrfs_wait_ordered_range(inode, start, len);
        } else {
                /*
                 * Start any new ordered operations before starting to log the
@@ -1968,8 +1969,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        smp_mb();
        if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-           (full_sync && BTRFS_I(inode)->last_trans <=
-            root->fs_info->last_trans_committed)) {
+           (BTRFS_I(inode)->last_trans <=
+            root->fs_info->last_trans_committed &&
+            (full_sync ||
+             !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
                /*
                 * We've had everything committed since the last time we were
                 * modified, so clear this flag in case it was set for whatever
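
The widened fast-path condition above can be read as a single predicate: skip the expensive log/commit work if the inode is already in the log, or if its last modifying transaction has been committed and either a full sync was requested or no ordered extents remain in the range. A user-space model of that predicate, with all field names and values hypothetical:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the fields consulted by btrfs_sync_file(). */
struct sync_state {
        bool inode_in_log;
        uint64_t last_trans;            /* inode's last modifying transaction */
        uint64_t last_trans_committed;  /* fs-wide last committed transaction */
        bool full_sync;
        bool ordered_in_range;  /* btrfs_have_ordered_extents_in_range() */
};

static bool can_skip_log(const struct sync_state *s)
{
        return s->inode_in_log ||
               (s->last_trans <= s->last_trans_committed &&
                (s->full_sync || !s->ordered_in_range));
}

int main(void)
{
        struct sync_state s = { false, 5, 7, false, false };

        printf("skip=%d\n", can_skip_log(&s)); /* 1: committed, no ordered extents */
        s.ordered_in_range = true;
        printf("skip=%d\n", can_skip_log(&s)); /* 0: pending ordered extent */
        return 0;
}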
index 9dbe5b548fa6a74029960de0ea1d8ebf63f835e8..fb5a6b1c62a65684b73ab7b0f6a2f2d918615935 100644 (file)
@@ -231,6 +231,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
 {
        int ret = 0;
        struct btrfs_path *path = btrfs_alloc_path();
+       bool locked = false;
 
        if (!path) {
                ret = -ENOMEM;
@@ -238,6 +239,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
        }
 
        if (block_group) {
+               locked = true;
                mutex_lock(&trans->transaction->cache_write_mutex);
                if (!list_empty(&block_group->io_list)) {
                        list_del_init(&block_group->io_list);
@@ -269,18 +271,14 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         */
        ret = btrfs_truncate_inode_items(trans, root, inode,
                                         0, BTRFS_EXTENT_DATA_KEY);
-       if (ret) {
-               mutex_unlock(&trans->transaction->cache_write_mutex);
-               btrfs_abort_transaction(trans, root, ret);
-               return ret;
-       }
+       if (ret)
+               goto fail;
 
        ret = btrfs_update_inode(trans, root, inode);
 
-       if (block_group)
-               mutex_unlock(&trans->transaction->cache_write_mutex);
-
 fail:
+       if (locked)
+               mutex_unlock(&trans->transaction->cache_write_mutex);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
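
The cleanup above replaces a duplicated unlock-and-abort sequence with a single exit path guarded by a locked flag. A minimal pthread sketch of the same single-exit pattern (names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t cache_write_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Remember whether the lock was taken and release it exactly once on
 * the shared exit path, instead of duplicating the unlock at every
 * early return. */
static int truncate_cache(bool have_block_group)
{
        bool locked = false;
        int ret = 0;

        if (have_block_group) {
                locked = true;
                pthread_mutex_lock(&cache_write_mutex);
        }

        /* ... work that may set ret and fall through to the exit path ... */

        if (locked)
                pthread_mutex_unlock(&cache_write_mutex);
        return ret;
}

int main(void)
{
        printf("%d\n", truncate_cache(true));
        printf("%d\n", truncate_cache(false));
        return 0;
}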
 
index 8bb013672aee061e81eb03fcba6d51db9cd169af..855935f6671ae59b1b025c3916d553bc689191ef 100644 (file)
@@ -4986,24 +4986,40 @@ static void evict_inode_truncate_pages(struct inode *inode)
        }
        write_unlock(&map_tree->lock);
 
+       /*
+        * Keep looping until we have no more ranges in the io tree.
+        * We can have ongoing bios started by readpages (called from readahead)
+        * that didn't get their end io callbacks called yet or that are still
+        * in progress (extent_io.c:end_bio_extent_readpage()). This means some
+        * ranges can still be locked and eviction started, because inode
+        * references (inode->i_count), which would be dropped in each bio's
+        * end io callback, were not taken before submitting those bios, which
+        * are executed by a separate task (a work queue kthread).
+        * Therefore here we effectively end up waiting for those bios and for
+        * anyone else holding locked ranges without having bumped the inode's
+        * reference count - if we don't, by the time they access the inode's
+        * io_tree to unlock a range it may be too late, leading to a
+        * use-after-free issue.
+        */
        spin_lock(&io_tree->lock);
        while (!RB_EMPTY_ROOT(&io_tree->state)) {
                struct extent_state *state;
                struct extent_state *cached_state = NULL;
+               u64 start;
+               u64 end;
 
                node = rb_first(&io_tree->state);
                state = rb_entry(node, struct extent_state, rb_node);
-               atomic_inc(&state->refs);
+               start = state->start;
+               end = state->end;
                spin_unlock(&io_tree->lock);
 
-               lock_extent_bits(io_tree, state->start, state->end,
-                                0, &cached_state);
-               clear_extent_bit(io_tree, state->start, state->end,
+               lock_extent_bits(io_tree, start, end, 0, &cached_state);
+               clear_extent_bit(io_tree, start, end,
                                 EXTENT_LOCKED | EXTENT_DIRTY |
                                 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
                                 EXTENT_DEFRAG, 1, 1,
                                 &cached_state, GFP_NOFS);
-               free_extent_state(state);
 
                cond_resched();
                spin_lock(&io_tree->lock);
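
The fix above stops holding a reference to the extent_state across the unlock; instead the needed fields are copied while the tree lock is held. A small pthread model of the copy-then-unlock pattern (types and names hypothetical):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct state {
        uint64_t start, end;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct state *first;     /* stands in for rb_first(&io_tree->state) */

static void process_first_range(void)
{
        uint64_t start, end;

        pthread_mutex_lock(&lock);
        start = first->start;   /* snapshot the fields under the lock ... */
        end = first->end;
        pthread_mutex_unlock(&lock);

        /* ... and only use the copies afterwards; another task may have
         * freed the node once the lock was dropped. */
        printf("clearing [%llu, %llu]\n",
               (unsigned long long)start, (unsigned long long)end);
}

int main(void)
{
        struct state s = { 0, 4095 };

        first = &s;
        process_first_range();
        return 0;
}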
index 1c22c65185045c61b170db3f1db8f2c3627bbd84..c86b835da7a8739ec16396d589ccb651278ab44d 100644 (file)
@@ -553,8 +553,8 @@ static noinline int create_subvol(struct inode *dir,
        key.offset = (u64)-1;
        new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
        if (IS_ERR(new_root)) {
-               btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
                ret = PTR_ERR(new_root);
+               btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
 
@@ -1318,7 +1318,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                i = range->start >> PAGE_CACHE_SHIFT;
        }
        if (!max_to_defrag)
-               max_to_defrag = last_index + 1;
+               max_to_defrag = last_index - i + 1;
 
        /*
         * make writeback starts from i, so the defrag range can be
@@ -1368,7 +1368,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        ra_index = max(i, ra_index);
                        btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
                                       cluster);
-                       ra_index += max_cluster;
+                       ra_index += cluster;
                }
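
Both one-liners above fix off-by arithmetic: a defrag of pages [i, last_index] covers last_index - i + 1 pages, and readahead must advance by the pages actually requested (cluster), not by the upper bound max_cluster. A worked example with hypothetical numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t i = 10, last_index = 19;
        /* pages [10, 19] really are 10 pages, not last_index + 1 = 20 */
        uint64_t max_to_defrag = last_index - i + 1;

        /* readahead advances by what was requested (cluster), not by the
         * upper bound max_cluster, or it would skip unread pages */
        uint64_t ra_index = i, cluster = 4;
        ra_index += cluster;

        printf("max_to_defrag=%llu next_ra=%llu\n",
               (unsigned long long)max_to_defrag,
               (unsigned long long)ra_index);
        return 0;
}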
 
                mutex_lock(&inode->i_mutex);
@@ -2271,10 +2271,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 {
         struct btrfs_ioctl_ino_lookup_args *args;
         struct inode *inode;
-        int ret;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
+       int ret = 0;
 
        args = memdup_user(argp, sizeof(*args));
        if (IS_ERR(args))
@@ -2282,13 +2279,28 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 
        inode = file_inode(file);
 
+       /*
+        * Unprivileged query to obtain the containing subvolume root id. The
+        * path is reset so it's consistent with btrfs_search_path_in_tree.
+        */
        if (args->treeid == 0)
                args->treeid = BTRFS_I(inode)->root->root_key.objectid;
 
+       if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+               args->name[0] = 0;
+               goto out;
+       }
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               ret = -EPERM;
+               goto out;
+       }
+
        ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
                                        args->treeid, args->objectid,
                                        args->name);
 
+out:
        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
                ret = -EFAULT;
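
The reordered checks above relax the ioctl: any user may now ask for the id of the subvolume containing the file (objectid == BTRFS_FIRST_FREE_OBJECTID, returning an empty path), while resolving an arbitrary objectid's path still requires CAP_SYS_ADMIN. A user-space model of the permission logic; cap_sys_admin stands in for capable(CAP_SYS_ADMIN) and the error value is a placeholder:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BTRFS_FIRST_FREE_OBJECTID 256ULL
#define ERR_EPERM (-1)

static int ino_lookup(uint64_t objectid, bool cap_sys_admin, char *name)
{
        if (objectid == BTRFS_FIRST_FREE_OBJECTID) {
                name[0] = '\0';         /* root-id-only query, empty path */
                return 0;
        }
        if (!cap_sys_admin)
                return ERR_EPERM;
        /* ... btrfs_search_path_in_tree() would fill 'name' here ... */
        return 0;
}

int main(void)
{
        char name[64];

        printf("%d\n", ino_lookup(BTRFS_FIRST_FREE_OBJECTID, false, name)); /* 0 */
        printf("%d\n", ino_lookup(12345, false, name));                     /* -1 */
        return 0;
}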
 
@@ -2413,8 +2425,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out_unlock_inode;
        }
 
-       d_invalidate(dentry);
-
        down_write(&root->fs_info->subvol_sem);
 
        err = may_destroy_subvol(dest);
@@ -2508,7 +2518,7 @@ out_up_write:
 out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
        if (!err) {
-               shrink_dcache_sb(root->fs_info->sb);
+               d_invalidate(dentry);
                btrfs_invalidate_inodes(dest);
                d_delete(dentry);
                ASSERT(dest->send_in_progress == 0);
@@ -2879,12 +2889,19 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
        return ret;
 }
 
-static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
+                                    u64 olen)
 {
+       u64 len = *plen;
        u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
 
-       if (off + len > inode->i_size || off + len < off)
+       if (off + olen > inode->i_size || off + olen < off)
                return -EINVAL;
+
+       /* if we extend to eof, continue to block boundary */
+       if (off + len == inode->i_size)
+               *plen = len = ALIGN(inode->i_size, bs) - off;
+
        /* Check that we are block aligned - btrfs_clone() requires this */
        if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
                return -EINVAL;
@@ -2892,10 +2909,11 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
        return 0;
 }
 
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                             struct inode *dst, u64 dst_loff)
 {
        int ret;
+       u64 len = olen;
 
        /*
         * btrfs_clone() can't handle extents in the same file
@@ -2910,11 +2928,11 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
 
        btrfs_double_lock(src, loff, dst, dst_loff, len);
 
-       ret = extent_same_check_offsets(src, loff, len);
+       ret = extent_same_check_offsets(src, loff, &len, olen);
        if (ret)
                goto out_unlock;
 
-       ret = extent_same_check_offsets(dst, dst_loff, len);
+       ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
        if (ret)
                goto out_unlock;
 
@@ -2927,7 +2945,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
 
        ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
        if (ret == 0)
-               ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+               ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
 
 out_unlock:
        btrfs_double_unlock(src, loff, dst, dst_loff, len);
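
The new olen/len split above lets the extent-same ioctl dedupe a file's final partial block: when the requested range ends exactly at i_size, len is extended to the block boundary before the alignment checks run. A user-space sketch of that check, with hypothetical sizes:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

static int check_offsets(uint64_t i_size, uint64_t bs, uint64_t off,
                         uint64_t *plen, uint64_t olen)
{
        uint64_t len = *plen;

        if (off + olen > i_size || off + olen < off)
                return -1;              /* -EINVAL in the kernel */
        /* range ends at EOF: extend to the block boundary */
        if (off + len == i_size)
                *plen = len = ALIGN_UP(i_size, bs) - off;
        if (off % bs || (off + len) % bs)
                return -1;
        return 0;
}

int main(void)
{
        uint64_t len = 904;

        /* 5000-byte file, 4K blocks: range [4096, 5000) ends at EOF,
         * so len is extended from 904 to 4096. */
        if (check_offsets(5000, 4096, 4096, &len, 904) == 0)
                printf("aligned len=%llu\n", (unsigned long long)len);
        return 0;
}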
index 760c4a5e096b4d5a403f7923ad4b65537a085886..89656d799ff6fa3772b52cb17d535ebdc1cbb910 100644 (file)
@@ -198,9 +198,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->file_offset = file_offset;
        entry->start = start;
        entry->len = len;
-       if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
-           !(type == BTRFS_ORDERED_NOCOW))
-               entry->csum_bytes_left = disk_len;
        entry->disk_len = disk_len;
        entry->bytes_left = len;
        entry->inode = igrab(inode);
@@ -286,10 +283,6 @@ void btrfs_add_ordered_sum(struct inode *inode,
        tree = &BTRFS_I(inode)->ordered_tree;
        spin_lock_irq(&tree->lock);
        list_add_tail(&sum->list, &entry->list);
-       WARN_ON(entry->csum_bytes_left < sum->len);
-       entry->csum_bytes_left -= sum->len;
-       if (entry->csum_bytes_left == 0)
-               wake_up(&entry->wait);
        spin_unlock_irq(&tree->lock);
 }
 
@@ -509,7 +502,21 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
                wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
                                                   &ordered->flags));
 
-               list_add_tail(&ordered->trans_list, &trans->ordered);
+               /*
+                * If our ordered extent completed, it means it updated the
+                * fs/subvol and csum trees already, so no need to make the
+                * current transaction's commit wait for it, as we end up
+                * holding memory unnecessarily and delaying the inode's iput
+                * until the transaction commit (we schedule an iput for the
+                * inode when the ordered extent's refcount drops to 0), which
+                * prevents it from being evictable until the transaction
+                * commits.
+                */
+               if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
+                       btrfs_put_ordered_extent(ordered);
+               else
+                       list_add_tail(&ordered->trans_list, &trans->ordered);
+
                spin_lock_irq(&log->log_extents_lock[index]);
        }
        spin_unlock_irq(&log->log_extents_lock[index]);
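
The hunk above drops the reference of an already-completed ordered extent immediately instead of parking it on the transaction's list, so the inode's deferred iput is not held until the commit. A minimal user-space model of the put-or-queue decision; the types are stand-ins for the kernel structures:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct ordered {
        int refs;
        bool complete;
        struct ordered *next;   /* stands in for trans_list linkage */
};

static void put_ordered(struct ordered *o)
{
        if (--o->refs == 0)
                free(o);
}

static void queue_or_put(struct ordered *o, struct ordered **trans_list)
{
        if (o->complete) {
                put_ordered(o);         /* no need to delay its release */
        } else {
                o->next = *trans_list;  /* the commit must wait for this one */
                *trans_list = o;
        }
}

int main(void)
{
        struct ordered *list = NULL;
        struct ordered *done = calloc(1, sizeof(*done));
        struct ordered *pending = calloc(1, sizeof(*pending));

        done->refs = 1;
        done->complete = true;
        pending->refs = 1;

        queue_or_put(done, &list);      /* freed immediately */
        queue_or_put(pending, &list);   /* queued for the commit */
        printf("queued: %s\n", list == pending ? "pending" : "none");
        put_ordered(pending);
        return 0;
}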
@@ -844,6 +851,20 @@ out:
        return entry;
 }
 
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+                                        u64 file_offset,
+                                        u64 len)
+{
+       struct btrfs_ordered_extent *oe;
+
+       oe = btrfs_lookup_ordered_range(inode, file_offset, len);
+       if (oe) {
+               btrfs_put_ordered_extent(oe);
+               return true;
+       }
+       return false;
+}
+
 /*
  * lookup and return any extent before 'file_offset'.  NULL is returned
  * if none is found
index e96cd4ccd805d9220b997518e7b7e9d85ef890d4..7176cc0fe43f7074ffa3c959bb3a7eae0d3b90e2 100644 (file)
@@ -89,9 +89,6 @@ struct btrfs_ordered_extent {
        /* number of bytes that still need writing */
        u64 bytes_left;
 
-       /* number of bytes that still need csumming */
-       u64 csum_bytes_left;
-
        /*
         * the end of the ordered extent which is behind it but
         * didn't update disk_i_size. Please see the comment of
@@ -191,6 +188,9 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
                                                        u64 file_offset,
                                                        u64 len);
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+                                        u64 file_offset,
+                                        u64 len);
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
                                struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
index 3d6546581bb9fd4f778f2c7536c54f7367717f53..d5f1f033b7a00f3c2e2826e336ae5ca5b3e31d4a 100644 (file)
@@ -34,6 +34,7 @@
 #include "extent_io.h"
 #include "qgroup.h"
 
+
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
  *  - reorganize keys
@@ -84,11 +85,42 @@ struct btrfs_qgroup {
 
        /*
         * temp variables for accounting operations
+        * Refer to qgroup_shared_accounting() for details.
         */
        u64 old_refcnt;
        u64 new_refcnt;
 };
 
+static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
+                                          int mod)
+{
+       if (qg->old_refcnt < seq)
+               qg->old_refcnt = seq;
+       qg->old_refcnt += mod;
+}
+
+static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
+                                          int mod)
+{
+       if (qg->new_refcnt < seq)
+               qg->new_refcnt = seq;
+       qg->new_refcnt += mod;
+}
+
+static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+       if (qg->old_refcnt < seq)
+               return 0;
+       return qg->old_refcnt - seq;
+}
+
+static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+       if (qg->new_refcnt < seq)
+               return 0;
+       return qg->new_refcnt - seq;
+}
+
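
The four helpers added above implement a lazily-reset refcount: instead of walking every qgroup to zero its counter for each accounting round, a monotonically increasing seq serves as the implicit zero, and any counter below seq reads back as 0. A self-contained model of the trick:

#include <stdint.h>
#include <stdio.h>

struct qg { uint64_t refcnt; };

static void update_refcnt(struct qg *g, uint64_t seq, int mod)
{
        if (g->refcnt < seq)
                g->refcnt = seq;        /* lazily "reset" to zero */
        g->refcnt += mod;
}

static uint64_t get_refcnt(const struct qg *g, uint64_t seq)
{
        return g->refcnt < seq ? 0 : g->refcnt - seq;
}

int main(void)
{
        struct qg g = { 0 };
        uint64_t seq = 100;             /* round 1 */

        update_refcnt(&g, seq, 1);
        printf("round1: %llu\n", (unsigned long long)get_refcnt(&g, seq)); /* 1 */

        seq = 200;                      /* round 2: no explicit reset needed */
        printf("round2: %llu\n", (unsigned long long)get_refcnt(&g, seq)); /* 0 */
        return 0;
}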
 /*
  * glue structure to represent the relations between qgroups.
  */
@@ -1115,14 +1147,14 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
        struct ulist *tmp;
        int ret = 0;
 
-       tmp = ulist_alloc(GFP_NOFS);
-       if (!tmp)
-               return -ENOMEM;
-
        /* Check the level of src and dst first */
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;
 
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
@@ -1356,239 +1388,86 @@ out:
        return ret;
 }
 
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
-                          struct btrfs_qgroup_operation *oper2)
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+                                        struct btrfs_fs_info *fs_info)
 {
-       /*
-        * Ignore seq and type here, we're looking for any operation
-        * at all related to this extent on that root.
-        */
-       if (oper1->bytenr < oper2->bytenr)
-               return -1;
-       if (oper1->bytenr > oper2->bytenr)
-               return 1;
-       if (oper1->ref_root < oper2->ref_root)
-               return -1;
-       if (oper1->ref_root > oper2->ref_root)
-               return 1;
-       return 0;
-}
+       struct btrfs_qgroup_extent_record *record;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       struct rb_node *node;
+       u64 qgroup_to_skip;
+       int ret = 0;
 
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
-                             struct btrfs_qgroup_operation *oper)
-{
-       struct rb_node *n;
-       struct btrfs_qgroup_operation *cur;
-       int cmp;
+       delayed_refs = &trans->transaction->delayed_refs;
+       qgroup_to_skip = delayed_refs->qgroup_to_skip;
 
-       spin_lock(&fs_info->qgroup_op_lock);
-       n = fs_info->qgroup_op_tree.rb_node;
-       while (n) {
-               cur = rb_entry(n, struct btrfs_qgroup_operation, n);
-               cmp = comp_oper_exist(cur, oper);
-               if (cmp < 0) {
-                       n = n->rb_right;
-               } else if (cmp) {
-                       n = n->rb_left;
-               } else {
-                       spin_unlock(&fs_info->qgroup_op_lock);
-                       return -EEXIST;
-               }
+       /*
+        * No need to take the lock, since this function will only be called
+        * from btrfs_commit_transaction().
+        */
+       node = rb_first(&delayed_refs->dirty_extent_root);
+       while (node) {
+               record = rb_entry(node, struct btrfs_qgroup_extent_record,
+                                 node);
+               ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
+                                          &record->old_roots);
+               if (ret < 0)
+                       break;
+               if (qgroup_to_skip)
+                       ulist_del(record->old_roots, qgroup_to_skip, 0);
+               node = rb_next(node);
        }
-       spin_unlock(&fs_info->qgroup_op_lock);
-       return 0;
-}
-
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
-                    struct btrfs_qgroup_operation *oper2)
-{
-       if (oper1->bytenr < oper2->bytenr)
-               return -1;
-       if (oper1->bytenr > oper2->bytenr)
-               return 1;
-       if (oper1->ref_root < oper2->ref_root)
-               return -1;
-       if (oper1->ref_root > oper2->ref_root)
-               return 1;
-       if (oper1->seq < oper2->seq)
-               return -1;
-       if (oper1->seq > oper2->seq)
-               return 1;
-       if (oper1->type < oper2->type)
-               return -1;
-       if (oper1->type > oper2->type)
-               return 1;
-       return 0;
+       return ret;
 }
 
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
-                             struct btrfs_qgroup_operation *oper)
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+                                 struct btrfs_qgroup_extent_record *record)
 {
-       struct rb_node **p;
-       struct rb_node *parent = NULL;
-       struct btrfs_qgroup_operation *cur;
-       int cmp;
+       struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
+       struct rb_node *parent_node = NULL;
+       struct btrfs_qgroup_extent_record *entry;
+       u64 bytenr = record->bytenr;
 
-       spin_lock(&fs_info->qgroup_op_lock);
-       p = &fs_info->qgroup_op_tree.rb_node;
        while (*p) {
-               parent = *p;
-               cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
-               cmp = comp_oper(cur, oper);
-               if (cmp < 0) {
-                       p = &(*p)->rb_right;
-               } else if (cmp) {
+               parent_node = *p;
+               entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
+                                node);
+               if (bytenr < entry->bytenr)
                        p = &(*p)->rb_left;
-               } else {
-                       spin_unlock(&fs_info->qgroup_op_lock);
-                       return -EEXIST;
-               }
-       }
-       rb_link_node(&oper->n, parent, p);
-       rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
-       spin_unlock(&fs_info->qgroup_op_lock);
-       return 0;
-}
-
-/*
- * Record a quota operation for processing later on.
- * @trans: the transaction we are adding the delayed op to.
- * @fs_info: the fs_info for this fs.
- * @ref_root: the root of the reference we are acting on,
- * @bytenr: the bytenr we are acting on.
- * @num_bytes: the number of bytes in the reference.
- * @type: the type of operation this is.
- * @mod_seq: do we need to get a sequence number for looking up roots.
- *
- * We just add it to our trans qgroup_ref_list and carry on and process these
- * operations in order at some later point.  If the reference root isn't a fs
- * root then we don't bother with doing anything.
- *
- * MUST BE HOLDING THE REF LOCK.
- */
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info, u64 ref_root,
-                           u64 bytenr, u64 num_bytes,
-                           enum btrfs_qgroup_operation_type type, int mod_seq)
-{
-       struct btrfs_qgroup_operation *oper;
-       int ret;
-
-       if (!is_fstree(ref_root) || !fs_info->quota_enabled)
-               return 0;
-
-       oper = kmalloc(sizeof(*oper), GFP_NOFS);
-       if (!oper)
-               return -ENOMEM;
-
-       oper->ref_root = ref_root;
-       oper->bytenr = bytenr;
-       oper->num_bytes = num_bytes;
-       oper->type = type;
-       oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
-       INIT_LIST_HEAD(&oper->elem.list);
-       oper->elem.seq = 0;
-
-       trace_btrfs_qgroup_record_ref(oper);
-
-       if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
-               /*
-                * If any operation for this bytenr/ref_root combo
-                * exists, then we know it's not exclusively owned and
-                * shouldn't be queued up.
-                *
-                * This also catches the case where we have a cloned
-                * extent that gets queued up multiple times during
-                * drop snapshot.
-                */
-               if (qgroup_oper_exists(fs_info, oper)) {
-                       kfree(oper);
-                       return 0;
-               }
-       }
-
-       ret = insert_qgroup_oper(fs_info, oper);
-       if (ret) {
-               /* Shouldn't happen so have an assert for developers */
-               ASSERT(0);
-               kfree(oper);
-               return ret;
+               else if (bytenr > entry->bytenr)
+                       p = &(*p)->rb_right;
+               else
+                       return entry;
        }
-       list_add_tail(&oper->list, &trans->qgroup_ref_list);
 
-       if (mod_seq)
-               btrfs_get_tree_mod_seq(fs_info, &oper->elem);
-
-       return 0;
-}
-
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
-                                 struct btrfs_qgroup_operation *oper)
-{
-       struct ulist *tmp;
-       int sign = 0;
-       int ret = 0;
-
-       tmp = ulist_alloc(GFP_NOFS);
-       if (!tmp)
-               return -ENOMEM;
-
-       spin_lock(&fs_info->qgroup_lock);
-       if (!fs_info->quota_root)
-               goto out;
-
-       switch (oper->type) {
-       case BTRFS_QGROUP_OPER_ADD_EXCL:
-               sign = 1;
-               break;
-       case BTRFS_QGROUP_OPER_SUB_EXCL:
-               sign = -1;
-               break;
-       default:
-               ASSERT(0);
-       }
-       ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
-                                      oper->num_bytes, sign);
-out:
-       spin_unlock(&fs_info->qgroup_lock);
-       ulist_free(tmp);
-       return ret;
+       rb_link_node(&record->node, parent_node, p);
+       rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
+       return NULL;
 }
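
btrfs_qgroup_insert_dirty_extent() above is an insert-or-find on the per-transaction dirty extent tree keyed by bytenr: a duplicate record is handed back to the caller rather than inserted twice. A plain binary-search-tree sketch of the same pattern (no rebalancing, unlike the kernel's rbtree):

#include <stdint.h>
#include <stdio.h>

struct rec {
        uint64_t bytenr;
        struct rec *left, *right;
};

static struct rec *insert_dirty_extent(struct rec **root, struct rec *rec)
{
        struct rec **p = root;

        while (*p) {
                if (rec->bytenr < (*p)->bytenr)
                        p = &(*p)->left;
                else if (rec->bytenr > (*p)->bytenr)
                        p = &(*p)->right;
                else
                        return *p;      /* already recorded: caller keeps rec */
        }
        *p = rec;
        return NULL;
}

int main(void)
{
        struct rec *root = NULL;
        struct rec a = { 4096 }, b = { 4096 };

        printf("%p\n", (void *)insert_dirty_extent(&root, &a)); /* NULL: inserted */
        printf("%p\n", (void *)insert_dirty_extent(&root, &b)); /* &a: duplicate */
        return 0;
}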
 
+#define UPDATE_NEW     0
+#define UPDATE_OLD     1
 /*
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
- * properly.
+ * Walk all of the roots that point to the bytenr and adjust their refcnts.
  */
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
-                                 u64 root_to_skip, struct ulist *tmp,
-                                 struct ulist *roots, struct ulist *qgroups,
-                                 u64 seq, int *old_roots, int rescan)
+static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
+                               struct ulist *roots, struct ulist *tmp,
+                               struct ulist *qgroups, u64 seq, int update_old)
 {
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct ulist_node *tmp_unode;
        struct ulist_iterator tmp_uiter;
        struct btrfs_qgroup *qg;
-       int ret;
+       int ret = 0;
 
+       if (!roots)
+               return 0;
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
-               /* We don't count our current root here */
-               if (unode->val == root_to_skip)
-                       continue;
                qg = find_qgroup_rb(fs_info, unode->val);
                if (!qg)
                        continue;
-               /*
-                * We could have a pending removal of this same ref so we may
-                * not have actually found our ref root when doing
-                * btrfs_find_all_roots, so we need to keep track of how many
-                * old roots we find in case we removed ours and added a
-                * different one at the same time.  I don't think this could
-                * happen in practice but that sort of thinking leads to pain
-                * and suffering and to the dark side.
-                */
-               (*old_roots)++;
 
                ulist_reinit(tmp);
                ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
@@ -1603,29 +1482,10 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
                        struct btrfs_qgroup_list *glist;
 
                        qg = u64_to_ptr(tmp_unode->aux);
-                       /*
-                        * We use this sequence number to keep from having to
-                        * run the whole list and 0 out the refcnt every time.
-                        * We basically use sequnce as the known 0 count and
-                        * then add 1 everytime we see a qgroup.  This is how we
-                        * get how many of the roots actually point up to the
-                        * upper level qgroups in order to determine exclusive
-                        * counts.
-                        *
-                        * For rescan we want to set old_refcnt to seq so our
-                        * exclusive calculations end up correct.
-                        */
-                       if (rescan)
-                               qg->old_refcnt = seq;
-                       else if (qg->old_refcnt < seq)
-                               qg->old_refcnt = seq + 1;
+                       if (update_old)
+                               btrfs_qgroup_update_old_refcnt(qg, seq, 1);
                        else
-                               qg->old_refcnt++;
-
-                       if (qg->new_refcnt < seq)
-                               qg->new_refcnt = seq + 1;
-                       else
-                               qg->new_refcnt++;
+                               btrfs_qgroup_update_new_refcnt(qg, seq, 1);
                        list_for_each_entry(glist, &qg->groups, next_group) {
                                ret = ulist_add(qgroups, glist->group->qgroupid,
                                                ptr_to_u64(glist->group),
@@ -1644,161 +1504,46 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
 }
 
 /*
- * We need to walk forward in our operation tree and account for any roots that
- * were deleted after we made this operation.
- */
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
-                                      struct btrfs_qgroup_operation *oper,
-                                      struct ulist *tmp,
-                                      struct ulist *qgroups, u64 seq,
-                                      int *old_roots)
-{
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       struct btrfs_qgroup *qg;
-       struct btrfs_qgroup_operation *tmp_oper;
-       struct rb_node *n;
-       int ret;
-
-       ulist_reinit(tmp);
-
-       /*
-        * We only walk forward in the tree since we're only interested in
-        * removals that happened _after_  our operation.
-        */
-       spin_lock(&fs_info->qgroup_op_lock);
-       n = rb_next(&oper->n);
-       spin_unlock(&fs_info->qgroup_op_lock);
-       if (!n)
-               return 0;
-       tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
-       while (tmp_oper->bytenr == oper->bytenr) {
-               /*
-                * If it's not a removal we don't care, additions work out
-                * properly with our refcnt tracking.
-                */
-               if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
-                   tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
-                       goto next;
-               qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
-               if (!qg)
-                       goto next;
-               ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
-                               GFP_ATOMIC);
-               if (ret) {
-                       if (ret < 0)
-                               return ret;
-                       /*
-                        * We only want to increase old_roots if this qgroup is
-                        * not already in the list of qgroups.  If it is already
-                        * there then that means it must have been re-added or
-                        * the delete will be discarded because we had an
-                        * existing ref that we haven't looked up yet.  In this
-                        * case we don't want to increase old_roots.  So if ret
-                        * == 1 then we know that this is the first time we've
-                        * seen this qgroup and we can bump the old_roots.
-                        */
-                       (*old_roots)++;
-                       ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
-                                       GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-               }
-next:
-               spin_lock(&fs_info->qgroup_op_lock);
-               n = rb_next(&tmp_oper->n);
-               spin_unlock(&fs_info->qgroup_op_lock);
-               if (!n)
-                       break;
-               tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
-       }
-
-       /* Ok now process the qgroups we found */
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               struct btrfs_qgroup_list *glist;
-
-               qg = u64_to_ptr(unode->aux);
-               if (qg->old_refcnt < seq)
-                       qg->old_refcnt = seq + 1;
-               else
-                       qg->old_refcnt++;
-               if (qg->new_refcnt < seq)
-                       qg->new_refcnt = seq + 1;
-               else
-                       qg->new_refcnt++;
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       ret = ulist_add(qgroups, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-                       ret = ulist_add(tmp, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-               }
-       }
-       return 0;
-}
-
-/* Add refcnt for the newly added reference. */
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
-                                 struct btrfs_qgroup_operation *oper,
-                                 struct btrfs_qgroup *qgroup,
-                                 struct ulist *tmp, struct ulist *qgroups,
-                                 u64 seq)
-{
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       struct btrfs_qgroup *qg;
-       int ret;
-
-       ulist_reinit(tmp);
-       ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
-                       GFP_ATOMIC);
-       if (ret < 0)
-               return ret;
-       ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
-                       GFP_ATOMIC);
-       if (ret < 0)
-               return ret;
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               struct btrfs_qgroup_list *glist;
-
-               qg = u64_to_ptr(unode->aux);
-               if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
-                       if (qg->new_refcnt < seq)
-                               qg->new_refcnt = seq + 1;
-                       else
-                               qg->new_refcnt++;
-               } else {
-                       if (qg->old_refcnt < seq)
-                               qg->old_refcnt = seq + 1;
-                       else
-                               qg->old_refcnt++;
-               }
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       ret = ulist_add(tmp, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-                       ret = ulist_add(qgroups, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-               }
-       }
-       return 0;
-}
-
-/*
- * This adjusts the counters for all referenced qgroups if need be.
+ * Update qgroup rfer/excl counters.
+ * Rfer update is easy; the code explains itself.
+ *
+ * Excl update is tricky; the update is split into 2 parts.
+ * Part 1: Possible exclusive <-> sharing detection:
+ *     |       A       |       !A      |
+ *  -------------------------------------
+ *  B  |       *       |       -       |
+ *  -------------------------------------
+ *  !B |       +       |       **      |
+ *  -------------------------------------
+ *
+ * Conditions:
+ * A:  cur_old_roots < nr_old_roots    (not exclusive before)
+ * !A: cur_old_roots == nr_old_roots   (possible exclusive before)
+ * B:  cur_new_roots < nr_new_roots    (not exclusive now)
+ * !B: cur_new_roots == nr_new_roots   (possible exclusive now)
+ *
+ * Results:
+ * +: Possibly sharing -> exclusive    -: Possibly exclusive -> sharing
+ * *: Definitely not changed.          **: Possibly unchanged.
+ *
+ * For the !A and !B conditions, the exception is the cur_old/new_roots == 0 case.
+ *
+ * To make the logic clear, we first use conditions A and B to split the
+ * combinations into 4 results.
+ *
+ * Then, for results "+" and "-", check the old/new_roots == 0 case, as there
+ * only one variant may be 0.
+ *
+ * Lastly, check result **; since there are 2 variants that may be 0, split it
+ * again (2x2).
+ * This time no other cases need to be considered, so the code and logic
+ * are easy to follow.
  */
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
-                                 u64 root_to_skip, u64 num_bytes,
-                                 struct ulist *qgroups, u64 seq,
-                                 int old_roots, int new_roots, int rescan)
+static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
+                                 struct ulist *qgroups,
+                                 u64 nr_old_roots,
+                                 u64 nr_new_roots,
+                                 u64 num_bytes, u64 seq)
 {
        struct ulist_node *unode;
        struct ulist_iterator uiter;
@@ -1810,423 +1555,191 @@ static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
                bool dirty = false;
 
                qg = u64_to_ptr(unode->aux);
-               /*
-                * Wasn't referenced before but is now, add to the reference
-                * counters.
-                */
-               if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+               cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
+               cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
+
+               /* Rfer update part */
+               if (cur_old_count == 0 && cur_new_count > 0) {
                        qg->rfer += num_bytes;
                        qg->rfer_cmpr += num_bytes;
                        dirty = true;
                }
-
-               /*
-                * Was referenced before but isn't now, subtract from the
-                * reference counters.
-                */
-               if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+               if (cur_old_count > 0 && cur_new_count == 0) {
                        qg->rfer -= num_bytes;
                        qg->rfer_cmpr -= num_bytes;
                        dirty = true;
                }
 
-               if (qg->old_refcnt < seq)
-                       cur_old_count = 0;
-               else
-                       cur_old_count = qg->old_refcnt - seq;
-               if (qg->new_refcnt < seq)
-                       cur_new_count = 0;
-               else
-                       cur_new_count = qg->new_refcnt - seq;
-
-               /*
-                * If our refcount was the same as the roots previously but our
-                * new count isn't the same as the number of roots now then we
-                * went from having a exclusive reference on this range to not.
-                */
-               if (old_roots && cur_old_count == old_roots &&
-                   (cur_new_count != new_roots || new_roots == 0)) {
-                       WARN_ON(cur_new_count != new_roots && new_roots == 0);
-                       qg->excl -= num_bytes;
-                       qg->excl_cmpr -= num_bytes;
-                       dirty = true;
+               /* Excl update part */
+               /* Exclusive/none -> shared case */
+               if (cur_old_count == nr_old_roots &&
+                   cur_new_count < nr_new_roots) {
+                       /* Exclusive -> shared */
+                       if (cur_old_count != 0) {
+                               qg->excl -= num_bytes;
+                               qg->excl_cmpr -= num_bytes;
+                               dirty = true;
+                       }
                }
 
-               /*
-                * If we didn't reference all the roots before but now we do we
-                * have an exclusive reference to this range.
-                */
-               if ((!old_roots || (old_roots && cur_old_count != old_roots))
-                   && cur_new_count == new_roots) {
-                       qg->excl += num_bytes;
-                       qg->excl_cmpr += num_bytes;
-                       dirty = true;
+               /* Shared -> exclusive/none case */
+               if (cur_old_count < nr_old_roots &&
+                   cur_new_count == nr_new_roots) {
+                       /* Shared->exclusive */
+                       if (cur_new_count != 0) {
+                               qg->excl += num_bytes;
+                               qg->excl_cmpr += num_bytes;
+                               dirty = true;
+                       }
                }
 
+               /* Exclusive/none -> exclusive/none case */
+               if (cur_old_count == nr_old_roots &&
+                   cur_new_count == nr_new_roots) {
+                       if (cur_old_count == 0) {
+                               /* None -> exclusive/none */
+
+                               if (cur_new_count != 0) {
+                                       /* None -> exclusive */
+                                       qg->excl += num_bytes;
+                                       qg->excl_cmpr += num_bytes;
+                                       dirty = true;
+                               }
+                               /* None -> none, nothing changed */
+                       } else {
+                               /* Exclusive -> exclusive/none */
+
+                               if (cur_new_count == 0) {
+                                       /* Exclusive -> none */
+                                       qg->excl -= num_bytes;
+                                       qg->excl_cmpr -= num_bytes;
+                                       dirty = true;
+                               }
+                               /* Exclusive -> exclusive, nothing changed */
+                       }
+               }
                if (dirty)
                        qgroup_dirty(fs_info, qg);
        }
        return 0;
 }
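
The decision table in the comment above boils down to pure arithmetic on the per-qgroup counts versus the total root counts. A user-space model that computes the rfer/excl deltas for one qgroup; all counts and byte values are hypothetical:

#include <stdint.h>
#include <stdio.h>

struct delta { int64_t rfer, excl; };

static struct delta qgroup_deltas(uint64_t cur_old, uint64_t cur_new,
                                  uint64_t nr_old, uint64_t nr_new,
                                  int64_t num_bytes)
{
        struct delta d = { 0, 0 };

        /* rfer: gained or lost any reference at all */
        if (cur_old == 0 && cur_new > 0)
                d.rfer += num_bytes;
        if (cur_old > 0 && cur_new == 0)
                d.rfer -= num_bytes;

        /* exclusive -> shared */
        if (cur_old == nr_old && cur_new < nr_new && cur_old != 0)
                d.excl -= num_bytes;
        /* shared -> exclusive */
        if (cur_old < nr_old && cur_new == nr_new && cur_new != 0)
                d.excl += num_bytes;
        /* exclusive/none -> exclusive/none */
        if (cur_old == nr_old && cur_new == nr_new) {
                if (cur_old == 0 && cur_new != 0)
                        d.excl += num_bytes;    /* none -> exclusive */
                else if (cur_old != 0 && cur_new == 0)
                        d.excl -= num_bytes;    /* exclusive -> none */
        }
        return d;
}

int main(void)
{
        /* Extent goes from exclusively owned (1 of 1 root) to shared
         * (1 of 2 roots): rfer unchanged, excl drops. */
        struct delta d = qgroup_deltas(1, 1, 1, 2, 16384);

        printf("rfer %+lld excl %+lld\n", (long long)d.rfer, (long long)d.excl);
        return 0;
}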
 
-/*
- * If we removed a data extent and there were other references for that bytenr
- * then we need to lookup all referenced roots to make sure we still don't
- * reference this bytenr.  If we do then we can just discard this operation.
- */
-static int check_existing_refs(struct btrfs_trans_handle *trans,
-                              struct btrfs_fs_info *fs_info,
-                              struct btrfs_qgroup_operation *oper)
-{
-       struct ulist *roots = NULL;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       int ret = 0;
-
-       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
-                                  oper->elem.seq, &roots);
-       if (ret < 0)
-               return ret;
-       ret = 0;
-
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(roots, &uiter))) {
-               if (unode->val == oper->ref_root) {
-                       ret = 1;
-                       break;
-               }
-       }
-       ulist_free(roots);
-       btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-
-       return ret;
-}
-
-/*
- * If we share a reference across multiple roots then we may need to adjust
- * various qgroups referenced and exclusive counters.  The basic premise is this
- *
- * 1) We have seq to represent a 0 count.  Instead of looping through all of the
- * qgroups and resetting their refcount to 0 we just constantly bump this
- * sequence number to act as the base reference count.  This means that if
- * anybody is equal to or below this sequence they were never referenced.  We
- * jack this sequence up by the number of roots we found each time in order to
- * make sure we don't have any overlap.
- *
- * 2) We first search all the roots that reference the area _except_ the root
- * we're acting on currently.  This makes up the old_refcnt of all the qgroups
- * before.
- *
- * 3) We walk all of the qgroups referenced by the root we are currently acting
- * on, and will either adjust old_refcnt in the case of a removal or the
- * new_refcnt in the case of an addition.
- *
- * 4) Finally we walk all the qgroups that are referenced by this range
- * including the root we are acting on currently.  We will adjust the counters
- * based on the number of roots we had and will have after this operation.
- *
- * Take this example as an illustration
- *
- *                     [qgroup 1/0]
- *                  /         |          \
- *             [qg 0/0]   [qg 0/1]     [qg 0/2]
- *                \          |            /
- *               [        extent           ]
- *
- * Say we are adding a reference that is covered by qg 0/0.  The first step
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
- * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
- * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
- * reference and thus must add the size to the referenced bytes.  Everything
- * else is the same so nothing else changes.
- */
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info,
-                                   struct btrfs_qgroup_operation *oper)
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+                           struct btrfs_fs_info *fs_info,
+                           u64 bytenr, u64 num_bytes,
+                           struct ulist *old_roots, struct ulist *new_roots)
 {
-       struct ulist *roots = NULL;
-       struct ulist *qgroups, *tmp;
-       struct btrfs_qgroup *qgroup;
-       struct seq_list elem = SEQ_LIST_INIT(elem);
+       struct ulist *qgroups = NULL;
+       struct ulist *tmp = NULL;
        u64 seq;
-       int old_roots = 0;
-       int new_roots = 0;
+       u64 nr_new_roots = 0;
+       u64 nr_old_roots = 0;
        int ret = 0;
 
-       if (oper->elem.seq) {
-               ret = check_existing_refs(trans, fs_info, oper);
-               if (ret < 0)
-                       return ret;
-               if (ret)
-                       return 0;
-       }
+       if (new_roots)
+               nr_new_roots = new_roots->nnodes;
+       if (old_roots)
+               nr_old_roots = old_roots->nnodes;
 
-       qgroups = ulist_alloc(GFP_NOFS);
-       if (!qgroups)
-               return -ENOMEM;
+       if (!fs_info->quota_enabled)
+               goto out_free;
+       BUG_ON(!fs_info->quota_root);
 
+       qgroups = ulist_alloc(GFP_NOFS);
+       if (!qgroups) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp) {
-               ulist_free(qgroups);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out_free;
        }
 
-       btrfs_get_tree_mod_seq(fs_info, &elem);
-       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
-                                  &roots);
-       btrfs_put_tree_mod_seq(fs_info, &elem);
-       if (ret < 0) {
-               ulist_free(qgroups);
-               ulist_free(tmp);
-               return ret;
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+               if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
+                       mutex_unlock(&fs_info->qgroup_rescan_lock);
+                       ret = 0;
+                       goto out_free;
+               }
        }
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
        spin_lock(&fs_info->qgroup_lock);
-       qgroup = find_qgroup_rb(fs_info, oper->ref_root);
-       if (!qgroup)
-               goto out;
        seq = fs_info->qgroup_seq;
 
-       /*
-        * So roots is the list of all the roots currently pointing at the
-        * bytenr, including the ref we are adding if we are adding, or not if
-        * we are removing a ref.  So we pass in the ref_root to skip that root
-        * in our calculations.  We set old_refnct and new_refcnt cause who the
-        * hell knows what everything looked like before, and it doesn't matter
-        * except...
-        */
-       ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
-                                    seq, &old_roots, 0);
+       /* Update old refcnts using old_roots */
+       ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
+                                  UPDATE_OLD);
        if (ret < 0)
                goto out;
 
-       /*
-        * Now adjust the refcounts of the qgroups that care about this
-        * reference, either the old_count in the case of removal or new_count
-        * in the case of an addition.
-        */
-       ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
-                                    seq);
+       /* Update new refcnts using new_roots */
+       ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
+                                  UPDATE_NEW);
        if (ret < 0)
                goto out;
 
-       /*
-        * ...in the case of removals.  If we had a removal before we got around
-        * to processing this operation then we need to find that guy and count
-        * his references as if they really existed so we don't end up screwing
-        * up the exclusive counts.  Then whenever we go to process the delete
-        * everything will be grand and we can account for whatever exclusive
-        * changes need to be made there.  We also have to pass in old_roots so
-        * we have an accurate count of the roots as it pertains to this
-        * operations view of the world.
-        */
-       ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
-                                         &old_roots);
-       if (ret < 0)
-               goto out;
+       qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
+                              num_bytes, seq);
 
        /*
-        * We are adding our root, need to adjust up the number of roots,
-        * otherwise old_roots is the number of roots we want.
+        * Bump qgroup_seq to avoid seq overlap
         */
-       if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
-               new_roots = old_roots + 1;
-       } else {
-               new_roots = old_roots;
-               old_roots++;
-       }
-       fs_info->qgroup_seq += old_roots + 1;
-
-
-       /*
-        * And now the magic happens, bless Arne for having a pretty elegant
-        * solution for this.
-        */
-       qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
-                              qgroups, seq, old_roots, new_roots, 0);
+       fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
 out:
        spin_unlock(&fs_info->qgroup_lock);
-       ulist_free(qgroups);
-       ulist_free(roots);
+out_free:
        ulist_free(tmp);
+       ulist_free(qgroups);
+       ulist_free(old_roots);
+       ulist_free(new_roots);
        return ret;
 }
 
-/*
- * Process a reference to a shared subtree. This type of operation is
- * queued during snapshot removal when we encounter extents which are
- * shared between more than one root.
- */
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_info *fs_info,
-                                    struct btrfs_qgroup_operation *oper)
-{
-       struct ulist *roots = NULL;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       struct btrfs_qgroup_list *glist;
-       struct ulist *parents;
-       int ret = 0;
-       int err;
-       struct btrfs_qgroup *qg;
-       u64 root_obj = 0;
-       struct seq_list elem = SEQ_LIST_INIT(elem);
-
-       parents = ulist_alloc(GFP_NOFS);
-       if (!parents)
-               return -ENOMEM;
-
-       btrfs_get_tree_mod_seq(fs_info, &elem);
-       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
-                                  elem.seq, &roots);
-       btrfs_put_tree_mod_seq(fs_info, &elem);
-       if (ret < 0)
-               goto out;
-
-       if (roots->nnodes != 1)
-               goto out;
-
-       ULIST_ITER_INIT(&uiter);
-       unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
-       /*
-        * If we find our ref root then that means all refs
-        * this extent has to the root have not yet been
-        * deleted. In that case, we do nothing and let the
-        * last ref for this bytenr drive our update.
-        *
-        * This can happen for example if an extent is
-        * referenced multiple times in a snapshot (clone,
-        * etc). If we are in the middle of snapshot removal,
-        * queued updates for such an extent will find the
-        * root if we have not yet finished removing the
-        * snapshot.
-        */
-       if (unode->val == oper->ref_root)
-               goto out;
-
-       root_obj = unode->val;
-       BUG_ON(!root_obj);
-
-       spin_lock(&fs_info->qgroup_lock);
-       qg = find_qgroup_rb(fs_info, root_obj);
-       if (!qg)
-               goto out_unlock;
-
-       qg->excl += oper->num_bytes;
-       qg->excl_cmpr += oper->num_bytes;
-       qgroup_dirty(fs_info, qg);
-
-       /*
-        * Adjust counts for parent groups. First we find all
-        * parents, then in the 2nd loop we do the adjustment
-        * while adding parents of the parents to our ulist.
-        */
-       list_for_each_entry(glist, &qg->groups, next_group) {
-               err = ulist_add(parents, glist->group->qgroupid,
-                               ptr_to_u64(glist->group), GFP_ATOMIC);
-               if (err < 0) {
-                       ret = err;
-                       goto out_unlock;
-               }
-       }
-
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(parents, &uiter))) {
-               qg = u64_to_ptr(unode->aux);
-               qg->excl += oper->num_bytes;
-               qg->excl_cmpr += oper->num_bytes;
-               qgroup_dirty(fs_info, qg);
-
-               /* Add any parents of the parents */
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       err = ulist_add(parents, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (err < 0) {
-                               ret = err;
-                               goto out_unlock;
-                       }
-               }
-       }
-
-out_unlock:
-       spin_unlock(&fs_info->qgroup_lock);
-
-out:
-       ulist_free(roots);
-       ulist_free(parents);
-       return ret;
-}
-
-/*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs. First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots. The
- * accounting algorithm works in 3 steps documented inline.
- */
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info,
-                               struct btrfs_qgroup_operation *oper)
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info)
 {
+       struct btrfs_qgroup_extent_record *record;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       struct ulist *new_roots = NULL;
+       struct rb_node *node;
+       u64 qgroup_to_skip;
        int ret = 0;
 
-       if (!fs_info->quota_enabled)
-               return 0;
-
-       BUG_ON(!fs_info->quota_root);
+       delayed_refs = &trans->transaction->delayed_refs;
+       qgroup_to_skip = delayed_refs->qgroup_to_skip;
+       while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
+               record = rb_entry(node, struct btrfs_qgroup_extent_record,
+                                 node);
 
-       mutex_lock(&fs_info->qgroup_rescan_lock);
-       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-               if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
-                       mutex_unlock(&fs_info->qgroup_rescan_lock);
-                       return 0;
+               if (!ret) {
+                       /*
+                        * Use (u64)-1 as time_seq to do a special search
+                        * that doesn't lock the tree or delayed_refs and
+                        * searches the current root. This is safe inside
+                        * commit_transaction().
+                        */
+                       ret = btrfs_find_all_roots(trans, fs_info,
+                                       record->bytenr, (u64)-1, &new_roots);
+                       if (ret < 0)
+                               goto cleanup;
+                       if (qgroup_to_skip)
+                               ulist_del(new_roots, qgroup_to_skip, 0);
+                       ret = btrfs_qgroup_account_extent(trans, fs_info,
+                                       record->bytenr, record->num_bytes,
+                                       record->old_roots, new_roots);
+                       record->old_roots = NULL;
+                       new_roots = NULL;
                }
-       }
-       mutex_unlock(&fs_info->qgroup_rescan_lock);
+cleanup:
+               ulist_free(record->old_roots);
+               ulist_free(new_roots);
+               new_roots = NULL;
+               rb_erase(node, &delayed_refs->dirty_extent_root);
+               kfree(record);
 
-       ASSERT(is_fstree(oper->ref_root));
-
-       trace_btrfs_qgroup_account(oper);
-
-       switch (oper->type) {
-       case BTRFS_QGROUP_OPER_ADD_EXCL:
-       case BTRFS_QGROUP_OPER_SUB_EXCL:
-               ret = qgroup_excl_accounting(fs_info, oper);
-               break;
-       case BTRFS_QGROUP_OPER_ADD_SHARED:
-       case BTRFS_QGROUP_OPER_SUB_SHARED:
-               ret = qgroup_shared_accounting(trans, fs_info, oper);
-               break;
-       case BTRFS_QGROUP_OPER_SUB_SUBTREE:
-               ret = qgroup_subtree_accounting(trans, fs_info, oper);
-               break;
-       default:
-               ASSERT(0);
-       }
-       return ret;
-}
-
-/*
- * Needs to be called everytime we run delayed refs, even if there is an error
- * in order to cleanup outstanding operations.
- */
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_qgroup_operation *oper;
-       int ret = 0;
-
-       while (!list_empty(&trans->qgroup_ref_list)) {
-               oper = list_first_entry(&trans->qgroup_ref_list,
-                                       struct btrfs_qgroup_operation, list);
-               list_del_init(&oper->list);
-               if (!ret || !trans->aborted)
-                       ret = btrfs_qgroup_account(trans, fs_info, oper);
-               spin_lock(&fs_info->qgroup_op_lock);
-               rb_erase(&oper->n, &fs_info->qgroup_op_tree);
-               spin_unlock(&fs_info->qgroup_op_lock);
-               btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-               kfree(oper);
        }
        return ret;
 }
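
For orientation, a minimal userspace sketch (hypothetical types, not the
kernel API) of the per-root delta semantics that btrfs_qgroup_account_extent()
implements: referenced bytes track whether the root is in the old/new root
sets, and exclusive bytes track whether it is the sole member.

    /*
     * Sketch only: the kernel computes the same deltas with seq-tagged
     * refcounts rather than explicit membership flags.
     */
    #include <stdio.h>

    struct qgroup { unsigned long long rfer, excl; };

    static void account(struct qgroup *qg, unsigned long long num_bytes,
                        int in_old, int in_new, int nr_old, int nr_new)
    {
            if (in_new && !in_old)
                    qg->rfer += num_bytes;  /* root gained a reference */
            if (in_old && !in_new)
                    qg->rfer -= num_bytes;  /* root dropped its reference */
            if (in_new && nr_new == 1 && !(in_old && nr_old == 1))
                    qg->excl += num_bytes;  /* became the sole owner */
            if (in_old && nr_old == 1 && !(in_new && nr_new == 1))
                    qg->excl -= num_bytes;  /* stopped being the sole owner */
    }

    int main(void)
    {
            struct qgroup a = { 0 }, b = { 0 };

            account(&a, 4096, 0, 1, 0, 1);  /* root A allocates the extent */
            account(&a, 4096, 1, 1, 1, 2);  /* reflink into B: A keeps rfer */
            account(&b, 4096, 0, 1, 1, 2);  /* ... and B gains a shared ref */
            printf("A: rfer=%llu excl=%llu\n", a.rfer, a.excl);
            printf("B: rfer=%llu excl=%llu\n", b.rfer, b.excl);
            return 0;
    }

After the reflink, A ends up with rfer=4096 and excl=0, and B with rfer=4096
and excl=0: both reference the extent, neither owns it exclusively.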
@@ -2637,15 +2150,13 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
  */
 static int
 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-                  struct btrfs_trans_handle *trans, struct ulist *qgroups,
-                  struct ulist *tmp, struct extent_buffer *scratch_leaf)
+                  struct btrfs_trans_handle *trans,
+                  struct extent_buffer *scratch_leaf)
 {
        struct btrfs_key found;
        struct ulist *roots = NULL;
        struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        u64 num_bytes;
-       u64 seq;
-       int new_roots;
        int slot;
        int ret;
 
@@ -2695,33 +2206,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                else
                        num_bytes = found.offset;
 
-               ulist_reinit(qgroups);
                ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
                                           &roots);
                if (ret < 0)
                        goto out;
-               spin_lock(&fs_info->qgroup_lock);
-               seq = fs_info->qgroup_seq;
-               fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
-
-               new_roots = 0;
-               ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
-                                            seq, &new_roots, 1);
-               if (ret < 0) {
-                       spin_unlock(&fs_info->qgroup_lock);
-                       ulist_free(roots);
-                       goto out;
-               }
-
-               ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
-                                            seq, 0, new_roots, 1);
-               if (ret < 0) {
-                       spin_unlock(&fs_info->qgroup_lock);
-                       ulist_free(roots);
+               /* For rescan, just pass old_roots as NULL */
+               ret = btrfs_qgroup_account_extent(trans, fs_info,
+                               found.objectid, num_bytes, NULL, roots);
+               if (ret < 0)
                        goto out;
-               }
-               spin_unlock(&fs_info->qgroup_lock);
-               ulist_free(roots);
        }
 out:
        btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
@@ -2735,7 +2228,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                                                     qgroup_rescan_work);
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans = NULL;
-       struct ulist *tmp = NULL, *qgroups = NULL;
        struct extent_buffer *scratch_leaf = NULL;
        int err = -ENOMEM;
        int ret = 0;
@@ -2743,12 +2235,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        path = btrfs_alloc_path();
        if (!path)
                goto out;
-       qgroups = ulist_alloc(GFP_NOFS);
-       if (!qgroups)
-               goto out;
-       tmp = ulist_alloc(GFP_NOFS);
-       if (!tmp)
-               goto out;
        scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
        if (!scratch_leaf)
                goto out;
@@ -2764,7 +2250,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                        err = -EINTR;
                } else {
                        err = qgroup_rescan_leaf(fs_info, path, trans,
-                                                qgroups, tmp, scratch_leaf);
+                                                scratch_leaf);
                }
                if (err > 0)
                        btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2774,8 +2260,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 
 out:
        kfree(scratch_leaf);
-       ulist_free(qgroups);
-       ulist_free(tmp);
        btrfs_free_path(path);
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
index c5242aa9a4b2a1cc7237bd42487c063a0790d87f..6387dcfa354c6ecac672a543bf1d6b8e587e911b 100644 (file)
 #ifndef __BTRFS_QGROUP__
 #define __BTRFS_QGROUP__
 
+#include "ulist.h"
+#include "delayed-ref.h"
+
 /*
- * A description of the operations, all of these operations only happen when we
- * are adding the 1st reference for that subvolume in the case of adding space
- * or on the last reference delete in the case of subtraction.  The only
- * exception is the last one, which is added for confusion.
- *
- * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
- * one pointing at the bytes we are adding.  This is called on the first
- * allocation.
- *
- * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
- * shared between subvols.  This is called on the creation of a ref that already
- * has refs from a different subvolume, so basically reflink.
- *
- * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
- * one referencing the range.
- *
- * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
- * refs with other subvolumes.
+ * Record a dirty extent, and inform the qgroup code to update quotas on it.
+ * TODO: Use kmem cache to alloc it.
  */
-enum btrfs_qgroup_operation_type {
-       BTRFS_QGROUP_OPER_ADD_EXCL,
-       BTRFS_QGROUP_OPER_ADD_SHARED,
-       BTRFS_QGROUP_OPER_SUB_EXCL,
-       BTRFS_QGROUP_OPER_SUB_SHARED,
-       BTRFS_QGROUP_OPER_SUB_SUBTREE,
-};
-
-struct btrfs_qgroup_operation {
-       u64 ref_root;
+struct btrfs_qgroup_extent_record {
+       struct rb_node node;
        u64 bytenr;
        u64 num_bytes;
-       u64 seq;
-       enum btrfs_qgroup_operation_type type;
-       struct seq_list elem;
-       struct rb_node n;
-       struct list_head list;
+       struct ulist *old_roots;
 };
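
A hypothetical call-site sketch for the record API declared in this header
(locking and error handling elided; the convention that the insert helper
returns an already-existing record for the same bytenr is an assumption
based on how the delayed-ref code uses it):

    struct btrfs_qgroup_extent_record *record, *existing;

    record = kmalloc(sizeof(*record), GFP_NOFS);    /* NULL check elided */
    record->bytenr = bytenr;
    record->num_bytes = num_bytes;
    record->old_roots = NULL;   /* resolved later, before accounting runs */
    existing = btrfs_qgroup_insert_dirty_extent(delayed_refs, record);
    if (existing)
            kfree(record);      /* extent already recorded this transaction */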
 
 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
@@ -79,16 +54,18 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
 struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info, u64 ref_root,
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+                                        struct btrfs_fs_info *fs_info);
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+                                 struct btrfs_qgroup_extent_record *record);
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+                           struct btrfs_fs_info *fs_info,
                            u64 bytenr, u64 num_bytes,
-                           enum btrfs_qgroup_operation_type type,
-                           int mod_seq);
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info);
-void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
-                                  struct btrfs_fs_info *fs_info,
-                                  struct btrfs_qgroup_operation *oper);
+                           struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info);
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
                      struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
index 74b24b01d5740041a69aab1d083391c2e03ade8b..827951fbf7fcb7dac026e48ded6e29f2f723adef 100644 (file)
@@ -1847,8 +1847,10 @@ again:
                        }
 
                        eb = read_tree_block(dest, old_bytenr, old_ptr_gen);
-                       if (!eb || !extent_buffer_uptodate(eb)) {
-                               ret = (!eb) ? -ENOMEM : -EIO;
+                       if (IS_ERR(eb)) {
+                               ret = PTR_ERR(eb);
+                       } else if (!extent_buffer_uptodate(eb)) {
+                               ret = -EIO;
                                free_extent_buffer(eb);
                                break;
                        }
@@ -2002,7 +2004,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
 
                bytenr = btrfs_node_blockptr(eb, path->slots[i]);
                eb = read_tree_block(root, bytenr, ptr_gen);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       return PTR_ERR(eb);
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        return -EIO;
                }
@@ -2710,7 +2714,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                blocksize = root->nodesize;
                generation = btrfs_node_ptr_generation(upper->eb, slot);
                eb = read_tree_block(root, bytenr, generation);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       err = PTR_ERR(eb);
+                       goto next;
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        err = -EIO;
                        goto next;
@@ -2873,7 +2880,9 @@ static int get_tree_block_key(struct reloc_control *rc,
        BUG_ON(block->key_ready);
        eb = read_tree_block(rc->extent_root, block->bytenr,
                             block->key.offset);
-       if (!eb || !extent_buffer_uptodate(eb)) {
+       if (IS_ERR(eb)) {
+               return PTR_ERR(eb);
+       } else if (!extent_buffer_uptodate(eb)) {
                free_extent_buffer(eb);
                return -EIO;
        }
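
The four hunks above convert read_tree_block() callers from NULL checks to
the kernel's error-pointer convention, in which a small negative errno is
encoded in the pointer value itself. A self-contained userspace copy of the
idiom, for illustration only:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    /* Userspace re-implementation of ERR_PTR/IS_ERR/PTR_ERR. */
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static int the_block = 42;

    static void *read_block(int fail)
    {
            return fail ? ERR_PTR(-EIO) : &the_block;
    }

    int main(void)
    {
            void *eb = read_block(1);

            if (IS_ERR(eb)) {       /* one test covers every failure mode */
                    printf("read failed: %ld\n", PTR_ERR(eb));
                    return 1;
            }
            return 0;
    }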
index ab5811545a988edf685ef4acce607cbff1ac7b81..9f2feabe99f211f9c8eb5d489c44c85f54754c27 100644 (file)
@@ -2662,18 +2662,30 @@ static void scrub_free_parity(struct scrub_parity *sparity)
        kfree(sparity);
 }
 
+static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
+{
+       struct scrub_parity *sparity = container_of(work, struct scrub_parity,
+                                                   work);
+       struct scrub_ctx *sctx = sparity->sctx;
+
+       scrub_free_parity(sparity);
+       scrub_pending_bio_dec(sctx);
+}
+
 static void scrub_parity_bio_endio(struct bio *bio, int error)
 {
        struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
-       struct scrub_ctx *sctx = sparity->sctx;
 
        if (error)
                bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
                          sparity->nsectors);
 
-       scrub_free_parity(sparity);
-       scrub_pending_bio_dec(sctx);
        bio_put(bio);
+
+       btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
+                       scrub_parity_bio_endio_worker, NULL, NULL);
+       btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
+                        &sparity->work);
 }
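
The hunk above moves the parity cleanup out of the bio completion handler,
which may run in interrupt context, and onto a workqueue. The generic shape
of that pattern, sketched with the stock workqueue API instead of the btrfs
helpers (all names here are illustrative):

    struct parity_ctx {
            struct work_struct work;
            /* ... state the deferred cleanup needs ... */
    };

    static void parity_endio_worker(struct work_struct *work)
    {
            struct parity_ctx *ctx =
                    container_of(work, struct parity_ctx, work);

            /* heavy teardown runs here, in process context */
    }

    static void parity_bio_endio(struct bio *bio, int error)
    {
            struct parity_ctx *ctx = bio->bi_private;

            bio_put(bio);           /* cheap work stays in the endio */
            INIT_WORK(&ctx->work, parity_endio_worker);
            queue_work(system_wq, &ctx->work);
    }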
 
 static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
@@ -3589,6 +3601,13 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                        ret = -ENOMEM;
                        goto out;
                }
+               fs_info->scrub_parity_workers =
+                       btrfs_alloc_workqueue("btrfs-scrubparity", flags,
+                                             max_active, 2);
+               if (!fs_info->scrub_parity_workers) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
        }
        ++fs_info->scrub_workers_refcnt;
 out:
@@ -3601,6 +3620,7 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
                btrfs_destroy_workqueue(fs_info->scrub_workers);
                btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
                btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+               btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
        }
        WARN_ON(fs_info->scrub_workers_refcnt < 0);
 }
index a1216f9b49171510194109c7594c61b4277298ea..aa72bfd28f7dcbd88c73452aafd2a3d9e7f42e00 100644 (file)
@@ -243,6 +243,7 @@ struct waiting_dir_move {
         * after this directory is moved, we can try to rmdir the ino rmdir_ino.
         */
        u64 rmdir_ino;
+       bool orphanized;
 };
 
 struct orphan_dir_info {
@@ -1158,6 +1159,9 @@ struct backref_ctx {
        /* may be truncated in case it's the last extent in a file */
        u64 extent_len;
 
+       /* data offset in the file extent item */
+       u64 data_offset;
+
        /* Just to check for bugs in backref resolving */
        int found_itself;
 };
@@ -1221,7 +1225,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
        if (ret < 0)
                return ret;
 
-       if (offset + bctx->extent_len > i_size)
+       if (offset + bctx->data_offset + bctx->extent_len > i_size)
                return 0;
 
        /*
@@ -1363,6 +1367,19 @@ static int find_extent_clone(struct send_ctx *sctx,
        backref_ctx->cur_offset = data_offset;
        backref_ctx->found_itself = 0;
        backref_ctx->extent_len = num_bytes;
+       /*
+        * For non-compressed extents iterate_extent_inodes() gives us extent
+        * offsets that already take into account the data offset, but not for
+        * compressed extents, since the offset is logical and not relative to
+        * the physical extent locations. We must take this into account to
+        * avoid sending clone offsets that go beyond the source file's size,
+        * which would result in the clone ioctl failing with -EINVAL on the
+        * receiving end.
+        */
+       if (compressed == BTRFS_COMPRESS_NONE)
+               backref_ctx->data_offset = 0;
+       else
+               backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);
 
        /*
         * The last extent of a file may be too large due to page alignment.
@@ -1900,8 +1917,13 @@ static int did_overwrite_ref(struct send_ctx *sctx,
                goto out;
        }
 
-       /* we know that it is or will be overwritten. check this now */
-       if (ow_inode < sctx->send_progress)
+       /*
+        * We know that it is or will be overwritten. Check this now.
+        * The current inode being processed might have been the one that caused
+        * inode 'ino' to be orphanized, therefore ow_inode can actually be the
+        * same as sctx->send_progress.
+        */
+       if (ow_inode <= sctx->send_progress)
                ret = 1;
        else
                ret = 0;
@@ -2223,6 +2245,8 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
        fs_path_reset(dest);
 
        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
+               struct waiting_dir_move *wdm;
+
                fs_path_reset(name);
 
                if (is_waiting_for_rm(sctx, ino)) {
@@ -2233,7 +2257,11 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
                        break;
                }
 
-               if (is_waiting_for_move(sctx, ino)) {
+               wdm = get_waiting_dir_move(sctx, ino);
+               if (wdm && wdm->orphanized) {
+                       ret = gen_unique_name(sctx, ino, gen, name);
+                       stop = 1;
+               } else if (wdm) {
                        ret = get_first_ref(sctx->parent_root, ino,
                                            &parent_inode, &parent_gen, name);
                } else {
@@ -2328,8 +2356,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
                    le64_to_cpu(sctx->send_root->root_item.ctransid));
        if (parent_root) {
-               TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
-                               sctx->parent_root->root_item.uuid);
+               if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
+                       TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                                    parent_root->root_item.received_uuid);
+               else
+                       TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                                    parent_root->root_item.uuid);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
                            le64_to_cpu(sctx->parent_root->root_item.ctransid));
        }
@@ -2923,7 +2955,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
        return entry != NULL;
 }
 
-static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
 {
        struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
        struct rb_node *parent = NULL;
@@ -2934,6 +2966,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
                return -ENOMEM;
        dm->ino = ino;
        dm->rmdir_ino = 0;
+       dm->orphanized = orphanized;
 
        while (*p) {
                parent = *p;
@@ -3030,7 +3063,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
                        goto out;
        }
 
-       ret = add_waiting_dir_move(sctx, pm->ino);
+       ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
        if (ret)
                goto out;
 
@@ -3353,8 +3386,40 @@ out:
        return ret;
 }
 
+/*
+ * Check if inode ino1 is an ancestor of inode ino2 in the given root.
+ * Return 1 if true, 0 if false and < 0 on error.
+ */
+static int is_ancestor(struct btrfs_root *root,
+                      const u64 ino1,
+                      const u64 ino1_gen,
+                      const u64 ino2,
+                      struct fs_path *fs_path)
+{
+       u64 ino = ino2;
+
+       while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+               int ret;
+               u64 parent;
+               u64 parent_gen;
+
+               fs_path_reset(fs_path);
+               ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
+               if (ret < 0) {
+                       if (ret == -ENOENT && ino == ino2)
+                               ret = 0;
+                       return ret;
+               }
+               if (parent == ino1)
+                       return parent_gen == ino1_gen ? 1 : 0;
+               ino = parent;
+       }
+       return 0;
+}
+
 static int wait_for_parent_move(struct send_ctx *sctx,
-                               struct recorded_ref *parent_ref)
+                               struct recorded_ref *parent_ref,
+                               const bool is_orphan)
 {
        int ret = 0;
        u64 ino = parent_ref->dir;
@@ -3374,11 +3439,24 @@ static int wait_for_parent_move(struct send_ctx *sctx,
         * Our current directory inode may not yet be renamed/moved because some
         * ancestor (immediate or not) has to be renamed/moved first. So find if
         * such ancestor exists and make sure our own rename/move happens after
-        * that ancestor is processed.
+        * that ancestor is processed, to avoid infinite path-building loops
+        * (done at get_cur_path()).
         */
        while (ino > BTRFS_FIRST_FREE_OBJECTID) {
                if (is_waiting_for_move(sctx, ino)) {
-                       ret = 1;
+                       /*
+                        * If the current inode is an ancestor of ino in the
+                        * parent root, we need to delay the rename of the
+                        * current inode; otherwise we must not delay it,
+                        * because we could end up with a circular dependency
+                        * of renames, resulting in some directories never
+                        * getting the respective rename operations issued in
+                        * the send stream, or in infinite path-building loops.
+                        */
+                       ret = is_ancestor(sctx->parent_root,
+                                         sctx->cur_ino, sctx->cur_inode_gen,
+                                         ino, path_before);
                        break;
                }
 
@@ -3420,7 +3498,7 @@ out:
                                           ino,
                                           &sctx->new_refs,
                                           &sctx->deleted_refs,
-                                          false);
+                                          is_orphan);
                if (!ret)
                        ret = 1;
        }
@@ -3589,6 +3667,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                        }
                }
 
+               if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
+                   can_rename) {
+                       ret = wait_for_parent_move(sctx, cur, is_orphan);
+                       if (ret < 0)
+                               goto out;
+                       if (ret == 1) {
+                               can_rename = false;
+                               *pending_move = 1;
+                       }
+               }
+
                /*
                 * link/move the ref to the new place. If we have an orphan
                 * inode, move it and update valid_path. If not, link or move
@@ -3609,18 +3698,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                 * dirs, we always have one new and one deleted
                                 * ref. The deleted ref is ignored later.
                                 */
-                               ret = wait_for_parent_move(sctx, cur);
-                               if (ret < 0)
-                                       goto out;
-                               if (ret) {
-                                       *pending_move = 1;
-                               } else {
-                                       ret = send_rename(sctx, valid_path,
-                                                         cur->full_path);
-                                       if (!ret)
-                                               ret = fs_path_copy(valid_path,
-                                                              cur->full_path);
-                               }
+                               ret = send_rename(sctx, valid_path,
+                                                 cur->full_path);
+                               if (!ret)
+                                       ret = fs_path_copy(valid_path,
+                                                          cur->full_path);
                                if (ret < 0)
                                        goto out;
                        } else {
@@ -4508,8 +4590,21 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
        if (ret < 0)
                goto out;
 
-       TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
-                       clone_root->root->root_item.uuid);
+       /*
+        * If the parent we're using has a received_uuid set, then use that as
+        * our clone source, since that is what we will look for when doing a
+        * receive.
+        *
+        * This covers the case that we create a snapshot off of a received
+        * subvolume and then use that as the parent and try to receive on a
+        * different host.
+        */
+       if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
+               TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                            clone_root->root->root_item.received_uuid);
+       else
+               TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                            clone_root->root->root_item.uuid);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
                    le64_to_cpu(clone_root->root->root_item.ctransid));
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
index 9e66f5e724db1e307f19f74df7862550c170f069..cd7ef34d2dce99987fe87f3040ba20038bb3f041 100644 (file)
@@ -135,6 +135,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
  * __btrfs_std_error decodes expected errors from the caller and
  * invokes the appropriate error response.
  */
+__cold
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                       unsigned int line, int errno, const char *fmt, ...)
 {
@@ -247,18 +248,11 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
  * We'll complete the cleanup in btrfs_end_transaction and
  * btrfs_commit_transaction.
  */
+__cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, const char *function,
                               unsigned int line, int errno)
 {
-       /*
-        * Report first abort since mount
-        */
-       if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
-                               &root->fs_info->fs_state)) {
-               WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
-                               errno);
-       }
        trans->aborted = errno;
        /* Nothing used. The other threads that have joined this
         * transaction may be able to continue. */
@@ -281,6 +275,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
  * __btrfs_panic decodes unexpected, fatal errors from the caller,
  * issues an alert, and either panics or BUGs, depending on mount options.
  */
+__cold
 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                   unsigned int line, int errno, const char *fmt, ...)
 {
@@ -841,33 +836,153 @@ out:
        return error;
 }
 
-static struct dentry *get_default_root(struct super_block *sb,
-                                      u64 subvol_objectid)
+static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+                                          u64 subvol_objectid)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(sb);
        struct btrfs_root *root = fs_info->tree_root;
-       struct btrfs_root *new_root;
-       struct btrfs_dir_item *di;
-       struct btrfs_path *path;
-       struct btrfs_key location;
-       struct inode *inode;
-       u64 dir_id;
-       int new = 0;
+       struct btrfs_root *fs_root;
+       struct btrfs_root_ref *root_ref;
+       struct btrfs_inode_ref *inode_ref;
+       struct btrfs_key key;
+       struct btrfs_path *path = NULL;
+       char *name = NULL, *ptr;
+       u64 dirid;
+       int len;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       path->leave_spinning = 1;
+
+       name = kmalloc(PATH_MAX, GFP_NOFS);
+       if (!name) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       ptr = name + PATH_MAX - 1;
+       ptr[0] = '\0';
 
        /*
-        * We have a specific subvol we want to mount, just setup location and
-        * go look up the root.
+        * Walk up the subvolume trees in the tree of tree roots by root
+        * backrefs until we hit the top-level subvolume.
         */
-       if (subvol_objectid) {
-               location.objectid = subvol_objectid;
-               location.type = BTRFS_ROOT_ITEM_KEY;
-               location.offset = (u64)-1;
-               goto find_root;
+       while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+               key.objectid = subvol_objectid;
+               key.type = BTRFS_ROOT_BACKREF_KEY;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+               if (ret < 0) {
+                       goto err;
+               } else if (ret > 0) {
+                       ret = btrfs_previous_item(root, path, subvol_objectid,
+                                                 BTRFS_ROOT_BACKREF_KEY);
+                       if (ret < 0) {
+                               goto err;
+                       } else if (ret > 0) {
+                               ret = -ENOENT;
+                               goto err;
+                       }
+               }
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               subvol_objectid = key.offset;
+
+               root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                         struct btrfs_root_ref);
+               len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
+               ptr -= len + 1;
+               if (ptr < name) {
+                       ret = -ENAMETOOLONG;
+                       goto err;
+               }
+               read_extent_buffer(path->nodes[0], ptr + 1,
+                                  (unsigned long)(root_ref + 1), len);
+               ptr[0] = '/';
+               dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
+               btrfs_release_path(path);
+
+               key.objectid = subvol_objectid;
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+               fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
+               if (IS_ERR(fs_root)) {
+                       ret = PTR_ERR(fs_root);
+                       goto err;
+               }
+
+               /*
+                * Walk up the filesystem tree by inode refs until we hit the
+                * root directory.
+                */
+               while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
+                       key.objectid = dirid;
+                       key.type = BTRFS_INODE_REF_KEY;
+                       key.offset = (u64)-1;
+
+                       ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+                       if (ret < 0) {
+                               goto err;
+                       } else if (ret > 0) {
+                               ret = btrfs_previous_item(fs_root, path, dirid,
+                                                         BTRFS_INODE_REF_KEY);
+                               if (ret < 0) {
+                                       goto err;
+                               } else if (ret > 0) {
+                                       ret = -ENOENT;
+                                       goto err;
+                               }
+                       }
+
+                       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+                       dirid = key.offset;
+
+                       inode_ref = btrfs_item_ptr(path->nodes[0],
+                                                  path->slots[0],
+                                                  struct btrfs_inode_ref);
+                       len = btrfs_inode_ref_name_len(path->nodes[0],
+                                                      inode_ref);
+                       ptr -= len + 1;
+                       if (ptr < name) {
+                               ret = -ENAMETOOLONG;
+                               goto err;
+                       }
+                       read_extent_buffer(path->nodes[0], ptr + 1,
+                                          (unsigned long)(inode_ref + 1), len);
+                       ptr[0] = '/';
+                       btrfs_release_path(path);
+               }
        }
 
+       btrfs_free_path(path);
+       if (ptr == name + PATH_MAX - 1) {
+               name[0] = '/';
+               name[1] = '\0';
+       } else {
+               memmove(name, ptr, name + PATH_MAX - ptr);
+       }
+       return name;
+
+err:
+       btrfs_free_path(path);
+       kfree(name);
+       return ERR_PTR(ret);
+}
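
The name buffer above is filled backwards: the write pointer starts at the
terminating NUL and each component is prepended as "/name" while walking
toward the root. A self-contained userspace illustration of the technique:

    #include <stdio.h>
    #include <string.h>

    #ifndef PATH_MAX
    #define PATH_MAX 4096
    #endif

    int main(void)
    {
            /* components from leaf to root, as the walk discovers them */
            const char *comp[] = { "snap", "subvols", "data" };
            char name[PATH_MAX];
            char *ptr = name + PATH_MAX - 1;
            size_t i;

            *ptr = '\0';
            for (i = 0; i < sizeof(comp) / sizeof(comp[0]); i++) {
                    size_t len = strlen(comp[i]);

                    ptr -= len + 1;
                    if (ptr < name)
                            return 1;       /* -ENAMETOOLONG in the kernel */
                    memcpy(ptr + 1, comp[i], len);
                    ptr[0] = '/';
            }
            printf("%s\n", ptr);            /* prints /data/subvols/snap */
            return 0;
    }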
+
+static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
+{
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_key location;
+       u64 dir_id;
+
        path = btrfs_alloc_path();
        if (!path)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        path->leave_spinning = 1;
 
        /*
@@ -879,58 +994,23 @@ static struct dentry *get_default_root(struct super_block *sb,
        di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
        if (IS_ERR(di)) {
                btrfs_free_path(path);
-               return ERR_CAST(di);
+               return PTR_ERR(di);
        }
        if (!di) {
                /*
                 * Ok the default dir item isn't there.  This is weird since
                 * it's always been there, but don't freak out, just try and
-                * mount to root most subvolume.
+                * mount the top-level subvolume.
                 */
                btrfs_free_path(path);
-               dir_id = BTRFS_FIRST_FREE_OBJECTID;
-               new_root = fs_info->fs_root;
-               goto setup_root;
+               *objectid = BTRFS_FS_TREE_OBJECTID;
+               return 0;
        }
 
        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
        btrfs_free_path(path);
-
-find_root:
-       new_root = btrfs_read_fs_root_no_name(fs_info, &location);
-       if (IS_ERR(new_root))
-               return ERR_CAST(new_root);
-
-       if (!(sb->s_flags & MS_RDONLY)) {
-               int ret;
-               down_read(&fs_info->cleanup_work_sem);
-               ret = btrfs_orphan_cleanup(new_root);
-               up_read(&fs_info->cleanup_work_sem);
-               if (ret)
-                       return ERR_PTR(ret);
-       }
-
-       dir_id = btrfs_root_dirid(&new_root->root_item);
-setup_root:
-       location.objectid = dir_id;
-       location.type = BTRFS_INODE_ITEM_KEY;
-       location.offset = 0;
-
-       inode = btrfs_iget(sb, &location, new_root, &new);
-       if (IS_ERR(inode))
-               return ERR_CAST(inode);
-
-       /*
-        * If we're just mounting the root most subvol put the inode and return
-        * a reference to the dentry.  We will have already gotten a reference
-        * to the inode in btrfs_fill_super so we're good to go.
-        */
-       if (!new && d_inode(sb->s_root) == inode) {
-               iput(inode);
-               return dget(sb->s_root);
-       }
-
-       return d_obtain_root(inode);
+       *objectid = location.objectid;
+       return 0;
 }
 
 static int btrfs_fill_super(struct super_block *sb,
@@ -1108,6 +1188,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",fatal_errors=panic");
        if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
                seq_printf(seq, ",commit=%d", info->commit_interval);
+       seq_printf(seq, ",subvolid=%llu",
+                 BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+       seq_puts(seq, ",subvol=");
+       seq_dentry(seq, dentry, " \t\n\\");
        return 0;
 }
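
With this change the options shown in /proc/mounts always include both
identifiers of the mounted subvolume, e.g. ",subvolid=257,subvol=/snap";
seq_dentry() escapes spaces, tabs, newlines and backslashes in the path so
the options line stays parseable.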
 
@@ -1138,107 +1222,139 @@ static inline int is_subvolume_inode(struct inode *inode)
 }
 
 /*
- * This will strip out the subvol=%s argument for an argument string and add
- * subvolid=0 to make sure we get the actual tree root for path walking to the
- * subvol we want.
+ * This will add subvolid=0 to the argument string while removing any subvol=
+ * and subvolid= arguments to make sure we get the top-level root for path
+ * walking to the subvol we want.
  */
 static char *setup_root_args(char *args)
 {
-       unsigned len = strlen(args) + 2 + 1;
-       char *src, *dst, *buf;
+       char *buf, *dst, *sep;
 
-       /*
-        * We need the same args as before, but with this substitution:
-        * s!subvol=[^,]+!subvolid=0!
-        *
-        * Since the replacement string is up to 2 bytes longer than the
-        * original, allocate strlen(args) + 2 + 1 bytes.
-        */
+       if (!args)
+               return kstrdup("subvolid=0", GFP_NOFS);
 
-       src = strstr(args, "subvol=");
-       /* This shouldn't happen, but just in case.. */
-       if (!src)
-               return NULL;
-
-       buf = dst = kmalloc(len, GFP_NOFS);
+       /* The worst case is that we add ",subvolid=0" to the end. */
+       buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS);
        if (!buf)
                return NULL;
 
-       /*
-        * If the subvol= arg is not at the start of the string,
-        * copy whatever precedes it into buf.
-        */
-       if (src != args) {
-               *src++ = '\0';
-               strcpy(buf, args);
-               dst += strlen(args);
+       while (1) {
+               sep = strchrnul(args, ',');
+               if (!strstarts(args, "subvol=") &&
+                   !strstarts(args, "subvolid=")) {
+                       memcpy(dst, args, sep - args);
+                       dst += sep - args;
+                       *dst++ = ',';
+               }
+               if (*sep)
+                       args = sep + 1;
+               else
+                       break;
        }
-
        strcpy(dst, "subvolid=0");
-       dst += strlen("subvolid=0");
-
-       /*
-        * If there is a "," after the original subvol=... string,
-        * copy that suffix into our buffer.  Otherwise, we're done.
-        */
-       src = strchr(src, ',');
-       if (src)
-               strcpy(dst, src);
 
        return buf;
 }
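
Tracing the rewrite: an option string of "subvol=/snap,ro,subvolid=257"
comes out as "ro,subvolid=0", and a NULL string becomes plain "subvolid=0",
so the initial mount always resolves the top-level root no matter which
subvolume options the caller passed.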
 
-static struct dentry *mount_subvol(const char *subvol_name, int flags,
-                                  const char *device_name, char *data)
+static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
+                                  int flags, const char *device_name,
+                                  char *data)
 {
        struct dentry *root;
-       struct vfsmount *mnt;
+       struct vfsmount *mnt = NULL;
        char *newargs;
+       int ret;
 
        newargs = setup_root_args(data);
-       if (!newargs)
-               return ERR_PTR(-ENOMEM);
-       mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
-                            newargs);
+       if (!newargs) {
+               root = ERR_PTR(-ENOMEM);
+               goto out;
+       }
 
-       if (PTR_RET(mnt) == -EBUSY) {
+       mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
+       if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
                if (flags & MS_RDONLY) {
-                       mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
-                                            newargs);
+                       mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
+                                            device_name, newargs);
                } else {
-                       int r;
-                       mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
-                                            newargs);
+                       mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
+                                            device_name, newargs);
                        if (IS_ERR(mnt)) {
-                               kfree(newargs);
-                               return ERR_CAST(mnt);
+                               root = ERR_CAST(mnt);
+                               mnt = NULL;
+                               goto out;
                        }
 
-                       r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
-                       if (r < 0) {
-                               /* FIXME: release vfsmount mnt ??*/
-                               kfree(newargs);
-                               return ERR_PTR(r);
+                       down_write(&mnt->mnt_sb->s_umount);
+                       ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
+                       up_write(&mnt->mnt_sb->s_umount);
+                       if (ret < 0) {
+                               root = ERR_PTR(ret);
+                               goto out;
                        }
                }
        }
+       if (IS_ERR(mnt)) {
+               root = ERR_CAST(mnt);
+               mnt = NULL;
+               goto out;
+       }
 
-       kfree(newargs);
+       if (!subvol_name) {
+               if (!subvol_objectid) {
+                       ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
+                                                         &subvol_objectid);
+                       if (ret) {
+                               root = ERR_PTR(ret);
+                               goto out;
+                       }
+               }
+               subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
+                                                           subvol_objectid);
+               if (IS_ERR(subvol_name)) {
+                       root = ERR_CAST(subvol_name);
+                       subvol_name = NULL;
+                       goto out;
+               }
 
-       if (IS_ERR(mnt))
-               return ERR_CAST(mnt);
+       }
 
        root = mount_subtree(mnt, subvol_name);
+       /* mount_subtree() drops our reference on the vfsmount. */
+       mnt = NULL;
 
-       if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
+       if (!IS_ERR(root)) {
                struct super_block *s = root->d_sb;
-               dput(root);
-               root = ERR_PTR(-EINVAL);
-               deactivate_locked_super(s);
-               printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
-                               subvol_name);
+               struct inode *root_inode = d_inode(root);
+               u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
+
+               ret = 0;
+               if (!is_subvolume_inode(root_inode)) {
+                       pr_err("BTRFS: '%s' is not a valid subvolume\n",
+                              subvol_name);
+                       ret = -EINVAL;
+               }
+               if (subvol_objectid && root_objectid != subvol_objectid) {
+                       /*
+                        * This will also catch a race condition where a
+                        * subvolume which was passed by ID is renamed and
+                        * another subvolume is renamed over the old location.
+                        */
+                       pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n",
+                              subvol_name, subvol_objectid);
+                       ret = -EINVAL;
+               }
+               if (ret) {
+                       dput(root);
+                       root = ERR_PTR(ret);
+                       deactivate_locked_super(s);
+               }
        }
 
+out:
+       mntput(mnt);
+       kfree(newargs);
+       kfree(subvol_name);
        return root;
 }
 
@@ -1303,7 +1419,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 {
        struct block_device *bdev = NULL;
        struct super_block *s;
-       struct dentry *root;
        struct btrfs_fs_devices *fs_devices = NULL;
        struct btrfs_fs_info *fs_info = NULL;
        struct security_mnt_opts new_sec_opts;
@@ -1323,10 +1438,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                return ERR_PTR(error);
        }
 
-       if (subvol_name) {
-               root = mount_subvol(subvol_name, flags, device_name, data);
-               kfree(subvol_name);
-               return root;
+       if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+               /* mount_subvol() will free subvol_name. */
+               return mount_subvol(subvol_name, subvol_objectid, flags,
+                                   device_name, data);
        }
 
        security_init_mnt_opts(&new_sec_opts);
@@ -1392,23 +1507,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                error = btrfs_fill_super(s, fs_devices, data,
                                         flags & MS_SILENT ? 1 : 0);
        }
-
-       root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
-       if (IS_ERR(root)) {
+       if (error) {
                deactivate_locked_super(s);
-               error = PTR_ERR(root);
                goto error_sec_opts;
        }
 
        fs_info = btrfs_sb(s);
        error = setup_security_options(fs_info, s, &new_sec_opts);
        if (error) {
-               dput(root);
                deactivate_locked_super(s);
                goto error_sec_opts;
        }
 
-       return root;
+       return dget(s->s_root);
 
 error_close_devices:
        btrfs_close_devices(fs_devices);
index e8a4c86d274d5c82cda9fc7ac40c6974f429ece8..603b0cc2b9bbf627f0b07fec3dad216164e6ddf3 100644 (file)
@@ -33,6 +33,7 @@
 #include "volumes.h"
 
 static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
+static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
 
 static u64 get_features(struct btrfs_fs_info *fs_info,
                        enum btrfs_feature_set set)
@@ -428,7 +429,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
 
 BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
 
-static struct attribute *btrfs_attrs[] = {
+static const struct attribute *btrfs_attrs[] = {
        BTRFS_ATTR_PTR(label),
        BTRFS_ATTR_PTR(nodesize),
        BTRFS_ATTR_PTR(sectorsize),
@@ -438,21 +439,29 @@ static struct attribute *btrfs_attrs[] = {
 
 static void btrfs_release_super_kobj(struct kobject *kobj)
 {
-       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
-       complete(&fs_info->kobj_unregister);
+       struct btrfs_fs_devices *fs_devs = to_fs_devs(kobj);
+
+       memset(&fs_devs->super_kobj, 0, sizeof(struct kobject));
+       complete(&fs_devs->kobj_unregister);
 }
 
 static struct kobj_type btrfs_ktype = {
        .sysfs_ops      = &kobj_sysfs_ops,
        .release        = btrfs_release_super_kobj,
-       .default_attrs  = btrfs_attrs,
 };
 
+static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj)
+{
+       if (kobj->ktype != &btrfs_ktype)
+               return NULL;
+       return container_of(kobj, struct btrfs_fs_devices, super_kobj);
+}
+
 static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
 {
        if (kobj->ktype != &btrfs_ktype)
                return NULL;
-       return container_of(kobj, struct btrfs_fs_info, super_kobj);
+       return to_fs_devs(kobj)->fs_info;
 }
 
 #define NUM_FEATURE_BITS 64
@@ -493,12 +502,12 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
                        attrs[0] = &fa->kobj_attr.attr;
                        if (add) {
                                int ret;
-                               ret = sysfs_merge_group(&fs_info->super_kobj,
+                               ret = sysfs_merge_group(&fs_info->fs_devices->super_kobj,
                                                        &agroup);
                                if (ret)
                                        return ret;
                        } else
-                               sysfs_unmerge_group(&fs_info->super_kobj,
+                               sysfs_unmerge_group(&fs_info->fs_devices->super_kobj,
                                                    &agroup);
                }
 
@@ -506,25 +515,49 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
        return 0;
 }
 
-static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
+static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
+{
+       if (fs_devs->device_dir_kobj) {
+               kobject_del(fs_devs->device_dir_kobj);
+               kobject_put(fs_devs->device_dir_kobj);
+               fs_devs->device_dir_kobj = NULL;
+       }
+
+       if (fs_devs->super_kobj.state_initialized) {
+               kobject_del(&fs_devs->super_kobj);
+               kobject_put(&fs_devs->super_kobj);
+               wait_for_completion(&fs_devs->kobj_unregister);
+       }
+}
+
+/* when fs_devs is NULL it will remove all fsid kobject */
+void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
 {
-       kobject_del(&fs_info->super_kobj);
-       kobject_put(&fs_info->super_kobj);
-       wait_for_completion(&fs_info->kobj_unregister);
+       struct list_head *fs_uuids = btrfs_get_fs_uuids();
+
+       if (fs_devs) {
+               __btrfs_sysfs_remove_fsid(fs_devs);
+               return;
+       }
+
+       list_for_each_entry(fs_devs, fs_uuids, list) {
+               __btrfs_sysfs_remove_fsid(fs_devs);
+       }
 }
 
 void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
 {
+       btrfs_reset_fs_info_ptr(fs_info);
+
        if (fs_info->space_info_kobj) {
                sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
                kobject_del(fs_info->space_info_kobj);
                kobject_put(fs_info->space_info_kobj);
        }
-       kobject_del(fs_info->device_dir_kobj);
-       kobject_put(fs_info->device_dir_kobj);
        addrm_unknown_feature_attrs(fs_info, false);
-       sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
-       __btrfs_sysfs_remove_one(fs_info);
+       sysfs_remove_group(&fs_info->fs_devices->super_kobj, &btrfs_feature_attr_group);
+       sysfs_remove_files(&fs_info->fs_devices->super_kobj, btrfs_attrs);
+       btrfs_kobj_rm_device(fs_info->fs_devices, NULL);
 }
 
 const char * const btrfs_feature_set_names[3] = {
@@ -602,40 +635,60 @@ static void init_feature_attrs(void)
        }
 }
 
-int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+/* When one_device is NULL, this removes all device links. */
+int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
                struct btrfs_device *one_device)
 {
        struct hd_struct *disk;
        struct kobject *disk_kobj;
 
-       if (!fs_info->device_dir_kobj)
+       if (!fs_devices->device_dir_kobj)
                return -EINVAL;
 
        if (one_device && one_device->bdev) {
                disk = one_device->bdev->bd_part;
                disk_kobj = &part_to_dev(disk)->kobj;
 
-               sysfs_remove_link(fs_info->device_dir_kobj,
+               sysfs_remove_link(fs_devices->device_dir_kobj,
+                                               disk_kobj->name);
+       }
+
+       if (one_device)
+               return 0;
+
+       list_for_each_entry(one_device,
+                       &fs_devices->devices, dev_list) {
+               if (!one_device->bdev)
+                       continue;
+               disk = one_device->bdev->bd_part;
+               disk_kobj = &part_to_dev(disk)->kobj;
+
+               sysfs_remove_link(fs_devices->device_dir_kobj,
                                                disk_kobj->name);
        }
 
        return 0;
 }
 
-int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
-               struct btrfs_device *one_device)
+int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
 {
-       int error = 0;
-       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-       struct btrfs_device *dev;
-
-       if (!fs_info->device_dir_kobj)
-               fs_info->device_dir_kobj = kobject_create_and_add("devices",
-                                               &fs_info->super_kobj);
+       if (!fs_devs->device_dir_kobj)
+               fs_devs->device_dir_kobj = kobject_create_and_add("devices",
+                                               &fs_devs->super_kobj);
 
-       if (!fs_info->device_dir_kobj)
+       if (!fs_devs->device_dir_kobj)
                return -ENOMEM;
 
+       return 0;
+}
+
+int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
+                               struct btrfs_device *one_device)
+{
+       int error = 0;
+       struct btrfs_device *dev;
+
        list_for_each_entry(dev, &fs_devices->devices, dev_list) {
                struct hd_struct *disk;
                struct kobject *disk_kobj;
@@ -649,7 +702,7 @@ int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
                disk = dev->bdev->bd_part;
                disk_kobj = &part_to_dev(disk)->kobj;
 
-               error = sysfs_create_link(fs_info->device_dir_kobj,
+               error = sysfs_create_link(fs_devices->device_dir_kobj,
                                          disk_kobj, disk_kobj->name);
                if (error)
                        break;
@@ -667,34 +720,51 @@ static struct dentry *btrfs_debugfs_root_dentry;
 /* Debugging tunables and exported data */
 u64 btrfs_debugfs_test;
 
+/*
+ * Can be called by the device discovery thread.
+ * A parent kobject can be specified for a seed device.
+ */
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
+                               struct kobject *parent)
+{
+       int error;
+
+       init_completion(&fs_devs->kobj_unregister);
+       fs_devs->super_kobj.kset = btrfs_kset;
+       error = kobject_init_and_add(&fs_devs->super_kobj,
+                               &btrfs_ktype, parent, "%pU", fs_devs->fsid);
+       return error;
+}
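The parent argument is what lets a seed filesystem's fsid kobject be nested under the sprout that consumed it. A minimal sketch of the two call patterns, assuming a sprouted fs_devices whose ->seed chain is populated (naming illustrative, error handling elided):

    struct btrfs_fs_devices *sprout = fs_info->fs_devices;

    /* top-level fsid: lives directly in the btrfs kset (parent == NULL) */
    btrfs_sysfs_add_fsid(sprout, NULL);

    /* seed fsid: nested under the sprout's own kobject */
    if (sprout->seed)
            btrfs_sysfs_add_fsid(sprout->seed, &sprout->super_kobj);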
+
 int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
 {
        int error;
+       struct btrfs_fs_devices *fs_devs = fs_info->fs_devices;
+       struct kobject *super_kobj = &fs_devs->super_kobj;
+
+       btrfs_set_fs_info_ptr(fs_info);
 
-       init_completion(&fs_info->kobj_unregister);
-       fs_info->super_kobj.kset = btrfs_kset;
-       error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
-                                    "%pU", fs_info->fsid);
+       error = btrfs_kobj_add_device(fs_devs, NULL);
        if (error)
                return error;
 
-       error = sysfs_create_group(&fs_info->super_kobj,
-                                  &btrfs_feature_attr_group);
+       error = sysfs_create_files(super_kobj, btrfs_attrs);
        if (error) {
-               __btrfs_sysfs_remove_one(fs_info);
+               btrfs_kobj_rm_device(fs_devs, NULL);
                return error;
        }
 
-       error = addrm_unknown_feature_attrs(fs_info, true);
+       error = sysfs_create_group(super_kobj,
+                                  &btrfs_feature_attr_group);
        if (error)
                goto failure;
 
-       error = btrfs_kobj_add_device(fs_info, NULL);
+       error = addrm_unknown_feature_attrs(fs_info, true);
        if (error)
                goto failure;
 
        fs_info->space_info_kobj = kobject_create_and_add("allocation",
-                                                 &fs_info->super_kobj);
+                                                 super_kobj);
        if (!fs_info->space_info_kobj) {
                error = -ENOMEM;
                goto failure;
index 3a4bbed723fde53e05228ace7dea9e49165f246a..6392527bcc15d5e56b66d39a3faac72fba730e2f 100644 (file)
@@ -82,8 +82,12 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
 extern const char * const btrfs_feature_set_names[3];
 extern struct kobj_type space_info_ktype;
 extern struct kobj_type btrfs_raid_ktype;
-int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
                struct btrfs_device *one_device);
-int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
                 struct btrfs_device *one_device);
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
+                               struct kobject *parent);
+int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
 #endif /* _BTRFS_SYSFS_H_ */
index c32a7ba76bcaed1262dffff1c2b8a6a8db63ce29..846d277b190137065a6f49192c9035486aab2241 100644 (file)
@@ -21,6 +21,7 @@
 #include "../transaction.h"
 #include "../disk-io.h"
 #include "../qgroup.h"
+#include "../backref.h"
 
 static void init_dummy_trans(struct btrfs_trans_handle *trans)
 {
@@ -227,6 +228,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
 {
        struct btrfs_trans_handle trans;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct ulist *old_roots = NULL;
+       struct ulist *new_roots = NULL;
        int ret;
 
        init_dummy_trans(&trans);
@@ -238,10 +241,15 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
                return ret;
        }
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+       /*
+        * Since the test trans doesn't have the complicated delayed refs,
+        * we can only call btrfs_qgroup_account_extent() directly to test
+        * quota.
+        */
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
        if (ret) {
-               test_msg("Couldn't add space to a qgroup %d\n", ret);
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
                return ret;
        }
 
@@ -249,9 +257,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
        if (ret)
                return ret;
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Delayed qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
@@ -259,21 +276,32 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
                test_msg("Qgroup counts didn't match expected values\n");
                return -EINVAL;
        }
+       old_roots = NULL;
+       new_roots = NULL;
+
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
 
        ret = remove_extent_item(root, 4096, 4096);
        if (ret)
                return -EINVAL;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_SUB_EXCL, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Couldn't remove space from the qgroup %d\n", ret);
-               return -EINVAL;
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
+               return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return -EINVAL;
        }
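All four accounting sites in these tests follow the same shape: capture the set of roots referencing the extent before the change, apply the change, capture again, then hand both lists to btrfs_qgroup_account_extent(). A condensed sketch of that pattern; note the tests only free the ulists themselves when a lookup fails, which matches btrfs_qgroup_account_extent() taking ownership of both lists:

    struct ulist *old_roots = NULL, *new_roots = NULL;

    ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
    if (ret)
            goto out;       /* free old_roots on this path */

    /* ... insert or remove the extent ref under test ... */

    ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
    if (ret)
            goto out;       /* free both ulists on this path */

    /* consumes old_roots and new_roots */
    ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
                                      old_roots, new_roots);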
 
@@ -294,6 +322,8 @@ static int test_multiple_refs(struct btrfs_root *root)
 {
        struct btrfs_trans_handle trans;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct ulist *old_roots = NULL;
+       struct ulist *new_roots = NULL;
        int ret;
 
        init_dummy_trans(&trans);
@@ -307,20 +337,29 @@ static int test_multiple_refs(struct btrfs_root *root)
                return ret;
        }
 
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
        ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
        if (ret)
                return ret;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Couldn't add space to a qgroup %d\n", ret);
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
                return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Delayed qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
@@ -329,20 +368,29 @@ static int test_multiple_refs(struct btrfs_root *root)
                return -EINVAL;
        }
 
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
        ret = add_tree_ref(root, 4096, 4096, 0, 256);
        if (ret)
                return ret;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_ADD_SHARED, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Qgroup record ref failed %d\n", ret);
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
                return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
@@ -356,20 +404,29 @@ static int test_multiple_refs(struct btrfs_root *root)
                return -EINVAL;
        }
 
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
        ret = remove_extent_ref(root, 4096, 4096, 0, 256);
        if (ret)
                return ret;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_SUB_SHARED, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Qgroup record ref failed %d\n", ret);
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
                return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
index 5628e25250c0d2cca318f03d41f4e6d64b4d40e0..c0f18e7266b673c22357ebb033e40a4ec6e28b60 100644 (file)
@@ -225,12 +225,14 @@ loop:
        cur_trans->dirty_bg_run = 0;
 
        cur_trans->delayed_refs.href_root = RB_ROOT;
+       cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
        cur_trans->delayed_refs.pending_csums = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;
+       cur_trans->delayed_refs.qgroup_to_skip = 0;
 
        /*
         * although the tree mod log is per file system and not per transaction,
@@ -509,6 +511,7 @@ again:
        h->transaction = cur_trans;
        h->blocks_used = 0;
        h->bytes_reserved = 0;
+       h->chunk_bytes_reserved = 0;
        h->root = root;
        h->delayed_ref_updates = 0;
        h->use_count = 1;
@@ -792,6 +795,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);
 
+       btrfs_trans_release_chunk_metadata(trans);
+
        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root) &&
            ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
@@ -1290,6 +1295,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        if (pending->error)
                goto no_free_objectid;
 
+       /*
+        * Make the qgroup code skip the new snapshot's qgroupid, as it is
+        * accounted for later by btrfs_qgroup_inherit().
+        */
+       btrfs_set_skip_qgroup(trans, objectid);
+
        btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 
        if (to_reserve > 0) {
@@ -1298,7 +1309,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                                     to_reserve,
                                                     BTRFS_RESERVE_NO_FLUSH);
                if (pending->error)
-                       goto no_free_objectid;
+                       goto clear_skip_qgroup;
        }
 
        key.objectid = objectid;
@@ -1396,25 +1407,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
-
-       /*
-        * We need to flush delayed refs in order to make sure all of our quota
-        * operations have been done before we call btrfs_qgroup_inherit.
-        */
-       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto fail;
-       }
-
-       ret = btrfs_qgroup_inherit(trans, fs_info,
-                                  root->root_key.objectid,
-                                  objectid, pending->inherit);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto fail;
-       }
-
        /* see comments in should_cow_block() */
        set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
        smp_wmb();
@@ -1497,11 +1489,37 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                        goto fail;
                }
        }
+
+       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
+       /*
+        * account qgroup counters before qgroup_inherit()
+        */
+       ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+       if (ret)
+               goto fail;
+       ret = btrfs_qgroup_account_extents(trans, fs_info);
+       if (ret)
+               goto fail;
+       ret = btrfs_qgroup_inherit(trans, fs_info,
+                                  root->root_key.objectid,
+                                  objectid, pending->inherit);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
 fail:
        pending->error = ret;
 dir_item_existed:
        trans->block_rsv = rsv;
        trans->bytes_reserved = 0;
+clear_skip_qgroup:
+       btrfs_clear_skip_qgroup(trans);
 no_free_objectid:
        kfree(new_root_item);
 root_item_alloc_fail:
@@ -1963,6 +1981,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                goto scrub_continue;
        }
 
+       /* Record old roots for later qgroup accounting */
+       ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info);
+       if (ret) {
+               mutex_unlock(&root->fs_info->reloc_mutex);
+               goto scrub_continue;
+       }
+
        /*
         * make sure none of the code above managed to slip in a
         * delayed item
@@ -2004,6 +2029,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         */
        btrfs_free_log_root_tree(trans, root->fs_info);
 
+       /*
+        * Since the fs roots are all committed, we can get quite accurate
+        * new_roots, so do the quota accounting here.
+        */
+       ret = btrfs_qgroup_account_extents(trans, root->fs_info);
+       if (ret < 0) {
+               mutex_unlock(&root->fs_info->tree_log_mutex);
+               mutex_unlock(&root->fs_info->reloc_mutex);
+               goto scrub_continue;
+       }
+
        ret = commit_cowonly_roots(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
@@ -2054,6 +2090,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
        clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
 
+       btrfs_trans_release_chunk_metadata(trans);
+
        spin_lock(&root->fs_info->trans_lock);
        cur_trans->state = TRANS_STATE_UNBLOCKED;
        root->fs_info->running_transaction = NULL;
@@ -2123,6 +2161,7 @@ scrub_continue:
        btrfs_scrub_continue(root);
 cleanup_transaction:
        btrfs_trans_release_metadata(trans, root);
+       btrfs_trans_release_chunk_metadata(trans);
        trans->block_rsv = NULL;
        if (trans->qgroup_reserved) {
                btrfs_qgroup_free(root, trans->qgroup_reserved);
index 0b24755596bab2fe4c5ba0c60511db33aff51254..eb09c2067fa8d3e9f9e11680fbcbc152130354bc 100644 (file)
@@ -102,6 +102,7 @@ struct btrfs_transaction {
 struct btrfs_trans_handle {
        u64 transid;
        u64 bytes_reserved;
+       u64 chunk_bytes_reserved;
        u64 qgroup_reserved;
        unsigned long use_count;
        unsigned long blocks_reserved;
@@ -153,6 +154,29 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
        spin_unlock(&BTRFS_I(inode)->lock);
 }
 
+/*
+ * Make the qgroup code skip the given qgroupid, meaning the old/new_roots
+ * computed for qgroup accounting won't contain that qgroupid.
+ */
+static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans,
+                                        u64 qgroupid)
+{
+       struct btrfs_delayed_ref_root *delayed_refs;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       WARN_ON(delayed_refs->qgroup_to_skip);
+       delayed_refs->qgroup_to_skip = qgroupid;
+}
+
+static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_delayed_ref_root *delayed_refs;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       WARN_ON(!delayed_refs->qgroup_to_skip);
+       delayed_refs->qgroup_to_skip = 0;
+}
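create_pending_snapshot() brackets the whole snapshot creation with this pair, as the transaction.c hunk above shows; a condensed sketch of the intended pairing:

    btrfs_set_skip_qgroup(trans, objectid); /* before any refs for the new root exist */

    /* ... create snapshot items, run delayed refs, btrfs_qgroup_inherit() ... */

    btrfs_clear_skip_qgroup(trans);         /* reached on success and error paths alike */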
+
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
index a63719cc95787e2c6c2c8e55c4d674de3f58e19b..a4b9c8b2d35ab9d93676588ad426726788f032ce 100644 (file)
@@ -52,9 +52,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                goto out;
 
-       if (btrfs_test_opt(root, SSD))
-               goto out;
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
index d04968374e9d8bd5b118024d53ea0a5bfa6d0bd2..1ce80c1c4eb6c1a414cfe88009c5c1c0e5c14132 100644 (file)
@@ -3881,12 +3881,6 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
                                     &ordered->flags))
                        continue;
 
-               if (ordered->csum_bytes_left) {
-                       btrfs_start_ordered_extent(inode, ordered, 0);
-                       wait_event(ordered->wait,
-                                  ordered->csum_bytes_left == 0);
-               }
-
                list_for_each_entry(sum, &ordered->list, list) {
                        ret = btrfs_csum_file_blocks(trans, log, sum);
                        if (ret)
index 840a38b2778a6e97ee013f0d2333e179c3c475e3..91feb2bdefeeb1f8849a3ae6e88fac5ec0e9701c 100644 (file)
@@ -132,6 +132,15 @@ static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
        return NULL;
 }
 
+static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node)
+{
+       rb_erase(&node->rb_node, &ulist->root);
+       list_del(&node->list);
+       kfree(node);
+       BUG_ON(ulist->nnodes == 0);
+       ulist->nnodes--;
+}
+
 static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
 {
        struct rb_node **p = &ulist->root.rb_node;
@@ -197,9 +206,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
 
        node->val = val;
        node->aux = aux;
-#ifdef CONFIG_BTRFS_DEBUG
-       node->seqnum = ulist->nnodes;
-#endif
 
        ret = ulist_rbtree_insert(ulist, node);
        ASSERT(!ret);
@@ -209,6 +215,33 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
        return 1;
 }
 
+/*
+ * ulist_del - delete one node from ulist
+ * @ulist:     ulist to remove node from
+ * @val:       value to delete
+ * @aux:       aux to delete
+ *
+ * The deletion will only be done when *BOTH* val and aux match.
+ * Return 0 for successful delete.
+ * Return > 0 for not found.
+ */
+int ulist_del(struct ulist *ulist, u64 val, u64 aux)
+{
+       struct ulist_node *node;
+
+       node = ulist_rbtree_search(ulist, val);
+       /* Not found */
+       if (!node)
+               return 1;
+
+       if (node->aux != aux)
+               return 1;
+
+       /* Found it, erase the node */
+       ulist_rbtree_erase(ulist, node);
+       return 0;
+}
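Since deletion requires both fields to match, a stale aux value leaves the node in place. A small usage sketch against the existing ulist API:

    struct ulist *ul = ulist_alloc(GFP_KERNEL);

    ulist_add(ul, 4096, 1, GFP_KERNEL);
    ret = ulist_del(ul, 4096, 2);   /* returns 1: aux mismatch, node kept */
    ret = ulist_del(ul, 4096, 1);   /* returns 0: node erased */
    ulist_free(ul);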
+
 /**
  * ulist_next - iterate ulist
  * @ulist:     ulist to iterate
@@ -237,15 +270,7 @@ struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
                uiter->cur_list = uiter->cur_list->next;
        } else {
                uiter->cur_list = ulist->nodes.next;
-#ifdef CONFIG_BTRFS_DEBUG
-               uiter->i = 0;
-#endif
        }
        node = list_entry(uiter->cur_list, struct ulist_node, list);
-#ifdef CONFIG_BTRFS_DEBUG
-       ASSERT(node->seqnum == uiter->i);
-       ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
-       uiter->i++;
-#endif
        return node;
 }
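With the debug-only seqnum bookkeeping removed, iteration reduces to a plain list walk; the usual caller pattern looks like this:

    struct ulist_iterator uiter;
    struct ulist_node *node;

    ULIST_ITER_INIT(&uiter);
    while ((node = ulist_next(ul, &uiter)))
            pr_debug("val %llu aux %llu\n", node->val, node->aux);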
index 4c29db604bbe135fb1bdb3743c7eee408499d25c..a01a2c45825f5c8cfa569218850acd91c3cf3e1c 100644 (file)
@@ -57,6 +57,7 @@ void ulist_free(struct ulist *ulist);
 int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
 int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
                    u64 *old_aux, gfp_t gfp_mask);
+int ulist_del(struct ulist *ulist, u64 val, u64 aux);
 
 /* just like ulist_add_merge() but take a pointer for the aux data */
 static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
index 53af23f2c087ad015e720af094fbfc53e1671317..4b438b4c8c9195b3aaf11519339719efbef7b21b 100644 (file)
@@ -52,6 +52,10 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
+struct list_head *btrfs_get_fs_uuids(void)
+{
+       return &fs_uuids;
+}
 
 static struct btrfs_fs_devices *__alloc_fs_devices(void)
 {
@@ -441,6 +445,61 @@ static void pending_bios_fn(struct btrfs_work *work)
        run_scheduled_bios(device);
 }
 
+
+void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct btrfs_device *dev;
+
+       if (!cur_dev->name)
+               return;
+
+       list_for_each_entry(fs_devs, &fs_uuids, list) {
+               int del = 1;
+
+               if (fs_devs->opened)
+                       continue;
+               if (fs_devs->seeding)
+                       continue;
+
+               list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+
+                       if (dev == cur_dev)
+                               continue;
+                       if (!dev->name)
+                               continue;
+
+                       /*
+                        * Todo: This won't be enough. What if the same device
+                        * comes back (with a new uuid) under its mapper path?
+                        * But for now this does help, as an admin will mostly
+                        * use either the mapper or the non-mapper path
+                        * throughout.
+                        */
+                       rcu_read_lock();
+                       del = strcmp(rcu_str_deref(dev->name),
+                                               rcu_str_deref(cur_dev->name));
+                       rcu_read_unlock();
+                       if (!del)
+                               break;
+               }
+
+               if (!del) {
+                       /* delete the stale device */
+                       if (fs_devs->num_devices == 1) {
+                               btrfs_sysfs_remove_fsid(fs_devs);
+                               list_del(&fs_devs->list);
+                               free_fs_devices(fs_devs);
+                       } else {
+                               fs_devs->num_devices--;
+                               list_del(&dev->dev_list);
+                               rcu_string_free(dev->name);
+                               kfree(dev);
+                       }
+                       break;
+               }
+       }
+}
+
 /*
  * Add new device to list of registered devices
  *
@@ -556,6 +615,12 @@ static noinline int device_list_add(const char *path,
        if (!fs_devices->opened)
                device->generation = found_transid;
 
+       /*
+        * If there is a new btrfs filesystem on an already registered
+        * device, then remove the stale device entry.
+        */
+       btrfs_free_stale_device(device);
+
        *fs_devices_ret = fs_devices;
 
        return ret;
@@ -693,13 +758,13 @@ static void free_device(struct rcu_head *head)
 
 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 {
-       struct btrfs_device *device;
+       struct btrfs_device *device, *tmp;
 
        if (--fs_devices->opened > 0)
                return 0;
 
        mutex_lock(&fs_devices->device_list_mutex);
-       list_for_each_entry(device, &fs_devices->devices, dev_list) {
+       list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
                struct btrfs_device *new_device;
                struct rcu_string *name;
 
@@ -1067,15 +1132,31 @@ again:
 
                map = (struct map_lookup *)em->bdev;
                for (i = 0; i < map->num_stripes; i++) {
+                       u64 end;
+
                        if (map->stripes[i].dev != device)
                                continue;
                        if (map->stripes[i].physical >= physical_start + len ||
                            map->stripes[i].physical + em->orig_block_len <=
                            physical_start)
                                continue;
-                       *start = map->stripes[i].physical +
-                               em->orig_block_len;
-                       ret = 1;
+                       /*
+                        * Make sure that while processing the pinned list we do
+                        * not override our *start with a lower value, because
+                        * we can have pinned chunks that fall within this
+                        * device hole and that have lower physical addresses
+                        * than the pending chunks we processed before. If we
+                        * do not take this special care we can end up getting
+                        * 2 pending chunks that start at the same physical
+                        * device offsets because the end offset of a pinned
+                        * chunk can be equal to the start offset of some
+                        * pending chunk.
+                        */
+                       end = map->stripes[i].physical + em->orig_block_len;
+                       if (end > *start) {
+                               *start = end;
+                               ret = 1;
+                       }
                }
        }
        if (search_list == &trans->transaction->pending_chunks) {
@@ -1706,7 +1787,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        if (device->bdev) {
                device->fs_devices->open_devices--;
                /* remove sysfs entry */
-               btrfs_kobj_rm_device(root->fs_info, device);
+               btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
        }
 
        call_rcu(&device->rcu, free_device);
@@ -1875,6 +1956,9 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
        mutex_lock(&uuid_mutex);
        WARN_ON(!tgtdev);
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
+
+       btrfs_kobj_rm_device(fs_info->fs_devices, tgtdev);
+
        if (tgtdev->bdev) {
                btrfs_scratch_superblock(tgtdev);
                fs_info->fs_devices->open_devices--;
@@ -2211,7 +2295,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                                    tmp + 1);
 
        /* add sysfs device entry */
-       btrfs_kobj_add_device(root->fs_info, device);
+       btrfs_kobj_add_device(root->fs_info->fs_devices, device);
 
        /*
         * we've got more storage, clear any full flags on the space
@@ -2252,8 +2336,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                 */
                snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
                                                root->fs_info->fsid);
-               if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
-                       goto error_trans;
+               if (kobject_rename(&root->fs_info->fs_devices->super_kobj,
+                                                               fsid_buf))
+                       pr_warn("BTRFS: sysfs: failed to rename fsid for sprout\n");
        }
 
        root->fs_info->num_tolerated_disk_barrier_failures =
@@ -2289,7 +2374,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 error_trans:
        btrfs_end_transaction(trans, root);
        rcu_string_free(device->name);
-       btrfs_kobj_rm_device(root->fs_info, device);
+       btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
        kfree(device);
 error:
        blkdev_put(bdev, FMODE_EXCL);
@@ -2609,6 +2694,9 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
        map = (struct map_lookup *)em->bdev;
+       lock_chunks(root->fs_info->chunk_root);
+       check_system_chunk(trans, extent_root, map->type);
+       unlock_chunks(root->fs_info->chunk_root);
 
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
@@ -3908,9 +3996,9 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
        uuid_root = btrfs_create_tree(trans, fs_info,
                                      BTRFS_UUID_TREE_OBJECTID);
        if (IS_ERR(uuid_root)) {
-               btrfs_abort_transaction(trans, tree_root,
-                                       PTR_ERR(uuid_root));
-               return PTR_ERR(uuid_root);
+               ret = PTR_ERR(uuid_root);
+               btrfs_abort_transaction(trans, tree_root, ret);
+               return ret;
        }
 
        fs_info->uuid_root = uuid_root;
@@ -3965,6 +4053,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        int slot;
        int failed = 0;
        bool retried = false;
+       bool checked_pending_chunks = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = root->fs_info->super_copy;
@@ -4045,15 +4134,6 @@ again:
                goto again;
        } else if (failed && retried) {
                ret = -ENOSPC;
-               lock_chunks(root);
-
-               btrfs_device_set_total_bytes(device, old_size);
-               if (device->writeable)
-                       device->fs_devices->total_rw_bytes += diff;
-               spin_lock(&root->fs_info->free_chunk_lock);
-               root->fs_info->free_chunk_space += diff;
-               spin_unlock(&root->fs_info->free_chunk_lock);
-               unlock_chunks(root);
                goto done;
        }
 
@@ -4065,6 +4145,35 @@ again:
        }
 
        lock_chunks(root);
+
+       /*
+        * We checked in the above loop all device extents that were already in
+        * the device tree. However before we have updated the device's
+        * total_bytes to the new size, we might have had chunk allocations that
+        * have not completed yet (new block groups attached to transaction
+        * handles), and therefore their device extents were not yet in the
+        * device tree and we missed them in the loop above. So if we have any
+        * pending chunk using a device extent that overlaps the device range
+        * that we cannot use anymore, commit the current transaction and
+        * repeat the search on the device tree - this way we guarantee we will
+        * not have chunks using device extents that end beyond 'new_size'.
+        */
+       if (!checked_pending_chunks) {
+               u64 start = new_size;
+               u64 len = old_size - new_size;
+
+               if (contains_pending_extent(trans, device, &start, len)) {
+                       unlock_chunks(root);
+                       checked_pending_chunks = true;
+                       failed = 0;
+                       retried = false;
+                       ret = btrfs_commit_transaction(trans, root);
+                       if (ret)
+                               goto done;
+                       goto again;
+               }
+       }
+
        btrfs_device_set_disk_total_bytes(device, new_size);
        if (list_empty(&device->resized_list))
                list_add_tail(&device->resized_list,
@@ -4079,6 +4188,16 @@ again:
        btrfs_end_transaction(trans, root);
 done:
        btrfs_free_path(path);
+       if (ret) {
+               lock_chunks(root);
+               btrfs_device_set_total_bytes(device, old_size);
+               if (device->writeable)
+                       device->fs_devices->total_rw_bytes += diff;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space += diff;
+               spin_unlock(&root->fs_info->free_chunk_lock);
+               unlock_chunks(root);
+       }
        return ret;
 }
 
@@ -6072,6 +6191,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                                free_extent_map(em);
                                return -EIO;
                        }
+                       btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing",
+                                               devid, uuid);
                }
                map->stripes[i].dev->in_fs_metadata = 1;
        }
@@ -6191,10 +6312,11 @@ static int read_one_dev(struct btrfs_root *root,
                if (!btrfs_test_opt(root, DEGRADED))
                        return -EIO;
 
-               btrfs_warn(root->fs_info, "devid %llu missing", devid);
                device = add_missing_dev(root, fs_devices, devid, dev_uuid);
                if (!device)
                        return -ENOMEM;
+               btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
+                               devid, dev_uuid);
        } else {
                if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
                        return -EIO;
@@ -6722,3 +6844,21 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
        }
        unlock_chunks(root);
 }
+
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       while (fs_devices) {
+               fs_devices->fs_info = fs_info;
+               fs_devices = fs_devices->seed;
+       }
+}
+
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       while (fs_devices) {
+               fs_devices->fs_info = NULL;
+               fs_devices = fs_devices->seed;
+       }
+}
index cedae0356558d92abd20ee9daf72df3eadbbb7db..95842a909e7f7cbbb9be2a70e24376db7f08baf7 100644 (file)
@@ -253,6 +253,12 @@ struct btrfs_fs_devices {
         * nonrot flag set
         */
        int rotating;
+
+       struct btrfs_fs_info *fs_info;
+       /* sysfs kobjects */
+       struct kobject super_kobj;
+       struct kobject *device_dir_kobj;
+       struct completion kobj_unregister;
 };
 
 #define BTRFS_BIO_INLINE_CSUM_SIZE     64
@@ -535,5 +541,8 @@ static inline void unlock_chunks(struct btrfs_root *root)
        mutex_unlock(&root->fs_info->chunk_mutex);
 }
 
+struct list_head *btrfs_get_fs_uuids(void);
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
 
 #endif
index a2172f3f69e318915f092885da0b4d49755d33f1..e7b478b4998532231278ca1c07be4481c1951576 100644 (file)
@@ -192,6 +192,15 @@ config CIFS_SMB2
          options are also slightly simpler (compared to CIFS) due
          to protocol improvements.
 
+config CIFS_SMB311
+       bool "SMB3.1.1 network file system support (Experimental)"
+       depends on CIFS_SMB2 && INET
+
+       help
+         This enables experimental support for the newest dialect, SMB3.1.1.
+         This dialect includes improved security negotiation features.
+         If unsure, say N.
+
 config CIFS_FSCACHE
          bool "Provide CIFS client caching support"
          depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
index 22b289a3b1c4d3e12727cc0a005456fa9b295a00..b406a32deb1f6bf2f9e8d416bc019a71f589d8db 100644 (file)
@@ -171,6 +171,10 @@ enum smb_version {
        Smb_21,
        Smb_30,
        Smb_302,
+#ifdef CONFIG_CIFS_SMB311
+       Smb_311,
+#endif /* SMB311 */
+       Smb_version_err
 };
 
 struct mid_q_entry;
@@ -368,6 +372,8 @@ struct smb_version_operations {
        void (*new_lease_key)(struct cifs_fid *);
        int (*generate_signingkey)(struct cifs_ses *);
        int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
+       int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
+                            struct cifsFileInfo *src_file);
        int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
                                struct cifs_sb_info *, const unsigned char *,
                                char *, unsigned int *);
@@ -386,6 +392,9 @@ struct smb_version_operations {
        int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
                        struct cifsFileInfo *target_file, u64 src_off, u64 len,
                        u64 dest_off);
+       int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
+                       struct cifsFileInfo *target_file, u64 src_off, u64 len,
+                       u64 dest_off);
        int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
        ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
                        const unsigned char *, const unsigned char *, char *,
@@ -1617,4 +1626,8 @@ extern struct smb_version_values smb30_values;
 #define SMB302_VERSION_STRING  "3.02"
 /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
 extern struct smb_version_values smb302_values;
+#define SMB311_VERSION_STRING  "3.1.1"
+#define ALT_SMB311_VERSION_STRING "3.11"
+extern struct smb_version_operations smb311_operations;
+extern struct smb_version_values smb311_values;
 #endif /* _CIFS_GLOB_H */
index 5f9822ac0245dcd637838b3fda843b3cc4d09901..47b030da0781e988c7bcafefd462a5e8b45ff14c 100644 (file)
@@ -2255,6 +2255,8 @@ typedef struct {
 
 
 /* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORTS_SPARSE_VDL       0x10000000 /* faster nonsparse extend */
+#define FILE_SUPPORTS_BLOCK_REFCOUNTING        0x08000000 /* allow ioctl dup extents */
 #define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
 #define FILE_SUPPORTS_USN_JOURNAL      0x02000000
 #define FILE_SUPPORTS_OPEN_BY_FILE_ID  0x01000000
@@ -2310,6 +2312,16 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
        char FileName[1];
 } __attribute__((packed)) FILE_ALL_INFO;       /* level 0x107 QPathInfo */
 
+typedef struct {
+       __le64 AllocationSize;
+       __le64 EndOfFile;       /* size, i.e. offset to first free byte in file */
+       __le32 NumberOfLinks;   /* hard links */
+       __u8 DeletePending;
+       __u8 Directory;
+       __u16 Pad;
+} __attribute__((packed)) FILE_STANDARD_INFO;  /* level 0x102 QPathInfo */
+
+
 /* defines for enumerating possible values of the Unix type field below */
 #define UNIX_FILE      0
 #define UNIX_DIR       1
index f26ffbfc64d8b4eca26b8e8101f705043fc7a4a0..672ef35c9f73c59d1f4b566bec2f16b4c74e4a8f 100644 (file)
@@ -625,9 +625,8 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
                server->negflavor = CIFS_NEGFLAVOR_UNENCAP;
                memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
                       CIFS_CRYPTO_KEY_SIZE);
-       } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
-                       server->capabilities & CAP_EXTENDED_SECURITY) &&
-                               (pSMBr->EncryptionKeyLength == 0)) {
+       } else if (pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
+                       server->capabilities & CAP_EXTENDED_SECURITY) {
                server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
                rc = decode_ext_sec_blob(ses, pSMBr);
        } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
index 8383d5ea42028dac6788e642b6c3ed0f61459d51..773f4dc776305284f4df53e4d4e44741a74b467f 100644 (file)
@@ -280,6 +280,11 @@ static const match_table_t cifs_smb_version_tokens = {
        { Smb_21, SMB21_VERSION_STRING },
        { Smb_30, SMB30_VERSION_STRING },
        { Smb_302, SMB302_VERSION_STRING },
+#ifdef CONFIG_CIFS_SMB311
+       { Smb_311, SMB311_VERSION_STRING },
+       { Smb_311, ALT_SMB311_VERSION_STRING },
+#endif /* SMB311 */
+       { Smb_version_err, NULL }
 };
 
 static int ip_connect(struct TCP_Server_Info *server);
@@ -1133,6 +1138,12 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
                vol->ops = &smb30_operations; /* currently identical with 3.0 */
                vol->vals = &smb302_values;
                break;
+#ifdef CONFIG_CIFS_SMB311
+       case Smb_311:
+               vol->ops = &smb311_operations;
+               vol->vals = &smb311_values;
+               break;
+#endif /* SMB311 */
 #endif
        default:
                cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
@@ -3461,6 +3472,8 @@ try_mount_again:
                else if (ses)
                        cifs_put_smb_ses(ses);
 
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+
                free_xid(xid);
        }
 #endif
index 8b7898b7670f88c3ea9ec596129ef569eed183bd..49b8b6e41a188b3a832c3e32c482ed7a99091aa1 100644 (file)
 #include "cifsproto.h"
 #include "cifs_debug.h"
 #include "cifsfs.h"
+#include <linux/btrfs.h>
 
 #define CIFS_IOCTL_MAGIC       0xCF
 #define CIFS_IOC_COPYCHUNK_FILE        _IOW(CIFS_IOCTL_MAGIC, 3, int)
+#define CIFS_IOC_SET_INTEGRITY  _IO(CIFS_IOCTL_MAGIC, 4)
 
 static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
-                       unsigned long srcfd, u64 off, u64 len, u64 destoff)
+                       unsigned long srcfd, u64 off, u64 len, u64 destoff,
+                       bool dup_extents)
 {
        int rc;
        struct cifsFileInfo *smb_file_target = dst_file->private_data;
@@ -109,9 +112,14 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
        truncate_inode_pages_range(&target_inode->i_data, destoff,
                                   PAGE_CACHE_ALIGN(destoff + len)-1);
 
-       if (target_tcon->ses->server->ops->clone_range)
+       if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
+               rc = target_tcon->ses->server->ops->duplicate_extents(xid,
+                       smb_file_src, smb_file_target, off, len, destoff);
+       else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
                rc = target_tcon->ses->server->ops->clone_range(xid,
                        smb_file_src, smb_file_target, off, len, destoff);
+       else
+               rc = -EOPNOTSUPP;
 
        /* force revalidate of size and timestamps of target file now
           that target is updated on the server */
@@ -205,7 +213,20 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        }
                        break;
                case CIFS_IOC_COPYCHUNK_FILE:
-                       rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
+                       rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
+                       break;
+               case BTRFS_IOC_CLONE:
+                       rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
+                       break;
+               case CIFS_IOC_SET_INTEGRITY:
+                       if (pSMBFile == NULL)
+                               break;
+                       tcon = tlink_tcon(pSMBFile->tlink);
+                       if (tcon->ses->server->ops->set_integrity)
+                               rc = tcon->ses->server->ops->set_integrity(xid,
+                                               tcon, pSMBFile);
+                       else
+                               rc = -EOPNOTSUPP;
                        break;
                default:
                        cifs_dbg(FYI, "unsupported ioctl\n");
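From user space, both new paths are plain ioctls on files in a cifs mount. A hedged user-space sketch; the CIFS_IOC_SET_INTEGRITY define mirrors the one above since it is not exported in a uapi header, and descriptor setup plus error handling are left to the caller:

    #include <sys/ioctl.h>
    #include <linux/btrfs.h>            /* BTRFS_IOC_CLONE */

    #define CIFS_IOCTL_MAGIC        0xCF
    #define CIFS_IOC_SET_INTEGRITY  _IO(CIFS_IOCTL_MAGIC, 4)

    static int dup_and_seal(int src_fd, int dst_fd)
    {
            /* server-side duplication via FSCTL_DUPLICATE_EXTENTS_TO_FILE */
            if (ioctl(dst_fd, BTRFS_IOC_CLONE, src_fd) < 0)
                    return -1;
            /* ask the server to keep integrity (checksum) streams */
            return ioctl(dst_fd, CIFS_IOC_SET_INTEGRITY);
    }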
index 54daee5ad4c102b71b3617317a58d15375f4e755..df91bcf56d67a35f0dd60d0361514f65cdc6705e 100644 (file)
@@ -806,6 +806,53 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
                            cfile->fid.volatile_fid, cfile->pid, &eof, false);
 }
 
+#ifdef CONFIG_CIFS_SMB311
+static int
+smb2_duplicate_extents(const unsigned int xid,
+                       struct cifsFileInfo *srcfile,
+                       struct cifsFileInfo *trgtfile, u64 src_off,
+                       u64 len, u64 dest_off)
+{
+       int rc;
+       unsigned int ret_data_len;
+       char *retbuf = NULL;
+       struct duplicate_extents_to_file dup_ext_buf;
+       struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
+
+       /* server filesystems advertise duplicate extent support with this flag */
+       if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) &
+            FILE_SUPPORTS_BLOCK_REFCOUNTING) == 0)
+               return -EOPNOTSUPP;
+
+       dup_ext_buf.VolatileFileHandle = srcfile->fid.volatile_fid;
+       dup_ext_buf.PersistentFileHandle = srcfile->fid.persistent_fid;
+       dup_ext_buf.SourceFileOffset = cpu_to_le64(src_off);
+       dup_ext_buf.TargetFileOffset = cpu_to_le64(dest_off);
+       dup_ext_buf.ByteCount = cpu_to_le64(len);
+       cifs_dbg(FYI, "duplicate extents: src off %lld dst off %lld len %lld",
+               src_off, dest_off, len);
+
+       rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
+       if (rc)
+               goto duplicate_extents_out;
+
+       rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
+                       trgtfile->fid.volatile_fid,
+                       FSCTL_DUPLICATE_EXTENTS_TO_FILE,
+                       true /* is_fsctl */, (char *)&dup_ext_buf,
+                       sizeof(struct duplicate_extents_to_file),
+                       (char **)&retbuf,
+                       &ret_data_len);
+
+       if (ret_data_len > 0)
+               cifs_dbg(FYI, "non-zero response length in duplicate extents");
+
+duplicate_extents_out:
+       return rc;
+}
+#endif /* CONFIG_CIFS_SMB311 */
+
+
 static int
 smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
                   struct cifsFileInfo *cfile)
@@ -814,6 +861,28 @@ smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
                            cfile->fid.volatile_fid);
 }
 
+static int
+smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
+                  struct cifsFileInfo *cfile)
+{
+       struct fsctl_set_integrity_information_req integr_info;
+       char *retbuf = NULL;
+       unsigned int ret_data_len;
+
+       integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
+       integr_info.Flags = 0;
+       integr_info.Reserved = 0;
+
+       return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+                       cfile->fid.volatile_fid,
+                       FSCTL_SET_INTEGRITY_INFORMATION,
+                       true /* is_fsctl */, (char *)&integr_info,
+                       sizeof(struct fsctl_set_integrity_information_req),
+                       (char **)&retbuf,
+                       &ret_data_len);
+
+}
+
 static int
 smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
                     const char *path, struct cifs_sb_info *cifs_sb,
@@ -1624,6 +1693,7 @@ struct smb_version_operations smb30_operations = {
        .new_lease_key = smb2_new_lease_key,
        .generate_signingkey = generate_smb3signingkey,
        .calc_signature = smb3_calc_signature,
+       .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
        .set_oplock_level = smb3_set_oplock_level,
        .create_lease_buf = smb3_create_lease_buf,
@@ -1635,6 +1705,94 @@ struct smb_version_operations smb30_operations = {
        .fallocate = smb3_fallocate,
 };
 
+#ifdef CONFIG_CIFS_SMB311
+struct smb_version_operations smb311_operations = {
+       .compare_fids = smb2_compare_fids,
+       .setup_request = smb2_setup_request,
+       .setup_async_request = smb2_setup_async_request,
+       .check_receive = smb2_check_receive,
+       .add_credits = smb2_add_credits,
+       .set_credits = smb2_set_credits,
+       .get_credits_field = smb2_get_credits_field,
+       .get_credits = smb2_get_credits,
+       .wait_mtu_credits = smb2_wait_mtu_credits,
+       .get_next_mid = smb2_get_next_mid,
+       .read_data_offset = smb2_read_data_offset,
+       .read_data_length = smb2_read_data_length,
+       .map_error = map_smb2_to_linux_error,
+       .find_mid = smb2_find_mid,
+       .check_message = smb2_check_message,
+       .dump_detail = smb2_dump_detail,
+       .clear_stats = smb2_clear_stats,
+       .print_stats = smb2_print_stats,
+       .dump_share_caps = smb2_dump_share_caps,
+       .is_oplock_break = smb2_is_valid_oplock_break,
+       .downgrade_oplock = smb2_downgrade_oplock,
+       .need_neg = smb2_need_neg,
+       .negotiate = smb2_negotiate,
+       .negotiate_wsize = smb2_negotiate_wsize,
+       .negotiate_rsize = smb2_negotiate_rsize,
+       .sess_setup = SMB2_sess_setup,
+       .logoff = SMB2_logoff,
+       .tree_connect = SMB2_tcon,
+       .tree_disconnect = SMB2_tdis,
+       .qfs_tcon = smb3_qfs_tcon,
+       .is_path_accessible = smb2_is_path_accessible,
+       .can_echo = smb2_can_echo,
+       .echo = SMB2_echo,
+       .query_path_info = smb2_query_path_info,
+       .get_srv_inum = smb2_get_srv_inum,
+       .query_file_info = smb2_query_file_info,
+       .set_path_size = smb2_set_path_size,
+       .set_file_size = smb2_set_file_size,
+       .set_file_info = smb2_set_file_info,
+       .set_compression = smb2_set_compression,
+       .mkdir = smb2_mkdir,
+       .mkdir_setinfo = smb2_mkdir_setinfo,
+       .rmdir = smb2_rmdir,
+       .unlink = smb2_unlink,
+       .rename = smb2_rename_path,
+       .create_hardlink = smb2_create_hardlink,
+       .query_symlink = smb2_query_symlink,
+       .query_mf_symlink = smb3_query_mf_symlink,
+       .create_mf_symlink = smb3_create_mf_symlink,
+       .open = smb2_open_file,
+       .set_fid = smb2_set_fid,
+       .close = smb2_close_file,
+       .flush = smb2_flush_file,
+       .async_readv = smb2_async_readv,
+       .async_writev = smb2_async_writev,
+       .sync_read = smb2_sync_read,
+       .sync_write = smb2_sync_write,
+       .query_dir_first = smb2_query_dir_first,
+       .query_dir_next = smb2_query_dir_next,
+       .close_dir = smb2_close_dir,
+       .calc_smb_size = smb2_calc_size,
+       .is_status_pending = smb2_is_status_pending,
+       .oplock_response = smb2_oplock_response,
+       .queryfs = smb2_queryfs,
+       .mand_lock = smb2_mand_lock,
+       .mand_unlock_range = smb2_unlock_range,
+       .push_mand_locks = smb2_push_mandatory_locks,
+       .get_lease_key = smb2_get_lease_key,
+       .set_lease_key = smb2_set_lease_key,
+       .new_lease_key = smb2_new_lease_key,
+       .generate_signingkey = generate_smb3signingkey,
+       .calc_signature = smb3_calc_signature,
+       .set_integrity  = smb3_set_integrity,
+       .is_read_op = smb21_is_read_op,
+       .set_oplock_level = smb3_set_oplock_level,
+       .create_lease_buf = smb3_create_lease_buf,
+       .parse_lease_buf = smb3_parse_lease_buf,
+       .clone_range = smb2_clone_range,
+       .duplicate_extents = smb2_duplicate_extents,
+/*     .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */
+       .wp_retry_size = smb2_wp_retry_size,
+       .dir_needs_close = smb2_dir_needs_close,
+       .fallocate = smb3_fallocate,
+};
+#endif /* CIFS_SMB311 */
+
 struct smb_version_values smb20_values = {
        .version_string = SMB20_VERSION_STRING,
        .protocol_id = SMB20_PROT_ID,
@@ -1714,3 +1872,25 @@ struct smb_version_values smb302_values = {
        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
        .create_lease_size = sizeof(struct create_lease_v2),
 };
+
+#ifdef CONFIG_CIFS_SMB311
+struct smb_version_values smb311_values = {
+       .version_string = SMB311_VERSION_STRING,
+       .protocol_id = SMB311_PROT_ID,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU,
+       .large_lock_type = 0,
+       .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+       .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+       .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+       .header_size = sizeof(struct smb2_hdr),
+       .max_header_size = MAX_SMB2_HDR_SIZE,
+       .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+       .lock_cmd = SMB2_LOCK,
+       .cap_unix = 0,
+       .cap_nt_find = SMB2_NT_FIND,
+       .cap_large_files = SMB2_LARGE_FILES,
+       .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+       .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+       .create_lease_size = sizeof(struct create_lease_v2),
+};
+#endif /* SMB311 */
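With the token table above, the new dialect is selected at mount time with vers=3.1.1 (the "3.11" spelling is accepted as an alias). A minimal sketch using mount(2), with placeholder share, mountpoint and credentials:

    #include <sys/mount.h>

    mount("//server/share", "/mnt/cifs", "cifs", 0,
          "vers=3.1.1,username=user,password=secret");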
index 54cbe19d9c0871a1bb47a17edfc1d414cb383b9f..b8b4f08ee094e2f8a2b811f076ca0eb4cc3e68b9 100644 (file)
@@ -304,6 +304,59 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
        return rc;
 }
 
+#ifdef CONFIG_CIFS_SMB311
+/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */
+#define OFFSET_OF_NEG_CONTEXT 0x68  /* sizeof(struct smb2_negotiate_req) - 4 */
+
+
+#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES    cpu_to_le16(1)
+#define SMB2_ENCRYPTION_CAPABILITIES           cpu_to_le16(2)
+
+static void
+build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
+{
+       pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
+       pneg_ctxt->DataLength = cpu_to_le16(38);
+       pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
+       pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
+       get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+       pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512;
+}
+
+static void
+build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
+{
+       pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
+       pneg_ctxt->DataLength = cpu_to_le16(6);
+       pneg_ctxt->CipherCount = cpu_to_le16(2);
+       pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
+       pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+}
+
+static void
+assemble_neg_contexts(struct smb2_negotiate_req *req)
+{
+
+       /* +4 is to account for the RFC1001 len field */
+       char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4;
+
+       build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
+       /* Add 2 to size to round to 8 byte boundary */
+       pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
+       build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
+       req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+       req->NegotiateContextCount = cpu_to_le16(2);
+       inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+                       + sizeof(struct smb2_encryption_neg_context));
+}
+#else
+static void assemble_neg_contexts(struct smb2_negotiate_req *req)
+{
+       return;
+}
+#endif /* CIFS_SMB311 */
+
+
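
The 0x68 above is not arbitrary: sizeof(struct smb2_hdr) includes the 4 byte RFC1001 length field, so sizeof(struct smb2_negotiate_req) - 4 is 0x66, and rounding up to the next 8 byte boundary yields 0x68. A compile-time restatement of that arithmetic, as a sketch one could add under those assumptions:

        /* sketch: derive the context offset instead of trusting 0x68 */
        static inline void smb311_neg_ctxt_offset_check(void)
        {
                BUILD_BUG_ON(round_up(sizeof(struct smb2_negotiate_req) - 4, 8)
                             != OFFSET_OF_NEG_CONTEXT);
        }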
 /*
  *
  *     SMB2 Worker functions follow:
@@ -363,10 +416,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
        /* ClientGUID must be zero for SMB2.02 dialect */
        if (ses->server->vals->protocol_id == SMB20_PROT_ID)
                memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE);
-       else
+       else {
                memcpy(req->ClientGUID, server->client_guid,
                        SMB2_CLIENT_GUID_SIZE);
-
+               if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+                       assemble_neg_contexts(req);
+       }
        iov[0].iov_base = (char *)req;
        /* 4 for rfc1002 length field */
        iov[0].iov_len = get_rfc1002_length(req) + 4;
@@ -393,8 +448,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
                cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
        else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID))
                cifs_dbg(FYI, "negotiated smb3.02 dialect\n");
+#ifdef CONFIG_CIFS_SMB311
+       else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+               cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n");
+#endif /* CIFS_SMB311 */
        else {
-               cifs_dbg(VFS, "Illegal dialect returned by server %d\n",
+               cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n",
                         le16_to_cpu(rsp->DialectRevision));
                rc = -EIO;
                goto neg_exit;
@@ -572,7 +631,7 @@ ssetup_ntlmssp_authenticate:
                return rc;
 
        req->hdr.SessionId = 0; /* First session, not a reauthenticate */
-       req->VcNumber = 0; /* MBZ */
+       req->Flags = 0; /* MBZ */
        /* to enable echos and oplocks */
        req->hdr.CreditRequest = cpu_to_le16(3);
 
index 70867d54fb8bf485cb5ff4dcb3049f67ca86cb45..451108284a2f438203568d90749f149ea567a602 100644 (file)
@@ -136,9 +136,6 @@ struct smb2_transform_hdr {
        __u64  SessionId;
 } __packed;
 
-/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM     cpu_to_le16(0x0001)
-
 /*
  *     SMB2 flag definitions
  */
@@ -191,7 +188,10 @@ struct smb2_negotiate_req {
        __le16 Reserved;        /* MBZ */
        __le32 Capabilities;
        __u8   ClientGUID[SMB2_CLIENT_GUID_SIZE];
-       __le64 ClientStartTime; /* MBZ */
+       /* In SMB3.02 and earlier the next three fields were one MBZ le64 ClientStartTime */
+       __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
+       __le16 NegotiateContextCount;  /* SMB3.1.1 only. MBZ earlier */
+       __le16 Reserved2;
        __le16 Dialects[1]; /* One dialect (vers=) at a time for now */
 } __packed;
 
@@ -200,6 +200,7 @@ struct smb2_negotiate_req {
 #define SMB21_PROT_ID 0x0210
 #define SMB30_PROT_ID 0x0300
 #define SMB302_PROT_ID 0x0302
+#define SMB311_PROT_ID 0x0311
 #define BAD_PROT_ID   0xFFFF
 
 /* SecurityMode flags */
@@ -217,12 +218,38 @@ struct smb2_negotiate_req {
 #define SMB2_NT_FIND                   0x00100000
 #define SMB2_LARGE_FILES               0x00200000
 
+#define SMB311_SALT_SIZE                       32
+/* Hash Algorithm Types */
+#define SMB2_PREAUTH_INTEGRITY_SHA512  cpu_to_le16(0x0001)
+
+struct smb2_preauth_neg_context {
+       __le16  ContextType; /* 1 */
+       __le16  DataLength;
+       __le32  Reserved;
+       __le16  HashAlgorithmCount; /* 1 */
+       __le16  SaltLength;
+       __le16  HashAlgorithms; /* HashAlgorithms[0] since only one defined */
+       __u8    Salt[SMB311_SALT_SIZE];
+} __packed;
+
+/* Encryption Algorithms Ciphers */
+#define SMB2_ENCRYPTION_AES128_CCM     cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_GCM     cpu_to_le16(0x0002)
+
+struct smb2_encryption_neg_context {
+       __le16  ContextType; /* 2 */
+       __le16  DataLength;
+       __le32  Reserved;
+       __le16  CipherCount; /* AES-128-GCM and AES-128-CCM */
+       __le16  Ciphers[2]; /* both offered; server selects one */
+} __packed;
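
The DataLength values hard-coded in build_preauth_ctxt() (38) and build_encrypt_ctxt() (6) count only the bytes after the common 8 byte context header (ContextType, DataLength, Reserved): 2 + 2 + 2 + 32 for the hash count, salt length, algorithm and salt, and 2 + 4 for the cipher count and two-entry cipher list. A compile-time sketch of the same arithmetic:

        /* sketch: DataLength excludes the 8 byte negotiate context header */
        static inline void smb311_neg_ctxt_len_check(void)
        {
                BUILD_BUG_ON(sizeof(struct smb2_preauth_neg_context) - 8 != 38);
                BUILD_BUG_ON(sizeof(struct smb2_encryption_neg_context) - 8 != 6);
        }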
+
 struct smb2_negotiate_rsp {
        struct smb2_hdr hdr;
        __le16 StructureSize;   /* Must be 65 */
        __le16 SecurityMode;
        __le16 DialectRevision;
-       __le16 Reserved;        /* MBZ */
+       __le16 NegotiateContextCount;   /* Prior to SMB3.1.1 was Reserved & MBZ */
        __u8   ServerGUID[16];
        __le32 Capabilities;
        __le32 MaxTransactSize;
@@ -232,14 +259,18 @@ struct smb2_negotiate_rsp {
        __le64 ServerStartTime;
        __le16 SecurityBufferOffset;
        __le16 SecurityBufferLength;
-       __le32 Reserved2;       /* may be any value, ignore */
+       __le32 NegotiateContextOffset;  /* Prior to SMB3.1.1 was reserved/ignored */
        __u8   Buffer[1];       /* variable length GSS security buffer */
 } __packed;
 
+/* Flags */
+#define SMB2_SESSION_REQ_FLAG_BINDING          0x01
+#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA     0x04
+
 struct smb2_sess_setup_req {
        struct smb2_hdr hdr;
        __le16 StructureSize; /* Must be 25 */
-       __u8   VcNumber;
+       __u8   Flags;
        __u8   SecurityMode;
        __le32 Capabilities;
        __le32 Channel;
@@ -274,10 +305,13 @@ struct smb2_logoff_rsp {
        __le16 Reserved;
 } __packed;
 
+/* Flags/Reserved for SMB3.1.1 */
+#define SMB2_SHAREFLAG_CLUSTER_RECONNECT       0x0001
+
 struct smb2_tree_connect_req {
        struct smb2_hdr hdr;
        __le16 StructureSize;   /* Must be 9 */
-       __le16 Reserved;
+       __le16 Reserved; /* Flags in SMB3.1.1 */
        __le16 PathOffset;
        __le16 PathLength;
        __u8   Buffer[1];       /* variable length */
@@ -587,6 +621,29 @@ struct copychunk_ioctl_rsp {
        __le32 TotalBytesWritten;
 } __packed;
 
+struct fsctl_set_integrity_information_req {
+       __le16  ChecksumAlgorithm;
+       __le16  Reserved;
+       __le32  Flags;
+} __packed;
+
+struct fsctl_get_integrity_information_rsp {
+       __le16  ChecksumAlgorithm;
+       __le16  Reserved;
+       __le32  Flags;
+       __le32  ChecksumChunkSizeInBytes;
+       __le32  ClusterSizeInBytes;
+} __packed;
+
+/* Integrity ChecksumAlgorithm choices for above */
+#define        CHECKSUM_TYPE_NONE      0x0000
+#define        CHECKSUM_TYPE_CRC64     0x0002
+#define CHECKSUM_TYPE_UNCHANGED        0xFFFF  /* set only */
+
+/* Integrity flags for above */
+#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF  0x00000001
+
+
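
As a usage sketch, this is the request body a caller of FSCTL_SET_INTEGRITY_INFORMATION would marshal; how it reaches the wire (presumably the generic SMB2 ioctl path) is outside this hunk:

        /* sketch: ask the server to checksum the file with CRC64 */
        struct fsctl_set_integrity_information_req integ_req = {
                .ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_CRC64),
                .Reserved          = 0,
                .Flags             = 0, /* leave checksum enforcement on */
        };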
 struct validate_negotiate_info_req {
        __le32 Capabilities;
        __u8   Guid[SMB2_CLIENT_GUID_SIZE];
@@ -620,6 +677,14 @@ struct compress_ioctl {
        __le16 CompressionState; /* See cifspdu.h for possible flag values */
 } __packed;
 
+struct duplicate_extents_to_file {
+       __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+       __u64 VolatileFileHandle;
+       __le64 SourceFileOffset;
+       __le64 TargetFileOffset;
+       __le64 ByteCount;  /* Bytes to be copied */
+} __packed;
+
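
A matching sketch for FSCTL_DUPLICATE_EXTENTS_TO_FILE, where src_persistent_fid, src_volatile_fid and src_len are assumed to come from an open of the source file:

        /* sketch: server-side clone of src_len bytes from the source file */
        struct duplicate_extents_to_file dup_ext = {
                .PersistentFileHandle = src_persistent_fid,
                .VolatileFileHandle   = src_volatile_fid,
                .SourceFileOffset     = cpu_to_le64(0),
                .TargetFileOffset     = cpu_to_le64(0),
                .ByteCount            = cpu_to_le64(src_len),
        };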
 struct smb2_ioctl_req {
        struct smb2_hdr hdr;
        __le16 StructureSize;   /* Must be 57 */
index 83efa59535bedf988292548c0fcba1526fcdde1c..a639d0dab453373f4ae5dd2e2a6ee63f3867176d 100644 (file)
 #define FSCTL_QUERY_SPARING_INFO     0x00090138 /* BB add struct */
 #define FSCTL_SET_ZERO_ON_DEALLOC    0x00090194 /* BB add struct */
 #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
 #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
 #define FSCTL_SET_DEFECT_MANAGEMENT  0x00098134 /* BB add struct */
 #define FSCTL_FILE_LEVEL_TRIM        0x00098208 /* BB add struct */
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
 #define FSCTL_SIS_LINK_FILES         0x0009C104
+#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280
 #define FSCTL_PIPE_PEEK              0x0011400C /* BB add struct */
 #define FSCTL_PIPE_TRANSCEIVE        0x0011C017 /* BB add struct */
 /* strange that the number for this op is not sequential with previous op */
index 6f65f00e58ecdc695284de68b832f3822247f594..99b5fbc38992db1f88be1a1e48dad4fda584a0c1 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -309,14 +309,21 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
  out:
        i_mmap_unlock_read(mapping);
 
-       if (bh->b_end_io)
-               bh->b_end_io(bh, 1);
-
        return error;
 }
 
-static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-                       get_block_t get_block)
+/**
+ * __dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @get_block: The filesystem method used to translate file offsets to blocks
+ *
+ * When a page fault occurs, filesystems may call this helper in their
+ * fault handler for DAX files. __dax_fault() assumes the caller has done all
+ * the necessary locking for the page fault to proceed successfully.
+ */
+int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+                       get_block_t get_block, dax_iodone_t complete_unwritten)
 {
        struct file *file = vma->vm_file;
        struct address_space *mapping = file->f_mapping;
@@ -417,7 +424,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                page_cache_release(page);
        }
 
+       /*
+        * If we successfully insert the new mapping over an unwritten extent,
+        * we need to ensure we convert the unwritten extent. If there is an
+        * error inserting the mapping, the filesystem needs to leave it as
+        * unwritten to prevent exposure of the stale underlying data to
+        * userspace, but we still need to call the completion function so
+        * the private resources on the mapping buffer can be released. We
+        * indicate what the callback should do via the uptodate variable, same
+        * as for normal BH based IO completions.
+        */
        error = dax_insert_mapping(inode, &bh, vma, vmf);
+       if (buffer_unwritten(&bh))
+               complete_unwritten(&bh, !error);
 
  out:
        if (error == -ENOMEM)
@@ -434,6 +453,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
        }
        goto out;
 }
+EXPORT_SYMBOL(__dax_fault);
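
Exporting __dax_fault() lets a filesystem wrap the fault in its own serialisation and fall back to dax_fault() only when no extra locking is needed. A sketch of the locked-caller pattern, with all myfs_* names assumed:

        /* sketch: fault handler that brackets __dax_fault() with fs locks */
        static int myfs_dax_fault(struct vm_area_struct *vma,
                                  struct vm_fault *vmf)
        {
                struct inode *inode = file_inode(vma->vm_file);
                int ret;

                if (vmf->flags & FAULT_FLAG_WRITE) {
                        sb_start_pagefault(inode->i_sb);
                        file_update_time(vma->vm_file);
                }
                myfs_lock_mmap(inode);          /* assumed fs-private lock */
                ret = __dax_fault(vma, vmf, myfs_get_block,
                                  myfs_end_io_unwritten);
                myfs_unlock_mmap(inode);
                if (vmf->flags & FAULT_FLAG_WRITE)
                        sb_end_pagefault(inode->i_sb);
                return ret;
        }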
 
 /**
  * dax_fault - handle a page fault on a DAX file
@@ -445,7 +465,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
  * fault handler for DAX files.
  */
 int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-                       get_block_t get_block)
+             get_block_t get_block, dax_iodone_t complete_unwritten)
 {
        int result;
        struct super_block *sb = file_inode(vma->vm_file)->i_sb;
@@ -454,7 +474,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
        }
-       result = do_dax_fault(vma, vmf, get_block);
+       result = __dax_fault(vma, vmf, get_block, complete_unwritten);
        if (vmf->flags & FAULT_FLAG_WRITE)
                sb_end_pagefault(sb);
 
index 3a0a6c6406d000560c5c5971c733751e091c3382..3b57c9f83c9b9b6469b014a317b0e50c1b1f1cf4 100644 (file)
 #ifdef CONFIG_FS_DAX
 static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_fault(vma, vmf, ext2_get_block);
+       return dax_fault(vma, vmf, ext2_get_block, NULL);
 }
 
 static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_mkwrite(vma, vmf, ext2_get_block);
+       return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
 }
 
 static const struct vm_operations_struct ext2_dax_vm_ops = {
index ac517f15741c602b189e2c2fa7a9bab0d428f16d..bc313ac5d3fa024a96574549690eb40eb35612db 100644 (file)
@@ -192,15 +192,27 @@ out:
 }
 
 #ifdef CONFIG_FS_DAX
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
+{
+       struct inode *inode = bh->b_assoc_map->host;
+       /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+       int err;
+       if (!uptodate)
+               return;
+       WARN_ON(!buffer_unwritten(bh));
+       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
+
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_fault(vma, vmf, ext4_get_block);
+       return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
                                        /* Is this the right get_block? */
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_mkwrite(vma, vmf, ext4_get_block);
+       return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
index f8a8d4ee7459f9a82af3a3d489cc5265915da1a9..41f8e55afcd11491c5f25bf0989f74c12b9baf69 100644 (file)
@@ -656,18 +656,6 @@ has_zeroout:
        return retval;
 }
 
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-       struct inode *inode = bh->b_assoc_map->host;
-       /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
-       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-       int err;
-       if (!uptodate)
-               return;
-       WARN_ON(!buffer_unwritten(bh));
-       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
@@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-               if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
+               if (IS_DAX(inode) && buffer_unwritten(bh)) {
+                       /*
+                        * dgc: I suspect unwritten conversion on ext4+DAX is
+                        * fundamentally broken here when there are concurrent
+                        * read/write in progress on this inode.
+                        */
+                       WARN_ON_ONCE(io_end);
                        bh->b_assoc_map = inode->i_mapping;
                        bh->b_private = (void *)(unsigned long)iblock;
-                       bh->b_end_io = ext4_end_io_unwritten;
                }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
index f175b833b6ba75b22bced1f9d013ad670caed82d..aa62004f1706f9c685b368379ce021c1f3474c72 100644 (file)
@@ -2847,7 +2847,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
        *((unsigned int *)kp->arg) = num;
        return 0;
 }
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
        .set = param_set_portnr,
        .get = param_get_uint,
 };
index 760e25dad9850b77a3a73568cbf109e2a88a9a7c..1d9c1cbd4d0b4a1a17dd924e876c854b8d722169 100644 (file)
@@ -541,6 +541,7 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
 
        return res;
 }
+EXPORT_SYMBOL(seq_dentry);
 
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
index 516162be1398404a18e32909f8ff25c065364605..f9e9ffe6fb46f88691e337d56d73928bc3d6f5cb 100644 (file)
@@ -149,13 +149,27 @@ xfs_alloc_compute_aligned(
 {
        xfs_agblock_t   bno;
        xfs_extlen_t    len;
+       xfs_extlen_t    diff;
 
        /* Trim busy sections out of found extent */
        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 
+       /*
+        * If we have a largish extent that happens to start before min_agbno,
+        * see if we can shift it into range...
+        */
+       if (bno < args->min_agbno && bno + len > args->min_agbno) {
+               diff = args->min_agbno - bno;
+               if (len > diff) {
+                       bno += diff;
+                       len -= diff;
+               }
+       }
+
        if (args->alignment > 1 && len >= args->minlen) {
                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
-               xfs_extlen_t    diff = aligned_bno - bno;
+
+               diff = aligned_bno - bno;
 
                *resbno = aligned_bno;
                *reslen = diff >= len ? 0 : len - diff;
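
Concretely: with args->min_agbno = 100 and a trimmed extent [90, 140) (bno = 90, len = 50), diff = 10 and the extent is shifted to [100, 140) with 40 usable blocks. Note that the outer condition bno + len > min_agbno already guarantees len > diff, so the inner check is purely defensive; extents ending at or before min_agbno skip the shift entirely and are rejected later by the explicit range checks.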
@@ -795,9 +809,13 @@ xfs_alloc_find_best_extent(
                 * The good extent is closer than this one.
                 */
                if (!dir) {
+                       if (*sbnoa > args->max_agbno)
+                               goto out_use_good;
                        if (*sbnoa >= args->agbno + gdiff)
                                goto out_use_good;
                } else {
+                       if (*sbnoa < args->min_agbno)
+                               goto out_use_good;
                        if (*sbnoa <= args->agbno - gdiff)
                                goto out_use_good;
                }
@@ -884,6 +902,17 @@ xfs_alloc_ag_vextent_near(
        dofirst = prandom_u32() & 1;
 #endif
 
+       /* handle uninitialized agbno range so caller doesn't have to */
+       if (!args->min_agbno && !args->max_agbno)
+               args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
+       ASSERT(args->min_agbno <= args->max_agbno);
+
+       /* clamp agbno to the range if it's outside */
+       if (args->agbno < args->min_agbno)
+               args->agbno = args->min_agbno;
+       if (args->agbno > args->max_agbno)
+               args->agbno = args->max_agbno;
+
 restart:
        bno_cur_lt = NULL;
        bno_cur_gt = NULL;
@@ -976,6 +1005,8 @@ restart:
                                                  &ltbnoa, &ltlena);
                        if (ltlena < args->minlen)
                                continue;
+                       if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
+                               continue;
                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
                        xfs_alloc_fix_len(args);
                        ASSERT(args->len >= args->minlen);
@@ -1096,11 +1127,11 @@ restart:
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                                  &ltbnoa, &ltlena);
-                       if (ltlena >= args->minlen)
+                       if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
                                break;
                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
                                goto error0;
-                       if (!i) {
+                       if (!i || ltbnoa < args->min_agbno) {
                                xfs_btree_del_cursor(bno_cur_lt,
                                                     XFS_BTREE_NOERROR);
                                bno_cur_lt = NULL;
@@ -1112,11 +1143,11 @@ restart:
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
                                                  &gtbnoa, &gtlena);
-                       if (gtlena >= args->minlen)
+                       if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
                                break;
                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
                                goto error0;
-                       if (!i) {
+                       if (!i || gtbnoa > args->max_agbno) {
                                xfs_btree_del_cursor(bno_cur_gt,
                                                     XFS_BTREE_NOERROR);
                                bno_cur_gt = NULL;
@@ -1216,6 +1247,7 @@ restart:
        ASSERT(ltnew >= ltbno);
        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+       ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
        args->agbno = ltnew;
 
        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
@@ -1825,11 +1857,11 @@ xfs_alloc_compute_maxlevels(
 xfs_extlen_t
 xfs_alloc_longest_free_extent(
        struct xfs_mount        *mp,
-       struct xfs_perag        *pag)
+       struct xfs_perag        *pag,
+       xfs_extlen_t            need)
 {
-       xfs_extlen_t            need, delta = 0;
+       xfs_extlen_t            delta = 0;
 
-       need = XFS_MIN_FREELIST_PAG(pag, mp);
        if (need > pag->pagf_flcount)
                delta = need - pag->pagf_flcount;
 
@@ -1838,131 +1870,150 @@ xfs_alloc_longest_free_extent(
        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
 }
 
+unsigned int
+xfs_alloc_min_freelist(
+       struct xfs_mount        *mp,
+       struct xfs_perag        *pag)
+{
+       unsigned int            min_free;
+
+       /* space needed by-bno freespace btree */
+       min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+                                      mp->m_ag_maxlevels);
+       /* space needed by-size freespace btree */
+       min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+                                      mp->m_ag_maxlevels);
+
+       return min_free;
+}
+
+/*
+ * Check if the operation we are fixing up the freelist for should go ahead or
+ * not. If we are freeing blocks, we always allow it, otherwise the allocation
+ * is dependent on whether the size and shape of free space available will
+ * permit the requested allocation to take place.
+ */
+static bool
+xfs_alloc_space_available(
+       struct xfs_alloc_arg    *args,
+       xfs_extlen_t            min_free,
+       int                     flags)
+{
+       struct xfs_perag        *pag = args->pag;
+       xfs_extlen_t            longest;
+       int                     available;
+
+       if (flags & XFS_ALLOC_FLAG_FREEING)
+               return true;
+
+       /* do we have enough contiguous free space for the allocation? */
+       longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
+       if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
+               return false;
+
+       /* do we have enough free space remaining for the allocation? */
+       available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
+                         min_free - args->total);
+       if (available < (int)args->minleft)
+               return false;
+
+       return true;
+}
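
As a worked example of the first check: with minlen = 16, alignment = 8 and minalignslop = 0, the longest free extent must be at least 16 + 8 - 1 = 23 blocks, the worst case being 7 blocks of alignment waste ahead of an aligned 16 block run. The second check then requires that freeblks + flcount still cover min_free plus the caller's total reservation with at least minleft blocks to spare.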
+
 /*
  * Decide whether to use this allocation group for this allocation.
  * If so, fix up the btree freelist's size.
  */
 STATIC int                     /* error */
 xfs_alloc_fix_freelist(
-       xfs_alloc_arg_t *args,  /* allocation argument structure */
-       int             flags)  /* XFS_ALLOC_FLAG_... */
+       struct xfs_alloc_arg    *args,  /* allocation argument structure */
+       int                     flags)  /* XFS_ALLOC_FLAG_... */
 {
-       xfs_buf_t       *agbp;  /* agf buffer pointer */
-       xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
-       xfs_buf_t       *agflbp;/* agfl buffer pointer */
-       xfs_agblock_t   bno;    /* freelist block */
-       xfs_extlen_t    delta;  /* new blocks needed in freelist */
-       int             error;  /* error result code */
-       xfs_extlen_t    longest;/* longest extent in allocation group */
-       xfs_mount_t     *mp;    /* file system mount point structure */
-       xfs_extlen_t    need;   /* total blocks needed in freelist */
-       xfs_perag_t     *pag;   /* per-ag information structure */
-       xfs_alloc_arg_t targs;  /* local allocation arguments */
-       xfs_trans_t     *tp;    /* transaction pointer */
-
-       mp = args->mp;
+       struct xfs_mount        *mp = args->mp;
+       struct xfs_perag        *pag = args->pag;
+       struct xfs_trans        *tp = args->tp;
+       struct xfs_buf          *agbp = NULL;
+       struct xfs_buf          *agflbp = NULL;
+       struct xfs_alloc_arg    targs;  /* local allocation arguments */
+       xfs_agblock_t           bno;    /* freelist block */
+       xfs_extlen_t            need;   /* total blocks needed in freelist */
+       int                     error;
 
-       pag = args->pag;
-       tp = args->tp;
        if (!pag->pagf_init) {
-               if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
-                               &agbp)))
-                       return error;
+               error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+               if (error)
+                       goto out_no_agbp;
                if (!pag->pagf_init) {
                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-                       args->agbp = NULL;
-                       return 0;
+                       goto out_agbp_relse;
                }
-       } else
-               agbp = NULL;
+       }
 
        /*
-        * If this is a metadata preferred pag and we are user data
-        * then try somewhere else if we are not being asked to
-        * try harder at this point
+        * If this is a metadata preferred pag and we are user data then try
+        * somewhere else if we are not being asked to try harder at this
+        * point.
         */
        if (pag->pagf_metadata && args->userdata &&
            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-               args->agbp = NULL;
-               return 0;
+               goto out_agbp_relse;
        }
 
-       if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-               /*
-                * If it looks like there isn't a long enough extent, or enough
-                * total blocks, reject it.
-                */
-               need = XFS_MIN_FREELIST_PAG(pag, mp);
-               longest = xfs_alloc_longest_free_extent(mp, pag);
-               if ((args->minlen + args->alignment + args->minalignslop - 1) >
-                               longest ||
-                   ((int)(pag->pagf_freeblks + pag->pagf_flcount -
-                          need - args->total) < (int)args->minleft)) {
-                       if (agbp)
-                               xfs_trans_brelse(tp, agbp);
-                       args->agbp = NULL;
-                       return 0;
-               }
-       }
+       need = xfs_alloc_min_freelist(mp, pag);
+       if (!xfs_alloc_space_available(args, need, flags))
+               goto out_agbp_relse;
 
        /*
         * Get the a.g. freespace buffer.
         * Can fail if we're not blocking on locks, and it's held.
         */
-       if (agbp == NULL) {
-               if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
-                               &agbp)))
-                       return error;
-               if (agbp == NULL) {
+       if (!agbp) {
+               error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+               if (error)
+                       goto out_no_agbp;
+               if (!agbp) {
                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-                       args->agbp = NULL;
-                       return 0;
-               }
-       }
-       /*
-        * Figure out how many blocks we should have in the freelist.
-        */
-       agf = XFS_BUF_TO_AGF(agbp);
-       need = XFS_MIN_FREELIST(agf, mp);
-       /*
-        * If there isn't enough total or single-extent, reject it.
-        */
-       if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-               delta = need > be32_to_cpu(agf->agf_flcount) ?
-                       (need - be32_to_cpu(agf->agf_flcount)) : 0;
-               longest = be32_to_cpu(agf->agf_longest);
-               longest = (longest > delta) ? (longest - delta) :
-                       (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
-               if ((args->minlen + args->alignment + args->minalignslop - 1) >
-                               longest ||
-                   ((int)(be32_to_cpu(agf->agf_freeblks) +
-                    be32_to_cpu(agf->agf_flcount) - need - args->total) <
-                               (int)args->minleft)) {
-                       xfs_trans_brelse(tp, agbp);
-                       args->agbp = NULL;
-                       return 0;
+                       goto out_no_agbp;
                }
        }
+
+       /* If there isn't enough total space or a long enough extent, reject it. */
+       need = xfs_alloc_min_freelist(mp, pag);
+       if (!xfs_alloc_space_available(args, need, flags))
+               goto out_agbp_relse;
+
        /*
         * Make the freelist shorter if it's too long.
+        *
+        * Note that from this point onwards, we will always release the agf and
+        * agfl buffers on error. This handles the case where we error out and
+        * the buffers are clean or may not have been joined to the transaction
+        * and hence need to be released manually. If they have been joined to
+        * the transaction, then xfs_trans_brelse() will handle them
+        * appropriately based on the recursion count and dirty state of the
+        * buffer.
+        *
+        * XXX (dgc): When we have lots of free space, does this buy us
+        * anything other than extra overhead when we need to put more blocks
+        * back on the free list? Maybe we should only do this when space is
+        * getting low or the AGFL is more than half full?
         */
-       while (be32_to_cpu(agf->agf_flcount) > need) {
-               xfs_buf_t       *bp;
+       while (pag->pagf_flcount > need) {
+               struct xfs_buf  *bp;
 
                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
                if (error)
-                       return error;
-               if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
-                       return error;
+                       goto out_agbp_relse;
+               error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+               if (error)
+                       goto out_agbp_relse;
                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
                xfs_trans_binval(tp, bp);
        }
-       /*
-        * Initialize the args structure.
-        */
+
        memset(&targs, 0, sizeof(targs));
        targs.tp = tp;
        targs.mp = mp;
@@ -1971,21 +2022,20 @@ xfs_alloc_fix_freelist(
        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
        targs.type = XFS_ALLOCTYPE_THIS_AG;
        targs.pag = pag;
-       if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
-               return error;
-       /*
-        * Make the freelist longer if it's too short.
-        */
-       while (be32_to_cpu(agf->agf_flcount) < need) {
+       error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
+       if (error)
+               goto out_agbp_relse;
+
+       /* Make the freelist longer if it's too short. */
+       while (pag->pagf_flcount < need) {
                targs.agbno = 0;
-               targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
-               /*
-                * Allocate as many blocks as possible at once.
-                */
-               if ((error = xfs_alloc_ag_vextent(&targs))) {
-                       xfs_trans_brelse(tp, agflbp);
-                       return error;
-               }
+               targs.maxlen = need - pag->pagf_flcount;
+
+               /* Allocate as many blocks as possible at once. */
+               error = xfs_alloc_ag_vextent(&targs);
+               if (error)
+                       goto out_agflbp_relse;
+
                /*
                 * Stop if we run out.  Won't happen if callers are obeying
                 * the restrictions correctly.  Can happen for free calls
@@ -1994,9 +2044,7 @@ xfs_alloc_fix_freelist(
                if (targs.agbno == NULLAGBLOCK) {
                        if (flags & XFS_ALLOC_FLAG_FREEING)
                                break;
-                       xfs_trans_brelse(tp, agflbp);
-                       args->agbp = NULL;
-                       return 0;
+                       goto out_agflbp_relse;
                }
                /*
                 * Put each allocated block on the list.
@@ -2005,12 +2053,21 @@ xfs_alloc_fix_freelist(
                        error = xfs_alloc_put_freelist(tp, agbp,
                                                        agflbp, bno, 0);
                        if (error)
-                               return error;
+                               goto out_agflbp_relse;
                }
        }
        xfs_trans_brelse(tp, agflbp);
        args->agbp = agbp;
        return 0;
+
+out_agflbp_relse:
+       xfs_trans_brelse(tp, agflbp);
+out_agbp_relse:
+       if (agbp)
+               xfs_trans_brelse(tp, agbp);
+out_no_agbp:
+       args->agbp = NULL;
+       return error;
 }
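
Every failure path in the rewritten function now funnels through a single unwind ladder, so buffers are released exactly once no matter where the error occurred. The skeleton, with acquire_agbp()/acquire_agflbp() standing in for the xfs_alloc_read_agf()/xfs_alloc_read_agfl() calls:

        /* sketch: the goto-unwind shape of xfs_alloc_fix_freelist() */
        error = acquire_agbp(&agbp);
        if (error)
                goto out_no_agbp;
        error = acquire_agflbp(&agflbp);
        if (error)
                goto out_agbp_relse;
        /* ... work that can fail jumps to the deepest matching label ... */
        args->agbp = agbp;
        return 0;

        out_agflbp_relse:
                xfs_trans_brelse(tp, agflbp);
        out_agbp_relse:
                if (agbp)
                        xfs_trans_brelse(tp, agbp);
        out_no_agbp:
                args->agbp = NULL;
                return error;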
 
 /*
index d1b4b6a5c8947ee038392ac877f58aeb6063e024..ca1c8168373aa444ebb2ef7eb2865e1d859c1ceb 100644 (file)
@@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg {
        xfs_extlen_t    total;          /* total blocks needed in xaction */
        xfs_extlen_t    alignment;      /* align answer to multiple of this */
        xfs_extlen_t    minalignslop;   /* slop for minlen+alignment calcs */
+       xfs_agblock_t   min_agbno;      /* set an agbno range for NEAR allocs */
+       xfs_agblock_t   max_agbno;      /* ... */
        xfs_extlen_t    len;            /* output: actual size of extent */
        xfs_alloctype_t type;           /* allocation type XFS_ALLOCTYPE_... */
        xfs_alloctype_t otype;          /* original allocation type */
@@ -128,11 +130,9 @@ typedef struct xfs_alloc_arg {
 #define XFS_ALLOC_USERDATA             1       /* allocation is for user data*/
 #define XFS_ALLOC_INITIAL_USER_DATA    2       /* special case start of file */
 
-/*
- * Find the length of the longest extent in an AG.
- */
-xfs_extlen_t
-xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+               struct xfs_perag *pag, xfs_extlen_t need);
+unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
                struct xfs_perag *pag);
 
 /*
index 0a472fbe06d46431a82cdb1f91aaa68755b2b944..3349c9a1e84569201cb5df93ae39dd1941f72209 100644 (file)
@@ -266,7 +266,7 @@ xfs_attr_set(
        tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
        error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
        if (error) {
-               xfs_trans_cancel(args.trans, 0);
+               xfs_trans_cancel(args.trans);
                return error;
        }
        xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -276,7 +276,7 @@ xfs_attr_set(
                                       XFS_QMOPT_RES_REGBLKS);
        if (error) {
                xfs_iunlock(dp, XFS_ILOCK_EXCL);
-               xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+               xfs_trans_cancel(args.trans);
                return error;
        }
 
@@ -320,8 +320,7 @@ xfs_attr_set(
                                xfs_trans_ichgtime(args.trans, dp,
                                                        XFS_ICHGTIME_CHG);
                        }
-                       err2 = xfs_trans_commit(args.trans,
-                                                XFS_TRANS_RELEASE_LOG_RES);
+                       err2 = xfs_trans_commit(args.trans);
                        xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
                        return error ? error : err2;
@@ -383,16 +382,14 @@ xfs_attr_set(
         * Commit the last in the sequence of transactions.
         */
        xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
-       error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
        return error;
 
 out:
-       if (args.trans) {
-               xfs_trans_cancel(args.trans,
-                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       }
+       if (args.trans)
+               xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
        return error;
 }
@@ -462,7 +459,7 @@ xfs_attr_remove(
        error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
                                  XFS_ATTRRM_SPACE_RES(mp), 0);
        if (error) {
-               xfs_trans_cancel(args.trans, 0);
+               xfs_trans_cancel(args.trans);
                return error;
        }
 
@@ -501,16 +498,14 @@ xfs_attr_remove(
         * Commit the last in the sequence of transactions.
         */
        xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
-       error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
        return error;
 
 out:
-       if (args.trans) {
-               xfs_trans_cancel(args.trans,
-                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       }
+       if (args.trans)
+               xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
        return error;
 }
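
After this series the log-reservation flags disappear from the transaction API: callers no longer pass XFS_TRANS_RELEASE_LOG_RES or XFS_TRANS_ABORT, and xfs_trans_cancel()/xfs_trans_commit() derive the right behaviour from the transaction's own state. The resulting lifecycle, sketched along the lines of the remove path above (XFS_TRANS_ATTR_RM as the transaction type is an assumption):

        /* sketch: flag-less transaction lifecycle used throughout the hunks */
        tp = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrrm,
                                  XFS_ATTRRM_SPACE_RES(mp), 0);
        if (error) {
                xfs_trans_cancel(tp);   /* no flags: cancel works it out */
                return error;
        }
        /* ... modify and log items ... */
        return xfs_trans_commit(tp);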
index f1026e86dabc9a00ead716785a3acb5c19ee8e10..63e05b663380d8e8997f8013d76de1e7d45572b1 100644 (file)
@@ -1112,7 +1112,6 @@ xfs_bmap_add_attrfork(
        int                     committed;      /* xaction was committed */
        int                     logflags;       /* logging flags */
        int                     error;          /* error return value */
-       int                     cancel_flags = 0;
 
        ASSERT(XFS_IFORK_Q(ip) == 0);
 
@@ -1124,17 +1123,15 @@ xfs_bmap_add_attrfork(
                tp->t_flags |= XFS_TRANS_RESERVE;
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
                        XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
                        XFS_QMOPT_RES_REGBLKS);
        if (error)
                goto trans_cancel;
-       cancel_flags |= XFS_TRANS_ABORT;
        if (XFS_IFORK_Q(ip))
                goto trans_cancel;
        if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
@@ -1218,14 +1215,14 @@ xfs_bmap_add_attrfork(
        error = xfs_bmap_finish(&tp, &flist, &committed);
        if (error)
                goto bmap_cancel;
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 
 bmap_cancel:
        xfs_bmap_cancel(&flist);
 trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 }
@@ -3521,7 +3518,8 @@ xfs_bmap_longest_free_extent(
                }
        }
 
-       longest = xfs_alloc_longest_free_extent(mp, pag);
+       longest = xfs_alloc_longest_free_extent(mp, pag,
+                                       xfs_alloc_min_freelist(mp, pag));
        if (*blen < longest)
                *blen = longest;
 
@@ -4424,7 +4422,15 @@ xfs_bmapi_convert_unwritten(
        error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
                        &bma->cur, mval, bma->firstblock, bma->flist,
                        &tmp_logflags);
-       bma->logflags |= tmp_logflags;
+       /*
+        * Log the inode core unconditionally in the unwritten extent conversion
+        * path because the conversion might not have done so (e.g., if the
+        * extent count hasn't changed). We need to make sure the inode is dirty
+        * in the transaction for the sake of fsync(), even if nothing has
+        * changed, because fsync() will not force the log for this transaction
+        * unless it sees the inode pinned.
+        */
+       bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
        if (error)
                return error;
 
@@ -5918,7 +5924,7 @@ xfs_bmap_split_extent(
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
                        XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -5936,10 +5942,9 @@ xfs_bmap_split_extent(
        if (error)
                goto out;
 
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
+       return xfs_trans_commit(tp);
 
 out:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        return error;
 }
index 4daaa662337b516f0794ea0d4680a12512ab6262..a0ae572051de78b4169d5f363ba749f4030c170d 100644 (file)
@@ -170,7 +170,7 @@ typedef struct xfs_sb {
        __uint32_t      sb_features_log_incompat;
 
        __uint32_t      sb_crc;         /* superblock crc */
-       __uint32_t      sb_pad;
+       xfs_extlen_t    sb_spino_align; /* sparse inode chunk alignment */
 
        xfs_ino_t       sb_pquotino;    /* project quota inode */
        xfs_lsn_t       sb_lsn;         /* last write sequence */
@@ -256,7 +256,7 @@ typedef struct xfs_dsb {
        __be32          sb_features_log_incompat;
 
        __le32          sb_crc;         /* superblock crc */
-       __be32          sb_pad;
+       __be32          sb_spino_align; /* sparse inode chunk alignment */
 
        __be64          sb_pquotino;    /* project quota inode */
        __be64          sb_lsn;         /* last write sequence */
@@ -457,8 +457,10 @@ xfs_sb_has_ro_compat_feature(
 }
 
 #define XFS_SB_FEAT_INCOMPAT_FTYPE     (1 << 0)        /* filetype in dirent */
+#define XFS_SB_FEAT_INCOMPAT_SPINODES  (1 << 1)        /* sparse inode chunks */
 #define XFS_SB_FEAT_INCOMPAT_ALL \
-               (XFS_SB_FEAT_INCOMPAT_FTYPE)
+               (XFS_SB_FEAT_INCOMPAT_FTYPE|    \
+                XFS_SB_FEAT_INCOMPAT_SPINODES)
 
 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN   ~XFS_SB_FEAT_INCOMPAT_ALL
 static inline bool
@@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
                (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
 }
 
+static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
+{
+       return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+               xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
+}
+
 /*
  * end of superblock version macros
  */
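
Because SPINODES lives in the incompat feature mask, kernels without this code refuse the mount outright rather than misreading the new inobt record layout; newer code gates the on-disk encoding on the predicate above, e.g. (helper names here are placeholders, the real switch appears in xfs_inobt_update() later in this series):

        /* sketch: the feature bit selects the inobt record encoding */
        if (xfs_sb_version_hassparseinodes(&mp->m_sb))
                encode_sparse_rec(&rec, irec);  /* holemask/count/freecount */
        else
                encode_full_rec(&rec, irec);    /* 32-bit freecount only */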
@@ -758,19 +766,6 @@ typedef struct xfs_agfl {
 
 #define XFS_AGFL_CRC_OFF       offsetof(struct xfs_agfl, agfl_crc)
 
-
-#define        XFS_AG_MAXLEVELS(mp)            ((mp)->m_ag_maxlevels)
-#define        XFS_MIN_FREELIST_RAW(bl,cl,mp)  \
-       (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
-#define        XFS_MIN_FREELIST(a,mp)          \
-       (XFS_MIN_FREELIST_RAW(          \
-               be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
-               be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
-#define        XFS_MIN_FREELIST_PAG(pag,mp)    \
-       (XFS_MIN_FREELIST_RAW(          \
-               (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
-               (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
-
 #define XFS_AGB_TO_FSB(mp,agno,agbno)  \
        (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
 #define        XFS_FSB_TO_AGNO(mp,fsbno)       \
@@ -1216,26 +1211,54 @@ typedef __uint64_t      xfs_inofree_t;
 #define        XFS_INOBT_ALL_FREE              ((xfs_inofree_t)-1)
 #define        XFS_INOBT_MASK(i)               ((xfs_inofree_t)1 << (i))
 
+#define XFS_INOBT_HOLEMASK_FULL                0       /* holemask for full chunk */
+#define XFS_INOBT_HOLEMASK_BITS                (NBBY * sizeof(__uint16_t))
+#define XFS_INODES_PER_HOLEMASK_BIT    \
+       (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+
 static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
 {
        return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
 }
 
 /*
- * Data record structure
+ * The on-disk inode record structure has two formats. The original "full"
+ * format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount
+ * and replaces the 3 high-order freecount bytes with the holemask and inode
+ * count.
+ *
+ * The holemask of the sparse record format allows an inode chunk to have holes
+ * that refer to blocks not owned by the inode record. This facilitates inode
+ * allocation in the event of severe free space fragmentation.
  */
 typedef struct xfs_inobt_rec {
        __be32          ir_startino;    /* starting inode number */
-       __be32          ir_freecount;   /* count of free inodes (set bits) */
+       union {
+               struct {
+                       __be32  ir_freecount;   /* count of free inodes */
+               } f;
+               struct {
+                       __be16  ir_holemask;/* hole mask for sparse chunks */
+                       __u8    ir_count;       /* total inode count */
+                       __u8    ir_freecount;   /* count of free inodes */
+               } sp;
+       } ir_u;
        __be64          ir_free;        /* free inode mask */
 } xfs_inobt_rec_t;
 
 typedef struct xfs_inobt_rec_incore {
        xfs_agino_t     ir_startino;    /* starting inode number */
-       __int32_t       ir_freecount;   /* count of free inodes (set bits) */
+       __uint16_t      ir_holemask;    /* hole mask for sparse chunks */
+       __uint8_t       ir_count;       /* total inode count */
+       __uint8_t       ir_freecount;   /* count of free inodes (set bits) */
        xfs_inofree_t   ir_free;        /* free inode mask */
 } xfs_inobt_rec_incore_t;
 
+static inline bool xfs_inobt_issparse(uint16_t holemask)
+{
+       /* non-zero holemask represents a sparse rec. */
+       return holemask;
+}
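
The granularity works out as follows: 64 inodes per chunk divided across a 16 bit holemask gives XFS_INODES_PER_HOLEMASK_BIT = 4 inodes per bit. A set bit marks its 4 inode span as a hole not backed by disk blocks, so a record with ir_holemask = 0xff00 covers only the first 32 inodes of the chunk.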
 
 /*
  * Key structure
@@ -1453,8 +1476,8 @@ struct xfs_acl {
                sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
 
 /* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE           (unsigned char *)"SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT                (unsigned char *)"SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE           "SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT                "SGI_ACL_DEFAULT"
 #define SGI_ACL_FILE_SIZE      (sizeof(SGI_ACL_FILE)-1)
 #define SGI_ACL_DEFAULT_SIZE   (sizeof(SGI_ACL_DEFAULT)-1)
 
index 18dc721ca19f85f7436ab7c3241e28506521abee..89689c6a43e2cf6a0f042f2abaa9dcd0da5a0873 100644 (file)
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_V5SB       0x8000  /* version 5 superblock */
 #define XFS_FSOP_GEOM_FLAGS_FTYPE      0x10000 /* inode directory types */
 #define XFS_FSOP_GEOM_FLAGS_FINOBT     0x20000 /* free inode btree */
+#define XFS_FSOP_GEOM_FLAGS_SPINODES   0x40000 /* sparse inode chunks  */
 
 /*
  * Minimum and maximum sizes need for growth checks.
index 1c9e75521250ecf606639578ce79696b6ff4a682..66efc702452a0cd45920ce3fd021c689d7bdd40b 100644 (file)
@@ -65,6 +65,8 @@ xfs_inobt_lookup(
        int                     *stat)  /* success/failure */
 {
        cur->bc_rec.i.ir_startino = ino;
+       cur->bc_rec.i.ir_holemask = 0;
+       cur->bc_rec.i.ir_count = 0;
        cur->bc_rec.i.ir_freecount = 0;
        cur->bc_rec.i.ir_free = 0;
        return xfs_btree_lookup(cur, dir, stat);
@@ -82,7 +84,14 @@ xfs_inobt_update(
        union xfs_btree_rec     rec;
 
        rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
-       rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
+       if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+               rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
+               rec.inobt.ir_u.sp.ir_count = irec->ir_count;
+               rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
+       } else {
+               /* ir_holemask/ir_count not supported on-disk */
+               rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
+       }
        rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
        return xfs_btree_update(cur, &rec);
 }
@@ -100,12 +109,27 @@ xfs_inobt_get_rec(
        int                     error;
 
        error = xfs_btree_get_rec(cur, &rec, stat);
-       if (!error && *stat == 1) {
-               irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
-               irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
-               irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+       if (error || *stat == 0)
+               return error;
+
+       irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
+       if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+               irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
+               irec->ir_count = rec->inobt.ir_u.sp.ir_count;
+               irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
+       } else {
+               /*
+                * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
+                * values for full inode chunks.
+                */
+               irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
+               irec->ir_count = XFS_INODES_PER_CHUNK;
+               irec->ir_freecount =
+                               be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
        }
-       return error;
+       irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+
+       return 0;
 }
 
 /*
@@ -114,10 +138,14 @@ xfs_inobt_get_rec(
 STATIC int
 xfs_inobt_insert_rec(
        struct xfs_btree_cur    *cur,
+       __uint16_t              holemask,
+       __uint8_t               count,
        __int32_t               freecount,
        xfs_inofree_t           free,
        int                     *stat)
 {
+       cur->bc_rec.i.ir_holemask = holemask;
+       cur->bc_rec.i.ir_count = count;
        cur->bc_rec.i.ir_freecount = freecount;
        cur->bc_rec.i.ir_free = free;
        return xfs_btree_insert(cur, stat);
@@ -154,7 +182,9 @@ xfs_inobt_insert(
                }
                ASSERT(i == 0);
 
-               error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+               error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
+                                            XFS_INODES_PER_CHUNK,
+                                            XFS_INODES_PER_CHUNK,
                                             XFS_INOBT_ALL_FREE, &i);
                if (error) {
                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -220,6 +250,7 @@ xfs_ialloc_inode_init(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
        struct list_head        *buffer_list,
+       int                     icount,
        xfs_agnumber_t          agno,
        xfs_agblock_t           agbno,
        xfs_agblock_t           length,
@@ -275,7 +306,7 @@ xfs_ialloc_inode_init(
                 * they track in the AIL as if they were physically logged.
                 */
                if (tp)
-                       xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
+                       xfs_icreate_log(tp, agno, agbno, icount,
                                        mp->m_sb.sb_inodesize, length, gen);
        } else
                version = 2;
@@ -346,6 +377,214 @@ xfs_ialloc_inode_init(
        return 0;
 }
 
+/*
+ * Align startino and allocmask for a recently allocated sparse chunk such that
+ * they are fit for insertion (or merge) into the on-disk inode btrees.
+ *
+ * Background:
+ *
+ * When enabled, sparse inode support increases the inode alignment from cluster
+ * size to inode chunk size. This means that the minimum range between two
+ * non-adjacent inode records in the inobt is large enough for a full inode
+ * record. This allows for cluster sized, cluster aligned block allocation
+ * without need to worry about whether the resulting inode record overlaps with
+ * another record in the tree. Without this basic rule, we would have to deal
+ * with the consequences of overlap by potentially undoing recent allocations in
+ * the inode allocation codepath.
+ *
+ * Because of this alignment rule (which is enforced on mount), there are two
+ * inobt possibilities for newly allocated sparse chunks. One is that the
+ * aligned inode record for the chunk covers a range of inodes not already
+ * covered in the inobt (i.e., it is safe to insert a new sparse record). The
+ * other is that a record already exists at the aligned startino that considers
+ * the newly allocated range as sparse. In the latter case, record content is
+ * merged in hope that sparse inode chunks fill to full chunks over time.
+ */
+STATIC void
+xfs_align_sparse_ino(
+       struct xfs_mount                *mp,
+       xfs_agino_t                     *startino,
+       uint16_t                        *allocmask)
+{
+       xfs_agblock_t                   agbno;
+       xfs_agblock_t                   mod;
+       int                             offset;
+
+       agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
+       mod = agbno % mp->m_sb.sb_inoalignmt;
+       if (!mod)
+               return;
+
+       /* calculate the inode offset and align startino */
+       offset = mod << mp->m_sb.sb_inopblog;
+       *startino -= offset;
+
+       /*
+        * Since startino has been aligned down, left shift allocmask such that
+        * it continues to represent the same physical inodes relative to the
+        * new startino.
+        */
+       *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
+}
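
A worked example, assuming 4 inodes per block (sb_inopblog = 2) and sb_inoalignmt = 16: a sparse chunk allocated at agbno 40 gives mod = 40 % 16 = 8, so startino is pulled back by offset = 8 << 2 = 32 inodes to the aligned chunk start, and allocmask is shifted left by 32 / 4 = 8 holemask bits so its set bits still describe the same physical inodes.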
+
+/*
+ * Determine whether the source inode record can merge into the target. Both
+ * records must be sparse, the inode ranges must match and there must be no
+ * allocation overlap between the records.
+ */
+STATIC bool
+__xfs_inobt_can_merge(
+       struct xfs_inobt_rec_incore     *trec,  /* tgt record */
+       struct xfs_inobt_rec_incore     *srec)  /* src record */
+{
+       uint64_t                        talloc;
+       uint64_t                        salloc;
+
+       /* records must cover the same inode range */
+       if (trec->ir_startino != srec->ir_startino)
+               return false;
+
+       /* both records must be sparse */
+       if (!xfs_inobt_issparse(trec->ir_holemask) ||
+           !xfs_inobt_issparse(srec->ir_holemask))
+               return false;
+
+       /* both records must track some inodes */
+       if (!trec->ir_count || !srec->ir_count)
+               return false;
+
+       /* can't exceed capacity of a full record */
+       if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
+               return false;
+
+       /* verify there is no allocation overlap */
+       talloc = xfs_inobt_irec_to_allocmask(trec);
+       salloc = xfs_inobt_irec_to_allocmask(srec);
+       if (talloc & salloc)
+               return false;
+
+       return true;
+}
+
+/*
+ * Merge the source inode record into the target. The caller must call
+ * __xfs_inobt_can_merge() to ensure the merge is valid.
+ */
+STATIC void
+__xfs_inobt_rec_merge(
+       struct xfs_inobt_rec_incore     *trec,  /* target */
+       struct xfs_inobt_rec_incore     *srec)  /* src */
+{
+       ASSERT(trec->ir_startino == srec->ir_startino);
+
+       /* combine the counts */
+       trec->ir_count += srec->ir_count;
+       trec->ir_freecount += srec->ir_freecount;
+
+       /*
+        * Merge the holemask and free mask. For both fields, 0 bits refer to
+        * allocated inodes. We combine the allocated ranges with bitwise AND.
+        */
+       trec->ir_holemask &= srec->ir_holemask;
+       trec->ir_free &= srec->ir_free;
+}
+
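
As a sketch of these AND semantics (using a pared-down stand-in for the incore record, not the kernel structures): two non-overlapping sparse halves merge into one full record, with in-use inodes surviving as 0 bits in both masks.

	#include <assert.h>
	#include <stdint.h>

	struct irec {			/* pared-down stand-in for the incore rec */
		uint16_t holemask;	/* 0 bit = physically allocated region */
		uint8_t  count;
		uint8_t  freecount;
		uint64_t free;		/* 1 bit = free inode */
	};

	int main(void)
	{
		/* target: lower half of the chunk allocated, inode 0 in use */
		struct irec t = { 0xff00, 32, 31, ~1ULL };
		/* source: upper half allocated, all of its inodes free */
		struct irec s = { 0x00ff, 32, 32, ~0ULL };

		t.count += s.count;
		t.freecount += s.freecount;
		t.holemask &= s.holemask;
		t.free &= s.free;

		assert(t.holemask == 0);		/* no holes remain */
		assert(t.count == 64 && t.freecount == 63);
		assert(t.free == ~1ULL);		/* inode 0 still in use */
		return 0;
	}
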
+/*
+ * Insert a new sparse inode chunk into the associated inode btree. The inode
+ * record for the sparse chunk is pre-aligned to a startino that should match
+ * any pre-existing sparse inode record in the tree. This allows sparse chunks
+ * to fill over time.
+ *
+ * This function supports two modes of handling preexisting records depending on
+ * the merge flag. If merge is true, the provided record is merged with the
+ * existing record and updated in place. The merged record is returned in nrec.
+ * If merge is false, an existing record is replaced with the provided record.
+ * If no preexisting record exists, the provided record is always inserted.
+ *
+ * It is considered corruption if a merge is requested and not possible. Given
+ * the sparse inode alignment constraints, this should never happen.
+ */
+STATIC int
+xfs_inobt_insert_sprec(
+       struct xfs_mount                *mp,
+       struct xfs_trans                *tp,
+       struct xfs_buf                  *agbp,
+       int                             btnum,
+       struct xfs_inobt_rec_incore     *nrec,  /* in/out: new/merged rec. */
+       bool                            merge)  /* merge or replace */
+{
+       struct xfs_btree_cur            *cur;
+       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
+       int                             error;
+       int                             i;
+       struct xfs_inobt_rec_incore     rec;
+
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
+
+       /* the new record is pre-aligned so we know where to look */
+       error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
+       if (error)
+               goto error;
+       /* if nothing there, insert a new record and return */
+       if (i == 0) {
+               error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
+                                            nrec->ir_count, nrec->ir_freecount,
+                                            nrec->ir_free, &i);
+               if (error)
+                       goto error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+
+               goto out;
+       }
+
+       /*
+        * A record exists at this startino. Merge or replace the record
+        * depending on what we've been asked to do.
+        */
+       if (merge) {
+               error = xfs_inobt_get_rec(cur, &rec, &i);
+               if (error)
+                       goto error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                                       rec.ir_startino == nrec->ir_startino,
+                                       error);
+
+               /*
+                * This should never fail. If we have coexisting records that
+                * cannot merge, something is seriously wrong.
+                */
+               XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
+                                       error);
+
+               trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
+                                        rec.ir_holemask, nrec->ir_startino,
+                                        nrec->ir_holemask);
+
+               /* merge into nrec to produce the updated record */
+               __xfs_inobt_rec_merge(nrec, &rec);
+
+               trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,
+                                         nrec->ir_holemask);
+
+               error = xfs_inobt_rec_check_count(mp, nrec);
+               if (error)
+                       goto error;
+       }
+
+       error = xfs_inobt_update(cur, nrec);
+       if (error)
+               goto error;
+
+out:
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       return 0;
+error:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
 /*
  * Allocate new inodes in the allocation group specified by agbp.
  * Return 0 for success, else error code.
@@ -364,11 +603,22 @@ xfs_ialloc_ag_alloc(
        xfs_agino_t     newlen;         /* new number of inodes */
        int             isaligned = 0;  /* inode allocation at stripe unit */
                                        /* boundary */
+       uint16_t        allocmask = (uint16_t) -1; /* init. to full chunk */
+       struct xfs_inobt_rec_incore rec;
        struct xfs_perag *pag;
+       int             do_sparse = 0;
 
        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = tp->t_mountp;
+       args.fsbno = NULLFSBLOCK;
+
+#ifdef DEBUG
+       /* randomly do sparse inode allocations */
+       if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
+           args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+               do_sparse = prandom_u32() & 1;
+#endif
 
        /*
         * Locking will ensure that we don't have two callers in here
@@ -390,6 +640,8 @@ xfs_ialloc_ag_alloc(
        agno = be32_to_cpu(agi->agi_seqno);
        args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
                     args.mp->m_ialloc_blks;
+       if (do_sparse)
+               goto sparse_alloc;
        if (likely(newino != NULLAGINO &&
                  (args.agbno < be32_to_cpu(agi->agi_length)))) {
                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
@@ -428,8 +680,7 @@ xfs_ialloc_ag_alloc(
                 * subsequent requests.
                 */
                args.minalignslop = 0;
-       } else
-               args.fsbno = NULLFSBLOCK;
+       }
 
        if (unlikely(args.fsbno == NULLFSBLOCK)) {
                /*
@@ -480,6 +731,47 @@ xfs_ialloc_ag_alloc(
                        return error;
        }
 
+       /*
+        * Finally, try a sparse allocation if the filesystem supports it and
+        * the sparse allocation length is smaller than a full chunk.
+        */
+       if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
+           args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+           args.fsbno == NULLFSBLOCK) {
+sparse_alloc:
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.agbno = be32_to_cpu(agi->agi_root);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
+               args.alignment = args.mp->m_sb.sb_spino_align;
+               args.prod = 1;
+
+               args.minlen = args.mp->m_ialloc_min_blks;
+               args.maxlen = args.minlen;
+
+               /*
+                * The inode record will be aligned to full chunk size. We must
+                * prevent sparse allocation at AG boundaries that would result
+                * in invalid inode records, such as records that start at
+                * agbno 0 or extend beyond the AG.
+                *
+                * Set min agbno to the first aligned, non-zero agbno and max to
+                * the last aligned agbno that is at least one full chunk from
+                * the end of the AG.
+                */
+               args.min_agbno = args.mp->m_sb.sb_inoalignmt;
+               args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+                                           args.mp->m_sb.sb_inoalignmt) -
+                                args.mp->m_ialloc_blks;
+
+               error = xfs_alloc_vextent(&args);
+               if (error)
+                       return error;
+
+               newlen = args.len << args.mp->m_sb.sb_inopblog;
+               ASSERT(newlen <= XFS_INODES_PER_CHUNK);
+               allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
+       }
+
        if (args.fsbno == NULLFSBLOCK) {
                *alloc = 0;
                return 0;
@@ -495,8 +787,8 @@ xfs_ialloc_ag_alloc(
         * rather than a linear progression to prevent the next generation
         * number from being easily guessable.
         */
-       error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
-                       args.len, prandom_u32());
+       error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,
+                       args.agbno, args.len, prandom_u32());
 
        if (error)
                return error;
@@ -504,6 +796,73 @@ xfs_ialloc_ag_alloc(
         * Convert the results.
         */
        newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+
+       if (xfs_inobt_issparse(~allocmask)) {
+               /*
+                * We've allocated a sparse chunk. Align the startino and mask.
+                */
+               xfs_align_sparse_ino(args.mp, &newino, &allocmask);
+
+               rec.ir_startino = newino;
+               rec.ir_holemask = ~allocmask;
+               rec.ir_count = newlen;
+               rec.ir_freecount = newlen;
+               rec.ir_free = XFS_INOBT_ALL_FREE;
+
+               /*
+                * Insert the sparse record into the inobt and allow for a merge
+                * if necessary. If a merge does occur, rec is updated to the
+                * merged record.
+                */
+               error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,
+                                              &rec, true);
+               if (error == -EFSCORRUPTED) {
+                       xfs_alert(args.mp,
+       "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
+                                 XFS_AGINO_TO_INO(args.mp, agno,
+                                                  rec.ir_startino),
+                                 rec.ir_holemask, rec.ir_count);
+                       xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
+               }
+               if (error)
+                       return error;
+
+               /*
+                * We can't merge the part we've just allocated as we did for
+                * the inobt, due to finobt semantics. The original record may
+                * or may not exist independently of whether physical inodes
+                * exist in this sparse chunk.
+                *
+                * We must update the finobt record based on the inobt record.
+                * rec contains the fully merged and up to date inobt record
+                * from the previous call. Set merge false to replace any
+                * existing record with this one.
+                */
+               if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+                       error = xfs_inobt_insert_sprec(args.mp, tp, agbp,
+                                                      XFS_BTNUM_FINO, &rec,
+                                                      false);
+                       if (error)
+                               return error;
+               }
+       } else {
+               /* full chunk - insert new records to both btrees */
+               error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+                                        XFS_BTNUM_INO);
+               if (error)
+                       return error;
+
+               if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+                       error = xfs_inobt_insert(args.mp, tp, agbp, newino,
+                                                newlen, XFS_BTNUM_FINO);
+                       if (error)
+                               return error;
+               }
+       }
+
+       /*
+        * Update AGI counts and newino.
+        */
        be32_add_cpu(&agi->agi_count, newlen);
        be32_add_cpu(&agi->agi_freecount, newlen);
        pag = xfs_perag_get(args.mp, agno);
@@ -511,20 +870,6 @@ xfs_ialloc_ag_alloc(
        xfs_perag_put(pag);
        agi->agi_newino = cpu_to_be32(newino);
 
-       /*
-        * Insert records describing the new inode chunk into the btrees.
-        */
-       error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
-                                XFS_BTNUM_INO);
-       if (error)
-               return error;
-
-       if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
-               error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
-                                        XFS_BTNUM_FINO);
-               if (error)
-                       return error;
-       }
        /*
         * Log allocation group header fields
         */
@@ -645,7 +990,7 @@ xfs_ialloc_ag_select(
                 * if we fail allocation due to alignment issues then it is most
                 * likely a real ENOSPC condition.
                 */
-               ineed = mp->m_ialloc_blks;
+               ineed = mp->m_ialloc_min_blks;
                if (flags && ineed > 1)
                        ineed += xfs_ialloc_cluster_alignment(mp);
                longest = pag->pagf_longest;
@@ -731,6 +1076,27 @@ xfs_ialloc_get_rec(
        return 0;
 }
 
+/*
+ * Return the offset of the first free inode in the record. If the inode chunk
+ * is sparsely allocated, we convert the record holemask to inode granularity
+ * and mask off the unallocated regions from the inode free mask.
+ */
+STATIC int
+xfs_inobt_first_free_inode(
+       struct xfs_inobt_rec_incore     *rec)
+{
+       xfs_inofree_t                   realfree;
+
+       /* if there are no holes, return the first available offset */
+       if (!xfs_inobt_issparse(rec->ir_holemask))
+               return xfs_lowbit64(rec->ir_free);
+
+       realfree = xfs_inobt_irec_to_allocmask(rec);
+       realfree &= rec->ir_free;
+
+       return xfs_lowbit64(realfree);
+}
+
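
A quick sketch of why the masking matters (hypothetical record: the low 16 inodes are a hole, so their ir_free bits are set even though they can never be handed out):

	#include <assert.h>
	#include <stdint.h>

	static int lowbit64(uint64_t v)		/* stand-in for xfs_lowbit64() */
	{
		for (int i = 0; i < 64; i++)
			if (v & (1ULL << i))
				return i;
		return -1;
	}

	int main(void)
	{
		uint64_t alloc = ~0ULL << 16;	/* physically allocated inodes */
		uint64_t free = ~(1ULL << 16);	/* inode 16 in use; hole bits read "free" */

		assert(lowbit64(free) == 0);		/* naive answer lands in the hole */
		assert(lowbit64(alloc & free) == 17);	/* first real free inode */
		return 0;
	}
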
 /*
  * Allocate an inode using the inobt-only algorithm.
  */
@@ -961,7 +1327,7 @@ newino:
        }
 
 alloc_inode:
-       offset = xfs_lowbit64(rec.ir_free);
+       offset = xfs_inobt_first_free_inode(&rec);
        ASSERT(offset >= 0);
        ASSERT(offset < XFS_INODES_PER_CHUNK);
        ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1210,7 +1576,7 @@ xfs_dialloc_ag(
        if (error)
                goto error_cur;
 
-       offset = xfs_lowbit64(rec.ir_free);
+       offset = xfs_inobt_first_free_inode(&rec);
        ASSERT(offset >= 0);
        ASSERT(offset < XFS_INODES_PER_CHUNK);
        ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1439,6 +1805,83 @@ out_error:
        return error;
 }
 
+/*
+ * Free the blocks of an inode chunk. The chunk might be sparse, in which case
+ * we must take care to free only the regions that are actually allocated as
+ * part of the chunk.
+ */
+STATIC void
+xfs_difree_inode_chunk(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       struct xfs_inobt_rec_incore     *rec,
+       struct xfs_bmap_free            *flist)
+{
+       xfs_agblock_t   sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
+       int             startidx, endidx;
+       int             nextbit;
+       xfs_agblock_t   agbno;
+       int             contigblk;
+       DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
+
+       if (!xfs_inobt_issparse(rec->ir_holemask)) {
+               /* not sparse, calculate extent info directly */
+               xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
+                                 XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
+                                 mp->m_ialloc_blks, flist, mp);
+               return;
+       }
+
+       /* holemask is only 16 bits (fits in an unsigned long) */
+       ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
+       holemask[0] = rec->ir_holemask;
+
+       /*
+        * Find contiguous ranges of zeroes (i.e., allocated regions) in the
+        * holemask and convert the start/end index of each range to an extent.
+        * We start with the start and end index both pointing at the first 0 in
+        * the mask.
+        */
+       startidx = endidx = find_first_zero_bit(holemask,
+                                               XFS_INOBT_HOLEMASK_BITS);
+       nextbit = startidx + 1;
+       while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+               nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
+                                            nextbit);
+               /*
+                * If the next zero bit is contiguous, update the end index of
+                * the current range and continue.
+                */
+               if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
+                   nextbit == endidx + 1) {
+                       endidx = nextbit;
+                       goto next;
+               }
+
+               /*
+                * nextbit is not contiguous with the current end index. Convert
+                * the current start/end to an extent and add it to the free
+                * list.
+                */
+               agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
+                                 mp->m_sb.sb_inopblock;
+               contigblk = ((endidx - startidx + 1) *
+                            XFS_INODES_PER_HOLEMASK_BIT) /
+                           mp->m_sb.sb_inopblock;
+
+               ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
+               ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
+               xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+                                 flist, mp);
+
+               /* reset range to current bit and carry on... */
+               startidx = endidx = nextbit;
+
+next:
+               nextbit++;
+       }
+}
+
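
The loop structure is easier to see outside the kernel bitmap helpers. A sketch with hypothetical geometry (512-byte inodes in 4k blocks, so 8 inodes per block) that walks the zero runs of a holemask and prints the extents the code above would free:

	#include <stdint.h>
	#include <stdio.h>

	#define HOLEMASK_BITS	16
	#define INODES_PER_BIT	4	/* 64 inodes / 16 holemask bits */
	#define INODES_PER_BLK	8	/* hypothetical: 512-byte inodes, 4k blocks */

	int main(void)
	{
		uint16_t holemask = 0x0ff0;	/* holes over inodes 16..47 */
		int startidx = -1;

		/* scan one past the end so a trailing run is flushed */
		for (int bit = 0; bit <= HOLEMASK_BITS; bit++) {
			int alloc = bit < HOLEMASK_BITS &&
				    !(holemask & (1u << bit));

			if (alloc && startidx < 0)
				startidx = bit;		/* open a run */
			else if (!alloc && startidx >= 0) {
				printf("free extent at +%d blocks, %d blocks\n",
				       startidx * INODES_PER_BIT / INODES_PER_BLK,
				       (bit - startidx) * INODES_PER_BIT /
					INODES_PER_BLK);
				startidx = -1;		/* close the run */
			}
		}
		return 0;	/* prints runs at +0 (2 blocks) and +6 (2 blocks) */
	}
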
 STATIC int
 xfs_difree_inobt(
        struct xfs_mount                *mp,
@@ -1446,8 +1889,7 @@ xfs_difree_inobt(
        struct xfs_buf                  *agbp,
        xfs_agino_t                     agino,
        struct xfs_bmap_free            *flist,
-       int                             *deleted,
-       xfs_ino_t                       *first_ino,
+       struct xfs_icluster             *xic,
        struct xfs_inobt_rec_incore     *orec)
 {
        struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
@@ -1501,20 +1943,23 @@ xfs_difree_inobt(
        rec.ir_freecount++;
 
        /*
-        * When an inode cluster is free, it becomes eligible for removal
+        * When an inode chunk is free, it becomes eligible for removal. Don't
+        * remove the chunk if the block size is large enough to hold multiple
+        * inode chunks (which might not all be free).
         */
        if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
-           (rec.ir_freecount == mp->m_ialloc_inos)) {
-
-               *deleted = 1;
-               *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+           rec.ir_free == XFS_INOBT_ALL_FREE &&
+           mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
+               xic->deleted = 1;
+               xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+               xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
 
                /*
                 * Remove the inode cluster from the AGI B+Tree, adjust the
                 * AGI and Superblock inode counts, and mark the disk space
                 * to be freed when the transaction is committed.
                 */
-               ilen = mp->m_ialloc_inos;
+               ilen = rec.ir_freecount;
                be32_add_cpu(&agi->agi_count, -ilen);
                be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
@@ -1530,11 +1975,9 @@ xfs_difree_inobt(
                        goto error0;
                }
 
-               xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
-                                 XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
-                                 mp->m_ialloc_blks, flist, mp);
+               xfs_difree_inode_chunk(mp, agno, &rec, flist);
        } else {
-               *deleted = 0;
+               xic->deleted = 0;
 
                error = xfs_inobt_update(cur, &rec);
                if (error) {
@@ -1599,7 +2042,9 @@ xfs_difree_finobt(
                 */
                XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
 
-               error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+               error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
+                                            ibtrec->ir_count,
+                                            ibtrec->ir_freecount,
                                             ibtrec->ir_free, &i);
                if (error)
                        goto error;
@@ -1634,8 +2079,13 @@ xfs_difree_finobt(
         * free inode. Hence, if all of the inodes are free and we aren't
         * keeping inode chunks permanently on disk, remove the record.
         * Otherwise, update the record with the new information.
+        *
+        * Note that we currently can't free chunks when the block size is
+        * large enough to hold multiple chunks. Leave the finobt record in
+        * place to remain in sync with the inobt.
         */
-       if (rec.ir_freecount == mp->m_ialloc_inos &&
+       if (rec.ir_free == XFS_INOBT_ALL_FREE &&
+           mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
                error = xfs_btree_delete(cur, &i);
                if (error)
@@ -1671,8 +2121,7 @@ xfs_difree(
        struct xfs_trans        *tp,            /* transaction pointer */
        xfs_ino_t               inode,          /* inode to be freed */
        struct xfs_bmap_free    *flist,         /* extents to free */
-       int                     *deleted,/* set if inode cluster was deleted */
-       xfs_ino_t               *first_ino)/* first inode in deleted cluster */
+       struct xfs_icluster     *xic)   /* cluster info if deleted */
 {
        /* REFERENCED */
        xfs_agblock_t           agbno;  /* block number containing inode */
@@ -1723,8 +2172,7 @@ xfs_difree(
        /*
         * Fix up the inode allocation btree.
         */
-       error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
-                                &rec);
+       error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
        if (error)
                goto error0;
 
index 100007d56449d0a872d78500bef7c9e75c76a745..6e450df2979bfc80a7983cff0dbd79124741aa19 100644 (file)
@@ -28,6 +28,13 @@ struct xfs_btree_cur;
 /* Move inodes in clusters of this size */
 #define        XFS_INODE_BIG_CLUSTER_SIZE      8192
 
+struct xfs_icluster {
+       bool            deleted;        /* record is deleted */
+       xfs_ino_t       first_ino;      /* first inode number */
+       uint64_t        alloc;          /* inode phys. allocation bitmap for
+                                        * sparse chunks */
+};
+
 /* Calculate and return the number of filesystem blocks per inode cluster */
 static inline int
 xfs_icluster_size_fsb(
@@ -44,8 +51,7 @@ xfs_icluster_size_fsb(
 static inline struct xfs_dinode *
 xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
 {
-       return (struct xfs_dinode *)
-               (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
+       return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
 }
 
 /*
@@ -90,8 +96,7 @@ xfs_difree(
        struct xfs_trans *tp,           /* transaction pointer */
        xfs_ino_t       inode,          /* inode to be freed */
        struct xfs_bmap_free *flist,    /* extents to free */
-       int             *deleted,       /* set if inode cluster was deleted */
-       xfs_ino_t       *first_ino);    /* first inode in deleted cluster */
+       struct xfs_icluster *ifree);    /* cluster info if deleted */
 
 /*
  * Return the location of the inode in imap, for mapping it into a buffer.
@@ -156,7 +161,7 @@ int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
  * Inode chunk initialisation routine
  */
 int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
-                         struct list_head *buffer_list,
+                         struct list_head *buffer_list, int icount,
                          xfs_agnumber_t agno, xfs_agblock_t agbno,
                          xfs_agblock_t length, unsigned int gen);
 
index 964c465ca69c85f405db72b5781f61d48942ab0b..674ad8f760be25ea8fd969b182eee8cee0af7900 100644 (file)
@@ -167,7 +167,16 @@ xfs_inobt_init_rec_from_cur(
        union xfs_btree_rec     *rec)
 {
        rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
-       rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
+       if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+               rec->inobt.ir_u.sp.ir_holemask =
+                                       cpu_to_be16(cur->bc_rec.i.ir_holemask);
+               rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
+               rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;
+       } else {
+               /* ir_holemask/ir_count not supported on-disk */
+               rec->inobt.ir_u.f.ir_freecount =
+                                       cpu_to_be32(cur->bc_rec.i.ir_freecount);
+       }
        rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
 }
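
For reference, the on-disk record union these assignments fill is sketched below (paraphrased from the xfs_format.h changes elsewhere in this series, which are not part of this hunk); sparse support repurposes the 32-bit freecount as a packed holemask/count/freecount triple:

	typedef struct xfs_inobt_rec {
		__be32		ir_startino;	/* starting inode number */
		union {
			struct {
				__be32	ir_freecount;	/* count of free inodes */
			} f;
			struct {
				__be16	ir_holemask;	/* hole mask, sparse chunks */
				__u8	ir_count;	/* total inode count */
				__u8	ir_freecount;	/* count of free inodes */
			} sp;
		} ir_u;
		__be64		ir_free;	/* free inode mask */
	} xfs_inobt_rec_t;
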
 
@@ -418,3 +427,85 @@ xfs_inobt_maxrecs(
                return blocklen / sizeof(xfs_inobt_rec_t);
        return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
 }
+
+/*
+ * Convert the inode record holemask to an inode allocation bitmap. The inode
+ * allocation bitmap is inode granularity and specifies whether an inode is
+ * physically allocated on disk (not whether the inode is considered allocated
+ * or free by the fs).
+ *
+ * A bit value of 1 means the inode is allocated, a value of 0 means it is free.
+ */
+uint64_t
+xfs_inobt_irec_to_allocmask(
+       struct xfs_inobt_rec_incore     *rec)
+{
+       uint64_t                        bitmap = 0;
+       uint64_t                        inodespbit;
+       int                             nextbit;
+       uint                            allocbitmap;
+
+       /*
+        * The holemask has 16 bits for a 64-inode record. Therefore each
+        * holemask bit represents multiple inodes. Create a mask of bits to set
+        * in the allocmask for each holemask bit.
+        */
+       inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
+
+       /*
+        * Allocated inodes are represented by 0 bits in holemask. Invert the 0
+        * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask
+        * anything beyond the 16 holemask bits since this casts to a larger
+        * type.
+        */
+       allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);
+
+       /*
+        * allocbitmap is the inverted holemask so every set bit represents
+        * allocated inodes. To expand from 16-bit holemask granularity to
+        * 64-bit (i.e., bit-per-inode), set inodespbit bits in the target
+        * bitmap for every holemask bit.
+        */
+       nextbit = xfs_next_bit(&allocbitmap, 1, 0);
+       while (nextbit != -1) {
+               ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));
+
+               bitmap |= (inodespbit <<
+                          (nextbit * XFS_INODES_PER_HOLEMASK_BIT));
+
+               nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);
+       }
+
+       return bitmap;
+}
+
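
A userspace re-implementation of the expansion above, with a few worked values (the constants mirror the 64-inode / 16-bit holemask geometry; a plain loop stands in for xfs_next_bit()):

	#include <assert.h>
	#include <stdint.h>

	#define INODES_PER_HOLEMASK_BIT	4	/* 64 inodes / 16 holemask bits */

	static uint64_t irec_to_allocmask(uint16_t holemask)
	{
		uint64_t inodespbit = (1 << INODES_PER_HOLEMASK_BIT) - 1; /* 0xf */
		unsigned int allocbitmap = ~holemask & 0xffff;
		uint64_t bitmap = 0;

		for (int bit = 0; bit < 16; bit++)
			if (allocbitmap & (1u << bit))
				bitmap |= inodespbit <<
					  (bit * INODES_PER_HOLEMASK_BIT);
		return bitmap;
	}

	int main(void)
	{
		assert(irec_to_allocmask(0x0000) == ~0ULL);		/* full chunk */
		assert(irec_to_allocmask(0xffff) == 0);			/* all holes */
		assert(irec_to_allocmask(0xfffe) == 0xfULL);		/* inodes 0-3 */
		assert(irec_to_allocmask(0x00ff) == ~0ULL << 32);	/* upper half */
		return 0;
	}
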
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Verify that an in-core inode record has a valid inode count.
+ */
+int
+xfs_inobt_rec_check_count(
+       struct xfs_mount                *mp,
+       struct xfs_inobt_rec_incore     *rec)
+{
+       int                             inocount = 0;
+       int                             nextbit = 0;
+       uint64_t                        allocbmap;
+       int                             wordsz;
+
+       wordsz = sizeof(allocbmap) / sizeof(unsigned int);
+       allocbmap = xfs_inobt_irec_to_allocmask(rec);
+
+       nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);
+       while (nextbit != -1) {
+               inocount++;
+               nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,
+                                      nextbit + 1);
+       }
+
+       if (inocount != rec->ir_count)
+               return -EFSCORRUPTED;
+
+       return 0;
+}
+#endif /* DEBUG || XFS_WARN */
index d7ebea72c2d0127c588c2e19b5168eebe364ff50..bd88453217ceca0466fbd07e409707ab7e9354b3 100644 (file)
@@ -62,4 +62,14 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
                xfs_btnum_t);
 extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
 
+/* ir_holemask to inode allocation bitmap conversion */
+uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
+
+#if defined(DEBUG) || defined(XFS_WARN)
+int xfs_inobt_rec_check_count(struct xfs_mount *,
+                             struct xfs_inobt_rec_incore *);
+#else
+#define xfs_inobt_rec_check_count(mp, rec)     0
+#endif /* DEBUG || XFS_WARN */
+
 #endif /* __XFS_IALLOC_BTREE_H__ */
index 002b6b3a19885057386d25bb0220dbd11f53ddfe..6526e7696184b75fd3206ba4d6c48e354a85721d 100644 (file)
@@ -46,8 +46,7 @@ xfs_inobp_check(
        j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
 
        for (i = 0; i < j; i++) {
-               dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-                                       i * mp->m_sb.sb_inodesize);
+               dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
                if (!dip->di_next_unlinked)  {
                        xfs_alert(mp,
        "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
@@ -86,8 +85,7 @@ xfs_inode_buf_verify(
                int             di_ok;
                xfs_dinode_t    *dip;
 
-               dip = (struct xfs_dinode *)xfs_buf_offset(bp,
-                                       (i << mp->m_sb.sb_inodelog));
+               dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
                            XFS_DINODE_GOOD_VERSION(dip->di_version);
                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
@@ -186,7 +184,7 @@ xfs_imap_to_bp(
        }
 
        *bpp = bp;
-       *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
+       *dipp = xfs_buf_offset(bp, imap->im_boffset);
        return 0;
 }
 
index dc4bfc5d88fccf221609cf2ac13c80370de07221..df9851c46b5c2e053926b9f223a318ba04ee6489 100644 (file)
@@ -174,6 +174,27 @@ xfs_mount_validate_sb(
                        return -EFSCORRUPTED;
        }
 
+       /*
+        * Full inode chunks must be aligned to inode chunk size when
+        * sparse inodes are enabled to support the sparse chunk
+        * allocation algorithm and prevent overlapping inode records.
+        */
+       if (xfs_sb_version_hassparseinodes(sbp)) {
+               uint32_t        align;
+
+               xfs_alert(mp,
+       "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
+
+               align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
+                               >> sbp->sb_blocklog;
+               if (sbp->sb_inoalignmt != align) {
+                       xfs_warn(mp,
+"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
+                                sbp->sb_inoalignmt, align);
+                       return -EINVAL;
+               }
+       }
+
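
For instance, with 512-byte inodes and 4096-byte blocks (an illustrative geometry, not something this hunk mandates), align = 64 * 512 >> 12 = 8, so the filesystem must have been created with sb_inoalignmt of exactly 8 blocks for a sparse-inode mount to pass this check.
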
        if (unlikely(
            sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
                xfs_warn(mp,
@@ -374,7 +395,7 @@ __xfs_sb_from_disk(
                                be32_to_cpu(from->sb_features_log_incompat);
        /* crc is only used on disk, not in memory; just init to 0 here. */
        to->sb_crc = 0;
-       to->sb_pad = 0;
+       to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
        to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
        to->sb_lsn = be64_to_cpu(from->sb_lsn);
        /* Convert on-disk flags to in-memory flags? */
@@ -516,7 +537,7 @@ xfs_sb_to_disk(
                                cpu_to_be32(from->sb_features_incompat);
                to->sb_features_log_incompat =
                                cpu_to_be32(from->sb_features_log_incompat);
-               to->sb_pad = 0;
+               to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
                to->sb_lsn = cpu_to_be64(from->sb_lsn);
        }
 }
@@ -689,6 +710,11 @@ xfs_sb_mount_common(
        mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
                                        sbp->sb_inopblock);
        mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+
+       if (sbp->sb_spino_align)
+               mp->m_ialloc_min_blks = sbp->sb_spino_align;
+       else
+               mp->m_ialloc_min_blks = mp->m_ialloc_blks;
 }
 
 /*
@@ -792,12 +818,12 @@ xfs_sync_sb(
        tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
        xfs_log_sb(tp);
        if (wait)
                xfs_trans_set_sync(tp);
-       return xfs_trans_commit(tp, 0);
+       return xfs_trans_commit(tp);
 }
index 8dda4b321343ba6bf28ebc98516225b1e677dc6d..5be529707903fb42bc4ebccc176e57b7a3470d96 100644 (file)
@@ -181,12 +181,6 @@ int        xfs_log_calc_minimum_size(struct xfs_mount *);
 #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
 #define XFS_TRANS_FREEZE_PROT  0x40    /* Transaction has elevated writer
                                           count in superblock */
-/*
- * Values for call flags parameter.
- */
-#define        XFS_TRANS_RELEASE_LOG_RES       0x4
-#define        XFS_TRANS_ABORT                 0x8
-
 /*
  * Field values for xfs_trans_mod_sb.
  */
index 2d5bdfce6d8fd2627fcc46603eb83faed12780e9..797815012c0e31fe711132b3c65aecd5591f7e38 100644 (file)
@@ -73,9 +73,9 @@ struct xfs_trans_resv {
  * 2 trees * (2 blocks/level * max depth - 1) * block size
  */
 #define        XFS_ALLOCFREE_LOG_RES(mp,nx) \
-       ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
+       ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
 #define        XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
-       ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
+       ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
 
 /*
  * Per-directory log reservation for any directory change.
index bf9c4579334d500fea6c917d2a149ba05dead7fb..41e0428d8175a2ab7ea4be8d7f67ce932a7e3a3f 100644 (file)
@@ -67,7 +67,7 @@
 #define        XFS_DIOSTRAT_SPACE_RES(mp, v)   \
        (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
 #define        XFS_GROWFS_SPACE_RES(mp)        \
-       (2 * XFS_AG_MAXLEVELS(mp))
+       (2 * (mp)->m_ag_maxlevels)
 #define        XFS_GROWFSRT_SPACE_RES(mp,b)    \
        ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
 #define        XFS_LINK_SPACE_RES(mp,nl)       \
index e5099f26803285af3aa774a897635264911b1458..3859f5e27a4dc209be159659f33d777931d31cea 100644 (file)
@@ -109,7 +109,7 @@ xfs_setfilesize_trans_alloc(
 
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -145,7 +145,7 @@ xfs_setfilesize(
        isize = xfs_new_eof(ip, offset + size);
        if (!isize) {
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return 0;
        }
 
@@ -155,7 +155,7 @@ xfs_setfilesize(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-       return xfs_trans_commit(tp, 0);
+       return xfs_trans_commit(tp);
 }
 
 STATIC int
@@ -1348,7 +1348,7 @@ __xfs_get_blocks(
        sector_t                iblock,
        struct buffer_head      *bh_result,
        int                     create,
-       int                     direct)
+       bool                    direct)
 {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
@@ -1413,6 +1413,7 @@ __xfs_get_blocks(
                        if (error)
                                return error;
                        new = 1;
                } else {
                        /*
                         * Delalloc reservations do not require a transaction,
@@ -1507,49 +1508,29 @@ xfs_get_blocks(
        struct buffer_head      *bh_result,
        int                     create)
 {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+       return __xfs_get_blocks(inode, iblock, bh_result, create, false);
 }
 
-STATIC int
+int
 xfs_get_blocks_direct(
        struct inode            *inode,
        sector_t                iblock,
        struct buffer_head      *bh_result,
        int                     create)
 {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+       return __xfs_get_blocks(inode, iblock, bh_result, create, true);
 }
 
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-       struct kiocb            *iocb,
+static void
+__xfs_end_io_direct_write(
+       struct inode            *inode,
+       struct xfs_ioend        *ioend,
        loff_t                  offset,
-       ssize_t                 size,
-       void                    *private)
+       ssize_t                 size)
 {
-       struct inode            *inode = file_inode(iocb->ki_filp);
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ioend        *ioend = private;
-
-       trace_xfs_gbmap_direct_endio(ip, offset, size,
-                                    ioend ? ioend->io_type : 0, NULL);
+       struct xfs_mount        *mp = XFS_I(inode)->i_mount;
 
-       if (!ioend) {
-               ASSERT(offset + size <= i_size_read(inode));
-               return;
-       }
-
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
                goto out_end_io;
 
        /*
@@ -1586,10 +1567,10 @@ xfs_end_io_direct_write(
         * here can result in EOF moving backwards and Bad Things Happen when
         * that occurs.
         */
-       spin_lock(&ip->i_flags_lock);
+       spin_lock(&XFS_I(inode)->i_flags_lock);
        if (offset + size > i_size_read(inode))
                i_size_write(inode, offset + size);
-       spin_unlock(&ip->i_flags_lock);
+       spin_unlock(&XFS_I(inode)->i_flags_lock);
 
        /*
         * If we are doing an append IO that needs to update the EOF on disk,
@@ -1606,6 +1587,98 @@ out_end_io:
        return;
 }
 
+/*
+ * Complete a direct I/O write request.
+ *
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
+ */
+STATIC void
+xfs_end_io_direct_write(
+       struct kiocb            *iocb,
+       loff_t                  offset,
+       ssize_t                 size,
+       void                    *private)
+{
+       struct inode            *inode = file_inode(iocb->ki_filp);
+       struct xfs_ioend        *ioend = private;
+
+       trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
+                                    ioend ? ioend->io_type : 0, NULL);
+
+       if (!ioend) {
+               ASSERT(offset + size <= i_size_read(inode));
+               return;
+       }
+
+       __xfs_end_io_direct_write(inode, ioend, offset, size);
+}
+
+/*
+ * For DAX we need a mapping buffer callback for unwritten extent conversion
+ * when page faults allocate blocks and then zero them. Note that in this
+ * case the mapping indicated by the ioend may extend beyond EOF. We most
+ * definitely do not want to extend EOF here, so we trim back the ioend size to
+ * EOF.
+ */
+#ifdef CONFIG_FS_DAX
+void
+xfs_end_io_dax_write(
+       struct buffer_head      *bh,
+       int                     uptodate)
+{
+       struct xfs_ioend        *ioend = bh->b_private;
+       struct inode            *inode = ioend->io_inode;
+       ssize_t                 size = ioend->io_size;
+
+       ASSERT(IS_DAX(ioend->io_inode));
+
+       /* if there was an error zeroing, then don't convert it */
+       if (!uptodate)
+               ioend->io_error = -EIO;
+
+       /*
+        * Trim update to EOF, so we don't extend EOF during unwritten extent
+        * conversion of partial EOF blocks.
+        */
+       spin_lock(&XFS_I(inode)->i_flags_lock);
+       if (ioend->io_offset + size > i_size_read(inode))
+               size = i_size_read(inode) - ioend->io_offset;
+       spin_unlock(&XFS_I(inode)->i_flags_lock);
+
+       __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
+
+}
+void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
+#endif
+
+static inline ssize_t
+xfs_vm_do_dio(
+       struct inode            *inode,
+       struct kiocb            *iocb,
+       struct iov_iter         *iter,
+       loff_t                  offset,
+       void                    (*endio)(struct kiocb   *iocb,
+                                        loff_t         offset,
+                                        ssize_t        size,
+                                        void           *private),
+       int                     flags)
+{
+       struct block_device     *bdev;
+
+       if (IS_DAX(inode))
+               return dax_do_io(iocb, inode, iter, offset,
+                                xfs_get_blocks_direct, endio, 0);
+
+       bdev = xfs_find_bdev_for_inode(inode);
+       return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+                                    xfs_get_blocks_direct, endio, NULL, flags);
+}
+
 STATIC ssize_t
 xfs_vm_direct_IO(
        struct kiocb            *iocb,
@@ -1613,16 +1686,11 @@ xfs_vm_direct_IO(
        loff_t                  offset)
 {
        struct inode            *inode = iocb->ki_filp->f_mapping->host;
-       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
 
-       if (iov_iter_rw(iter) == WRITE) {
-               return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-                                           xfs_get_blocks_direct,
-                                           xfs_end_io_direct_write, NULL,
-                                           DIO_ASYNC_EXTEND);
-       }
-       return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-                                   xfs_get_blocks_direct, NULL, NULL, 0);
+       if (iov_iter_rw(iter) == WRITE)
+               return xfs_vm_do_dio(inode, iocb, iter, offset,
+                                    xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
+       return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
 }
 
 /*
index ac644e0137a49f021ba3c3d840e4b91017674095..86afd1ac7895f8d225fa2da2285c03f7014666e3 100644 (file)
@@ -53,7 +53,12 @@ typedef struct xfs_ioend {
 } xfs_ioend_t;
 
 extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+int    xfs_get_blocks(struct inode *inode, sector_t offset,
+                      struct buffer_head *map_bh, int create);
+int    xfs_get_blocks_direct(struct inode *inode, sector_t offset,
+                             struct buffer_head *map_bh, int create);
+void   xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
 
index 3fbf167cfb4cddfcb42a57ca7d613096d5c97fe0..2bb959ada45bb5444830373a102fc4d7aced7273 100644 (file)
@@ -394,7 +394,6 @@ xfs_attr_inactive(
 {
        struct xfs_trans        *trans;
        struct xfs_mount        *mp;
-       int                     cancel_flags = 0;
        int                     lock_mode = XFS_ILOCK_SHARED;
        int                     error = 0;
 
@@ -423,7 +422,6 @@ xfs_attr_inactive(
                goto out_cancel;
 
        lock_mode = XFS_ILOCK_EXCL;
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
        xfs_ilock(dp, lock_mode);
 
        if (!XFS_IFORK_Q(dp))
@@ -435,8 +433,14 @@ xfs_attr_inactive(
         */
        xfs_trans_ijoin(trans, dp, 0);
 
-       /* invalidate and truncate the attribute fork extents */
-       if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+       /*
+        * Invalidate and truncate the attribute fork extents. Make sure the
+        * fork actually has attributes as otherwise the invalidation has no
+        * blocks to read and returns an error. In this case, just do the fork
+        * removal below.
+        */
+       if (xfs_inode_hasattr(dp) &&
+           dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
                error = xfs_attr3_root_inactive(&trans, dp);
                if (error)
                        goto out_cancel;
@@ -449,12 +453,12 @@ xfs_attr_inactive(
        /* Reset the attribute fork - this also destroys the in-core fork */
        xfs_attr_fork_remove(dp, trans);
 
-       error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(trans);
        xfs_iunlock(dp, lock_mode);
        return error;
 
 out_cancel:
-       xfs_trans_cancel(trans, cancel_flags);
+       xfs_trans_cancel(trans);
 out_destroy_fork:
        /* kill the in-core attr fork before we drop the inode lock */
        if (dp->i_afp)
index a52bbd3abc7df3ac7fa07a12b5dc9ec336d9ab49..0f34886cf7269b1cd67eadbc372655372253981b 100644 (file)
@@ -75,28 +75,20 @@ xfs_bmap_finish(
        xfs_efi_log_item_t      *efi;           /* extent free intention */
        int                     error;          /* error return value */
        xfs_bmap_free_item_t    *free;          /* free extent item */
-       struct xfs_trans_res    tres;           /* new log reservation */
        xfs_mount_t             *mp;            /* filesystem mount structure */
        xfs_bmap_free_item_t    *next;          /* next item on free list */
-       xfs_trans_t             *ntp;           /* new transaction pointer */
 
        ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
        if (flist->xbf_count == 0) {
                *committed = 0;
                return 0;
        }
-       ntp = *tp;
-       efi = xfs_trans_get_efi(ntp, flist->xbf_count);
+       efi = xfs_trans_get_efi(*tp, flist->xbf_count);
        for (free = flist->xbf_first; free; free = free->xbfi_next)
-               xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
+               xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
                        free->xbfi_blockcount);
 
-       tres.tr_logres = ntp->t_log_res;
-       tres.tr_logcount = ntp->t_log_count;
-       tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
-       ntp = xfs_trans_dup(*tp);
-       error = xfs_trans_commit(*tp, 0);
-       *tp = ntp;
+       error = xfs_trans_roll(tp, NULL);
        *committed = 1;
        /*
         * We have a new transaction, so we should return committed=1,
@@ -105,19 +97,10 @@ xfs_bmap_finish(
        if (error)
                return error;
 
-       /*
-        * transaction commit worked ok so we can drop the extra ticket
-        * reference that we gained in xfs_trans_dup()
-        */
-       xfs_log_ticket_put(ntp->t_ticket);
-
-       error = xfs_trans_reserve(ntp, &tres, 0, 0);
-       if (error)
-               return error;
-       efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
+       efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
        for (free = flist->xbf_first; free != NULL; free = next) {
                next = free->xbfi_next;
-               if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
+               if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
                                free->xbfi_blockcount))) {
                        /*
                         * The bmap free list will be cleaned up at a
@@ -127,7 +110,7 @@ xfs_bmap_finish(
                         * happens, since this transaction may not be
                         * dirty yet.
                         */
-                       mp = ntp->t_mountp;
+                       mp = (*tp)->t_mountp;
                        if (!XFS_FORCED_SHUTDOWN(mp))
                                xfs_force_shutdown(mp,
                                                   (error == -EFSCORRUPTED) ?
@@ -135,7 +118,7 @@ xfs_bmap_finish(
                                                   SHUTDOWN_META_IO_ERROR);
                        return error;
                }
-               xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
+               xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
                        free->xbfi_blockcount);
                xfs_bmap_del_free(flist, NULL, free);
        }
@@ -878,7 +861,7 @@ xfs_free_eofblocks(
 
                if (need_iolock) {
                        if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
-                               xfs_trans_cancel(tp, 0);
+                               xfs_trans_cancel(tp);
                                return -EAGAIN;
                        }
                }
@@ -886,7 +869,7 @@ xfs_free_eofblocks(
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
                if (error) {
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp, 0);
+                       xfs_trans_cancel(tp);
                        if (need_iolock)
                                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return error;
@@ -908,12 +891,9 @@ xfs_free_eofblocks(
                         * If we get an error at this point we simply don't
                         * bother truncating the file.
                         */
-                       xfs_trans_cancel(tp,
-                                        (XFS_TRANS_RELEASE_LOG_RES |
-                                         XFS_TRANS_ABORT));
+                       xfs_trans_cancel(tp);
                } else {
-                       error = xfs_trans_commit(tp,
-                                               XFS_TRANS_RELEASE_LOG_RES);
+                       error = xfs_trans_commit(tp);
                        if (!error)
                                xfs_inode_clear_eofblocks_tag(ip);
                }
@@ -1026,7 +1006,7 @@ xfs_alloc_file_space(
                         * Free the transaction structure.
                         */
                        ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp, 0);
+                       xfs_trans_cancel(tp);
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1053,7 +1033,7 @@ xfs_alloc_file_space(
                        goto error0;
                }
 
-               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               error = xfs_trans_commit(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error) {
                        break;
@@ -1077,7 +1057,7 @@ error0:   /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
        xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
 
 error1:        /* Just cancel transaction */
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 }
@@ -1133,14 +1113,29 @@ xfs_zero_remaining_bytes(
                        break;
                ASSERT(imap.br_blockcount >= 1);
                ASSERT(imap.br_startoff == offset_fsb);
+               ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+
+               if (imap.br_startblock == HOLESTARTBLOCK ||
+                   imap.br_state == XFS_EXT_UNWRITTEN) {
+                       /* skip the entire extent */
+                       lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
+                                                     imap.br_blockcount) - 1;
+                       continue;
+               }
+
                lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
                if (lastoffset > endoff)
                        lastoffset = endoff;
-               if (imap.br_startblock == HOLESTARTBLOCK)
-                       continue;
-               ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-               if (imap.br_state == XFS_EXT_UNWRITTEN)
+
+               /* DAX can just zero the backing device directly */
+               if (IS_DAX(VFS_I(ip))) {
+                       error = dax_zero_page_range(VFS_I(ip), offset,
+                                                   lastoffset - offset + 1,
+                                                   xfs_get_blocks_direct);
+                       if (error)
+                               return error;
                        continue;
+               }
 
                error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp,
@@ -1289,7 +1284,7 @@ xfs_free_file_space(
                         * Free the transaction structure.
                         */
                        ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp, 0);
+                       xfs_trans_cancel(tp);
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1320,7 +1315,7 @@ xfs_free_file_space(
                        goto error0;
                }
 
-               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               error = xfs_trans_commit(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }
 
@@ -1330,7 +1325,7 @@ xfs_free_file_space(
  error0:
        xfs_bmap_cancel(&free_list);
  error1:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        goto out;
 }
@@ -1462,7 +1457,7 @@ xfs_shift_file_space(
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
                                XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
                if (error) {
-                       xfs_trans_cancel(tp, 0);
+                       xfs_trans_cancel(tp);
                        break;
                }
 
@@ -1492,13 +1487,13 @@ xfs_shift_file_space(
                if (error)
                        goto out;
 
-               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               error = xfs_trans_commit(tp);
        }
 
        return error;
 
 out:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        return error;
 }
 
@@ -1718,7 +1713,7 @@ xfs_swap_extents(
        tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                goto out_unlock;
        }
 
@@ -1901,7 +1896,7 @@ xfs_swap_extents(
        if (mp->m_flags & XFS_MOUNT_WSYNC)
                xfs_trans_set_sync(tp);
 
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
 
        trace_xfs_swap_extent_after(ip, 0);
        trace_xfs_swap_extent_after(tip, 1);
@@ -1915,6 +1910,6 @@ out_unlock:
        goto out;
 
 out_trans_cancel:
-       xfs_trans_cancel(tp, 0);
+       xfs_trans_cancel(tp);
        goto out;
 }
index 1790b00bea7a7b2f1d4b86b733b3e652de15b20d..a4b7d92e946c1e827355e6197538bc854514e759 100644 (file)
@@ -1419,9 +1419,9 @@ xfs_buf_submit_wait(
        return error;
 }
 
-xfs_caddr_t
+void *
 xfs_buf_offset(
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        size_t                  offset)
 {
        struct page             *page;
@@ -1431,7 +1431,7 @@ xfs_buf_offset(
 
        offset += bp->b_offset;
        page = bp->b_pages[offset >> PAGE_SHIFT];
-       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+       return page_address(page) + (offset & (PAGE_SIZE-1));
 }
 
 /*
index 75ff5d5a7d2ed62bc3aab9565416de4756c43cd0..331c1ccf826478732aeb18bd8ee59103100e5597 100644 (file)
@@ -299,7 +299,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
            xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 
 /* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+extern void *xfs_buf_offset(struct xfs_buf *, size_t);
 
 /* Delayed Write Buffer Routines */
 extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
index 02c01bbbc7899410e9d23f423c51086d23afa940..4143dc75dca4b22b15003ebd1bce5b3561a5deb8 100644
@@ -568,8 +568,6 @@ xfs_qm_dqread(
        struct xfs_buf          *bp;
        struct xfs_trans        *tp = NULL;
        int                     error;
-       int                     cancelflags = 0;
-
 
        dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
 
@@ -617,7 +615,6 @@ xfs_qm_dqread(
                                          XFS_QM_DQALLOC_SPACE_RES(mp), 0);
                if (error)
                        goto error1;
-               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
        }
 
        /*
@@ -632,7 +629,6 @@ xfs_qm_dqread(
                 * allocate (ENOENT).
                 */
                trace_xfs_dqread_fail(dqp);
-               cancelflags |= XFS_TRANS_ABORT;
                goto error1;
        }
 
@@ -670,7 +666,7 @@ xfs_qm_dqread(
        xfs_trans_brelse(tp, bp);
 
        if (tp) {
-               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               error = xfs_trans_commit(tp);
                if (error)
                        goto error0;
        }
@@ -680,7 +676,7 @@ xfs_qm_dqread(
 
 error1:
        if (tp)
-               xfs_trans_cancel(tp, cancelflags);
+               xfs_trans_cancel(tp);
 error0:
        xfs_qm_dqdestroy(dqp);
        *O_dqpp = NULL;
index 338e50bbfd1ec8bc8345fbbd84cdc56b82d5f43a..74d0e5966ebca70f2ba489532b9ebfaae693c288 100644
@@ -127,7 +127,7 @@ xfs_error_report(
        struct xfs_mount        *mp,
        const char              *filename,
        int                     linenum,
-       inst_t                  *ra)
+       void                    *ra)
 {
        if (level <= xfs_error_level) {
                xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
@@ -146,7 +146,7 @@ xfs_corruption_error(
        void                    *p,
        const char              *filename,
        int                     linenum,
-       inst_t                  *ra)
+       void                    *ra)
 {
        if (level <= xfs_error_level)
                xfs_hex_dump(p, 64);
index c0394ed126fcc891df140fb6536b94ebdb520d01..4ed3042a0f1602945c884c60b7d3f7bb6c687a20 100644
 struct xfs_mount;
 
 extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
-                       const char *filename, int linenum, inst_t *ra);
+                       const char *filename, int linenum, void *ra);
 extern void xfs_corruption_error(const char *tag, int level,
                        struct xfs_mount *mp, void *p, const char *filename,
-                       int linenum, inst_t *ra);
+                       int linenum, void *ra);
 extern void xfs_verifier_error(struct xfs_buf *bp);
 
 #define        XFS_ERROR_REPORT(e, lvl, mp)    \
index cb7fe64cdbfa0d2cb41c4389ab1b03005c0bbe36..adc8f8fdd145ae4c0facfb377b177fdf3a59baeb 100644
@@ -239,7 +239,7 @@ xfs_efi_init(
 
        xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
        efip->efi_format.efi_nextents = nextents;
-       efip->efi_format.efi_id = (__psint_t)(void*)efip;
+       efip->efi_format.efi_id = (uintptr_t)(void *)efip;
        atomic_set(&efip->efi_next_extent, 0);
        atomic_set(&efip->efi_refcount, 2);
 
index 7c62fca53e2fc36b5c61f37a829e6532b7e210d2..874507de3485b818e94bfcd0348f79e9747fdce6 100644
@@ -80,14 +80,15 @@ xfs_rw_ilock_demote(
 }
 
 /*
- *     xfs_iozero
+ * xfs_iozero clears the specified range supplied via the page cache (except in
+ * the DAX case). Writes through the page cache will allocate blocks over holes,
+ * though the callers usually map the holes first and avoid them. If a block is
+ * not completely zeroed, then it will be read from disk before being partially
+ * zeroed.
  *
- *     xfs_iozero clears the specified range of buffer supplied,
- *     and marks all the affected blocks as valid and modified.  If
- *     an affected block is not allocated, it will be allocated.  If
- *     an affected block is not completely overwritten, and is not
- *     valid before the operation, it will be read from disk before
- *     being partially zeroed.
+ * In the DAX case, we can just directly write to the underlying pages. This
+ * will not allocate blocks, but will avoid holes and unwritten extents and so
+ * not do unnecessary work.
  */
 int
 xfs_iozero(
@@ -97,7 +98,8 @@ xfs_iozero(
 {
        struct page             *page;
        struct address_space    *mapping;
-       int                     status;
+       int                     status = 0;
+
 
        mapping = VFS_I(ip)->i_mapping;
        do {
@@ -109,20 +111,27 @@ xfs_iozero(
                if (bytes > count)
                        bytes = count;
 
-               status = pagecache_write_begin(NULL, mapping, pos, bytes,
-                                       AOP_FLAG_UNINTERRUPTIBLE,
-                                       &page, &fsdata);
-               if (status)
-                       break;
+               if (IS_DAX(VFS_I(ip))) {
+                       status = dax_zero_page_range(VFS_I(ip), pos, bytes,
+                                                    xfs_get_blocks_direct);
+                       if (status)
+                               break;
+               } else {
+                       status = pagecache_write_begin(NULL, mapping, pos, bytes,
+                                               AOP_FLAG_UNINTERRUPTIBLE,
+                                               &page, &fsdata);
+                       if (status)
+                               break;
 
-               zero_user(page, offset, bytes);
+                       zero_user(page, offset, bytes);
 
-               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
-                                       page, fsdata);
-               WARN_ON(status <= 0); /* can't return less than zero! */
+                       status = pagecache_write_end(NULL, mapping, pos, bytes,
+                                               bytes, page, fsdata);
+                       WARN_ON(status <= 0); /* can't return less than zero! */
+                       status = 0;
+               }
                pos += bytes;
                count -= bytes;
-               status = 0;
        } while (count);
 
        return status;
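
The rewritten xfs_iozero() loop chooses the zeroing mechanism per iteration: DAX inodes push zeroes straight to the backing store through dax_zero_page_range(), which uses the get_block callback to look up the already-mapped blocks, while everything else keeps the pagecache_write_begin()/zero_user()/pagecache_write_end() protocol. Condensed control flow of the result, not compilable as-is:

	do {
		/* bytes = distance to the next page boundary, capped to count */
		if (IS_DAX(VFS_I(ip))) {
			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
						     xfs_get_blocks_direct);
			if (status)
				break;
		} else {
			/* buffered: lock the page, zero it, mark it dirty */
		}
		pos += bytes;
		count -= bytes;
	} while (count);
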
@@ -139,7 +148,7 @@ xfs_update_prealloc_flags(
        tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
        error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -161,7 +170,7 @@ xfs_update_prealloc_flags(
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        if (flags & XFS_PREALLOC_SYNC)
                xfs_trans_set_sync(tp);
-       return xfs_trans_commit(tp, 0);
+       return xfs_trans_commit(tp);
 }
 
 /*
@@ -285,7 +294,7 @@ xfs_file_read_iter(
        if (file->f_mode & FMODE_NOCMTIME)
                ioflags |= XFS_IO_INVIS;
 
-       if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
+       if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
                xfs_buftarg_t   *target =
                        XFS_IS_REALTIME_INODE(ip) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -379,7 +388,11 @@ xfs_file_splice_read(
 
        trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
 
-       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+       /* for dax, we need to avoid the page cache */
+       if (IS_DAX(VFS_I(ip)))
+               ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
+       else
+               ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
 
@@ -673,7 +686,7 @@ xfs_file_dio_aio_write(
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
 
        /* DIO must be aligned to device logical sector size */
-       if ((pos | count) & target->bt_logical_sectormask)
+       if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
                return -EINVAL;
 
        /* "unaligned" here means not aligned to a filesystem block */
@@ -759,8 +772,11 @@ xfs_file_dio_aio_write(
 out:
        xfs_rw_iunlock(ip, iolock);
 
-       /* No fallback to buffered IO on errors for XFS. */
-       ASSERT(ret < 0 || ret == count);
+       /*
+        * No fallback to buffered IO on errors for XFS. DAX can result in
+        * partial writes, but direct IO will either complete fully or fail.
+        */
+       ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
        return ret;
 }
 
@@ -843,7 +859,7 @@ xfs_file_write_iter(
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return -EIO;
 
-       if (unlikely(iocb->ki_flags & IOCB_DIRECT))
+       if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
                ret = xfs_file_dio_aio_write(iocb, from);
        else
                ret = xfs_file_buffered_aio_write(iocb, from);
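
DAX writes are steered down the direct-I/O path even without O_DIRECT because they never touch the page cache; for the same reason the device logical-sector alignment check is skipped (there is no bio to align) and the all-or-nothing assertion is relaxed, since a DAX write can legitimately complete partially. Net routing decision after this hunk:

	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
		ret = xfs_file_dio_aio_write(iocb, from);	/* DAX or O_DIRECT */
	else
		ret = xfs_file_buffered_aio_write(iocb, from);	/* page cache */
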
@@ -1064,17 +1080,6 @@ xfs_file_readdir(
        return xfs_readdir(ip, ctx, bufsize);
 }
 
-STATIC int
-xfs_file_mmap(
-       struct file     *filp,
-       struct vm_area_struct *vma)
-{
-       vma->vm_ops = &xfs_file_vm_ops;
-
-       file_accessed(filp);
-       return 0;
-}
-
 /*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().
@@ -1455,48 +1460,83 @@ xfs_file_llseek(
  * ordering of:
  *
  * mmap_sem (MM)
- *   i_mmap_lock (XFS - truncate serialisation)
- *     page_lock (MM)
- *       i_lock (XFS - extent map serialisation)
+ *   sb_start_pagefault(vfs, freeze)
+ *     i_mmap_lock (XFS - truncate serialisation)
+ *       page_lock (MM)
+ *         i_lock (XFS - extent map serialisation)
+ */
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
  */
 STATIC int
-xfs_filemap_fault(
+xfs_filemap_page_mkwrite(
        struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
 {
-       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
-       int                     error;
+       struct inode            *inode = file_inode(vma->vm_file);
+       int                     ret;
 
-       trace_xfs_filemap_fault(ip);
+       trace_xfs_filemap_page_mkwrite(XFS_I(inode));
 
-       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
-       error = filemap_fault(vma, vmf);
-       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
-       return error;
+       if (IS_DAX(inode)) {
+               ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
+                                   xfs_end_io_dax_write);
+       } else {
+               ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+               ret = block_page_mkwrite_return(ret);
+       }
+
+       xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+       sb_end_pagefault(inode->i_sb);
+
+       return ret;
 }
 
-/*
- * mmap()d file has taken write protection fault and is being made writable. We
- * can set the page state up correctly for a writable page, which means we can
- * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
- * mapping.
- */
 STATIC int
-xfs_filemap_page_mkwrite(
+xfs_filemap_fault(
        struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
 {
-       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
-       int                     error;
+       struct xfs_inode        *ip = XFS_I(file_inode(vma->vm_file));
+       int                     ret;
+
+       trace_xfs_filemap_fault(ip);
 
-       trace_xfs_filemap_page_mkwrite(ip);
+       /* DAX can shortcut the normal fault path on write faults! */
+       if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
+               return xfs_filemap_page_mkwrite(vma, vmf);
 
        xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
-       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       ret = filemap_fault(vma, vmf);
        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
 
-       return error;
+       return ret;
+}
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+       .fault          = xfs_filemap_fault,
+       .map_pages      = filemap_map_pages,
+       .page_mkwrite   = xfs_filemap_page_mkwrite,
+};
+
+STATIC int
+xfs_file_mmap(
+       struct file     *filp,
+       struct vm_area_struct *vma)
+{
+       file_accessed(filp);
+       vma->vm_ops = &xfs_file_vm_ops;
+       if (IS_DAX(file_inode(filp)))
+               vma->vm_flags |= VM_MIXEDMAP;
+       return 0;
 }
 
 const struct file_operations xfs_file_operations = {
@@ -1527,9 +1567,3 @@ const struct file_operations xfs_dir_file_operations = {
 #endif
        .fsync          = xfs_dir_fsync,
 };
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = xfs_filemap_fault,
-       .map_pages      = filemap_map_pages,
-       .page_mkwrite   = xfs_filemap_page_mkwrite,
-};
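
The fault path reshuffle above does three things: xfs_filemap_page_mkwrite() moves first so that xfs_filemap_fault() can tail-call it on DAX write faults, page_mkwrite now takes sb_start_pagefault()/sb_end_pagefault() itself so mmap stores get the same filesystem-freeze protection as write(2), and xfs_file_mmap() sets VM_MIXEDMAP for DAX because those mappings insert raw PFNs rather than struct pages. The resulting fault-time ordering, restated from the comment with the role of each level, outermost lock first:

	/*
	 * mmap_sem (MM)                  taken before ->fault is called
	 *   sb_start_pagefault()         freeze protection (VFS)
	 *     XFS_MMAPLOCK_SHARED        serialise against truncate (XFS)
	 *       page_lock (MM)
	 *         XFS_ILOCK              extent map serialisation (XFS)
	 */
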
index da82f1cb4b9bde824979bad9cca0b72c748a49d9..c4c130f9bfb64fec1d7d5dccb27963a236477ced 100644
@@ -196,7 +196,8 @@ xfs_filestream_pick_ag(
                        goto next_ag;
                }
 
-               longest = xfs_alloc_longest_free_extent(mp, pag);
+               longest = xfs_alloc_longest_free_extent(mp, pag,
+                                       xfs_alloc_min_freelist(mp, pag));
                if (((minlen && longest >= minlen) ||
                     (!minlen && pag->pagf_freeblks >= minfree)) &&
                    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
index cb7e8a29dfb6e9e8b2b0f1044cc67a9dca5e1603..9b3438a7680f4251cf623d18e0dc9e78ef269ab7 100644
@@ -101,7 +101,9 @@ xfs_fs_geometry(
                        (xfs_sb_version_hasftype(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
                        (xfs_sb_version_hasfinobt(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
+                               XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
+                       (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
+                               XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
                geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
                                mp->m_sb.sb_logsectsize : BBSIZE;
                geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -201,7 +203,7 @@ xfs_growfs_data_private(
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
                                  XFS_GROWFS_SPACE_RES(mp), 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -489,7 +491,7 @@ xfs_growfs_data_private(
        if (dpct)
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
        xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
        if (error)
                return error;
 
@@ -557,7 +559,7 @@ xfs_growfs_data_private(
        return saved_error ? saved_error : error;
 
  error0:
-       xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        return error;
 }
 
index 539a85fddbc26864004e80f5fb229c6c2de565b8..3da9f4da4f3d2e6b67ffd1bd752b4b0993af9fda 100644
@@ -905,7 +905,6 @@ xfs_dir_ialloc(
 
 {
        xfs_trans_t     *tp;
-       xfs_trans_t     *ntp;
        xfs_inode_t     *ip;
        xfs_buf_t       *ialloc_context = NULL;
        int             code;
@@ -954,8 +953,6 @@ xfs_dir_ialloc(
         * to succeed the second time.
         */
        if (ialloc_context) {
-               struct xfs_trans_res tres;
-
                /*
                 * Normally, xfs_trans_commit releases all the locks.
                 * We call bhold to hang on to the ialloc_context across
@@ -964,12 +961,6 @@ xfs_dir_ialloc(
                 * allocation group.
                 */
                xfs_trans_bhold(tp, ialloc_context);
-               /*
-                * Save the log reservation so we can use
-                * them in the next transaction.
-                */
-               tres.tr_logres = xfs_trans_get_log_res(tp);
-               tres.tr_logcount = xfs_trans_get_log_count(tp);
 
                /*
                 * We want the quota changes to be associated with the next
@@ -985,35 +976,9 @@ xfs_dir_ialloc(
                        tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
                }
 
-               ntp = xfs_trans_dup(tp);
-               code = xfs_trans_commit(tp, 0);
-               tp = ntp;
-               if (committed != NULL) {
+               code = xfs_trans_roll(&tp, 0);
+               if (committed != NULL)
                        *committed = 1;
-               }
-               /*
-                * If we get an error during the commit processing,
-                * release the buffer that is still held and return
-                * to the caller.
-                */
-               if (code) {
-                       xfs_buf_relse(ialloc_context);
-                       if (dqinfo) {
-                               tp->t_dqinfo = dqinfo;
-                               xfs_trans_free_dqinfo(tp);
-                       }
-                       *tpp = ntp;
-                       *ipp = NULL;
-                       return code;
-               }
-
-               /*
-                * transaction commit worked ok so we can drop the extra ticket
-                * reference that we gained in xfs_trans_dup()
-                */
-               xfs_log_ticket_put(tp->t_ticket);
-               tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
-               code = xfs_trans_reserve(tp, &tres, 0, 0);
 
                /*
                 * Re-attach the quota info that we detached from prev trx.
@@ -1025,7 +990,7 @@ xfs_dir_ialloc(
 
                if (code) {
                        xfs_buf_relse(ialloc_context);
-                       *tpp = ntp;
+                       *tpp = tp;
                        *ipp = NULL;
                        return code;
                }
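
xfs_trans_roll() encapsulates the duplicate/commit/re-reserve dance this function (and xfs_itruncate_extents() below) used to open-code so that a permanent log reservation can carry across a commit. A reconstruction of the removed sequence, shown with the new no-flags commit; the real helper also logs and rejoins the inode passed to it:

	tres.tr_logres = xfs_trans_get_log_res(tp);	/* save permanent  */
	tres.tr_logcount = xfs_trans_get_log_count(tp);	/* log reservation */

	ntp = xfs_trans_dup(tp);	/* new trans shares the log ticket */
	code = xfs_trans_commit(tp);	/* commit old; ticket lives on     */
	tp = ntp;
	if (code)
		goto fail;		/* hypothetical error label        */

	xfs_log_ticket_put(tp->t_ticket);	/* drop the dup's extra ref */
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
	code = xfs_trans_reserve(tp, &tres, 0, 0);	/* regrant for next cycle */
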
@@ -1127,7 +1092,6 @@ xfs_create(
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
        bool                    unlock_dp_on_error = false;
-       uint                    cancel_flags;
        int                     committed;
        prid_t                  prid;
        struct xfs_dquot        *udqp = NULL;
@@ -1164,8 +1128,6 @@ xfs_create(
                tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
        }
 
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-
        /*
         * Initially assume that the file does not exist and
         * reserve the resources for that case.  If that is not
@@ -1183,10 +1145,9 @@ xfs_create(
                resblks = 0;
                error = xfs_trans_reserve(tp, tres, 0, 0);
        }
-       if (error) {
-               cancel_flags = 0;
+       if (error)
                goto out_trans_cancel;
-       }
+
 
        xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
        unlock_dp_on_error = true;
@@ -1217,7 +1178,7 @@ xfs_create(
        if (error) {
                if (error == -ENOSPC)
                        goto out_trans_cancel;
-               goto out_trans_abort;
+               goto out_trans_cancel;
        }
 
        /*
@@ -1235,7 +1196,7 @@ xfs_create(
                                        resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
        if (error) {
                ASSERT(error != -ENOSPC);
-               goto out_trans_abort;
+               goto out_trans_cancel;
        }
        xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
@@ -1269,7 +1230,7 @@ xfs_create(
        if (error)
                goto out_bmap_cancel;
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                goto out_release_inode;
 
@@ -1282,10 +1243,8 @@ xfs_create(
 
  out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
- out_trans_abort:
-       cancel_flags |= XFS_TRANS_ABORT;
  out_trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
  out_release_inode:
        /*
         * Wait until after the current transaction is aborted to finish the
@@ -1317,7 +1276,6 @@ xfs_create_tmpfile(
        struct xfs_inode        *ip = NULL;
        struct xfs_trans        *tp = NULL;
        int                     error;
-       uint                    cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        prid_t                  prid;
        struct xfs_dquot        *udqp = NULL;
        struct xfs_dquot        *gdqp = NULL;
@@ -1350,10 +1308,8 @@ xfs_create_tmpfile(
                resblks = 0;
                error = xfs_trans_reserve(tp, tres, 0, 0);
        }
-       if (error) {
-               cancel_flags = 0;
+       if (error)
                goto out_trans_cancel;
-       }
 
        error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
                                                pdqp, resblks, 1, 0);
@@ -1365,7 +1321,7 @@ xfs_create_tmpfile(
        if (error) {
                if (error == -ENOSPC)
                        goto out_trans_cancel;
-               goto out_trans_abort;
+               goto out_trans_cancel;
        }
 
        if (mp->m_flags & XFS_MOUNT_WSYNC)
@@ -1381,9 +1337,9 @@ xfs_create_tmpfile(
        ip->i_d.di_nlink--;
        error = xfs_iunlink(tp, ip);
        if (error)
-               goto out_trans_abort;
+               goto out_trans_cancel;
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                goto out_release_inode;
 
@@ -1394,10 +1350,8 @@ xfs_create_tmpfile(
        *ipp = ip;
        return 0;
 
- out_trans_abort:
-       cancel_flags |= XFS_TRANS_ABORT;
  out_trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
  out_release_inode:
        /*
         * Wait until after the current transaction is aborted to finish the
@@ -1427,7 +1381,6 @@ xfs_link(
        int                     error;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
-       int                     cancel_flags;
        int                     committed;
        int                     resblks;
 
@@ -1447,17 +1400,14 @@ xfs_link(
                goto std_return;
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
        if (error == -ENOSPC) {
                resblks = 0;
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
        }
-       if (error) {
-               cancel_flags = 0;
+       if (error)
                goto error_return;
-       }
 
        xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 
@@ -1486,19 +1436,19 @@ xfs_link(
        if (sip->i_d.di_nlink == 0) {
                error = xfs_iunlink_remove(tp, sip);
                if (error)
-                       goto abort_return;
+                       goto error_return;
        }
 
        error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
                                        &first_block, &free_list, resblks);
        if (error)
-               goto abort_return;
+               goto error_return;
        xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
 
        error = xfs_bumplink(tp, sip);
        if (error)
-               goto abort_return;
+               goto error_return;
 
        /*
         * If this is a synchronous mount, make sure that the
@@ -1512,15 +1462,13 @@ xfs_link(
        error = xfs_bmap_finish (&tp, &free_list, &committed);
        if (error) {
                xfs_bmap_cancel(&free_list);
-               goto abort_return;
+               goto error_return;
        }
 
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       return xfs_trans_commit(tp);
 
- abort_return:
-       cancel_flags |= XFS_TRANS_ABORT;
  error_return:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
  std_return:
        return error;
 }
@@ -1555,7 +1503,6 @@ xfs_itruncate_extents(
 {
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp = *tpp;
-       struct xfs_trans        *ntp;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
        xfs_fileoff_t           first_unmap_block;
@@ -1613,29 +1560,7 @@ xfs_itruncate_extents(
                if (error)
                        goto out_bmap_cancel;
 
-               if (committed) {
-                       /*
-                        * Mark the inode dirty so it will be logged and
-                        * moved forward in the log as part of every commit.
-                        */
-                       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               }
-
-               ntp = xfs_trans_dup(tp);
-               error = xfs_trans_commit(tp, 0);
-               tp = ntp;
-
-               xfs_trans_ijoin(tp, ip, 0);
-
-               if (error)
-                       goto out;
-
-               /*
-                * Transaction commit worked ok so we can drop the extra ticket
-                * reference that we gained in xfs_trans_dup()
-                */
-               xfs_log_ticket_put(tp->t_ticket);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+               error = xfs_trans_roll(&tp, ip);
                if (error)
                        goto out;
        }
@@ -1756,7 +1681,7 @@ xfs_inactive_truncate(
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
        if (error) {
                ASSERT(XFS_FORCED_SHUTDOWN(mp));
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -1777,7 +1702,7 @@ xfs_inactive_truncate(
 
        ASSERT(ip->i_d.di_nextents == 0);
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                goto error_unlock;
 
@@ -1785,7 +1710,7 @@ xfs_inactive_truncate(
        return 0;
 
 error_trans_cancel:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
 error_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
@@ -1835,7 +1760,7 @@ xfs_inactive_ifree(
                } else {
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
                }
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -1855,7 +1780,7 @@ xfs_inactive_ifree(
                                __func__, error);
                        xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
                }
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+               xfs_trans_cancel(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                return error;
        }
@@ -1874,7 +1799,7 @@ xfs_inactive_ifree(
        if (error)
                xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
                        __func__, error);
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
                        __func__, error);
@@ -2235,28 +2160,42 @@ xfs_iunlink_remove(
  */
 STATIC int
 xfs_ifree_cluster(
-       xfs_inode_t     *free_ip,
-       xfs_trans_t     *tp,
-       xfs_ino_t       inum)
+       xfs_inode_t             *free_ip,
+       xfs_trans_t             *tp,
+       struct xfs_icluster     *xic)
 {
        xfs_mount_t             *mp = free_ip->i_mount;
        int                     blks_per_cluster;
        int                     inodes_per_cluster;
        int                     nbufs;
        int                     i, j;
+       int                     ioffset;
        xfs_daddr_t             blkno;
        xfs_buf_t               *bp;
        xfs_inode_t             *ip;
        xfs_inode_log_item_t    *iip;
        xfs_log_item_t          *lip;
        struct xfs_perag        *pag;
+       xfs_ino_t               inum;
 
+       inum = xic->first_ino;
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
        blks_per_cluster = xfs_icluster_size_fsb(mp);
        inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
        nbufs = mp->m_ialloc_blks / blks_per_cluster;
 
        for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
+               /*
+                * The allocation bitmap tells us which inodes of the chunk were
+                * physically allocated. Skip the cluster if an inode falls into
+                * a sparse region.
+                */
+               ioffset = inum - xic->first_ino;
+               if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
+                       ASSERT(do_mod(ioffset, inodes_per_cluster) == 0);
+                       continue;
+               }
+
                blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
                                         XFS_INO_TO_AGBNO(mp, inum));
 
@@ -2414,8 +2353,7 @@ xfs_ifree(
        xfs_bmap_free_t *flist)
 {
        int                     error;
-       int                     delete;
-       xfs_ino_t               first_ino;
+       struct xfs_icluster     xic = { 0 };
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(ip->i_d.di_nlink == 0);
@@ -2431,7 +2369,7 @@ xfs_ifree(
        if (error)
                return error;
 
-       error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
+       error = xfs_difree(tp, ip->i_ino, flist, &xic);
        if (error)
                return error;
 
@@ -2448,8 +2386,8 @@ xfs_ifree(
        ip->i_d.di_gen++;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-       if (delete)
-               error = xfs_ifree_cluster(ip, tp, first_ino);
+       if (xic.deleted)
+               error = xfs_ifree_cluster(ip, tp, &xic);
 
        return error;
 }
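
xfs_difree() now reports back through a struct xfs_icluster (first inode number, a deleted flag, and an allocation bitmap) instead of a bare delete/first_ino pair, so xfs_ifree_cluster() can skip clusters that fall inside a sparse region of the chunk, that is, inodes the inobt record covers but which were never physically allocated; the assert documents that sparse regions are cluster aligned. The bitmap test in isolation, assuming XFS_INOBT_MASK(i) expands to a 64-bit mask with bit i set:

	int	ioffset = inum - xic->first_ino;	/* index within the chunk */

	if (!(xic->alloc & XFS_INOBT_MASK(ioffset)))
		continue;	/* sparse hole: no cluster buffer to stale */
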
@@ -2536,7 +2474,6 @@ xfs_remove(
        int                     error = 0;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
-       int                     cancel_flags;
        int                     committed;
        uint                    resblks;
 
@@ -2557,7 +2494,6 @@ xfs_remove(
                tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
        else
                tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
 
        /*
         * We try to get the real space reservation first,
@@ -2576,7 +2512,6 @@ xfs_remove(
        }
        if (error) {
                ASSERT(error != -ENOSPC);
-               cancel_flags = 0;
                goto out_trans_cancel;
        }
 
@@ -2588,7 +2523,6 @@ xfs_remove(
        /*
         * If we're removing a directory perform some additional validation.
         */
-       cancel_flags |= XFS_TRANS_ABORT;
        if (is_dir) {
                ASSERT(ip->i_d.di_nlink >= 2);
                if (ip->i_d.di_nlink != 2) {
@@ -2644,7 +2578,7 @@ xfs_remove(
        if (error)
                goto out_bmap_cancel;
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                goto std_return;
 
@@ -2656,7 +2590,7 @@ xfs_remove(
  out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
  out_trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
  std_return:
        return error;
 }
@@ -2730,11 +2664,11 @@ xfs_finish_rename(
        error = xfs_bmap_finish(&tp, free_list, &committed);
        if (error) {
                xfs_bmap_cancel(free_list);
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+               xfs_trans_cancel(tp);
                return error;
        }
 
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       return xfs_trans_commit(tp);
 }
 
 /*
@@ -2855,7 +2789,7 @@ xfs_cross_rename(
 
 out_trans_abort:
        xfs_bmap_cancel(free_list);
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        return error;
 }
 
@@ -2915,7 +2849,6 @@ xfs_rename(
        int                     num_inodes = __XFS_SORT_INODES;
        bool                    new_parent = (src_dp != target_dp);
        bool                    src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
-       int                     cancel_flags = 0;
        int                     spaceres;
        int                     error;
 
@@ -2951,7 +2884,6 @@ xfs_rename(
        }
        if (error)
                goto out_trans_cancel;
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
 
        /*
         * Attach the dquots to the inodes
@@ -3022,10 +2954,8 @@ xfs_rename(
                error = xfs_dir_createname(tp, target_dp, target_name,
                                                src_ip->i_ino, &first_block,
                                                &free_list, spaceres);
-               if (error == -ENOSPC)
-                       goto out_bmap_cancel;
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
 
                xfs_trans_ichgtime(tp, target_dp,
                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3033,7 +2963,7 @@ xfs_rename(
                if (new_parent && src_is_directory) {
                        error = xfs_bumplink(tp, target_dp);
                        if (error)
-                               goto out_trans_abort;
+                               goto out_bmap_cancel;
                }
        } else { /* target_ip != NULL */
                /*
@@ -3065,7 +2995,7 @@ xfs_rename(
                                        src_ip->i_ino,
                                        &first_block, &free_list, spaceres);
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
 
                xfs_trans_ichgtime(tp, target_dp,
                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3076,7 +3006,7 @@ xfs_rename(
                 */
                error = xfs_droplink(tp, target_ip);
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
 
                if (src_is_directory) {
                        /*
@@ -3084,7 +3014,7 @@ xfs_rename(
                         */
                        error = xfs_droplink(tp, target_ip);
                        if (error)
-                               goto out_trans_abort;
+                               goto out_bmap_cancel;
                }
        } /* target_ip != NULL */
 
@@ -3101,7 +3031,7 @@ xfs_rename(
                                        &first_block, &free_list, spaceres);
                ASSERT(error != -EEXIST);
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
        }
 
        /*
@@ -3127,7 +3057,7 @@ xfs_rename(
                 */
                error = xfs_droplink(tp, src_dp);
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
        }
 
        /*
@@ -3142,7 +3072,7 @@ xfs_rename(
                error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
                                           &first_block, &free_list, spaceres);
        if (error)
-               goto out_trans_abort;
+               goto out_bmap_cancel;
 
        /*
         * For whiteouts, we need to bump the link count on the whiteout inode.
@@ -3156,10 +3086,10 @@ xfs_rename(
                ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
                error = xfs_bumplink(tp, wip);
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
                error = xfs_iunlink_remove(tp, wip);
                if (error)
-                       goto out_trans_abort;
+                       goto out_bmap_cancel;
                xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
 
                /*
@@ -3180,12 +3110,10 @@ xfs_rename(
                IRELE(wip);
        return error;
 
-out_trans_abort:
-       cancel_flags |= XFS_TRANS_ABORT;
 out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
 out_trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
        if (wip)
                IRELE(wip);
        return error;
@@ -3464,7 +3392,7 @@ xfs_iflush_int(
        ASSERT(ip->i_d.di_version > 1);
 
        /* set *dip = inode's place in the buffer */
-       dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
+       dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
        if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
                               mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
index 87f67c6b654cb5c9899c5c566d238f5a9ea1f04a..ea7d85af53101f25ed0d1b9598edae5c8af77bcc 100644
@@ -336,7 +336,7 @@ xfs_set_dmattrs(
        tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
        xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -346,7 +346,7 @@ xfs_set_dmattrs(
        ip->i_d.di_dmstate  = state;
 
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
 
        return error;
 }
@@ -1076,7 +1076,7 @@ xfs_ioctl_setattr_get_trans(
        return tp;
 
 out_cancel:
-       xfs_trans_cancel(tp, 0);
+       xfs_trans_cancel(tp);
        return ERR_PTR(error);
 }
 
@@ -1253,7 +1253,7 @@ xfs_ioctl_setattr(
        else
                ip->i_d.di_extsize = 0;
 
-       code = xfs_trans_commit(tp, 0);
+       code = xfs_trans_commit(tp);
 
        /*
         * Release any dquot(s) the inode had kept before chown.
@@ -1265,7 +1265,7 @@ xfs_ioctl_setattr(
        return code;
 
 error_trans_cancel:
-       xfs_trans_cancel(tp, 0);
+       xfs_trans_cancel(tp);
 error_free_dquots:
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(pdqp);
@@ -1338,11 +1338,11 @@ xfs_ioc_setxflags(
 
        error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                goto out_drop_write;
        }
 
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
 out_drop_write:
        mnt_drop_write_file(filp);
        return error;
index 38e633bad8c2a2c919d1b1c9a4b9bda1d24d2ec4..1f86033171c84ef4b12a201e112c9c37c7c69bdf 100644
@@ -183,7 +183,7 @@ xfs_iomap_write_direct(
         * Check for running out of space, note: need lock to return
         */
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -213,7 +213,7 @@ xfs_iomap_write_direct(
        error = xfs_bmap_finish(&tp, &free_list, &committed);
        if (error)
                goto out_bmap_cancel;
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                goto out_unlock;
 
@@ -236,7 +236,7 @@ out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
        xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
 out_trans_cancel:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        goto out_unlock;
 }
 
@@ -690,7 +690,7 @@ xfs_iomap_write_allocate(
                        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
                                                  nres, 0);
                        if (error) {
-                               xfs_trans_cancel(tp, 0);
+                               xfs_trans_cancel(tp);
                                return error;
                        }
                        xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -760,7 +760,7 @@ xfs_iomap_write_allocate(
                        if (error)
                                goto trans_cancel;
 
-                       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+                       error = xfs_trans_commit(tp);
                        if (error)
                                goto error0;
 
@@ -791,7 +791,7 @@ xfs_iomap_write_allocate(
 
 trans_cancel:
        xfs_bmap_cancel(&free_list);
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
 error0:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
@@ -853,7 +853,7 @@ xfs_iomap_write_unwritten(
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
                                          resblks, 0);
                if (error) {
-                       xfs_trans_cancel(tp, 0);
+                       xfs_trans_cancel(tp);
                        return error;
                }
 
@@ -890,7 +890,7 @@ xfs_iomap_write_unwritten(
                if (error)
                        goto error_on_bmapi_transaction;
 
-               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               error = xfs_trans_commit(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
                        return error;
@@ -914,7 +914,7 @@ xfs_iomap_write_unwritten(
 
 error_on_bmapi_transaction:
        xfs_bmap_cancel(&free_list);
-       xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
+       xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 }
index 7f51f39f8acc0a2fd407a7be57c1477439213a0f..766b23f86ce9f7b9423ba0d8a7f690d27f2a5d1b 100644
@@ -699,7 +699,7 @@ xfs_setattr_nonsize(
 
        if (mp->m_flags & XFS_MOUNT_WSYNC)
                xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
 
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -730,7 +730,7 @@ xfs_setattr_nonsize(
        return 0;
 
 out_trans_cancel:
-       xfs_trans_cancel(tp, 0);
+       xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out_dqrele:
        xfs_qm_dqrele(udqp);
@@ -752,7 +752,6 @@ xfs_setattr_size(
        struct xfs_trans        *tp;
        int                     error;
        uint                    lock_flags = 0;
-       uint                    commit_flags = 0;
        bool                    did_zeroing = false;
 
        trace_xfs_setattr(ip);
@@ -848,7 +847,11 @@ xfs_setattr_size(
         * to hope that the caller sees ENOMEM and retries the truncate
         * operation.
         */
-       error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
+       if (IS_DAX(inode))
+               error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
+       else
+               error = block_truncate_page(inode->i_mapping, newsize,
+                                           xfs_get_blocks);
        if (error)
                return error;
        truncate_setsize(inode, newsize);
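
Truncate gets the same page-cache/DAX split as xfs_iozero(): the partial block at the new EOF must be zeroed, and a DAX inode has no page cache to zero through, so dax_truncate_page() writes the zeroes via the block device mapping instead of block_truncate_page(). Net result of the hunk above:

	if (IS_DAX(inode))
		error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
	else
		error = block_truncate_page(inode->i_mapping, newsize,
					    xfs_get_blocks);
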
@@ -858,7 +861,6 @@ xfs_setattr_size(
        if (error)
                goto out_trans_cancel;
 
-       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
        lock_flags |= XFS_ILOCK_EXCL;
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, 0);
@@ -898,7 +900,7 @@ xfs_setattr_size(
        if (newsize <= oldsize) {
                error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
                if (error)
-                       goto out_trans_abort;
+                       goto out_trans_cancel;
 
                /*
                 * Truncated "down", so we're removing references to old data
@@ -925,16 +927,14 @@ xfs_setattr_size(
        if (mp->m_flags & XFS_MOUNT_WSYNC)
                xfs_trans_set_sync(tp);
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
 out_unlock:
        if (lock_flags)
                xfs_iunlock(ip, lock_flags);
        return error;
 
-out_trans_abort:
-       commit_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
-       xfs_trans_cancel(tp, commit_flags);
+       xfs_trans_cancel(tp);
        goto out_unlock;
 }
 
@@ -981,7 +981,7 @@ xfs_vn_update_time(
        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -1003,7 +1003,7 @@ xfs_vn_update_time(
        }
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
-       return xfs_trans_commit(tp, 0);
+       return xfs_trans_commit(tp);
 }
 
 #define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -1188,22 +1188,22 @@ xfs_diflags_to_iflags(
        struct inode            *inode,
        struct xfs_inode        *ip)
 {
-       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+       uint16_t                flags = ip->i_d.di_flags;
+
+       inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
+                           S_NOATIME | S_DAX);
+
+       if (flags & XFS_DIFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+       if (flags & XFS_DIFLAG_APPEND)
                inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+       if (flags & XFS_DIFLAG_SYNC)
                inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+       if (flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
+       /* XXX: Also needs an on-disk per inode flag! */
+       if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+               inode->i_flags |= S_DAX;
 }
 
 /*
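
xfs_diflags_to_iflags() switches from per-flag if/else toggling to the clear-then-set idiom: clear every bit the function manages in one mask, then set only what the on-disk flags request. Same observable result, fewer branches, and new bits like S_DAX slot in with a single line. The pattern in the abstract, with hypothetical flag names:

	out &= ~(FLAG_A | FLAG_B | FLAG_C);	/* reset all managed bits */
	if (src & SRC_A)
		out |= FLAG_A;			/* re-derive from the source */
	if (src & SRC_B)
		out |= FLAG_B;
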
index 80429891dc9b59dcf893fc8cc4dad9fe85079c7b..f41b0c3fddab5558f5040c00501e16b6ef935992 100644
@@ -252,7 +252,7 @@ xfs_bulkstat_grab_ichunk(
                }
 
                irec->ir_free |= xfs_inobt_maskn(0, idx);
-               *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
+               *icount = irec->ir_count - irec->ir_freecount;
        }
 
        return 0;
@@ -415,6 +415,8 @@ xfs_bulkstat(
                                goto del_cursor;
                        if (icount) {
                                irbp->ir_startino = r.ir_startino;
+                               irbp->ir_holemask = r.ir_holemask;
+                               irbp->ir_count = r.ir_count;
                                irbp->ir_freecount = r.ir_freecount;
                                irbp->ir_free = r.ir_free;
                                irbp++;
@@ -447,13 +449,15 @@ xfs_bulkstat(
                         * If this chunk has any allocated inodes, save it.
                         * Also start read-ahead now for this chunk.
                         */
-                       if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+                       if (r.ir_freecount < r.ir_count) {
                                xfs_bulkstat_ichunk_ra(mp, agno, &r);
                                irbp->ir_startino = r.ir_startino;
+                               irbp->ir_holemask = r.ir_holemask;
+                               irbp->ir_count = r.ir_count;
                                irbp->ir_freecount = r.ir_freecount;
                                irbp->ir_free = r.ir_free;
                                irbp++;
-                               icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
+                               icount += r.ir_count - r.ir_freecount;
                        }
                        error = xfs_btree_increment(cur, 0, &stat);
                        if (error || stat == 0) {
@@ -599,8 +603,7 @@ xfs_inumbers(
                agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
                buffer[bufidx].xi_startino =
                        XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
-               buffer[bufidx].xi_alloccount =
-                       XFS_INODES_PER_CHUNK - r.ir_freecount;
+               buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
                buffer[bufidx].xi_allocmask = ~r.ir_free;
                if (++bufidx == bcount) {
                        long    written;
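
With sparse inode chunks an inobt record may cover fewer than XFS_INODES_PER_CHUNK inodes, so every allocated-inode computation in bulkstat/inumbers switches to the record's own ir_count, and ir_holemask/ir_count are now copied along with the rest of the record. Worked example: a record with ir_count = 32 and ir_freecount = 5 has

	allocated = irec->ir_count - irec->ir_freecount;	/* 32 - 5 = 27 */

where the old XFS_INODES_PER_CHUNK - ir_freecount would have wrongly reported 59 on a 64-inodes-per-chunk filesystem.
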
index 7c7842c85a082f92c99b25f6ca818c1bf32a7224..85f883dd6207bd41b6b2951111009cb5097ae339 100644
@@ -32,26 +32,12 @@ typedef unsigned int                __uint32_t;
 typedef signed long long int   __int64_t;
 typedef unsigned long long int __uint64_t;
 
-typedef __uint32_t             inst_t;         /* an instruction */
-
 typedef __s64                  xfs_off_t;      /* <file offset> type */
 typedef unsigned long long     xfs_ino_t;      /* <inode> type */
 typedef __s64                  xfs_daddr_t;    /* <disk address> type */
-typedef char *                 xfs_caddr_t;    /* <core address> type */
 typedef __u32                  xfs_dev_t;
 typedef __u32                  xfs_nlink_t;
 
-/* __psint_t is the same size as a pointer */
-#if (BITS_PER_LONG == 32)
-typedef __int32_t __psint_t;
-typedef __uint32_t __psunsigned_t;
-#elif (BITS_PER_LONG == 64)
-typedef __int64_t __psint_t;
-typedef __uint64_t __psunsigned_t;
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-
 #include "xfs_types.h"
 
 #include "kmem.h"
index bcc7cfabb787079c5a0bf1dd9b4da3c2b159bcd8..08d4fe46f0fae9a161678e1a074cb4e09abcf86f 100644
@@ -109,7 +109,7 @@ xlog_ungrant_log_space(
 STATIC void
 xlog_verify_dest_ptr(
        struct xlog             *log,
-       char                    *ptr);
+       void                    *ptr);
 STATIC void
 xlog_verify_grant_tail(
        struct xlog *log);
@@ -513,7 +513,7 @@ xfs_log_done(
        struct xfs_mount        *mp,
        struct xlog_ticket      *ticket,
        struct xlog_in_core     **iclog,
-       uint                    flags)
+       bool                    regrant)
 {
        struct xlog             *log = mp->m_log;
        xfs_lsn_t               lsn = 0;
@@ -526,14 +526,11 @@ xfs_log_done(
            (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
             (xlog_commit_record(log, ticket, iclog, &lsn)))) {
                lsn = (xfs_lsn_t) -1;
-               if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
-                       flags |= XFS_LOG_REL_PERM_RESERV;
-               }
+               regrant = false;
        }
 
 
-       if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
-           (flags & XFS_LOG_REL_PERM_RESERV)) {
+       if (!regrant) {
                trace_xfs_log_done_nonperm(log, ticket);
 
                /*
@@ -541,7 +538,6 @@ xfs_log_done(
                 * request has been made to release a permanent reservation.
                 */
                xlog_ungrant_log_space(log, ticket);
-               xfs_log_ticket_put(ticket);
        } else {
                trace_xfs_log_done_perm(log, ticket);
 
@@ -553,6 +549,7 @@ xfs_log_done(
                ticket->t_flags |= XLOG_TIC_INITED;
        }
 
+       xfs_log_ticket_put(ticket);
        return lsn;
 }
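
xfs_log_done() trades its flags word, whose only user was XFS_LOG_REL_PERM_RESERV, for an explicit bool regrant, and the ticket reference drop moves below the branch so both paths put the ticket. Callers then read directly; illustrative calls, the regrant = true form being how a rolling transaction keeps its permanent reservation:

	lsn = xfs_log_done(mp, ticket, &commit_iclog, false);	/* release outright */
	lsn = xfs_log_done(mp, ticket, NULL, true);		/* keep (regrant)   */
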
 
@@ -1447,7 +1444,7 @@ xlog_alloc_log(
                iclog->ic_bp = bp;
                iclog->ic_data = bp->b_addr;
 #ifdef DEBUG
-               log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
+               log->l_iclog_bak[i] = &iclog->ic_header;
 #endif
                head = &iclog->ic_header;
                memset(head, 0, sizeof(xlog_rec_header_t));
@@ -1602,7 +1599,7 @@ xlog_pack_data(
        int                     i, j, k;
        int                     size = iclog->ic_offset + roundoff;
        __be32                  cycle_lsn;
-       xfs_caddr_t             dp;
+       char                    *dp;
 
        cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
 
@@ -3664,7 +3661,7 @@ xlog_ticket_alloc(
 void
 xlog_verify_dest_ptr(
        struct xlog     *log,
-       char            *ptr)
+       void            *ptr)
 {
        int i;
        int good_ptr = 0;
@@ -3767,9 +3764,8 @@ xlog_verify_iclog(
        xlog_op_header_t        *ophead;
        xlog_in_core_t          *icptr;
        xlog_in_core_2_t        *xhdr;
-       xfs_caddr_t             ptr;
-       xfs_caddr_t             base_ptr;
-       __psint_t               field_offset;
+       void                    *base_ptr, *ptr, *p;
+       ptrdiff_t               field_offset;
        __uint8_t               clientid;
        int                     len, i, j, k, op_len;
        int                     idx;
@@ -3788,9 +3784,9 @@ xlog_verify_iclog(
        if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
                xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
 
-       ptr = (xfs_caddr_t) &iclog->ic_header;
-       for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
-            ptr += BBSIZE) {
+       base_ptr = ptr = &iclog->ic_header;
+       p = &iclog->ic_header;
+       for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
                if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
                        xfs_emerg(log->l_mp, "%s: unexpected magic num",
                                __func__);
@@ -3798,20 +3794,19 @@ xlog_verify_iclog(
 
        /* check fields */
        len = be32_to_cpu(iclog->ic_header.h_num_logops);
-       ptr = iclog->ic_datap;
-       base_ptr = ptr;
-       ophead = (xlog_op_header_t *)ptr;
+       base_ptr = ptr = iclog->ic_datap;
+       ophead = ptr;
        xhdr = iclog->ic_data;
        for (i = 0; i < len; i++) {
-               ophead = (xlog_op_header_t *)ptr;
+               ophead = ptr;
 
                /* clientid is only 1 byte */
-               field_offset = (__psint_t)
-                              ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
+               p = &ophead->oh_clientid;
+               field_offset = p - base_ptr;
                if (!syncing || (field_offset & 0x1ff)) {
                        clientid = ophead->oh_clientid;
                } else {
-                       idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
+                       idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
                        if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
                                j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
                                k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3829,13 +3824,13 @@ xlog_verify_iclog(
                                (unsigned long)field_offset);
 
                /* check length */
-               field_offset = (__psint_t)
-                              ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
+               p = &ophead->oh_len;
+               field_offset = p - base_ptr;
                if (!syncing || (field_offset & 0x1ff)) {
                        op_len = be32_to_cpu(ophead->oh_len);
                } else {
-                       idx = BTOBBT((__psint_t)&ophead->oh_len -
-                                   (__psint_t)iclog->ic_datap);
+                       idx = BTOBBT((uintptr_t)&ophead->oh_len -
+                                   (uintptr_t)iclog->ic_datap);
                        if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
                                j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
                                k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
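
Editor's note: the hunks above swap the xfs_caddr_t (a char * typedef) pointers for void * and the __psint_t offsets for the standard ptrdiff_t/uintptr_t. One subtlety: ISO C does not define arithmetic on void *; the kernel builds with gcc, whose extension treats void * arithmetic in byte units, so `ptr += BBSIZE` and `p - base_ptr` behave exactly as the old char * code did. A minimal userspace sketch of that equivalence (illustrative only, not part of the patch):

	#include <stdio.h>
	#include <stddef.h>

	int main(void)
	{
		char buf[1024];
		void *base = buf;
		void *field = buf + 100;

		/* gcc extension: void * arithmetic is done in byte units,
		 * so this matches the result of the old char * code. */
		ptrdiff_t off = field - base;

		printf("offset = %td\n", off);	/* prints 100 */
		return 0;
	}

This compiles with gcc and clang but is not strictly conforming ISO C, the same trade-off the kernel code accepts.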
index 84e0deb95abda6181b5a10b2a3f790df9c6f04ff..fa27aaec72cb535b872840115374c13817d9b6a2 100644 (file)
@@ -110,15 +110,6 @@ static inline xfs_lsn_t    _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 
 #define        XFS_LSN_CMP(x,y) _lsn_cmp(x,y)
 
-/*
- * Macros, structures, prototypes for interface to the log manager.
- */
-
-/*
- * Flags to xfs_log_done()
- */
-#define XFS_LOG_REL_PERM_RESERV        0x1
-
 /*
  * Flags to xfs_log_force()
  *
@@ -138,7 +129,7 @@ struct xfs_log_callback;
 xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
                       struct xlog_ticket *ticket,
                       struct xlog_in_core **iclog,
-                      uint             flags);
+                      bool regrant);
 int      _xfs_log_force(struct xfs_mount *mp,
                         uint           flags,
                         int            *log_forced);
@@ -183,7 +174,7 @@ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void     xfs_log_ticket_put(struct xlog_ticket *ticket);
 
 void   xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
-                               xfs_lsn_t *commit_lsn, int flags);
+                               xfs_lsn_t *commit_lsn, bool regrant);
 bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 
 void   xfs_log_work_queue(struct xfs_mount *mp);
index 45cc0ce18adf04cac50fe801884439a246285443..abc2ccbff73918663b374d795ad15bf803fa4c5b 100644 (file)
@@ -624,7 +624,7 @@ restart:
        spin_unlock(&cil->xc_push_lock);
 
        /* xfs_log_done always frees the ticket on error. */
-       commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
+       commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
        if (commit_lsn == -1)
                goto out_abort;
 
@@ -773,14 +773,10 @@ xfs_log_commit_cil(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
        xfs_lsn_t               *commit_lsn,
-       int                     flags)
+       bool                    regrant)
 {
        struct xlog             *log = mp->m_log;
        struct xfs_cil          *cil = log->l_cilp;
-       int                     log_flags = 0;
-
-       if (flags & XFS_TRANS_RELEASE_LOG_RES)
-               log_flags = XFS_LOG_REL_PERM_RESERV;
 
        /* lock out background commit */
        down_read(&cil->xc_ctx_lock);
@@ -795,7 +791,7 @@ xfs_log_commit_cil(
        if (commit_lsn)
                *commit_lsn = tp->t_commit_lsn;
 
-       xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+       xfs_log_done(mp, tp->t_ticket, NULL, regrant);
        xfs_trans_unreserve_and_mod_sb(tp);
 
        /*
@@ -809,7 +805,7 @@ xfs_log_commit_cil(
         * the log items. This affects (at least) processing of stale buffers,
         * inodes and EFIs.
         */
-       xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
+       xfs_trans_free_items(tp, tp->t_commit_lsn, false);
 
        xlog_cil_push_background(log);
 
index db7cbdeb2b42c766914682862f3179866393111b..1c87c8abfbed0aef749760ac794b102d217d30eb 100644 (file)
@@ -409,7 +409,7 @@ struct xlog {
 
        /* The following fields are used for debugging; need to hold icloglock */
 #ifdef DEBUG
-       char                    *l_iclog_bak[XLOG_MAX_ICLOGS];
+       void                    *l_iclog_bak[XLOG_MAX_ICLOGS];
 #endif
 
 };
index 4f5784f85a5b22e4bad0946106dcb1302f817ace..01dd228ca05e315b88feb7afbfaf3e81d728878b 100644 (file)
@@ -147,7 +147,7 @@ xlog_put_bp(
  * Return the address of the start of the given block number's data
  * in a log buffer.  The buffer covers a log sector-aligned region.
  */
-STATIC xfs_caddr_t
+STATIC char *
 xlog_align(
        struct xlog     *log,
        xfs_daddr_t     blk_no,
@@ -203,7 +203,7 @@ xlog_bread(
        xfs_daddr_t     blk_no,
        int             nbblks,
        struct xfs_buf  *bp,
-       xfs_caddr_t     *offset)
+       char            **offset)
 {
        int             error;
 
@@ -225,9 +225,9 @@ xlog_bread_offset(
        xfs_daddr_t     blk_no,         /* block to read from */
        int             nbblks,         /* blocks to read */
        struct xfs_buf  *bp,
-       xfs_caddr_t     offset)
+       char            *offset)
 {
-       xfs_caddr_t     orig_offset = bp->b_addr;
+       char            *orig_offset = bp->b_addr;
        int             orig_len = BBTOB(bp->b_length);
        int             error, error2;
 
@@ -396,7 +396,7 @@ xlog_find_cycle_start(
        xfs_daddr_t     *last_blk,
        uint            cycle)
 {
-       xfs_caddr_t     offset;
+       char            *offset;
        xfs_daddr_t     mid_blk;
        xfs_daddr_t     end_blk;
        uint            mid_cycle;
@@ -443,7 +443,7 @@ xlog_find_verify_cycle(
        uint            cycle;
        xfs_buf_t       *bp;
        xfs_daddr_t     bufblks;
-       xfs_caddr_t     buf = NULL;
+       char            *buf = NULL;
        int             error = 0;
 
        /*
@@ -509,7 +509,7 @@ xlog_find_verify_log_record(
 {
        xfs_daddr_t             i;
        xfs_buf_t               *bp;
-       xfs_caddr_t             offset = NULL;
+       char                    *offset = NULL;
        xlog_rec_header_t       *head = NULL;
        int                     error = 0;
        int                     smallmem = 0;
@@ -616,7 +616,7 @@ xlog_find_head(
        xfs_daddr_t     *return_head_blk)
 {
        xfs_buf_t       *bp;
-       xfs_caddr_t     offset;
+       char            *offset;
        xfs_daddr_t     new_blk, first_blk, start_blk, last_blk, head_blk;
        int             num_scan_bblks;
        uint            first_half_cycle, last_half_cycle;
@@ -891,7 +891,7 @@ xlog_find_tail(
 {
        xlog_rec_header_t       *rhead;
        xlog_op_header_t        *op_head;
-       xfs_caddr_t             offset = NULL;
+       char                    *offset = NULL;
        xfs_buf_t               *bp;
        int                     error, i, found;
        xfs_daddr_t             umount_data_blk;
@@ -1099,7 +1099,7 @@ xlog_find_zeroed(
        xfs_daddr_t     *blk_no)
 {
        xfs_buf_t       *bp;
-       xfs_caddr_t     offset;
+       char            *offset;
        uint            first_cycle, last_cycle;
        xfs_daddr_t     new_blk, last_blk, start_blk;
        xfs_daddr_t     num_scan_bblks;
@@ -1199,7 +1199,7 @@ bp_err:
 STATIC void
 xlog_add_record(
        struct xlog             *log,
-       xfs_caddr_t             buf,
+       char                    *buf,
        int                     cycle,
        int                     block,
        int                     tail_cycle,
@@ -1227,7 +1227,7 @@ xlog_write_log_records(
        int             tail_cycle,
        int             tail_block)
 {
-       xfs_caddr_t     offset;
+       char            *offset;
        xfs_buf_t       *bp;
        int             balign, ealign;
        int             sectbb = log->l_sectBBsize;
@@ -1789,8 +1789,7 @@ xlog_recover_do_inode_buffer(
                        return -EFSCORRUPTED;
                }
 
-               buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
-                                             next_unlinked_offset);
+               buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
                *buffer_nextp = *logged_nextp;
 
                /*
@@ -1798,7 +1797,7 @@ xlog_recover_do_inode_buffer(
                 * have to leave the inode in a consistent state for whoever
                 * reads it next....
                 */
-               xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+               xfs_dinode_calc_crc(mp,
                                xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
 
        }
@@ -2503,8 +2502,8 @@ xlog_recover_inode_pass2(
        xfs_buf_t               *bp;
        xfs_dinode_t            *dip;
        int                     len;
-       xfs_caddr_t             src;
-       xfs_caddr_t             dest;
+       char                    *src;
+       char                    *dest;
        int                     error;
        int                     attr_index;
        uint                    fields;
@@ -2546,7 +2545,7 @@ xlog_recover_inode_pass2(
                goto out_release;
        }
        ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
-       dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
+       dip = xfs_buf_offset(bp, in_f->ilf_boffset);
 
        /*
         * Make sure the place we're flushing out to really looks
@@ -2885,7 +2884,7 @@ xlog_recover_dquot_pass2(
                return error;
 
        ASSERT(bp);
-       ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
+       ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
 
        /*
         * If the dquot has an LSN in it, recover the dquot only if it's less
@@ -3068,12 +3067,22 @@ xlog_recover_do_icreate_pass2(
                return -EINVAL;
        }
 
-       /* existing allocation is fixed value */
-       ASSERT(count == mp->m_ialloc_inos);
-       ASSERT(length == mp->m_ialloc_blks);
-       if (count != mp->m_ialloc_inos ||
-            length != mp->m_ialloc_blks) {
-               xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
+       /*
+        * The inode chunk is either full or sparse, and we only support
+        * m_ialloc_min_blks-sized sparse allocations at this time.
+        */
+       if (length != mp->m_ialloc_blks &&
+           length != mp->m_ialloc_min_blks) {
+               xfs_warn(log->l_mp,
+                        "%s: unsupported chunk length", __FUNCTION__);
+               return -EINVAL;
+       }
+
+       /* verify inode count is consistent with extent length */
+       if ((count >> mp->m_sb.sb_inopblog) != length) {
+               xfs_warn(log->l_mp,
+                        "%s: inconsistent inode count and chunk length",
+                        __func__);
                return -EINVAL;
        }
 
@@ -3091,8 +3100,8 @@ xlog_recover_do_icreate_pass2(
                        XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
                return 0;
 
-       xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
-                                       be32_to_cpu(icl->icl_gen));
+       xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
+                             be32_to_cpu(icl->icl_gen));
        return 0;
 }
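
Editor's note: the replacement check works because sb_inopblog is the log2 of inodes per block, so count >> sb_inopblog is the number of blocks the logged inodes occupy and must equal the extent length for both full and sparse chunks. A standalone sketch of the invariant with an assumed geometry (512-byte inodes in 4096-byte blocks, i.e. 8 inodes per block):

	#include <assert.h>

	int main(void)
	{
		unsigned int count = 64;	/* inodes recorded in the icreate item */
		unsigned int inopblog = 3;	/* log2(8 inodes per 4096-byte block) */
		unsigned int length = 8;	/* blocks in the chunk extent */

		/* the recovery-time consistency check: 64 >> 3 == 8 */
		assert((count >> inopblog) == length);
		return 0;
	}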
 
@@ -3364,17 +3373,17 @@ STATIC int
 xlog_recover_add_to_cont_trans(
        struct xlog             *log,
        struct xlog_recover     *trans,
-       xfs_caddr_t             dp,
+       char                    *dp,
        int                     len)
 {
        xlog_recover_item_t     *item;
-       xfs_caddr_t             ptr, old_ptr;
+       char                    *ptr, *old_ptr;
        int                     old_len;
 
        if (list_empty(&trans->r_itemq)) {
                /* finish copying rest of trans header */
                xlog_recover_add_item(&trans->r_itemq);
-               ptr = (xfs_caddr_t) &trans->r_theader +
+               ptr = (char *)&trans->r_theader +
                                sizeof(xfs_trans_header_t) - len;
                memcpy(ptr, dp, len);
                return 0;
@@ -3410,12 +3419,12 @@ STATIC int
 xlog_recover_add_to_trans(
        struct xlog             *log,
        struct xlog_recover     *trans,
-       xfs_caddr_t             dp,
+       char                    *dp,
        int                     len)
 {
        xfs_inode_log_format_t  *in_f;                  /* any will do */
        xlog_recover_item_t     *item;
-       xfs_caddr_t             ptr;
+       char                    *ptr;
 
        if (!len)
                return 0;
@@ -3504,7 +3513,7 @@ STATIC int
 xlog_recovery_process_trans(
        struct xlog             *log,
        struct xlog_recover     *trans,
-       xfs_caddr_t             dp,
+       char                    *dp,
        unsigned int            len,
        unsigned int            flags,
        int                     pass)
@@ -3611,8 +3620,8 @@ xlog_recover_process_ophdr(
        struct hlist_head       rhash[],
        struct xlog_rec_header  *rhead,
        struct xlog_op_header   *ohead,
-       xfs_caddr_t             dp,
-       xfs_caddr_t             end,
+       char                    *dp,
+       char                    *end,
        int                     pass)
 {
        struct xlog_recover     *trans;
@@ -3661,11 +3670,11 @@ xlog_recover_process_data(
        struct xlog             *log,
        struct hlist_head       rhash[],
        struct xlog_rec_header  *rhead,
-       xfs_caddr_t             dp,
+       char                    *dp,
        int                     pass)
 {
        struct xlog_op_header   *ohead;
-       xfs_caddr_t             end;
+       char                    *end;
        int                     num_logops;
        int                     error;
 
@@ -3751,11 +3760,11 @@ xlog_recover_process_efi(
        }
 
        set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
        return error;
 
 abort_error:
-       xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
        return error;
 }
 
@@ -3857,13 +3866,13 @@ xlog_recover_clear_agi_bucket(
        xfs_trans_log_buf(tp, agibp, offset,
                          (offset + sizeof(xfs_agino_t) - 1));
 
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
        if (error)
                goto out_error;
        return;
 
 out_abort:
-       xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
 out_error:
        xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
        return;
@@ -4010,7 +4019,7 @@ xlog_recover_process_iunlinks(
 STATIC int
 xlog_unpack_data_crc(
        struct xlog_rec_header  *rhead,
-       xfs_caddr_t             dp,
+       char                    *dp,
        struct xlog             *log)
 {
        __le32                  crc;
@@ -4040,7 +4049,7 @@ xlog_unpack_data_crc(
 STATIC int
 xlog_unpack_data(
        struct xlog_rec_header  *rhead,
-       xfs_caddr_t             dp,
+       char                    *dp,
        struct xlog             *log)
 {
        int                     i, j, k;
@@ -4122,7 +4131,7 @@ xlog_do_recovery_pass(
 {
        xlog_rec_header_t       *rhead;
        xfs_daddr_t             blk_no;
-       xfs_caddr_t             offset;
+       char                    *offset;
        xfs_buf_t               *hbp, *dbp;
        int                     error = 0, h_size;
        int                     bblks, split_bblks;
index 6f23fbdfb365adca1571eadece38b77a619c50ad..461e791efad71d66f1ffa50930be9fba6ee9f352 100644 (file)
@@ -724,6 +724,22 @@ xfs_mountfs(
                        mp->m_inode_cluster_size = new_size;
        }
 
+       /*
+        * If enabled, sparse inode chunk alignment is expected to match the
+        * cluster size. Full inode chunk alignment must match the chunk size,
+        * but that is checked on sb read verification...
+        */
+       if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
+           mp->m_sb.sb_spino_align !=
+                       XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+               xfs_warn(mp,
+       "Sparse inode block alignment (%u) must match cluster size (%llu).",
+                        mp->m_sb.sb_spino_align,
+                        XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+               error = -EINVAL;
+               goto out_remove_uuid;
+       }
+
        /*
         * Set inode alignment fields
         */
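
Editor's note, a worked example with geometry assumed for illustration: with 4096-byte filesystem blocks and an 8192-byte inode cluster, XFS_B_TO_FSBT(mp, 8192) converts to 2 filesystem blocks, so a sparse-inode filesystem whose sb_spino_align is anything other than 2 now fails the mount with -EINVAL.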
index 8c995a2ccb6f94d01728812e47e361678ce61c22..7999e91cd49ad862d5fbeb29a5e5487ed94509a8 100644 (file)
@@ -101,6 +101,8 @@ typedef struct xfs_mount {
        __uint64_t              m_flags;        /* global mount flags */
        int                     m_ialloc_inos;  /* inodes in inode allocation */
        int                     m_ialloc_blks;  /* blocks in inode allocation */
+       int                     m_ialloc_min_blks;/* min blocks in sparse inode
+                                                  * allocation */
        int                     m_inoalign_mask;/* mask sb_inoalignmt if used */
        uint                    m_qflags;       /* quota status flags */
        struct xfs_trans_resv   m_resv;         /* precomputed res values */
@@ -179,6 +181,8 @@ typedef struct xfs_mount {
                                                   allocator */
 #define XFS_MOUNT_NOATTR2      (1ULL << 25)    /* disable use of attr2 format */
 
+#define XFS_MOUNT_DAX          (1ULL << 62)    /* TEST ONLY! */
+
 
 /*
  * Default minimum read and write sizes.
index 981a657eca396768dccafb187526de6f5395bfa0..ab4a6066f7ca7b24fbe4cdf2db0f2c1ddfdab3df 100644 (file)
@@ -306,7 +306,7 @@ xfs_fs_commit_blocks(
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                goto out_drop_iolock;
        }
 
@@ -321,7 +321,7 @@ xfs_fs_commit_blocks(
        }
 
        xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
 
 out_drop_iolock:
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
index 5538468c7f630d21d16c34551b98f2f4c4c7c5a9..eac9549efd52c824d7e298b494a470f43923936e 100644 (file)
@@ -756,7 +756,7 @@ xfs_qm_qino_alloc(
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
                                  XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -764,8 +764,7 @@ xfs_qm_qino_alloc(
                error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
                                                                &committed);
                if (error) {
-                       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                        XFS_TRANS_ABORT);
+                       xfs_trans_cancel(tp);
                        return error;
                }
        }
@@ -796,7 +795,7 @@ xfs_qm_qino_alloc(
        spin_unlock(&mp->m_sb_lock);
        xfs_log_sb(tp);
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error) {
                ASSERT(XFS_FORCED_SHUTDOWN(mp));
                xfs_alert(mp, "%s failed (error %d)!", __func__, error);
index 9a25c9275fb32aa6be67a79e7b5b5d4eab96cd09..3640c6e896af70eb2e910a31786cb7ac2298f847 100644 (file)
@@ -239,7 +239,7 @@ xfs_qm_scall_trunc_qfile(
        tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                goto out_put;
        }
@@ -252,15 +252,14 @@ xfs_qm_scall_trunc_qfile(
 
        error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
        if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                    XFS_TRANS_ABORT);
+               xfs_trans_cancel(tp);
                goto out_unlock;
        }
 
        ASSERT(ip->i_d.di_nextents == 0);
 
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
 
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -437,7 +436,7 @@ xfs_qm_scall_setqlim(
        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                goto out_rele;
        }
 
@@ -548,7 +547,7 @@ xfs_qm_scall_setqlim(
        dqp->dq_flags |= XFS_DQ_DIRTY;
        xfs_trans_log_dquot(tp, dqp);
 
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
 
 out_rele:
        xfs_qm_dqrele(dqp);
@@ -571,7 +570,7 @@ xfs_qm_log_quotaoff_end(
 
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -585,8 +584,7 @@ xfs_qm_log_quotaoff_end(
         * We don't care about quotaoff's performance.
         */
        xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-       return error;
+       return xfs_trans_commit(tp);
 }
 
 
@@ -605,7 +603,7 @@ xfs_qm_log_quotaoff(
        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                goto out;
        }
 
@@ -624,7 +622,7 @@ xfs_qm_log_quotaoff(
         * We don't care about quotoff's performance.
         */
        xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
+       error = xfs_trans_commit(tp);
        if (error)
                goto out;
 
index 5376dd406ba2c099e23230014b1b23cbb832f6ad..ce6506adab7b715158418d99c1ff55011cd3bf9e 100644 (file)
@@ -55,7 +55,6 @@ struct xfs_trans;
 typedef struct xfs_dqtrx {
        struct xfs_dquot *qt_dquot;       /* the dquot this refers to */
        ulong           qt_blk_res;       /* blks reserved on a dquot */
-       ulong           qt_blk_res_used;  /* blks used from the reservation */
        ulong           qt_ino_res;       /* inode reserved on a dquot */
        ulong           qt_ino_res_used;  /* inodes used from the reservation */
        long            qt_bcount_delta;  /* dquot blk count changes */
index f2079b6911cc84dd50122380eb1b540a6ae9612c..f4e8c06eee26d0e5e4608bc40f3c571b3618d433 100644 (file)
@@ -780,7 +780,6 @@ xfs_growfs_rt_alloc(
         * Allocate space to the file, as necessary.
         */
        while (oblocks < nblocks) {
-               int             cancelflags = 0;
                xfs_trans_t     *tp;
 
                tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
@@ -792,7 +791,6 @@ xfs_growfs_rt_alloc(
                                          resblks, 0);
                if (error)
                        goto error_cancel;
-               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
                /*
                 * Lock the inode.
                 */
@@ -804,7 +802,6 @@ xfs_growfs_rt_alloc(
                 * Allocate blocks to the bitmap file.
                 */
                nmap = 1;
-               cancelflags |= XFS_TRANS_ABORT;
                error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
                                        XFS_BMAPI_METADATA, &firstblock,
                                        resblks, &map, &nmap, &flist);
@@ -818,14 +815,13 @@ xfs_growfs_rt_alloc(
                error = xfs_bmap_finish(&tp, &flist, &committed);
                if (error)
                        goto error_cancel;
-               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               error = xfs_trans_commit(tp);
                if (error)
                        goto error;
                /*
                 * Now we need to clear the allocated blocks.
                 * Do this one block per transaction, to keep it simple.
                 */
-               cancelflags = 0;
                for (bno = map.br_startoff, fsbno = map.br_startblock;
                     bno < map.br_startoff + map.br_blockcount;
                     bno++, fsbno++) {
@@ -851,7 +847,7 @@ xfs_growfs_rt_alloc(
                        if (bp == NULL) {
                                error = -EIO;
 error_cancel:
-                               xfs_trans_cancel(tp, cancelflags);
+                               xfs_trans_cancel(tp);
                                goto error;
                        }
                        memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
@@ -859,7 +855,7 @@ error_cancel:
                        /*
                         * Commit the transaction.
                         */
-                       error = xfs_trans_commit(tp, 0);
+                       error = xfs_trans_commit(tp);
                        if (error)
                                goto error;
                }
@@ -973,7 +969,6 @@ xfs_growfs_rt(
             bmbno < nrbmblocks;
             bmbno++) {
                xfs_trans_t     *tp;
-               int             cancelflags = 0;
 
                *nmp = *mp;
                nsbp = &nmp->m_sb;
@@ -1015,7 +1010,6 @@ xfs_growfs_rt(
                mp->m_rbmip->i_d.di_size =
                        nsbp->sb_rbmblocks * nsbp->sb_blocksize;
                xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
-               cancelflags |= XFS_TRANS_ABORT;
                /*
                 * Get the summary inode into the transaction.
                 */
@@ -1062,7 +1056,7 @@ xfs_growfs_rt(
                        nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
                if (error) {
 error_cancel:
-                       xfs_trans_cancel(tp, cancelflags);
+                       xfs_trans_cancel(tp);
                        break;
                }
                /*
@@ -1076,7 +1070,7 @@ error_cancel:
                mp->m_rsumlevels = nrsumlevels;
                mp->m_rsumsize = nrsumsize;
 
-               error = xfs_trans_commit(tp, 0);
+               error = xfs_trans_commit(tp);
                if (error)
                        break;
        }
index 858e1e62bbaa390d0ea34a5b7766274f04361fd2..1fb16562c159947ac27adae43f6abb4f195e1cfa 100644 (file)
@@ -112,6 +112,8 @@ static struct xfs_kobj xfs_dbg_kobj;        /* global debug sysfs attrs */
 #define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
 #define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
 
+#define MNTOPT_DAX     "dax"           /* Enable direct access to bdev pages */
+
 /*
  * Table driven mount option parser.
  *
@@ -363,6 +365,10 @@ xfs_parseargs(
                        mp->m_flags |= XFS_MOUNT_DISCARD;
                } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
+#ifdef CONFIG_FS_DAX
+               } else if (!strcmp(this_char, MNTOPT_DAX)) {
+                       mp->m_flags |= XFS_MOUNT_DAX;
+#endif
                } else {
                        xfs_warn(mp, "unknown mount option [%s].", this_char);
                        return -EINVAL;
@@ -452,8 +458,8 @@ done:
 }
 
 struct proc_xfs_info {
-       int     flag;
-       char    *str;
+       uint64_t        flag;
+       char            *str;
 };
 
 STATIC int
@@ -474,6 +480,7 @@ xfs_showargs(
                { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
                { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
                { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_32BITINODE },
+               { XFS_MOUNT_DAX,                "," MNTOPT_DAX },
                { 0, NULL }
        };
        static struct proc_xfs_info xfs_info_unset[] = {
@@ -1507,6 +1514,20 @@ xfs_fs_fill_super(
        if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
                sb->s_flags |= MS_I_VERSION;
 
+       if (mp->m_flags & XFS_MOUNT_DAX) {
+               xfs_warn(mp,
+       "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+               if (sb->s_blocksize != PAGE_SIZE) {
+                       xfs_alert(mp,
+               "Filesystem block size invalid for DAX Turning DAX off.");
+                       mp->m_flags &= ~XFS_MOUNT_DAX;
+               } else if (!sb->s_bdev->bd_disk->fops->direct_access) {
+                       xfs_alert(mp,
+               "Block device does not support DAX Turning DAX off.");
+                       mp->m_flags &= ~XFS_MOUNT_DAX;
+               }
+       }
+
        error = xfs_mountfs(mp);
        if (error)
                goto out_filestream_unmount;
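
Editor's note: the proc_xfs_info.flag member is widened to uint64_t above because XFS_MOUNT_DAX lives at bit 62 of the 64-bit m_flags word; stored in a plain int it would be truncated and xfs_showargs() could never report the option. A standalone sketch of the truncation (illustrative only):

	#include <stdio.h>
	#include <stdint.h>

	#define XFS_MOUNT_DAX	(1ULL << 62)

	int main(void)
	{
		int narrow = XFS_MOUNT_DAX;	/* implementation-defined; 0 with gcc */
		uint64_t wide = XFS_MOUNT_DAX;	/* bit 62 preserved */

		printf("narrow=%d wide=%#llx\n",
		       narrow, (unsigned long long)wide);
		return 0;
	}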
index 3df411eadb867c83388bdec2995ca41a7a815d7e..4be27b0210af863f3913f94b9b7134307a7c74ff 100644 (file)
@@ -104,7 +104,7 @@ xfs_readlink_bmap(
                        cur_chunk += sizeof(struct xfs_dsymlink_hdr);
                }
 
-               memcpy(link + offset, bp->b_addr, byte_cnt);
+               memcpy(link + offset, cur_chunk, byte_cnt);
 
                pathlen -= byte_cnt;
                offset += byte_cnt;
@@ -178,7 +178,6 @@ xfs_symlink(
        struct xfs_bmap_free    free_list;
        xfs_fsblock_t           first_block;
        bool                    unlock_dp_on_error = false;
-       uint                    cancel_flags;
        int                     committed;
        xfs_fileoff_t           first_fsb;
        xfs_filblks_t           fs_blocks;
@@ -224,7 +223,6 @@ xfs_symlink(
                return error;
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        /*
         * The symlink will fit into the inode data fork?
         * There can't be any attributes so we get the whole variable part.
@@ -239,10 +237,8 @@ xfs_symlink(
                resblks = 0;
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
        }
-       if (error) {
-               cancel_flags = 0;
+       if (error)
                goto out_trans_cancel;
-       }
 
        xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
        unlock_dp_on_error = true;
@@ -394,7 +390,7 @@ xfs_symlink(
        if (error)
                goto out_bmap_cancel;
 
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error)
                goto out_release_inode;
 
@@ -407,9 +403,8 @@ xfs_symlink(
 
 out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
-       cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
+       xfs_trans_cancel(tp);
 out_release_inode:
        /*
         * Wait until after the current transaction is aborted to finish the
@@ -464,7 +459,7 @@ xfs_inactive_symlink_rmt(
        tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
        if (error) {
-               xfs_trans_cancel(tp, 0);
+               xfs_trans_cancel(tp);
                return error;
        }
 
@@ -533,7 +528,7 @@ xfs_inactive_symlink_rmt(
        /*
         * Commit the transaction containing extent freeing and EFDs.
         */
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       error = xfs_trans_commit(tp);
        if (error) {
                ASSERT(XFS_FORCED_SHUTDOWN(mp));
                goto error_unlock;
@@ -552,7 +547,7 @@ xfs_inactive_symlink_rmt(
 error_bmap_cancel:
        xfs_bmap_cancel(&free_list);
 error_trans_cancel:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_trans_cancel(tp);
 error_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
index 615781bf4ee5096e96c00e7f14f7a28f0cd16f9e..8d916d33d93da78a4fb6d214fbd4282948211bc8 100644 (file)
@@ -738,6 +738,53 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
                  __entry->blocks, __entry->shift, __entry->writeio_blocks)
 )
 
+TRACE_EVENT(xfs_irec_merge_pre,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
+                uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
+       TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, agino)
+               __field(uint16_t, holemask)
+               __field(xfs_agino_t, nagino)
+               __field(uint16_t, nholemask)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agino = agino;
+               __entry->holemask = holemask;
+               __entry->nagino = nagino;
+               __entry->nholemask = nholemask;
+       ),
+       TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+                 __entry->agino, __entry->holemask, __entry->nagino,
+                 __entry->nholemask)
+)
+
+TRACE_EVENT(xfs_irec_merge_post,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
+                uint16_t holemask),
+       TP_ARGS(mp, agno, agino, holemask),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, agino)
+               __field(uint16_t, holemask)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agino = agino;
+               __entry->holemask = holemask;
+       ),
+       TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
+                 MINOR(__entry->dev), __entry->agno, __entry->agino,
+                 __entry->holemask)
+)
+
 #define DEFINE_IREF_EVENT(name) \
 DEFINE_EVENT(xfs_iref_class, name, \
        TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
index 220ef2c906b2e2da7fcdfc0278a6256c59943691..0582a27107d4ee3dcd2e57c6739405a96280d79d 100644 (file)
@@ -113,7 +113,7 @@ xfs_trans_free(
  * blocks.  Locks and log items, however, are not inherited.  They must
  * be added to the new transaction explicitly.
  */
-xfs_trans_t *
+STATIC xfs_trans_t *
 xfs_trans_dup(
        xfs_trans_t     *tp)
 {
@@ -251,14 +251,7 @@ xfs_trans_reserve(
         */
 undo_log:
        if (resp->tr_logres > 0) {
-               int             log_flags;
-
-               if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
-                       log_flags = XFS_LOG_REL_PERM_RESERV;
-               } else {
-                       log_flags = 0;
-               }
-               xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+               xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
                tp->t_ticket = NULL;
                tp->t_log_res = 0;
                tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
@@ -744,7 +737,7 @@ void
 xfs_trans_free_items(
        struct xfs_trans        *tp,
        xfs_lsn_t               commit_lsn,
-       int                     flags)
+       bool                    abort)
 {
        struct xfs_log_item_desc *lidp, *next;
 
@@ -755,7 +748,7 @@ xfs_trans_free_items(
 
                if (commit_lsn != NULLCOMMITLSN)
                        lip->li_ops->iop_committing(lip, commit_lsn);
-               if (flags & XFS_TRANS_ABORT)
+               if (abort)
                        lip->li_flags |= XFS_LI_ABORTED;
                lip->li_ops->iop_unlock(lip);
 
@@ -892,26 +885,16 @@ xfs_trans_committed_bulk(
  * have already been unlocked as if the commit had succeeded.
  * Do not reference the transaction structure after this call.
  */
-int
-xfs_trans_commit(
+static int
+__xfs_trans_commit(
        struct xfs_trans        *tp,
-       uint                    flags)
+       bool                    regrant)
 {
        struct xfs_mount        *mp = tp->t_mountp;
        xfs_lsn_t               commit_lsn = -1;
        int                     error = 0;
-       int                     log_flags = 0;
        int                     sync = tp->t_flags & XFS_TRANS_SYNC;
 
-       /*
-        * Determine whether this commit is releasing a permanent
-        * log reservation or not.
-        */
-       if (flags & XFS_TRANS_RELEASE_LOG_RES) {
-               ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
-               log_flags = XFS_LOG_REL_PERM_RESERV;
-       }
-
        /*
         * If there is nothing to be logged by the transaction,
         * then unlock all of the items associated with the
@@ -936,7 +919,7 @@ xfs_trans_commit(
                xfs_trans_apply_sb_deltas(tp);
        xfs_trans_apply_dquot_deltas(tp);
 
-       xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
+       xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
 
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
        xfs_trans_free(tp);
@@ -964,18 +947,25 @@ out_unreserve:
         */
        xfs_trans_unreserve_and_mod_dquots(tp);
        if (tp->t_ticket) {
-               commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+               commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
                if (commit_lsn == -1 && !error)
                        error = -EIO;
        }
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-       xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
+       xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
        xfs_trans_free(tp);
 
        XFS_STATS_INC(xs_trans_empty);
        return error;
 }
 
+int
+xfs_trans_commit(
+       struct xfs_trans        *tp)
+{
+       return __xfs_trans_commit(tp, false);
+}
+
 /*
  * Unlock all of the transaction's items and free the transaction.
  * The transaction must not have modified any of its items, because
@@ -986,29 +976,22 @@ out_unreserve:
  */
 void
 xfs_trans_cancel(
-       xfs_trans_t             *tp,
-       int                     flags)
+       struct xfs_trans        *tp)
 {
-       int                     log_flags;
-       xfs_mount_t             *mp = tp->t_mountp;
+       struct xfs_mount        *mp = tp->t_mountp;
+       bool                    dirty = (tp->t_flags & XFS_TRANS_DIRTY);
 
-       /*
-        * See if the caller is being too lazy to figure out if
-        * the transaction really needs an abort.
-        */
-       if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY))
-               flags &= ~XFS_TRANS_ABORT;
        /*
         * See if the caller is relying on us to shut down the
         * filesystem.  This happens in paths where we detect
         * corruption and decide to give up.
         */
-       if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+       if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
                XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
        }
 #ifdef DEBUG
-       if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
+       if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
                struct xfs_log_item_desc *lidp;
 
                list_for_each_entry(lidp, &tp->t_items, lid_trans)
@@ -1018,27 +1001,20 @@ xfs_trans_cancel(
        xfs_trans_unreserve_and_mod_sb(tp);
        xfs_trans_unreserve_and_mod_dquots(tp);
 
-       if (tp->t_ticket) {
-               if (flags & XFS_TRANS_RELEASE_LOG_RES) {
-                       ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
-                       log_flags = XFS_LOG_REL_PERM_RESERV;
-               } else {
-                       log_flags = 0;
-               }
-               xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
-       }
+       if (tp->t_ticket)
+               xfs_log_done(mp, tp->t_ticket, NULL, false);
 
        /* mark this thread as no longer being in a transaction */
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 
-       xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
+       xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
        xfs_trans_free(tp);
 }
 
 /*
  * Roll from one trans in the sequence of PERMANENT transactions to
  * the next: permanent transactions are only flushed out when
- * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
+ * committed with xfs_trans_commit(), but we still want as soon
  * as possible to let chunks of it go to the log. So we commit the
  * chunk we've been working on and get a new transaction to continue.
  */
@@ -1055,7 +1031,8 @@ xfs_trans_roll(
         * Ensure that the inode is always logged.
         */
        trans = *tpp;
-       xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
+       if (dp)
+               xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
 
        /*
         * Copy the critical parameters from one trans to the next.
@@ -1071,19 +1048,12 @@ xfs_trans_roll(
         * is in progress. The caller takes the responsibility to cancel
         * the duplicate transaction that gets returned.
         */
-       error = xfs_trans_commit(trans, 0);
+       error = __xfs_trans_commit(trans, true);
        if (error)
                return error;
 
        trans = *tpp;
 
-       /*
-        * transaction commit worked ok so we can drop the extra ticket
-        * reference that we gained in xfs_trans_dup()
-        */
-       xfs_log_ticket_put(trans->t_ticket);
-
-
        /*
         * Reserve space in the log for the next transaction.
         * This also pushes items in the "AIL", the list of logged items,
@@ -1100,6 +1070,7 @@ xfs_trans_roll(
        if (error)
                return error;
 
-       xfs_trans_ijoin(trans, dp, 0);
+       if (dp)
+               xfs_trans_ijoin(trans, dp, 0);
        return 0;
 }
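
Editor's note: with the flags arguments gone, every transaction caller in the diffs above converges on the same shape; whether a permanent log reservation is released or the transaction aborted is now derived from the transaction's own state instead of being passed in. A condensed sketch of the resulting calling convention (not a verbatim caller):

	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp);	/* no flags: abort inferred from dirty state */
		return error;
	}

	/* ... join inodes, log items, make modifications ... */

	return xfs_trans_commit(tp);	/* no XFS_TRANS_RELEASE_LOG_RES */

Rolling transactions get the regrant behaviour implicitly: xfs_trans_roll() now commits through __xfs_trans_commit(trans, true), which is why the extra ticket reference taken by xfs_trans_dup() no longer needs to be dropped by hand.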
index b5bc1ab3c4da0561980333e85a3b0a9aae15322f..3b21b4e5e4678885f0a9197cd073684814a118a4 100644 (file)
@@ -133,8 +133,6 @@ typedef struct xfs_trans {
  * XFS transaction mechanism exported interfaces that are
  * actually macros.
  */
-#define        xfs_trans_get_log_res(tp)       ((tp)->t_log_res)
-#define        xfs_trans_get_log_count(tp)     ((tp)->t_log_count)
 #define        xfs_trans_get_block_res(tp)     ((tp)->t_blk_res)
 #define        xfs_trans_set_sync(tp)          ((tp)->t_flags |= XFS_TRANS_SYNC)
 
@@ -153,7 +151,6 @@ typedef struct xfs_trans {
  */
 xfs_trans_t    *xfs_trans_alloc(struct xfs_mount *, uint);
 xfs_trans_t    *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
-xfs_trans_t    *xfs_trans_dup(xfs_trans_t *);
 int            xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
                                  uint, uint);
 void           xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
@@ -228,9 +225,9 @@ void                xfs_trans_log_efd_extent(xfs_trans_t *,
                                         struct xfs_efd_log_item *,
                                         xfs_fsblock_t,
                                         xfs_extlen_t);
-int            xfs_trans_commit(xfs_trans_t *, uint flags);
+int            xfs_trans_commit(struct xfs_trans *);
 int            xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
-void           xfs_trans_cancel(xfs_trans_t *, int);
+void           xfs_trans_cancel(xfs_trans_t *);
 int            xfs_trans_ail_init(struct xfs_mount *);
 void           xfs_trans_ail_destroy(struct xfs_mount *);
 
index 573aefb5a5731d37fa79fa6fb8a522fb29a43848..1098cf4901892830184c0a3f2306210be1930f9f 100644 (file)
@@ -159,7 +159,7 @@ xfs_trans_ail_cursor_next(
 {
        struct xfs_log_item     *lip = cur->item;
 
-       if ((__psint_t)lip & 1)
+       if ((uintptr_t)lip & 1)
                lip = xfs_ail_min(ailp);
        if (lip)
                cur->item = xfs_ail_next(ailp, lip);
@@ -196,7 +196,7 @@ xfs_trans_ail_cursor_clear(
        list_for_each_entry(cur, &ailp->xa_cursors, list) {
                if (cur->item == lip)
                        cur->item = (struct xfs_log_item *)
-                                       ((__psint_t)cur->item | 1);
+                                       ((uintptr_t)cur->item | 1);
        }
 }
 
@@ -287,7 +287,7 @@ xfs_ail_splice(
         * find the place in the AIL where the items belong.
         */
        lip = cur ? cur->item : NULL;
-       if (!lip || (__psint_t) lip & 1)
+       if (!lip || (uintptr_t)lip & 1)
                lip = __xfs_trans_ail_cursor_last(ailp, lsn);
 
        /*
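
Editor's note: the `& 1` tests rely on log items being at least word aligned, which leaves bit 0 of a valid pointer always clear; the AIL code sets it to mark a cursor whose item has gone away. The uintptr_t casts replace the homegrown __psint_t with the standard integer type guaranteed to round-trip a pointer. A standalone sketch of the tagging trick:

	#include <stdint.h>
	#include <stdio.h>

	struct item { int v; };

	int main(void)
	{
		struct item it = { 42 };
		struct item *p = &it;

		/* tag: bit 0 of an aligned pointer is otherwise always clear */
		p = (struct item *)((uintptr_t)p | 1);

		if ((uintptr_t)p & 1)	/* detect the invalidated cursor */
			p = (struct item *)((uintptr_t)p & ~(uintptr_t)1);

		printf("%d\n", p->v);	/* prints 42 */
		return 0;
	}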
index 76a16df55ef72ef11274cd2e25dc07bc8326dfdd..ce78534a047eebe9a1b86cec00f7f4b3f396489b 100644 (file)
@@ -90,8 +90,9 @@ xfs_trans_dup_dqinfo(
        xfs_trans_t     *ntp)
 {
        xfs_dqtrx_t     *oq, *nq;
-       int             i,j;
+       int             i, j;
        xfs_dqtrx_t     *oqa, *nqa;
+       ulong           blk_res_used;
 
        if (!otp->t_dqinfo)
                return;
@@ -102,18 +103,23 @@ xfs_trans_dup_dqinfo(
         * Because the quota blk reservation is carried forward,
         * it is also necessary to carry forward the DQ_DIRTY flag.
         */
-       if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+       if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
                ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
 
        for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
                oqa = otp->t_dqinfo->dqs[j];
                nqa = ntp->t_dqinfo->dqs[j];
                for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       blk_res_used = 0;
+
                        if (oqa[i].qt_dquot == NULL)
                                break;
                        oq = &oqa[i];
                        nq = &nqa[i];
 
+                       if (oq->qt_blk_res && oq->qt_bcount_delta > 0)
+                               blk_res_used = oq->qt_bcount_delta;
+
                        nq->qt_dquot = oq->qt_dquot;
                        nq->qt_bcount_delta = nq->qt_icount_delta = 0;
                        nq->qt_rtbcount_delta = 0;
@@ -121,8 +127,8 @@ xfs_trans_dup_dqinfo(
                        /*
                         * Transfer whatever is left of the reservations.
                         */
-                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
-                       oq->qt_blk_res = oq->qt_blk_res_used;
+                       nq->qt_blk_res = oq->qt_blk_res - blk_res_used;
+                       oq->qt_blk_res = blk_res_used;
 
                        nq->qt_rtblk_res = oq->qt_rtblk_res -
                                oq->qt_rtblk_res_used;
@@ -239,10 +245,6 @@ xfs_trans_mod_dquot(
                 * disk blocks used.
                 */
              case XFS_TRANS_DQ_BCOUNT:
-               if (qtrx->qt_blk_res && delta > 0) {
-                       qtrx->qt_blk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
-               }
                qtrx->qt_bcount_delta += delta;
                break;
 
@@ -423,15 +425,19 @@ xfs_trans_apply_dquot_deltas(
                         * reservation that a transaction structure knows of.
                         */
                        if (qtrx->qt_blk_res != 0) {
-                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-                                       if (qtrx->qt_blk_res >
-                                           qtrx->qt_blk_res_used)
+                               ulong blk_res_used = 0;
+
+                               if (qtrx->qt_bcount_delta > 0)
+                                       blk_res_used = qtrx->qt_bcount_delta;
+
+                               if (qtrx->qt_blk_res != blk_res_used) {
+                                       if (qtrx->qt_blk_res > blk_res_used)
                                                dqp->q_res_bcount -= (xfs_qcnt_t)
                                                        (qtrx->qt_blk_res -
-                                                        qtrx->qt_blk_res_used);
+                                                        blk_res_used);
                                        else
                                                dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res_used -
+                                                       (blk_res_used -
                                                         qtrx->qt_blk_res);
                                }
                        } else {
index bd1281862ad795c8a4b4536d377341403b388c65..1b736294558a76f0498f7e034885ddc5ce9c1497 100644 (file)
@@ -30,7 +30,7 @@ void  xfs_trans_init(struct xfs_mount *);
 void   xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void   xfs_trans_del_item(struct xfs_log_item *);
 void   xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
-                               int flags);
+                               bool abort);
 void   xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
 
 void   xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
index b43276f339efd0114400ab6396ba7efd651a44c3..83061cac719bce830608daac2ed45e1760f5bd76 100644 (file)
@@ -420,7 +420,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode)
        return fwnode && fwnode->type == FWNODE_ACPI;
 }
 
-static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode)
 {
        return is_acpi_node(fwnode) ?
                container_of(fwnode, struct acpi_device, fwnode) : NULL;
index a7d7f1043e9c124f84dbd1130259c343a754c9db..e840b294c6f5beb2f8a178aa0d3bbb8e92040e40 100644 (file)
@@ -43,7 +43,7 @@ static inline enum acpi_backlight_type acpi_video_get_backlight_type(void)
 {
        return acpi_backlight_vendor;
 }
-static void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
+static inline void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
 {
 }
 #endif
index e6a83d712ef6772ac7265e914837da7806e21441..55e3abc2d027270b0e6e1c554b9f376d3cd355f1 100644 (file)
 #endif
 
 #ifdef CONFIG_SMP
+
+#ifndef smp_mb
 #define smp_mb()       mb()
+#endif
+
+#ifndef smp_rmb
 #define smp_rmb()      rmb()
+#endif
+
+#ifndef smp_wmb
 #define smp_wmb()      wmb()
+#endif
+
+#ifndef smp_read_barrier_depends
 #define smp_read_barrier_depends()     read_barrier_depends()
-#else
+#endif
+
+#else  /* !CONFIG_SMP */
+
+#ifndef smp_mb
 #define smp_mb()       barrier()
+#endif
+
+#ifndef smp_rmb
 #define smp_rmb()      barrier()
+#endif
+
+#ifndef smp_wmb
 #define smp_wmb()      barrier()
+#endif
+
+#ifndef smp_read_barrier_depends
 #define smp_read_barrier_depends()     do { } while (0)
 #endif
 
+#endif /* CONFIG_SMP */
+
 #ifndef smp_store_mb
 #define smp_store_mb(var, value)  do { WRITE_ONCE(var, value); mb(); } while (0)
 #endif
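
Editor's note: guarding every definition with #ifndef turns this header into a set of overridable defaults. An architecture that only needs a stronger smp_mb() can define just that one before pulling in the generic header and inherit the rest. A sketch of such an arch header (the architecture and its barrier instruction are hypothetical):

	/* arch/foo/include/asm/barrier.h -- hypothetical example */
	#ifndef _ASM_FOO_BARRIER_H
	#define _ASM_FOO_BARRIER_H

	/* arch-specific full barrier; overrides the generic default */
	#define smp_mb()	__asm__ __volatile__("fence" ::: "memory")

	/* smp_rmb(), smp_wmb(), smp_read_barrier_depends() fall through
	 * to the #ifndef'd defaults in the generic header. */
	#include <asm-generic/barrier.h>

	#endif /* _ASM_FOO_BARRIER_H */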
index 1618cdfb38c7b8e312f4a39b412593d4c480bc07..c471dfc93b716e162815709a25e5a705c4b45159 100644 (file)
@@ -53,7 +53,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
        return adev ? adev->handle : NULL;
 }
 
-#define ACPI_COMPANION(dev)            acpi_node((dev)->fwnode)
+#define ACPI_COMPANION(dev)            to_acpi_node((dev)->fwnode)
 #define ACPI_COMPANION_SET(dev, adev)  set_primary_fwnode(dev, (adev) ? \
        acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)               acpi_device_handle(ACPI_COMPANION(dev))
@@ -454,7 +454,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode)
        return false;
 }
 
-static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode)
 {
        return NULL;
 }
index 26fc8bc77f85644adf8cb955579252b1eebcc423..7f8ad9593da725438aaefb397b99d9fea4109f90 100644 (file)
@@ -475,6 +475,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        (volatile typeof(x) *)&(x); })
 #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
 
+/**
+ * lockless_dereference() - safely load a pointer for later dereference
+ * @p: The pointer to load
+ *
+ * Similar to rcu_dereference(), but for situations where the pointed-to
+ * object's lifetime is managed by something other than RCU.  That
+ * "something other" might be reference counting or simple immortality.
+ */
+#define lockless_dereference(p) \
+({ \
+       typeof(p) _________p1 = READ_ONCE(p); \
+       smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
+       (_________p1); \
+})
+
 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
 #ifdef CONFIG_KPROBES
 # define __kprobes     __attribute__((__section__(".kprobes.text")))
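
Editor's note: a usage sketch for lockless_dereference() (names hypothetical): a reader loads a pointer published by a writer and dereferences it without rcu_read_lock(), relying on reference counting or immortality for lifetime. The data-dependency barrier only has an effect on Alpha, but the macro documents the intent everywhere:

	struct conf {
		int threshold;
	};

	static struct conf *global_conf;	/* published by a writer, freed via refcount */

	static int read_threshold(void)
	{
		/* ordered load: the c->threshold read below cannot be
		 * reordered before the pointer load on Alpha */
		struct conf *c = lockless_dereference(global_conf);

		return c ? c->threshold : -1;
	}

The writer side still needs a matching release when publishing, e.g. smp_store_release(&global_conf, new_conf).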
index e351da4a934f415b4ba0d2cc84acaa52c8120614..3f1a84635da896c5c4da944a9c29c428fab2774b 100644 (file)
@@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create);
 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
                        ssize_t bytes, void *private);
+typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
 
 #define MAY_EXEC               0x00000001
 #define MAY_WRITE              0x00000002
@@ -2655,9 +2656,13 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 int dax_clear_blocks(struct inode *, sector_t block, long size);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+               dax_iodone_t);
+int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+               dax_iodone_t);
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb)      dax_fault(vma, vmf, gb)
+#define dax_mkwrite(vma, vmf, gb, iod)         dax_fault(vma, vmf, gb, iod)
+#define __dax_mkwrite(vma, vmf, gb, iod)       __dax_fault(vma, vmf, gb, iod)
 
 #ifdef CONFIG_BLOCK
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
index 812149160d3bc5829750d5f2790ae23c4791092f..92188b0225bb31f33eba9deacc4b9b88036c1d8e 100644 (file)
@@ -407,7 +407,6 @@ enum {
        IRQCHIP_EOI_THREADED            = (1 <<  6),
 };
 
-/* This include will go away once we isolated irq_desc usage to core code */
 #include <linux/irqdesc.h>
 
 /*
index c52d1480f272448a18ae5ea8400328c82f0c76da..624a668e61f1a6c5096e2afa78bd403655c11e59 100644 (file)
@@ -3,9 +3,6 @@
 
 /*
  * Core internal functions to deal with irq descriptors
- *
- * This include will move to kernel/irq once we cleaned up the tree.
- * For now it's included from <linux/irq.h>
  */
 
 struct irq_affinity_notify;
@@ -103,6 +100,11 @@ static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
 #endif
 }
 
+static inline unsigned int irq_desc_get_irq(struct irq_desc *desc)
+{
+       return desc->irq_data.irq;
+}
+
 static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
 {
        return &desc->irq_data;
@@ -188,6 +190,47 @@ __irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip,
        desc->name = name;
 }
 
+/**
+ * irq_set_handler_locked - Set irq handler from a locked region
+ * @data:      Pointer to the irq_data structure which identifies the irq
+ * @handler:   Flow control handler function for this interrupt
+ *
+ * Sets the handler in the irq descriptor associated with @data.
+ *
+ * Must be called with irq_desc locked and valid parameters. Typical
+ * call site is the irq_set_type() callback.
+ */
+static inline void irq_set_handler_locked(struct irq_data *data,
+                                         irq_flow_handler_t handler)
+{
+       struct irq_desc *desc = irq_data_to_desc(data);
+
+       desc->handle_irq = handler;
+}
+
+/**
+ * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region
+ * @data:      Pointer to the irq_data structure for which the chip is set
+ * @chip:      Pointer to the new irq chip
+ * @handler:   Flow control handler function for this interrupt
+ * @name:      Name of the interrupt
+ *
+ * Replaces the irq chip at the proper hierarchy level in @data and
+ * sets the handler and name in the associated irq descriptor.
+ *
+ * Must be called with irq_desc locked and valid parameters.
+ */
+static inline void
+irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
+                                irq_flow_handler_t handler, const char *name)
+{
+       struct irq_desc *desc = irq_data_to_desc(data);
+
+       desc->handle_irq = handler;
+       desc->name = name;
+       data->chip = chip;
+}
+
 static inline int irq_balancing_disabled(unsigned int irq)
 {
        struct irq_desc *desc;
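
Editor's note: as the kernel-doc above says, the expected caller of irq_set_handler_locked() is an irqchip's irq_set_type() callback, which already runs under the descriptor lock. A sketch of such a callback (the driver name is hypothetical):

	static int foo_irq_set_type(struct irq_data *data, unsigned int type)
	{
		/* ... program the controller's trigger registers here ... */

		/* descriptor lock is already held in this callback */
		if (type & IRQ_TYPE_LEVEL_MASK)
			irq_set_handler_locked(data, handle_level_irq);
		else
			irq_set_handler_locked(data, handle_edge_irq);

		return 0;
	}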
index fdd5cc16c9c43bcf4bb72cbd7c18d2047af7dd7c..9669bf9d4f48fa38381f6c5f931b6579fc10a800 100644 (file)
@@ -23,12 +23,6 @@ unsigned int irq_get_next_irq(unsigned int offset);
                        ;                                               \
                else
 
-#ifdef CONFIG_SMP
-#define irq_node(irq)  (irq_get_irq_data(irq)->node)
-#else
-#define irq_node(irq)  0
-#endif
-
 # define for_each_active_irq(irq)                      \
        for (irq = irq_get_next_irq(0); irq < nr_irqs;  \
             irq = irq_get_next_irq(irq + 1))
index 0dfa4e31563decb63f12c76a26a7a3f275453cff..5f0be58640ea6e73f88dd02cb3ac3bc7cdb6a0e6 100644 (file)
@@ -816,13 +816,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #endif
 
 /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */
-#define VERIFY_OCTAL_PERMISSIONS(perms)                                        \
-       (BUILD_BUG_ON_ZERO((perms) < 0) +                               \
-        BUILD_BUG_ON_ZERO((perms) > 0777) +                            \
-        /* User perms >= group perms >= other perms */                 \
-        BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) +     \
-        BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) +      \
-        /* Other writable?  Generally considered a bad idea. */        \
-        BUILD_BUG_ON_ZERO((perms) & 2) +                               \
+#define VERIFY_OCTAL_PERMISSIONS(perms)                                                \
+       (BUILD_BUG_ON_ZERO((perms) < 0) +                                       \
+        BUILD_BUG_ON_ZERO((perms) > 0777) +                                    \
+        /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */                \
+        BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) +       \
+        BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) +              \
+        /* USER_WRITABLE >= GROUP_WRITABLE */                                  \
+        BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) +       \
+        /* OTHER_WRITABLE?  Generally considered a bad idea. */                \
+        BUILD_BUG_ON_ZERO((perms) & 2) +                                       \
         (perms))
 #endif
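
For illustration, the reworked macro compares the read and write bits
separately rather than whole octal digits; a hedged sketch of what passes
and what still breaks the build (the parameter is hypothetical):

	static int debug;
	module_param(debug, int, 0644);	/* fine: bits are monotonic */

	/* Build errors from the checks above:
	 *   module_param(debug, int, 0666);  - OTHER_WRITABLE
	 *   module_param(debug, int, 0044);  - group readable, user not
	 */
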
index 7ffe0851d24438876faa9d756178d1654c7e825e..d67b1932cc59869cd5c3dc5d24efa5994b368386 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/jump_label.h>
 #include <linux/export.h>
+#include <linux/rbtree_latch.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -210,6 +211,13 @@ enum module_state {
        MODULE_STATE_UNFORMED,  /* Still setting it up. */
 };
 
+struct module;
+
+struct mod_tree_node {
+       struct module *mod;
+       struct latch_tree_node node;
+};
+
 struct module {
        enum module_state state;
 
@@ -232,6 +240,9 @@ struct module {
        unsigned int num_syms;
 
        /* Kernel parameters. */
+#ifdef CONFIG_SYSFS
+       struct mutex param_lock;
+#endif
        struct kernel_param *kp;
        unsigned int num_kp;
 
@@ -271,8 +282,15 @@ struct module {
        /* Startup function. */
        int (*init)(void);
 
-       /* If this is non-NULL, vfree after init() returns */
-       void *module_init;
+       /*
+        * If this is non-NULL, vfree() after init() returns.
+        *
+        * Cacheline align here, such that:
+        *   module_init, module_core, init_size, core_size,
+        *   init_text_size, core_text_size and mtn_core::{mod,node[0]}
+        * are on the same cacheline.
+        */
+       void *module_init       ____cacheline_aligned;
 
        /* Here is the actual code + data, vfree'd on unload. */
        void *module_core;
@@ -283,6 +301,16 @@ struct module {
        /* The size of the executable code in each section.  */
        unsigned int init_text_size, core_text_size;
 
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+       /*
+        * We want mtn_core::{mod,node[0]} to be in the same cacheline as the
+        * above entries such that a regular lookup will only touch one
+        * cacheline.
+        */
+       struct mod_tree_node    mtn_core;
+       struct mod_tree_node    mtn_init;
+#endif
+
        /* Size of RO sections of the module (text+rodata) */
        unsigned int init_ro_size, core_ro_size;
 
@@ -369,7 +397,7 @@ struct module {
        ctor_fn_t *ctors;
        unsigned int num_ctors;
 #endif
-};
+} ____cacheline_aligned;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
@@ -423,14 +451,22 @@ struct symsearch {
        bool unused;
 };
 
-/* Search for an exported symbol by name. */
+/*
+ * Search for an exported symbol by name.
+ *
+ * Must be called with module_mutex held or preemption disabled.
+ */
 const struct kernel_symbol *find_symbol(const char *name,
                                        struct module **owner,
                                        const unsigned long **crc,
                                        bool gplok,
                                        bool warn);
 
-/* Walk the exported symbol table */
+/*
+ * Walk the exported symbol table
+ *
+ * Must be called with module_mutex held or preemption disabled.
+ */
 bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
                                    struct module *owner,
                                    void *data), void *data);
index 6480dcaca275b62541964ebfe390f0c0da8d371f..c12f2147c350593fb827b7b29aebb33e8f9f0b4a 100644 (file)
@@ -67,8 +67,9 @@ enum {
 
 struct kernel_param {
        const char *name;
+       struct module *mod;
        const struct kernel_param_ops *ops;
-       u16 perm;
+       const u16 perm;
        s8 level;
        u8 flags;
        union {
@@ -108,7 +109,7 @@ struct kparam_array
  *
  * @perm is 0 if the variable is not to appear in sysfs, or 0444
  * for world-readable, 0644 for root-writable, etc.  Note that if it
- * is writable, you may need to use kparam_block_sysfs_write() around
+ * is writable, you may need to use kernel_param_lock() around
  * accesses (esp. charp, which can be kfreed when it changes).
  *
  * The @type is simply pasted to refer to a param_ops_##type and a
@@ -216,16 +217,16 @@ struct kparam_array
    parameters. */
 #define __module_param_call(prefix, name, ops, arg, perm, level, flags)        \
        /* Default value instead of permissions? */                     \
-       static const char __param_str_##name[] = prefix #name; \
+       static const char __param_str_##name[] = prefix #name;          \
        static struct kernel_param __moduleparam_const __param_##name   \
        __used                                                          \
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-       = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm),    \
-           level, flags, { arg } }
+       = { __param_str_##name, THIS_MODULE, ops,                       \
+           VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }
 
 /* Obsolete - use module_param_cb() */
 #define module_param_call(name, set, get, arg, perm)                   \
-       static struct kernel_param_ops __param_ops_##name =             \
+       static const struct kernel_param_ops __param_ops_##name =               \
                { .flags = 0, (void *)set, (void *)get };               \
        __module_param_call(MODULE_PARAM_PREFIX,                        \
                            name, &__param_ops_##name, arg,             \
@@ -238,58 +239,14 @@ __check_old_set_param(int (*oldset)(const char *, struct kernel_param *))
        return 0;
 }
 
-/**
- * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs.
- * @name: the name of the parameter
- *
- * There's no point blocking write on a paramter that isn't writable via sysfs!
- */
-#define kparam_block_sysfs_write(name)                 \
-       do {                                            \
-               BUG_ON(!(__param_##name.perm & 0222));  \
-               __kernel_param_lock();                  \
-       } while (0)
-
-/**
- * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again.
- * @name: the name of the parameter
- */
-#define kparam_unblock_sysfs_write(name)               \
-       do {                                            \
-               BUG_ON(!(__param_##name.perm & 0222));  \
-               __kernel_param_unlock();                \
-       } while (0)
-
-/**
- * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs.
- * @name: the name of the parameter
- *
- * This also blocks sysfs writes.
- */
-#define kparam_block_sysfs_read(name)                  \
-       do {                                            \
-               BUG_ON(!(__param_##name.perm & 0444));  \
-               __kernel_param_lock();                  \
-       } while (0)
-
-/**
- * kparam_unblock_sysfs_read - allows sysfs to read a parameter again.
- * @name: the name of the parameter
- */
-#define kparam_unblock_sysfs_read(name)                        \
-       do {                                            \
-               BUG_ON(!(__param_##name.perm & 0444));  \
-               __kernel_param_unlock();                \
-       } while (0)
-
 #ifdef CONFIG_SYSFS
-extern void __kernel_param_lock(void);
-extern void __kernel_param_unlock(void);
+extern void kernel_param_lock(struct module *mod);
+extern void kernel_param_unlock(struct module *mod);
 #else
-static inline void __kernel_param_lock(void)
+static inline void kernel_param_lock(struct module *mod)
 {
 }
-static inline void __kernel_param_unlock(void)
+static inline void kernel_param_unlock(struct module *mod)
 {
 }
 #endif
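
A minimal sketch of the new per-module locking around a writable charp
parameter (the parameter name is hypothetical); the string may be kfreed
and replaced by a concurrent sysfs write, hence the lock:

	static char *ifname = "eth0";
	module_param(ifname, charp, 0644);

	static void foo_show_ifname(void)
	{
		kernel_param_lock(THIS_MODULE);
		pr_info("ifname: %s\n", ifname);
		kernel_param_unlock(THIS_MODULE);
	}
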
@@ -386,64 +343,70 @@ static inline void destroy_params(const struct kernel_param *params,
 #define __param_check(name, p, type) \
        static inline type __always_unused *__check_##name(void) { return(p); }
 
-extern struct kernel_param_ops param_ops_byte;
+extern const struct kernel_param_ops param_ops_byte;
 extern int param_set_byte(const char *val, const struct kernel_param *kp);
 extern int param_get_byte(char *buffer, const struct kernel_param *kp);
 #define param_check_byte(name, p) __param_check(name, p, unsigned char)
 
-extern struct kernel_param_ops param_ops_short;
+extern const struct kernel_param_ops param_ops_short;
 extern int param_set_short(const char *val, const struct kernel_param *kp);
 extern int param_get_short(char *buffer, const struct kernel_param *kp);
 #define param_check_short(name, p) __param_check(name, p, short)
 
-extern struct kernel_param_ops param_ops_ushort;
+extern const struct kernel_param_ops param_ops_ushort;
 extern int param_set_ushort(const char *val, const struct kernel_param *kp);
 extern int param_get_ushort(char *buffer, const struct kernel_param *kp);
 #define param_check_ushort(name, p) __param_check(name, p, unsigned short)
 
-extern struct kernel_param_ops param_ops_int;
+extern const struct kernel_param_ops param_ops_int;
 extern int param_set_int(const char *val, const struct kernel_param *kp);
 extern int param_get_int(char *buffer, const struct kernel_param *kp);
 #define param_check_int(name, p) __param_check(name, p, int)
 
-extern struct kernel_param_ops param_ops_uint;
+extern const struct kernel_param_ops param_ops_uint;
 extern int param_set_uint(const char *val, const struct kernel_param *kp);
 extern int param_get_uint(char *buffer, const struct kernel_param *kp);
 #define param_check_uint(name, p) __param_check(name, p, unsigned int)
 
-extern struct kernel_param_ops param_ops_long;
+extern const struct kernel_param_ops param_ops_long;
 extern int param_set_long(const char *val, const struct kernel_param *kp);
 extern int param_get_long(char *buffer, const struct kernel_param *kp);
 #define param_check_long(name, p) __param_check(name, p, long)
 
-extern struct kernel_param_ops param_ops_ulong;
+extern const struct kernel_param_ops param_ops_ulong;
 extern int param_set_ulong(const char *val, const struct kernel_param *kp);
 extern int param_get_ulong(char *buffer, const struct kernel_param *kp);
 #define param_check_ulong(name, p) __param_check(name, p, unsigned long)
 
-extern struct kernel_param_ops param_ops_ullong;
+extern const struct kernel_param_ops param_ops_ullong;
 extern int param_set_ullong(const char *val, const struct kernel_param *kp);
 extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
 #define param_check_ullong(name, p) __param_check(name, p, unsigned long long)
 
-extern struct kernel_param_ops param_ops_charp;
+extern const struct kernel_param_ops param_ops_charp;
 extern int param_set_charp(const char *val, const struct kernel_param *kp);
 extern int param_get_charp(char *buffer, const struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
 /* We used to allow int as well as bool.  We're taking that away! */
-extern struct kernel_param_ops param_ops_bool;
+extern const struct kernel_param_ops param_ops_bool;
 extern int param_set_bool(const char *val, const struct kernel_param *kp);
 extern int param_get_bool(char *buffer, const struct kernel_param *kp);
 #define param_check_bool(name, p) __param_check(name, p, bool)
 
-extern struct kernel_param_ops param_ops_invbool;
+extern const struct kernel_param_ops param_ops_bool_enable_only;
+extern int param_set_bool_enable_only(const char *val,
+                                     const struct kernel_param *kp);
+/* getter is the same as for the regular bool */
+#define param_check_bool_enable_only param_check_bool
+
+extern const struct kernel_param_ops param_ops_invbool;
 extern int param_set_invbool(const char *val, const struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
 #define param_check_invbool(name, p) __param_check(name, p, bool)
 
 /* An int, which can only be set like a bool (though it shows as an int). */
-extern struct kernel_param_ops param_ops_bint;
+extern const struct kernel_param_ops param_ops_bint;
 extern int param_set_bint(const char *val, const struct kernel_param *kp);
 #define param_get_bint param_get_int
 #define param_check_bint param_check_int
@@ -487,9 +450,9 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
                            perm, -1, 0);                               \
        __MODULE_PARM_TYPE(name, "array of " #type)
 
-extern struct kernel_param_ops param_array_ops;
+extern const struct kernel_param_ops param_array_ops;
 
-extern struct kernel_param_ops param_ops_string;
+extern const struct kernel_param_ops param_ops_string;
 extern int param_set_copystring(const char *val, const struct kernel_param *);
 extern int param_get_string(char *buffer, const struct kernel_param *kp);
 
index b871ff9d81d7207333fa021e6a95cb6bdbcf34ac..f05fdcea4e6639310e4aa0e085d0df5a59b59296 100644 (file)
@@ -128,7 +128,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode)
        return fwnode && fwnode->type == FWNODE_OF;
 }
 
-static inline struct device_node *of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
 {
        return fwnode ? container_of(fwnode, struct device_node, fwnode) : NULL;
 }
@@ -387,7 +387,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode)
        return false;
 }
 
-static inline struct device_node *of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
 {
        return NULL;
 }
index fb31765e935a0590ae0a6d82dda16a4a89dc36a4..830c4992088d5806125577723c6a7c1272161689 100644 (file)
@@ -31,6 +31,7 @@
 
 #include <linux/kernel.h>
 #include <linux/stddef.h>
+#include <linux/rcupdate.h>
 
 struct rb_node {
        unsigned long  __rb_parent_color;
@@ -73,11 +74,11 @@ extern struct rb_node *rb_first_postorder(const struct rb_root *);
 extern struct rb_node *rb_next_postorder(const struct rb_node *);
 
 /* Fast replacement of a single node without remove/rebalance/add/rebalance */
-extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
                            struct rb_root *root);
 
-static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
-                               struct rb_node ** rb_link)
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+                               struct rb_node **rb_link)
 {
        node->__rb_parent_color = (unsigned long)parent;
        node->rb_left = node->rb_right = NULL;
@@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
        *rb_link = node;
 }
 
+static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
+                                   struct rb_node **rb_link)
+{
+       node->__rb_parent_color = (unsigned long)parent;
+       node->rb_left = node->rb_right = NULL;
+
+       rcu_assign_pointer(*rb_link, node);
+}
+
 #define rb_entry_safe(ptr, type, member) \
        ({ typeof(ptr) ____ptr = (ptr); \
           ____ptr ? rb_entry(____ptr, type, member) : NULL; \
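
A hedged sketch of an insert that publishes via rb_link_node_rcu(); the
structure and key are hypothetical, and writers are assumed serialized
while readers traverse under the latch/seqcount scheme introduced below:

	struct foo {
		unsigned long key;
		struct rb_node node;
	};

	static void foo_insert(struct rb_root *root, struct foo *new)
	{
		struct rb_node **link = &root->rb_node, *parent = NULL;

		while (*link) {
			struct foo *f = rb_entry(*link, struct foo, node);

			parent = *link;
			link = new->key < f->key ? &parent->rb_left
						 : &parent->rb_right;
		}
		/* Publish: node is fully initialized before *link sees it. */
		rb_link_node_rcu(&new->node, parent, link);
		rb_insert_color(&new->node, root);
	}
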
index 378c5ee75f78f5e1a521d87afeaf13fe21999af5..14d7b831b63a8377dce8161f3bf0be55bf385258 100644 (file)
@@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
 {
        if (parent) {
                if (parent->rb_left == old)
-                       parent->rb_left = new;
+                       WRITE_ONCE(parent->rb_left, new);
                else
-                       parent->rb_right = new;
+                       WRITE_ONCE(parent->rb_right, new);
        } else
-               root->rb_node = new;
+               WRITE_ONCE(root->rb_node, new);
 }
 
 extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@@ -137,7 +137,8 @@ static __always_inline struct rb_node *
 __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
                     const struct rb_augment_callbacks *augment)
 {
-       struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+       struct rb_node *child = node->rb_right;
+       struct rb_node *tmp = node->rb_left;
        struct rb_node *parent, *rebalance;
        unsigned long pc;
 
@@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
                tmp = parent;
        } else {
                struct rb_node *successor = child, *child2;
+
                tmp = child->rb_left;
                if (!tmp) {
                        /*
@@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
                         */
                        parent = successor;
                        child2 = successor->rb_right;
+
                        augment->copy(node, successor);
                } else {
                        /*
@@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
                                successor = tmp;
                                tmp = tmp->rb_left;
                        } while (tmp);
-                       parent->rb_left = child2 = successor->rb_right;
-                       successor->rb_right = child;
+                       child2 = successor->rb_right;
+                       WRITE_ONCE(parent->rb_left, child2);
+                       WRITE_ONCE(successor->rb_right, child);
                        rb_set_parent(child, successor);
+
                        augment->copy(node, successor);
                        augment->propagate(parent, successor);
                }
 
-               successor->rb_left = tmp = node->rb_left;
+               tmp = node->rb_left;
+               WRITE_ONCE(successor->rb_left, tmp);
                rb_set_parent(tmp, successor);
 
                pc = node->__rb_parent_color;
                tmp = __rb_parent(pc);
                __rb_change_child(node, successor, tmp, root);
+
                if (child2) {
                        successor->__rb_parent_color = pc;
                        rb_set_parent_color(child2, parent, RB_BLACK);
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
new file mode 100644 (file)
index 0000000..4f3432c
--- /dev/null
@@ -0,0 +1,212 @@
+/*
+ * Latched RB-trees
+ *
+ * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org>
+ *
+ * Since RB-trees have non-atomic modifications, they're not immediately suited
+ * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for
+ * lockless lookups, we cannot guarantee they return a correct result.
+ *
+ * The simplest solution is a seqlock + RB-tree, this will allow lockless
+ * lookups; but has the constraint (inherent to the seqlock) that read sides
+ * cannot nest in write sides.
+ *
+ * If we need to allow unconditional lookups (say, as required for NMI context
+ * usage), we need a more complex setup; this data structure provides this by
+ * employing the latch technique -- see @raw_write_seqcount_latch -- to
+ * implement a latched RB-tree which does allow for unconditional lookups by
+ * virtue of always having (at least) one stable copy of the tree.
+ *
+ * However, while we have the guarantee that there is at all times one stable
+ * copy, this does not guarantee an iteration will not observe modifications.
+ * What might have been a stable copy at the start of the iteration need not
+ * remain so for the duration of the iteration.
+ *
+ * Therefore, this does require a lockless RB-tree iteration to be non-fatal;
+ * see the comment in lib/rbtree.c. Note, however, that we only require the
+ * first condition -- not seeing partial stores -- because the latch technique
+ * isolates us from loops. If we were to interrupt a modification, the lookup
+ * would be pointed at the stable tree and complete while the modification was
+ * halted.
+ */
+
+#ifndef RB_TREE_LATCH_H
+#define RB_TREE_LATCH_H
+
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+
+struct latch_tree_node {
+       struct rb_node node[2];
+};
+
+struct latch_tree_root {
+       seqcount_t      seq;
+       struct rb_root  tree[2];
+};
+
+/**
+ * latch_tree_ops - operators to define the tree order
+ * @less: used for insertion; provides the (partial) order between two elements.
+ * @comp: used for lookups; provides the order between the search key and an element.
+ *
+ * The operators are related like:
+ *
+ *     comp(a->key,b) < 0  := less(a,b)
+ *     comp(a->key,b) > 0  := less(b,a)
+ *     comp(a->key,b) == 0 := !less(a,b) && !less(b,a)
+ *
+ * If these operators define only a partial order on the elements (multiple
+ * elements can compare equal), we make no guarantee as to which of the
+ * elements matching the key is found. See latch_tree_find().
+ */
+struct latch_tree_ops {
+       bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b);
+       int  (*comp)(void *key,                 struct latch_tree_node *b);
+};
+
+static __always_inline struct latch_tree_node *
+__lt_from_rb(struct rb_node *node, int idx)
+{
+       return container_of(node, struct latch_tree_node, node[idx]);
+}
+
+static __always_inline void
+__lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx,
+           bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b))
+{
+       struct rb_root *root = &ltr->tree[idx];
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *node = &ltn->node[idx];
+       struct rb_node *parent = NULL;
+       struct latch_tree_node *ltp;
+
+       while (*link) {
+               parent = *link;
+               ltp = __lt_from_rb(parent, idx);
+
+               if (less(ltn, ltp))
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       rb_link_node_rcu(node, parent, link);
+       rb_insert_color(node, root);
+}
+
+static __always_inline void
+__lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx)
+{
+       rb_erase(&ltn->node[idx], &ltr->tree[idx]);
+}
+
+static __always_inline struct latch_tree_node *
+__lt_find(void *key, struct latch_tree_root *ltr, int idx,
+         int (*comp)(void *key, struct latch_tree_node *node))
+{
+       struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node);
+       struct latch_tree_node *ltn;
+       int c;
+
+       while (node) {
+               ltn = __lt_from_rb(node, idx);
+               c = comp(key, ltn);
+
+               if (c < 0)
+                       node = rcu_dereference_raw(node->rb_left);
+               else if (c > 0)
+                       node = rcu_dereference_raw(node->rb_right);
+               else
+                       return ltn;
+       }
+
+       return NULL;
+}
+
+/**
+ * latch_tree_insert() - insert @node into the trees @root
+ * @node: node to insert
+ * @root: trees to insert @node into
+ * @ops: operators defining the node order
+ *
+ * Inserts @node into @root in an ordered fashion such that we can always
+ * observe one complete tree. See the comment for raw_write_seqcount_latch().
+ *
+ * The inserts use rcu_assign_pointer() to publish the element such that the
+ * tree structure is stored before we can observe the new @node.
+ *
+ * All modifications (latch_tree_insert, latch_tree_erase) are assumed to be
+ * serialized.
+ */
+static __always_inline void
+latch_tree_insert(struct latch_tree_node *node,
+                 struct latch_tree_root *root,
+                 const struct latch_tree_ops *ops)
+{
+       raw_write_seqcount_latch(&root->seq);
+       __lt_insert(node, root, 0, ops->less);
+       raw_write_seqcount_latch(&root->seq);
+       __lt_insert(node, root, 1, ops->less);
+}
+
+/**
+ * latch_tree_erase() - removes @node from the trees @root
+ * @node: node to remove
+ * @root: trees to remove @node from
+ * @ops: operators defining the node order
+ *
+ * Removes @node from the trees @root in an ordered fashion such that we can
+ * always observe one complete tree. See the comment for
+ * raw_write_seqcount_latch().
+ *
+ * It is assumed that @node will observe one RCU quiescent state before being
+ * reused or freed.
+ *
+ * All modifications (latch_tree_insert, latch_tree_erase) are assumed to be
+ * serialized.
+ */
+static __always_inline void
+latch_tree_erase(struct latch_tree_node *node,
+                struct latch_tree_root *root,
+                const struct latch_tree_ops *ops)
+{
+       raw_write_seqcount_latch(&root->seq);
+       __lt_erase(node, root, 0);
+       raw_write_seqcount_latch(&root->seq);
+       __lt_erase(node, root, 1);
+}
+
+/**
+ * latch_tree_find() - find the node matching @key in the trees @root
+ * @key: search key
+ * @root: trees to search for @key
+ * @ops: operators defining the node order
+ *
+ * Does a lockless lookup in the trees @root for the node matching @key.
+ *
+ * It is assumed that this is called while holding the appropriate RCU read
+ * side lock.
+ *
+ * If the operators define a partial order on the elements (there are multiple
+ * elements which have the same key value), it is undefined which of these
+ * elements will be found. Nor is it possible to iterate the tree to find
+ * further elements with the same key value.
+ *
+ * Returns: a pointer to the node matching @key or NULL.
+ */
+static __always_inline struct latch_tree_node *
+latch_tree_find(void *key, struct latch_tree_root *root,
+               const struct latch_tree_ops *ops)
+{
+       struct latch_tree_node *node;
+       unsigned int seq;
+
+       do {
+               seq = raw_read_seqcount_latch(&root->seq);
+               node = __lt_find(key, root, seq & 1, ops->comp);
+       } while (read_seqcount_retry(&root->seq, seq));
+
+       return node;
+}
+
+#endif /* RB_TREE_LATCH_H */
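
To make the ops contract concrete, a hedged sketch of a keyed object
embedding a latch_tree_node (all names hypothetical; compare mod_tree_ops
in kernel/module.c further down):

	struct bar {
		unsigned long key;
		struct latch_tree_node ltn;
	};

	static __always_inline bool
	bar_less(struct latch_tree_node *a, struct latch_tree_node *b)
	{
		return container_of(a, struct bar, ltn)->key <
		       container_of(b, struct bar, ltn)->key;
	}

	static __always_inline int
	bar_comp(void *key, struct latch_tree_node *n)
	{
		unsigned long k = (unsigned long)key;
		unsigned long nk = container_of(n, struct bar, ltn)->key;

		if (k < nk)
			return -1;
		return k > nk;	/* 0 on match, per the comp() contract */
	}

	static const struct latch_tree_ops bar_ops = {
		.less = bar_less,
		.comp = bar_comp,
	};
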
index 33a056bb886faeedeb9690faefd3a4adeeedd14b..4cf5f51b4c9c43c2900d8fd5fad2ea93f873b33b 100644 (file)
@@ -632,21 +632,6 @@ static inline void rcu_preempt_sleep_check(void)
  */
 #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
 
-/**
- * lockless_dereference() - safely load a pointer for later dereference
- * @p: The pointer to load
- *
- * Similar to rcu_dereference(), but for situations where the pointed-to
- * object's lifetime is managed by something other than RCU.  That
- * "something other" might be reference counting or simple immortality.
- */
-#define lockless_dereference(p) \
-({ \
-       typeof(p) _________p1 = READ_ONCE(p); \
-       smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
-       (_________p1); \
-})
-
 /**
  * rcu_assign_pointer() - assign to RCU-protected pointer
  * @p: pointer to assign to
index 486e685a226a82d5cb841e61fb2ebf1562c5adb7..e0582106ef4faba81db1ff7912246623c3f7f1c2 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/spinlock.h>
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
+#include <linux/compiler.h>
 #include <asm/processor.h>
 
 /*
@@ -274,9 +275,87 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
        s->sequence++;
 }
 
-/*
+static inline int raw_read_seqcount_latch(seqcount_t *s)
+{
+       return lockless_dereference(s->sequence);
+}
+
+/**
  * raw_write_seqcount_latch - redirect readers to even/odd copy
  * @s: pointer to seqcount_t
+ *
+ * The latch technique is a multiversion concurrency control method that allows
+ * queries during non-atomic modifications. If you can guarantee queries never
+ * interrupt the modification -- e.g. the concurrency is strictly between CPUs
+ * -- you most likely do not need this.
+ *
+ * Where the traditional RCU/lockless data structures rely on atomic
+ * modifications to ensure queries observe either the old or the new state,
+ * the latch allows the same for non-atomic updates. The trade-off is doubling
+ * the cost of storage; we have to maintain two copies of the entire data
+ * structure.
+ *
+ * Very simply put: we first modify one copy and then the other. This ensures
+ * there is always one copy in a stable state, ready to give us an answer.
+ *
+ * The basic form is a data structure like:
+ *
+ * struct latch_struct {
+ *     seqcount_t              seq;
+ *     struct data_struct      data[2];
+ * };
+ *
+ * Where a modification, which is assumed to be externally serialized, does the
+ * following:
+ *
+ * void latch_modify(struct latch_struct *latch, ...)
+ * {
+ *     smp_wmb();      <- Ensure that the last data[1] update is visible
+ *     latch->seq++;
+ *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *
+ *     modify(latch->data[0], ...);
+ *
+ *     smp_wmb();      <- Ensure that the data[0] update is visible
+ *     latch->seq++;
+ *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *
+ *     modify(latch->data[1], ...);
+ * }
+ *
+ * The query will have a form like:
+ *
+ * struct entry *latch_query(struct latch_struct *latch, ...)
+ * {
+ *     struct entry *entry;
+ *     unsigned seq, idx;
+ *
+ *     do {
+ *             seq = lockless_dereference(latch->seq);
+ *
+ *             idx = seq & 0x01;
+ *             entry = data_query(latch->data[idx], ...);
+ *
+ *             smp_rmb();
+ *     } while (seq != latch->seq);
+ *
+ *     return entry;
+ * }
+ *
+ * So during the modification, queries are first redirected to data[1]. Then we
+ * modify data[0]. When that is complete, we redirect queries back to data[0]
+ * and we can modify data[1].
+ *
+ * NOTE: The non-requirement for atomic modifications does _NOT_ include
+ *       the publishing of new entries in the case where data is a dynamic
+ *       data structure.
+ *
+ *       An iteration might start in data[0] and get suspended long enough
+ *       to miss an entire modification sequence; once it resumes it might
+ *       observe the new entry.
+ *
+ * NOTE: When data is a dynamic data structure, one should use regular RCU
+ *       patterns to manage the lifetimes of the objects within.
  */
 static inline void raw_write_seqcount_latch(seqcount_t *s)
 {
index 16a923a3a43a8825d167b75025baca0b1b5d741c..e602f8177ebfbf3148bce0734a87ecac1b0eb590 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/atomic.h>
 #include <net/neighbour.h>
+#include <net/sock.h>
 
 #define        AX25_T1CLAMPLO                  1
 #define        AX25_T1CLAMPHI                  (30 * HZ)
@@ -246,7 +247,20 @@ typedef struct ax25_cb {
        atomic_t                refcount;
 } ax25_cb;
 
-#define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo)
+struct ax25_sock {
+       struct sock             sk;
+       struct ax25_cb          *cb;
+};
+
+static inline struct ax25_sock *ax25_sk(const struct sock *sk)
+{
+       return (struct ax25_sock *) sk;
+}
+
+static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
+{
+       return ax25_sk(sk)->cb;
+}
 
 #define ax25_for_each(__ax25, list) \
        hlist_for_each_entry(__ax25, list, ax25_node)
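
The typed accessor replaces the old open-coded cast of sk_protinfo; a
minimal before/after sketch:

	/* before: ax25_cb *ax25 = (ax25_cb *)sk->sk_protinfo; */
	ax25_cb *ax25 = sk_to_ax25(sk);	/* after */
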
index 14d539c040d70dfebaa5dae16312fc48536559a7..05a8c1aea25187c1692efcb1e350bb2c7f75a30b 100644 (file)
@@ -277,7 +277,6 @@ struct cg_proto;
   *    @sk_incoming_cpu: record cpu processing incoming packets
   *    @sk_txhash: computed flow hash for use on transmit
   *    @sk_filter: socket filtering instructions
-  *    @sk_protinfo: private area, net family specific, when not using slab
   *    @sk_timer: sock cleanup timer
   *    @sk_stamp: time stamp of last packet received
   *    @sk_tsflags: SO_TIMESTAMPING socket options
@@ -416,7 +415,6 @@ struct sock {
        const struct cred       *sk_peer_cred;
        long                    sk_rcvtimeo;
        long                    sk_sndtimeo;
-       void                    *sk_protinfo;
        struct timer_list       sk_timer;
        ktime_t                 sk_stamp;
        u16                     sk_tsflags;
index 7f79cf459591893c775e3520970a88c6bf572842..0b73af9be12f467d8b838c278297e30e273dbc49 100644 (file)
@@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
        TP_ARGS(wq)
 );
 
-#define show_oper_type(type)                                           \
-       __print_symbolic(type,                                          \
-               { BTRFS_QGROUP_OPER_ADD_EXCL,   "OPER_ADD_EXCL" },      \
-               { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" },    \
-               { BTRFS_QGROUP_OPER_SUB_EXCL,   "OPER_SUB_EXCL" },      \
-               { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" })
-
-DECLARE_EVENT_CLASS(btrfs_qgroup_oper,
-
-       TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-       TP_ARGS(oper),
-
-       TP_STRUCT__entry(
-               __field(        u64,  ref_root          )
-               __field(        u64,  bytenr            )
-               __field(        u64,  num_bytes         )
-               __field(        u64,  seq               )
-               __field(        int,  type              )
-               __field(        u64,  elem_seq          )
-       ),
-
-       TP_fast_assign(
-               __entry->ref_root       = oper->ref_root;
-               __entry->bytenr         = oper->bytenr,
-               __entry->num_bytes      = oper->num_bytes;
-               __entry->seq            = oper->seq;
-               __entry->type           = oper->type;
-               __entry->elem_seq       = oper->elem.seq;
-       ),
-
-       TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
-                 "seq = %llu, elem.seq = %llu, type = %s",
-                 (unsigned long long)__entry->ref_root,
-                 (unsigned long long)__entry->bytenr,
-                 (unsigned long long)__entry->num_bytes,
-                 (unsigned long long)__entry->seq,
-                 (unsigned long long)__entry->elem_seq,
-                 show_oper_type(__entry->type))
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,
-
-       TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-       TP_ARGS(oper)
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,
-
-       TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-       TP_ARGS(oper)
-);
-
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
index 83d6236a2f083d787f4ed887b71aa41c76330d9e..eaf94919291aaf78419a5f43b17d66eb07f12600 100644 (file)
 #define _UAPI_LINUX_IN_H
 
 #include <linux/types.h>
+#include <linux/libc-compat.h>
 #include <linux/socket.h>
 
+#if __UAPI_DEF_IN_IPPROTO
 /* Standard well-defined IP protocols.  */
 enum {
   IPPROTO_IP = 0,              /* Dummy protocol for TCP               */
@@ -75,12 +77,14 @@ enum {
 #define IPPROTO_RAW            IPPROTO_RAW
   IPPROTO_MAX
 };
+#endif
 
-
+#if __UAPI_DEF_IN_ADDR
 /* Internet address. */
 struct in_addr {
        __be32  s_addr;
 };
+#endif
 
 #define IP_TOS         1
 #define IP_TTL         2
@@ -158,6 +162,7 @@ struct in_addr {
 
 /* Request struct for multicast socket ops */
 
+#if __UAPI_DEF_IP_MREQ
 struct ip_mreq  {
        struct in_addr imr_multiaddr;   /* IP multicast address of group */
        struct in_addr imr_interface;   /* local IP address of interface */
@@ -209,14 +214,18 @@ struct group_filter {
 #define GROUP_FILTER_SIZE(numsrc) \
        (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
        + (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
 
+#if __UAPI_DEF_IN_PKTINFO
 struct in_pktinfo {
        int             ipi_ifindex;
        struct in_addr  ipi_spec_dst;
        struct in_addr  ipi_addr;
 };
+#endif
 
 /* Structure describing an Internet (IP) socket address. */
+#if  __UAPI_DEF_SOCKADDR_IN
 #define __SOCK_SIZE__  16              /* sizeof(struct sockaddr)      */
 struct sockaddr_in {
   __kernel_sa_family_t sin_family;     /* Address family               */
@@ -228,8 +237,9 @@ struct sockaddr_in {
                        sizeof(unsigned short int) - sizeof(struct in_addr)];
 };
 #define sin_zero       __pad           /* for BSD UNIX comp. -FvK      */
+#endif
 
-
+#if __UAPI_DEF_IN_CLASS
 /*
  * Definitions of the bits in an Internet address integer.
  * On subnets, host and network parts are found according
@@ -280,7 +290,7 @@ struct sockaddr_in {
 #define INADDR_ALLHOSTS_GROUP  0xe0000001U     /* 224.0.0.1   */
 #define INADDR_ALLRTRS_GROUP    0xe0000002U    /* 224.0.0.2 */
 #define INADDR_MAX_LOCAL_GROUP  0xe00000ffU    /* 224.0.0.255 */
-
+#endif
 
 /* <asm/byteorder.h> contains the htonl type stuff.. */
 #include <asm/byteorder.h> 
index fa673e9cc040aefcee4e96ee3e4bd6892c5be562..7d024ceb075d8d4cd657c1c25db37a748940fc89 100644 (file)
 
 /* GLIBC headers included first so don't define anything
  * that would already be defined. */
+#define __UAPI_DEF_IN_ADDR             0
+#define __UAPI_DEF_IN_IPPROTO          0
+#define __UAPI_DEF_IN_PKTINFO          0
+#define __UAPI_DEF_IP_MREQ             0
+#define __UAPI_DEF_SOCKADDR_IN         0
+#define __UAPI_DEF_IN_CLASS            0
+
 #define __UAPI_DEF_IN6_ADDR            0
 /* The exception is the in6_addr macros which must be defined
  * if the glibc code didn't define them. This guard matches
 /* Linux headers included first, and we must define everything
  * we need. The expectation is that glibc will check the
  * __UAPI_DEF_* defines and adjust appropriately. */
+#define __UAPI_DEF_IN_ADDR             1
+#define __UAPI_DEF_IN_IPPROTO          1
+#define __UAPI_DEF_IN_PKTINFO          1
+#define __UAPI_DEF_IP_MREQ             1
+#define __UAPI_DEF_SOCKADDR_IN         1
+#define __UAPI_DEF_IN_CLASS            1
+
 #define __UAPI_DEF_IN6_ADDR            1
 /* We unconditionally define the in6_addr macros and glibc must
  * coordinate. */
  * that we need. */
 #else /* !defined(__GLIBC__) */
 
+/* Definitions for in.h */
+#define __UAPI_DEF_IN_ADDR             1
+#define __UAPI_DEF_IN_IPPROTO          1
+#define __UAPI_DEF_IN_PKTINFO          1
+#define __UAPI_DEF_IP_MREQ             1
+#define __UAPI_DEF_SOCKADDR_IN         1
+#define __UAPI_DEF_IN_CLASS            1
+
 /* Definitions for in6.h */
 #define __UAPI_DEF_IN6_ADDR            1
 #define __UAPI_DEF_IN6_ADDR_ALT                1
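
A hedged user-space sketch of the coordination these guards enable,
assuming a glibc that checks the __UAPI_DEF_* defines:

	/* glibc header first: libc-compat.h sets __UAPI_DEF_IN_ADDR etc.
	 * to 0, so <linux/in.h> skips its duplicate struct in_addr,
	 * ip_mreq, sockaddr_in, ... definitions. */
	#include <netinet/in.h>
	#include <linux/in.h>	/* now safe to include as well */
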
index 7d1ffd2ae536996e45a8ae5a88981d7e32c65e15..bcc41bd19999276bcca2c8d1fd29644882c5f5fb 100644 (file)
@@ -1941,26 +1941,21 @@ config MODULE_COMPRESS
        bool "Compress modules on installation"
        depends on MODULES
        help
-         This option compresses the kernel modules when 'make
-         modules_install' is run.
 
-         The modules will be compressed either using gzip or xz depend on the
-         choice made in "Compression algorithm".
+         Compresses kernel modules when 'make modules_install' is run; gzip or
+         xz depending on "Compression algorithm" below.
 
-         module-init-tools has support for gzip format while kmod handle gzip
-         and xz compressed modules.
+         module-init-tools MAY support gzip, and kmod MAY support gzip and xz.
 
-         When a kernel module is installed from outside of the main kernel
-         source and uses the Kbuild system for installing modules then that
-         kernel module will also be compressed when it is installed.
+         Out-of-tree kernel modules installed using Kbuild will also be
+         compressed upon installation.
 
-         This option provides little benefit when the modules are to be used inside
-         an initrd or initramfs, it generally is more efficient to compress the whole
-         initrd or initramfs instead.
+         Note: for modules inside an initrd or initramfs, it's more efficient
+         to compress the whole initrd or initramfs instead.
 
-         This is fully compatible with signed modules while the signed module is
-         compressed. module-init-tools or kmod handles decompression and provide to
-         other layer the uncompressed but signed payload.
+         Note: This is fully compatible with signed modules.
+
+         If in doubt, say N.
 
 choice
        prompt "Compression algorithm"
@@ -1982,6 +1977,10 @@ endchoice
 
 endif # MODULES
 
+config MODULES_TREE_LOOKUP
+       def_bool y
+       depends on PERF_EVENTS || TRACING
+
 config INIT_ALL_POSSIBLE
        bool
        help
diff --git a/kernel/configs/xen.config b/kernel/configs/xen.config
new file mode 100644 (file)
index 0000000..ff75622
--- /dev/null
@@ -0,0 +1,48 @@
+# global stuff - these enable us to allow some
+# of the not so generic stuff below for xen
+CONFIG_PARAVIRT=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_BLOCK=y
+CONFIG_WATCHDOG=y
+CONFIG_TARGET_CORE=y
+CONFIG_SCSI=y
+CONFIG_FB=y
+CONFIG_INPUT_MISC=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_TTY=y
+# Technically not required but otherwise produces
+# pretty useless systems starting from allnoconfig
+# You want TCP/IP and ELF binaries right?
+CONFIG_INET=y
+CONFIG_BINFMT_ELF=y
+# generic config
+CONFIG_XEN=y
+CONFIG_XEN_DOM0=y
+# backend drivers
+CONFIG_XEN_BACKEND=y
+CONFIG_XEN_BLKDEV_BACKEND=m
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_HVC_XEN=y
+CONFIG_XEN_WDT=m
+CONFIG_XEN_SCSI_BACKEND=m
+# frontend drivers
+CONFIG_XEN_FBDEV_FRONTEND=m
+CONFIG_HVC_XEN_FRONTEND=y
+CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
+CONFIG_XEN_SCSI_FRONTEND=m
+# others
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_DEV_EVTCHN=m
+CONFIG_XEN_BLKDEV_FRONTEND=m
+CONFIG_XEN_NETDEV_FRONTEND=m
+CONFIG_XENFS=m
+CONFIG_XEN_COMPAT_XENFS=y
+CONFIG_XEN_SYS_HYPERVISOR=y
+CONFIG_XEN_XENBUS_FRONTEND=y
+CONFIG_XEN_GNTDEV=m
+CONFIG_XEN_GRANT_DEV_ALLOC=m
+CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PRIVCMD=m
index 9019f15deab201127065e4d6987677fdcdd63676..52ebaca1b9fc16aaeba97873fadc9dc158f43f39 100644 (file)
@@ -302,7 +302,7 @@ static int jump_label_add_module(struct module *mod)
                        continue;
 
                key = iterk;
-               if (__module_address(iter->key) == mod) {
+               if (within_module(iter->key, mod)) {
                        /*
                         * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
                         */
@@ -339,7 +339,7 @@ static void jump_label_del_module(struct module *mod)
 
                key = (struct static_key *)(unsigned long)iter->key;
 
-               if (__module_address(iter->key) == mod)
+               if (within_module(iter->key, mod))
                        continue;
 
                prev = &key->next;
@@ -443,14 +443,16 @@ static void jump_label_update(struct static_key *key, int enable)
 {
        struct jump_entry *stop = __stop___jump_table;
        struct jump_entry *entry = jump_label_get_entries(key);
-
 #ifdef CONFIG_MODULES
-       struct module *mod = __module_address((unsigned long)key);
+       struct module *mod;
 
        __jump_label_mod_update(key, enable);
 
+       preempt_disable();
+       mod = __module_address((unsigned long)key);
        if (mod)
                stop = mod->jump_entries + mod->num_jump_entries;
+       preempt_enable();
 #endif
        /* if there are no users, entry can be NULL */
        if (entry)
index f80a97f7da1f14b96c87efaf82f9f8c88612ab54..3e0e19763d246a998acb109c6979e4df593e3958 100644 (file)
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
-#ifdef CONFIG_KGDB_KDB
-struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
-#endif /* CONFIG_KGDB_KDB */
 
-#ifdef CONFIG_MODULE_SIG
-#ifdef CONFIG_MODULE_SIG_FORCE
-static bool sig_enforce = true;
-#else
-static bool sig_enforce = false;
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges, init and core, we need two
+ * latch_tree_node entries. Therefore we need the back-pointer from
+ * mod_tree_node.
+ *
+ * Because init ranges are short lived we mark them unlikely and have placed
+ * them outside the critical cacheline in struct module.
+ *
+ * This is conditional on PERF_EVENTS || TRACING because those can really hit
+ * __module_address() hard by doing a lot of stack unwinding, potentially from
+ * NMI context.
+ */
 
-static int param_set_bool_enable_only(const char *val,
-                                     const struct kernel_param *kp)
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
 {
-       int err;
-       bool test;
-       struct kernel_param dummy_kp = *kp;
+       struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+       struct module *mod = mtn->mod;
 
-       dummy_kp.arg = &test;
+       if (unlikely(mtn == &mod->mtn_init))
+               return (unsigned long)mod->module_init;
 
-       err = param_set_bool(val, &dummy_kp);
-       if (err)
-               return err;
+       return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+       struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+       struct module *mod = mtn->mod;
 
-       /* Don't let them unset it once it's set! */
-       if (!test && sig_enforce)
-               return -EROFS;
+       if (unlikely(mtn == &mod->mtn_init))
+               return (unsigned long)mod->init_size;
+
+       return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+       return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+       unsigned long val = (unsigned long)key;
+       unsigned long start, end;
+
+       start = __mod_tree_val(n);
+       if (val < start)
+               return -1;
+
+       end = start + __mod_tree_size(n);
+       if (val >= end)
+               return 1;
 
-       if (test)
-               sig_enforce = true;
        return 0;
 }
 
-static const struct kernel_param_ops param_ops_bool_enable_only = {
-       .flags = KERNEL_PARAM_OPS_FL_NOARG,
-       .set = param_set_bool_enable_only,
-       .get = param_get_bool,
+static const struct latch_tree_ops mod_tree_ops = {
+       .less = mod_tree_less,
+       .comp = mod_tree_comp,
 };
-#define param_check_bool_enable_only param_check_bool
 
+static struct mod_tree_root {
+       struct latch_tree_root root;
+       unsigned long addr_min;
+       unsigned long addr_max;
+} mod_tree __cacheline_aligned = {
+       .addr_min = -1UL,
+};
+
+#define module_addr_min mod_tree.addr_min
+#define module_addr_max mod_tree.addr_max
+
+static noinline void __mod_tree_insert(struct mod_tree_node *node)
+{
+       latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+static void __mod_tree_remove(struct mod_tree_node *node)
+{
+       latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+/*
+ * These modifications: insert, remove_init and remove; are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+       mod->mtn_core.mod = mod;
+       mod->mtn_init.mod = mod;
+
+       __mod_tree_insert(&mod->mtn_core);
+       if (mod->init_size)
+               __mod_tree_insert(&mod->mtn_init);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+       if (mod->init_size)
+               __mod_tree_remove(&mod->mtn_init);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+       __mod_tree_remove(&mod->mtn_core);
+       mod_tree_remove_init(mod);
+}
+
+static struct module *mod_find(unsigned long addr)
+{
+       struct latch_tree_node *ltn;
+
+       ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+       if (!ltn)
+               return NULL;
+
+       return container_of(ltn, struct mod_tree_node, node)->mod;
+}
+
+#else /* MODULES_TREE_LOOKUP */
+
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
+static void mod_tree_insert(struct module *mod) { }
+static void mod_tree_remove_init(struct module *mod) { }
+static void mod_tree_remove(struct module *mod) { }
+
+static struct module *mod_find(unsigned long addr)
+{
+       struct module *mod;
+
+       list_for_each_entry_rcu(mod, &modules, list) {
+               if (within_module(addr, mod))
+                       return mod;
+       }
+
+       return NULL;
+}
+
+#endif /* MODULES_TREE_LOOKUP */
+
+/*
+ * Bounds of module text, for speeding up __module_address.
+ * Protected by module_mutex.
+ */
+static void __mod_update_bounds(void *base, unsigned int size)
+{
+       unsigned long min = (unsigned long)base;
+       unsigned long max = min + size;
+
+       if (min < module_addr_min)
+               module_addr_min = min;
+       if (max > module_addr_max)
+               module_addr_max = max;
+}
+
+static void mod_update_bounds(struct module *mod)
+{
+       __mod_update_bounds(mod->module_core, mod->core_size);
+       if (mod->init_size)
+               __mod_update_bounds(mod->module_init, mod->init_size);
+}
+
+#ifdef CONFIG_KGDB_KDB
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_KGDB_KDB */
+
+static void module_assert_mutex(void)
+{
+       lockdep_assert_held(&module_mutex);
+}
+
+static void module_assert_mutex_or_preempt(void)
+{
+#ifdef CONFIG_LOCKDEP
+       if (unlikely(!debug_locks))
+               return;
+
+       WARN_ON(!rcu_read_lock_sched_held() &&
+               !lockdep_is_held(&module_mutex));
+#endif
+}
+
+static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
+#ifndef CONFIG_MODULE_SIG_FORCE
 module_param(sig_enforce, bool_enable_only, 0644);
 #endif /* !CONFIG_MODULE_SIG_FORCE */
-#endif /* CONFIG_MODULE_SIG */
 
 /* Block module loading/unloading? */
 int modules_disabled = 0;
@@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
-/* Bounds of module allocation, for speeding __module_address.
- * Protected by module_mutex. */
-static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-
 int register_module_notifier(struct notifier_block *nb)
 {
        return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
 #endif
        };
 
+       module_assert_mutex_or_preempt();
+
        if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
                return true;
 
@@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len,
 {
        struct module *mod;
 
+       module_assert_mutex();
+
        list_for_each_entry(mod, &modules, list) {
                if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
                        continue;
@@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 {
        const unsigned long *crc;
 
-       /* Since this should be found in kernel (which can't be removed),
-        * no locking is necessary. */
+       /*
+        * Since this should be found in kernel (which can't be removed), no
+        * locking is necessary -- use preempt_disable() to placate lockdep.
+        */
+       preempt_disable();
        if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
-                        &crc, true, false))
+                        &crc, true, false)) {
+               preempt_enable();
                BUG();
+       }
+       preempt_enable();
        return check_version(sechdrs, versindex,
                             VMLINUX_SYMBOL_STR(module_layout), mod, crc,
                             NULL);
@@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod)
        mod_kobject_put(mod);
 }
 
+static void init_param_lock(struct module *mod)
+{
+       mutex_init(&mod->param_lock);
+}
 #else /* !CONFIG_SYSFS */
 
 static int mod_sysfs_setup(struct module *mod,
@@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod)
 {
 }
 
+static void init_param_lock(struct module *mod)
+{
+}
 #endif /* CONFIG_SYSFS */
 
 static void mod_sysfs_teardown(struct module *mod)
@@ -1852,10 +2018,11 @@ static void free_module(struct module *mod)
        mutex_lock(&module_mutex);
        /* Unlink carefully: kallsyms could be walking list. */
        list_del_rcu(&mod->list);
+       mod_tree_remove(mod);
        /* Remove this module from bug list, this uses list_del_rcu */
        module_bug_cleanup(mod);
-       /* Wait for RCU synchronizing before releasing mod->list and buglist. */
-       synchronize_rcu();
+       /* Wait for RCU-sched synchronization before releasing mod->list and buglist. */
+       synchronize_sched();
        mutex_unlock(&module_mutex);
 
        /* This may be NULL, but that's OK */
@@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size)
        return vmalloc_exec(size);
 }
 
-static void *module_alloc_update_bounds(unsigned long size)
-{
-       void *ret = module_alloc(size);
-
-       if (ret) {
-               mutex_lock(&module_mutex);
-               /* Update module bounds. */
-               if ((unsigned long)ret < module_addr_min)
-                       module_addr_min = (unsigned long)ret;
-               if ((unsigned long)ret + size > module_addr_max)
-                       module_addr_max = (unsigned long)ret + size;
-               mutex_unlock(&module_mutex);
-       }
-       return ret;
-}
-
 #ifdef CONFIG_DEBUG_KMEMLEAK
 static void kmemleak_load_module(const struct module *mod,
                                 const struct load_info *info)
@@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info)
        void *ptr;
 
        /* Do the allocs. */
-       ptr = module_alloc_update_bounds(mod->core_size);
+       ptr = module_alloc(mod->core_size);
        /*
         * The pointer to this block is stored in the module structure
         * which is inside the block. Just mark it as not being a
@@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info)
        mod->module_core = ptr;
 
        if (mod->init_size) {
-               ptr = module_alloc_update_bounds(mod->init_size);
+               ptr = module_alloc(mod->init_size);
                /*
                 * The pointer to this block is stored in the module structure
                 * which is inside the block. This block doesn't need to be
@@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod)
        mod->symtab = mod->core_symtab;
        mod->strtab = mod->core_strtab;
 #endif
+       mod_tree_remove_init(mod);
        unset_module_init_ro_nx(mod);
        module_arch_freeing_init(mod);
        mod->module_init = NULL;
@@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod)
        mod->init_text_size = 0;
        /*
         * We want to free module_init, but be aware that kallsyms may be
-        * walking this with preempt disabled.  In all the failure paths,
-        * we call synchronize_rcu/synchronize_sched, but we don't want
-        * to slow down the success path, so use actual RCU here.
+        * walking this with preempt disabled.  In all the failure paths, we
+        * call synchronize_sched(), but we don't want to slow down the success
+        * path, so use actual RCU here.
         */
-       call_rcu(&freeinit->rcu, do_free_init);
+       call_rcu_sched(&freeinit->rcu, do_free_init);
        mutex_unlock(&module_mutex);
        wake_up_all(&module_wq);
 
@@ -3188,7 +3340,9 @@ again:
                err = -EEXIST;
                goto out;
        }
+       mod_update_bounds(mod);
        list_add_rcu(&mod->list, &modules);
+       mod_tree_insert(mod);
        err = 0;
 
 out:
@@ -3304,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
        if (err)
                goto unlink_mod;
 
+       init_param_lock(mod);
+
        /* Now we've got everything in the final locations, we can
         * find optional sections. */
        err = find_module_sections(mod, info);
@@ -3402,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
        /* Unlink carefully: kallsyms could be walking list. */
        list_del_rcu(&mod->list);
        wake_up_all(&module_wq);
-       /* Wait for RCU synchronizing before releasing mod->list. */
-       synchronize_rcu();
+       /* Wait for RCU-sched synchronizing before releasing mod->list. */
+       synchronize_sched();
        mutex_unlock(&module_mutex);
  free_module:
        /* Free lock-classes; relies on the preceding sync_rcu() */
@@ -3527,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr,
                            char **modname,
                            char *namebuf)
 {
-       struct module *mod;
        const char *ret = NULL;
+       struct module *mod;
 
        preempt_disable();
-       list_for_each_entry_rcu(mod, &modules, list) {
-               if (mod->state == MODULE_STATE_UNFORMED)
-                       continue;
-               if (within_module(addr, mod)) {
-                       if (modname)
-                               *modname = mod->name;
-                       ret = get_ksymbol(mod, addr, size, offset);
-                       break;
-               }
+       mod = __module_address(addr);
+       if (mod) {
+               if (modname)
+                       *modname = mod->name;
+               ret = get_ksymbol(mod, addr, size, offset);
        }
        /* Make a copy in here where it's safe */
        if (ret) {
@@ -3547,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr,
                ret = namebuf;
        }
        preempt_enable();
+
        return ret;
 }
 
@@ -3670,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
        unsigned int i;
        int ret;
 
+       module_assert_mutex();
+
        list_for_each_entry(mod, &modules, list) {
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
@@ -3844,13 +3999,15 @@ struct module *__module_address(unsigned long addr)
        if (addr < module_addr_min || addr > module_addr_max)
                return NULL;
 
-       list_for_each_entry_rcu(mod, &modules, list) {
+       module_assert_mutex_or_preempt();
+
+       mod = mod_find(addr);
+       if (mod) {
+               BUG_ON(!within_module(addr, mod));
                if (mod->state == MODULE_STATE_UNFORMED)
-                       continue;
-               if (within_module(addr, mod))
-                       return mod;
+                       mod = NULL;
        }
-       return NULL;
+       return mod;
 }
 EXPORT_SYMBOL_GPL(__module_address);
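Taken together, the module.c hunks above move the address lookup off the linear list and onto a latched rb-tree (mod_update_bounds()/mod_tree_insert()/mod_find(), introduced earlier in this series), and switch teardown to the RCU-sched flavour because lookups run under preempt_disable() rather than rcu_read_lock(). A minimal sketch of the pairing this relies on (do_something() and the final free are hypothetical stand-ins):

    /* reader: preemption off doubles as the RCU-sched read-side section */
    preempt_disable();
    mod = mod_find(addr);               /* lockless latched rb-tree lookup */
    if (mod && mod->state != MODULE_STATE_UNFORMED)
            do_something(mod);          /* hypothetical use of the module */
    preempt_enable();

    /* writer: unlink from list and tree, then wait out every such reader */
    mutex_lock(&module_mutex);
    list_del_rcu(&mod->list);
    mod_tree_remove(mod);
    synchronize_sched();                /* no reader can still hold 'mod' */
    mutex_unlock(&module_mutex);
    /* now safe to free the module's memory */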
 
index 30288c1e15dd1afe441f7d4dd0cd8f8b288eed28..b6554aa71094473094a97010ec4e3f739f92b5c8 100644 (file)
 #include <linux/slab.h>
 #include <linux/ctype.h>
 
-/* Protects all parameters, and incidentally kmalloced_param list. */
+#ifdef CONFIG_SYSFS
+/* Protects all built-in parameters, modules use their own param_lock */
 static DEFINE_MUTEX(param_lock);
 
+/* Use the module's mutex, or if built-in use the built-in mutex */
+#ifdef CONFIG_MODULES
+#define KPARAM_MUTEX(mod)      ((mod) ? &(mod)->param_lock : &param_lock)
+#else
+#define KPARAM_MUTEX(mod)      (&param_lock)
+#endif
+
+static inline void check_kparam_locked(struct module *mod)
+{
+       BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod)));
+}
+#else
+static inline void check_kparam_locked(struct module *mod)
+{
+}
+#endif /* !CONFIG_SYSFS */
+
 /* This just allows us to keep track of which parameters are kmalloced. */
 struct kmalloced_param {
        struct list_head list;
        char val[];
 };
 static LIST_HEAD(kmalloced_params);
+static DEFINE_SPINLOCK(kmalloced_params_lock);
 
 static void *kmalloc_parameter(unsigned int size)
 {
@@ -43,7 +62,10 @@ static void *kmalloc_parameter(unsigned int size)
        if (!p)
                return NULL;
 
+       spin_lock(&kmalloced_params_lock);
        list_add(&p->list, &kmalloced_params);
+       spin_unlock(&kmalloced_params_lock);
+
        return p->val;
 }
 
@@ -52,6 +74,7 @@ static void maybe_kfree_parameter(void *param)
 {
        struct kmalloced_param *p;
 
+       spin_lock(&kmalloced_params_lock);
        list_for_each_entry(p, &kmalloced_params, list) {
                if (p->val == param) {
                        list_del(&p->list);
@@ -59,6 +82,7 @@ static void maybe_kfree_parameter(void *param)
                        break;
                }
        }
+       spin_unlock(&kmalloced_params_lock);
 }
 
 static char dash2underscore(char c)
@@ -119,10 +143,10 @@ static int parse_one(char *param,
                                return -EINVAL;
                        pr_debug("handling %s with %p\n", param,
                                params[i].ops->set);
-                       mutex_lock(&param_lock);
+                       kernel_param_lock(params[i].mod);
                        param_check_unsafe(&params[i]);
                        err = params[i].ops->set(val, &params[i]);
-                       mutex_unlock(&param_lock);
+                       kernel_param_unlock(params[i].mod);
                        return err;
                }
        }
@@ -254,7 +278,7 @@ char *parse_args(const char *doing,
                return scnprintf(buffer, PAGE_SIZE, format,             \
                                *((type *)kp->arg));                    \
        }                                                               \
-       struct kernel_param_ops param_ops_##name = {                    \
+       const struct kernel_param_ops param_ops_##name = {                      \
                .set = param_set_##name,                                \
                .get = param_get_##name,                                \
        };                                                              \
@@ -306,7 +330,7 @@ static void param_free_charp(void *arg)
        maybe_kfree_parameter(*((char **)arg));
 }
 
-struct kernel_param_ops param_ops_charp = {
+const struct kernel_param_ops param_ops_charp = {
        .set = param_set_charp,
        .get = param_get_charp,
        .free = param_free_charp,
@@ -331,13 +355,44 @@ int param_get_bool(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_bool);
 
-struct kernel_param_ops param_ops_bool = {
+const struct kernel_param_ops param_ops_bool = {
        .flags = KERNEL_PARAM_OPS_FL_NOARG,
        .set = param_set_bool,
        .get = param_get_bool,
 };
 EXPORT_SYMBOL(param_ops_bool);
 
+int param_set_bool_enable_only(const char *val, const struct kernel_param *kp)
+{
+       int err = 0;
+       bool new_value;
+       bool orig_value = *(bool *)kp->arg;
+       struct kernel_param dummy_kp = *kp;
+
+       dummy_kp.arg = &new_value;
+
+       err = param_set_bool(val, &dummy_kp);
+       if (err)
+               return err;
+
+       /* Don't let them unset it once it's set! */
+       if (!new_value && orig_value)
+               return -EROFS;
+
+       if (new_value)
+               err = param_set_bool(val, kp);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(param_set_bool_enable_only);
+
+const struct kernel_param_ops param_ops_bool_enable_only = {
+       .flags = KERNEL_PARAM_OPS_FL_NOARG,
+       .set = param_set_bool_enable_only,
+       .get = param_get_bool,
+};
+EXPORT_SYMBOL_GPL(param_ops_bool_enable_only);
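A usage note, as a hypothetical example rather than part of this patch: the new ops slot into module_param_cb() like any custom kernel_param_ops, yielding a parameter that userspace can switch on via sysfs but never back off:

    static bool featurex_on;            /* hypothetical one-way switch */
    module_param_cb(featurex_on, &param_ops_bool_enable_only,
                    &featurex_on, 0644);
    MODULE_PARM_DESC(featurex_on, "enable feature X (cannot be cleared)");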
+
 /* This one must be bool. */
 int param_set_invbool(const char *val, const struct kernel_param *kp)
 {
@@ -359,7 +414,7 @@ int param_get_invbool(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_invbool);
 
-struct kernel_param_ops param_ops_invbool = {
+const struct kernel_param_ops param_ops_invbool = {
        .set = param_set_invbool,
        .get = param_get_invbool,
 };
@@ -367,12 +422,11 @@ EXPORT_SYMBOL(param_ops_invbool);
 
 int param_set_bint(const char *val, const struct kernel_param *kp)
 {
-       struct kernel_param boolkp;
+       /* Match bool exactly, by re-using it. */
+       struct kernel_param boolkp = *kp;
        bool v;
        int ret;
 
-       /* Match bool exactly, by re-using it. */
-       boolkp = *kp;
        boolkp.arg = &v;
 
        ret = param_set_bool(val, &boolkp);
@@ -382,7 +436,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_set_bint);
 
-struct kernel_param_ops param_ops_bint = {
+const struct kernel_param_ops param_ops_bint = {
        .flags = KERNEL_PARAM_OPS_FL_NOARG,
        .set = param_set_bint,
        .get = param_get_int,
@@ -390,7 +444,8 @@ struct kernel_param_ops param_ops_bint = {
 EXPORT_SYMBOL(param_ops_bint);
 
 /* We break the rule and mangle the string. */
-static int param_array(const char *name,
+static int param_array(struct module *mod,
+                      const char *name,
                       const char *val,
                       unsigned int min, unsigned int max,
                       void *elem, int elemsize,
@@ -421,7 +476,7 @@ static int param_array(const char *name,
                /* nul-terminate and parse */
                save = val[len];
                ((char *)val)[len] = '\0';
-               BUG_ON(!mutex_is_locked(&param_lock));
+               check_kparam_locked(mod);
                ret = set(val, &kp);
 
                if (ret != 0)
@@ -443,7 +498,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
        const struct kparam_array *arr = kp->arr;
        unsigned int temp_num;
 
-       return param_array(kp->name, val, 1, arr->max, arr->elem,
+       return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem,
                           arr->elemsize, arr->ops->set, kp->level,
                           arr->num ?: &temp_num);
 }
@@ -452,14 +507,13 @@ static int param_array_get(char *buffer, const struct kernel_param *kp)
 {
        int i, off, ret;
        const struct kparam_array *arr = kp->arr;
-       struct kernel_param p;
+       struct kernel_param p = *kp;
 
-       p = *kp;
        for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
                if (i)
                        buffer[off++] = ',';
                p.arg = arr->elem + arr->elemsize * i;
-               BUG_ON(!mutex_is_locked(&param_lock));
+               check_kparam_locked(p.mod);
                ret = arr->ops->get(buffer + off, &p);
                if (ret < 0)
                        return ret;
@@ -479,7 +533,7 @@ static void param_array_free(void *arg)
                        arr->ops->free(arr->elem + arr->elemsize * i);
 }
 
-struct kernel_param_ops param_array_ops = {
+const struct kernel_param_ops param_array_ops = {
        .set = param_array_set,
        .get = param_array_get,
        .free = param_array_free,
@@ -507,7 +561,7 @@ int param_get_string(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_string);
 
-struct kernel_param_ops param_ops_string = {
+const struct kernel_param_ops param_ops_string = {
        .set = param_set_copystring,
        .get = param_get_string,
 };
@@ -542,9 +596,9 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
        if (!attribute->param->ops->get)
                return -EPERM;
 
-       mutex_lock(&param_lock);
+       kernel_param_lock(mk->mod);
        count = attribute->param->ops->get(buf, attribute->param);
-       mutex_unlock(&param_lock);
+       kernel_param_unlock(mk->mod);
        if (count > 0) {
                strcat(buf, "\n");
                ++count;
@@ -554,7 +608,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
 
 /* sysfs always hands a nul-terminated string in buf.  We rely on that. */
 static ssize_t param_attr_store(struct module_attribute *mattr,
-                               struct module_kobject *km,
+                               struct module_kobject *mk,
                                const char *buf, size_t len)
 {
        int err;
@@ -563,10 +617,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
        if (!attribute->param->ops->set)
                return -EPERM;
 
-       mutex_lock(&param_lock);
+       kernel_param_lock(mk->mod);
        param_check_unsafe(attribute->param);
        err = attribute->param->ops->set(buf, attribute->param);
-       mutex_unlock(&param_lock);
+       kernel_param_unlock(mk->mod);
        if (!err)
                return len;
        return err;
@@ -580,17 +634,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 #endif
 
 #ifdef CONFIG_SYSFS
-void __kernel_param_lock(void)
+void kernel_param_lock(struct module *mod)
 {
-       mutex_lock(&param_lock);
+       mutex_lock(KPARAM_MUTEX(mod));
 }
-EXPORT_SYMBOL(__kernel_param_lock);
 
-void __kernel_param_unlock(void)
+void kernel_param_unlock(struct module *mod)
 {
-       mutex_unlock(&param_lock);
+       mutex_unlock(KPARAM_MUTEX(mod));
 }
-EXPORT_SYMBOL(__kernel_param_unlock);
+
+EXPORT_SYMBOL(kernel_param_lock);
+EXPORT_SYMBOL(kernel_param_unlock);
 
 /*
  * add_sysfs_param - add a parameter to sysfs
@@ -856,6 +911,7 @@ static void __init version_sysfs_builtin(void)
                mk = locate_module_kobject(vattr->module_name);
                if (mk) {
                        err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
+                       WARN_ON_ONCE(err);
                        kobject_uevent(&mk->kobj, KOBJ_ADD);
                        kobject_put(&mk->kobj);
                }
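The renamed locking interface above is what later hunks (e.g. net/mac80211/rate.c below) convert to: a module serializes against sysfs writers of its own parameters by passing THIS_MODULE, while built-in code passes NULL and falls back to the global param_lock via KPARAM_MUTEX(). A sketch, with use() and the parameter name hypothetical:

    kernel_param_lock(THIS_MODULE);
    use(my_charp_param);                /* parameters are stable in this window */
    kernel_param_unlock(THIS_MODULE);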
index 7e01f78f041778abe405c9115c15e10a77f64d03..9e302315e33db1b1d57227d55a43b19266e428ce 100644 (file)
@@ -187,7 +187,7 @@ config DPM_WATCHDOG
 config DPM_WATCHDOG_TIMEOUT
        int "Watchdog timeout in seconds"
        range 1 120
-       default 12
+       default 60
        depends on DPM_WATCHDOG
 
 config PM_TRACE
index 2329daae5255374ca8db8619d577ed8e8274fccb..690f78f210f2cf4ec9436c9f37a3bbf5eb876397 100644 (file)
@@ -552,7 +552,7 @@ int hibernation_platform_enter(void)
 
        error = disable_nonboot_cpus();
        if (error)
-               goto Platform_finish;
+               goto Enable_cpus;
 
        local_irq_disable();
        syscore_suspend();
@@ -568,6 +568,8 @@ int hibernation_platform_enter(void)
  Power_up:
        syscore_resume();
        local_irq_enable();
+
+ Enable_cpus:
        enable_nonboot_cpus();
 
  Platform_finish:
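The hibernation fix is the usual unwind-label idiom: each failing step jumps to the label that undoes everything already set up, so a disable_nonboot_cpus() failure must fall through the CPU re-enable instead of skipping straight to platform teardown. Schematically (a control-flow sketch, not the whole function):

    error = disable_nonboot_cpus();
    if (error)
            goto Enable_cpus;           /* was Platform_finish: left CPUs offline */

    /* ... irqs-off suspend/enter/resume section ... */

 Enable_cpus:
    enable_nonboot_cpus();

 Platform_finish:
    hibernation_ops->finish();          /* platform cleanup */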
index ffc4cc3dcd47b4e277df56735e11d51abdd87f6d..49eca0beed32ebc2972cdd6ccdb1b2b65bbd769f 100644 (file)
@@ -12,5 +12,3 @@ obj-$(CONFIG_TICK_ONESHOT)                    += tick-oneshot.o tick-sched.o
 obj-$(CONFIG_TIMER_STATS)                      += timer_stats.o
 obj-$(CONFIG_DEBUG_FS)                         += timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)                      += test_udelay.o
-
-$(obj)/time.o: $(objtree)/include/config/
index 30b7a409bf1ea19001e3eeb966679137bfe74029..bca3667a2de1f1a221d0fffe0f282137d82fa90e 100644 (file)
@@ -319,32 +319,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
  * We want to use this from any context including NMI and tracing /
  * instrumenting the timekeeping code itself.
  *
- * So we handle this differently than the other timekeeping accessor
- * functions which retry when the sequence count has changed. The
- * update side does:
- *
- * smp_wmb();  <- Ensure that the last base[1] update is visible
- * tkf->seq++;
- * smp_wmb();  <- Ensure that the seqcount update is visible
- * update(tkf->base[0], tkr);
- * smp_wmb();  <- Ensure that the base[0] update is visible
- * tkf->seq++;
- * smp_wmb();  <- Ensure that the seqcount update is visible
- * update(tkf->base[1], tkr);
- *
- * The reader side does:
- *
- * do {
- *     seq = tkf->seq;
- *     smp_rmb();
- *     idx = seq & 0x01;
- *     now = now(tkf->base[idx]);
- *     smp_rmb();
- * } while (seq != tkf->seq)
- *
- * As long as we update base[0] readers are forced off to
- * base[1]. Once base[0] is updated readers are redirected to base[0]
- * and the base[1] update takes place.
+ * Employ the latch technique; see @raw_write_seqcount_latch.
  *
 * So if an NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
@@ -407,7 +382,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
        u64 now;
 
        do {
-               seq = raw_read_seqcount(&tkf->seq);
+               seq = raw_read_seqcount_latch(&tkf->seq);
                tkr = tkf->base + (seq & 0x01);
                now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
        } while (read_seqcount_retry(&tkf->seq, seq));
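The deleted wall of text is now documented once, at raw_write_seqcount_latch(). The update side that pairs with the reader loop above goes roughly as follows (a sketch of the documented protocol, not this file's exact code):

    /* writer: two latch flips, each redirecting readers to the other copy */
    raw_write_seqcount_latch(&tkf->seq);        /* seq++: readers move to base[1] */
    memcpy(tkf->base + 0, tkr, sizeof(*tkr));
    raw_write_seqcount_latch(&tkf->seq);        /* seq++: readers move to base[0] */
    memcpy(tkf->base + 1, tkr, sizeof(*tkr));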
index 520499dd85af42e96b2bbd8c729df36d238ad27a..5e097fa9faf7016470b8283931023a15d20ed97d 100644 (file)
@@ -1566,7 +1566,7 @@ static void migrate_timers(int cpu)
 
        BUG_ON(cpu_online(cpu));
        old_base = per_cpu_ptr(&tvec_bases, cpu);
-       new_base = this_cpu_ptr(&tvec_bases);
+       new_base = get_cpu_ptr(&tvec_bases);
        /*
         * The caller is globally serialized and nobody else
         * takes two locks at once, deadlock is not possible.
@@ -1590,6 +1590,7 @@ static void migrate_timers(int cpu)
 
        spin_unlock(&old_base->lock);
        spin_unlock_irq(&new_base->lock);
+       put_cpu_ptr(&tvec_bases);
 }
 
 static int timer_cpu_notify(struct notifier_block *self,
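The migrate_timers() change matters because a this_cpu_ptr() result obtained on a preemptible path can be carried across a migration and end up pointing at another CPU's base; get_cpu_ptr() pins the task by disabling preemption until the matching put_cpu_ptr(). The general pattern (sketch):

    struct tvec_base *base;

    base = get_cpu_ptr(&tvec_bases);    /* disables preemption */
    /* ... operate on this CPU's base, no migration possible ... */
    put_cpu_ptr(&tvec_bases);           /* re-enables preemption */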
index 5243d4b030876bc558f34f392d96805710a719c6..4c4f06176f748616b180254e94eda6bbed7dde25 100644 (file)
@@ -285,12 +285,7 @@ static bool wq_disable_numa;
 module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 
 /* see the comment above the definition of WQ_POWER_EFFICIENT */
-#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
-static bool wq_power_efficient = true;
-#else
-static bool wq_power_efficient;
-#endif
-
+static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
 static bool wq_numa_enabled;           /* unbound NUMA affinity enabled */
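IS_ENABLED() folds the removed #ifdef block into a single initializer: it expands to 1 when the config symbol is y or m and to 0 otherwise, so any bool default derived from Kconfig can be written the same way, for instance (CONFIG_FOO_DEFAULT being a placeholder symbol):

    static bool foo_default = IS_ENABLED(CONFIG_FOO_DEFAULT);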
index 0c3bd9552b6fc4fa5e380ac013caf5ac618b1faf..cff145f032a550ff1703208cbadc033e2a6336b6 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -66,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
        struct module *mod;
        const struct bug_entry *bug = NULL;
 
-       rcu_read_lock();
+       rcu_read_lock_sched();
        list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
                unsigned i;
 
@@ -77,7 +77,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
        }
        bug = NULL;
 out:
-       rcu_read_unlock();
+       rcu_read_unlock_sched();
 
        return bug;
 }
@@ -88,6 +88,8 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
        char *secstrings;
        unsigned int i;
 
+       lockdep_assert_held(&module_mutex);
+
        mod->bug_table = NULL;
        mod->num_bugs = 0;
 
@@ -113,6 +115,7 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 
 void module_bug_cleanup(struct module *mod)
 {
+       lockdep_assert_held(&module_mutex);
        list_del_rcu(&mod->bug_list);
 }
 
index 75ee63834fd1acbfc30af4cb209d49d717857796..2e3bd01964a9047f481862c2323be8c4ea42db9c 100644 (file)
@@ -545,6 +545,7 @@ out:
        kfree(devpath);
        return error;
 }
+EXPORT_SYMBOL_GPL(kobject_move);
 
 /**
  * kobject_del - unlink kobject from hierarchy.
index c16c81a3d430e84a8084de93e97900715a8e6b8e..1356454e36de9f1c083b2c84f297ba4878498c0a 100644 (file)
  *  parentheses and have some accompanying text comment.
  */
 
+/*
+ * Notes on lockless lookups:
+ *
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration, mind you: tree rotations are not atomic, so a lookup
+ * might miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- it does not get stuck in a loop.
+ *
+ * It also guarantees that if the lookup returns an element it is the 'correct'
+ * one. But not returning an element does _NOT_ mean it's not present.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
 static inline void rb_set_black(struct rb_node *rb)
 {
        rb->__rb_parent_color |= RB_BLACK;
@@ -129,8 +153,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
                                 * This still leaves us in violation of 4), the
                                 * continuation into Case 3 will fix that.
                                 */
-                               parent->rb_right = tmp = node->rb_left;
-                               node->rb_left = parent;
+                               tmp = node->rb_left;
+                               WRITE_ONCE(parent->rb_right, tmp);
+                               WRITE_ONCE(node->rb_left, parent);
                                if (tmp)
                                        rb_set_parent_color(tmp, parent,
                                                            RB_BLACK);
@@ -149,8 +174,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
                         *     /                 \
                         *    n                   U
                         */
-                       gparent->rb_left = tmp;  /* == parent->rb_right */
-                       parent->rb_right = gparent;
+                       WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+                       WRITE_ONCE(parent->rb_right, gparent);
                        if (tmp)
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                        __rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -171,8 +196,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
                        tmp = parent->rb_left;
                        if (node == tmp) {
                                /* Case 2 - right rotate at parent */
-                               parent->rb_left = tmp = node->rb_right;
-                               node->rb_right = parent;
+                               tmp = node->rb_right;
+                               WRITE_ONCE(parent->rb_left, tmp);
+                               WRITE_ONCE(node->rb_right, parent);
                                if (tmp)
                                        rb_set_parent_color(tmp, parent,
                                                            RB_BLACK);
@@ -183,8 +209,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
                        }
 
                        /* Case 3 - left rotate at gparent */
-                       gparent->rb_right = tmp;  /* == parent->rb_left */
-                       parent->rb_left = gparent;
+                       WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+                       WRITE_ONCE(parent->rb_left, gparent);
                        if (tmp)
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                        __rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -224,8 +250,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
                                 *      / \         / \
                                 *     Sl  Sr      N   Sl
                                 */
-                               parent->rb_right = tmp1 = sibling->rb_left;
-                               sibling->rb_left = parent;
+                               tmp1 = sibling->rb_left;
+                               WRITE_ONCE(parent->rb_right, tmp1);
+                               WRITE_ONCE(sibling->rb_left, parent);
                                rb_set_parent_color(tmp1, parent, RB_BLACK);
                                __rb_rotate_set_parents(parent, sibling, root,
                                                        RB_RED);
@@ -275,9 +302,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
                                 *                       \
                                 *                        Sr
                                 */
-                               sibling->rb_left = tmp1 = tmp2->rb_right;
-                               tmp2->rb_right = sibling;
-                               parent->rb_right = tmp2;
+                               tmp1 = tmp2->rb_right;
+                               WRITE_ONCE(sibling->rb_left, tmp1);
+                               WRITE_ONCE(tmp2->rb_right, sibling);
+                               WRITE_ONCE(parent->rb_right, tmp2);
                                if (tmp1)
                                        rb_set_parent_color(tmp1, sibling,
                                                            RB_BLACK);
@@ -297,8 +325,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
                         *        / \         / \
                         *      (sl) sr      N  (sl)
                         */
-                       parent->rb_right = tmp2 = sibling->rb_left;
-                       sibling->rb_left = parent;
+                       tmp2 = sibling->rb_left;
+                       WRITE_ONCE(parent->rb_right, tmp2);
+                       WRITE_ONCE(sibling->rb_left, parent);
                        rb_set_parent_color(tmp1, sibling, RB_BLACK);
                        if (tmp2)
                                rb_set_parent(tmp2, parent);
@@ -310,8 +339,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
                        sibling = parent->rb_left;
                        if (rb_is_red(sibling)) {
                                /* Case 1 - right rotate at parent */
-                               parent->rb_left = tmp1 = sibling->rb_right;
-                               sibling->rb_right = parent;
+                               tmp1 = sibling->rb_right;
+                               WRITE_ONCE(parent->rb_left, tmp1);
+                               WRITE_ONCE(sibling->rb_right, parent);
                                rb_set_parent_color(tmp1, parent, RB_BLACK);
                                __rb_rotate_set_parents(parent, sibling, root,
                                                        RB_RED);
@@ -336,9 +366,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
                                        break;
                                }
                                /* Case 3 - right rotate at sibling */
-                               sibling->rb_right = tmp1 = tmp2->rb_left;
-                               tmp2->rb_left = sibling;
-                               parent->rb_left = tmp2;
+                               tmp1 = tmp2->rb_left;
+                               WRITE_ONCE(sibling->rb_right, tmp1);
+                               WRITE_ONCE(tmp2->rb_left, sibling);
+                               WRITE_ONCE(parent->rb_left, tmp2);
                                if (tmp1)
                                        rb_set_parent_color(tmp1, sibling,
                                                            RB_BLACK);
@@ -347,8 +378,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
                                sibling = tmp2;
                        }
                        /* Case 4 - left rotate at parent + color flips */
-                       parent->rb_left = tmp2 = sibling->rb_right;
-                       sibling->rb_right = parent;
+                       tmp2 = sibling->rb_right;
+                       WRITE_ONCE(parent->rb_left, tmp2);
+                       WRITE_ONCE(sibling->rb_right, parent);
                        rb_set_parent_color(tmp1, sibling, RB_BLACK);
                        if (tmp2)
                                rb_set_parent(tmp2, parent);
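Per the notes added at the top of the file, the WRITE_ONCE() stores make a best-effort lockless descent safe: it terminates and any node it returns is valid, though it may miss entries while a rotation is in flight. A reader obeying those rules would look roughly like this (sketch; cmp() is a caller-supplied comparator):

    struct rb_node *node = READ_ONCE(root->rb_node);

    while (node) {
            int c = cmp(key, node);             /* hypothetical comparator */

            if (c < 0)
                    node = READ_ONCE(node->rb_left);
            else if (c > 0)
                    node = READ_ONCE(node->rb_right);
            else
                    return node;                /* a hit is guaranteed valid */
    }
    return NULL;                                /* NOT proof of absence */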
index 983b78694c4637adcd617de8085498acecfbd288..3e5f8f29c28640e44af5f5f9d1c3553986064588 100644 (file)
@@ -855,7 +855,7 @@ void __init setup_kmalloc_cache_index_table(void)
        }
 }
 
-static void new_kmalloc_cache(int idx, unsigned long flags)
+static void __init new_kmalloc_cache(int idx, unsigned long flags)
 {
        kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
                                        kmalloc_info[idx].size, flags);
index 9c891d0412a298428e7abd306347beee749f5b0d..ae3a47f9d1d5298406ca33900624fe49c9c85719 100644 (file)
@@ -57,7 +57,7 @@ static const struct proto_ops ax25_proto_ops;
 
 static void ax25_free_sock(struct sock *sk)
 {
-       ax25_cb_put(ax25_sk(sk));
+       ax25_cb_put(sk_to_ax25(sk));
 }
 
 /*
@@ -306,7 +306,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
                while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
                        if (skb->sk != ax25->sk) {
                                /* A pending connection */
-                               ax25_cb *sax25 = ax25_sk(skb->sk);
+                               ax25_cb *sax25 = sk_to_ax25(skb->sk);
 
                                /* Queue the unaccepted socket for death */
                                sock_orphan(skb->sk);
@@ -551,7 +551,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
                return -EFAULT;
 
        lock_sock(sk);
-       ax25 = ax25_sk(sk);
+       ax25 = sk_to_ax25(sk);
 
        switch (optname) {
        case AX25_WINDOW:
@@ -697,7 +697,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname,
        length = min_t(unsigned int, maxlen, sizeof(int));
 
        lock_sock(sk);
-       ax25 = ax25_sk(sk);
+       ax25 = sk_to_ax25(sk);
 
        switch (optname) {
        case AX25_WINDOW:
@@ -796,7 +796,7 @@ out:
 static struct proto ax25_proto = {
        .name     = "AX25",
        .owner    = THIS_MODULE,
-       .obj_size = sizeof(struct sock),
+       .obj_size = sizeof(struct ax25_sock),
 };
 
 static int ax25_create(struct net *net, struct socket *sock, int protocol,
@@ -858,7 +858,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
        if (sk == NULL)
                return -ENOMEM;
 
-       ax25 = sk->sk_protinfo = ax25_create_cb();
+       ax25 = ax25_sk(sk)->cb = ax25_create_cb();
        if (!ax25) {
                sk_free(sk);
                return -ENOMEM;
@@ -910,7 +910,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
        sk->sk_state    = TCP_ESTABLISHED;
        sock_copy_flags(sk, osk);
 
-       oax25 = ax25_sk(osk);
+       oax25 = sk_to_ax25(osk);
 
        ax25->modulus = oax25->modulus;
        ax25->backoff = oax25->backoff;
@@ -938,7 +938,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
                }
        }
 
-       sk->sk_protinfo = ax25;
+       ax25_sk(sk)->cb = ax25;
        sk->sk_destruct = ax25_free_sock;
        ax25->sk    = sk;
 
@@ -956,7 +956,7 @@ static int ax25_release(struct socket *sock)
        sock_hold(sk);
        sock_orphan(sk);
        lock_sock(sk);
-       ax25 = ax25_sk(sk);
+       ax25 = sk_to_ax25(sk);
 
        if (sk->sk_type == SOCK_SEQPACKET) {
                switch (ax25->state) {
@@ -1066,7 +1066,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
        lock_sock(sk);
 
-       ax25 = ax25_sk(sk);
+       ax25 = sk_to_ax25(sk);
        if (!sock_flag(sk, SOCK_ZAPPED)) {
                err = -EINVAL;
                goto out;
@@ -1113,7 +1113,7 @@ static int __must_check ax25_connect(struct socket *sock,
        struct sockaddr *uaddr, int addr_len, int flags)
 {
        struct sock *sk = sock->sk;
-       ax25_cb *ax25 = ax25_sk(sk), *ax25t;
+       ax25_cb *ax25 = sk_to_ax25(sk), *ax25t;
        struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
        ax25_digi *digi = NULL;
        int ct = 0, err = 0;
@@ -1394,7 +1394,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 
        memset(fsa, 0, sizeof(*fsa));
        lock_sock(sk);
-       ax25 = ax25_sk(sk);
+       ax25 = sk_to_ax25(sk);
 
        if (peer != 0) {
                if (sk->sk_state != TCP_ESTABLISHED) {
@@ -1446,7 +1446,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
                return -EINVAL;
 
        lock_sock(sk);
-       ax25 = ax25_sk(sk);
+       ax25 = sk_to_ax25(sk);
 
        if (sock_flag(sk, SOCK_ZAPPED)) {
                err = -EADDRNOTAVAIL;
@@ -1621,7 +1621,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
        if (skb == NULL)
                goto out;
 
-       if (!ax25_sk(sk)->pidincl)
+       if (!sk_to_ax25(sk)->pidincl)
                skb_pull(skb, 1);               /* Remove PID */
 
        skb_reset_transport_header(skb);
@@ -1762,7 +1762,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
        case SIOCAX25GETINFO:
        case SIOCAX25GETINFOOLD: {
-               ax25_cb *ax25 = ax25_sk(sk);
+               ax25_cb *ax25 = sk_to_ax25(sk);
                struct ax25_info_struct ax25_info;
 
                ax25_info.t1        = ax25->t1   / HZ;
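Context for the ax25_sk() -> sk_to_ax25() conversions: this series drops the untyped sk->sk_protinfo pointer (see the net/core/sock.c hunk below, which removes its kfree()) and sizes the socket as struct ax25_sock instead. Judging from these hunks, the accessors defined elsewhere in the patch have roughly this shape (reconstruction):

    struct ax25_sock {
            struct sock sk;             /* must stay first */
            struct ax25_cb *cb;
    };

    static inline struct ax25_sock *ax25_sk(const struct sock *sk)
    {
            return (struct ax25_sock *)sk;
    }

    static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
    {
            return ax25_sk(sk)->cb;
    }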
index 29a3687237aa4288ed078e222c80b8fa85021a8f..bb5a0e4e98d9df09ec535a20ea73253817fe64db 100644 (file)
@@ -353,7 +353,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
                        return 0;
                }
 
-               ax25 = ax25_sk(make);
+               ax25 = sk_to_ax25(make);
                skb_set_owner_r(skb, make);
                skb_queue_head(&sk->sk_receive_queue, skb);
 
index 476e5dda59e19822dba98a931369ff2666c59c0d..2a834c6179b9973e45274d793e7d744939e5f49e 100644 (file)
@@ -129,7 +129,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
        struct flow_dissector_key_ports *key_ports;
        struct flow_dissector_key_tags *key_tags;
        struct flow_dissector_key_keyid *key_keyid;
-       u8 ip_proto;
+       u8 ip_proto = 0;
 
        if (!data) {
                data = skb->data;
index 1e1fe9a68d835983d760d50f9ef6a11309ffcfc1..08f16db46070a1520fcdd6892477093e9474af4f 100644 (file)
@@ -1454,7 +1454,7 @@ void sk_destruct(struct sock *sk)
 
 static void __sk_free(struct sock *sk)
 {
-       if (unlikely(sock_diag_has_destroy_listeners(sk)))
+       if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
                sock_diag_broadcast_destroy(sk);
        else
                sk_destruct(sk);
@@ -2269,7 +2269,6 @@ static void sock_def_write_space(struct sock *sk)
 
 static void sock_def_destruct(struct sock *sk)
 {
-       kfree(sk->sk_protinfo);
 }
 
 void sk_send_sigurg(struct sock *sk)
index 04ffad311704852a5d2c35c99eea2f1c4293f5e1..0917123790eaf09b001c97a733039185fdb0a800 100644 (file)
@@ -112,7 +112,7 @@ static int dsa_slave_open(struct net_device *dev)
 
 clear_promisc:
        if (dev->flags & IFF_PROMISC)
-               dev_set_promiscuity(master, 0);
+               dev_set_promiscuity(master, -1);
 clear_allmulti:
        if (dev->flags & IFF_ALLMULTI)
                dev_set_allmulti(master, -1);
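The dsa error-path fix hinges on dev_set_promiscuity() semantics: the second argument is a signed adjustment to a reference count, not an absolute state, so undoing an earlier +1 takes -1, while 0 is a no-op that leaks the reference:

    dev_set_promiscuity(master, 1);     /* open path: take a reference */
    /* ... on the error path ... */
    dev_set_promiscuity(master, -1);    /* drop it again; 0 changed nothing */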
index 3bfccd83551ce71cc2b69efe233426f3e70ea854..c7358ea4ae93530a7f6ef110a2dc204f19ac830e 100644 (file)
@@ -1045,7 +1045,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
                    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
                        goto nla_put_failure;
                if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
-                       in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
+                       in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
                        if (in_dev &&
                            IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
                                rtm->rtm_flags |= RTNH_F_DEAD;
@@ -1074,7 +1074,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 
                        rtnh->rtnh_flags = nh->nh_flags & 0xFF;
                        if (nh->nh_flags & RTNH_F_LINKDOWN) {
-                               in_dev = __in_dev_get_rcu(nh->nh_dev);
+                               in_dev = __in_dev_get_rtnl(nh->nh_dev);
                                if (in_dev &&
                                    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
                                        rtnh->rtnh_flags |= RTNH_F_DEAD;
index 36ba7c4f028305ec05d055e1575f5a857ac3d110..fda33f961d83ce44f05ab3298113877d01768fb2 100644 (file)
@@ -103,7 +103,7 @@ ieee80211_rate_control_ops_get(const char *name)
        const struct rate_control_ops *ops;
        const char *alg_name;
 
-       kparam_block_sysfs_write(ieee80211_default_rc_algo);
+       kernel_param_lock(THIS_MODULE);
        if (!name)
                alg_name = ieee80211_default_rc_algo;
        else
@@ -117,7 +117,7 @@ ieee80211_rate_control_ops_get(const char *name)
        /* try built-in one if specific alg requested but not found */
        if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
                ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
-       kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
+       kernel_param_unlock(THIS_MODULE);
 
        return ops;
 }
index b92d3f49c23e0dd93ac2ff0766bb97f1add15baa..9d37ccd95062a6840d1bb1e140b173dd1fe0b9d0 100644 (file)
@@ -216,8 +216,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_KEY_IPV6_DST_MASK]  = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_TCP_SRC]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_TCP_DST]        = { .type = NLA_U16 },
-       [TCA_FLOWER_KEY_TCP_SRC]        = { .type = NLA_U16 },
-       [TCA_FLOWER_KEY_TCP_DST]        = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_UDP_SRC]        = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_UDP_DST]        = { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
index fc5e45b8a832d94367178536a341001021a19c44..abe7c2db24120a13992131e2be9a19c70a297de0 100644 (file)
@@ -599,7 +599,9 @@ out:
        return err;
 no_route:
        kfree_skb(nskb);
-       IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+
+       if (asoc)
+               IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
 
        /* FIXME: Returning the 'err' will affect all the associations
         * associated with a socket, although only one of the paths of the
index 5f6c4e61325b65822be525d75ebe3bb7357b97e7..1425ec2bbd5ae359a8e0408a89a6da6bb60bd87e 100644 (file)
@@ -2121,12 +2121,6 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
        if (sp->subscribe.sctp_data_io_event)
                sctp_ulpevent_read_sndrcvinfo(event, msg);
 
-#if 0
-       /* FIXME: we should be calling IP/IPv6 layers.  */
-       if (sk->sk_protinfo.af_inet.cmsg_flags)
-               ip_cmsg_recv(msg, skb);
-#endif
-
        err = copied;
 
        /* If skb's length exceeds the user's buffer, update the skb and
index 47f38be4155fa9e6c3dbb98147027407645b2e91..02f53674dc39d6973d374f18b43bd0b1b93952c7 100644 (file)
@@ -72,7 +72,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
 
 #define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
 
-static struct kernel_param_ops param_ops_hashtbl_sz = {
+static const struct kernel_param_ops param_ops_hashtbl_sz = {
        .set = param_set_hashtbl_sz,
        .get = param_get_hashtbl_sz,
 };
index 66891e32c5e311b386c9afa25b2cf4cd25325427..b0517287075b2753101bbde70262c99c2a58f35b 100644 (file)
@@ -2982,7 +2982,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
                        RPC_MAX_RESVPORT);
 }
 
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
        .set = param_set_portnr,
        .get = param_get_uint,
 };
@@ -3001,7 +3001,7 @@ static int param_set_slot_table_size(const char *val,
                        RPC_MAX_SLOT_TABLE);
 }
 
-static struct kernel_param_ops param_ops_slot_table_size = {
+static const struct kernel_param_ops param_ops_slot_table_size = {
        .set = param_set_slot_table_size,
        .get = param_get_uint,
 };
@@ -3017,7 +3017,7 @@ static int param_set_max_slot_table_size(const char *val,
                        RPC_MAX_SLOT_TABLE_LIMIT);
 }
 
-static struct kernel_param_ops param_ops_max_slot_table_size = {
+static const struct kernel_param_ops param_ops_max_slot_table_size = {
        .set = param_set_max_slot_table_size,
        .get = param_get_uint,
 };
index 4906ca3c0f3a576a529eacb26631f8585291ae40..a816382fc8af1b9efb016f888493ca4dcc65fe3b 100644 (file)
@@ -108,6 +108,11 @@ void tipc_bclink_remove_node(struct net *net, u32 addr)
 
        tipc_bclink_lock(net);
        tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
+
+       /* Last node? => reset backlog queue */
+       if (!tn->bclink->bcast_nodes.count)
+               tipc_link_purge_backlog(&tn->bclink->link);
+
        tipc_bclink_unlock(net);
 }
 
index ca8b8e0f49b526ebbf7a87e2cc89491ecfedf988..eaa9fe54b4aebfb531610611637915dc1b0c7256 100644 (file)
@@ -404,7 +404,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr)
        l_ptr->reasm_buf = NULL;
 }
 
-static void tipc_link_purge_backlog(struct tipc_link *l)
+void tipc_link_purge_backlog(struct tipc_link *l)
 {
        __skb_queue_purge(&l->backlogq);
        l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
index 0c02c973e98558c699f006cce06b81768891b08b..ae0a0ea572f2961aca2617f9244ea74ebba15c6a 100644 (file)
@@ -218,6 +218,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr);
 int tipc_link_is_up(struct tipc_link *l_ptr);
 int tipc_link_is_active(struct tipc_link *l_ptr);
 void tipc_link_purge_queues(struct tipc_link *l_ptr);
+void tipc_link_purge_backlog(struct tipc_link *l);
 void tipc_link_reset_all(struct tipc_node *node);
 void tipc_link_reset(struct tipc_link *l_ptr);
 int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
index d9b1fef0c67e3cb42e29f64e661496a50af02e53..f52abae0ec5fe84feb006b217e495849373561d7 100644 (file)
@@ -115,6 +115,10 @@ PHONY += kvmconfig
 kvmconfig: kvm_guest.config
        @:
 
+PHONY += xenconfig
+xenconfig: xen.config
+       @:
+
 PHONY += tinyconfig
 tinyconfig:
        $(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config
@@ -139,7 +143,8 @@ help:
        @echo  '  randconfig      - New config with random answer to all options'
        @echo  '  listnewconfig   - List new options'
        @echo  '  olddefconfig    - Same as silentoldconfig but sets new symbols to their default value'
-       @echo  '  kvmconfig       - Enable additional options for guest kernel support'
+       @echo  '  kvmconfig       - Enable additional options for kvm guest kernel support'
+       @echo  '  xenconfig       - Enable additional options for xen dom0 and guest kernel support'
        @echo  '  tinyconfig      - Configure the tiniest possible kernel'
 
 # lxdialog stuff
index 1052d4834a44f502bda4f4f1ebe1202ace1608f5..c2423d913b46bd0e659ea4d4c057a3af6119c2d4 100644 (file)
 #define EM_MICROBLAZE  189
 #endif
 
+#ifndef EM_ARCV2
+#define EM_ARCV2       195
+#endif
+
 static int fd_map;     /* File descriptor for file being modified. */
 static int mmap_failed; /* Boolean flag. */
 static void *ehdr_curr; /* current ElfXX_Ehdr *  for resource cleanup */
@@ -281,6 +285,7 @@ do_file(char const *const fname)
                custom_sort = sort_relative_table;
                break;
        case EM_ARCOMPACT:
+       case EM_ARCV2:
        case EM_ARM:
        case EM_AARCH64:
        case EM_MICROBLAZE:
index 5696874e806264c50cdbddd317bbfdbdefe47260..dec607c17b6434d6b8e5416060a0202bc4cb52f4 100644 (file)
@@ -654,7 +654,7 @@ static struct security_hook_list apparmor_hooks[] = {
 static int param_set_aabool(const char *val, const struct kernel_param *kp);
 static int param_get_aabool(char *buffer, const struct kernel_param *kp);
 #define param_check_aabool param_check_bool
-static struct kernel_param_ops param_ops_aabool = {
+static const struct kernel_param_ops param_ops_aabool = {
        .flags = KERNEL_PARAM_OPS_FL_NOARG,
        .set = param_set_aabool,
        .get = param_get_aabool
@@ -663,7 +663,7 @@ static struct kernel_param_ops param_ops_aabool = {
 static int param_set_aauint(const char *val, const struct kernel_param *kp);
 static int param_get_aauint(char *buffer, const struct kernel_param *kp);
 #define param_check_aauint param_check_uint
-static struct kernel_param_ops param_ops_aauint = {
+static const struct kernel_param_ops param_ops_aauint = {
        .set = param_set_aauint,
        .get = param_get_aauint
 };
@@ -671,7 +671,7 @@ static struct kernel_param_ops param_ops_aauint = {
 static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp);
 static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp);
 #define param_check_aalockpolicy param_check_bool
-static struct kernel_param_ops param_ops_aalockpolicy = {
+static const struct kernel_param_ops param_ops_aalockpolicy = {
        .flags = KERNEL_PARAM_OPS_FL_NOARG,
        .set = param_set_aalockpolicy,
        .get = param_get_aalockpolicy
index 686355fea7fd8aab766c5f5894cf3d5a4dd2e13d..e24121afb2f2773eacced7410dbdd5953a49c206 100644 (file)
@@ -55,7 +55,7 @@ static int param_set_bufsize(const char *val, const struct kernel_param *kp)
        return 0;
 }
 
-static struct kernel_param_ops param_ops_bufsize = {
+static const struct kernel_param_ops param_ops_bufsize = {
        .set = param_set_bufsize,
        .get = param_get_uint,
 };
index 9149a4aefa9548e5b09d8d15a4e28dd90cb5918a..84a3cd683068a3bbf4a752e05c399600ce83572b 100644 (file)
@@ -41,8 +41,11 @@ static int get_available_index(struct snd_card *card, const char *name)
        sid.iface = SNDRV_CTL_ELEM_IFACE_CARD;
        strlcpy(sid.name, name, sizeof(sid.name));
 
-       while (snd_ctl_find_id(card, &sid))
+       while (snd_ctl_find_id(card, &sid)) {
                sid.index++;
+               /* reset numid; otherwise snd_ctl_find_id() hits this again */
+               sid.numid = 0;
+       }
 
        return sid.index;
 }
index 3e0cebacefe1e74c42148f7e3e227f9de47852b7..20f37fb3800ecce8222fc67ef48afa91f65ecb4c 100644 (file)
@@ -109,13 +109,12 @@ static void snd_card_id_read(struct snd_info_entry *entry,
 
 static int init_info_for_card(struct snd_card *card)
 {
-       int err;
        struct snd_info_entry *entry;
 
        entry = snd_info_create_card_entry(card, "id", card->proc_root);
        if (!entry) {
                dev_dbg(card->dev, "unable to create card entry\n");
-               return err;
+               return -ENOMEM;
        }
        entry->c.text.read = snd_card_id_read;
        card->proc_id = entry;
index 7dea7987d2afcb1039a3a4ae79c9619f39788c16..745535d1840a6713e802aaa8c1d475733b88720e 100644 (file)
@@ -171,7 +171,7 @@ MODULE_PARM_DESC(beep_mode, "Select HDA Beep registration mode "
 
 #ifdef CONFIG_PM
 static int param_set_xint(const char *val, const struct kernel_param *kp);
-static struct kernel_param_ops param_ops_xint = {
+static const struct kernel_param_ops param_ops_xint = {
        .set = param_set_xint,
        .get = param_get_int,
 };
@@ -2180,6 +2180,8 @@ static const struct pci_device_id azx_ids[] = {
        { PCI_DEVICE(0x1022, 0x780d),
          .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
        /* ATI HDMI */
+       { PCI_DEVICE(0x1002, 0x1308),
+         .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
        { PCI_DEVICE(0x1002, 0x793b),
          .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
        { PCI_DEVICE(0x1002, 0x7919),
@@ -2188,6 +2190,8 @@ static const struct pci_device_id azx_ids[] = {
          .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
        { PCI_DEVICE(0x1002, 0x970f),
          .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
+       { PCI_DEVICE(0x1002, 0x9840),
+         .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
        { PCI_DEVICE(0x1002, 0xaa00),
          .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
        { PCI_DEVICE(0x1002, 0xaa08),
index f8527342a15062517af2be86ec2e9a5bb9e16a28..2f2433845d0487dd703301664cbc33ecfc77bc11 100644 (file)
@@ -591,7 +591,7 @@ static int eld_proc_new(struct hdmi_spec_per_pin *per_pin, int index)
 
 static void eld_proc_free(struct hdmi_spec_per_pin *per_pin)
 {
-       if (!per_pin->codec->bus->shutdown && per_pin->proc_entry) {
+       if (!per_pin->codec->bus->shutdown) {
                snd_info_free_entry(per_pin->proc_entry);
                per_pin->proc_entry = NULL;
        }
index 431a20b17df4cae617b7258e641c96a8c0b32a21..b3b44681d3cfbe90b2ff7bcbabba735dcfaac7f7 100644 (file)
@@ -4464,6 +4464,7 @@ enum {
        ALC269_FIXUP_LIFEBOOK,
        ALC269_FIXUP_LIFEBOOK_EXTMIC,
        ALC269_FIXUP_LIFEBOOK_HP_PIN,
+       ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
        ALC269_FIXUP_AMIC,
        ALC269_FIXUP_DMIC,
        ALC269VB_FIXUP_AMIC,
@@ -4484,6 +4485,7 @@ enum {
        ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
        ALC269_FIXUP_HEADSET_MODE,
        ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
+       ALC269_FIXUP_ASPIRE_HEADSET_MIC,
        ALC269_FIXUP_ASUS_X101_FUNC,
        ALC269_FIXUP_ASUS_X101_VERB,
        ALC269_FIXUP_ASUS_X101,
@@ -4511,6 +4513,7 @@ enum {
        ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
        ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC292_FIXUP_TPT440_DOCK,
+       ALC292_FIXUP_TPT440_DOCK2,
        ALC283_FIXUP_BXBT2807_MIC,
        ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
        ALC282_FIXUP_ASPIRE_V5_PINS,
@@ -4521,6 +4524,8 @@ enum {
        ALC288_FIXUP_DELL_HEADSET_MODE,
        ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC288_FIXUP_DELL_XPS_13_GPIO6,
+       ALC288_FIXUP_DELL_XPS_13,
+       ALC288_FIXUP_DISABLE_AAMIX,
        ALC292_FIXUP_DELL_E7X,
        ALC292_FIXUP_DISABLE_AAMIX,
        ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -4630,6 +4635,10 @@ static const struct hda_fixup alc269_fixups[] = {
                        { }
                },
        },
+       [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
+       },
        [ALC269_FIXUP_AMIC] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -4758,6 +4767,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_headset_mode_no_hp_mic,
        },
+       [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE,
+       },
        [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -4960,6 +4978,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chain_id = ALC269_FIXUP_HEADSET_MODE
        },
        [ALC292_FIXUP_TPT440_DOCK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
+               .chained = true,
+               .chain_id = ALC292_FIXUP_TPT440_DOCK2
+       },
+       [ALC292_FIXUP_TPT440_DOCK2] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
                        { 0x16, 0x21211010 }, /* dock headphone */
@@ -5046,9 +5070,23 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE
        },
+       [ALC288_FIXUP_DISABLE_AAMIX] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_disable_aamix,
+               .chained = true,
+               .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6
+       },
+       [ALC288_FIXUP_DELL_XPS_13] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_dell_xps13,
+               .chained = true,
+               .chain_id = ALC288_FIXUP_DISABLE_AAMIX
+       },
        [ALC292_FIXUP_DISABLE_AAMIX] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_disable_aamix,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE
        },
        [ALC292_FIXUP_DELL_E7X] = {
                .type = HDA_FIXUP_FUNC,
@@ -5073,6 +5111,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700),
+       SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK),
        SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
        SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
@@ -5086,10 +5126,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
        SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
+       SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X),
        SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
        SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
-       SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X),
+       SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13),
        SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5173,6 +5214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
        SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
        SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+       SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
        SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
index 0521be8d46a81c08f6580e57a5a495066f22d977..da5366405eda55a6eccbca0e14bce5631c851cdc 100644 (file)
@@ -241,7 +241,9 @@ static int via_pin_power_ctl_get(struct snd_kcontrol *kcontrol,
                                 struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       ucontrol->value.enumerated.item[0] = codec->power_save_node;
+       struct via_spec *spec = codec->spec;
+
+       ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused;
        return 0;
 }
 
@@ -252,9 +254,9 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol,
        struct via_spec *spec = codec->spec;
        bool val = !!ucontrol->value.enumerated.item[0];
 
-       if (val == codec->power_save_node)
+       if (val == spec->gen.power_down_unused)
                return 0;
-       codec->power_save_node = val;
+       /* codec->power_save_node = val; */ /* widget PM still seems broken */
        spec->gen.power_down_unused = val;
        analog_low_current_mode(codec);
        return 1;