Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 22 Mar 2016 22:48:44 +0000 (15:48 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 22 Mar 2016 22:48:44 +0000 (15:48 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 22 Mar 2016 22:48:44 +0000 (15:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 22 Mar 2016 22:48:44 +0000 (15:48 -0700)
diff --combined MAINTAINERS

index 0f3063cce44cff0e957c86ef30634433e8c96b13,c62557e6893edef161242c4417c2a38d71951263..32bafda47c2fedacaf2f5b4a71347cc5889dc773
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -151,7 -151,7 +151,7 @@@ S: Maintaine
   F:    drivers/scsi/53c700*
   
   6LOWPAN GENERIC (BTLE/IEEE 802.15.4)
- -M:    Alexander Aring <alex.aring@gmail.com>
+ +M:    Alexander Aring <aar@pengutronix.de>
   M:    Jukka Rissanen <jukka.rissanen@linux.intel.com>
   L:    linux-bluetooth@vger.kernel.org
   L:    linux-wpan@vger.kernel.org
@@@ -238,12 -238,6 +238,12 @@@ L:       lm-sensors@lm-sensors.or
   S:    Maintained
   F:    drivers/hwmon/abituguru3.c
   
+ +ACCES 104-DIO-48E GPIO DRIVER
+ +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
+ +L:    linux-gpio@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/gpio/gpio-104-dio-48e.c
+ +
   ACCES 104-IDI-48 GPIO DRIVER
   M:    "William Breathitt Gray" <vilhelm.gray@gmail.com>
   L:    linux-gpio@vger.kernel.org
@@@ -679,19 -673,11 +679,19 @@@ F:      drivers/gpu/drm/radeon/radeon_kfd.
   F:    drivers/gpu/drm/radeon/radeon_kfd.h
   F:    include/uapi/linux/kfd_ioctl.h
   
+ +AMD SEATTLE DEVICE TREE SUPPORT
+ +M:    Brijesh Singh <brijeshkumar.singh@amd.com>
+ +M:    Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ +M:    Tom Lendacky <thomas.lendacky@amd.com>
+ +S:    Supported
+ +F:    arch/arm64/boot/dts/amd/
+ +
   AMD XGBE DRIVER
   M:    Tom Lendacky <thomas.lendacky@amd.com>
   L:    netdev@vger.kernel.org
   S:    Supported
   F:    drivers/net/ethernet/amd/xgbe/
+ +F:    arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
   
   AMS (Apple Motion Sensor) DRIVER
   M:    Michael Hanselmann <linux-kernel@hansmi.ch>
@@@ -783,12 -769,6 +783,12 @@@ L:       alsa-devel@alsa-project.org (moderat
   S:    Maintained
   F:    sound/aoa/
   
+ +APEX EMBEDDED SYSTEMS STX104 DAC DRIVER
+ +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
+ +L:    linux-iio@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/iio/dac/stx104.c
+ +
   APM DRIVER
   M:    Jiri Kosina <jikos@kernel.org>
   S:    Odd fixes
@@@ -847,12 -827,6 +847,12 @@@ S:       Maintaine
   F:    drivers/net/arcnet/
   F:    include/uapi/linux/if_arcnet.h
   
+ +ARM HDLCD DRM DRIVER
+ +M:    Liviu Dudau <liviu.dudau@arm.com>
+ +S:    Supported
+ +F:    drivers/gpu/drm/arm/
+ +F:    Documentation/devicetree/bindings/display/arm,hdlcd.txt
+ +
   ARM MFM AND FLOPPY DRIVERS
   M:    Ian Molton <spyro@f2s.com>
   S:    Maintained
@@@ -965,16 -939,6 +965,16 @@@ F:       arch/arm/boot/dts/alpine
   F:    arch/arm64/boot/dts/al/
   F:    drivers/*/*alpine*
   
+ +ARM/ARTPEC MACHINE SUPPORT
+ +M:    Jesper Nilsson <jesper.nilsson@axis.com>
+ +M:    Lars Persson <lars.persson@axis.com>
+ +M:    Niklas Cassel <niklas.cassel@axis.com>
+ +S:    Maintained
+ +L:    linux-arm-kernel@axis.com
+ +F:    arch/arm/mach-artpec
+ +F:    arch/arm/boot/dts/artpec6*
+ +F:    drivers/clk/clk-artpec6.c
+ +
   ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
   M:    Nicolas Ferre <nicolas.ferre@atmel.com>
   M:    Alexandre Belloni <alexandre.belloni@free-electrons.com>
@@@ -1321,7 -1285,6 +1321,7 @@@ F:      arch/arm/mach-mvebu
   F:    drivers/rtc/rtc-armada38x.c
   F:    arch/arm/boot/dts/armada*
   F:    arch/arm/boot/dts/kirkwood*
+ +F:    arch/arm64/boot/dts/marvell/armada*
   
   
   ARM/Marvell Berlin SoC support
@@@ -1540,7 -1503,6 +1540,7 @@@ F:      arch/arm/mach-s5p*
   F:    arch/arm/mach-exynos*/
   F:    drivers/*/*s3c2410*
   F:    drivers/*/*/*s3c2410*
+ +F:    drivers/soc/samsung/*
   F:    drivers/spi/spi-s3c*
   F:    sound/soc/samsung/*
   F:    Documentation/arm/Samsung/
@@@ -1838,13 -1800,11 +1838,13 @@@ F:   drivers/edac/synopsys_edac.
   
   ARM SMMU DRIVERS
   M:    Will Deacon <will.deacon@arm.com>
+ +R:    Robin Murphy <robin.murphy@arm.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
   F:    drivers/iommu/arm-smmu.c
   F:    drivers/iommu/arm-smmu-v3.c
   F:    drivers/iommu/io-pgtable-arm.c
+ +F:    drivers/iommu/io-pgtable-arm-v7s.c
   
   ARM64 PORT (AARCH64 ARCHITECTURE)
   M:    Catalin Marinas <catalin.marinas@arm.com>
@@@ -1996,12 -1956,6 +1996,12 @@@ M:    Nicolas Ferre <nicolas.ferre@atmel.c
   S:    Supported
   F:    drivers/tty/serial/atmel_serial.c
   
+ +ATMEL SAMA5D2 ADC DRIVER
+ +M:    Ludovic Desroches <ludovic.desroches@atmel.com>
+ +L:    linux-iio@vger.kernel.org
+ +S:    Supported
+ +F:    drivers/iio/adc/at91-sama5d2_adc.c
+ +
   ATMEL Audio ALSA driver
   M:    Nicolas Ferre <nicolas.ferre@atmel.com>
   L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
@@@ -2204,8 -2158,7 +2204,8 @@@ M:      Marek Lindner <mareklindner@neomailb
   M:    Simon Wunderlich <sw@simonwunderlich.de>
   M:    Antonio Quartulli <a@unstable.cc>
   L:    b.a.t.m.a.n@lists.open-mesh.org
- -W:    http://www.open-mesh.org/
+ +W:    https://www.open-mesh.org/
+ +Q:    https://patchwork.open-mesh.org/project/batman/list/
   S:    Maintained
   F:    net/batman-adv/
   
@@@ -2434,9 -2387,8 +2434,9 @@@ F:      arch/arm/boot/dts/bcm470
   
   BROADCOM BCM63XX ARM ARCHITECTURE
   M:    Florian Fainelli <f.fainelli@gmail.com>
- -L:    linux-arm-kernel@lists.infradead.org
- -T:    git git://github.com/broadcom/arm-bcm63xx.git
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +L:    bcm-kernel-feedback-list@broadcom.com
+ +T:    git git://github.com/broadcom/stblinux.git
   S:    Maintained
   F:    arch/arm/mach-bcm/bcm63xx.c
   F:    arch/arm/include/debug/bcm63xx.S
@@@ -2470,14 -2422,12 +2470,14 @@@ F:   arch/mips/bmips/
   F:    arch/mips/include/asm/mach-bmips/*
   F:    arch/mips/kernel/*bmips*
   F:    arch/mips/boot/dts/brcm/bcm*.dts*
+ +F:    drivers/irqchip/irq-bcm63*
   F:    drivers/irqchip/irq-bcm7*
   F:    drivers/irqchip/irq-brcmstb*
   F:    include/linux/bcm963xx_nvram.h
   F:    include/linux/bcm963xx_tag.h
   
   BROADCOM TG3 GIGABIT ETHERNET DRIVER
+ +M:    Siva Reddy Kallam <siva.kallam@broadcom.com>
   M:    Prashant Sreedharan <prashant@broadcom.com>
   M:    Michael Chan <mchan@broadcom.com>
   L:    netdev@vger.kernel.org
@@@ -2569,13 -2519,6 +2569,13 @@@ L:    netdev@vger.kernel.or
   S:    Supported
   F:    drivers/net/ethernet/broadcom/bcmsysport.*
   
+ +BROADCOM VULCAN ARM64 SOC
+ +M:    Jayachandran C. <jchandra@broadcom.com>
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +L:    bcm-kernel-feedback-list@broadcom.com
+ +S:    Maintained
+ +F:    arch/arm64/boot/dts/broadcom/vulcan*
+ +
   BROCADE BFA FC SCSI DRIVER
   M:    Anil Gurumurthy <anil.gurumurthy@qlogic.com>
   M:    Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
@@@ -3561,14 -3504,6 +3561,14 @@@ F:    include/linux/device-mapper.
   F:    include/linux/dm-*.h
   F:    include/uapi/linux/dm-*.h
   
+ +DEVLINK
+ +M:    Jiri Pirko <jiri@mellanox.com>
+ +L:    netdev@vger.kernel.org
+ +S:    Supported
+ +F:    net/core/devlink.c
+ +F:    include/net/devlink.h
+ +F:    include/uapi/linux/devlink.h
+ +
   DIALOG SEMICONDUCTOR DRIVERS
   M:    Support Opensource <support.opensource@diasemi.com>
   W:    http://www.dialog-semiconductor.com/products
@@@ -3606,6 -3541,13 +3606,6 @@@ L:     driverdev-devel@linuxdriverproject.o
   S:    Maintained
   F:    drivers/staging/dgnc/
   
- -DIGI EPCA PCI PRODUCTS
- -M:    Lidza Louina <lidza.louina@gmail.com>
- -M:    Daeseok Youn <daeseok.youn@gmail.com>
- -L:    driverdev-devel@linuxdriverproject.org
- -S:    Maintained
- -F:    drivers/staging/dgap/
- -
   DIOLAN U2C-12 I2C DRIVER
   M:    Guenter Roeck <linux@roeck-us.net>
   L:    linux-i2c@vger.kernel.org
@@@ -3762,7 -3704,7 +3762,7 @@@ F:      drivers/gpu/vga
   F:    include/drm/
   F:    include/uapi/drm/
   
- -RADEON DRM DRIVERS
+ +RADEON and AMDGPU DRM DRIVERS
   M:    Alex Deucher <alexander.deucher@amd.com>
   M:    Christian König <christian.koenig@amd.com>
   L:    dri-devel@lists.freedesktop.org
@@@ -3770,8 -3712,6 +3770,8 @@@ T:      git git://people.freedesktop.org/~ag
   S:    Supported
   F:    drivers/gpu/drm/radeon/
   F:    include/uapi/drm/radeon*
+ +F:    drivers/gpu/drm/amd/
+ +F:    include/uapi/drm/amdgpu*
   
   DRM PANEL DRIVERS
   M:    Thierry Reding <thierry.reding@gmail.com>
@@@ -3816,7 -3756,7 +3816,7 @@@ F:      include/drm/exynos
   F:    include/uapi/drm/exynos*
   
   DRM DRIVERS FOR FREESCALE DCU
- -M:    Jianwei Wang <jianwei.wang.chn@gmail.com>
+ +M:    Stefan Agner <stefan@agner.ch>
   M:    Alison Wang <alison.wang@freescale.com>
   L:    dri-devel@lists.freedesktop.org
   S:    Supported
@@@ -4288,6 -4228,13 +4288,6 @@@ M:     Maxim Levitsky <maximlevitsky@gmail.
   S:    Maintained
   F:    drivers/media/rc/ene_ir.*
   
- -ENHANCED ERROR HANDLING (EEH)
- -M:    Gavin Shan <shangw@linux.vnet.ibm.com>
- -L:    linuxppc-dev@lists.ozlabs.org
- -S:    Supported
- -F:    Documentation/powerpc/eeh-pci-error-recovery.txt
- -F:    arch/powerpc/kernel/eeh*.c
- -
   EPSON S1D13XXX FRAMEBUFFER DRIVER
   M:    Kristoffer Ericson <kristoffer.ericson@gmail.com>
   S:    Maintained
@@@ -4364,12 -4311,6 +4364,12 @@@ L:    dri-devel@lists.freedesktop.or
   S:    Maintained
   F:    drivers/gpu/drm/exynos/exynos_dp*
   
+ +EXYNOS SYSMMU (IOMMU) driver
+ +M:    Marek Szyprowski <m.szyprowski@samsung.com>
+ +L:    iommu@lists.linux-foundation.org
+ +S:    Maintained
+ +F:    drivers/iommu/exynos-iommu.c
+ +
   EXYNOS MIPI DISPLAY DRIVERS
   M:    Inki Dae <inki.dae@samsung.com>
   M:    Donghwa Lee <dh09.lee@samsung.com>
@@@ -4577,12 -4518,6 +4577,12 @@@ L:    linuxppc-dev@lists.ozlabs.or
   S:    Maintained
   F:    drivers/dma/fsldma.*
   
+ +FREESCALE GPMI NAND DRIVER
+ +M:    Han Xu <han.xu@nxp.com>
+ +L:    linux-mtd@lists.infradead.org
+ +S:    Maintained
+ +F:    drivers/mtd/nand/gpmi-nand/*
+ +
   FREESCALE I2C CPM DRIVER
   M:    Jochen Friedrich <jochen@scram.de>
   L:    linuxppc-dev@lists.ozlabs.org
@@@ -4599,7 -4534,7 +4599,7 @@@ F:      include/linux/platform_data/video-im
   F:    drivers/video/fbdev/imxfb.c
   
   FREESCALE QUAD SPI DRIVER
- -M:    Han Xu <han.xu@freescale.com>
+ +M:    Han Xu <han.xu@nxp.com>
   L:    linux-mtd@lists.infradead.org
   S:    Maintained
   F:    drivers/mtd/spi-nor/fsl-quadspi.c
@@@ -4613,15 -4548,6 +4613,15 @@@ S:    Maintaine
   F:    drivers/net/ethernet/freescale/fs_enet/
   F:    include/linux/fs_enet_pd.h
   
+ +FREESCALE IMX / MXC FEC DRIVER
+ +M:    Fugang Duan <fugang.duan@nxp.com>
+ +L:    netdev@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/net/ethernet/freescale/fec_main.c
+ +F:    drivers/net/ethernet/freescale/fec_ptp.c
+ +F:    drivers/net/ethernet/freescale/fec.h
+ +F:    Documentation/devicetree/bindings/net/fsl-fec.txt
+ +
   FREESCALE QUICC ENGINE LIBRARY
   L:    linuxppc-dev@lists.ozlabs.org
   S:    Orphan
@@@ -4885,14 -4811,10 +4885,14 @@@ L:   linux-gpio@vger.kernel.or
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
   S:    Maintained
   F:    Documentation/gpio/
+ +F:    Documentation/ABI/testing/gpio-cdev
+ +F:    Documentation/ABI/obsolete/sysfs-gpio
   F:    drivers/gpio/
   F:    include/linux/gpio/
   F:    include/linux/gpio.h
   F:    include/asm-generic/gpio.h
+ +F:    include/uapi/linux/gpio.h
+ +F:    tools/gpio/
   
   GRE DEMULTIPLEXER DRIVER
   M:    Dmitry Kozlov <xeb@mail.ru>
@@@ -5041,7 -4963,6 +5041,7 @@@ F:      include/linux/hw_random.
   
   HARDWARE SPINLOCK CORE
   M:    Ohad Ben-Cohen <ohad@wizery.com>
+ +M:    Bjorn Andersson <bjorn.andersson@linaro.org>
   S:    Maintained
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/hwspinlock.git
   F:    Documentation/hwspinlock.txt
@@@ -5063,10 -4984,16 +5063,10 @@@ T:   git git://linuxtv.org/anttip/media_t
   S:    Maintained
   F:    drivers/media/dvb-frontends/hd29l2*
   
- -HEWLETT-PACKARD SMART2 RAID DRIVER
- -L:    iss_storagedev@hp.com
- -S:    Orphan
- -F:    Documentation/blockdev/cpqarray.txt
- -F:    drivers/block/cpqarray.*
- -
   HEWLETT-PACKARD SMART ARRAY RAID DRIVER (hpsa)
- -M:    Don Brace <don.brace@pmcs.com>
+ +M:    Don Brace <don.brace@microsemi.com>
   L:    iss_storagedev@hp.com
- -L:    storagedev@pmcs.com
+ +L:    esc.storagedev@microsemi.com
   L:    linux-scsi@vger.kernel.org
   S:    Supported
   F:    Documentation/scsi/hpsa.txt
@@@ -5075,9 -5002,9 +5075,9 @@@ F:      include/linux/cciss*.
   F:    include/uapi/linux/cciss*.h
   
   HEWLETT-PACKARD SMART CISS RAID DRIVER (cciss)
- -M:    Don Brace <don.brace@pmcs.com>
+ +M:    Don Brace <don.brace@microsemi.com>
   L:    iss_storagedev@hp.com
- -L:    storagedev@pmcs.com
+ +L:    esc.storagedev@microsemi.com
   L:    linux-scsi@vger.kernel.org
   S:    Supported
   F:    Documentation/blockdev/cciss.txt
@@@ -5262,7 -5189,6 +5262,7 @@@ F:      arch/x86/kernel/cpu/mshyperv.
   F:    drivers/hid/hid-hyperv.c
   F:    drivers/hv/
   F:    drivers/input/serio/hyperv-keyboard.c
+ +F:    drivers/pci/host/pci-hyperv.c
   F:    drivers/net/hyperv/
   F:    drivers/scsi/storvsc_drv.c
   F:    drivers/video/fbdev/hyperv_fb.c
@@@ -5270,16 -5196,6 +5270,16 @@@ F:    include/linux/hyperv.
   F:    tools/hv/
   F:    Documentation/ABI/stable/sysfs-bus-vmbus
   
+ +I2C MUXES
+ +M:    Peter Rosin <peda@axentia.se>
+ +L:    linux-i2c@vger.kernel.org
+ +S:    Maintained
+ +F:    Documentation/i2c/muxes/
+ +F:    Documentation/devicetree/bindings/i2c/i2c-mux*
+ +F:    drivers/i2c/i2c-mux.c
+ +F:    drivers/i2c/muxes/
+ +F:    include/linux/i2c-mux.h
+ +
   I2C OVER PARALLEL PORT
   M:    Jean Delvare <jdelvare@suse.com>
   L:    linux-i2c@vger.kernel.org
@@@ -5504,11 -5420,10 +5504,11 @@@ S:   Supporte
   F:    drivers/idle/i7300_idle.c
   
   IEEE 802.15.4 SUBSYSTEM
- -M:    Alexander Aring <alex.aring@gmail.com>
+ +M:    Alexander Aring <aar@pengutronix.de>
   L:    linux-wpan@vger.kernel.org
- -W:    https://github.com/linux-wpan
- -T:    git git://github.com/linux-wpan/linux-wpan-next.git
+ +W:    http://wpan.cakelab.org/
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
   S:    Maintained
   F:    net/ieee802154/
   F:    net/mac802154/
@@@ -5638,7 -5553,6 +5638,7 @@@ F:      drivers/input
   F:    include/linux/input.h
   F:    include/uapi/linux/input.h
   F:    include/linux/input/
+ +F:    Documentation/devicetree/bindings/input/
   
   INPUT MULTITOUCH (MT) PROTOCOL
   M:    Henrik Rydberg <rydberg@bitmath.org>
@@@ -5770,6 -5684,16 +5770,16 @@@ F:    Documentation/networking/i40evf.tx
   F:    drivers/net/ethernet/intel/
   F:    drivers/net/ethernet/intel/*/
   
+ INTEL RDMA RNIC DRIVER
+ M:     Faisal Latif <faisal.latif@intel.com>
+ R:     Chien Tin Tung <chien.tin.tung@intel.com>
+ R:     Mustafa Ismail <mustafa.ismail@intel.com>
+ R:     Shiraz Saleem <shiraz.saleem@intel.com>
+ R:     Tatyana Nikolova <tatyana.e.nikolova@intel.com>
+ L:     linux-rdma@vger.kernel.org
+ S:     Supported
+ F:     drivers/infiniband/hw/i40iw/
+ 
   INTEL-MID GPIO DRIVER
   M:    David Cohen <david.a.cohen@linux.intel.com>
   L:    linux-gpio@vger.kernel.org
@@@ -5833,7 -5757,6 +5843,7 @@@ S:      Supporte
   F:    include/uapi/linux/mei.h
   F:    include/linux/mei_cl_bus.h
   F:    drivers/misc/mei/*
+ +F:    drivers/watchdog/mei_wdt.c
   F:    Documentation/misc-devices/mei/*
   
   INTEL MIC DRIVERS (mic)
@@@ -6136,7 -6059,7 +6146,7 @@@ S:      Maintaine
   F:    drivers/media/platform/rcar_jpu.c
   
   JSM Neo PCI based serial card
- -M:    Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
+ +M:    Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
   L:    linux-serial@vger.kernel.org
   S:    Maintained
   F:    drivers/tty/serial/jsm/
@@@ -6654,10 -6577,9 +6664,10 @@@ F:    drivers/platform/x86/hp_accel.
   
   LIVE PATCHING
   M:    Josh Poimboeuf <jpoimboe@redhat.com>
- -M:    Seth Jennings <sjenning@redhat.com>
+ +M:    Jessica Yu <jeyu@redhat.com>
   M:    Jiri Kosina <jikos@kernel.org>
- -M:    Vojtech Pavlik <vojtech@suse.com>
+ +M:    Miroslav Benes <mbenes@suse.cz>
+ +R:    Petr Mladek <pmladek@suse.com>
   S:    Maintained
   F:    kernel/livepatch/
   F:    include/linux/livepatch.h
@@@ -6668,11 -6590,6 +6678,11 @@@ F:    samples/livepatch
   L:    live-patching@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git
   
+ +LINUX KERNEL DUMP TEST MODULE (LKDTM)
+ +M:    Kees Cook <keescook@chromium.org>
+ +S:    Maintained
+ +F:    drivers/misc/lkdtm.c
+ +
   LLC (802.2)
   M:    Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
   S:    Maintained
@@@ -6758,12 -6675,13 +6768,12 @@@ S:   Maintaine
   F:    arch/arm/mach-lpc32xx/
   
   LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
- -M:    Nagalakshmi Nandigama <nagalakshmi.nandigama@avagotech.com>
- -M:    Praveen Krishnamoorthy <praveen.krishnamoorthy@avagotech.com>
- -M:    Sreekanth Reddy <sreekanth.reddy@avagotech.com>
- -M:    Abhijit Mahajan <abhijit.mahajan@avagotech.com>
- -L:    MPT-FusionLinux.pdl@avagotech.com
+ +M:    Sathya Prakash <sathya.prakash@broadcom.com>
+ +M:    Chaitra P B <chaitra.basappa@broadcom.com>
+ +M:    Suganath Prabu Subramani <suganath-prabu.subramani@broadcom.com>
+ +L:    MPT-FusionLinux.pdl@broadcom.com
   L:    linux-scsi@vger.kernel.org
- -W:    http://www.lsilogic.com/support
+ +W:    http://www.avagotech.com/support/
   S:    Supported
   F:    drivers/message/fusion/
   F:    drivers/scsi/mpt2sas/
@@@ -6856,7 -6774,6 +6866,7 @@@ S:      Maintaine
   F:    Documentation/networking/mac80211-injection.txt
   F:    include/net/mac80211.h
   F:    net/mac80211/
+ +F:    drivers/net/wireless/mac80211_hwsim.[ch]
   
   MACVLAN DRIVER
   M:    Patrick McHardy <kaber@trash.net>
@@@ -6986,7 -6903,7 +6996,7 @@@ MAXIM MAX77802 MULTIFUNCTION PMIC DEVIC
   M:    Javier Martinez Canillas <javier@osg.samsung.com>
   L:    linux-kernel@vger.kernel.org
   S:    Supported
- -F:    drivers/*/*max77802.c
+ +F:    drivers/*/*max77802*.c
   F:    Documentation/devicetree/bindings/*/*max77802.txt
   F:    include/dt-bindings/*/*max77802.h
   
@@@ -6996,7 -6913,7 +7006,7 @@@ M:      Krzysztof Kozlowski <k.kozlowski@sam
   L:    linux-kernel@vger.kernel.org
   S:    Supported
   F:    drivers/*/max14577.c
- -F:    drivers/*/max77686.c
+ +F:    drivers/*/max77686*.c
   F:    drivers/*/max77693.c
   F:    drivers/extcon/extcon-max14577.c
   F:    drivers/extcon/extcon-max77693.c
@@@ -7101,13 -7018,6 +7111,13 @@@ F:    include/uapi/linux/meye.
   F:    include/uapi/linux/ivtv*
   F:    include/uapi/linux/uvcvideo.h
   
+ +MEDIATEK ETHERNET DRIVER
+ +M:    Felix Fietkau <nbd@openwrt.org>
+ +M:    John Crispin <blogic@openwrt.org>
+ +L:    netdev@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/net/ethernet/mediatek/
+ +
   MEDIATEK MT7601U WIRELESS LAN DRIVER
   M:    Jakub Kicinski <kubakici@wp.pl>
   L:    linux-wireless@vger.kernel.org
@@@ -7319,8 -7229,10 +7329,8 @@@ L:     linux-media@vger.kernel.or
   W:    https://linuxtv.org
   W:    http://palosaari.fi/linux/
   Q:    http://patchwork.linuxtv.org/project/linux-media/list/
- -T:    git git://linuxtv.org/anttip/media_tree.git
   S:    Maintained
- -F:    drivers/staging/media/mn88473/
- -F:    drivers/media/dvb-frontends/mn88473.h
+ +F:    drivers/media/dvb-frontends/mn88473*
   
   MODULE SUPPORT
   M:    Rusty Russell <rusty@rustcorp.com.au>
@@@ -7481,17 -7393,6 +7491,17 @@@ W:    https://www.myricom.com/support/down
   S:    Supported
   F:    drivers/net/ethernet/myricom/myri10ge/
   
+ +NAND FLASH SUBSYSTEM
+ +M:    Boris Brezillon <boris.brezillon@free-electrons.com>
+ +R:    Richard Weinberger <richard@nod.at>
+ +L:    linux-mtd@lists.infradead.org
+ +W:    http://www.linux-mtd.infradead.org/
+ +Q:    http://patchwork.ozlabs.org/project/linux-mtd/list/
+ +T:    git git://github.com/linux-nand/linux.git
+ +S:    Maintained
+ +F:    drivers/mtd/nand/
+ +F:    include/linux/mtd/nand*.h
+ +
   NATSEMI ETHERNET DRIVER (DP8381x)
   S:    Orphan
   F:    drivers/net/ethernet/natsemi/natsemi.c
@@@ -7605,6 -7506,7 +7615,6 @@@ F:      net/netrom
   
   NETRONOME ETHERNET DRIVERS
   M:    Jakub Kicinski <jakub.kicinski@netronome.com>
- -M:    Rolf Neugebauer <rolf.neugebauer@netronome.com>
   L:    oss-drivers@netronome.com
   S:    Maintained
   F:    drivers/net/ethernet/netronome/
@@@ -7741,6 -7643,7 +7751,6 @@@ F:      net/nfc
   F:    include/net/nfc/
   F:    include/uapi/linux/nfc.h
   F:    drivers/nfc/
- -F:    include/linux/platform_data/microread.h
   F:    include/linux/platform_data/nfcmrvl.h
   F:    include/linux/platform_data/nxp-nci.h
   F:    include/linux/platform_data/pn544.h
@@@ -7891,11 -7794,6 +7901,11 @@@ L:    alsa-devel@alsa-project.org (moderat
   S:    Maintained
   F:    sound/soc/codecs/tfa9879*
   
+ +OBJTOOL
+ +M:    Josh Poimboeuf <jpoimboe@redhat.com>
+ +S:    Supported
+ +F:    tools/objtool/
+ +
   OMAP SUPPORT
   M:    Tony Lindgren <tony@atomide.com>
   L:    linux-omap@vger.kernel.org
@@@ -7946,7 -7844,7 +7956,7 @@@ S:      Maintaine
   F:    arch/arm/*omap*/*clock*
   
   OMAP POWER MANAGEMENT SUPPORT
- -M:    Kevin Hilman <khilman@deeprootsystems.com>
+ +M:    Kevin Hilman <khilman@kernel.org>
   L:    linux-omap@vger.kernel.org
   S:    Maintained
   F:    arch/arm/*omap*/*pm*
@@@ -8050,7 -7948,7 +8060,7 @@@ F:      arch/arm/*omap*/usb
   OMAP GPIO DRIVER
   M:    Grygorii Strashko <grygorii.strashko@ti.com>
   M:    Santosh Shilimkar <ssantosh@kernel.org>
- -M:    Kevin Hilman <khilman@deeprootsystems.com>
+ +M:    Kevin Hilman <khilman@kernel.org>
   L:    linux-omap@vger.kernel.org
   S:    Maintained
   F:    Documentation/devicetree/bindings/gpio/gpio-omap.txt
@@@ -8269,13 -8167,6 +8279,13 @@@ S:    Maintaine
   F:    Documentation/mn10300/
   F:    arch/mn10300/
   
+ +PARALLEL LCD/KEYPAD PANEL DRIVER
+ +M:      Willy Tarreau <willy@haproxy.com>
+ +M:      Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
+ +S:      Odd Fixes
+ +F:      Documentation/misc-devices/lcd-panel-cgram.txt
+ +F:      drivers/misc/panel.c
+ +
   PARALLEL PORT SUBSYSTEM
   M:    Sudip Mukherjee <sudipm.mukherjee@gmail.com>
   M:    Sudip Mukherjee <sudip@vectorindia.org>
@@@ -8367,15 -8258,6 +8377,15 @@@ L:    linux-pci@vger.kernel.or
   S:    Supported
   F:    Documentation/PCI/pci-error-recovery.txt
   
+ +PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
+ +M:    Russell Currey <ruscur@russell.cc>
+ +L:    linuxppc-dev@lists.ozlabs.org
+ +S:    Supported
+ +F:    Documentation/powerpc/eeh-pci-error-recovery.txt
+ +F:    arch/powerpc/kernel/eeh*.c
+ +F:    arch/powerpc/platforms/*/eeh*.c
+ +F:    arch/powerpc/include/*/eeh*.h
+ +
   PCI SUBSYSTEM
   M:    Bjorn Helgaas <bhelgaas@google.com>
   L:    linux-pci@vger.kernel.org
@@@ -8483,20 -8365,12 +8493,20 @@@ L:   linux-pci@vger.kernel.or
   S:    Maintained
   F:    drivers/pci/host/*designware*
   
+ +PCI DRIVER FOR SYNOPSYS PROTOTYPING DEVICE
+ +M:    Joao Pinto <jpinto@synopsys.com>
+ +L:    linux-pci@vger.kernel.org
+ +S:    Maintained
+ +F:    Documentation/devicetree/bindings/pci/designware-pcie.txt
+ +F:    drivers/pci/host/pcie-designware-plat.c
+ +
   PCI DRIVER FOR GENERIC OF HOSTS
   M:    Will Deacon <will.deacon@arm.com>
   L:    linux-pci@vger.kernel.org
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
   F:    Documentation/devicetree/bindings/pci/host-generic-pci.txt
+ +F:    drivers/pci/host/pci-host-common.c
   F:    drivers/pci/host/pci-host-generic.c
   
   PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
@@@ -8542,14 -8416,6 +8552,14 @@@ L:     linux-arm-msm@vger.kernel.or
   S:     Maintained
   F:     drivers/pci/host/*qcom*
   
+ +PCIE DRIVER FOR CAVIUM THUNDERX
+ +M:    David Daney <david.daney@cavium.com>
+ +L:    linux-pci@vger.kernel.org
+ +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ +S:    Supported
+ +F:    Documentation/devicetree/bindings/pci/pci-thunder-*
+ +F:    drivers/pci/host/pci-thunder-*
+ +
   PCMCIA SUBSYSTEM
   P:    Linux PCMCIA Team
   L:    linux-pcmcia@lists.infradead.org
@@@ -8575,7 -8441,7 +8585,7 @@@ F:      include/crypto/pcrypt.
   
   PER-CPU MEMORY ALLOCATOR
   M:    Tejun Heo <tj@kernel.org>
- -M:    Christoph Lameter <cl@linux-foundation.org>
+ +M:    Christoph Lameter <cl@linux.com>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
   S:    Maintained
   F:    include/linux/percpu*.h
@@@ -8592,7 -8458,6 +8602,7 @@@ PERFORMANCE EVENTS SUBSYSTE
   M:    Peter Zijlstra <peterz@infradead.org>
   M:    Ingo Molnar <mingo@redhat.com>
   M:    Arnaldo Carvalho de Melo <acme@kernel.org>
+ +R:    Alexander Shishkin <alexander.shishkin@linux.intel.com>
   L:    linux-kernel@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
   S:    Supported
@@@ -9215,15 -9080,17 +9225,21 @@@ S:   Maintaine
   F:    drivers/net/ethernet/rdc/r6040.c
   
   RDS - RELIABLE DATAGRAM SOCKETS
- -M:    Chien Yen <chien.yen@oracle.com>
+ +M:    Santosh Shilimkar <santosh.shilimkar@oracle.com>
+ +L:    netdev@vger.kernel.org
+ +L:    linux-rdma@vger.kernel.org
   L:    rds-devel@oss.oracle.com (moderated for non-subscribers)
+ +W:    https://oss.oracle.com/projects/rds/
   S:    Supported
   F:    net/rds/
+ +F:    Documentation/networking/rds.txt
   
+ RDMAVT - RDMA verbs software
+ M:    Dennis Dalessandro <dennis.dalessandro@intel.com>
+ L:    linux-rdma@vger.kernel.org
+ S:    Supported
+ F:    drivers/infiniband/sw/rdmavt
+ 
   READ-COPY UPDATE (RCU)
   M:    "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
   M:    Josh Triplett <josh@joshtriplett.org>
@@@ -9275,7 -9142,6 +9291,7 @@@ F:      include/linux/regmap.
   
   REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
   M:    Ohad Ben-Cohen <ohad@wizery.com>
+ +M:    Bjorn Andersson <bjorn.andersson@linaro.org>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc.git
   S:    Maintained
   F:    drivers/remoteproc/
@@@ -9284,7 -9150,6 +9300,7 @@@ F:      include/linux/remoteproc.
   
   REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
   M:    Ohad Ben-Cohen <ohad@wizery.com>
+ +M:    Bjorn Andersson <bjorn.andersson@linaro.org>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/rpmsg.git
   S:    Maintained
   F:    drivers/rpmsg/
@@@ -9623,7 -9488,6 +9639,7 @@@ F:      drivers/media/i2c/s5k5baf.
   
   SAMSUNG S3FWRN5 NFC DRIVER
   M:    Robert Baldyga <r.baldyga@samsung.com>
+ +M:    Krzysztof Opasiak <k.opasiak@samsung.com>
   L:    linux-nfc@lists.01.org (moderated for non-subscribers)
   S:    Supported
   F:    drivers/nfc/s3fwrn5
@@@ -9686,9 -9550,9 +9702,9 @@@ F: Documentation/devicetree/bindings/ne
   F: drivers/net/ethernet/synopsys/dwc_eth_qos.c
   
   SYNOPSYS DESIGNWARE I2C DRIVER
- -M:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
   M:    Jarkko Nikula <jarkko.nikula@linux.intel.com>
- -M:    Mika Westerberg <mika.westerberg@linux.intel.com>
+ +R:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ +R:    Mika Westerberg <mika.westerberg@linux.intel.com>
   L:    linux-i2c@vger.kernel.org
   S:    Maintained
   F:    drivers/i2c/busses/i2c-designware-*
@@@ -9797,7 -9661,7 +9813,7 @@@ F:      drivers/scsi/sg.
   F:    include/scsi/sg.h
   
   SCSI SUBSYSTEM
- -M:    "James E.J. Bottomley" <JBottomley@odin.com>
+ +M:    "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git
   M:    "Martin K. Petersen" <martin.petersen@oracle.com>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git
@@@ -9854,12 -9718,10 +9870,12 @@@ S:   Maintaine
   F:    drivers/mmc/host/sdricoh_cs.c
   
   SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
+ +M:    Adrian Hunter <adrian.hunter@intel.com>
   L:    linux-mmc@vger.kernel.org
- -S:    Orphan
- -F:    drivers/mmc/host/sdhci.*
- -F:    drivers/mmc/host/sdhci-pltfm.[ch]
+ +T:    git git://git.infradead.org/users/ahunter/linux-sdhci.git
+ +S:    Maintained
+ +F:    drivers/mmc/host/sdhci*
+ +F:    include/linux/mmc/sdhci*
   
   SECURE COMPUTING
   M:    Kees Cook <keescook@chromium.org>
@@@ -10109,7 -9971,7 +10125,7 @@@ F:    arch/arm/mach-s3c24xx/bast-irq.
   
   TI DAVINCI MACHINE SUPPORT
   M:    Sekhar Nori <nsekhar@ti.com>
- -M:    Kevin Hilman <khilman@deeprootsystems.com>
+ +M:    Kevin Hilman <khilman@kernel.org>
   T:    git git://gitorious.org/linux-davinci/linux-davinci.git
   Q:    http://patchwork.kernel.org/project/linux-davinci/list/
   S:    Supported
@@@ -10311,7 -10173,7 +10327,7 @@@ F:   drivers/media/pci/solo6x10
   SOFTWARE RAID (Multiple Disks) SUPPORT
   M:    Shaohua Li <shli@kernel.org>
   L:    linux-raid@vger.kernel.org
- -T:    git git://neil.brown.name/md
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
   S:    Supported
   F:    drivers/md/
   F:    include/linux/raid/
@@@ -10545,6 -10407,19 +10561,6 @@@ L:  linux-tegra@vger.kernel.or
   S:    Maintained
   F:    drivers/staging/nvec/
   
- -STAGING - OLPC SECONDARY DISPLAY CONTROLLER (DCON)
- -M:    Jens Frederich <jfrederich@gmail.com>
- -M:    Daniel Drake <dsd@laptop.org>
- -M:    Jon Nettleton <jon.nettleton@gmail.com>
- -W:    http://wiki.laptop.org/go/DCON
- -S:    Maintained
- -F:    drivers/staging/olpc_dcon/
- -
- -STAGING - PARALLEL LCD/KEYPAD PANEL DRIVER
- -M:    Willy Tarreau <willy@meta-x.org>
- -S:    Odd Fixes
- -F:    drivers/staging/panel/
- -
   STAGING - REALTEK RTL8712U DRIVERS
   M:    Larry Finger <Larry.Finger@lwfinger.net>
   M:    Florian Schilhabel <florian.c.schilhabel@googlemail.com>.
@@@ -10993,14 -10868,6 +11009,14 @@@ L: linux-omap@vger.kernel.or
   S:    Maintained
   F:    drivers/thermal/ti-soc-thermal/
   
+ +TI VPE/CAL DRIVERS
+ +M:    Benoit Parrot <bparrot@ti.com>
+ +L:    linux-media@vger.kernel.org
+ +W:    http://linuxtv.org/
+ +Q:    http://patchwork.linuxtv.org/project/linux-media/list/
+ +S:    Maintained
+ +F:    drivers/media/platform/ti-vpe/
+ +
   TI CDCE706 CLOCK DRIVER
   M:    Max Filippov <jcmvbkbc@gmail.com>
   S:    Maintained
@@@ -11224,8 -11091,8 +11240,8 @@@ M:   Jarkko Sakkinen <jarkko.sakkinen@lin
   R:    Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
   W:    http://tpmdd.sourceforge.net
   L:    tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers)
- -Q:    git git://github.com/PeterHuewe/linux-tpmdd.git
- -T:    git https://github.com/PeterHuewe/linux-tpmdd
+ +Q:    https://patchwork.kernel.org/project/tpmdd-devel/list/
+ +T:    git git://git.infradead.org/users/jjs/linux-tpmdd.git
   S:    Maintained
   F:    drivers/char/tpm/
   
@@@ -11380,6 -11247,7 +11396,6 @@@ F:   include/linux/cdrom.
   F:    include/uapi/linux/cdrom.h
   
   UNISYS S-PAR DRIVERS
- -M:    Benjamin Romer <benjamin.romer@unisys.com>
   M:    David Kershner <david.kershner@unisys.com>
   L:    sparmaintainer@unisys.com (Unisys internal)
   S:    Supported
@@@ -11404,7 -11272,7 +11420,7 @@@ F:   include/linux/mtd/ubi.
   F:    include/uapi/mtd/ubi-user.h
   
   USB ACM DRIVER
- -M:    Oliver Neukum <oliver@neukum.org>
+ +M:    Oliver Neukum <oneukum@suse.com>
   L:    linux-usb@vger.kernel.org
   S:    Maintained
   F:    Documentation/usb/acm.txt
@@@ -11489,13 -11357,6 +11505,13 @@@ S: Maintaine
   F:    drivers/usb/host/isp116x*
   F:    include/linux/usb/isp116x.h
   
+ +USB LAN78XX ETHERNET DRIVER
+ +M:    Woojung Huh <woojung.huh@microchip.com>
+ +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+ +L:    netdev@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/net/usb/lan78xx.*
+ +
   USB MASS STORAGE DRIVER
   M:    Matthew Dharm <mdharm-usb@one-eyed-alien.net>
   L:    linux-usb@vger.kernel.org
@@@ -11535,7 -11396,6 +11551,7 @@@ M:   Valentina Manea <valentina.manea.m@g
   M:    Shuah Khan <shuah.kh@samsung.com>
   L:    linux-usb@vger.kernel.org
   S:    Maintained
+ +F:    Documentation/usb/usbip_protocol.txt
   F:    drivers/usb/usbip/
   F:    tools/usb/usbip/
   
@@@ -12026,18 -11886,6 +12042,18 @@@ M: David Härdeman <david@hardeman.nu
   S:    Maintained
   F:    drivers/media/rc/winbond-cir.c
   
+ +WINSYSTEMS EBC-C384 WATCHDOG DRIVER
+ +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
+ +L:    linux-watchdog@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/watchdog/ebc-c384_wdt.c
+ +
+ +WINSYSTEMS WS16C48 GPIO DRIVER
+ +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
+ +L:    linux-gpio@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/gpio/gpio-ws16c48.c
+ +
   WIMAX STACK
   M:    Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
   M:    linux-wimax@intel.com
diff --combined drivers/infiniband/core/sa_query.c

index b5656a2298ee0b1aa88f0cf50672f533b0a0fb6e,d2214a55ac4ac4bab9aa4f9c7540745f96ab02ec..8a09c0fb268d8d89529f0f22249422ee3fe05320
--- 1/drivers/infiniband/core/sa_query.c
--- 2/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@@ -885,6 -885,11 +885,11 @@@ static void update_sm_ah(struct work_st
         ah_attr.dlid     = port_attr.sm_lid;
         ah_attr.sl       = port_attr.sm_sl;
         ah_attr.port_num = port->port_num;
+       if (port_attr.grh_required) {
+               ah_attr.ah_flags = IB_AH_GRH;
+               ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
+               ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+       }
   
         new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
         if (IS_ERR(new_ah->ah)) {
@@@ -1070,7 -1075,7 +1075,7 @@@ int ib_init_ah_from_path(struct ib_devi
                 }
         }
   
- -      if (rec->hop_limit > 1 || use_roce) {
+ +      if (rec->hop_limit > 0 || use_roce) {
                 ah_attr->ah_flags = IB_AH_GRH;
                 ah_attr->grh.dgid = rec->dgid;
   
diff --combined drivers/infiniband/hw/mlx5/main.c

index edd8b87418466a7b3717856915087b41122ff8cb,e305990b73f6b78bc49f5213d7678e2b859cf18a..5acf346e048e3bb45fc33d97eeecd1321bf2ef9d
--- 1/drivers/infiniband/hw/mlx5/main.c
--- 2/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@@ -42,7 -42,6 +42,7 @@@
   #include <rdma/ib_user_verbs.h>
   #include <rdma/ib_addr.h>
   #include <rdma/ib_cache.h>
+ +#include <linux/mlx5/port.h>
   #include <linux/mlx5/vport.h>
   #include <rdma/ib_smi.h>
   #include <rdma/ib_umem.h>
@@@ -284,7 -283,7 +284,7 @@@ __be16 mlx5_get_roce_udp_sport(struct m
   
   static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
   {
-       return !dev->mdev->issi;
+       return !MLX5_CAP_GEN(dev->mdev, ib_virt);
   }
   
   enum {
@@@ -563,6 -562,9 +563,9 @@@ static int mlx5_ib_query_device(struct 
         if (MLX5_CAP_GEN(mdev, cd))
                 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
   
+       if (!mlx5_core_is_pf(mdev))
+               props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+ 
         return 0;
   }
   
@@@ -700,6 -702,7 +703,7 @@@ static int mlx5_query_hca_port(struct i
         props->qkey_viol_cntr   = rep->qkey_violation_counter;
         props->subnet_timeout   = rep->subnet_timeout;
         props->init_type_reply  = rep->init_type_reply;
+       props->grh_required     = rep->grh_required;
   
         err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
         if (err)
@@@ -2350,6 -2353,12 +2354,12 @@@ static void *mlx5_ib_add(struct mlx5_co
         dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
         dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
         dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+       if (mlx5_core_is_pf(mdev)) {
+               dev->ib_dev.get_vf_config       = mlx5_ib_get_vf_config;
+               dev->ib_dev.set_vf_link_state   = mlx5_ib_set_vf_link_state;
+               dev->ib_dev.get_vf_stats        = mlx5_ib_get_vf_stats;
+               dev->ib_dev.set_vf_guid         = mlx5_ib_set_vf_guid;
+       }
   
         mlx5_ib_internal_fill_odp_caps(dev);
   
diff --combined drivers/infiniband/ulp/srpt/ib_srpt.c

index 1d1309091abace1362ad9ca4608a5de7c5c3b576,578c3703421ddb18a5bad7c64bde26c83d546964..0bd3cb2f3c671a21fefd57dbc4851a24daa61e82
--- 1/drivers/infiniband/ulp/srpt/ib_srpt.c
--- 2/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@@ -839,7 -839,7 +839,7 @@@ static void srpt_zerolength_write_done(
                 if (srpt_set_ch_state(ch, CH_DISCONNECTED))
                         schedule_work(&ch->release_work);
                 else
-                       WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
+                       WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num);
         }
   }
   
@@@ -1264,26 -1264,40 +1264,26 @@@ free_mem
    */
   static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
   {
+ +      struct se_session *se_sess;
         struct srpt_send_ioctx *ioctx;
- -      unsigned long flags;
+ +      int tag;
   
         BUG_ON(!ch);
+ +      se_sess = ch->sess;
   
- -      ioctx = NULL;
- -      spin_lock_irqsave(&ch->spinlock, flags);
- -      if (!list_empty(&ch->free_list)) {
- -              ioctx = list_first_entry(&ch->free_list,
- -                                       struct srpt_send_ioctx, free_list);
- -              list_del(&ioctx->free_list);
+ +      tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+ +      if (tag < 0) {
+ +              pr_err("Unable to obtain tag for srpt_send_ioctx\n");
+ +              return NULL;
         }
- -      spin_unlock_irqrestore(&ch->spinlock, flags);
- -
- -      if (!ioctx)
- -              return ioctx;
- -
- -      BUG_ON(ioctx->ch != ch);
+ +      ioctx = &((struct srpt_send_ioctx *)se_sess->sess_cmd_map)[tag];
+ +      memset(ioctx, 0, sizeof(struct srpt_send_ioctx));
+ +      ioctx->ch = ch;
         spin_lock_init(&ioctx->spinlock);
         ioctx->state = SRPT_STATE_NEW;
- -      ioctx->n_rbuf = 0;
- -      ioctx->rbufs = NULL;
- -      ioctx->n_rdma = 0;
- -      ioctx->n_rdma_wrs = 0;
- -      ioctx->rdma_wrs = NULL;
- -      ioctx->mapped_sg_count = 0;
         init_completion(&ioctx->tx_done);
- -      ioctx->queue_status_only = false;
- -      /*
- -       * transport_init_se_cmd() does not initialize all fields, so do it
- -       * here.
- -       */
- -      memset(&ioctx->cmd, 0, sizeof(ioctx->cmd));
- -      memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data));
+ +
+ +      ioctx->cmd.map_tag = tag;
   
         return ioctx;
   }
@@@ -2020,8 -2034,9 +2020,8 @@@ static int srpt_cm_req_recv(struct ib_c
         struct srp_login_rej *rej;
         struct ib_cm_rep_param *rep_param;
         struct srpt_rdma_ch *ch, *tmp_ch;
- -      struct se_node_acl *se_acl;
         u32 it_iu_len;
- -      int i, ret = 0;
+ +      int ret = 0;
         unsigned char *p;
   
         WARN_ON_ONCE(irqs_disabled());
@@@ -2143,6 -2158,12 +2143,6 @@@
         if (!ch->ioctx_ring)
                 goto free_ch;
   
- -      INIT_LIST_HEAD(&ch->free_list);
- -      for (i = 0; i < ch->rq_size; i++) {
- -              ch->ioctx_ring[i]->ch = ch;
- -              list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
- -      }
- -
         ret = srpt_create_ch_ib(ch);
         if (ret) {
                 rej->reason = cpu_to_be32(
@@@ -2172,13 -2193,19 +2172,13 @@@
         pr_debug("registering session %s\n", ch->sess_name);
         p = &ch->sess_name[0];
   
- -      ch->sess = transport_init_session(TARGET_PROT_NORMAL);
- -      if (IS_ERR(ch->sess)) {
- -              rej->reason = cpu_to_be32(
- -                              SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- -              pr_debug("Failed to create session\n");
- -              goto destroy_ib;
- -      }
- -
   try_again:
- -      se_acl = core_tpg_get_initiator_node_acl(&sport->port_tpg_1, p);
- -      if (!se_acl) {
+ +      ch->sess = target_alloc_session(&sport->port_tpg_1, ch->rq_size,
+ +                                      sizeof(struct srpt_send_ioctx),
+ +                                      TARGET_PROT_NORMAL, p, ch, NULL);
+ +      if (IS_ERR(ch->sess)) {
                 pr_info("Rejected login because no ACL has been"
- -                      " configured yet for initiator %s.\n", ch->sess_name);
+ +                      " configured yet for initiator %s.\n", p);
                 /*
                  * XXX: Hack to retry of ch->i_port_id without leading '0x'
                  */
@@@ -2186,11 -2213,14 +2186,11 @@@
                         p += 2;
                         goto try_again;
                 }
- -              rej->reason = cpu_to_be32(
+ +              rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
+ +                              SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
                                 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
- -              transport_free_session(ch->sess);
                 goto destroy_ib;
         }
- -      ch->sess->se_node_acl = se_acl;
- -
- -      transport_register_session(&sport->port_tpg_1, se_acl, ch->sess, ch);
   
         pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess,
                  ch->sess_name, ch->cm_id);
@@@ -2881,7 -2911,7 +2881,7 @@@ static void srpt_release_cmd(struct se_
         struct srpt_send_ioctx *ioctx = container_of(se_cmd,
                                 struct srpt_send_ioctx, cmd);
         struct srpt_rdma_ch *ch = ioctx->ch;
- -      unsigned long flags;
+ +      struct se_session *se_sess = ch->sess;
   
         WARN_ON(ioctx->state != SRPT_STATE_DONE);
         WARN_ON(ioctx->mapped_sg_count != 0);
@@@ -2892,7 -2922,9 +2892,7 @@@
                 ioctx->n_rbuf = 0;
         }
   
- -      spin_lock_irqsave(&ch->spinlock, flags);
- -      list_add(&ioctx->free_list, &ch->free_list);
- -      spin_unlock_irqrestore(&ch->spinlock, flags);
+ +      percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
   }
   
   /**
diff --combined drivers/net/ethernet/intel/i40e/i40e.h

index 2f6210ae8ba0f3cf2621035b314d33ced74e0068,e734c649227d9d255dddff79bb898c70c2ed40d8..1ce6e9c0427d54505bb339b42d2d226d69726750
--- 1/drivers/net/ethernet/intel/i40e/i40e.h
--- 2/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@@ -1,7 -1,7 +1,7 @@@
   /*******************************************************************************
    *
    * Intel Ethernet Controller XL710 Family Linux Driver
- - * Copyright(c) 2013 - 2015 Intel Corporation.
+ + * Copyright(c) 2013 - 2016 Intel Corporation.
    *
    * This program is free software; you can redistribute it and/or modify it
    * under the terms and conditions of the GNU General Public License,
@@@ -58,12 -58,16 +58,13 @@@
   #ifdef I40E_FCOE
   #include "i40e_fcoe.h"
   #endif
+ #include "i40e_client.h"
   #include "i40e_virtchnl.h"
   #include "i40e_virtchnl_pf.h"
   #include "i40e_txrx.h"
   #include "i40e_dcb.h"
   
   /* Useful i40e defaults */
- -#define I40E_BASE_PF_SEID     16
- -#define I40E_BASE_VSI_SEID    512
- -#define I40E_BASE_VEB_SEID    288
   #define I40E_MAX_VEB          16
   
   #define I40E_MAX_NUM_DESCRIPTORS      4096
@@@ -101,7 -105,6 +102,7 @@@
   #define I40E_PRIV_FLAGS_FD_ATR                BIT(2)
   #define I40E_PRIV_FLAGS_VEB_STATS     BIT(3)
   #define I40E_PRIV_FLAGS_PS            BIT(4)
+ +#define I40E_PRIV_FLAGS_HW_ATR_EVICT  BIT(5)
   
   #define I40E_NVM_VERSION_LO_SHIFT  0
   #define I40E_NVM_VERSION_LO_MASK   (0xff << I40E_NVM_VERSION_LO_SHIFT)
@@@ -111,7 -114,6 +112,7 @@@
   #define I40E_OEM_VER_PATCH_MASK    0xff
   #define I40E_OEM_VER_BUILD_SHIFT   8
   #define I40E_OEM_VER_SHIFT         24
+ +#define I40E_PHY_DEBUG_PORT        BIT(4)
   
   /* The values in here are decimal coded as hex as is the case in the NVM map*/
   #define I40E_CURRENT_NVM_VERSION_HI 0x2
@@@ -136,19 -138,6 +137,19 @@@
   /* default to trying for four seconds */
   #define I40E_TRY_LINK_TIMEOUT (4 * HZ)
   
+ +/**
+ + * i40e_is_mac_710 - Return true if MAC is X710/XL710
+ + * @hw: ptr to the hardware info
+ + **/
+ +static inline bool i40e_is_mac_710(struct i40e_hw *hw)
+ +{
+ +      if ((hw->mac.type == I40E_MAC_X710) ||
+ +          (hw->mac.type == I40E_MAC_XL710))
+ +              return true;
+ +
+ +      return false;
+ +}
+ +
   /* driver state flags */
   enum i40e_state_t {
         __I40E_TESTING,
@@@ -190,6 -179,7 +191,7 @@@ struct i40e_lump_tracking 
         u16 search_hint;
         u16 list[0];
   #define I40E_PILE_VALID_BIT  0x8000
+ #define I40E_IWARP_IRQ_PILE_ID  (I40E_PILE_VALID_BIT - 2)
   };
   
   #define I40E_DEFAULT_ATR_SAMPLE_RATE  20
@@@ -282,6 -272,8 +284,8 @@@ struct i40e_pf 
   #endif /* I40E_FCOE */
         u16 num_lan_qps;           /* num lan queues this PF has set up */
         u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
+       u16 num_iwarp_msix;        /* num of iwarp vectors for this PF */
+       int iwarp_base_vector;
         int queues_left;           /* queues left unclaimed */
         u16 alloc_rss_size;        /* allocated RSS queues */
         u16 rss_size_max;          /* HW defined max RSS queues */
@@@ -329,6 -321,7 +333,7 @@@
   #define I40E_FLAG_16BYTE_RX_DESC_ENABLED      BIT_ULL(13)
   #define I40E_FLAG_CLEAN_ADMINQ                        BIT_ULL(14)
   #define I40E_FLAG_FILTER_SYNC                 BIT_ULL(15)
+ #define I40E_FLAG_SERVICE_CLIENT_REQUESTED    BIT_ULL(16)
   #define I40E_FLAG_PROCESS_MDD_EVENT           BIT_ULL(17)
   #define I40E_FLAG_PROCESS_VFLR_EVENT          BIT_ULL(18)
   #define I40E_FLAG_SRIOV_ENABLED                       BIT_ULL(19)
@@@ -351,12 -344,6 +356,12 @@@
   #define I40E_FLAG_VEB_MODE_ENABLED            BIT_ULL(40)
   #define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE      BIT_ULL(41)
   #define I40E_FLAG_NO_PCI_LINK_CHECK           BIT_ULL(42)
+ +#define I40E_FLAG_100M_SGMII_CAPABLE          BIT_ULL(43)
+ +#define I40E_FLAG_RESTART_AUTONEG             BIT_ULL(44)
+ +#define I40E_FLAG_NO_DCB_SUPPORT              BIT_ULL(45)
+ +#define I40E_FLAG_USE_SET_LLDP_MIB            BIT_ULL(46)
+ +#define I40E_FLAG_STOP_FW_LLDP                        BIT_ULL(47)
+ +#define I40E_FLAG_HAVE_10GBASET_PHY           BIT_ULL(48)
   #define I40E_FLAG_PF_MAC                      BIT_ULL(50)
   
         /* tracks features that get auto disabled by errors */
@@@ -409,7 -396,6 +414,7 @@@
         struct i40e_vf *vf;
         int num_alloc_vfs;      /* actual number of VFs allocated */
         u32 vf_aq_requests;
+ +      u32 arq_overflows;      /* Not fatal, possibly indicative of problems */
   
         /* DCBx/DCBNL capability for PF that indicates
          * whether DCBx is managed by firmware or host
@@@ -442,7 -428,6 +447,7 @@@
   
         u32 ioremap_len;
         u32 fd_inv;
+ +      u16 phy_led_val;
   };
   
   struct i40e_mac_filter {
@@@ -512,7 -497,6 +517,7 @@@ struct i40e_vsi 
         u32 tx_busy;
         u64 tx_linearize;
         u64 tx_force_wb;
+ +      u64 tx_lost_interrupt;
         u32 rx_buf_failed;
         u32 rx_page_failed;
   
@@@ -521,6 -505,13 +526,6 @@@
         struct i40e_ring **tx_rings;
   
         u16 work_limit;
- -      /* high bit set means dynamic, use accessor routines to read/write.
- -       * hardware only supports 2us resolution for the ITR registers.
- -       * these values always store the USER setting, and must be converted
- -       * before programming to a register.
- -       */
- -      u16 rx_itr_setting;
- -      u16 tx_itr_setting;
         u16 int_rate_limit;  /* value in usecs */
   
         u16 rss_table_size; /* HW RSS table size */
@@@ -571,6 -562,8 +576,8 @@@
         struct kobject *kobj;  /* sysfs object */
         bool current_isup;     /* Sync 'link up' logging */
   
+       void *priv;     /* client driver data reference. */
+ 
         /* VSI specific handlers */
         irqreturn_t (*irq_handler)(int irq, void *data);
   
@@@ -728,6 -721,10 +735,10 @@@ void i40e_vsi_setup_queue_map(struct i4
                               struct i40e_vsi_context *ctxt,
                               u8 enabled_tc, bool is_add);
   #endif
+ void i40e_service_event_schedule(struct i40e_pf *pf);
+ void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
+                                 u8 *msg, u16 len);
+ 
   int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable);
   int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
   struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
@@@ -750,6 -747,17 +761,17 @@@ static inline void i40e_dbg_pf_exit(str
   static inline void i40e_dbg_init(void) {}
   static inline void i40e_dbg_exit(void) {}
   #endif /* CONFIG_DEBUG_FS*/
+ /* needed by client drivers */
+ int i40e_lan_add_device(struct i40e_pf *pf);
+ int i40e_lan_del_device(struct i40e_pf *pf);
+ void i40e_client_subtask(struct i40e_pf *pf);
+ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
+ void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi);
+ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
+ void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
+ void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+                          enum i40e_client_type type);
   /**
    * i40e_irq_dynamic_enable - Enable default interrupt generation settings
    * @vsi: pointer to a vsi
@@@ -761,9 -769,6 +783,9 @@@ static inline void i40e_irq_dynamic_ena
         struct i40e_hw *hw = &pf->hw;
         u32 val;
   
+ +      /* definitely clear the PBA here, as this function is meant to
+ +       * clean out all previous interrupts AND enable the interrupt
+ +       */
         val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
               I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
               (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
@@@ -771,8 -776,9 +793,8 @@@
         /* skip the flush */
   }
   
- -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector);
   void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf);
- -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf);
+ +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba);
   #ifdef I40E_FCOE
   struct rtnl_link_stats64 *i40e_get_netdev_stats_struct(
                                              struct net_device *netdev,
@@@ -802,8 -808,7 +824,8 @@@ struct i40e_mac_filter *i40e_find_mac(s
                                       bool is_vf, bool is_netdev);
   #ifdef I40E_FCOE
   int i40e_close(struct net_device *netdev);
- -int i40e_setup_tc(struct net_device *netdev, u8 tc);
+ +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+ +                  struct tc_to_netdev *tc);
   void i40e_netpoll(struct net_device *netdev);
   int i40e_fcoe_enable(struct net_device *netdev);
   int i40e_fcoe_disable(struct net_device *netdev);
diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c

index 70d9605a0d9e712f1bb89a38da053c8b07ee9a83,1df2629d37059b88502fca4cae95f0c06bf3764d..67006431726aa03c41f38c5e54bf9009e3bf9aa2
--- 1/drivers/net/ethernet/intel/i40e/i40e_main.c
--- 2/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@@ -1,7 -1,7 +1,7 @@@
   /*******************************************************************************
    *
    * Intel Ethernet Controller XL710 Family Linux Driver
- - * Copyright(c) 2013 - 2015 Intel Corporation.
+ + * Copyright(c) 2013 - 2016 Intel Corporation.
    *
    * This program is free software; you can redistribute it and/or modify it
    * under the terms and conditions of the GNU General Public License,
@@@ -28,6 -28,11 +28,6 @@@
   #include <linux/of_net.h>
   #include <linux/pci.h>
   
- -#ifdef CONFIG_SPARC
- -#include <asm/idprom.h>
- -#include <asm/prom.h>
- -#endif
- -
   /* Local includes */
   #include "i40e.h"
   #include "i40e_diag.h"
@@@ -46,7 -51,7 +46,7 @@@ static const char i40e_driver_string[] 
   
   #define DRV_VERSION_MAJOR 1
   #define DRV_VERSION_MINOR 4
- -#define DRV_VERSION_BUILD 8
+ +#define DRV_VERSION_BUILD 25
   #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
              __stringify(DRV_VERSION_MINOR) "." \
              __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@@ -85,8 -90,6 +85,8 @@@ static const struct pci_device_id i40e_
         {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
         {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
         {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
+ +      {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0},
+ +      {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0},
         {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
         {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
         {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
@@@ -107,8 -110,6 +107,8 @@@ MODULE_DESCRIPTION("Intel(R) Ethernet C
   MODULE_LICENSE("GPL");
   MODULE_VERSION(DRV_VERSION);
   
+ +static struct workqueue_struct *i40e_wq;
+ +
   /**
    * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
    * @hw:   pointer to the HW structure
@@@ -289,12 -290,12 +289,12 @@@ struct i40e_vsi *i40e_find_vsi_from_id(
    *
    * If not already scheduled, this puts the task into the work queue
    **/
- static void i40e_service_event_schedule(struct i40e_pf *pf)
+ void i40e_service_event_schedule(struct i40e_pf *pf)
   {
         if (!test_bit(__I40E_DOWN, &pf->state) &&
             !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) &&
             !test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state))
- -              schedule_work(&pf->service_task);
+ +              queue_work(i40e_wq, &pf->service_task);
   }
   
   /**
@@@ -768,7 -769,7 +768,7 @@@ static void i40e_update_fcoe_stats(stru
         if (vsi->type != I40E_VSI_FCOE)
                 return;
   
- -      idx = (pf->pf_seid - I40E_BASE_PF_SEID) + I40E_FCOE_PF_STAT_OFFSET;
+ +      idx = hw->pf_id + I40E_FCOE_PF_STAT_OFFSET;
         fs = &vsi->fcoe_stats;
         ofs = &vsi->fcoe_stats_offsets;
   
@@@ -819,7 -820,6 +819,7 @@@ static void i40e_update_vsi_stats(struc
         struct i40e_eth_stats *oes;
         struct i40e_eth_stats *es;     /* device's eth stats */
         u32 tx_restart, tx_busy;
+ +      u64 tx_lost_interrupt;
         struct i40e_ring *p;
         u32 rx_page, rx_buf;
         u64 bytes, packets;
@@@ -845,7 -845,6 +845,7 @@@
         rx_b = rx_p = 0;
         tx_b = tx_p = 0;
         tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
+ +      tx_lost_interrupt = 0;
         rx_page = 0;
         rx_buf = 0;
         rcu_read_lock();
@@@ -864,7 -863,6 +864,7 @@@
                 tx_busy += p->tx_stats.tx_busy;
                 tx_linearize += p->tx_stats.tx_linearize;
                 tx_force_wb += p->tx_stats.tx_force_wb;
+ +              tx_lost_interrupt += p->tx_stats.tx_lost_interrupt;
   
                 /* Rx queue is part of the same block as Tx queue */
                 p = &p[1];
@@@ -883,7 -881,6 +883,7 @@@
         vsi->tx_busy = tx_busy;
         vsi->tx_linearize = tx_linearize;
         vsi->tx_force_wb = tx_force_wb;
+ +      vsi->tx_lost_interrupt = tx_lost_interrupt;
         vsi->rx_page_failed = rx_page;
         vsi->rx_buf_failed = rx_buf;
   
@@@ -1371,7 -1368,7 +1371,7 @@@ struct i40e_mac_filter *i40e_add_filter
                 f->changed = true;
   
                 INIT_LIST_HEAD(&f->list);
- -              list_add(&f->list, &vsi->mac_filter_list);
+ +              list_add_tail(&f->list, &vsi->mac_filter_list);
         }
   
         /* increment counter and add a new flag if needed */
@@@ -1541,11 -1538,7 +1541,11 @@@ static int i40e_set_mac(struct net_devi
   
         ether_addr_copy(netdev->dev_addr, addr->sa_data);
   
- -      return i40e_sync_vsi_filters(vsi);
+ +      /* schedule our worker thread which will take care of
+ +       * applying the new filter changes
+ +       */
+ +      i40e_service_event_schedule(vsi->back);
+ +      return 0;
   }
   
   /**
@@@ -1769,11 -1762,6 +1769,11 @@@ bottom_of_search_loop
                 vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
                 vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
         }
+ +
+ +      /* schedule our worker thread which will take care of
+ +       * applying the new filter changes
+ +       */
+ +      i40e_service_event_schedule(vsi->back);
   }
   
   /**
@@@ -1945,7 -1933,7 +1945,7 @@@ int i40e_sync_vsi_filters(struct i40e_v
                             sizeof(struct i40e_aqc_remove_macvlan_element_data);
                 del_list_size = filter_list_len *
                             sizeof(struct i40e_aqc_remove_macvlan_element_data);
- -              del_list = kzalloc(del_list_size, GFP_KERNEL);
+ +              del_list = kzalloc(del_list_size, GFP_ATOMIC);
                 if (!del_list) {
                         i40e_cleanup_add_list(&tmp_add_list);
   
@@@ -2023,7 -2011,7 +2023,7 @@@
                                sizeof(struct i40e_aqc_add_macvlan_element_data),
                 add_list_size = filter_list_len *
                                sizeof(struct i40e_aqc_add_macvlan_element_data);
- -              add_list = kzalloc(add_list_size, GFP_KERNEL);
+ +              add_list = kzalloc(add_list_size, GFP_ATOMIC);
                 if (!add_list) {
                         /* Purge element from temporary lists */
                         i40e_cleanup_add_list(&tmp_add_list);
@@@ -2122,9 -2110,7 +2122,9 @@@
                 cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
                                test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
                                         &vsi->state));
- -              if (vsi->type == I40E_VSI_MAIN && pf->lan_veb != I40E_NO_VEB) {
+ +              if ((vsi->type == I40E_VSI_MAIN) &&
+ +                  (pf->lan_veb != I40E_NO_VEB) &&
+ +                  !(pf->flags & I40E_FLAG_MFP_ENABLED)) {
                         /* set defport ON for Main VSI instead of true promisc
                          * this way we will get all unicast/multicast and VLAN
                          * promisc behavior but will not get VF or VMDq traffic
@@@ -2174,10 -2160,6 +2174,10 @@@
                 }
         }
   out:
+ +      /* if something went wrong then set the changed flag so we try again */
+ +      if (retval)
+ +              vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+ +
         clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
         return retval;
   }
@@@ -2230,7 -2212,7 +2230,7 @@@ static int i40e_change_mtu(struct net_d
         netdev->mtu = new_mtu;
         if (netif_running(netdev))
                 i40e_vsi_reinit_locked(vsi);
- 
+       i40e_notify_client_of_l2_param_changes(vsi);
         return 0;
   }
   
@@@ -3124,11 -3106,11 +3124,11 @@@ static void i40e_vsi_configure_msix(str
                 struct i40e_q_vector *q_vector = vsi->q_vectors[i];
   
                 q_vector->itr_countdown = ITR_COUNTDOWN_START;
- -              q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
+ +              q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
                 q_vector->rx.latency_range = I40E_LOW_LATENCY;
                 wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
                      q_vector->rx.itr);
- -              q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
+ +              q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
                 q_vector->tx.latency_range = I40E_LOW_LATENCY;
                 wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
                      q_vector->tx.itr);
@@@ -3220,10 -3202,10 +3220,10 @@@ static void i40e_configure_msi_and_lega
   
         /* set the ITR configuration */
         q_vector->itr_countdown = ITR_COUNTDOWN_START;
- -      q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
+ +      q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
         q_vector->rx.latency_range = I40E_LOW_LATENCY;
         wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
- -      q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
+ +      q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
         q_vector->tx.latency_range = I40E_LOW_LATENCY;
         wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
   
@@@ -3263,21 -3245,36 +3263,21 @@@ void i40e_irq_dynamic_disable_icr0(stru
   /**
    * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
    * @pf: board private structure
+ + * @clearpba: true when all pending interrupt events should be cleared
    **/
- -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
+ +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba)
   {
         struct i40e_hw *hw = &pf->hw;
         u32 val;
   
         val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
- -            I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
+ +            (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) |
               (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
   
         wr32(hw, I40E_PFINT_DYN_CTL0, val);
         i40e_flush(hw);
   }
   
- -/**
- - * i40e_irq_dynamic_disable - Disable default interrupt generation settings
- - * @vsi: pointer to a vsi
- - * @vector: disable a particular Hw Interrupt vector
- - **/
- -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector)
- -{
- -      struct i40e_pf *pf = vsi->back;
- -      struct i40e_hw *hw = &pf->hw;
- -      u32 val;
- -
- -      val = I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
- -      wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
- -      i40e_flush(hw);
- -}
- -
   /**
    * i40e_msix_clean_rings - MSIX mode Interrupt Handler
    * @irq: interrupt number
@@@ -3403,7 -3400,7 +3403,7 @@@ static int i40e_vsi_enable_irq(struct i
                 for (i = 0; i < vsi->num_q_vectors; i++)
                         i40e_irq_dynamic_enable(vsi, i);
         } else {
- -              i40e_irq_dynamic_enable_icr0(pf);
+ +              i40e_irq_dynamic_enable_icr0(pf, true);
         }
   
         i40e_flush(&pf->hw);
@@@ -3462,12 -3459,16 +3462,12 @@@ static irqreturn_t i40e_intr(int irq, v
                 struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
                 struct i40e_q_vector *q_vector = vsi->q_vectors[0];
   
- -              /* temporarily disable queue cause for NAPI processing */
- -              u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
- -
- -              qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
- -              wr32(hw, I40E_QINT_RQCTL(0), qval);
- -
- -              qval = rd32(hw, I40E_QINT_TQCTL(0));
- -              qval &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
- -              wr32(hw, I40E_QINT_TQCTL(0), qval);
- -
+ +              /* We do not have a way to disarm Queue causes while leaving
+ +               * interrupt enabled for all other causes, ideally
+ +               * interrupt should be disabled while we are in NAPI but
+ +               * this is not a performance path and napi_schedule()
+ +               * can deal with rescheduling.
+ +               */
                 if (!test_bit(__I40E_DOWN, &pf->state))
                         napi_schedule_irqoff(&q_vector->napi);
         }
@@@ -3475,7 -3476,6 +3475,7 @@@
         if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
                 ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
                 set_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
+ +              i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n");
         }
   
         if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) {
@@@ -3546,7 -3546,7 +3546,7 @@@ enable_intr
         wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
         if (!test_bit(__I40E_DOWN, &pf->state)) {
                 i40e_service_event_schedule(pf);
- -              i40e_irq_dynamic_enable_icr0(pf);
+ +              i40e_irq_dynamic_enable_icr0(pf, false);
         }
   
         return ret;
@@@ -3750,7 -3750,7 +3750,7 @@@ static int i40e_vsi_request_irq(struct 
   
   #ifdef CONFIG_NET_POLL_CONTROLLER
   /**
- - * i40e_netpoll - A Polling 'interrupt'handler
+ + * i40e_netpoll - A Polling 'interrupt' handler
    * @netdev: network interface device structure
    *
    * This is used by netconsole to send skbs without having to re-enable
@@@ -3929,9 -3929,6 +3929,9 @@@ static int i40e_vsi_control_rx(struct i
                 else
                         rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
                 wr32(hw, I40E_QRX_ENA(pf_q), rx_reg);
+ +              /* No waiting for the Rx queue to disable */
+ +              if (!enable && test_bit(__I40E_PORT_TX_SUSPENDED, &pf->state))
+ +                      continue;
   
                 /* wait for the change to finish */
                 ret = i40e_pf_rxq_wait(pf, pf_q, enable);
@@@ -4169,6 -4166,9 +4169,9 @@@ static void i40e_clear_interrupt_scheme
                 free_irq(pf->msix_entries[0].vector, pf);
         }
   
+       i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
+                     I40E_IWARP_IRQ_PILE_ID);
+ 
         i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
         for (i = 0; i < pf->num_alloc_vsi; i++)
                 if (pf->vsi[i])
@@@ -4212,12 -4212,17 +4215,17 @@@ static void i40e_napi_disable_all(struc
    **/
   static void i40e_vsi_close(struct i40e_vsi *vsi)
   {
+       bool reset = false;
+ 
         if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
                 i40e_down(vsi);
         i40e_vsi_free_irq(vsi);
         i40e_vsi_free_tx_resources(vsi);
         i40e_vsi_free_rx_resources(vsi);
         vsi->current_netdev_flags = 0;
+       if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
+               reset = true;
+       i40e_notify_client_of_netdev_close(vsi, reset);
   }
   
   /**
@@@ -4290,12 -4295,12 +4298,12 @@@ static void i40e_pf_unquiesce_all_vsi(s
   
   #ifdef CONFIG_I40E_DCB
   /**
- - * i40e_vsi_wait_txq_disabled - Wait for VSI's queues to be disabled
+ + * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled
    * @vsi: the VSI being configured
    *
- - * This function waits for the given VSI's Tx queues to be disabled.
+ + * This function waits for the given VSI's queues to be disabled.
    **/
- -static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi)
+ +static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
   {
         struct i40e_pf *pf = vsi->back;
         int i, pf_q, ret;
@@@ -4312,36 -4317,24 +4320,36 @@@
                 }
         }
   
+ +      pf_q = vsi->base_queue;
+ +      for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+ +              /* Check and wait for the disable status of the queue */
+ +              ret = i40e_pf_rxq_wait(pf, pf_q, false);
+ +              if (ret) {
+ +                      dev_info(&pf->pdev->dev,
+ +                               "VSI seid %d Rx ring %d disable timeout\n",
+ +                               vsi->seid, pf_q);
+ +                      return ret;
+ +              }
+ +      }
+ +
         return 0;
   }
   
   /**
- - * i40e_pf_wait_txq_disabled - Wait for all queues of PF VSIs to be disabled
+ + * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled
    * @pf: the PF
    *
- - * This function waits for the Tx queues to be in disabled state for all the
+ + * This function waits for the queues to be in disabled state for all the
    * VSIs that are managed by this PF.
    **/
- -static int i40e_pf_wait_txq_disabled(struct i40e_pf *pf)
+ +static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
   {
         int v, ret = 0;
   
         for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
                 /* No need to wait for FCoE VSI queues */
                 if (pf->vsi[v] && pf->vsi[v]->type != I40E_VSI_FCOE) {
- -                      ret = i40e_vsi_wait_txq_disabled(pf->vsi[v]);
+ +                      ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
                         if (ret)
                                 break;
                 }
@@@ -4367,7 -4360,7 +4375,7 @@@ static void i40e_detect_recover_hung_qu
   {
         struct i40e_ring *tx_ring = NULL;
         struct i40e_pf  *pf;
- -      u32 head, val, tx_pending;
+ +      u32 head, val, tx_pending_hw;
         int i;
   
         pf = vsi->back;
@@@ -4393,9 -4386,16 +4401,9 @@@
         else
                 val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
   
- -      /* Bail out if interrupts are disabled because napi_poll
- -       * execution in-progress or will get scheduled soon.
- -       * napi_poll cleans TX and RX queues and updates 'next_to_clean'.
- -       */
- -      if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))
- -              return;
- -
         head = i40e_get_head(tx_ring);
   
- -      tx_pending = i40e_get_tx_pending(tx_ring);
+ +      tx_pending_hw = i40e_get_tx_pending(tx_ring, false);
   
         /* HW is done executing descriptors, updated HEAD write back,
          * but SW hasn't processed those descriptors. If interrupt is
@@@ -4403,12 -4403,12 +4411,12 @@@
          * dev_watchdog detecting timeout on those netdev_queue,
          * hence proactively trigger SW interrupt.
          */
- -      if (tx_pending) {
+ +      if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) {
                 /* NAPI Poll didn't run and clear since it was set */
                 if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT,
                                        &tx_ring->q_vector->hung_detected)) {
- -                      netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
- -                                  vsi->seid, q_idx, tx_pending,
+ +                      netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
+ +                                  vsi->seid, q_idx, tx_pending_hw,
                                     tx_ring->next_to_clean, head,
                                     tx_ring->next_to_use,
                                     readl(tx_ring->tail));
@@@ -4421,17 -4421,6 +4429,17 @@@
                                 &tx_ring->q_vector->hung_detected);
                 }
         }
+ +
+ +      /* This is the case where we have interrupts missing,
+ +       * so the tx_pending in HW will most likely be 0, but we
+ +       * will have tx_pending in SW since the WB happened but the
+ +       * interrupt got lost.
+ +       */
+ +      if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) &&
+ +          (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) {
+ +              if (napi_reschedule(&tx_ring->q_vector->napi))
+ +                      tx_ring->tx_stats.tx_lost_interrupt++;
+ +      }
   }
   
   /**
@@@ -4850,6 -4839,12 +4858,12 @@@ static int i40e_vsi_config_tc(struct i4
         ctxt.info = vsi->info;
         i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
   
+       if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+               ctxt.info.valid_sections |=
+                               cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+               ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+       }
+ 
         /* Update the VSI after updating the VSI queue-mapping information */
         ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
         if (ret) {
@@@ -4993,6 -4988,7 +5007,7 @@@ static void i40e_dcb_reconfigure(struc
                         if (pf->vsi[v]->netdev)
                                 i40e_dcbnl_set_all(pf->vsi[v]);
                 }
+               i40e_notify_client_of_l2_param_changes(pf->vsi[v]);
         }
   }
   
@@@ -5035,7 -5031,8 +5050,7 @@@ static int i40e_init_pf_dcb(struct i40e
         int err = 0;
   
         /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
- -      if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
- -          (pf->hw.aq.fw_maj_ver < 4))
+ +      if (pf->flags & I40E_FLAG_NO_DCB_SUPPORT)
                 goto out;
   
         /* Get the initial DCB configuration */
@@@ -5191,6 -5188,11 +5206,11 @@@ static int i40e_up_complete(struct i40e
                 }
                 i40e_fdir_filter_restore(vsi);
         }
+ 
+       /* On the next run of the service_task, notify any clients of the newly
+        * opened netdev
+        */
+       pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
         i40e_service_event_schedule(pf);
   
         return 0;
@@@ -5267,7 -5269,11 +5287,7 @@@ void i40e_down(struct i40e_vsi *vsi
    * @netdev: net device to configure
    * @tc: number of traffic classes to enable
    **/
- -#ifdef I40E_FCOE
- -int i40e_setup_tc(struct net_device *netdev, u8 tc)
- -#else
   static int i40e_setup_tc(struct net_device *netdev, u8 tc)
- -#endif
   {
         struct i40e_netdev_priv *np = netdev_priv(netdev);
         struct i40e_vsi *vsi = np->vsi;
@@@ -5320,19 -5326,6 +5340,19 @@@ exit
         return ret;
   }
   
+ +#ifdef I40E_FCOE
+ +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+ +                  struct tc_to_netdev *tc)
+ +#else
+ +static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+ +                         struct tc_to_netdev *tc)
+ +#endif
+ +{
+ +      if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+ +              return -EINVAL;
+ +      return i40e_setup_tc(netdev, tc->tc);
+ +}
+ +
   /**
    * i40e_open - Called when a network interface is made active
    * @netdev: network interface device structure
@@@ -5375,10 -5368,11 +5395,12 @@@ int i40e_open(struct net_device *netdev
         vxlan_get_rx_port(netdev);
   #endif
   #ifdef CONFIG_I40E_GENEVE
- -      geneve_get_rx_port(netdev);
+ +      if (pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)
+ +              geneve_get_rx_port(netdev);
   #endif
   
+       i40e_notify_client_of_netdev_open(vsi);
+ 
         return 0;
   }
   
@@@ -5741,8 -5735,8 +5763,8 @@@ static int i40e_handle_lldp_event(struc
         if (ret)
                 goto exit;
   
- -      /* Wait for the PF's Tx queues to be disabled */
- -      ret = i40e_pf_wait_txq_disabled(pf);
+ +      /* Wait for the PF's queues to be disabled */
+ +      ret = i40e_pf_wait_queues_disabled(pf);
         if (ret) {
                 /* Schedule PF reset to recover */
                 set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
@@@ -6043,6 -6037,7 +6065,7 @@@ static void i40e_vsi_link_event(struct 
         case I40E_VSI_SRIOV:
         case I40E_VSI_VMDQ2:
         case I40E_VSI_CTRL:
+       case I40E_VSI_IWARP:
         case I40E_VSI_MIRROR:
         default:
                 /* there is no notification for other VSIs */
@@@ -6272,7 -6267,6 +6295,7 @@@ static void i40e_clean_adminq_subtask(s
                 if (hw->debug_mask & I40E_DEBUG_AQ)
                         dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
                 val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK;
+ +              pf->arq_overflows++;
         }
         if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) {
                 if (hw->debug_mask & I40E_DEBUG_AQ)
@@@ -6348,9 -6342,7 +6371,9 @@@
                 case i40e_aqc_opc_nvm_erase:
                 case i40e_aqc_opc_nvm_update:
                 case i40e_aqc_opc_oem_post_update:
- -                      i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation completed\n");
+ +                      i40e_debug(&pf->hw, I40E_DEBUG_NVM,
+ +                                 "ARQ NVM operation 0x%04x completed\n",
+ +                                 opcode);
                         break;
                 default:
                         dev_info(&pf->pdev->dev,
@@@ -6834,12 -6826,12 +6857,12 @@@ static void i40e_reset_and_rebuild(stru
         if (ret)
                 goto end_core_reset;
   
- -      /* driver is only interested in link up/down and module qualification
- -       * reports from firmware
+ +      /* The driver only wants link up/down and module qualification
+ +       * reports from firmware.  Note the negative logic.
          */
         ret = i40e_aq_set_phy_int_mask(&pf->hw,
- -                                     I40E_AQ_EVENT_LINK_UPDOWN |
- -                                     I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL);
+ +                                     ~(I40E_AQ_EVENT_LINK_UPDOWN |
+ +                                       I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
         if (ret)
                 dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
                          i40e_stat_str(&pf->hw, ret),
@@@ -6920,7 -6912,8 +6943,7 @@@
                 wr32(hw, I40E_REG_MSS, val);
         }
   
- -      if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
- -          (pf->hw.aq.fw_maj_ver < 4)) {
+ +      if (pf->flags & I40E_FLAG_RESTART_AUTONEG) {
                 msleep(75);
                 ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
                 if (ret)
@@@ -7109,13 -7102,12 +7132,13 @@@ static void i40e_sync_udp_filters_subta
                                 ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
   
                         if (ret) {
- -                              dev_info(&pf->pdev->dev,
- -                                       "%s vxlan port %d, index %d failed, err %s aq_err %s\n",
- -                                       port ? "add" : "delete",
- -                                       ntohs(port), i,
- -                                       i40e_stat_str(&pf->hw, ret),
- -                                       i40e_aq_str(&pf->hw,
+ +                              dev_dbg(&pf->pdev->dev,
+ +                                      "%s %s port %d, index %d failed, err %s aq_err %s\n",
+ +                                      pf->udp_ports[i].type ? "vxlan" : "geneve",
+ +                                      port ? "add" : "delete",
+ +                                      ntohs(port), i,
+ +                                      i40e_stat_str(&pf->hw, ret),
+ +                                      i40e_aq_str(&pf->hw,
                                                     pf->hw.aq.asq_last_status));
                                 pf->udp_ports[i].index = 0;
                         }
@@@ -7142,12 -7134,12 +7165,13 @@@ static void i40e_service_task(struct wo
         }
   
         i40e_detect_recover_hung(pf);
+ +      i40e_sync_filters_subtask(pf);
         i40e_reset_subtask(pf);
         i40e_handle_mdd_event(pf);
         i40e_vc_process_vflr_event(pf);
         i40e_watchdog_subtask(pf);
         i40e_fdir_reinit_subtask(pf);
+       i40e_client_subtask(pf);
         i40e_sync_filters_subtask(pf);
         i40e_sync_udp_filters_subtask(pf);
         i40e_clean_adminq_subtask(pf);
@@@ -7322,6 -7314,8 +7346,6 @@@ static int i40e_vsi_mem_alloc(struct i4
         set_bit(__I40E_DOWN, &vsi->state);
         vsi->flags = 0;
         vsi->idx = vsi_idx;
- -      vsi->rx_itr_setting = pf->rx_itr_default;
- -      vsi->tx_itr_setting = pf->tx_itr_default;
         vsi->int_rate_limit = 0;
         vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
                                 pf->rss_table_size : 64;
@@@ -7488,7 -7482,8 +7512,7 @@@ static int i40e_alloc_rings(struct i40e
                 tx_ring->dcb_tc = 0;
                 if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
                         tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
- -              if (vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE)
- -                      tx_ring->flags |= I40E_TXR_FLAGS_OUTER_UDP_CSUM;
+ +              tx_ring->tx_itr_setting = pf->tx_itr_default;
                 vsi->tx_rings[i] = tx_ring;
   
                 rx_ring = &tx_ring[1];
@@@ -7505,7 -7500,6 +7529,7 @@@
                         set_ring_16byte_desc_enabled(rx_ring);
                 else
                         clear_ring_16byte_desc_enabled(rx_ring);
+ +              rx_ring->rx_itr_setting = pf->rx_itr_default;
                 vsi->rx_rings[i] = rx_ring;
         }
   
@@@ -7550,6 -7544,7 +7574,7 @@@ static int i40e_init_msix(struct i40e_p
         int vectors_left;
         int v_budget, i;
         int v_actual;
+       int iwarp_requested = 0;
   
         if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
                 return -ENODEV;
@@@ -7563,6 -7558,7 +7588,7 @@@
          *              is governed by number of cpus in the system.
          *      - assumes symmetric Tx/Rx pairing
          *   - The number of VMDq pairs
+        *   - The CPU count within the NUMA node if iWARP is enabled
   #ifdef I40E_FCOE
          *   - The number of FCOE qps.
   #endif
@@@ -7609,6 -7605,16 +7635,16 @@@
         }
   
   #endif
+       /* can we reserve enough for iWARP? */
+       if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+               if (!vectors_left)
+                       pf->num_iwarp_msix = 0;
+               else if (vectors_left < pf->num_iwarp_msix)
+                       pf->num_iwarp_msix = 1;
+               v_budget += pf->num_iwarp_msix;
+               vectors_left -= pf->num_iwarp_msix;
+       }
+ 
         /* any vectors left over go for VMDq support */
         if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
                 int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps;
@@@ -7643,6 -7649,8 +7679,8 @@@
                  * of these features based on the policy and at the end disable
                  * the features that did not get any vectors.
                  */
+               iwarp_requested = pf->num_iwarp_msix;
+               pf->num_iwarp_msix = 0;
   #ifdef I40E_FCOE
                 pf->num_fcoe_qps = 0;
                 pf->num_fcoe_msix = 0;
@@@ -7681,17 -7689,33 +7719,33 @@@
                         pf->num_lan_msix = 1;
                         break;
                 case 3:
+                       if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+                               pf->num_lan_msix = 1;
+                               pf->num_iwarp_msix = 1;
+                       } else {
+                               pf->num_lan_msix = 2;
+                       }
   #ifdef I40E_FCOE
                         /* give one vector to FCoE */
                         if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
                                 pf->num_lan_msix = 1;
                                 pf->num_fcoe_msix = 1;
                         }
- #else
-                       pf->num_lan_msix = 2;
   #endif
                         break;
                 default:
+                       if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+                               pf->num_iwarp_msix = min_t(int, (vec / 3),
+                                                iwarp_requested);
+                               pf->num_vmdq_vsis = min_t(int, (vec / 3),
+                                                 I40E_DEFAULT_NUM_VMDQ_VSI);
+                       } else {
+                               pf->num_vmdq_vsis = min_t(int, (vec / 2),
+                                                 I40E_DEFAULT_NUM_VMDQ_VSI);
+                       }
+                       pf->num_lan_msix = min_t(int,
+                              (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
+                                                             pf->num_lan_msix);
   #ifdef I40E_FCOE
                         /* give one vector to FCoE */
                         if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
@@@ -7699,8 -7723,6 +7753,6 @@@
                                 vec--;
                         }
   #endif
-                       /* give the rest to the PF */
-                       pf->num_lan_msix = min_t(int, vec, pf->num_lan_qps);
                         break;
                 }
         }
@@@ -7710,6 -7732,12 +7762,12 @@@
                 dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
                 pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
         }
+ 
+       if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+           (pf->num_iwarp_msix == 0)) {
+               dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
+               pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+       }
   #ifdef I40E_FCOE
   
         if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) {
@@@ -7801,6 -7829,7 +7859,7 @@@ static int i40e_init_interrupt_scheme(s
                 vectors = i40e_init_msix(pf);
                 if (vectors < 0) {
                         pf->flags &= ~(I40E_FLAG_MSIX_ENABLED   |
+                                      I40E_FLAG_IWARP_ENABLED  |
   #ifdef I40E_FCOE
                                        I40E_FLAG_FCOE_ENABLED   |
   #endif
@@@ -7882,7 -7911,7 +7941,7 @@@ static int i40e_setup_misc_vector(struc
   
         i40e_flush(hw);
   
- -      i40e_irq_dynamic_enable_icr0(pf);
+ +      i40e_irq_dynamic_enable_icr0(pf, true);
   
         return err;
   }
@@@ -7965,52 -7994,6 +8024,52 @@@ static int i40e_vsi_config_rss(struct i
         return ret;
   }
   
+ +/**
+ + * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
+ + * @vsi: Pointer to vsi structure
+ + * @seed: Buffer to store the hash keys
+ + * @lut: Buffer to store the lookup table entries
+ + * @lut_size: Size of buffer to store the lookup table entries
+ + *
+ + * Return 0 on success, negative on failure
+ + */
+ +static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ +                         u8 *lut, u16 lut_size)
+ +{
+ +      struct i40e_pf *pf = vsi->back;
+ +      struct i40e_hw *hw = &pf->hw;
+ +      int ret = 0;
+ +
+ +      if (seed) {
+ +              ret = i40e_aq_get_rss_key(hw, vsi->id,
+ +                      (struct i40e_aqc_get_set_rss_key_data *)seed);
+ +              if (ret) {
+ +                      dev_info(&pf->pdev->dev,
+ +                               "Cannot get RSS key, err %s aq_err %s\n",
+ +                               i40e_stat_str(&pf->hw, ret),
+ +                               i40e_aq_str(&pf->hw,
+ +                                           pf->hw.aq.asq_last_status));
+ +                      return ret;
+ +              }
+ +      }
+ +
+ +      if (lut) {
+ +              bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
+ +
+ +              ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+ +              if (ret) {
+ +                      dev_info(&pf->pdev->dev,
+ +                               "Cannot get RSS lut, err %s aq_err %s\n",
+ +                               i40e_stat_str(&pf->hw, ret),
+ +                               i40e_aq_str(&pf->hw,
+ +                                           pf->hw.aq.asq_last_status));
+ +                      return ret;
+ +              }
+ +      }
+ +
+ +      return ret;
+ +}
+ +
   /**
    * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
    * @vsi: Pointer to vsi structure
@@@ -8032,7 -8015,7 +8091,7 @@@ static int i40e_config_rss_reg(struct i
                 u32 *seed_dw = (u32 *)seed;
   
                 for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
- -                      wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
+ +                      i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
         }
   
         if (lut) {
@@@ -8069,7 -8052,7 +8128,7 @@@ static int i40e_get_rss_reg(struct i40e
                 u32 *seed_dw = (u32 *)seed;
   
                 for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
- -                      seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i));
+ +                      seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i));
         }
         if (lut) {
                 u32 *lut_dw = (u32 *)lut;
@@@ -8113,12 -8096,7 +8172,12 @@@ int i40e_config_rss(struct i40e_vsi *vs
    */
   int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
   {
- -      return i40e_get_rss_reg(vsi, seed, lut, lut_size);
+ +      struct i40e_pf *pf = vsi->back;
+ +
+ +      if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)
+ +              return i40e_get_rss_aq(vsi, seed, lut, lut_size);
+ +      else
+ +              return i40e_get_rss_reg(vsi, seed, lut, lut_size);
   }
   
   /**
@@@ -8152,19 -8130,19 +8211,19 @@@ static int i40e_pf_config_rss(struct i4
         int ret;
   
         /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */
- -      hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
- -              ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32);
+ +      hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
+ +              ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
         hena |= i40e_pf_get_default_rss_hena(pf);
   
- -      wr32(hw, I40E_PFQF_HENA(0), (u32)hena);
- -      wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
+ +      i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
+ +      i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
   
         /* Determine the RSS table size based on the hardware capabilities */
- -      reg_val = rd32(hw, I40E_PFQF_CTL_0);
+ +      reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0);
         reg_val = (pf->rss_table_size == 512) ?
                         (reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) :
                         (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
- -      wr32(hw, I40E_PFQF_CTL_0, reg_val);
+ +      i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
   
         /* Determine the RSS size of the VSI */
         if (!vsi->rss_size)
@@@ -8448,32 -8426,18 +8507,38 @@@ static int i40e_sw_init(struct i40e_pf 
                                  pf->hw.func_caps.fd_filters_best_effort;
         }
   
+ +      if (i40e_is_mac_710(&pf->hw) &&
+ +          (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
+ +          (pf->hw.aq.fw_maj_ver < 4))) {
+ +              pf->flags |= I40E_FLAG_RESTART_AUTONEG;
+ +              /* No DCB support for FW < v4.33 */
+ +              pf->flags |= I40E_FLAG_NO_DCB_SUPPORT;
+ +      }
+ +
+ +      /* Disable FW LLDP if FW < v4.3 */
+ +      if (i40e_is_mac_710(&pf->hw) &&
+ +          (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
+ +          (pf->hw.aq.fw_maj_ver < 4)))
+ +              pf->flags |= I40E_FLAG_STOP_FW_LLDP;
+ +
+ +      /* Use the FW Set LLDP MIB API if FW >= v4.40 */
+ +      if (i40e_is_mac_710(&pf->hw) &&
+ +          (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
+ +          (pf->hw.aq.fw_maj_ver >= 5)))
+ +              pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB;
+ +
         if (pf->hw.func_caps.vmdq) {
                 pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
                 pf->flags |= I40E_FLAG_VMDQ_ENABLED;
                 pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
         }
   
+       if (pf->hw.func_caps.iwarp) {
+               pf->flags |= I40E_FLAG_IWARP_ENABLED;
+               /* IWARP needs one extra vector for CQP just like MISC. */
+               pf->num_iwarp_msix = (int)num_online_cpus() + 1;
+       }
+ 
   #ifdef I40E_FCOE
         i40e_init_pf_fcoe(pf);
   
@@@ -8494,19 -8458,8 +8559,19 @@@
                              I40E_FLAG_OUTER_UDP_CSUM_CAPABLE |
                              I40E_FLAG_WB_ON_ITR_CAPABLE |
                              I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE |
+ +                           I40E_FLAG_100M_SGMII_CAPABLE |
+ +                           I40E_FLAG_USE_SET_LLDP_MIB |
                              I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
+ +      } else if ((pf->hw.aq.api_maj_ver > 1) ||
+ +                 ((pf->hw.aq.api_maj_ver == 1) &&
+ +                  (pf->hw.aq.api_min_ver > 4))) {
+ +              /* Supported in FW API version higher than 1.4 */
+ +              pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
+ +              pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+ +      } else {
+ +              pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
         }
+ +
         pf->eeprom_version = 0xDEAD;
         pf->lan_veb = I40E_NO_VEB;
         pf->lan_vsi = I40E_NO_VSI;
@@@ -8642,6 -8595,9 +8707,6 @@@ static void i40e_add_vxlan_port(struct 
         u8 next_idx;
         u8 idx;
   
- -      if (sa_family == AF_INET6)
- -              return;
- -
         idx = i40e_get_udp_port_idx(pf, port);
   
         /* Check if port already exists */
@@@ -8681,6 -8637,9 +8746,6 @@@ static void i40e_del_vxlan_port(struct 
         struct i40e_pf *pf = vsi->back;
         u8 idx;
   
- -      if (sa_family == AF_INET6)
- -              return;
- -
         idx = i40e_get_udp_port_idx(pf, port);
   
         /* Check if port already exists */
@@@ -8714,7 -8673,7 +8779,7 @@@ static void i40e_add_geneve_port(struc
         u8 next_idx;
         u8 idx;
   
- -      if (sa_family == AF_INET6)
+ +      if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE))
                 return;
   
         idx = i40e_get_udp_port_idx(pf, port);
@@@ -8758,7 -8717,7 +8823,7 @@@ static void i40e_del_geneve_port(struc
         struct i40e_pf *pf = vsi->back;
         u8 idx;
   
- -      if (sa_family == AF_INET6)
+ +      if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE))
                 return;
   
         idx = i40e_get_udp_port_idx(pf, port);
@@@ -8996,7 -8955,7 +9061,7 @@@ static const struct net_device_ops i40e
   #ifdef CONFIG_NET_POLL_CONTROLLER
         .ndo_poll_controller    = i40e_netpoll,
   #endif
- -      .ndo_setup_tc           = i40e_setup_tc,
+ +      .ndo_setup_tc           = __i40e_setup_tc,
   #ifdef I40E_FCOE
         .ndo_fcoe_enable        = i40e_fcoe_enable,
         .ndo_fcoe_disable       = i40e_fcoe_disable,
@@@ -9048,15 -9007,11 +9113,15 @@@ static int i40e_config_netdev(struct i4
         np = netdev_priv(netdev);
         np->vsi = vsi;
   
- -      netdev->hw_enc_features |= NETIF_F_IP_CSUM       |
- -                                NETIF_F_RXCSUM         |
- -                                NETIF_F_GSO_UDP_TUNNEL |
- -                                NETIF_F_GSO_GRE        |
- -                                NETIF_F_TSO;
+ +      netdev->hw_enc_features |= NETIF_F_IP_CSUM             |
+ +                                 NETIF_F_IPV6_CSUM           |
+ +                                 NETIF_F_TSO                 |
+ +                                 NETIF_F_TSO6                |
+ +                                 NETIF_F_TSO_ECN             |
+ +                                 NETIF_F_GSO_GRE             |
+ +                                 NETIF_F_GSO_UDP_TUNNEL      |
+ +                                 NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ +                                 0;
   
         netdev->features = NETIF_F_SG                  |
                            NETIF_F_IP_CSUM             |
@@@ -9077,8 -9032,6 +9142,8 @@@
   
         if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
                 netdev->features |= NETIF_F_NTUPLE;
+ +      if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE)
+ +              netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
   
         /* copy netdev features into list of user selectable features */
         netdev->hw_features |= netdev->features;
@@@ -9328,6 -9281,13 +9393,13 @@@ static int i40e_add_vsi(struct i40e_vs
                                 cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
                 }
   
+               if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+                       ctxt.info.valid_sections |=
+                               cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+                       ctxt.info.queueing_opt_flags |=
+                                               I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+               }
+ 
                 ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
                 ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
                 if (pf->vf[vsi->vf_id].spoofchk) {
@@@ -9351,6 -9311,10 +9423,10 @@@
                 break;
   
   #endif /* I40E_FCOE */
+       case I40E_VSI_IWARP:
+               /* send down message to iWARP */
+               break;
+ 
         default:
                 return -ENODEV;
         }
@@@ -9583,15 -9547,10 +9659,15 @@@ vector_setup_out
    **/
   static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
   {
- -      struct i40e_pf *pf = vsi->back;
+ +      struct i40e_pf *pf;
         u8 enabled_tc;
         int ret;
   
+ +      if (!vsi)
+ +              return NULL;
+ +
+ +      pf = vsi->back;
+ +
         i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
         i40e_vsi_clear_rings(vsi);
   
@@@ -10092,13 -10051,13 +10168,13 @@@ static int i40e_add_veb(struct i40e_ve
   {
         struct i40e_pf *pf = veb->pf;
         bool is_default = veb->pf->cur_promisc;
- -      bool is_cloud = false;
+ +      bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
         int ret;
   
         /* get a VEB from the hardware */
         ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
                               veb->enabled_tc, is_default,
- -                            is_cloud, &veb->seid, NULL);
+ +                            &veb->seid, enable_stats, NULL);
         if (ret) {
                 dev_info(&pf->pdev->dev,
                          "couldn't add VEB, err %s aq_err %s\n",
@@@ -10467,6 -10426,7 +10543,7 @@@ static void i40e_determine_queue_usage(
   
                 /* make sure all the fancies are disabled */
                 pf->flags &= ~(I40E_FLAG_RSS_ENABLED    |
+                              I40E_FLAG_IWARP_ENABLED  |
   #ifdef I40E_FCOE
                                I40E_FLAG_FCOE_ENABLED   |
   #endif
@@@ -10484,6 -10444,7 +10561,7 @@@
                 queues_left -= pf->num_lan_qps;
   
                 pf->flags &= ~(I40E_FLAG_RSS_ENABLED    |
+                              I40E_FLAG_IWARP_ENABLED  |
   #ifdef I40E_FCOE
                                I40E_FLAG_FCOE_ENABLED   |
   #endif
@@@ -10655,9 -10616,21 +10733,9 @@@ static void i40e_print_features(struct 
    **/
   static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
   {
- -      struct device_node *dp = pci_device_to_OF_node(pdev);
- -      const unsigned char *addr;
- -      u8 *mac_addr = pf->hw.mac.addr;
- -
         pf->flags &= ~I40E_FLAG_PF_MAC;
- -      addr = of_get_mac_address(dp);
- -      if (addr) {
- -              ether_addr_copy(mac_addr, addr);
+ +      if (!eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
                 pf->flags |= I40E_FLAG_PF_MAC;
- -#ifdef CONFIG_SPARC
- -      } else {
- -              ether_addr_copy(mac_addr, idprom->id_ethaddr);
- -              pf->flags |= I40E_FLAG_PF_MAC;
- -#endif /* CONFIG_SPARC */
- -      }
   }
   
   /**
@@@ -10680,6 -10653,7 +10758,6 @@@ static int i40e_probe(struct pci_dev *p
         u16 wol_nvm_bits;
         u16 link_status;
         int err;
- -      u32 len;
         u32 val;
         u32 i;
         u8 set_fc_aq_fail;
@@@ -10862,7 -10836,8 +10940,7 @@@
          * Ignore error return codes because if it was already disabled via
          * hardware settings this will fail
          */
- -      if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
- -          (pf->hw.aq.fw_maj_ver < 4)) {
+ +      if (pf->flags & I40E_FLAG_STOP_FW_LLDP) {
                 dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
                 i40e_aq_stop_lldp(hw, true, NULL);
         }
@@@ -10937,8 -10912,8 +11015,8 @@@
                 pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
   
         /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
- -      len = sizeof(struct i40e_vsi *) * pf->num_alloc_vsi;
- -      pf->vsi = kzalloc(len, GFP_KERNEL);
+ +      pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+ +                        GFP_KERNEL);
         if (!pf->vsi) {
                 err = -ENOMEM;
                 goto err_switch_setup;
@@@ -10985,12 -10960,12 +11063,12 @@@
                 }
         }
   
- -      /* driver is only interested in link up/down and module qualification
- -       * reports from firmware
+ +      /* The driver only wants link up/down and module qualification
+ +       * reports from firmware.  Note the negative logic.
          */
         err = i40e_aq_set_phy_int_mask(&pf->hw,
- -                                     I40E_AQ_EVENT_LINK_UPDOWN |
- -                                     I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL);
+ +                                     ~(I40E_AQ_EVENT_LINK_UPDOWN |
+ +                                       I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
         if (err)
                 dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
                          i40e_stat_str(&pf->hw, err),
@@@ -11007,7 -10982,8 +11085,7 @@@
                 wr32(hw, I40E_REG_MSS, val);
         }
   
- -      if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
- -          (pf->hw.aq.fw_maj_ver < 4)) {
+ +      if (pf->flags & I40E_FLAG_RESTART_AUTONEG) {
                 msleep(75);
                 err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
                 if (err)
@@@ -11041,6 -11017,8 +11119,6 @@@
         if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
             (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
             !test_bit(__I40E_BAD_EEPROM, &pf->state)) {
- -              u32 val;
- -
                 /* disable link interrupts for VFs */
                 val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
                 val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK;
@@@ -11059,7 -11037,17 +11137,17 @@@
         }
   #endif /* CONFIG_PCI_IOV */
   
-       pfs_found++;
+       if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+               pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
+                                                     pf->num_iwarp_msix,
+                                                     I40E_IWARP_IRQ_PILE_ID);
+               if (pf->iwarp_base_vector < 0) {
+                       dev_info(&pdev->dev,
+                                "failed to get tracking for %d vectors for IWARP err=%d\n",
+                                pf->num_iwarp_msix, pf->iwarp_base_vector);
+                       pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+               }
+       }
   
         i40e_dbg_pf_init(pf);
   
@@@ -11070,6 -11058,12 +11158,12 @@@
         mod_timer(&pf->service_timer,
                   round_jiffies(jiffies + pf->service_timer_period));
   
+       /* add this PF to client device list and launch a client service task */
+       err = i40e_lan_add_device(pf);
+       if (err)
+               dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
+                        err);
+ 
   #ifdef I40E_FCOE
         /* create FCoE interface */
         i40e_fcoe_vsi_setup(pf);
@@@ -11151,10 -11145,6 +11245,10 @@@
         i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
                                                        pf->main_vsi_seid);
   
+ +      if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
+ +          (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
+ +              pf->flags |= I40E_FLAG_HAVE_10GBASET_PHY;
+ +
         /* print a string summarizing features */
         i40e_print_features(pf);
   
@@@ -11211,11 -11201,10 +11305,11 @@@ static void i40e_remove(struct pci_dev 
         i40e_ptp_stop(pf);
   
         /* Disable RSS in hw */
- -      wr32(hw, I40E_PFQF_HENA(0), 0);
- -      wr32(hw, I40E_PFQF_HENA(1), 0);
+ +      i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+ +      i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
   
         /* no more scheduling of any task */
+ +      set_bit(__I40E_SUSPENDED, &pf->state);
         set_bit(__I40E_DOWN, &pf->state);
         del_timer_sync(&pf->service_timer);
         cancel_work_sync(&pf->service_task);
@@@ -11245,9 -11234,16 +11339,16 @@@
         if (pf->vsi[pf->lan_vsi])
                 i40e_vsi_release(pf->vsi[pf->lan_vsi]);
   
+       /* remove attached clients */
+       ret_code = i40e_lan_del_device(pf);
+       if (ret_code) {
+               dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+                        ret_code);
+       }
+ 
         /* shutdown and destroy the HMC */
- -      if (pf->hw.hmc.hmc_obj) {
- -              ret_code = i40e_shutdown_lan_hmc(&pf->hw);
+ +      if (hw->hmc.hmc_obj) {
+ +              ret_code = i40e_shutdown_lan_hmc(hw);
                 if (ret_code)
                         dev_warn(&pdev->dev,
                                  "Failed to destroy the HMC resources: %d\n",
@@@ -11255,7 -11251,7 +11356,7 @@@
         }
   
         /* shutdown the adminq */
- -      ret_code = i40e_shutdown_adminq(&pf->hw);
+ +      ret_code = i40e_shutdown_adminq(hw);
         if (ret_code)
                 dev_warn(&pdev->dev,
                          "Failed to destroy the Admin Queue resources: %d\n",
@@@ -11283,7 -11279,7 +11384,7 @@@
         kfree(pf->qp_pile);
         kfree(pf->vsi);
   
- -      iounmap(pf->hw.hw_addr);
+ +      iounmap(hw->hw_addr);
         kfree(pf);
         pci_release_selected_regions(pdev,
                                      pci_select_bars(pdev, IORESOURCE_MEM));
@@@ -11518,16 -11514,6 +11619,16 @@@ static int __init i40e_init_module(void
                 i40e_driver_string, i40e_driver_version_str);
         pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
   
+ +      /* we will see if single thread per module is enough for now,
+ +       * it can't be any worse than using the system workqueue which
+ +       * was already single threaded
+ +       */
+ +      i40e_wq = create_singlethread_workqueue(i40e_driver_name);
+ +      if (!i40e_wq) {
+ +              pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
+ +              return -ENOMEM;
+ +      }
+ +
         i40e_dbg_init();
         return pci_register_driver(&i40e_driver);
   }
@@@ -11542,7 -11528,6 +11643,7 @@@ module_init(i40e_init_module)
   static void __exit i40e_exit_module(void)
   {
         pci_unregister_driver(&i40e_driver);
+ +      destroy_workqueue(i40e_wq);
         i40e_dbg_exit();
   }
   module_exit(i40e_exit_module);
diff --combined drivers/net/ethernet/intel/i40e/i40e_type.h

index 0a0baf71041b050d3cda5ee902f94a8e33b81548,79e975d29a1e0b70eb4989c8f773da9590b8003c..3335f9d13374d154b6c64a75fc5646ebe3dad2dc
--- 1/drivers/net/ethernet/intel/i40e/i40e_type.h
--- 2/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@@ -78,7 -78,7 +78,7 @@@ enum i40e_debug_mask 
         I40E_DEBUG_DCB                  = 0x00000400,
         I40E_DEBUG_DIAG                 = 0x00000800,
         I40E_DEBUG_FD                   = 0x00001000,
- 
+       I40E_DEBUG_IWARP                = 0x00F00000,
         I40E_DEBUG_AQ_MESSAGE           = 0x01000000,
         I40E_DEBUG_AQ_DESCRIPTOR        = 0x02000000,
         I40E_DEBUG_AQ_DESC_BUFFER       = 0x04000000,
@@@ -90,22 -90,6 +90,22 @@@
         I40E_DEBUG_ALL                  = 0xFFFFFFFF
   };
   
+ +#define I40E_MDIO_STCODE                0
+ +#define I40E_MDIO_OPCODE_ADDRESS        0
+ +#define I40E_MDIO_OPCODE_WRITE          I40E_MASK(1, \
+ +                                                I40E_GLGEN_MSCA_OPCODE_SHIFT)
+ +#define I40E_MDIO_OPCODE_READ_INC_ADDR  I40E_MASK(2, \
+ +                                                I40E_GLGEN_MSCA_OPCODE_SHIFT)
+ +#define I40E_MDIO_OPCODE_READ           I40E_MASK(3, \
+ +                                                I40E_GLGEN_MSCA_OPCODE_SHIFT)
+ +
+ +#define I40E_PHY_COM_REG_PAGE                   0x1E
+ +#define I40E_PHY_LED_LINK_MODE_MASK             0xF0
+ +#define I40E_PHY_LED_MANUAL_ON                  0x100
+ +#define I40E_PHY_LED_PROV_REG_1                 0xC430
+ +#define I40E_PHY_LED_MODE_MASK                  0xFFFF
+ +#define I40E_PHY_LED_MODE_ORIG                  0x80000000
+ +
   /* These are structs for managing the hardware information and the operations.
    * The structures of function pointers are filled out at init time when we
    * know for sure exactly which hardware we're working with.  This gives us the
@@@ -160,6 -144,7 +160,7 @@@ enum i40e_vsi_type 
         I40E_VSI_MIRROR = 5,
         I40E_VSI_SRIOV  = 6,
         I40E_VSI_FDIR   = 7,
+       I40E_VSI_IWARP  = 8,
         I40E_VSI_TYPE_UNKNOWN
   };
   
@@@ -1114,10 -1099,6 +1115,10 @@@ enum i40e_filter_program_desc_pcmd 
                                          I40E_TXD_FLTR_QW1_CMD_SHIFT)
   #define I40E_TXD_FLTR_QW1_ATR_MASK    BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT)
   
+ +#define I40E_TXD_FLTR_QW1_ATR_SHIFT   (0xEULL + \
+ +                                       I40E_TXD_FLTR_QW1_CMD_SHIFT)
+ +#define I40E_TXD_FLTR_QW1_ATR_MASK    BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT)
+ +
   #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
   #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK       (0x1FFUL << \
                                          I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
diff --combined drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

index acd2693a4e97d9747ea446d4c499dd44f00918cf,bf35b64f6a4a0094b5c7edec399909898acd86f4..816c6bbf70931a98142085302ba2cbb41355072d
--- 1/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
--- 2/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@@ -1,7 -1,7 +1,7 @@@
   /*******************************************************************************
    *
    * Intel Ethernet Controller XL710 Family Linux Driver
- - * Copyright(c) 2013 - 2015 Intel Corporation.
+ + * Copyright(c) 2013 - 2016 Intel Corporation.
    *
    * This program is free software; you can redistribute it and/or modify it
    * under the terms and conditions of the GNU General Public License,
@@@ -351,6 -351,136 +351,136 @@@ irq_list_done
         i40e_flush(hw);
   }
   
+ /**
+  * i40e_release_iwarp_qvlist - unlink the VF's iWARP CEQs and free its qvlist
+  * @vf: pointer to the VF.
+  *
+  **/
+ static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
+ {
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info;
+       u32 msix_vf;
+       u32 i;
+ 
+       if (!vf->qvlist_info)
+               return;
+ 
+       msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+       for (i = 0; i < qvlist_info->num_vectors; i++) {
+               struct i40e_virtchnl_iwarp_qv_info *qv_info;
+               u32 next_q_index, next_q_type;
+               struct i40e_hw *hw = &pf->hw;
+               u32 v_idx, reg_idx, reg;
+ 
+               qv_info = &qvlist_info->qv_info[i];
+               if (!qv_info)
+                       continue;
+               v_idx = qv_info->v_idx;
+               if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+                       /* Figure out the queue after CEQ and make that the
+                        * first queue.
+                        */
+                       reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+                       reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx));
+                       next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK)
+                                       >> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT;
+                       next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK)
+                                       >> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT;
+ 
+                       reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+                       reg = (next_q_index &
+                              I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+                              (next_q_type <<
+                              I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ 
+                       wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+               }
+       }
+       kfree(vf->qvlist_info);
+       vf->qvlist_info = NULL;
+ }
+ 
+ /**
+  * i40e_config_iwarp_qvlist - save and program a VF's iWARP queue/vector list
+  * @vf: pointer to the VF info
+  * @qvlist_info: queue and vector list
+  *
+  * Return 0 on success, -ENOMEM or -EINVAL on error
+  **/
+ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
+                                   struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info)
+ {
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_virtchnl_iwarp_qv_info *qv_info;
+       u32 v_idx, i, reg_idx, reg;
+       u32 next_q_idx, next_q_type;
+       u32 msix_vf, size;
+ 
+       size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
+              (sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+                                               (qvlist_info->num_vectors - 1));
+       vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+       if (!vf->qvlist_info)
+               return -ENOMEM;
+       vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
+ 
+       msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+       for (i = 0; i < qvlist_info->num_vectors; i++) {
+               qv_info = &qvlist_info->qv_info[i];
+               v_idx = qv_info->v_idx;
+ 
+               /* Validate vector id belongs to this vf */
+               if (!i40e_vc_isvalid_vector_id(vf, v_idx))
+                       goto err;
+ 
+               vf->qvlist_info->qv_info[i] = *qv_info;
+ 
+               reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+               /* We might be sharing the interrupt, so get the first queue
+                * index and type, push it down the list by adding the new
+                * queue on top. Also link it with the new queue in CEQCTL.
+                */
+               reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx));
+               next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >>
+                               I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT);
+               next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >>
+                               I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ 
+               if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+                       reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+                       reg = (I40E_VPINT_CEQCTL_CAUSE_ENA_MASK |
+                       (v_idx << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT) |
+                       (qv_info->itr_idx << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT) |
+                       (next_q_type << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT) |
+                       (next_q_idx << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT));
+                       wr32(hw, I40E_VPINT_CEQCTL(reg_idx), reg);
+ 
+                       reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+                       reg = (qv_info->ceq_idx &
+                              I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+                              (I40E_QUEUE_TYPE_PE_CEQ <<
+                              I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+                       wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+               }
+ 
+               if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+                       reg = (I40E_VPINT_AEQCTL_CAUSE_ENA_MASK |
+                       (v_idx << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT) |
+                       (qv_info->itr_idx << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT));
+ 
+                       wr32(hw, I40E_VPINT_AEQCTL(vf->vf_id), reg);
+               }
+       }
+ 
+       return 0;
+ err:
+       kfree(vf->qvlist_info);
+       vf->qvlist_info = NULL;
+       return -EINVAL;
+ }
+ 
   /**
    * i40e_config_vsi_tx_queue
    * @vf: pointer to the VF info
@@@ -461,7 -591,7 +591,7 @@@ static int i40e_config_vsi_rx_queue(str
                 rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT;
   
                 /* set splitalways mode 10b */
- -              rx_ctx.dtype = 0x2;
+ +              rx_ctx.dtype = I40E_RX_DTYPE_HEADER_SPLIT;
         }
   
         /* databuffer length validation */
@@@ -602,8 -732,8 +732,8 @@@ static void i40e_enable_vf_mappings(str
          * that VF queues be mapped using this method, even when they are
          * contiguous in real life
          */
- -      wr32(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id),
- -           I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK);
+ +      i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id),
+ +                        I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK);
   
         /* enable VF vplan_qtable mappings */
         reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
@@@ -630,8 -760,7 +760,8 @@@
                                                       (j * 2) + 1);
                         reg |= qid << 16;
                 }
- -              wr32(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), reg);
+ +              i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
+ +                                reg);
         }
   
         i40e_flush(hw);
@@@ -850,9 -979,11 +980,11 @@@ complete_reset
         /* reallocate VF resources to reset the VSI state */
         i40e_free_vf_res(vf);
         if (!i40e_alloc_vf_res(vf)) {
+               int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
                 i40e_enable_vf_mappings(vf);
                 set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
                 clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
+               i40e_notify_client_of_vf_reset(pf, abs_vf_id);
         }
         /* tell the VF the reset is done */
         wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
@@@ -877,11 -1008,7 +1009,7 @@@ void i40e_free_vfs(struct i40e_pf *pf
         while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state))
                 usleep_range(1000, 2000);
   
-       for (i = 0; i < pf->num_alloc_vfs; i++)
-               if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
-                       i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
-                                              false);
- 
+       i40e_notify_client_of_vf_enable(pf, 0);
         for (i = 0; i < pf->num_alloc_vfs; i++)
                 if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
                         i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
@@@ -953,6 -1080,7 +1081,7 @@@ int i40e_alloc_vfs(struct i40e_pf *pf, 
                         goto err_iov;
                 }
         }
+       i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
         /* allocate memory */
         vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
         if (!vfs) {
@@@ -981,7 -1109,7 +1110,7 @@@ err_alloc
                 i40e_free_vfs(pf);
   err_iov:
         /* Re-enable interrupt 0. */
- -      i40e_irq_dynamic_enable_icr0(pf);
+ +      i40e_irq_dynamic_enable_icr0(pf, false);
         return ret;
   }
   
@@@ -1206,6 -1334,13 +1335,13 @@@ static int i40e_vc_get_vf_resources_msg
         vsi = pf->vsi[vf->lan_vsi_idx];
         if (!vsi->info.pvid)
                 vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+ 
+       if (i40e_vf_client_capable(pf, vf->vf_id, I40E_CLIENT_IWARP) &&
+           (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) {
+               vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP;
+               set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states);
+       }
+ 
         if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
                 if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
                         vfres->vf_offload_flags |=
@@@ -1214,21 -1349,9 +1350,21 @@@
                 vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG;
         }
   
+ +      if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+ +              if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+ +                      vfres->vf_offload_flags |=
+ +                              I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+ +      }
+ +
         if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING)
                 vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING;
   
+ +      if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) {
+ +              if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+ +                      vfres->vf_offload_flags |=
+ +                                      I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+ +      }
+ +
         vfres->num_vsis = num_vsis;
         vfres->num_queue_pairs = vf->num_queue_pairs;
         vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@@ -1826,6 -1949,72 +1962,72 @@@ error_param
         return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_VLAN, aq_ret);
   }
   
+ /**
+  * i40e_vc_iwarp_msg - pass a VF's iWARP message on to the client
+  * @vf: pointer to the VF info
+  * @msg: pointer to the msg buffer
+  * @msglen: msg length
+  *
+  * Replies I40E_ERR_PARAM unless the VF is both active and iWARP enabled.
+  **/
+ static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+ {
+       struct i40e_pf *pf = vf->pf;
+       int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id;
+       i40e_status aq_ret = 0;
+ 
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+ 
+       i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id,
+                                    msg, msglen);
+ 
+ error_param:
+       /* send the response to the VF */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_IWARP,
+                                      aq_ret);
+ }
+ 
+ /**
+  * i40e_vc_iwarp_qvmap_msg - configure or release a VF's iWARP IRQ map
+  * @vf: pointer to the VF info
+  * @msg: pointer to the msg buffer
+  * @msglen: msg length
+  * @config: config qvmap or release it
+  *
+  * Replies I40E_ERR_PARAM unless the VF is both active and iWARP enabled.
+  **/
+ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
+                                  bool config)
+ {
+       struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info =
+                               (struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+       i40e_status aq_ret = 0;
+ 
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+ 
+       if (config) {
+               if (i40e_config_iwarp_qvlist(vf, qvlist_info))
+                       aq_ret = I40E_ERR_PARAM;
+       } else {
+               i40e_release_iwarp_qvlist(vf);
+       }
+ 
+ error_param:
+       /* reply using the opcode of the request that is being answered */
+       return i40e_vc_send_resp_to_vf(vf,
+                              config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP :
+                              I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
+                              aq_ret);
+ }
+ 
   /**
    * i40e_vc_validate_vf_msg
    * @vf: pointer to the VF info
@@@ -1921,6 -2110,32 +2123,32 @@@ static int i40e_vc_validate_vf_msg(stru
         case I40E_VIRTCHNL_OP_GET_STATS:
                 valid_len = sizeof(struct i40e_virtchnl_queue_select);
                 break;
+       case I40E_VIRTCHNL_OP_IWARP:
+               /* These messages are opaque to us and will be validated in
+                * the RDMA client code. We just need to check for nonzero
+                * length. The firmware will enforce max length restrictions.
+                */
+               if (msglen)
+                       valid_len = msglen;
+               else
+                       err_msg_format = true;
+               break;
+       case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+               valid_len = 0;
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+               valid_len = sizeof(struct i40e_virtchnl_iwarp_qvlist_info);
+               if (msglen >= valid_len) {
+                       struct i40e_virtchnl_iwarp_qvlist_info *qv =
+                               (struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+                       if (qv->num_vectors == 0) {
+                               err_msg_format = true;
+                               break;
+                       }
+                       valid_len += ((qv->num_vectors - 1) *
+                               sizeof(struct i40e_virtchnl_iwarp_qv_info));
+               }
+               break;
         /* These are always errors coming from the VF. */
         case I40E_VIRTCHNL_OP_EVENT:
         case I40E_VIRTCHNL_OP_UNKNOWN:
@@@ -2010,6 -2225,15 +2238,15 @@@ int i40e_vc_process_vf_msg(struct i40e_
         case I40E_VIRTCHNL_OP_GET_STATS:
                 ret = i40e_vc_get_stats_msg(vf, msg, msglen);
                 break;
+       case I40E_VIRTCHNL_OP_IWARP:
+               ret = i40e_vc_iwarp_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+               ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true);
+               break;
+       case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+               ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false);
+               break;
         case I40E_VIRTCHNL_OP_UNKNOWN:
         default:
                 dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
@@@ -2038,11 -2262,7 +2275,11 @@@ int i40e_vc_process_vflr_event(struct i
         if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state))
                 return 0;
   
- -      /* re-enable vflr interrupt cause */
+ +      /* Re-enable the VFLR interrupt cause here, before looking for which
+ +       * VF got reset. Otherwise, if another VF gets a reset while the
+ +       * first one is being processed, that interrupt will be lost, and
+ +       * that VF will be stuck in reset forever.
+ +       */
         reg = rd32(hw, I40E_PFINT_ICR0_ENA);
         reg |= I40E_PFINT_ICR0_ENA_VFLR_MASK;
         wr32(hw, I40E_PFINT_ICR0_ENA, reg);
@@@ -2203,8 -2423,6 +2440,8 @@@ int i40e_ndo_set_vf_port_vlan(struct ne
                  * and then reloading the VF driver.
                  */
                 i40e_vc_disable_vf(pf, vf);
+ +              /* During reset the VF got a new VSI, so refresh the pointer. */
+ +              vsi = pf->vsi[vf->lan_vsi_idx];
         }
   
         /* Check for condition where there was already a port VLAN ID
@@@ -2313,9 -2531,6 +2550,9 @@@ int i40e_ndo_set_vf_bw(struct net_devic
         case I40E_LINK_SPEED_40GB:
                 speed = 40000;
                 break;
+ +      case I40E_LINK_SPEED_20GB:
+ +              speed = 20000;
+ +              break;
         case I40E_LINK_SPEED_10GB:
                 speed = 10000;
                 break;
diff --combined drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h

index e74642a0c42ef0dd0889f269ab358ccd4c082914,1da4d9ac4c7ab2b7ec1082d78dcb849882b4c47f..e7b2fba0309ee4af95ef247420987ad684eb1d7c
--- 1/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
--- 2/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@@ -58,6 -58,7 +58,7 @@@ enum i40e_queue_ctrl 
   enum i40e_vf_states {
         I40E_VF_STAT_INIT = 0,
         I40E_VF_STAT_ACTIVE,
+       I40E_VF_STAT_IWARPENA,
         I40E_VF_STAT_FCOEENA,
         I40E_VF_STAT_DISABLED,
   };
@@@ -66,6 -67,7 +67,7 @@@
   enum i40e_vf_capabilities {
         I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
         I40E_VIRTCHNL_VF_CAP_L2,
+       I40E_VIRTCHNL_VF_CAP_IWARP,
   };
   
   /* VF information structure */
@@@ -91,8 -93,8 +93,8 @@@ struct i40e_vf 
          * When assigned, these will be non-zero, because VSI 0 is always
          * the main LAN VSI for the PF.
          */
- -      u8 lan_vsi_idx;         /* index into PF struct */
- -      u8 lan_vsi_id;          /* ID as used by firmware */
+ +      u16 lan_vsi_idx;        /* index into PF struct */
+ +      u16 lan_vsi_id;         /* ID as used by firmware */
   
         u8 num_queue_pairs;     /* num of qps assigned to VF vsis */
         u64 num_mdd_events;     /* num of mdd events detected */
@@@ -106,6 -108,8 +108,8 @@@
         bool link_forced;
         bool link_up;           /* only valid if VF link is forced */
         bool spoofchk;
+       /* RDMA Client */
+       struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info;
   };
   
   void i40e_free_vfs(struct i40e_pf *pf);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/cmd.c

index 97f5114fc11394b1583ed5e6009064bfbfb0daa5,ebb4036b98e5773343e25f4bd91a1b508b702613..eb926e1ee71c259291850ca906dd1bdb3c79a753
--- 1/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
--- 2/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@@ -1,5 -1,5 +1,5 @@@
   /*
- - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
    *
    * This software is available to you under a choice of one of two
    * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -407,6 -407,12 +407,12 @@@ static int mlx5_internal_err_ret_value(
   const char *mlx5_command_str(int command)
   {
         switch (command) {
+       case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+               return "QUERY_HCA_VPORT_CONTEXT";
+ 
+       case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+               return "MODIFY_HCA_VPORT_CONTEXT";
+ 
         case MLX5_CMD_OP_QUERY_HCA_CAP:
                 return "QUERY_HCA_CAP";
   
@@@ -560,18 -566,6 +566,18 @@@
         case MLX5_CMD_OP_ACCESS_REG:
                 return "MLX5_CMD_OP_ACCESS_REG";
   
+ +      case MLX5_CMD_OP_SET_WOL_ROL:
+ +              return "SET_WOL_ROL";
+ +
+ +      case MLX5_CMD_OP_QUERY_WOL_ROL:
+ +              return "QUERY_WOL_ROL";
+ +
+ +      case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ +              return "ADD_VXLAN_UDP_DPORT";
+ +
+ +      case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
+ +              return "DELETE_VXLAN_UDP_DPORT";
+ +
         default: return "unknown command opcode";
         }
   }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/main.c

index 72a94e72ee250ac2a24cf2223e21f999fd8e5185,f2354bc0ec19cbef3ce737d4ec810e30c6a4b3c7..3f3b2fae4991025a1018f4e4e4d87c88b6a30f1a
--- 1/drivers/net/ethernet/mellanox/mlx5/core/main.c
--- 2/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@@ -341,8 -341,9 +341,9 @@@ static u16 to_fw_pkey_sz(u32 size
         }
   }
   
- int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
-                      enum mlx5_cap_mode cap_mode)
+ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+                                  enum mlx5_cap_type cap_type,
+                                  enum mlx5_cap_mode cap_mode)
   {
         u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
         int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
@@@ -392,6 -393,16 +393,16 @@@ query_ex
         return err;
   }
   
+ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+ {
+       int ret;
+ 
+       ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+       if (ret)
+               return ret;
+       return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+ }
+ 
   static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
   {
         u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
@@@ -419,8 -430,7 +430,7 @@@ static int handle_hca_cap_atomic(struc
         int err;
   
         if (MLX5_CAP_GEN(dev, atomic)) {
-               err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-                                        HCA_CAP_OPMOD_GET_CUR);
+               err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
                 if (err)
                         return err;
         } else {
@@@ -462,11 -472,7 +472,7 @@@ static int handle_hca_cap(struct mlx5_c
         if (!set_ctx)
                 goto query_ex;
   
-       err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
-       if (err)
-               goto query_ex;
- 
-       err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
+       err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
         if (err)
                 goto query_ex;
   
@@@ -767,6 -773,22 +773,6 @@@ static int mlx5_core_set_issi(struct ml
         return -ENOTSUPP;
   }
   
- -static int map_bf_area(struct mlx5_core_dev *dev)
- -{
- -      resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
- -      resource_size_t bf_len = pci_resource_len(dev->pdev, 0);
- -
- -      dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);
- -
- -      return dev->priv.bf_mapping ? 0 : -ENOMEM;
- -}
- -
- -static void unmap_bf_area(struct mlx5_core_dev *dev)
- -{
- -      if (dev->priv.bf_mapping)
- -              io_mapping_free(dev->priv.bf_mapping);
- -}
- -
   static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
   {
         struct mlx5_device_context *dev_ctx;
@@@ -1087,9 -1109,14 +1093,9 @@@ static int mlx5_load_one(struct mlx5_co
                 goto err_stop_eqs;
         }
   
- -      if (map_bf_area(dev))
- -              dev_err(&pdev->dev, "Failed to map blue flame area\n");
- -
         err = mlx5_irq_set_affinity_hints(dev);
- -      if (err) {
+ +      if (err)
                 dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
- -              goto err_unmap_bf_area;
- -      }
   
         MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
   
@@@ -1148,6 -1175,10 +1154,6 @@@ err_fs
         mlx5_cleanup_qp_table(dev);
         mlx5_cleanup_cq_table(dev);
         mlx5_irq_clear_affinity_hints(dev);
- -
- -err_unmap_bf_area:
- -      unmap_bf_area(dev);
- -
         free_comp_eqs(dev);
   
   err_stop_eqs:
@@@ -1217,6 -1248,7 +1223,6 @@@ static int mlx5_unload_one(struct mlx5_
         mlx5_cleanup_qp_table(dev);
         mlx5_cleanup_cq_table(dev);
         mlx5_irq_clear_affinity_hints(dev);
- -      unmap_bf_area(dev);
         free_comp_eqs(dev);
         mlx5_stop_eqs(dev);
         mlx5_free_uuars(dev, &priv->uuari);
diff --combined drivers/staging/rdma/hfi1/chip.c

index 46a1830b509b66a6578cdd99fbbddad68daad371,c29860c05ed441c21a6c085668cceadf19023859..16eb653903e0b873909f3cf3175cf597fae88ea8
--- 1/drivers/staging/rdma/hfi1/chip.c
--- 2/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -64,6 -61,8 +61,8 @@@
   #include "sdma.h"
   #include "eprom.h"
   #include "efivar.h"
+ #include "platform.h"
+ #include "aspm.h"
   
   #define NUM_IB_PORTS 1
   
@@@ -420,10 -419,10 +419,10 @@@ static struct flag_table pio_err_status
         SEC_SPC_FREEZE,
         SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
   /*23*/        FLAG_ENTRY("PioWriteQwValidParity",
-       SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+       SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
         SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
   /*24*/        FLAG_ENTRY("PioBlockQwCountParity",
-       SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+       SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
         SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
   /*25*/        FLAG_ENTRY("PioVlfVlLenParity",
         SEC_SPC_FREEZE,
@@@ -509,6 -508,12 +508,12 @@@ static struct flag_table sdma_err_statu
                 | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
                 | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
   
+ /* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */
+ #define PORT_DISCARD_EGRESS_ERRS \
+       (SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \
+       | SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \
+       | SEND_EGRESS_ERR_INFO_VL_ERR_SMASK)
+ 
   /*
    * TXE Egress Error flags
    */
@@@ -936,7 -941,7 +941,7 @@@ static struct flag_table dc8051_err_fla
         FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
         FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
         FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
-               D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
+                   D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
         FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
   };
   
@@@ -950,7 -955,7 +955,7 @@@ static struct flag_table dc8051_info_er
         FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
         FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
         FLAG_ENTRY0("Serdes internal loopback failure",
-                                       FAILED_SERDES_INTERNAL_LOOPBACK),
+                   FAILED_SERDES_INTERNAL_LOOPBACK),
         FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
         FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
         FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
@@@ -958,7 -963,8 +963,8 @@@
         FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
         FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
         FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
-       FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
+       FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT),
+       FLAG_ENTRY0("Host Handshake Timeout",  HOST_HANDSHAKE_TIMEOUT)
   };
   
   /*
@@@ -978,7 -984,6 +984,6 @@@ static struct flag_table dc8051_info_ho
         FLAG_ENTRY0("Link going down", 0x0100),
   };
   
- 
   static u32 encoded_size(u32 size);
   static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
   static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
@@@ -1140,11 -1145,8 +1145,8 @@@ struct cntr_entry 
         /*
          * accessor for stat element, context either dd or ppd
          */
-       u64 (*rw_cntr)(const struct cntr_entry *,
-                              void *context,
-                              int vl,
-                              int mode,
-                              u64 data);
+       u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl,
+                      int mode, u64 data);
   };
   
   #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
@@@ -1188,7 -1190,7 +1190,7 @@@ CNTR_ELEM(#name, 
   #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
   #define OVR_ELM(ctx) \
   CNTR_ELEM("RcvHdrOvr" #ctx, \
-         (RCV_HDR_OVFL_CNT + ctx*0x100), \
+         (RCV_HDR_OVFL_CNT + ctx * 0x100), \
           0, CNTR_NORMAL, port_access_u64_csr)
   
   /* 32bit TXE */
@@@ -1250,8 -1252,11 +1252,8 @@@ CNTR_ELEM(#name, 
   
   u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
   {
- -      u64 val;
- -
         if (dd->flags & HFI1_PRESENT) {
- -              val = readq((void __iomem *)dd->kregbase + offset);
- -              return val;
+ +              return readq((void __iomem *)dd->kregbase + offset);
         }
         return -1;
   }
@@@ -1274,7 -1279,6 +1276,6 @@@ static inline u64 read_write_csr(const 
   {
         u64 ret;
   
- 
         if (mode == CNTR_MODE_R) {
                 ret = read_csr(dd, csr);
         } else if (mode == CNTR_MODE_W) {
@@@ -1291,17 -1295,65 +1292,65 @@@
   
   /* Dev Access */
   static u64 dev_access_u32_csr(const struct cntr_entry *entry,
-                           void *context, int vl, int mode, u64 data)
+                             void *context, int vl, int mode, u64 data)
   {
         struct hfi1_devdata *dd = context;
+       u64 csr = entry->csr;
   
-       if (vl != CNTR_INVALID_VL)
-               return 0;
-       return read_write_csr(dd, entry->csr, mode, data);
+       if (entry->flags & CNTR_SDMA) {
+               if (vl == CNTR_INVALID_VL)
+                       return 0;
+               csr += 0x100 * vl;
+       } else {
+               if (vl != CNTR_INVALID_VL)
+                       return 0;
+       }
+       return read_write_csr(dd, csr, mode, data);
+ }
+ 
+ static u64 access_sde_err_cnt(const struct cntr_entry *entry,
+                             void *context, int idx, int mode, u64 data)
+ {
+       struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+ 
+       if (dd->per_sdma && idx < dd->num_sdma)
+               return dd->per_sdma[idx].err_cnt;
+       return 0;
+ }
+ 
+ static u64 access_sde_int_cnt(const struct cntr_entry *entry,
+                             void *context, int idx, int mode, u64 data)
+ {
+       struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+ 
+       if (dd->per_sdma && idx < dd->num_sdma)
+               return dd->per_sdma[idx].sdma_int_cnt;
+       return 0;
+ }
+ 
+ static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry,
+                                  void *context, int idx, int mode, u64 data)
+ {
+       struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+ 
+       if (dd->per_sdma && idx < dd->num_sdma)
+               return dd->per_sdma[idx].idle_int_cnt;
+       return 0;
+ }
+ 
+ static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry,
+                                      void *context, int idx, int mode,
+                                      u64 data)
+ {
+       struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+ 
+       if (dd->per_sdma && idx < dd->num_sdma)
+               return dd->per_sdma[idx].progress_int_cnt;
+       return 0;
   }
   
   static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
-                           int vl, int mode, u64 data)
+                             int vl, int mode, u64 data)
   {
         struct hfi1_devdata *dd = context;
   
@@@ -1322,7 -1374,7 +1371,7 @@@
   }
   
   static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
-                           int vl, int mode, u64 data)
+                             int vl, int mode, u64 data)
   {
         struct hfi1_devdata *dd = context;
         u32 csr = entry->csr;
@@@ -1346,7 -1398,7 +1395,7 @@@
   
   /* Port Access */
   static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
-                            int vl, int mode, u64 data)
+                              int vl, int mode, u64 data)
   {
         struct hfi1_pportdata *ppd = context;
   
@@@ -1356,7 -1408,7 +1405,7 @@@
   }
   
   static u64 port_access_u64_csr(const struct cntr_entry *entry,
-                            void *context, int vl, int mode, u64 data)
+                              void *context, int vl, int mode, u64 data)
   {
         struct hfi1_pportdata *ppd = context;
         u64 val;
@@@ -1396,7 -1448,7 +1445,7 @@@ static inline u64 read_write_sw(struct 
   }
   
   static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
-                              int vl, int mode, u64 data)
+                                int vl, int mode, u64 data)
   {
         struct hfi1_pportdata *ppd = context;
   
@@@ -1406,7 -1458,7 +1455,7 @@@
   }
   
   static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
-                              int vl, int mode, u64 data)
+                                int vl, int mode, u64 data)
   {
         struct hfi1_pportdata *ppd = context;
   
@@@ -1427,18 -1479,25 +1476,25 @@@ static u64 access_sw_unknown_frame_cnt(
   }
   
   static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
-                                   void *context, int vl, int mode, u64 data)
+                                  void *context, int vl, int mode, u64 data)
   {
-       struct hfi1_pportdata *ppd = context;
+       struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
+       u64 zero = 0;
+       u64 *counter;
   
-       if (vl != CNTR_INVALID_VL)
-               return 0;
+       if (vl == CNTR_INVALID_VL)
+               counter = &ppd->port_xmit_discards;
+       else if (vl >= 0 && vl < C_VL_COUNT)
+               counter = &ppd->port_xmit_discards_vl[vl];
+       else
+               counter = &zero;
   
-       return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
+       return read_write_sw(ppd->dd, counter, mode, data);
   }
   
   static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
-                                    void *context, int vl, int mode, u64 data)
+                                      void *context, int vl, int mode,
+                                      u64 data)
   {
         struct hfi1_pportdata *ppd = context;
   
@@@ -1450,7 -1509,7 +1506,7 @@@
   }
   
   static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
-                                    void *context, int vl, int mode, u64 data)
+                                     void *context, int vl, int mode, u64 data)
   {
         struct hfi1_pportdata *ppd = context;
   
@@@ -1475,7 -1534,6 +1531,6 @@@ static u64 read_write_cpu(struct hfi1_d
                           u64 __percpu *cntr,
                           int vl, int mode, u64 data)
   {
- 
         u64 ret = 0;
   
         if (vl != CNTR_INVALID_VL)
@@@ -1507,7 -1565,7 +1562,7 @@@ static u64 access_sw_cpu_intr(const str
   }
   
   static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
-                             void *context, int vl, int mode, u64 data)
+                                  void *context, int vl, int mode, u64 data)
   {
         struct hfi1_devdata *dd = context;
   
@@@ -1523,6 -1581,14 +1578,14 @@@ static u64 access_sw_pio_wait(const str
         return dd->verbs_dev.n_piowait;
   }
   
+ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
+                              void *context, int vl, int mode, u64 data)
+ {
+       struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+ 
+       return dd->verbs_dev.n_piodrain;
+ }
+ 
   static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
                               void *context, int vl, int mode, u64 data)
   {
@@@ -1540,11 -1606,12 +1603,12 @@@ static u64 access_sw_kmem_wait(const st
   }
   
   static u64 access_sw_send_schedule(const struct cntr_entry *entry,
-                              void *context, int vl, int mode, u64 data)
+                                  void *context, int vl, int mode, u64 data)
   {
         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
   
-       return dd->verbs_dev.n_send_schedule;
+       return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl,
+                             mode, data);
   }
   
   /* Software counters for the error status bits within MISC_ERR_STATUS */
@@@ -3882,8 -3949,8 +3946,8 @@@ static u64 access_sw_cpu_##cntr(const s
                               void *context, int vl, int mode, u64 data)      \
   {                                                                           \
         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
-       return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,           \
-                             ppd->ibport_data.cntr, vl,                      \
+       return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr,       \
+                             ppd->ibport_data.rvp.cntr, vl,                  \
                               mode, data);                                    \
   }
   
@@@ -3900,7 -3967,7 +3964,7 @@@ static u64 access_ibp_##cntr(const stru
         if (vl != CNTR_INVALID_VL)                                            \
                 return 0;                                                     \
                                                                               \
-       return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,            \
+       return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr,        \
                              mode, data);                                     \
   }
   
@@@ -4063,10 -4130,28 +4127,28 @@@ static struct cntr_entry dev_cntrs[DEV_
                             access_sw_vtx_wait),
   [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
                             access_sw_pio_wait),
+ [C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
+                           access_sw_pio_drain),
   [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
                             access_sw_kmem_wait),
   [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
                             access_sw_send_schedule),
+ [C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
+                                     SEND_DMA_DESC_FETCHED_CNT, 0,
+                                     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+                                     dev_access_u32_csr),
+ [C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0,
+                            CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+                            access_sde_int_cnt),
+ [C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0,
+                            CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+                            access_sde_err_cnt),
+ [C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0,
+                                 CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+                                 access_sde_idle_int_cnt),
+ [C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0,
+                                     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+                                     access_sde_progress_int_cnt),
   /* MISC_ERR_STATUS */
   [C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0,
                                 CNTR_NORMAL,
@@@ -4876,28 -4961,28 +4958,28 @@@ static struct cntr_entry port_cntrs[POR
   [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
   [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
   [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
-                       CNTR_SYNTH | CNTR_VL),
+                                     CNTR_SYNTH | CNTR_VL),
   [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
-                       CNTR_SYNTH | CNTR_VL),
+                                    CNTR_SYNTH | CNTR_VL),
   [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
-                       CNTR_SYNTH | CNTR_VL),
+                                     CNTR_SYNTH | CNTR_VL),
   [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
   [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
   [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-                       access_sw_link_dn_cnt),
+                            access_sw_link_dn_cnt),
   [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-                       access_sw_link_up_cnt),
+                          access_sw_link_up_cnt),
   [C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL,
                                  access_sw_unknown_frame_cnt),
   [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-                       access_sw_xmit_discards),
+                            access_sw_xmit_discards),
   [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
-                       CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
-                       access_sw_xmit_discards),
+                               CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
+                               access_sw_xmit_discards),
   [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
-                       access_xmit_constraint_errs),
+                                access_xmit_constraint_errs),
   [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
-                       access_rcv_constraint_errs),
+                               access_rcv_constraint_errs),
   [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
   [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
   [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
@@@ -4913,9 -4998,9 +4995,9 @@@
   [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
                                access_sw_cpu_rc_acks),
   [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
-                              access_sw_cpu_rc_qacks),
+                               access_sw_cpu_rc_qacks),
   [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
-                              access_sw_cpu_rc_delayed_comp),
+                                      access_sw_cpu_rc_delayed_comp),
   [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
   [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
   [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
@@@ -5064,7 -5149,7 +5146,7 @@@ done
    * the buffer.  End in '*' if the buffer is too short.
    */
   static char *flag_string(char *buf, int buf_len, u64 flags,
-                               struct flag_table *table, int table_size)
+                        struct flag_table *table, int table_size)
   {
         char extra[32];
         char *p = buf;
@@@ -5125,10 -5210,8 +5207,8 @@@ static char *is_misc_err_name(char *buf
         if (source < ARRAY_SIZE(cce_misc_names))
                 strncpy(buf, cce_misc_names[source], bsize);
         else
-               snprintf(buf,
-                       bsize,
-                       "Reserved%u",
-                       source + IS_GENERAL_ERR_START);
+               snprintf(buf, bsize, "Reserved%u",
+                        source + IS_GENERAL_ERR_START);
   
         return buf;
   }
@@@ -5167,7 -5250,7 +5247,7 @@@ static char *is_various_name(char *buf
         if (source < ARRAY_SIZE(various_names))
                 strncpy(buf, various_names[source], bsize);
         else
-               snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
+               snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
         return buf;
   }
   
@@@ -5252,51 -5335,56 +5332,56 @@@ static char *is_reserved_name(char *buf
   static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-                       cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
+                          cce_err_status_flags,
+                          ARRAY_SIZE(cce_err_status_flags));
   }
   
   static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-                       rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
+                          rxe_err_status_flags,
+                          ARRAY_SIZE(rxe_err_status_flags));
   }
   
   static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags, misc_err_status_flags,
-                       ARRAY_SIZE(misc_err_status_flags));
+                          ARRAY_SIZE(misc_err_status_flags));
   }
   
   static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-                       pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
+                          pio_err_status_flags,
+                          ARRAY_SIZE(pio_err_status_flags));
   }
   
   static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-                       sdma_err_status_flags,
-                       ARRAY_SIZE(sdma_err_status_flags));
+                          sdma_err_status_flags,
+                          ARRAY_SIZE(sdma_err_status_flags));
   }
   
   static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-               egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
+                          egress_err_status_flags,
+                          ARRAY_SIZE(egress_err_status_flags));
   }
   
   static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-               egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
+                          egress_err_info_flags,
+                          ARRAY_SIZE(egress_err_info_flags));
   }
   
   static char *send_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-                       send_err_status_flags,
-                       ARRAY_SIZE(send_err_status_flags));
+                          send_err_status_flags,
+                          ARRAY_SIZE(send_err_status_flags));
   }
   
   static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@@ -5309,7 -5397,7 +5394,7 @@@
          * report or record it.
          */
         dd_dev_info(dd, "CCE Error: %s\n",
-               cce_err_status_string(buf, sizeof(buf), reg));
+                   cce_err_status_string(buf, sizeof(buf), reg));
   
         if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
             is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
@@@ -5339,14 -5427,14 +5424,14 @@@ static void update_rcverr_timer(unsigne
         u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
   
         if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
-               ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
+           ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
-               set_link_down_reason(ppd,
-                 OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
-                       OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
+               set_link_down_reason(
+               ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
+               OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
         }
-       dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
+       dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
   
         mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
   }
@@@ -5372,7 -5460,7 +5457,7 @@@ static void handle_rxe_err(struct hfi1_
         int i = 0;
   
         dd_dev_info(dd, "Receive Error: %s\n",
-               rxe_err_status_string(buf, sizeof(buf), reg));
+                   rxe_err_status_string(buf, sizeof(buf), reg));
   
         if (reg & ALL_RXE_FREEZE_ERR) {
                 int flags = 0;
@@@ -5399,7 -5487,7 +5484,7 @@@ static void handle_misc_err(struct hfi1
         int i = 0;
   
         dd_dev_info(dd, "Misc Error: %s",
-               misc_err_status_string(buf, sizeof(buf), reg));
+                   misc_err_status_string(buf, sizeof(buf), reg));
         for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) {
                 if (reg & (1ull << i))
                         incr_cntr64(&dd->misc_err_status_cnt[i]);
@@@ -5412,7 -5500,7 +5497,7 @@@ static void handle_pio_err(struct hfi1_
         int i = 0;
   
         dd_dev_info(dd, "PIO Error: %s\n",
-               pio_err_status_string(buf, sizeof(buf), reg));
+                   pio_err_status_string(buf, sizeof(buf), reg));
   
         if (reg & ALL_PIO_FREEZE_ERR)
                 start_freeze_handling(dd->pport, 0);
@@@ -5429,7 -5517,7 +5514,7 @@@ static void handle_sdma_err(struct hfi1
         int i = 0;
   
         dd_dev_info(dd, "SDMA Error: %s\n",
-               sdma_err_status_string(buf, sizeof(buf), reg));
+                   sdma_err_status_string(buf, sizeof(buf), reg));
   
         if (reg & ALL_SDMA_FREEZE_ERR)
                 start_freeze_handling(dd->pport, 0);
@@@ -5440,12 -5528,14 +5525,14 @@@
         }
   }
   
- static void count_port_inactive(struct hfi1_devdata *dd)
+ static inline void __count_port_discards(struct hfi1_pportdata *ppd)
   {
-       struct hfi1_pportdata *ppd = dd->pport;
+       incr_cntr64(&ppd->port_xmit_discards);
+ }
   
-       if (ppd->port_xmit_discards < ~(u64)0)
-               ppd->port_xmit_discards++;
+ static void count_port_inactive(struct hfi1_devdata *dd)
+ {
+       __count_port_discards(dd->pport);
   }
   
   /*
@@@ -5457,7 -5547,8 +5544,8 @@@
    * egress error if more than one packet fails the same integrity check
    * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
    */
- static void handle_send_egress_err_info(struct hfi1_devdata *dd)
+ static void handle_send_egress_err_info(struct hfi1_devdata *dd,
+                                       int vl)
   {
         struct hfi1_pportdata *ppd = dd->pport;
         u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
@@@ -5468,14 -5559,44 +5556,44 @@@
         write_csr(dd, SEND_EGRESS_ERR_INFO, info);
   
         dd_dev_info(dd,
-               "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
-               info, egress_err_info_string(buf, sizeof(buf), info), src);
+                   "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
+                   info, egress_err_info_string(buf, sizeof(buf), info), src);
   
         /* Eventually add other counters for each bit */
+       if (info & PORT_DISCARD_EGRESS_ERRS) {
+               int weight, i;
   
-       if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
-               if (ppd->port_xmit_discards < ~(u64)0)
-                       ppd->port_xmit_discards++;
+               /*
+                * Count all applicable bits as individual errors and
+                * attribute them to the packet that triggered this handler.
+                * This may not be completely accurate due to limitations
+                * on the available hardware error information.  There is
+                * a single information register and any number of error
+                * packets may have occurred and contributed to it before
+                * this routine is called.  This means that:
+                * a) If multiple packets with the same error occur before
+                *    this routine is called, earlier packets are missed.
+                *    There is only a single bit for each error type.
+                * b) Errors may not be attributed to the correct VL.
+                *    The driver is attributing all bits in the info register
+                *    to the packet that triggered this call, but bits
+                *    could be an accumulation of different packets with
+                *    different VLs.
+                * c) A single error packet may have multiple counts attached
+                *    to it.  There is no way for the driver to know if
+                *    multiple bits set in the info register are due to a
+                *    single packet or multiple packets.  The driver assumes
+                *    multiple packets.
+                */
+               weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS);
+               for (i = 0; i < weight; i++) {
+                       __count_port_discards(ppd);
+                       if (vl >= 0 && vl < TXE_NUM_DATA_VL)
+                               incr_cntr64(&ppd->port_xmit_discards_vl[vl]);
+                       else if (vl == 15)
+                               incr_cntr64(&ppd->port_xmit_discards_vl
+                                           [C_VL_15]);
+               }
         }
   }
   
@@@ -5493,12 -5614,71 +5611,71 @@@ static inline int port_inactive_err(u6
    * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
    * register. Does it represent a 'disallowed packet' error?
    */
- static inline int disallowed_pkt_err(u64 posn)
+ static inline int disallowed_pkt_err(int posn)
   {
         return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
                 posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
   }
   
+ /*
+  * Input value is a bit position of one of the SDMA engine disallowed
+  * packet errors.  Return which engine.  Use of this must be guarded by
+  * disallowed_pkt_err().
+  */
+ static inline int disallowed_pkt_engine(int posn)
+ {
+       return posn - SEES(TX_SDMA0_DISALLOWED_PACKET);
+ }
+ 
+ /*
+  * Translate an SDMA engine to a VL.  Return -1 if the translation cannot
+  * be done.
+  */
+ static int engine_to_vl(struct hfi1_devdata *dd, int engine)
+ {
+       struct sdma_vl_map *m;
+       int vl;
+ 
+       /* range check */
+       if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES)
+               return -1;
+ 
+       rcu_read_lock();
+       m = rcu_dereference(dd->sdma_map);
+       vl = m->engine_to_vl[engine];
+       rcu_read_unlock();
+ 
+       return vl;
+ }
+ 
+ /*
+  * Translate the send context (software index) into a VL.  Return -1 if the
+  * translation cannot be done.
+  */
+ static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
+ {
+       struct send_context_info *sci;
+       struct send_context *sc;
+       int i;
+ 
+       sci = &dd->send_contexts[sw_index];
+ 
+       /* there is no information for user (PSM) and ack contexts */
+       if (sci->type != SC_KERNEL)
+               return -1;
+ 
+       sc = sci->sc;
+       if (!sc)
+               return -1;
+       if (dd->vld[15].sc == sc)
+               return 15;
+       for (i = 0; i < num_vls; i++)
+               if (dd->vld[i].sc == sc)
+                       return i;
+ 
+       return -1;
+ }
+ 
   static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
   {
         u64 reg_copy = reg, handled = 0;
@@@ -5507,34 -5687,34 +5684,34 @@@
   
         if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
                 start_freeze_handling(dd->pport, 0);
-       if (is_ax(dd) && (reg &
-                   SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
-                   && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
+       else if (is_ax(dd) &&
+                (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) &&
+                (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
                 start_freeze_handling(dd->pport, 0);
   
         while (reg_copy) {
                 int posn = fls64(reg_copy);
-               /*
-                * fls64() returns a 1-based offset, but we generally
-                * want 0-based offsets.
-                */
+               /* fls64() returns a 1-based offset, we want it zero based */
                 int shift = posn - 1;
+               u64 mask = 1ULL << shift;
   
                 if (port_inactive_err(shift)) {
                         count_port_inactive(dd);
-                       handled |= (1ULL << shift);
+                       handled |= mask;
                 } else if (disallowed_pkt_err(shift)) {
-                       handle_send_egress_err_info(dd);
-                       handled |= (1ULL << shift);
+                       int vl = engine_to_vl(dd, disallowed_pkt_engine(shift));
+ 
+                       handle_send_egress_err_info(dd, vl);
+                       handled |= mask;
                 }
-               clear_bit(shift, (unsigned long *)&reg_copy);
+               reg_copy &= ~mask;
         }
   
         reg &= ~handled;
   
         if (reg)
                 dd_dev_info(dd, "Egress Error: %s\n",
-                       egress_err_status_string(buf, sizeof(buf), reg));
+                           egress_err_status_string(buf, sizeof(buf), reg));
   
         for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) {
                 if (reg & (1ull << i))
@@@ -5548,7 -5728,7 +5725,7 @@@ static void handle_txe_err(struct hfi1_
         int i = 0;
   
         dd_dev_info(dd, "Send Error: %s\n",
-               send_err_status_string(buf, sizeof(buf), reg));
+                   send_err_status_string(buf, sizeof(buf), reg));
   
         for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) {
                 if (reg & (1ull << i))
@@@ -5594,7 -5774,7 +5771,7 @@@ static void interrupt_clear_down(struc
                         u64 mask;
   
                         dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
-                               eri->desc, reg);
+                                  eri->desc, reg);
                         /*
                          * Read-modify-write so any other masked bits
                          * remain masked.
@@@ -5618,14 -5798,15 +5795,15 @@@ static void is_misc_err_int(struct hfi1
                 interrupt_clear_down(dd, 0, eri);
         } else {
                 dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
-                       source);
+                          source);
         }
   }
   
   static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
   {
         return flag_string(buf, buf_len, flags,
-                       sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
+                          sc_err_status_flags,
+                          ARRAY_SIZE(sc_err_status_flags));
   }
   
   /*
@@@ -5650,15 -5831,15 +5828,15 @@@ static void is_sendctxt_err_int(struct 
         sw_index = dd->hw_to_sw[hw_context];
         if (sw_index >= dd->num_send_contexts) {
                 dd_dev_err(dd,
-                       "out of range sw index %u for send context %u\n",
-                       sw_index, hw_context);
+                          "out of range sw index %u for send context %u\n",
+                          sw_index, hw_context);
                 return;
         }
         sci = &dd->send_contexts[sw_index];
         sc = sci->sc;
         if (!sc) {
                 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
-                       sw_index, hw_context);
+                          sw_index, hw_context);
                 return;
         }
   
@@@ -5668,10 -5849,11 +5846,11 @@@
         status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
   
         dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
-               send_context_err_status_string(flags, sizeof(flags), status));
+                   send_context_err_status_string(flags, sizeof(flags),
+                                                  status));
   
         if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
-               handle_send_egress_err_info(dd);
+               handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index));
   
         /*
          * Automatically restart halted kernel contexts out of interrupt
@@@ -5704,6 -5886,7 +5883,7 @@@ static void handle_sdma_eng_err(struct 
         dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
                    sde->this_idx, source, (unsigned long long)status);
   #endif
+       sde->err_cnt++;
         sdma_engine_error(sde, status);
   
         /*
@@@ -5752,23 -5935,22 +5932,22 @@@ static void is_various_int(struct hfi1_
                 interrupt_clear_down(dd, 0, eri);
         else
                 dd_dev_info(dd,
-                       "%s: Unimplemented/reserved interrupt %d\n",
-                       __func__, source);
+                           "%s: Unimplemented/reserved interrupt %d\n",
+                           __func__, source);
   }
   
   static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
   {
-       /* source is always zero */
+       /* src_ctx is always zero */
         struct hfi1_pportdata *ppd = dd->pport;
         unsigned long flags;
         u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
   
         if (reg & QSFP_HFI0_MODPRST_N) {
- 
-               dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
-                               __func__);
- 
                 if (!qsfp_mod_present(ppd)) {
+                       dd_dev_info(dd, "%s: QSFP module removed\n",
+                                   __func__);
+ 
                         ppd->driver_link_ready = 0;
                         /*
                          * Cable removed, reset all our information about the
@@@ -5781,14 -5963,23 +5960,23 @@@
                          * an interrupt when a cable is inserted
                          */
                         ppd->qsfp_info.cache_valid = 0;
-                       ppd->qsfp_info.qsfp_interrupt_functional = 0;
+                       ppd->qsfp_info.reset_needed = 0;
+                       ppd->qsfp_info.limiting_active = 0;
                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-                                               flags);
-                       write_csr(dd,
-                                       dd->hfi1_id ?
-                                               ASIC_QSFP2_INVERT :
-                                               ASIC_QSFP1_INVERT,
-                               qsfp_int_mgmt);
+                                              flags);
+                       /* Invert the ModPresent pin now to detect plug-in */
+                       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+                                 ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+ 
+                       if ((ppd->offline_disabled_reason >
+                         HFI1_ODR_MASK(
+                         OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) ||
+                         (ppd->offline_disabled_reason ==
+                         HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
+                               ppd->offline_disabled_reason =
+                               HFI1_ODR_MASK(
+                               OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+ 
                         if (ppd->host_link_state == HLS_DN_POLL) {
                                 /*
                                  * The link is still in POLL. This means
@@@ -5799,28 -5990,33 +5987,33 @@@
                                 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
                         }
                 } else {
+                       dd_dev_info(dd, "%s: QSFP module inserted\n",
+                                   __func__);
+ 
                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
                         ppd->qsfp_info.cache_valid = 0;
                         ppd->qsfp_info.cache_refresh_required = 1;
                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-                                               flags);
+                                              flags);
   
+                       /*
+                        * Stop inversion of ModPresent pin to detect
+                        * removal of the cable
+                        */
                         qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
-                       write_csr(dd,
-                                       dd->hfi1_id ?
-                                               ASIC_QSFP2_INVERT :
-                                               ASIC_QSFP1_INVERT,
-                               qsfp_int_mgmt);
+                       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+                                 ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+ 
+                       ppd->offline_disabled_reason =
+                               HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
                 }
         }
   
         if (reg & QSFP_HFI0_INT_N) {
- 
-               dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
-                               __func__);
+               dd_dev_info(dd, "%s: Interrupt received from QSFP module\n",
+                           __func__);
                 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
                 ppd->qsfp_info.check_interrupt_flags = 1;
-               ppd->qsfp_info.qsfp_interrupt_functional = 1;
                 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
         }
   
@@@ -5834,11 -6030,11 +6027,11 @@@ static int request_host_lcb_access(stru
         int ret;
   
         ret = do_8051_command(dd, HCMD_MISC,
-               (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-               NULL);
+                             (u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
+                             LOAD_DATA_FIELD_ID_SHIFT, NULL);
         if (ret != HCMD_SUCCESS) {
                 dd_dev_err(dd, "%s: command failed with error %d\n",
-                       __func__, ret);
+                          __func__, ret);
         }
         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
   }
@@@ -5848,11 -6044,11 +6041,11 @@@ static int request_8051_lcb_access(stru
         int ret;
   
         ret = do_8051_command(dd, HCMD_MISC,
-               (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-               NULL);
+                             (u64)HCMD_MISC_GRANT_LCB_ACCESS <<
+                             LOAD_DATA_FIELD_ID_SHIFT, NULL);
         if (ret != HCMD_SUCCESS) {
                 dd_dev_err(dd, "%s: command failed with error %d\n",
-                       __func__, ret);
+                          __func__, ret);
         }
         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
   }
@@@ -5864,8 -6060,8 +6057,8 @@@
   static inline void set_host_lcb_access(struct hfi1_devdata *dd)
   {
         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-                               DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
-                               | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
+                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK |
+                 DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
   }
   
   /*
@@@ -5875,7 -6071,7 +6068,7 @@@
   static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
   {
         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-                               DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
+                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
   }
   
   /*
@@@ -5909,7 -6105,7 +6102,7 @@@ int acquire_lcb_access(struct hfi1_devd
         /* this access is valid only when the link is up */
         if ((ppd->host_link_state & HLS_UP) == 0) {
                 dd_dev_info(dd, "%s: link state %s not up\n",
-                       __func__, link_state_name(ppd->host_link_state));
+                           __func__, link_state_name(ppd->host_link_state));
                 ret = -EBUSY;
                 goto done;
         }
@@@ -5918,8 -6114,8 +6111,8 @@@
                 ret = request_host_lcb_access(dd);
                 if (ret) {
                         dd_dev_err(dd,
-                               "%s: unable to acquire LCB access, err %d\n",
-                               __func__, ret);
+                                  "%s: unable to acquire LCB access, err %d\n",
+                                  __func__, ret);
                         goto done;
                 }
                 set_host_lcb_access(dd);
@@@ -5956,7 -6152,7 +6149,7 @@@ int release_lcb_access(struct hfi1_devd
   
         if (dd->lcb_access_count == 0) {
                 dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
-                       __func__);
+                          __func__);
                 goto done;
         }
   
@@@ -5965,8 -6161,8 +6158,8 @@@
                 ret = request_8051_lcb_access(dd);
                 if (ret) {
                         dd_dev_err(dd,
-                               "%s: unable to release LCB access, err %d\n",
-                               __func__, ret);
+                                  "%s: unable to release LCB access, err %d\n",
+                                  __func__, ret);
                         /* restore host access if the grant didn't work */
                         set_host_lcb_access(dd);
                         goto done;
@@@ -5998,19 -6194,26 +6191,26 @@@ static void init_lcb_access(struct hfi1
   static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
   {
         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
-               DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
-               | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
-               | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
+                 DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK |
+                 (u64)return_code <<
+                 DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT |
+                 (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
   }
   
   /*
-  * Handle requests from the 8051.
+  * Handle host requests from the 8051.
+  *
+  * This is a work-queue function outside of the interrupt.
    */
- static void handle_8051_request(struct hfi1_devdata *dd)
+ void handle_8051_request(struct work_struct *work)
   {
+       struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+                                                       dc_host_req_work);
+       struct hfi1_devdata *dd = ppd->dd;
         u64 reg;
-       u16 data;
-       u8 type;
+       u16 data = 0;
+       u8 type, i, lanes, *cache = ppd->qsfp_info.cache;
+       u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
   
         reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
         if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
@@@ -6031,12 -6234,46 +6231,46 @@@
         case HREQ_READ_CONFIG:
         case HREQ_SET_TX_EQ_ABS:
         case HREQ_SET_TX_EQ_REL:
-       case HREQ_ENABLE:
                 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
-                       type);
+                           type);
                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
                 break;
   
+       case HREQ_ENABLE:
+               lanes = data & 0xF;
+               for (i = 0; lanes; lanes >>= 1, i++) {
+                       if (!(lanes & 1))
+                               continue;
+                       if (data & 0x200) {
+                               /* enable TX CDR */
+                               if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+                                   cache[QSFP_CDR_INFO_OFFS] & 0x80)
+                                       cdr_ctrl_byte |= (1 << (i + 4));
+                       } else {
+                               /* disable TX CDR */
+                               if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+                                   cache[QSFP_CDR_INFO_OFFS] & 0x80)
+                                       cdr_ctrl_byte &= ~(1 << (i + 4));
+                       }
+ 
+                       if (data & 0x800) {
+                               /* enable RX CDR */
+                               if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+                                   cache[QSFP_CDR_INFO_OFFS] & 0x40)
+                                       cdr_ctrl_byte |= (1 << i);
+                       } else {
+                               /* disable RX CDR */
+                               if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+                                   cache[QSFP_CDR_INFO_OFFS] & 0x40)
+                                       cdr_ctrl_byte &= ~(1 << i);
+                       }
+               }
+               one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+                              &cdr_ctrl_byte, 1);
+               hreq_response(dd, HREQ_SUCCESS, data);
+               refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+               break;
+ 
         case HREQ_CONFIG_DONE:
                 hreq_response(dd, HREQ_SUCCESS, 0);
                 break;
@@@ -6056,11 -6293,11 +6290,11 @@@ static void write_global_credit(struct 
                                 u8 vau, u16 total, u16 shared)
   {
         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-               ((u64)total
-                       << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
-               | ((u64)shared
-                       << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
-               | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+                 ((u64)total <<
+                  SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
+                 ((u64)shared <<
+                  SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
+                 ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
   }
   
   /*
@@@ -6097,7 -6334,7 +6331,7 @@@ void reset_link_credits(struct hfi1_dev
   
         /* remove all previous VL credit limits */
         for (i = 0; i < TXE_NUM_DATA_VL; i++)
-               write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+               write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
         write_global_credit(dd, 0, 0, 0);
         /* reset the CM block */
@@@ -6139,15 -6376,14 +6373,14 @@@ static void lcb_shutdown(struct hfi1_de
         write_csr(dd, DC_LCB_CFG_RUN, 0);
         /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
-               1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
+                 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
         /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
         dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
         reg = read_csr(dd, DCC_CFG_RESET);
-       write_csr(dd, DCC_CFG_RESET,
-               reg
-               | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
-               | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
-       (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
+       write_csr(dd, DCC_CFG_RESET, reg |
+                 (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
+                 (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+       (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
         if (!abort) {
                 udelay(1);    /* must hold for the longer of 16cclks or 20ns */
                 write_csr(dd, DCC_CFG_RESET, reg);
@@@ -6176,14 -6412,18 +6409,18 @@@ static void dc_shutdown(struct hfi1_dev
         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
         /* Shutdown the LCB */
         lcb_shutdown(dd, 1);
-       /* Going to OFFLINE would have causes the 8051 to put the
+       /*
+        * Going to OFFLINE would have caused the 8051 to put the
          * SerDes into reset already. Just need to shut down the 8051,
-        * itself. */
+        * itself.
+        */
         write_csr(dd, DC_DC8051_CFG_RST, 0x1);
   }
   
- /* Calling this after the DC has been brought out of reset should not
-  * do any damage. */
+ /*
+  * Calling this after the DC has been brought out of reset should not
+  * do any damage.
+  */
   static void dc_start(struct hfi1_devdata *dd)
   {
         unsigned long flags;
@@@ -6199,7 -6439,7 +6436,7 @@@
         ret = wait_fm_ready(dd, TIMEOUT_8051_START);
         if (ret) {
                 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
-                       __func__);
+                          __func__);
         }
         /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
         write_csr(dd, DCC_CFG_RESET, 0x10);
@@@ -6292,7 -6532,7 +6529,7 @@@ static void adjust_lcb_for_fpga_serdes(
         write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
         /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
         write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
-               DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+                 DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
   }
   
@@@ -6309,8 -6549,10 +6546,10 @@@ void handle_sma_message(struct work_str
         u64 msg;
         int ret;
   
-       /* msg is bytes 1-4 of the 40-bit idle message - the command code
-          is stripped off */
+       /*
+        * msg is bytes 1-4 of the 40-bit idle message - the command code
+        * is stripped off
+        */
         ret = read_idle_sma(dd, &msg);
         if (ret)
                 return;
@@@ -6336,8 -6578,8 +6575,8 @@@
                  *
                  * Can activate the node.  Discard otherwise.
                  */
-               if (ppd->host_link_state == HLS_UP_ARMED
-                                       && ppd->is_active_optimize_enabled) {
+               if (ppd->host_link_state == HLS_UP_ARMED &&
+                   ppd->is_active_optimize_enabled) {
                         ppd->neighbor_normal = 1;
                         ret = set_link_state(ppd, HLS_UP_ACTIVE);
                         if (ret)
@@@ -6349,8 -6591,8 +6588,8 @@@
                 break;
         default:
                 dd_dev_err(dd,
-                       "%s: received unexpected SMA idle message 0x%llx\n",
-                       __func__, msg);
+                          "%s: received unexpected SMA idle message 0x%llx\n",
+                          __func__, msg);
                 break;
         }
   }
@@@ -6442,10 -6684,9 +6681,9 @@@ static void wait_for_freeze_status(stru
   
                 if (time_after(jiffies, timeout)) {
                         dd_dev_err(dd,
-                               "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
-                               freeze ? "" : "un",
-                               reg & ALL_FROZE,
-                               freeze ? ALL_FROZE : 0ull);
+                                  "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
+                                  freeze ? "" : "un", reg & ALL_FROZE,
+                                  freeze ? ALL_FROZE : 0ull);
                         return;
                 }
                 usleep_range(80, 120);
@@@ -6475,11 -6716,17 +6713,17 @@@ static void rxe_freeze(struct hfi1_devd
    */
   static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
   {
+       u32 rcvmask;
         int i;
   
         /* enable all kernel contexts */
-       for (i = 0; i < dd->n_krcv_queues; i++)
-               hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
+       for (i = 0; i < dd->n_krcv_queues; i++) {
+               rcvmask = HFI1_RCVCTRL_CTXT_ENB;
+               /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
+               rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+                       HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
+               hfi1_rcvctrl(dd, rcvmask, i);
+       }
   
         /* enable port */
         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@@ -6564,7 -6811,7 +6808,7 @@@ void handle_freeze(struct work_struct *
   void handle_link_up(struct work_struct *work)
   {
         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
-                                                               link_up_work);
+                                                 link_up_work);
         set_link_state(ppd, HLS_UP_INIT);
   
         /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
@@@ -6583,17 -6830,20 +6827,20 @@@
         if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
                 /* oops - current speed is not enabled, bounce */
                 dd_dev_err(ppd->dd,
-                       "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
-                       ppd->link_speed_active, ppd->link_speed_enabled);
+                          "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
+                          ppd->link_speed_active, ppd->link_speed_enabled);
                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
-                       OPA_LINKDOWN_REASON_SPEED_POLICY);
+                                    OPA_LINKDOWN_REASON_SPEED_POLICY);
                 set_link_state(ppd, HLS_DN_OFFLINE);
+               tune_serdes(ppd);
                 start_link(ppd);
         }
   }
   
- /* Several pieces of LNI information were cached for SMA in ppd.
-  * Reset these on link down */
+ /*
+  * Several pieces of LNI information were cached for SMA in ppd.
+  * Reset these on link down
+  */
   static void reset_neighbor_info(struct hfi1_pportdata *ppd)
   {
         ppd->neighbor_guid = 0;
@@@ -6613,7 -6863,13 +6860,13 @@@ void handle_link_down(struct work_struc
         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
                                                                 link_down_work);
   
-       /* go offline first, then deal with reasons */
+       if ((ppd->host_link_state &
+            (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
+            ppd->port_type == PORT_TYPE_FIXED)
+               ppd->offline_disabled_reason =
+                       HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
+ 
+       /* Go offline first, then deal with reading/writing through 8051 */
         set_link_state(ppd, HLS_DN_OFFLINE);
   
         lcl_reason = 0;
@@@ -6633,12 -6889,16 +6886,16 @@@
         /* disable the port */
         clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
   
-       /* If there is no cable attached, turn the DC off. Otherwise,
-        * start the link bring up. */
-       if (!qsfp_mod_present(ppd))
+       /*
+        * If there is no cable attached, turn the DC off. Otherwise,
+        * start the link bring up.
+        */
+       if (!qsfp_mod_present(ppd)) {
                 dc_shutdown(ppd->dd);
-       else
+       } else {
+               tune_serdes(ppd);
                 start_link(ppd);
+       }
   }
   
   void handle_link_bounce(struct work_struct *work)
@@@ -6651,10 -6911,11 +6908,11 @@@
          */
         if (ppd->host_link_state & HLS_UP) {
                 set_link_state(ppd, HLS_DN_OFFLINE);
+               tune_serdes(ppd);
                 start_link(ppd);
         } else {
                 dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
-                       __func__, link_state_name(ppd->host_link_state));
+                           __func__, link_state_name(ppd->host_link_state));
         }
   }
   
@@@ -6751,7 -7012,7 +7009,7 @@@ static u16 link_width_to_bits(struct hf
         case 3: return OPA_LINK_WIDTH_3X;
         default:
                 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
-                       __func__, width);
+                           __func__, width);
                 /* fall through */
         case 4: return OPA_LINK_WIDTH_4X;
         }
@@@ -6763,6 -7024,7 +7021,7 @@@
   static const u8 bit_counts[16] = {
         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
   };
+ 
   static inline u8 nibble_to_count(u8 nibble)
   {
         return bit_counts[nibble & 0xf];
@@@ -6788,7 -7050,7 +7047,7 @@@ static void get_link_widths(struct hfi1
   
         /* read the active lanes */
         read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-                               &rx_polarity_inversion, &max_rate);
+                        &rx_polarity_inversion, &max_rate);
         read_local_lni(dd, &enable_lane_rx);
   
         /* convert to counts */
@@@ -6800,8 -7062,8 +7059,8 @@@
          * handle_verify_cap().  The ASIC 8051 firmware does not correctly
          * set the max_rate field in handle_verify_cap until v0.19.
          */
-       if ((dd->icode == ICODE_RTL_SILICON)
-                               && (dd->dc8051_ver < dc8051_ver(0, 19))) {
+       if ((dd->icode == ICODE_RTL_SILICON) &&
+           (dd->dc8051_ver < dc8051_ver(0, 19))) {
                 /* max_rate: 0 = 12.5G, 1 = 25G */
                 switch (max_rate) {
                 case 0:
@@@ -6809,8 -7071,8 +7068,8 @@@
                         break;
                 default:
                         dd_dev_err(dd,
-                               "%s: unexpected max rate %d, using 25Gb\n",
-                               __func__, (int)max_rate);
+                                  "%s: unexpected max rate %d, using 25Gb\n",
+                                  __func__, (int)max_rate);
                         /* fall through */
                 case 1:
                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
@@@ -6819,8 -7081,8 +7078,8 @@@
         }
   
         dd_dev_info(dd,
-               "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
-               enable_lane_tx, tx, enable_lane_rx, rx);
+                   "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
+                   enable_lane_tx, tx, enable_lane_rx, rx);
         *tx_width = link_width_to_bits(dd, tx);
         *rx_width = link_width_to_bits(dd, rx);
   }
@@@ -6923,13 -7185,8 +7182,8 @@@ void handle_verify_cap(struct work_stru
          */
   
         read_vc_remote_phy(dd, &power_management, &continious);
-       read_vc_remote_fabric(
-               dd,
-               &vau,
-               &z,
-               &vcu,
-               &vl15buf,
-               &partner_supported_crc);
+       read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
+                             &partner_supported_crc);
         read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
         read_remote_device_id(dd, &device_id, &device_rev);
         /*
@@@ -6940,19 -7197,16 +7194,16 @@@
         /* print the active widths */
         get_link_widths(dd, &active_tx, &active_rx);
         dd_dev_info(dd,
-               "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
-               (int)power_management, (int)continious);
+                   "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
+                   (int)power_management, (int)continious);
         dd_dev_info(dd,
-               "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
-               (int)vau,
-               (int)z,
-               (int)vcu,
-               (int)vl15buf,
-               (int)partner_supported_crc);
+                   "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
+                   (int)vau, (int)z, (int)vcu, (int)vl15buf,
+                   (int)partner_supported_crc);
         dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
-               (u32)remote_tx_rate, (u32)link_widths);
+                   (u32)remote_tx_rate, (u32)link_widths);
         dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
-               (u32)device_id, (u32)device_rev);
+                   (u32)device_id, (u32)device_rev);
         /*
          * The peer vAU value just read is the peer receiver value.  HFI does
          * not support a transmit vAU of 0 (AU == 8).  We advertised that
@@@ -6987,10 -7241,10 +7238,10 @@@
         reg = read_csr(dd, SEND_CM_CTRL);
         if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
                 write_csr(dd, SEND_CM_CTRL,
-                       reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+                         reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
         } else {
                 write_csr(dd, SEND_CM_CTRL,
-                       reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+                         reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
         }
   
         ppd->link_speed_active = 0;     /* invalid value */
@@@ -7015,7 -7269,7 +7266,7 @@@
         }
         if (ppd->link_speed_active == 0) {
                 dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
-                       __func__, (int)remote_tx_rate);
+                          __func__, (int)remote_tx_rate);
                 ppd->link_speed_active = OPA_LINK_SPEED_25G;
         }
   
@@@ -7071,9 -7325,9 +7322,9 @@@
                 read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
                 DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
         dd_dev_info(dd,
-               "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
-               ppd->neighbor_guid, ppd->neighbor_type,
-               ppd->mgmt_allowed, ppd->neighbor_fm_security);
+                   "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
+                   ppd->neighbor_guid, ppd->neighbor_type,
+                   ppd->mgmt_allowed, ppd->neighbor_fm_security);
         if (ppd->mgmt_allowed)
                 add_full_mgmt_pkey(ppd);
   
@@@ -7127,28 -7381,27 +7378,27 @@@ retry
   
                 /* bounce if not at starting active width */
                 if ((ppd->link_width_active !=
-                                       ppd->link_width_downgrade_tx_active)
-                               || (ppd->link_width_active !=
-                                       ppd->link_width_downgrade_rx_active)) {
+                    ppd->link_width_downgrade_tx_active) ||
+                   (ppd->link_width_active !=
+                    ppd->link_width_downgrade_rx_active)) {
                         dd_dev_err(ppd->dd,
-                               "Link downgrade is disabled and link has downgraded, downing link\n");
+                                  "Link downgrade is disabled and link has downgraded, downing link\n");
                         dd_dev_err(ppd->dd,
-                               "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
-                               ppd->link_width_active,
-                               ppd->link_width_downgrade_tx_active,
-                               ppd->link_width_downgrade_rx_active);
+                                  "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
+                                  ppd->link_width_active,
+                                  ppd->link_width_downgrade_tx_active,
+                                  ppd->link_width_downgrade_rx_active);
                         do_bounce = 1;
                 }
-       } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
-               || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
+       } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 ||
+                  (lwde & ppd->link_width_downgrade_rx_active) == 0) {
                 /* Tx or Rx is outside the enabled policy */
                 dd_dev_err(ppd->dd,
-                       "Link is outside of downgrade allowed, downing link\n");
+                          "Link is outside of downgrade allowed, downing link\n");
                 dd_dev_err(ppd->dd,
-                       "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
-                       lwde,
-                       ppd->link_width_downgrade_tx_active,
-                       ppd->link_width_downgrade_rx_active);
+                          "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
+                          lwde, ppd->link_width_downgrade_tx_active,
+                          ppd->link_width_downgrade_rx_active);
                 do_bounce = 1;
         }
   
@@@ -7157,8 -7410,9 +7407,9 @@@ done
   
         if (do_bounce) {
                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
-                 OPA_LINKDOWN_REASON_WIDTH_POLICY);
+                                    OPA_LINKDOWN_REASON_WIDTH_POLICY);
                 set_link_state(ppd, HLS_DN_OFFLINE);
+               tune_serdes(ppd);
                 start_link(ppd);
         }
   }
@@@ -7239,9 -7493,10 +7490,10 @@@ static void handle_8051_interrupt(struc
                             & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
                                 queue_link_down = 1;
                                 dd_dev_info(dd, "Link error: %s\n",
-                                       dc8051_info_err_string(buf,
-                                               sizeof(buf),
-                                               err & FAILED_LNI));
+                                           dc8051_info_err_string(buf,
+                                                                  sizeof(buf),
+                                                                  err &
+                                                                  FAILED_LNI));
                         }
                         err &= ~(u64)FAILED_LNI;
                 }
@@@ -7253,7 -7508,8 +7505,8 @@@
                 if (err) {
                         /* report remaining errors, but do not do anything */
                         dd_dev_err(dd, "8051 info error: %s\n",
-                               dc8051_info_err_string(buf, sizeof(buf), err));
+                                  dc8051_info_err_string(buf, sizeof(buf),
+                                                         err));
                 }
   
                 /*
@@@ -7281,7 -7537,7 +7534,7 @@@
                         host_msg &= ~(u64)LINKUP_ACHIEVED;
                 }
                 if (host_msg & EXT_DEVICE_CFG_REQ) {
-                       handle_8051_request(dd);
+                       queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);
                         host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
                 }
                 if (host_msg & VERIFY_CAP_FRAME) {
@@@ -7306,8 -7562,9 +7559,9 @@@
                 if (host_msg) {
                         /* report remaining messages, but do not do anything */
                         dd_dev_info(dd, "8051 info host message: %s\n",
-                               dc8051_info_host_msg_string(buf, sizeof(buf),
-                                       host_msg));
+                                   dc8051_info_host_msg_string(buf,
+                                                               sizeof(buf),
+                                                               host_msg));
                 }
   
                 reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
@@@ -7320,25 -7577,27 +7574,27 @@@
                  */
                 dd_dev_err(dd, "Lost 8051 heartbeat\n");
                 write_csr(dd, DC_DC8051_ERR_EN,
-                       read_csr(dd, DC_DC8051_ERR_EN)
-                         & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
+                         read_csr(dd, DC_DC8051_ERR_EN) &
+                         ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
   
                 reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
         }
         if (reg) {
                 /* report the error, but do not do anything */
                 dd_dev_err(dd, "8051 error: %s\n",
-                       dc8051_err_string(buf, sizeof(buf), reg));
+                          dc8051_err_string(buf, sizeof(buf), reg));
         }
   
         if (queue_link_down) {
-               /* if the link is already going down or disabled, do not
-                * queue another */
-               if ((ppd->host_link_state
-                                   & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
-                               || ppd->link_enabled == 0) {
+               /*
+                * if the link is already going down or disabled, do not
+                * queue another
+                */
+               if ((ppd->host_link_state &
+                   (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
+                   ppd->link_enabled == 0) {
                         dd_dev_info(dd, "%s: not queuing link down\n",
-                               __func__);
+                                   __func__);
                 } else {
                         queue_work(ppd->hfi1_wq, &ppd->link_down_work);
                 }
@@@ -7480,8 -7739,10 +7736,10 @@@ static void handle_dcc_err(struct hfi1_
                         /* set status bit */
                         dd->err_info_rcvport.status_and_code |=
                                 OPA_EI_STATUS_SMASK;
-                       /* save first 2 flits in the packet that caused
-                        * the error */
+                       /*
+                        * save first 2 flits in the packet that caused
+                        * the error
+                        */
                          dd->err_info_rcvport.packet_flit1 = hdr0;
                          dd->err_info_rcvport.packet_flit2 = hdr1;
                 }
@@@ -7514,7 -7775,7 +7772,7 @@@
                 /* just report this */
                 dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
                 dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
-                       hdr0, hdr1);
+                           hdr0, hdr1);
   
                 reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
         }
@@@ -7533,7 -7794,7 +7791,7 @@@
         /* report any remaining errors */
         if (reg)
                 dd_dev_info(dd, "DCC Error: %s\n",
-                       dcc_err_string(buf, sizeof(buf), reg));
+                           dcc_err_string(buf, sizeof(buf), reg));
   
         if (lcl_reason == 0)
                 lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
@@@ -7550,7 -7811,7 +7808,7 @@@ static void handle_lcb_err(struct hfi1_
         char buf[96];
   
         dd_dev_info(dd, "LCB Error: %s\n",
-               lcb_err_string(buf, sizeof(buf), reg));
+                   lcb_err_string(buf, sizeof(buf), reg));
   }
   
   /*
@@@ -7640,7 -7901,7 +7898,7 @@@ static void is_rcv_avail_int(struct hfi
                 err_detail = "out of range";
         }
         dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
-               err_detail, source);
+                  err_detail, source);
   }
   
   /*
@@@ -7666,7 -7927,7 +7924,7 @@@ static void is_rcv_urgent_int(struct hf
                 err_detail = "out of range";
         }
         dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
-               err_detail, source);
+                  err_detail, source);
   }
   
   /*
@@@ -7677,12 -7938,14 +7935,14 @@@ static void is_reserved_int(struct hfi1
         char name[64];
   
         dd_dev_err(dd, "unexpected %s interrupt\n",
-                               is_reserved_name(name, sizeof(name), source));
+                  is_reserved_name(name, sizeof(name), source));
   }
   
   static const struct is_table is_table[] = {
- /* start                   end
-                               name func               interrupt func */
+ /*
+  * start               end
+  *                            name func               interrupt func
+  */
   { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
                                 is_misc_err_name,       is_misc_err_int },
   { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
@@@ -7753,7 -8016,7 +8013,7 @@@ static irqreturn_t general_interrupt(in
   
         /* phase 2: call the appropriate handler */
         for_each_set_bit(bit, (unsigned long *)&regs[0],
-                                               CCE_NUM_INT_CSRS*64) {
+                        CCE_NUM_INT_CSRS * 64) {
                 is_interrupt(dd, bit);
         }
   
@@@ -7776,27 -8039,27 +8036,27 @@@ static irqreturn_t sdma_interrupt(int i
   
         /* This read_csr is really bad in the hot path */
         status = read_csr(dd,
-                       CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
-                       & sde->imask;
+                         CCE_INT_STATUS + (8 * (IS_SDMA_START / 64)))
+                         & sde->imask;
         if (likely(status)) {
                 /* clear the interrupt(s) */
                 write_csr(dd,
-                       CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
-                       status);
+                         CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)),
+                         status);
   
                 /* handle the interrupt(s) */
                 sdma_engine_interrupt(sde, status);
         } else
                 dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
-                       sde->this_idx);
+                          sde->this_idx);
   
         return IRQ_HANDLED;
   }
   
   /*
-  * Clear the receive interrupt, forcing the write and making sure
-  * we have data from the chip, pushing everything in front of it
-  * back to the host.
+  * Clear the receive interrupt.  Use a read of the interrupt clear CSR
+  * to ensure that the write completed.  This does NOT guarantee that
+  * queued DMA writes to memory from the chip are pushed.
    */
   static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
   {
@@@ -7810,27 -8073,45 +8070,45 @@@
   }
   
   /* force the receive interrupt */
- static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
+ void force_recv_intr(struct hfi1_ctxtdata *rcd)
   {
         write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
   }
   
- /* return non-zero if a packet is present */
+ /*
+  * Return non-zero if a packet is present.
+  *
+  * This routine is called when rechecking for packets after the RcvAvail
+  * interrupt has been cleared down.  First, do a quick check of memory for
+  * a packet present.  If not found, use an expensive CSR read of the context
+  * tail to determine the actual tail.  The CSR read is necessary because there
+  * is no method to push pending DMAs to memory other than an interrupt and we
+  * are trying to determine if we need to force an interrupt.
+  */
   static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
   {
+       u32 tail;
+       int present;
+ 
         if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
-               return (rcd->seq_cnt ==
+               present = (rcd->seq_cnt ==
                                 rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+       else /* is RDMA rtail */
+               present = (rcd->head != get_rcvhdrtail(rcd));
+ 
+       if (present)
+               return 1;
   
-       /* else is RDMA rtail */
-       return (rcd->head != get_rcvhdrtail(rcd));
+       /* fall back to a CSR read, correct independent of DMA_RTAIL */
+       tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
+       return rcd->head != tail;
   }
   
   /*
    * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
    * This routine will try to handle packets immediately (latency), but if
    * it finds too many, it will invoke the thread handler (bandwitdh).  The
-  * chip receive interupt is *not* cleared down until this or the thread (if
+  * chip receive interrupt is *not* cleared down until this or the thread (if
    * invoked) is finished.  The intent is to avoid extra interrupts while we
    * are processing packets anyway.
    */
@@@ -7843,6 -8124,7 +8121,7 @@@ static irqreturn_t receive_context_inte
   
         trace_hfi1_receive_interrupt(dd, rcd->ctxt);
         this_cpu_inc(*dd->int_counter);
+       aspm_ctx_disable(rcd);
   
         /* receive interrupt remains blocked while processing packets */
         disposition = rcd->do_interrupt(rcd, 0);
@@@ -7909,7 -8191,7 +8188,7 @@@ u32 read_physical_state(struct hfi1_dev
                                 & DC_DC8051_STS_CUR_STATE_PORT_MASK;
   }
   
- static u32 read_logical_state(struct hfi1_devdata *dd)
+ u32 read_logical_state(struct hfi1_devdata *dd)
   {
         u64 reg;
   
@@@ -8157,8 -8439,8 +8436,8 @@@ static int set_physical_link_state(stru
         return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
   }
   
- static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
-                           u8 lane_id, u32 config_data)
+ int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+                    u8 lane_id, u32 config_data)
   {
         u64 data;
         int ret;
@@@ -8169,8 -8451,8 +8448,8 @@@
         ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
         if (ret != HCMD_SUCCESS) {
                 dd_dev_err(dd,
-                       "load 8051 config: field id %d, lane %d, err %d\n",
-                       (int)field_id, (int)lane_id, ret);
+                          "load 8051 config: field id %d, lane %d, err %d\n",
+                          (int)field_id, (int)lane_id, ret);
         }
         return ret;
   }
@@@ -8180,8 -8462,8 +8459,8 @@@
    * set the result, even on error.
    * Return 0 on success, -errno on failure
    */
- static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
-                           u32 *result)
+ int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
+                    u32 *result)
   {
         u64 big_data;
         u32 addr;
@@@ -8207,7 -8489,7 +8486,7 @@@
         } else {
                 *result = 0;
                 dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
-                       __func__, lane_id, field_id);
+                          __func__, lane_id, field_id);
         }
   
         return ret;
@@@ -8244,7 -8526,7 +8523,7 @@@ static void read_vc_local_link_width(st
         u32 frame;
   
         read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
-                               &frame);
+                        &frame);
         *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
         *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@@ -8326,7 -8608,7 +8605,7 @@@ static void read_vc_remote_link_width(s
         u32 frame;
   
         read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
-                               &frame);
+                        &frame);
         *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
                                 & REMOTE_TX_RATE_MASK;
         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@@ -8366,7 -8648,7 +8645,7 @@@ void hfi1_read_link_quality(struct hfi1
         *link_quality = 0;
         if (dd->pport->host_link_state & HLS_UP) {
                 ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
-                                       &frame);
+                                      &frame);
                 if (ret == 0)
                         *link_quality = (frame >> LINK_QUALITY_SHIFT)
                                                 & LINK_QUALITY_MASK;
@@@ -8426,10 -8708,9 +8705,9 @@@ static void check_fabric_firmware_versi
         for (lane = 0; lane < 4; lane++) {
                 ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
                 if (ret) {
-                       dd_dev_err(
-                               dd,
-                               "Unable to read lane %d firmware details\n",
-                               lane);
+                       dd_dev_err(dd,
+                                  "Unable to read lane %d firmware details\n",
+                                  lane);
                         continue;
                 }
                 version = (frame >> SPICO_ROM_VERSION_SHIFT)
@@@ -8437,8 -8718,8 +8715,8 @@@
                 prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
                                         & SPICO_ROM_PROD_ID_MASK;
                 dd_dev_info(dd,
-                       "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
-                       lane, version, prod_id);
+                           "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
+                           lane, version, prod_id);
         }
   }
   
@@@ -8451,11 -8732,10 +8729,10 @@@ static int read_idle_message(struct hfi
   {
         int ret;
   
-       ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
-               type, data_out);
+       ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out);
         if (ret != HCMD_SUCCESS) {
                 dd_dev_err(dd, "read idle message: type %d, err %d\n",
-                       (u32)type, ret);
+                          (u32)type, ret);
                 return -EINVAL;
         }
         dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
@@@ -8472,8 -8752,8 +8749,8 @@@
    */
   static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
   {
-       return read_idle_message(dd,
-                       (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
+       return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT,
+                                data);
   }
   
   /*
@@@ -8489,7 -8769,7 +8766,7 @@@ static int send_idle_message(struct hfi
         ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
         if (ret != HCMD_SUCCESS) {
                 dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
-                       data, ret);
+                          data, ret);
                 return -EINVAL;
         }
         return 0;
@@@ -8504,8 -8784,8 +8781,8 @@@ int send_idle_sma(struct hfi1_devdata *
   {
         u64 data;
   
-       data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
-               | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
+       data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) |
+               ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
         return send_idle_message(dd, data);
   }
   
@@@ -8527,7 -8807,7 +8804,7 @@@ static int do_quick_linkup(struct hfi1_
                 /* LCB_CFG_LOOPBACK.VAL = 2 */
                 /* LCB_CFG_LANE_WIDTH.VAL = 0 */
                 write_csr(dd, DC_LCB_CFG_LOOPBACK,
-                       IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
+                         IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
                 write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
         }
   
@@@ -8539,25 -8819,24 +8816,24 @@@
         if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
                 /* LCB_CFG_RUN.EN = 1 */
                 write_csr(dd, DC_LCB_CFG_RUN,
-                       1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+                         1ull << DC_LCB_CFG_RUN_EN_SHIFT);
   
                 /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
                 timeout = jiffies + msecs_to_jiffies(10);
                 while (1) {
-                       reg = read_csr(dd,
-                               DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+                       reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
                         if (reg)
                                 break;
                         if (time_after(jiffies, timeout)) {
                                 dd_dev_err(dd,
-                                       "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+                                          "timeout waiting for LINK_TRANSFER_ACTIVE\n");
                                 return -ETIMEDOUT;
                         }
                         udelay(2);
                 }
   
                 write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
-                       1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
+                         1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
         }
   
         if (!loopback) {
@@@ -8569,10 -8848,9 +8845,9 @@@
                  * done with LCB set up before resuming.
                  */
                 dd_dev_err(dd,
-                       "Pausing for peer to be finished with LCB set up\n");
+                          "Pausing for peer to be finished with LCB set up\n");
                 msleep(5000);
-               dd_dev_err(dd,
-                       "Continuing with quick linkup\n");
+               dd_dev_err(dd, "Continuing with quick linkup\n");
         }
   
         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
@@@ -8586,8 -8864,8 +8861,8 @@@
         ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
         if (ret != HCMD_SUCCESS) {
                 dd_dev_err(dd,
-                       "%s: set physical link state to quick LinkUp failed with return %d\n",
-                       __func__, ret);
+                          "%s: set physical link state to quick LinkUp failed with return %d\n",
+                          __func__, ret);
   
                 set_host_lcb_access(dd);
                 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
@@@ -8612,8 -8890,8 +8887,8 @@@ static int set_serdes_loopback_mode(str
         if (ret == HCMD_SUCCESS)
                 return 0;
         dd_dev_err(dd,
-               "Set physical link state to SerDes Loopback failed with return %d\n",
-               ret);
+                  "Set physical link state to SerDes Loopback failed with return %d\n",
+                  ret);
         if (ret >= 0)
                 ret = -EINVAL;
         return ret;
@@@ -8628,7 -8906,7 +8903,7 @@@ static int init_loopback(struct hfi1_de
   
         /* all loopbacks should disable self GUID check */
         write_csr(dd, DC_DC8051_CFG_MODE,
-               (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
+                 (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
   
         /*
          * The simulator has only one loopback option - LCB.  Switch
@@@ -8636,10 -8914,9 +8911,9 @@@
          *
          * Accept all valid loopback values.
          */
-       if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
-               && (loopback == LOOPBACK_SERDES
-                       || loopback == LOOPBACK_LCB
-                       || loopback == LOOPBACK_CABLE)) {
+       if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) &&
+           (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
+            loopback == LOOPBACK_CABLE)) {
                 loopback = LOOPBACK_LCB;
                 quick_linkup = 1;
                 return 0;
@@@ -8660,7 -8937,7 +8934,7 @@@
                 /* not supported in emulation due to emulation RTL changes */
                 if (dd->icode == ICODE_FPGA_EMULATION) {
                         dd_dev_err(dd,
-                               "LCB loopback not supported in emulation\n");
+                                  "LCB loopback not supported in emulation\n");
                         return -EINVAL;
                 }
                 return 0;
@@@ -8687,10 -8964,10 +8961,10 @@@ static u16 opa_to_vc_link_widths(u16 op
                 u16 from;
                 u16 to;
         } opa_link_xlate[] = {
-               { OPA_LINK_WIDTH_1X, 1 << (1-1)  },
-               { OPA_LINK_WIDTH_2X, 1 << (2-1)  },
-               { OPA_LINK_WIDTH_3X, 1 << (3-1)  },
-               { OPA_LINK_WIDTH_4X, 1 << (4-1)  },
+               { OPA_LINK_WIDTH_1X, 1 << (1 - 1)  },
+               { OPA_LINK_WIDTH_2X, 1 << (2 - 1)  },
+               { OPA_LINK_WIDTH_3X, 1 << (3 - 1)  },
+               { OPA_LINK_WIDTH_4X, 1 << (4 - 1)  },
         };
   
         for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
@@@ -8716,7 -8993,7 +8990,7 @@@ static int set_local_link_attributes(st
   
         /* set the local tx rate - need to read-modify-write */
         ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-               &rx_polarity_inversion, &ppd->local_tx_rate);
+                              &rx_polarity_inversion, &ppd->local_tx_rate);
         if (ret)
                 goto set_local_link_attributes_fail;
   
@@@ -8737,15 -9014,16 +9011,16 @@@
   
         enable_lane_tx = 0xF; /* enable all four lanes */
         ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
-                    rx_polarity_inversion, ppd->local_tx_rate);
+                               rx_polarity_inversion, ppd->local_tx_rate);
         if (ret != HCMD_SUCCESS)
                 goto set_local_link_attributes_fail;
   
         /*
          * DC supports continuous updates.
          */
-       ret = write_vc_local_phy(dd, 0 /* no power management */,
-                                    1 /* continuous updates */);
+       ret = write_vc_local_phy(dd,
+                                0 /* no power management */,
+                                1 /* continuous updates */);
         if (ret != HCMD_SUCCESS)
                 goto set_local_link_attributes_fail;
   
@@@ -8756,7 -9034,8 +9031,8 @@@
                 goto set_local_link_attributes_fail;
   
         ret = write_vc_local_link_width(dd, 0, 0,
-                    opa_to_vc_link_widths(ppd->link_width_enabled));
+                                       opa_to_vc_link_widths(
+                                               ppd->link_width_enabled));
         if (ret != HCMD_SUCCESS)
                 goto set_local_link_attributes_fail;
   
@@@ -8767,8 -9046,8 +9043,8 @@@
   
   set_local_link_attributes_fail:
         dd_dev_err(dd,
-               "Failed to set local link attributes, return 0x%x\n",
-               ret);
+                  "Failed to set local link attributes, return 0x%x\n",
+                  ret);
         return ret;
   }
   
@@@ -8781,54 -9060,101 +9057,101 @@@ int start_link(struct hfi1_pportdata *p
   {
         if (!ppd->link_enabled) {
                 dd_dev_info(ppd->dd,
-                       "%s: stopping link start because link is disabled\n",
-                       __func__);
+                           "%s: stopping link start because link is disabled\n",
+                           __func__);
                 return 0;
         }
         if (!ppd->driver_link_ready) {
                 dd_dev_info(ppd->dd,
-                       "%s: stopping link start because driver is not ready\n",
-                       __func__);
+                           "%s: stopping link start because driver is not ready\n",
+                           __func__);
                 return 0;
         }
   
         if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
-                       loopback == LOOPBACK_LCB ||
-                       ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+           loopback == LOOPBACK_LCB ||
+           ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
                 return set_link_state(ppd, HLS_DN_POLL);
   
         dd_dev_info(ppd->dd,
-               "%s: stopping link start because no cable is present\n",
-               __func__);
+                   "%s: stopping link start because no cable is present\n",
+                   __func__);
         return -EAGAIN;
   }
   
- static void reset_qsfp(struct hfi1_pportdata *ppd)
+ static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
+ {
+       struct hfi1_devdata *dd = ppd->dd;
+       u64 mask;
+       unsigned long timeout;
+ 
+       /*
+        * Check for QSFP interrupt for t_init (SFF 8679)
+        */
+       timeout = jiffies + msecs_to_jiffies(2000);
+       while (1) {
+               mask = read_csr(dd, dd->hfi1_id ?
+                               ASIC_QSFP2_IN : ASIC_QSFP1_IN);
+               if (!(mask & QSFP_HFI0_INT_N)) {
+                       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
+                                 ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+                       break;
+               }
+               if (time_after(jiffies, timeout)) {
+                       dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
+                                   __func__);
+                       break;
+               }
+               udelay(2);
+       }
+ }
+ 
+ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
+ {
+       struct hfi1_devdata *dd = ppd->dd;
+       u64 mask;
+ 
+       mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
+       if (enable)
+               mask |= (u64)QSFP_HFI0_INT_N;
+       else
+               mask &= ~(u64)QSFP_HFI0_INT_N;
+       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
+ }
+ 
+ void reset_qsfp(struct hfi1_pportdata *ppd)
   {
         struct hfi1_devdata *dd = ppd->dd;
         u64 mask, qsfp_mask;
   
+       /* Disable INT_N from triggering QSFP interrupts */
+       set_qsfp_int_n(ppd, 0);
+ 
+       /* Reset the QSFP */
         mask = (u64)QSFP_HFI0_RESET_N;
-       qsfp_mask = read_csr(dd,
-               dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
+       qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
         qsfp_mask |= mask;
-       write_csr(dd,
-               dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
-               qsfp_mask);
+       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
   
         qsfp_mask = read_csr(dd,
-               dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
+                            dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
         qsfp_mask &= ~mask;
         write_csr(dd,
-               dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-               qsfp_mask);
+                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
   
         udelay(10);
   
         qsfp_mask |= mask;
         write_csr(dd,
-               dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-               qsfp_mask);
+                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
+ 
+       wait_for_qsfp_init(ppd);
+ 
+       /*
+        * Allow INT_N to trigger the QSFP interrupt to watch
+        * for alarms and warnings
+        */
+       set_qsfp_int_n(ppd, 1);
   }
   
   static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@@ -8837,102 -9163,86 +9160,86 @@@
         struct hfi1_devdata *dd = ppd->dd;
   
         if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
-               (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
-               dd_dev_info(dd,
-                       "%s: QSFP cable on fire\n",
-                       __func__);
+           (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
+               dd_dev_info(dd, "%s: QSFP cable on fire\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
-               (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
-               dd_dev_info(dd,
-                       "%s: QSFP cable temperature too low\n",
-                       __func__);
+           (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
+               dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
-               (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
-               dd_dev_info(dd,
-                       "%s: QSFP supply voltage too high\n",
-                       __func__);
+           (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
+               dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
-               (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
-               dd_dev_info(dd,
-                       "%s: QSFP supply voltage too low\n",
-                       __func__);
+           (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
+               dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
+                           __func__);
   
         /* Byte 2 is vendor specific */
   
         if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
-               (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable RX channel 1/2 power too high\n",
-                       __func__);
+           (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
-               (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable RX channel 1/2 power too low\n",
-                       __func__);
+           (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
-               (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable RX channel 3/4 power too high\n",
-                       __func__);
+           (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
-               (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable RX channel 3/4 power too low\n",
-                       __func__);
+           (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
-               (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 1/2 bias too high\n",
-                       __func__);
+           (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
-               (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 1/2 bias too low\n",
-                       __func__);
+           (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
-               (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 3/4 bias too high\n",
-                       __func__);
+           (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
-               (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 3/4 bias too low\n",
-                       __func__);
+           (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
-               (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 1/2 power too high\n",
-                       __func__);
+           (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
-               (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 1/2 power too low\n",
-                       __func__);
+           (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
-               (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 3/4 power too high\n",
-                       __func__);
+           (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
+                           __func__);
   
         if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
-               (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd,
-                       "%s: Cable TX channel 3/4 power too low\n",
-                       __func__);
+           (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
+               dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
+                           __func__);
   
         /* Bytes 9-10 and 11-12 are reserved */
         /* Bytes 13-15 are vendor specific */
@@@ -8940,35 -9250,8 +9247,8 @@@
         return 0;
   }
   
- static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
- {
-       refresh_qsfp_cache(ppd, &ppd->qsfp_info);
- 
-       return 0;
- }
- 
- static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
- {
-       struct hfi1_devdata *dd = ppd->dd;
-       u8 qsfp_interrupt_status = 0;
- 
-       if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
-               != 1) {
-               dd_dev_info(dd,
-                       "%s: Failed to read status of QSFP module\n",
-                       __func__);
-               return -EIO;
-       }
- 
-       /* We don't care about alarms & warnings with a non-functional INT_N */
-       if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
-               do_pre_lni_host_behaviors(ppd);
- 
-       return 0;
- }
- 
   /* This routine will only be scheduled if the QSFP module is present */
- static void qsfp_event(struct work_struct *work)
+ void qsfp_event(struct work_struct *work)
   {
         struct qsfp_data *qd;
         struct hfi1_pportdata *ppd;
@@@ -8990,76 -9273,75 +9270,75 @@@
         dc_start(dd);
   
         if (qd->cache_refresh_required) {
-               msleep(3000);
-               reset_qsfp(ppd);
+               set_qsfp_int_n(ppd, 0);
   
-               /* Check for QSFP interrupt after t_init (SFF 8679)
-                * + extra
+               wait_for_qsfp_init(ppd);
+ 
+               /*
+                * Allow INT_N to trigger the QSFP interrupt to watch
+                * for alarms and warnings
                  */
-               msleep(3000);
-               if (!qd->qsfp_interrupt_functional) {
-                       if (do_qsfp_intr_fallback(ppd) < 0)
-                               dd_dev_info(dd, "%s: QSFP fallback failed\n",
-                                       __func__);
-                       ppd->driver_link_ready = 1;
-                       start_link(ppd);
-               }
+               set_qsfp_int_n(ppd, 1);
+ 
+               tune_serdes(ppd);
+ 
+               start_link(ppd);
         }
   
         if (qd->check_interrupt_flags) {
                 u8 qsfp_interrupt_status[16] = {0,};
   
-               if (qsfp_read(ppd, dd->hfi1_id, 6,
-                             &qsfp_interrupt_status[0], 16) != 16) {
+               if (one_qsfp_read(ppd, dd->hfi1_id, 6,
+                                 &qsfp_interrupt_status[0], 16) != 16) {
                         dd_dev_info(dd,
-                               "%s: Failed to read status of QSFP module\n",
-                               __func__);
+                                   "%s: Failed to read status of QSFP module\n",
+                                   __func__);
                 } else {
                         unsigned long flags;
-                       u8 data_status;
   
+                       handle_qsfp_error_conditions(
+                                       ppd, qsfp_interrupt_status);
                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
                         ppd->qsfp_info.check_interrupt_flags = 0;
                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-                                                               flags);
- 
-                       if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
-                                != 1) {
-                               dd_dev_info(dd,
-                               "%s: Failed to read status of QSFP module\n",
-                                       __func__);
-                       }
-                       if (!(data_status & QSFP_DATA_NOT_READY)) {
-                               do_pre_lni_host_behaviors(ppd);
-                               start_link(ppd);
-                       } else
-                               handle_qsfp_error_conditions(ppd,
-                                               qsfp_interrupt_status);
+                                              flags);
                 }
         }
   }
   
- void init_qsfp(struct hfi1_pportdata *ppd)
+ static void init_qsfp_int(struct hfi1_devdata *dd)
   {
-       struct hfi1_devdata *dd = ppd->dd;
-       u64 qsfp_mask;
+       struct hfi1_pportdata *ppd = dd->pport;
+       u64 qsfp_mask, cce_int_mask;
+       const int qsfp1_int_smask = QSFP1_INT % 64;
+       const int qsfp2_int_smask = QSFP2_INT % 64;
   
-       if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
-                       ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
-               ppd->driver_link_ready = 1;
-               return;
+       /*
+        * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
+        * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
+        * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
+        * the index of the appropriate CSR in the CCEIntMask CSR array
+        */
+       cce_int_mask = read_csr(dd, CCE_INT_MASK +
+                               (8 * (QSFP1_INT / 64)));
+       if (dd->hfi1_id) {
+               cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
+               write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
+                         cce_int_mask);
+       } else {
+               cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
+               write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
+                         cce_int_mask);
         }
   
-       ppd->qsfp_info.ppd = ppd;
-       INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
- 
         qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
         /* Clear current status to avoid spurious interrupts */
-       write_csr(dd,
-                       dd->hfi1_id ?
-                               ASIC_QSFP2_CLEAR :
-                               ASIC_QSFP1_CLEAR,
-               qsfp_mask);
+       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+                 qsfp_mask);
+       write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
+                 qsfp_mask);
+ 
+       set_qsfp_int_n(ppd, 0);
   
         /* Handle active low nature of INT_N and MODPRST_N pins */
         if (qsfp_mod_present(ppd))
@@@ -9067,29 -9349,6 +9346,6 @@@
         write_csr(dd,
                   dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
                   qsfp_mask);
- 
-       /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
-       qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
-       write_csr(dd,
-               dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
-               qsfp_mask);
- 
-       if (qsfp_mod_present(ppd)) {
-               msleep(3000);
-               reset_qsfp(ppd);
- 
-               /* Check for QSFP interrupt after t_init (SFF 8679)
-                * + extra
-                */
-               msleep(3000);
-               if (!ppd->qsfp_info.qsfp_interrupt_functional) {
-                       if (do_qsfp_intr_fallback(ppd) < 0)
-                               dd_dev_info(dd,
-                                       "%s: QSFP fallback failed\n",
-                                       __func__);
-                       ppd->driver_link_ready = 1;
-               }
-       }
   }
   
   /*
@@@ -9097,6 -9356,10 +9353,10 @@@
    */
   static void init_lcb(struct hfi1_devdata *dd)
   {
+       /* simulator does not correctly handle LCB cclk loopback, skip */
+       if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+               return;
+ 
         /* the DC has been reset earlier in the driver load */
   
         /* set LCB for cclk loopback on the port */
@@@ -9125,8 -9388,6 +9385,6 @@@ int bringup_serdes(struct hfi1_pportdat
                 ppd->guid = guid;
         }
   
-       /* the link defaults to enabled */
-       ppd->link_enabled = 1;
         /* Set linkinit_reason on power up per OPA spec */
         ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
   
@@@ -9139,6 -9400,12 +9397,12 @@@
                         return ret;
         }
   
+       /* tune the SERDES to a ballpark setting for
+        * optimal signal and bit error rate
+        * Needs to be done before starting the link
+        */
+       tune_serdes(ppd);
+ 
         return start_link(ppd);
   }
   
@@@ -9156,8 -9423,10 +9420,10 @@@ void hfi1_quiet_serdes(struct hfi1_ppor
         ppd->driver_link_ready = 0;
         ppd->link_enabled = 0;
   
+       ppd->offline_disabled_reason =
+                       HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
         set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
-         OPA_LINKDOWN_REASON_SMA_DISABLED);
+                            OPA_LINKDOWN_REASON_SMA_DISABLED);
         set_link_state(ppd, HLS_DN_OFFLINE);
   
         /* disable the port */
@@@ -9171,14 -9440,14 +9437,14 @@@ static inline int init_cpu_counters(str
   
         ppd = (struct hfi1_pportdata *)(dd + 1);
         for (i = 0; i < dd->num_pports; i++, ppd++) {
-               ppd->ibport_data.rc_acks = NULL;
-               ppd->ibport_data.rc_qacks = NULL;
-               ppd->ibport_data.rc_acks = alloc_percpu(u64);
-               ppd->ibport_data.rc_qacks = alloc_percpu(u64);
-               ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
-               if ((ppd->ibport_data.rc_acks == NULL) ||
-                   (ppd->ibport_data.rc_delayed_comp == NULL) ||
-                   (ppd->ibport_data.rc_qacks == NULL))
+               ppd->ibport_data.rvp.rc_acks = NULL;
+               ppd->ibport_data.rvp.rc_qacks = NULL;
+               ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+               ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+               ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+               if (!ppd->ibport_data.rvp.rc_acks ||
+                   !ppd->ibport_data.rvp.rc_delayed_comp ||
+                   !ppd->ibport_data.rvp.rc_qacks)
                         return -ENOMEM;
         }
   
@@@ -9213,8 -9482,8 +9479,8 @@@ void hfi1_put_tid(struct hfi1_devdata *
                 pa = 0;
         } else if (type > PT_INVALID) {
                 dd_dev_err(dd,
-                       "unexpected receive array type %u for index %u, not handled\n",
-                       type, index);
+                          "unexpected receive array type %u for index %u, not handled\n",
+                          type, index);
                 goto done;
         }
   
@@@ -9429,12 -9698,15 +9695,15 @@@ static void set_send_length(struct hfi1
         /* all kernel receive contexts have the same hdrqentsize */
         for (i = 0; i < ppd->vls_supported; i++) {
                 sc_set_cr_threshold(dd->vld[i].sc,
-                       sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
-                               dd->rcd[0]->rcvhdrqentsize));
+                                   sc_mtu_to_threshold(dd->vld[i].sc,
+                                                       dd->vld[i].mtu,
+                                                       dd->rcd[0]->
+                                                       rcvhdrqentsize));
         }
         sc_set_cr_threshold(dd->vld[15].sc,
-               sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
-                       dd->rcd[0]->rcvhdrqentsize));
+                           sc_mtu_to_threshold(dd->vld[15].sc,
+                                               dd->vld[15].mtu,
+                                               dd->rcd[0]->rcvhdrqentsize));
   
         /* Adjust maximum MTU for the port in DC */
         dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@@ -9460,7 -9732,7 +9729,7 @@@ static void set_lidlmc(struct hfi1_ppor
         c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
                 | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
         c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
-                       << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
+                       << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
               ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
                         << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
@@@ -9495,8 -9767,8 +9764,8 @@@ static int wait_phy_linkstate(struct hf
                         break;
                 if (time_after(jiffies, timeout)) {
                         dd_dev_err(dd,
-                               "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
-                               state, curr_state);
+                                  "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+                                  state, curr_state);
                         return -ETIMEDOUT;
                 }
                 usleep_range(1950, 2050); /* sleep 2ms-ish */
@@@ -9539,17 -9811,18 +9808,18 @@@ static int goto_offline(struct hfi1_ppo
   
         if (do_transition) {
                 ret = set_physical_link_state(dd,
-                       PLS_OFFLINE | (rem_reason << 8));
+                                             (rem_reason << 8) | PLS_OFFLINE);
   
                 if (ret != HCMD_SUCCESS) {
                         dd_dev_err(dd,
-                               "Failed to transition to Offline link state, return %d\n",
-                               ret);
+                                  "Failed to transition to Offline link state, return %d\n",
+                                  ret);
                         return -EINVAL;
                 }
-               if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
+               if (ppd->offline_disabled_reason ==
+                               HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
                         ppd->offline_disabled_reason =
-                       OPA_LINKDOWN_REASON_TRANSIENT;
+                       HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
         }
   
         if (do_wait) {
@@@ -9570,6 -9843,22 +9840,22 @@@
         write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
         ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
   
+       if (ppd->port_type == PORT_TYPE_QSFP &&
+           ppd->qsfp_info.limiting_active &&
+           qsfp_mod_present(ppd)) {
+               int ret;
+ 
+               ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT);
+               if (ret == 0) {
+                       set_qsfp_tx(ppd, 0);
+                       release_chip_resource(dd, qsfp_resource(dd));
+               } else {
+                       /* not fatal, but should warn */
+                       dd_dev_err(dd,
+                                  "Unable to acquire lock to turn off QSFP TX\n");
+               }
+       }
+ 
         /*
          * The LNI has a mandatory wait time after the physical state
          * moves to Offline.Quiet.  The wait time may be different
@@@ -9582,7 -9871,7 +9868,7 @@@
         ret = wait_fm_ready(dd, 7000);
         if (ret) {
                 dd_dev_err(dd,
-                       "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
+                          "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
                 /* state is really offline, so make it so */
                 ppd->host_link_state = HLS_DN_OFFLINE;
                 return ret;
@@@ -9605,8 -9894,8 +9891,8 @@@
                 read_last_local_state(dd, &last_local_state);
                 read_last_remote_state(dd, &last_remote_state);
                 dd_dev_err(dd,
-                       "LNI failure last states: local 0x%08x, remote 0x%08x\n",
-                       last_local_state, last_remote_state);
+                          "LNI failure last states: local 0x%08x, remote 0x%08x\n",
+                          last_local_state, last_remote_state);
         }
   
         /* the active link width (downgrade) is 0 on link down */
@@@ -9754,14 -10043,14 +10040,14 @@@ int set_link_state(struct hfi1_pportdat
                 state = dd->link_default;
   
         /* interpret poll -> poll as a link bounce */
-       poll_bounce = ppd->host_link_state == HLS_DN_POLL
-                               && state == HLS_DN_POLL;
+       poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
+                     state == HLS_DN_POLL;
   
         dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
-               link_state_name(ppd->host_link_state),
-               link_state_name(orig_new_state),
-               poll_bounce ? "(bounce) " : "",
-               link_state_reason_name(ppd, state));
+                   link_state_name(ppd->host_link_state),
+                   link_state_name(orig_new_state),
+                   poll_bounce ? "(bounce) " : "",
+                   link_state_reason_name(ppd, state));
   
         was_up = !!(ppd->host_link_state & HLS_UP);
   
@@@ -9782,8 -10071,8 +10068,8 @@@
   
         switch (state) {
         case HLS_UP_INIT:
-               if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
-                           || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
+               if (ppd->host_link_state == HLS_DN_POLL &&
+                   (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
                         /*
                          * Quick link up jumps from polling to here.
                          *
@@@ -9791,7 -10080,7 +10077,7 @@@
                          * simulator jumps from polling to link up.
                          * Accept that here.
                          */
-                       /* OK */;
+                       /* OK */
                 } else if (ppd->host_link_state != HLS_GOING_UP) {
                         goto unexpected;
                 }
@@@ -9802,8 -10091,8 +10088,8 @@@
                         /* logical state didn't change, stay at going_up */
                         ppd->host_link_state = HLS_GOING_UP;
                         dd_dev_err(dd,
-                               "%s: logical state did not change to INIT\n",
-                               __func__);
+                                  "%s: logical state did not change to INIT\n",
+                                  __func__);
                 } else {
                         /* clear old transient LINKINIT_REASON code */
                         if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
@@@ -9827,8 -10116,8 +10113,8 @@@
                         /* logical state didn't change, stay at init */
                         ppd->host_link_state = HLS_UP_INIT;
                         dd_dev_err(dd,
-                               "%s: logical state did not change to ARMED\n",
-                               __func__);
+                                  "%s: logical state did not change to ARMED\n",
+                                  __func__);
                 }
                 /*
                  * The simulator does not currently implement SMA messages,
@@@ -9849,15 -10138,14 +10135,14 @@@
                         /* logical state didn't change, stay at armed */
                         ppd->host_link_state = HLS_UP_ARMED;
                         dd_dev_err(dd,
-                               "%s: logical state did not change to ACTIVE\n",
-                               __func__);
+                                  "%s: logical state did not change to ACTIVE\n",
+                                  __func__);
                 } else {
- 
                         /* tell all engines to go running */
                         sdma_all_running(dd);
   
                         /* Signal the IB layer that the port has went active */
-                       event.device = &dd->verbs_dev.ibdev;
+                       event.device = &dd->verbs_dev.rdi.ibdev;
                         event.element.port_num = ppd->port;
                         event.event = IB_EVENT_PORT_ACTIVE;
                 }
@@@ -9884,6 -10172,7 +10169,7 @@@
                                 ppd->link_enabled = 1;
                 }
   
+               set_all_slowpath(ppd->dd);
                 ret = set_local_link_attributes(ppd);
                 if (ret)
                         break;
@@@ -9898,12 -10187,13 +10184,13 @@@
                         ret1 = set_physical_link_state(dd, PLS_POLLING);
                         if (ret1 != HCMD_SUCCESS) {
                                 dd_dev_err(dd,
-                                       "Failed to transition to Polling link state, return 0x%x\n",
-                                       ret1);
+                                          "Failed to transition to Polling link state, return 0x%x\n",
+                                          ret1);
                                 ret = -EINVAL;
                         }
                 }
-               ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+               ppd->offline_disabled_reason =
+                       HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
                 /*
                  * If an error occurred above, go back to offline.  The
                  * caller may reschedule another attempt.
@@@ -9928,8 -10218,8 +10215,8 @@@
                 ret1 = set_physical_link_state(dd, PLS_DISABLED);
                 if (ret1 != HCMD_SUCCESS) {
                         dd_dev_err(dd,
-                               "Failed to transition to Disabled link state, return 0x%x\n",
-                               ret1);
+                                  "Failed to transition to Disabled link state, return 0x%x\n",
+                                  ret1);
                         ret = -EINVAL;
                         break;
                 }
@@@ -9957,8 -10247,8 +10244,8 @@@
                 ret1 = set_physical_link_state(dd, PLS_LINKUP);
                 if (ret1 != HCMD_SUCCESS) {
                         dd_dev_err(dd,
-                               "Failed to transition to link up state, return 0x%x\n",
-                               ret1);
+                                  "Failed to transition to link up state, return 0x%x\n",
+                                  ret1);
                         ret = -EINVAL;
                         break;
                 }
@@@ -9969,7 -10259,7 +10256,7 @@@
         case HLS_LINK_COOLDOWN:         /* transient within goto_offline() */
         default:
                 dd_dev_info(dd, "%s: state 0x%x: not supported\n",
-                       __func__, state);
+                           __func__, state);
                 ret = -EINVAL;
                 break;
         }
@@@ -9989,8 -10279,8 +10276,8 @@@
   
   unexpected:
         dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
-               __func__, link_state_name(ppd->host_link_state),
-               link_state_name(state));
+                  __func__, link_state_name(ppd->host_link_state),
+                  link_state_name(state));
         ret = -EINVAL;
   
   done:
@@@ -10016,7 -10306,7 +10303,7 @@@ int hfi1_set_ib_cfg(struct hfi1_pportda
                  * The VL Arbitrator high limit is sent in units of 4k
                  * bytes, while HFI stores it in units of 64 bytes.
                  */
-               val *= 4096/64;
+               val *= 4096 / 64;
                 reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
                         << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
                 write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
@@@ -10031,12 -10321,6 +10318,6 @@@
                         ppd->vls_operational = val;
                         if (!ppd->port)
                                 ret = -EINVAL;
-                       else
-                               ret = sdma_map_init(
-                                       ppd->dd,
-                                       ppd->port - 1,
-                                       val,
-                                       NULL);
                 }
                 break;
         /*
@@@ -10084,8 -10368,8 +10365,8 @@@
         default:
                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
                         dd_dev_info(ppd->dd,
-                         "%s: which %s, val 0x%x: not implemented\n",
-                         __func__, ib_cfg_name(which), val);
+                                   "%s: which %s, val 0x%x: not implemented\n",
+                                   __func__, ib_cfg_name(which), val);
                 break;
         }
         return ret;
@@@ -10152,6 -10436,7 +10433,7 @@@ static int vl_arb_match_cache(struct vl
   {
         return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
   }
+ 
   /* end functions related to vl arbitration table caching */
   
   static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
@@@ -10239,7 -10524,7 +10521,7 @@@ static int get_buffer_control(struct hf
   
         /* OPA and HFI have a 1-1 mapping */
         for (i = 0; i < TXE_NUM_DATA_VL; i++)
-               read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
+               read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8 * i), &bc->vl[i]);
   
         /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
         read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
@@@ -10293,41 -10578,41 +10575,41 @@@ static void get_vlarb_preempt(struct hf
   static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
   {
         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
-               DC_SC_VL_VAL(15_0,
-               0, dp->vlnt[0] & 0xf,
-               1, dp->vlnt[1] & 0xf,
-               2, dp->vlnt[2] & 0xf,
-               3, dp->vlnt[3] & 0xf,
-               4, dp->vlnt[4] & 0xf,
-               5, dp->vlnt[5] & 0xf,
-               6, dp->vlnt[6] & 0xf,
-               7, dp->vlnt[7] & 0xf,
-               8, dp->vlnt[8] & 0xf,
-               9, dp->vlnt[9] & 0xf,
-               10, dp->vlnt[10] & 0xf,
-               11, dp->vlnt[11] & 0xf,
-               12, dp->vlnt[12] & 0xf,
-               13, dp->vlnt[13] & 0xf,
-               14, dp->vlnt[14] & 0xf,
-               15, dp->vlnt[15] & 0xf));
+                 DC_SC_VL_VAL(15_0,
+                              0, dp->vlnt[0] & 0xf,
+                              1, dp->vlnt[1] & 0xf,
+                              2, dp->vlnt[2] & 0xf,
+                              3, dp->vlnt[3] & 0xf,
+                              4, dp->vlnt[4] & 0xf,
+                              5, dp->vlnt[5] & 0xf,
+                              6, dp->vlnt[6] & 0xf,
+                              7, dp->vlnt[7] & 0xf,
+                              8, dp->vlnt[8] & 0xf,
+                              9, dp->vlnt[9] & 0xf,
+                              10, dp->vlnt[10] & 0xf,
+                              11, dp->vlnt[11] & 0xf,
+                              12, dp->vlnt[12] & 0xf,
+                              13, dp->vlnt[13] & 0xf,
+                              14, dp->vlnt[14] & 0xf,
+                              15, dp->vlnt[15] & 0xf));
         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
-               DC_SC_VL_VAL(31_16,
-               16, dp->vlnt[16] & 0xf,
-               17, dp->vlnt[17] & 0xf,
-               18, dp->vlnt[18] & 0xf,
-               19, dp->vlnt[19] & 0xf,
-               20, dp->vlnt[20] & 0xf,
-               21, dp->vlnt[21] & 0xf,
-               22, dp->vlnt[22] & 0xf,
-               23, dp->vlnt[23] & 0xf,
-               24, dp->vlnt[24] & 0xf,
-               25, dp->vlnt[25] & 0xf,
-               26, dp->vlnt[26] & 0xf,
-               27, dp->vlnt[27] & 0xf,
-               28, dp->vlnt[28] & 0xf,
-               29, dp->vlnt[29] & 0xf,
-               30, dp->vlnt[30] & 0xf,
-               31, dp->vlnt[31] & 0xf));
+                 DC_SC_VL_VAL(31_16,
+                              16, dp->vlnt[16] & 0xf,
+                              17, dp->vlnt[17] & 0xf,
+                              18, dp->vlnt[18] & 0xf,
+                              19, dp->vlnt[19] & 0xf,
+                              20, dp->vlnt[20] & 0xf,
+                              21, dp->vlnt[21] & 0xf,
+                              22, dp->vlnt[22] & 0xf,
+                              23, dp->vlnt[23] & 0xf,
+                              24, dp->vlnt[24] & 0xf,
+                              25, dp->vlnt[25] & 0xf,
+                              26, dp->vlnt[26] & 0xf,
+                              27, dp->vlnt[27] & 0xf,
+                              28, dp->vlnt[28] & 0xf,
+                              29, dp->vlnt[29] & 0xf,
+                              30, dp->vlnt[30] & 0xf,
+                              31, dp->vlnt[31] & 0xf));
   }
   
   static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
@@@ -10335,7 -10620,7 +10617,7 @@@
   {
         if (limit != 0)
                 dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
-                       what, (int)limit, idx);
+                           what, (int)limit, idx);
   }
   
   /* change only the shared limit portion of SendCmGLobalCredit */
@@@ -10413,14 -10698,14 +10695,14 @@@ static void wait_for_vl_status_clear(st
         }
   
         dd_dev_err(dd,
-               "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
-               which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
+                  "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
+                  which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
         /*
          * If this occurs, it is likely there was a credit loss on the link.
          * The only recovery from that is a link bounce.
          */
         dd_dev_err(dd,
-               "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
+                  "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
   }
   
   /*
@@@ -10447,13 -10732,15 +10729,15 @@@
    * raise = if the new limit is higher than the current value (may be changed
    *    earlier in the algorithm), set the new limit to the new value
    */
- static int set_buffer_control(struct hfi1_devdata *dd,
-                             struct buffer_control *new_bc)
+ int set_buffer_control(struct hfi1_pportdata *ppd,
+                      struct buffer_control *new_bc)
   {
+       struct hfi1_devdata *dd = ppd->dd;
         u64 changing_mask, ld_mask, stat_mask;
         int change_count;
         int i, use_all_mask;
         int this_shared_changing;
+       int vl_count = 0, ret;
         /*
          * A0: add the variable any_shared_limit_changing below and in the
          * algorithm above.  If removing A0 support, it can be removed.
@@@ -10478,7 -10765,6 +10762,6 @@@
   #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
   #define NUM_USABLE_VLS 16     /* look at VL15 and less */
   
- 
         /* find the new total credits, do sanity check on unused VLs */
         for (i = 0; i < OPA_MAX_VLS; i++) {
                 if (valid_vl(i)) {
@@@ -10486,9 -10772,9 +10769,9 @@@
                         continue;
                 }
                 nonzero_msg(dd, i, "dedicated",
-                       be16_to_cpu(new_bc->vl[i].dedicated));
+                           be16_to_cpu(new_bc->vl[i].dedicated));
                 nonzero_msg(dd, i, "shared",
-                       be16_to_cpu(new_bc->vl[i].shared));
+                           be16_to_cpu(new_bc->vl[i].shared));
                 new_bc->vl[i].dedicated = 0;
                 new_bc->vl[i].shared = 0;
         }
@@@ -10502,8 -10788,10 +10785,10 @@@
          */
         memset(changing, 0, sizeof(changing));
         memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
-       /* NOTE: Assumes that the individual VL bits are adjacent and in
-          increasing order */
+       /*
+        * NOTE: Assumes that the individual VL bits are adjacent and in
+        * increasing order
+        */
         stat_mask =
                 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
         changing_mask = 0;
@@@ -10517,8 -10805,8 +10802,8 @@@
                                                 != cur_bc.vl[i].shared;
                 if (this_shared_changing)
                         any_shared_limit_changing = 1;
-               if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
-                               || this_shared_changing) {
+               if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated ||
+                   this_shared_changing) {
                         changing[i] = 1;
                         changing_mask |= stat_mask;
                         change_count++;
@@@ -10557,7 -10845,7 +10842,7 @@@
         }
   
         wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
-               "shared");
+                                "shared");
   
         if (change_count > 0) {
                 for (i = 0; i < NUM_USABLE_VLS; i++) {
@@@ -10566,7 -10854,8 +10851,8 @@@
   
                         if (lowering_dedicated[i]) {
                                 set_vl_dedicated(dd, i,
-                                       be16_to_cpu(new_bc->vl[i].dedicated));
+                                                be16_to_cpu(new_bc->
+                                                            vl[i].dedicated));
                                 cur_bc.vl[i].dedicated =
                                                 new_bc->vl[i].dedicated;
                         }
@@@ -10582,7 -10871,8 +10868,8 @@@
                         if (be16_to_cpu(new_bc->vl[i].dedicated) >
                                         be16_to_cpu(cur_bc.vl[i].dedicated))
                                 set_vl_dedicated(dd, i,
-                                       be16_to_cpu(new_bc->vl[i].dedicated));
+                                                be16_to_cpu(new_bc->
+                                                            vl[i].dedicated));
                 }
         }
   
@@@ -10598,13 -10888,35 +10885,35 @@@
   
         /* finally raise the global shared */
         if (be16_to_cpu(new_bc->overall_shared_limit) >
-                       be16_to_cpu(cur_bc.overall_shared_limit))
+           be16_to_cpu(cur_bc.overall_shared_limit))
                 set_global_shared(dd,
-                       be16_to_cpu(new_bc->overall_shared_limit));
+                                 be16_to_cpu(new_bc->overall_shared_limit));
   
         /* bracket the credit change with a total adjustment */
         if (new_total < cur_total)
                 set_global_limit(dd, new_total);
+ 
+       /*
+        * Determine the actual number of operational VLS using the number of
+        * dedicated and shared credits for each VL.
+        */
+       if (change_count > 0) {
+               for (i = 0; i < TXE_NUM_DATA_VL; i++)
+                       if (be16_to_cpu(new_bc->vl[i].dedicated) > 0 ||
+                           be16_to_cpu(new_bc->vl[i].shared) > 0)
+                               vl_count++;
+               ppd->actual_vls_operational = vl_count;
+               ret = sdma_map_init(dd, ppd->port - 1, vl_count ?
+                                   ppd->actual_vls_operational :
+                                   ppd->vls_operational,
+                                   NULL);
+               if (ret == 0)
+                       ret = pio_map_init(dd, ppd->port - 1, vl_count ?
+                                          ppd->actual_vls_operational :
+                                          ppd->vls_operational, NULL);
+               if (ret)
+                       return ret;
+       }
         return 0;
   }
   
@@@ -10696,7 -11008,7 +11005,7 @@@ int fm_set_table(struct hfi1_pportdata 
                                      VL_ARB_LOW_PRIO_TABLE_SIZE, t);
                 break;
         case FM_TBL_BUFFER_CONTROL:
-               ret = set_buffer_control(ppd->dd, t);
+               ret = set_buffer_control(ppd, t);
                 break;
         case FM_TBL_SC2VLNT:
                 set_sc2vlnt(ppd->dd, t);
@@@ -10846,10 -11158,13 +11155,13 @@@ static void adjust_rcv_timeout(struct h
         }
   
         rcd->rcvavail_timeout = timeout;
-       /* timeout cannot be larger than rcv_intr_timeout_csr which has already
-          been verified to be in range */
+       /*
+        * timeout cannot be larger than rcv_intr_timeout_csr which has already
+        * been verified to be in range
+        */
         write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
-               (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
+                       (u64)timeout <<
+                       RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
   }
   
   void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
@@@ -10915,16 -11230,16 +11227,16 @@@ u32 hdrqempty(struct hfi1_ctxtdata *rcd
   static u32 encoded_size(u32 size)
   {
         switch (size) {
-       case   4*1024: return 0x1;
-       case   8*1024: return 0x2;
-       case  16*1024: return 0x3;
-       case  32*1024: return 0x4;
-       case  64*1024: return 0x5;
-       case 128*1024: return 0x6;
-       case 256*1024: return 0x7;
-       case 512*1024: return 0x8;
-       case   1*1024*1024: return 0x9;
-       case   2*1024*1024: return 0xa;
+       case   4 * 1024: return 0x1;
+       case   8 * 1024: return 0x2;
+       case  16 * 1024: return 0x3;
+       case  32 * 1024: return 0x4;
+       case  64 * 1024: return 0x5;
+       case 128 * 1024: return 0x6;
+       case 256 * 1024: return 0x7;
+       case 512 * 1024: return 0x8;
+       case   1 * 1024 * 1024: return 0x9;
+       case   2 * 1024 * 1024: return 0xa;
         }
         return 0x1;     /* if invalid, go with the minimum size */
   }
@@@ -10943,8 -11258,8 +11255,8 @@@ void hfi1_rcvctrl(struct hfi1_devdata *
   
         rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
         /* if the context already enabled, don't do the extra steps */
-       if ((op & HFI1_RCVCTRL_CTXT_ENB)
-                       && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
+       if ((op & HFI1_RCVCTRL_CTXT_ENB) &&
+           !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
                 /* reset the tail and hdr addresses, and sequence count */
                 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
                                 rcd->rcvhdrq_phys);
@@@ -11018,6 -11333,7 +11330,7 @@@
                 if (dd->rcvhdrtail_dummy_physaddr) {
                         write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
                                         dd->rcvhdrtail_dummy_physaddr);
+                       /* Enabling RcvCtxtCtrl.TailUpd is intentional. */
                         rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
                 }
   
@@@ -11029,15 -11345,20 +11342,20 @@@
                 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
         if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
                 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
-       if (op & HFI1_RCVCTRL_TAILUPD_DIS)
-               rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+       if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
+               /* See comment on RcvCtxtCtrl.TailUpd above */
+               if (!(op & HFI1_RCVCTRL_CTXT_DIS))
+                       rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+       }
         if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
                 rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
         if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
                 rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
-               /* In one-packet-per-eager mode, the size comes from
-                  the RcvArray entry. */
+               /*
+                * In one-packet-per-eager mode, the size comes from
+                * the RcvArray entry.
+                */
                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
                 rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
         }
@@@ -11056,19 -11377,19 +11374,19 @@@
         write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
   
         /* work around sticky RcvCtxtStatus.BlockedRHQFull */
-       if (did_enable
-           && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
+       if (did_enable &&
+           (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
                 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
                 if (reg != 0) {
                         dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
-                               ctxt, reg);
+                                   ctxt, reg);
                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
                         reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
                         dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
-                               ctxt, reg, reg == 0 ? "not" : "still");
+                                   ctxt, reg, reg == 0 ? "not" : "still");
                 }
         }
   
@@@ -11079,7 -11400,7 +11397,7 @@@
                  */
                 /* set interrupt timeout */
                 write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
-                       (u64)rcd->rcvavail_timeout <<
+                               (u64)rcd->rcvavail_timeout <<
                                 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
   
                 /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
@@@ -11097,28 -11418,19 +11415,19 @@@
                                 dd->rcvhdrtail_dummy_physaddr);
   }
   
- u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-                   u64 **cntrp)
+ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
   {
         int ret;
         u64 val = 0;
   
         if (namep) {
                 ret = dd->cntrnameslen;
-               if (pos != 0) {
-                       dd_dev_err(dd, "read_cntrs does not support indexing");
-                       return 0;
-               }
                 *namep = dd->cntrnames;
         } else {
                 const struct cntr_entry *entry;
                 int i, j;
   
                 ret = (dd->ndevcntrs) * sizeof(u64);
-               if (pos != 0) {
-                       dd_dev_err(dd, "read_cntrs does not support indexing");
-                       return 0;
-               }
   
                 /* Get the start of the block of counters */
                 *cntrp = dd->cntrs;
@@@ -11147,6 -11459,20 +11456,20 @@@
                                                 dd->cntrs[entry->offset + j] =
                                                                             val;
                                         }
+                               } else if (entry->flags & CNTR_SDMA) {
+                                       hfi1_cdbg(CNTR,
+                                                 "\t Per SDMA Engine\n");
+                                       for (j = 0; j < dd->chip_sdma_engines;
+                                            j++) {
+                                               val =
+                                               entry->rw_cntr(entry, dd, j,
+                                                              CNTR_MODE_R, 0);
+                                               hfi1_cdbg(CNTR,
+                                                         "\t\tRead 0x%llx for %d\n",
+                                                         val, j);
+                                               dd->cntrs[entry->offset + j] =
+                                                                       val;
+                                       }
                                 } else {
                                         val = entry->rw_cntr(entry, dd,
                                                         CNTR_INVALID_VL,
@@@ -11163,30 -11489,19 +11486,19 @@@
   /*
    * Used by sysfs to create files for hfi stats to read
    */
- u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-                       char **namep, u64 **cntrp)
+ u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
   {
         int ret;
         u64 val = 0;
   
         if (namep) {
-               ret = dd->portcntrnameslen;
-               if (pos != 0) {
-                       dd_dev_err(dd, "index not supported");
-                       return 0;
-               }
-               *namep = dd->portcntrnames;
+               ret = ppd->dd->portcntrnameslen;
+               *namep = ppd->dd->portcntrnames;
         } else {
                 const struct cntr_entry *entry;
-               struct hfi1_pportdata *ppd;
                 int i, j;
   
-               ret = (dd->nportcntrs) * sizeof(u64);
-               if (pos != 0) {
-                       dd_dev_err(dd, "indexing not supported");
-                       return 0;
-               }
-               ppd = (struct hfi1_pportdata *)(dd + 1 + port);
+               ret = ppd->dd->nportcntrs * sizeof(u64);
                 *cntrp = ppd->cntrs;
   
                 for (i = 0; i < PORT_CNTR_LAST; i++) {
@@@ -11235,14 -11550,14 +11547,14 @@@ static void free_cntrs(struct hfi1_devd
         for (i = 0; i < dd->num_pports; i++, ppd++) {
                 kfree(ppd->cntrs);
                 kfree(ppd->scntrs);
-               free_percpu(ppd->ibport_data.rc_acks);
-               free_percpu(ppd->ibport_data.rc_qacks);
-               free_percpu(ppd->ibport_data.rc_delayed_comp);
+               free_percpu(ppd->ibport_data.rvp.rc_acks);
+               free_percpu(ppd->ibport_data.rvp.rc_qacks);
+               free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
                 ppd->cntrs = NULL;
                 ppd->scntrs = NULL;
-               ppd->ibport_data.rc_acks = NULL;
-               ppd->ibport_data.rc_qacks = NULL;
-               ppd->ibport_data.rc_delayed_comp = NULL;
+               ppd->ibport_data.rvp.rc_acks = NULL;
+               ppd->ibport_data.rvp.rc_qacks = NULL;
+               ppd->ibport_data.rvp.rc_delayed_comp = NULL;
         }
         kfree(dd->portcntrnames);
         dd->portcntrnames = NULL;
@@@ -11510,11 -11825,13 +11822,13 @@@ mod_timer(&dd->synth_stats_timer, jiffi
   #define C_MAX_NAME 13 /* 12 chars + one for /0 */
   static int init_cntrs(struct hfi1_devdata *dd)
   {
-       int i, rcv_ctxts, index, j;
+       int i, rcv_ctxts, j;
         size_t sz;
         char *p;
         char name[C_MAX_NAME];
         struct hfi1_pportdata *ppd;
+       const char *bit_type_32 = ",32";
+       const int bit_type_32_sz = strlen(bit_type_32);
   
         /* set up the stats timer; the add_timer is done at the end */
         setup_timer(&dd->synth_stats_timer, update_synth_timer,
@@@ -11527,49 -11844,57 +11841,57 @@@
         /* size names and determine how many we have*/
         dd->ndevcntrs = 0;
         sz = 0;
-       index = 0;
   
         for (i = 0; i < DEV_CNTR_LAST; i++) {
-               hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
                         hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
                         continue;
                 }
   
                 if (dev_cntrs[i].flags & CNTR_VL) {
-                       hfi1_dbg_early("\tProcessing VL cntr\n");
-                       dev_cntrs[i].offset = index;
+                       dev_cntrs[i].offset = dd->ndevcntrs;
                         for (j = 0; j < C_VL_COUNT; j++) {
-                               memset(name, '\0', C_MAX_NAME);
                                 snprintf(name, C_MAX_NAME, "%s%d",
-                                       dev_cntrs[i].name,
-                                       vl_from_idx(j));
+                                        dev_cntrs[i].name, vl_from_idx(j));
+                               sz += strlen(name);
+                               /* Add ",32" for 32-bit counters */
+                               if (dev_cntrs[i].flags & CNTR_32BIT)
+                                       sz += bit_type_32_sz;
+                               sz++;
+                               dd->ndevcntrs++;
+                       }
+               } else if (dev_cntrs[i].flags & CNTR_SDMA) {
+                       dev_cntrs[i].offset = dd->ndevcntrs;
+                       for (j = 0; j < dd->chip_sdma_engines; j++) {
+                               snprintf(name, C_MAX_NAME, "%s%d",
+                                        dev_cntrs[i].name, j);
                                 sz += strlen(name);
+                               /* Add ",32" for 32-bit counters */
+                               if (dev_cntrs[i].flags & CNTR_32BIT)
+                                       sz += bit_type_32_sz;
                                 sz++;
-                               hfi1_dbg_early("\t\t%s\n", name);
                                 dd->ndevcntrs++;
-                               index++;
                         }
                 } else {
-                       /* +1 for newline  */
+                       /* +1 for newline. */
                         sz += strlen(dev_cntrs[i].name) + 1;
+                       /* Add ",32" for 32-bit counters */
+                       if (dev_cntrs[i].flags & CNTR_32BIT)
+                               sz += bit_type_32_sz;
+                       dev_cntrs[i].offset = dd->ndevcntrs;
                         dd->ndevcntrs++;
-                       dev_cntrs[i].offset = index;
-                       index++;
-                       hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
                 }
         }
   
         /* allocate space for the counter values */
-       dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+       dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
         if (!dd->cntrs)
                 goto bail;
   
-       dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+       dd->scntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
         if (!dd->scntrs)
                 goto bail;
   
- 
         /* allocate space for the counter names */
         dd->cntrnameslen = sz;
         dd->cntrnames = kmalloc(sz, GFP_KERNEL);
@@@ -11577,27 -11902,51 +11899,51 @@@
                 goto bail;
   
         /* fill in the names */
-       for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
+       for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) {
                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
                         /* Nothing */
-               } else {
-                       if (dev_cntrs[i].flags & CNTR_VL) {
-                               for (j = 0; j < C_VL_COUNT; j++) {
-                                       memset(name, '\0', C_MAX_NAME);
-                                       snprintf(name, C_MAX_NAME, "%s%d",
-                                               dev_cntrs[i].name,
-                                               vl_from_idx(j));
-                                       memcpy(p, name, strlen(name));
-                                       p += strlen(name);
-                                       *p++ = '\n';
+               } else if (dev_cntrs[i].flags & CNTR_VL) {
+                       for (j = 0; j < C_VL_COUNT; j++) {
+                               snprintf(name, C_MAX_NAME, "%s%d",
+                                        dev_cntrs[i].name,
+                                        vl_from_idx(j));
+                               memcpy(p, name, strlen(name));
+                               p += strlen(name);
+ 
+                               /* Counter is 32 bits */
+                               if (dev_cntrs[i].flags & CNTR_32BIT) {
+                                       memcpy(p, bit_type_32, bit_type_32_sz);
+                                       p += bit_type_32_sz;
                                 }
-                       } else {
-                               memcpy(p, dev_cntrs[i].name,
-                                      strlen(dev_cntrs[i].name));
-                               p += strlen(dev_cntrs[i].name);
+ 
+                               *p++ = '\n';
+                       }
+               } else if (dev_cntrs[i].flags & CNTR_SDMA) {
+                       for (j = 0; j < dd->chip_sdma_engines; j++) {
+                               snprintf(name, C_MAX_NAME, "%s%d",
+                                        dev_cntrs[i].name, j);
+                               memcpy(p, name, strlen(name));
+                               p += strlen(name);
+ 
+                               /* Counter is 32 bits */
+                               if (dev_cntrs[i].flags & CNTR_32BIT) {
+                                       memcpy(p, bit_type_32, bit_type_32_sz);
+                                       p += bit_type_32_sz;
+                               }
+ 
                                 *p++ = '\n';
                         }
-                       index++;
+               } else {
+                       memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name));
+                       p += strlen(dev_cntrs[i].name);
+ 
+                       /* Counter is 32 bits */
+                       if (dev_cntrs[i].flags & CNTR_32BIT) {
+                               memcpy(p, bit_type_32, bit_type_32_sz);
+                               p += bit_type_32_sz;
+                       }
+ 
+                       *p++ = '\n';
                 }
         }
   
@@@ -11620,31 -11969,31 +11966,31 @@@
         sz = 0;
         dd->nportcntrs = 0;
         for (i = 0; i < PORT_CNTR_LAST; i++) {
-               hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
                 if (port_cntrs[i].flags & CNTR_DISABLED) {
                         hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
                         continue;
                 }
   
                 if (port_cntrs[i].flags & CNTR_VL) {
-                       hfi1_dbg_early("\tProcessing VL cntr\n");
                         port_cntrs[i].offset = dd->nportcntrs;
                         for (j = 0; j < C_VL_COUNT; j++) {
-                               memset(name, '\0', C_MAX_NAME);
                                 snprintf(name, C_MAX_NAME, "%s%d",
-                                       port_cntrs[i].name,
-                                       vl_from_idx(j));
+                                        port_cntrs[i].name, vl_from_idx(j));
                                 sz += strlen(name);
+                               /* Add ",32" for 32-bit counters */
+                               if (port_cntrs[i].flags & CNTR_32BIT)
+                                       sz += bit_type_32_sz;
                                 sz++;
-                               hfi1_dbg_early("\t\t%s\n", name);
                                 dd->nportcntrs++;
                         }
                 } else {
-                       /* +1 for newline  */
+                       /* +1 for newline */
                         sz += strlen(port_cntrs[i].name) + 1;
+                       /* Add ",32" for 32-bit counters */
+                       if (port_cntrs[i].flags & CNTR_32BIT)
+                               sz += bit_type_32_sz;
                         port_cntrs[i].offset = dd->nportcntrs;
                         dd->nportcntrs++;
-                       hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
                 }
         }
   
@@@ -11661,18 -12010,30 +12007,30 @@@
   
                 if (port_cntrs[i].flags & CNTR_VL) {
                         for (j = 0; j < C_VL_COUNT; j++) {
-                               memset(name, '\0', C_MAX_NAME);
                                 snprintf(name, C_MAX_NAME, "%s%d",
-                                       port_cntrs[i].name,
-                                       vl_from_idx(j));
+                                        port_cntrs[i].name, vl_from_idx(j));
                                 memcpy(p, name, strlen(name));
                                 p += strlen(name);
+ 
+                               /* Counter is 32 bits */
+                               if (port_cntrs[i].flags & CNTR_32BIT) {
+                                       memcpy(p, bit_type_32, bit_type_32_sz);
+                                       p += bit_type_32_sz;
+                               }
+ 
                                 *p++ = '\n';
                         }
                 } else {
                         memcpy(p, port_cntrs[i].name,
                                strlen(port_cntrs[i].name));
                         p += strlen(port_cntrs[i].name);
+ 
+                       /* Counter is 32 bits */
+                       if (port_cntrs[i].flags & CNTR_32BIT) {
+                               memcpy(p, bit_type_32, bit_type_32_sz);
+                               p += bit_type_32_sz;
+                       }
+ 
                         *p++ = '\n';
                 }
         }
@@@ -11700,14 -12061,13 +12058,13 @@@ bail
         return -ENOMEM;
   }
   
- 
   static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
   {
         switch (chip_lstate) {
         default:
                 dd_dev_err(dd,
-                        "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
-                        chip_lstate);
+                          "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+                          chip_lstate);
                 /* fall through */
         case LSTATE_DOWN:
                 return IB_PORT_DOWN;
@@@ -11726,7 -12086,7 +12083,7 @@@ u32 chip_to_opa_pstate(struct hfi1_devd
         switch (chip_pstate & 0xf0) {
         default:
                 dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
-                       chip_pstate);
+                          chip_pstate);
                 /* fall through */
         case PLS_DISABLED:
                 return IB_PORTPHYSSTATE_DISABLED;
@@@ -11792,7 -12152,7 +12149,7 @@@ u32 get_logical_state(struct hfi1_pport
         new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
         if (new_state != ppd->lstate) {
                 dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
-                       opa_lstate_name(new_state), new_state);
+                           opa_lstate_name(new_state), new_state);
                 ppd->lstate = new_state;
         }
         /*
@@@ -11851,18 -12211,17 +12208,17 @@@ static int wait_logical_linkstate(struc
   
   u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
   {
-       static u32 remembered_state = 0xff;
         u32 pstate;
         u32 ib_pstate;
   
         pstate = read_physical_state(ppd->dd);
         ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
-       if (remembered_state != ib_pstate) {
+       if (ppd->last_pstate != ib_pstate) {
                 dd_dev_info(ppd->dd,
-                       "%s: physical state changed to %s (0x%x), phy 0x%x\n",
-                       __func__, opa_pstate_name(ib_pstate), ib_pstate,
-                       pstate);
-               remembered_state = ib_pstate;
+                           "%s: physical state changed to %s (0x%x), phy 0x%x\n",
+                           __func__, opa_pstate_name(ib_pstate), ib_pstate,
+                           pstate);
+               ppd->last_pstate = ib_pstate;
         }
         return ib_pstate;
   }
@@@ -11906,7 -12265,7 +12262,7 @@@ u64 hfi1_gpio_mod(struct hfi1_devdata *
   
   int hfi1_init_ctxt(struct send_context *sc)
   {
-       if (sc != NULL) {
+       if (sc) {
                 struct hfi1_devdata *dd = sc->dd;
                 u64 reg;
                 u8 set = (sc->type == SC_USER ?
@@@ -11963,34 -12322,14 +12319,14 @@@ void set_intr_state(struct hfi1_devdat
          * In HFI, the mask needs to be 1 to allow interrupts.
          */
         if (enable) {
-               u64 cce_int_mask;
-               const int qsfp1_int_smask = QSFP1_INT % 64;
-               const int qsfp2_int_smask = QSFP2_INT % 64;
- 
                 /* enable all interrupts */
                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-                       write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
+                       write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
   
-               /*
-                * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
-                * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
-                * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
-                * the index of the appropriate CSR in the CCEIntMask CSR array
-                */
-               cce_int_mask = read_csr(dd, CCE_INT_MASK +
-                                               (8*(QSFP1_INT/64)));
-               if (dd->hfi1_id) {
-                       cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
-                       write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
-                                       cce_int_mask);
-               } else {
-                       cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
-                       write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
-                                       cce_int_mask);
-               }
+               init_qsfp_int(dd);
         } else {
                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-                       write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+                       write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
         }
   }
   
@@@ -12002,7 -12341,7 +12338,7 @@@ static void clear_all_interrupts(struc
         int i;
   
         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-               write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
+               write_csr(dd, CCE_INT_CLEAR + (8 * i), ~(u64)0);
   
         write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
         write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
@@@ -12037,10 -12376,9 +12373,9 @@@ static void clean_up_interrupts(struct 
                 struct hfi1_msix_entry *me = dd->msix_entries;
   
                 for (i = 0; i < dd->num_msix_entries; i++, me++) {
-                       if (me->arg == NULL) /* => no irq, no affinity */
-                               break;
-                       irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
-                                       NULL);
+                       if (!me->arg) /* => no irq, no affinity */
+                               continue;
+                       hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
                         free_irq(me->msix.vector, me->arg);
                 }
         } else {
@@@ -12061,8 -12399,6 +12396,6 @@@
         }
   
         /* clean structures */
-       for (i = 0; i < dd->num_msix_entries; i++)
-               free_cpumask_var(dd->msix_entries[i].mask);
         kfree(dd->msix_entries);
         dd->msix_entries = NULL;
         dd->num_msix_entries = 0;
@@@ -12085,10 -12421,10 +12418,10 @@@ static void remap_intr(struct hfi1_devd
         /* direct the chip source to the given MSI-X interrupt */
         m = isrc / 8;
         n = isrc % 8;
-       reg = read_csr(dd, CCE_INT_MAP + (8*m));
-       reg &= ~((u64)0xff << (8*n));
-       reg |= ((u64)msix_intr & 0xff) << (8*n);
-       write_csr(dd, CCE_INT_MAP + (8*m), reg);
+       reg = read_csr(dd, CCE_INT_MAP + (8 * m));
+       reg &= ~((u64)0xff << (8 * n));
+       reg |= ((u64)msix_intr & 0xff) << (8 * n);
+       write_csr(dd, CCE_INT_MAP + (8 * m), reg);
   }
   
   static void remap_sdma_interrupts(struct hfi1_devdata *dd,
@@@ -12101,12 -12437,12 +12434,12 @@@
          *      SDMAProgress
          *      SDMAIdle
          */
-       remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
-               msix_intr);
-       remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
-               msix_intr);
-       remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
-               msix_intr);
+       remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
+                  msix_intr);
+       remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
+                  msix_intr);
+       remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
+                  msix_intr);
   }
   
   static int request_intx_irq(struct hfi1_devdata *dd)
@@@ -12116,10 -12452,10 +12449,10 @@@
         snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
                  dd->unit);
         ret = request_irq(dd->pcidev->irq, general_interrupt,
-                                 IRQF_SHARED, dd->intx_name, dd);
+                         IRQF_SHARED, dd->intx_name, dd);
         if (ret)
                 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
-                               ret);
+                          ret);
         else
                 dd->requested_intx_irq = 1;
         return ret;
@@@ -12127,69 -12463,19 +12460,19 @@@
   
   static int request_msix_irqs(struct hfi1_devdata *dd)
   {
-       const struct cpumask *local_mask;
-       cpumask_var_t def, rcv;
-       bool def_ret, rcv_ret;
         int first_general, last_general;
         int first_sdma, last_sdma;
         int first_rx, last_rx;
-       int first_cpu, curr_cpu;
-       int rcv_cpu, sdma_cpu;
-       int i, ret = 0, possible;
-       int ht;
+       int i, ret = 0;
   
         /* calculate the ranges we are going to use */
         first_general = 0;
-       first_sdma = last_general = first_general + 1;
-       first_rx = last_sdma = first_sdma + dd->num_sdma;
+       last_general = first_general + 1;
+       first_sdma = last_general;
+       last_sdma = first_sdma + dd->num_sdma;
+       first_rx = last_sdma;
         last_rx = first_rx + dd->n_krcv_queues;
   
-       /*
-        * Interrupt affinity.
-        *
-        * non-rcv avail gets a default mask that
-        * starts as possible cpus with threads reset
-        * and each rcv avail reset.
-        *
-        * rcv avail gets node relative 1 wrapping back
-        * to the node relative 1 as necessary.
-        *
-        */
-       local_mask = cpumask_of_pcibus(dd->pcidev->bus);
-       /* if first cpu is invalid, use NUMA 0 */
-       if (cpumask_first(local_mask) >= nr_cpu_ids)
-               local_mask = topology_core_cpumask(0);
- 
-       def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
-       rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
-       if (!def_ret || !rcv_ret)
-               goto bail;
-       /* use local mask as default */
-       cpumask_copy(def, local_mask);
-       possible = cpumask_weight(def);
-       /* disarm threads from default */
-       ht = cpumask_weight(
-                       topology_sibling_cpumask(cpumask_first(local_mask)));
-       for (i = possible/ht; i < possible; i++)
-               cpumask_clear_cpu(i, def);
-       /* def now has full cores on chosen node*/
-       first_cpu = cpumask_first(def);
-       if (nr_cpu_ids >= first_cpu)
-               first_cpu++;
-       curr_cpu = first_cpu;
- 
-       /*  One context is reserved as control context */
-       for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
-               cpumask_clear_cpu(curr_cpu, def);
-               cpumask_set_cpu(curr_cpu, rcv);
-               curr_cpu = cpumask_next(curr_cpu, def);
-               if (curr_cpu >= nr_cpu_ids)
-                       break;
-       }
-       /* def mask has non-rcv, rcv has recv mask */
-       rcv_cpu = cpumask_first(rcv);
-       sdma_cpu = cpumask_first(def);
- 
         /*
          * Sanity check - the code expects all SDMA chip source
          * interrupts to be in the same CSR, starting at bit 0.  Verify
@@@ -12215,6 -12501,7 +12498,7 @@@
                         snprintf(me->name, sizeof(me->name),
                                  DRIVER_NAME "_%d", dd->unit);
                         err_info = "general";
+                       me->type = IRQ_GENERAL;
                 } else if (first_sdma <= i && i < last_sdma) {
                         idx = i - first_sdma;
                         sde = &dd->per_sdma[idx];
@@@ -12224,6 -12511,7 +12508,7 @@@
                                  DRIVER_NAME "_%d sdma%d", dd->unit, idx);
                         err_info = "sdma";
                         remap_sdma_interrupts(dd, idx, i);
+                       me->type = IRQ_SDMA;
                 } else if (first_rx <= i && i < last_rx) {
                         idx = i - first_rx;
                         rcd = dd->rcd[idx];
@@@ -12234,9 -12522,9 +12519,9 @@@
                          * Set the interrupt register and mask for this
                          * context's interrupt.
                          */
-                       rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
+                       rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
                         rcd->imask = ((u64)1) <<
-                                       ((IS_RCVAVAIL_START+idx) % 64);
+                                       ((IS_RCVAVAIL_START + idx) % 64);
                         handler = receive_context_interrupt;
                         thread = receive_context_thread;
                         arg = rcd;
@@@ -12244,25 -12532,27 +12529,27 @@@
                                  DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
                         err_info = "receive context";
                         remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+                       me->type = IRQ_RCVCTXT;
                 } else {
                         /* not in our expected range - complain, then
-                          ignore it */
+                        * ignore it
+                        */
                         dd_dev_err(dd,
-                               "Unexpected extra MSI-X interrupt %d\n", i);
+                                  "Unexpected extra MSI-X interrupt %d\n", i);
                         continue;
                 }
                 /* no argument, no interrupt */
-               if (arg == NULL)
+               if (!arg)
                         continue;
                 /* make sure the name is terminated */
-               me->name[sizeof(me->name)-1] = 0;
+               me->name[sizeof(me->name) - 1] = 0;
   
                 ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
-                                               me->name, arg);
+                                          me->name, arg);
                 if (ret) {
                         dd_dev_err(dd,
-                               "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
-                                err_info, me->msix.vector, idx, ret);
+                                  "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
+                                  err_info, me->msix.vector, idx, ret);
                         return ret;
                 }
                 /*
@@@ -12271,52 -12561,13 +12558,13 @@@
                  */
                 me->arg = arg;
   
-               if (!zalloc_cpumask_var(
-                       &dd->msix_entries[i].mask,
-                       GFP_KERNEL))
-                       goto bail;
-               if (handler == sdma_interrupt) {
-                       dd_dev_info(dd, "sdma engine %d cpu %d\n",
-                               sde->this_idx, sdma_cpu);
-                       sde->cpu = sdma_cpu;
-                       cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
-                       sdma_cpu = cpumask_next(sdma_cpu, def);
-                       if (sdma_cpu >= nr_cpu_ids)
-                               sdma_cpu = cpumask_first(def);
-               } else if (handler == receive_context_interrupt) {
-                       dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
-                                   (rcd->ctxt == HFI1_CTRL_CTXT) ?
-                                           cpumask_first(def) : rcv_cpu);
-                       if (rcd->ctxt == HFI1_CTRL_CTXT) {
-                               /* map to first default */
-                               cpumask_set_cpu(cpumask_first(def),
-                                               dd->msix_entries[i].mask);
-                       } else {
-                               cpumask_set_cpu(rcv_cpu,
-                                               dd->msix_entries[i].mask);
-                               rcv_cpu = cpumask_next(rcv_cpu, rcv);
-                               if (rcv_cpu >= nr_cpu_ids)
-                                       rcv_cpu = cpumask_first(rcv);
-                       }
-               } else {
-                       /* otherwise first def */
-                       dd_dev_info(dd, "%s cpu %d\n",
-                               err_info, cpumask_first(def));
-                       cpumask_set_cpu(
-                               cpumask_first(def), dd->msix_entries[i].mask);
-               }
-               irq_set_affinity_hint(
-                       dd->msix_entries[i].msix.vector,
-                       dd->msix_entries[i].mask);
+               ret = hfi1_get_irq_affinity(dd, me);
+               if (ret)
+                       dd_dev_err(dd,
+                                  "unable to pin IRQ %d\n", ret);
         }
   
- out:
-       free_cpumask_var(def);
-       free_cpumask_var(rcv);
         return ret;
- bail:
-       ret = -ENOMEM;
-       goto  out;
   }
   
   /*
@@@ -12333,7 -12584,7 +12581,7 @@@ static void reset_interrupts(struct hfi
   
         /* all chip interrupts map to MSI-X 0 */
         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-               write_csr(dd, CCE_INT_MAP + (8*i), 0);
+               write_csr(dd, CCE_INT_MAP + (8 * i), 0);
   }
   
   static int set_up_interrupts(struct hfi1_devdata *dd)
@@@ -12442,7 -12693,7 +12690,7 @@@ static int set_up_context_variables(str
                  */
                 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
         else
-               num_kernel_contexts = num_online_nodes();
+               num_kernel_contexts = num_online_nodes() + 1;
         num_kernel_contexts =
                 max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
         /*
@@@ -12483,13 -12734,14 +12731,14 @@@
         dd->num_rcv_contexts = total_contexts;
         dd->n_krcv_queues = num_kernel_contexts;
         dd->first_user_ctxt = num_kernel_contexts;
+       dd->num_user_contexts = num_user_contexts;
         dd->freectxts = num_user_contexts;
         dd_dev_info(dd,
-               "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
-               (int)dd->chip_rcv_contexts,
-               (int)dd->num_rcv_contexts,
-               (int)dd->n_krcv_queues,
-               (int)dd->num_rcv_contexts - dd->n_krcv_queues);
+                   "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
+                   (int)dd->chip_rcv_contexts,
+                   (int)dd->num_rcv_contexts,
+                   (int)dd->n_krcv_queues,
+                   (int)dd->num_rcv_contexts - dd->n_krcv_queues);
   
         /*
          * Receive array allocation:
@@@ -12515,8 -12767,8 +12764,8 @@@
                 dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
                         dd->rcv_entries.group_size;
                 dd_dev_info(dd,
-                  "RcvArray group count too high, change to %u\n",
-                  dd->rcv_entries.ngroups);
+                           "RcvArray group count too high, change to %u\n",
+                           dd->rcv_entries.ngroups);
                 dd->rcv_entries.nctxt_extra = 0;
         }
         /*
@@@ -12582,7 -12834,7 +12831,7 @@@ static void write_uninitialized_csrs_an
   
         /* CceIntMap */
         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-               write_csr(dd, CCE_INT_MAP+(8*i), 0);
+               write_csr(dd, CCE_INT_MAP + (8 * i), 0);
   
         /* SendCtxtCreditReturnAddr */
         for (i = 0; i < dd->chip_send_contexts; i++)
@@@ -12590,8 -12842,10 +12839,10 @@@
   
         /* PIO Send buffers */
         /* SDMA Send buffers */
-       /* These are not normally read, and (presently) have no method
-          to be read, so are not pre-initialized */
+       /*
+        * These are not normally read, and (presently) have no method
+        * to be read, so are not pre-initialized
+        */
   
         /* RcvHdrAddr */
         /* RcvHdrTailAddr */
@@@ -12600,13 -12854,13 +12851,13 @@@
                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
-                       write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
+                       write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), 0);
         }
   
         /* RcvArray */
         for (i = 0; i < dd->chip_rcv_array_count; i++)
-               write_csr(dd, RCV_ARRAY + (8*i),
-                                       RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+               write_csr(dd, RCV_ARRAY + (8 * i),
+                         RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
   
         /* RcvQPMapTable */
         for (i = 0; i < 32; i++)
@@@ -12638,8 -12892,8 +12889,8 @@@ static void clear_cce_status(struct hfi
                         return;
                 if (time_after(jiffies, timeout)) {
                         dd_dev_err(dd,
-                               "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
-                               status_bits, reg & status_bits);
+                                  "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
+                                  status_bits, reg & status_bits);
                         return;
                 }
                 udelay(1);
@@@ -12671,7 -12925,7 +12922,7 @@@ static void reset_cce_csrs(struct hfi1_
         for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
                 write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
                 write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
-                                       CCE_MSIX_TABLE_UPPER_RESETCSR);
+                         CCE_MSIX_TABLE_UPPER_RESETCSR);
         }
         for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
                 /* CCE_MSIX_PBA read-only */
@@@ -12691,91 -12945,6 +12942,6 @@@
                 write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
   }
   
- /* set ASIC CSRs to chip reset defaults */
- static void reset_asic_csrs(struct hfi1_devdata *dd)
- {
-       int i;
- 
-       /*
-        * If the HFIs are shared between separate nodes or VMs,
-        * then more will need to be done here.  One idea is a module
-        * parameter that returns early, letting the first power-on or
-        * a known first load do the reset and blocking all others.
-        */
- 
-       if (!(dd->flags & HFI1_DO_INIT_ASIC))
-               return;
- 
-       if (dd->icode != ICODE_FPGA_EMULATION) {
-               /* emulation does not have an SBus - leave these alone */
-               /*
-                * All writes to ASIC_CFG_SBUS_REQUEST do something.
-                * Notes:
-                * o The reset is not zero if aimed at the core.  See the
-                *   SBus documentation for details.
-                * o If the SBus firmware has been updated (e.g. by the BIOS),
-                *   will the reset revert that?
-                */
-               /* ASIC_CFG_SBUS_REQUEST leave alone */
-               write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
-       }
-       /* ASIC_SBUS_RESULT read-only */
-       write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
-       for (i = 0; i < ASIC_NUM_SCRATCH; i++)
-               write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
-       write_csr(dd, ASIC_CFG_MUTEX, 0);       /* this will clear it */
- 
-       /* We might want to retain this state across FLR if we ever use it */
-       write_csr(dd, ASIC_CFG_DRV_STR, 0);
- 
-       /* ASIC_CFG_THERM_POLL_EN leave alone */
-       /* ASIC_STS_THERM read-only */
-       /* ASIC_CFG_RESET leave alone */
- 
-       write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
-       /* ASIC_PCIE_SD_HOST_STATUS read-only */
-       write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
-       write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
-       /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
-       write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
-       /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
-       /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
-       for (i = 0; i < 16; i++)
-               write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
- 
-       /* ASIC_GPIO_IN read-only */
-       write_csr(dd, ASIC_GPIO_OE, 0);
-       write_csr(dd, ASIC_GPIO_INVERT, 0);
-       write_csr(dd, ASIC_GPIO_OUT, 0);
-       write_csr(dd, ASIC_GPIO_MASK, 0);
-       /* ASIC_GPIO_STATUS read-only */
-       write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
-       /* ASIC_GPIO_FORCE leave alone */
- 
-       /* ASIC_QSFP1_IN read-only */
-       write_csr(dd, ASIC_QSFP1_OE, 0);
-       write_csr(dd, ASIC_QSFP1_INVERT, 0);
-       write_csr(dd, ASIC_QSFP1_OUT, 0);
-       write_csr(dd, ASIC_QSFP1_MASK, 0);
-       /* ASIC_QSFP1_STATUS read-only */
-       write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
-       /* ASIC_QSFP1_FORCE leave alone */
- 
-       /* ASIC_QSFP2_IN read-only */
-       write_csr(dd, ASIC_QSFP2_OE, 0);
-       write_csr(dd, ASIC_QSFP2_INVERT, 0);
-       write_csr(dd, ASIC_QSFP2_OUT, 0);
-       write_csr(dd, ASIC_QSFP2_MASK, 0);
-       /* ASIC_QSFP2_STATUS read-only */
-       write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
-       /* ASIC_QSFP2_FORCE leave alone */
- 
-       write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
-       /* this also writes a NOP command, clearing paging mode */
-       write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
-       write_csr(dd, ASIC_EEP_DATA, 0);
- }
- 
   /* set MISC CSRs to chip reset defaults */
   static void reset_misc_csrs(struct hfi1_devdata *dd)
   {
@@@ -12786,8 -12955,10 +12952,10 @@@
                 write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
                 write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
         }
-       /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
-          only be written 128-byte chunks */
+       /*
+        * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
+        * only be written in 128-byte chunks
+        */
         /* init RSA engine to clear lingering errors */
         write_csr(dd, MISC_CFG_RSA_CMD, 1);
         write_csr(dd, MISC_CFG_RSA_MU, 0);
@@@ -12843,18 -13014,17 +13011,17 @@@ static void reset_txe_csrs(struct hfi1_
         write_csr(dd, SEND_ERR_CLEAR, ~0ull);
         /* SEND_ERR_FORCE read-only */
         for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
-               write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
+               write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
         for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
-               write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
-       for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
-               write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
+               write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
+       for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+               write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
         for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
-               write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
+               write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
         for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
-               write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
+               write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0);
         write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
-       write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-                                       SEND_CM_GLOBAL_CREDIT_RESETCSR);
+       write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR);
         /* SEND_CM_CREDIT_USED_STATUS read-only */
         write_csr(dd, SEND_CM_TIMER_CTRL, 0);
         write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
@@@ -12862,7 -13032,7 +13029,7 @@@
         write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
         write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
         for (i = 0; i < TXE_NUM_DATA_VL; i++)
-               write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+               write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
         /* SEND_CM_CREDIT_USED_VL read-only */
         /* SEND_CM_CREDIT_USED_VL15 read-only */
@@@ -12948,8 -13118,8 +13115,8 @@@ static void init_rbufs(struct hfi1_devd
                  */
                 if (count++ > 500) {
                         dd_dev_err(dd,
-                               "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
-                               __func__, reg);
+                                  "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
+                                  __func__, reg);
                         break;
                 }
                 udelay(2); /* do not busy-wait the CSR */
@@@ -12978,8 -13148,8 +13145,8 @@@
                 /* give up after 100us - slowest possible at 33MHz is 73us */
                 if (count++ > 50) {
                         dd_dev_err(dd,
-                               "%s: RcvStatus.RxRbufInit not set, continuing\n",
-                               __func__);
+                                  "%s: RcvStatus.RxRbufInit not set, continuing\n",
+                                  __func__);
                         break;
                 }
         }
@@@ -13005,7 -13175,7 +13172,7 @@@ static void reset_rxe_csrs(struct hfi1_
         write_csr(dd, RCV_VL15, 0);
         /* this is a clear-down */
         write_csr(dd, RCV_ERR_INFO,
-                       RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
+                 RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
         /* RCV_ERR_STATUS read-only */
         write_csr(dd, RCV_ERR_MASK, 0);
         write_csr(dd, RCV_ERR_CLEAR, ~0ull);
@@@ -13051,8 -13221,8 +13218,8 @@@
                 write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
                 /* RCV_EGR_OFFSET_TAIL read-only */
                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
-                       write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
-                               0);
+                       write_uctxt_csr(dd, i,
+                                       RCV_TID_FLOW_TABLE + (8 * j), 0);
                 }
         }
   }
@@@ -13154,7 -13324,7 +13321,7 @@@ static void init_chip(struct hfi1_devda
                 write_csr(dd, RCV_CTXT_CTRL, 0);
         /* mask all interrupt sources */
         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-               write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+               write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
   
         /*
          * DC Reset: do a full DC reset before the register clear.
@@@ -13163,7 -13333,7 +13330,7 @@@
          * across the clear.
          */
         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-       (void) read_csr(dd, CCE_DC_CTRL);
+       (void)read_csr(dd, CCE_DC_CTRL);
   
         if (use_flr) {
                 /*
@@@ -13184,22 -13354,19 +13351,19 @@@
                         hfi1_pcie_flr(dd);
                         restore_pci_variables(dd);
                 }
- 
-               reset_asic_csrs(dd);
         } else {
                 dd_dev_info(dd, "Resetting CSRs with writes\n");
                 reset_cce_csrs(dd);
                 reset_txe_csrs(dd);
                 reset_rxe_csrs(dd);
-               reset_asic_csrs(dd);
                 reset_misc_csrs(dd);
         }
         /* clear the DC reset */
         write_csr(dd, CCE_DC_CTRL, 0);
   
         /* Set the LED off */
-       if (is_ax(dd))
-               setextled(dd, 0);
+       setextled(dd, 0);
+ 
         /*
          * Clear the QSFP reset.
          * An FLR enforces a 0 on all out pins. The driver does not touch
@@@ -13212,6 -13379,7 +13376,7 @@@
          */
         write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
         write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
+       init_chip_resources(dd);
   }
   
   static void init_early_variables(struct hfi1_devdata *dd)
@@@ -13252,12 -13420,12 +13417,12 @@@ static void init_kdeth_qp(struct hfi1_d
                 kdeth_qp = DEFAULT_KDETH_QP;
   
         write_csr(dd, SEND_BTH_QP,
-                       (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
-                               << SEND_BTH_QP_KDETH_QP_SHIFT);
+                 (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+                 SEND_BTH_QP_KDETH_QP_SHIFT);
   
         write_csr(dd, RCV_BTH_QP,
-                       (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
-                               << RCV_BTH_QP_KDETH_QP_SHIFT);
+                 (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+                 RCV_BTH_QP_KDETH_QP_SHIFT);
   }
   
   /**
@@@ -13382,22 -13550,21 +13547,21 @@@ static void init_qos(struct hfi1_devdat
                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
         /* add rule0 */
         write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
-               RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
-                       << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
-               2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
+                 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK <<
+                 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
+                 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
         write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
-               LRH_BTH_MATCH_OFFSET
-                       << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
-               LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
-               LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
-               ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
-               QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
-               ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
+                 LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
+                 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
+                 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
+                 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
+                 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
+                 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
         write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
-               LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
-               LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
-               LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
-               LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
+                 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
+                 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
+                 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
+                 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
         /* Enable RSM */
         add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
         kfree(rsmmap);
@@@ -13415,9 -13582,8 +13579,8 @@@ static void init_rxe(struct hfi1_devdat
         /* enable all receive errors */
         write_csr(dd, RCV_ERR_MASK, ~0ull);
         /* setup QPN map table - start where VL15 context leaves off */
-       init_qos(
-               dd,
-               dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
+       init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ?
+                MIN_KERNEL_KCTXTS : 0);
         /*
          * make sure RcvCtrl.RcvWcb <= PCIe Device Control
          * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
@@@ -13454,36 -13620,33 +13617,33 @@@ static void assign_cm_au_table(struct h
                                u32 csr0to3, u32 csr4to7)
   {
         write_csr(dd, csr0to3,
-                  0ull <<
-                       SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
-               |  1ull <<
-                       SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
-               |  2ull * cu <<
-                       SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
-               |  4ull * cu <<
-                       SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
+                 0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT |
+                 1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT |
+                 2ull * cu <<
+                 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT |
+                 4ull * cu <<
+                 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
         write_csr(dd, csr4to7,
-                  8ull * cu <<
-                       SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
-               | 16ull * cu <<
-                       SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
-               | 32ull * cu <<
-                       SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
-               | 64ull * cu <<
-                       SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
- 
+                 8ull * cu <<
+                 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT |
+                 16ull * cu <<
+                 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT |
+                 32ull * cu <<
+                 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT |
+                 64ull * cu <<
+                 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
   }
   
   static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
   {
         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
-                                       SEND_CM_LOCAL_AU_TABLE4_TO7);
+                          SEND_CM_LOCAL_AU_TABLE4_TO7);
   }
   
   void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
   {
         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
-                                       SEND_CM_REMOTE_AU_TABLE4_TO7);
+                          SEND_CM_REMOTE_AU_TABLE4_TO7);
   }
   
   static void init_txe(struct hfi1_devdata *dd)
@@@ -13586,9 -13749,9 +13746,9 @@@ int hfi1_set_ctxt_pkey(struct hfi1_devd
         int ret = 0;
         u64 reg;
   
-       if (ctxt < dd->num_rcv_contexts)
+       if (ctxt < dd->num_rcv_contexts) {
                 rcd = dd->rcd[ctxt];
-       else {
+       } else {
                 ret = -EINVAL;
                 goto done;
         }
@@@ -13614,9 -13777,9 +13774,9 @@@ int hfi1_clear_ctxt_pkey(struct hfi1_de
         int ret = 0;
         u64 reg;
   
-       if (ctxt < dd->num_rcv_contexts)
+       if (ctxt < dd->num_rcv_contexts) {
                 rcd = dd->rcd[ctxt];
-       else {
+       } else {
                 ret = -EINVAL;
                 goto done;
         }
@@@ -13639,24 -13802,26 +13799,26 @@@ done
    */
   void hfi1_start_cleanup(struct hfi1_devdata *dd)
   {
+       aspm_exit(dd);
         free_cntrs(dd);
         free_rcverr(dd);
         clean_up_interrupts(dd);
+       finish_chip_resources(dd);
   }
   
   #define HFI_BASE_GUID(dev) \
         ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
   
   /*
-  * Certain chip functions need to be initialized only once per asic
-  * instead of per-device. This function finds the peer device and
-  * checks whether that chip initialization needs to be done by this
-  * device.
+  * Information can be shared between the two HFIs on the same ASIC
+  * in the same OS.  This function finds the peer device and sets
+  * up a shared structure.
    */
- static void asic_should_init(struct hfi1_devdata *dd)
+ static int init_asic_data(struct hfi1_devdata *dd)
   {
         unsigned long flags;
         struct hfi1_devdata *tmp, *peer = NULL;
+       int ret = 0;
   
         spin_lock_irqsave(&hfi1_devs_lock, flags);
         /* Find our peer device */
@@@ -13668,13 -13833,21 +13830,21 @@@
                 }
         }
   
-       /*
-        * "Claim" the ASIC for initialization if it hasn't been
-        " "claimed" yet.
-        */
-       if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
-               dd->flags |= HFI1_DO_INIT_ASIC;
+       if (peer) {
+               dd->asic_data = peer->asic_data;
+       } else {
+               dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+               if (!dd->asic_data) {
+                       ret = -ENOMEM;
+                       goto done;
+               }
+               mutex_init(&dd->asic_data->asic_resource_mutex);
+       }
+       dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
+ 
+ done:
         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+       return ret;
   }
   
   /*
@@@ -13694,7 -13867,7 +13864,7 @@@ static int obtain_boardname(struct hfi1
         ret = read_hfi1_efi_var(dd, "description", &size,
                                 (void **)&dd->boardname);
         if (ret) {
-               dd_dev_err(dd, "Board description not found\n");
+               dd_dev_info(dd, "Board description not found\n");
                 /* use generic description */
                 dd->boardname = kstrdup(generic, GFP_KERNEL);
                 if (!dd->boardname)
@@@ -13703,6 -13876,50 +13873,50 @@@
         return 0;
   }
   
+ /*
+  * Check the interrupt registers to make sure that they are mapped correctly.
+  * It is intended to help the user identify any mismapping by the VMM when the
+  * driver is running in a VM. This function should only be called before
+  * interrupts are set up properly.
+  *
+  * Return 0 on success, -EINVAL (after logging an error) on failure.
+  */
+ static int check_int_registers(struct hfi1_devdata *dd)
+ {
+       u64 reg;
+       u64 all_bits = ~(u64)0;
+       u64 mask;
+ 
+       /* Save CceIntMask[0], then clear it to avoid raising any interrupts */
+       mask = read_csr(dd, CCE_INT_MASK);
+       write_csr(dd, CCE_INT_MASK, 0ull);
+       reg = read_csr(dd, CCE_INT_MASK);
+       if (reg)
+               goto err_exit;
+ 
+       /* Clear all interrupt status bits; all must read back as zero */
+       write_csr(dd, CCE_INT_CLEAR, all_bits);
+       reg = read_csr(dd, CCE_INT_STATUS);
+       if (reg)
+               goto err_exit;
+ 
+       /* Set all interrupt status bits; all must read back as set */
+       write_csr(dd, CCE_INT_FORCE, all_bits);
+       reg = read_csr(dd, CCE_INT_STATUS);
+       if (reg != all_bits)
+               goto err_exit;
+ 
+       /* Clear the forced bits and restore the saved interrupt mask */
+       write_csr(dd, CCE_INT_CLEAR, all_bits);
+       write_csr(dd, CCE_INT_MASK, mask);
+ 
+       return 0;
+ err_exit:
+       write_csr(dd, CCE_INT_MASK, mask);
+       dd_dev_err(dd, "Interrupt registers not properly mapped by VMM\n");
+       return -EINVAL;
+ }
+ 
   /**
    * Allocate and initialize the device structure for the hfi.
    * @dev: the pci_dev for hfi1_ib device
@@@ -13727,9 -13944,10 +13941,10 @@@ struct hfi1_devdata *hfi1_init_dd(struc
                 "RTL FPGA emulation",
                 "Functional simulator"
         };
+       struct pci_dev *parent = pdev->bus->self;
   
-       dd = hfi1_alloc_devdata(pdev,
-               NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
+       dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+                               sizeof(struct hfi1_pportdata));
         if (IS_ERR(dd))
                 goto bail;
         ppd = dd->pport;
@@@ -13750,8 -13968,8 +13965,8 @@@
                 /* link width active is 0 when link is down */
                 /* link width downgrade active is 0 when link is down */
   
-               if (num_vls < HFI1_MIN_VLS_SUPPORTED
-                       || num_vls > HFI1_MAX_VLS_SUPPORTED) {
+               if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
+                   num_vls > HFI1_MAX_VLS_SUPPORTED) {
                         hfi1_early_err(&pdev->dev,
                                        "Invalid num_vls %u, using %u VLs\n",
                                     num_vls, HFI1_MAX_VLS_SUPPORTED);
@@@ -13759,6 -13977,7 +13974,7 @@@
                 }
                 ppd->vls_supported = num_vls;
                 ppd->vls_operational = ppd->vls_supported;
+               ppd->actual_vls_operational = ppd->vls_supported;
                 /* Set the default MTU. */
                 for (vl = 0; vl < num_vls; vl++)
                         dd->vld[vl].mtu = hfi1_max_mtu;
@@@ -13778,6 -13997,7 +13994,7 @@@
                 /* start in offline */
                 ppd->host_link_state = HLS_DN_OFFLINE;
                 init_vl_arb_caches(ppd);
+               ppd->last_pstate = 0xff; /* invalid value */
         }
   
         dd->link_default = HLS_DN_POLL;
@@@ -13803,8 -14023,21 +14020,21 @@@
         dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
   
-       /* obtain the hardware ID - NOT related to unit, which is a
-          software enumeration */
+       /*
+        * Check interrupt registers mapping if the driver has no access to
+        * the upstream component. In this case, it is likely that the driver
+        * is running in a VM.
+        */
+       if (!parent) {
+               ret = check_int_registers(dd);
+               if (ret)
+                       goto bail_cleanup;
+       }
+ 
+       /*
+        * obtain the hardware ID - NOT related to unit, which is a
+        * software enumeration
+        */
         reg = read_csr(dd, CCE_REVISION2);
         dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
                                         & CCE_REVISION2_HFI_ID_MASK;
@@@ -13812,8 -14045,8 +14042,8 @@@
         dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
         dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
         dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
-               dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
-               (int)dd->irev);
+                   dd->icode < ARRAY_SIZE(inames) ?
+                   inames[dd->icode] : "unknown", (int)dd->irev);
   
         /* speeds the hardware can support */
         dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
@@@ -13842,6 -14075,7 +14072,7 @@@
                            num_vls, dd->chip_sdma_engines);
                 num_vls = dd->chip_sdma_engines;
                 ppd->vls_supported = dd->chip_sdma_engines;
+               ppd->vls_operational = ppd->vls_supported;
         }
   
         /*
@@@ -13863,8 -14097,10 +14094,10 @@@
         /* needs to be done before we look for the peer device */
         read_guid(dd);
   
-       /* should this device init the ASIC block? */
-       asic_should_init(dd);
+       /* set up shared ASIC data with peer device */
+       ret = init_asic_data(dd);
+       if (ret)
+               goto bail_cleanup;
   
         /* obtain chip sizes, reset chip CSRs */
         init_chip(dd);
@@@ -13874,6 -14110,9 +14107,9 @@@
         if (ret)
                 goto bail_cleanup;
   
+       /* Needs to be called before hfi1_firmware_init */
+       get_platform_config(dd);
+ 
         /* read in firmware */
         ret = hfi1_firmware_init(dd);
         if (ret)
@@@ -13925,6 -14164,10 +14161,10 @@@
         /* set up KDETH QP prefix in both RX and TX CSRs */
         init_kdeth_qp(dd);
   
+       ret = hfi1_dev_affinity_init(dd);
+       if (ret)
+               goto bail_cleanup;
+ 
         /* send contexts must be set up before receive contexts */
         ret = init_send_contexts(dd);
         if (ret)
@@@ -14022,7 -14265,6 +14262,6 @@@ static u16 delay_cycles(struct hfi1_ppo
         return (u16)delta_cycles;
   }
   
- 
   /**
    * create_pbc - build a pbc for transmission
    * @flags: special case flags or-ed in built pbc
@@@ -14078,10 -14320,15 +14317,15 @@@ static int thermal_init(struct hfi1_dev
         int ret = 0;
   
         if (dd->icode != ICODE_RTL_SILICON ||
-           !(dd->flags & HFI1_DO_INIT_ASIC))
+           check_chip_resource(dd, CR_THERM_INIT, NULL))
                 return ret;
   
-       acquire_hw_mutex(dd);
+       ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+       if (ret) {
+               THERM_FAILURE(dd, ret, "Acquire SBus");
+               return ret;
+       }
+ 
         dd_dev_info(dd, "Initializing thermal sensor\n");
         /* Disable polling of thermal readings */
         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
@@@ -14128,8 -14375,14 +14372,14 @@@
   
         /* Enable polling of thermal readings */
         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
+ 
+       /* Set initialized flag */
+       ret = acquire_chip_resource(dd, CR_THERM_INIT, 0);
+       if (ret)
+               THERM_FAILURE(dd, ret, "Unable to set thermal init flag");
+ 
   done:
-       release_hw_mutex(dd);
+       release_chip_resource(dd, CR_SBUS);
         return ret;
   }
   
@@@ -14144,7 -14397,7 +14394,7 @@@ static void handle_temp_err(struct hfi1
         dd_dev_emerg(dd,
                      "Critical temperature reached! Forcing device into freeze mode!\n");
         dd->flags |= HFI1_FORCED_FREEZE;
-       start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
+       start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
         /*
          * Shut DC down as much and as quickly as possible.
          *
@@@ -14158,8 -14411,8 +14408,8 @@@
          */
         ppd->driver_link_ready = 0;
         ppd->link_enabled = 0;
-       set_physical_link_state(dd, PLS_OFFLINE |
-                               (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
+       set_physical_link_state(dd, (OPA_LINKDOWN_REASON_SMA_DISABLED << 8) |
+                               PLS_OFFLINE);
         /*
          * Step 2: Shutdown LCB and 8051
          *         After shutdown, do not restore DC_CFG_RESET value.
diff --combined drivers/staging/rdma/hfi1/diag.c

index e41159fe6889737e3899a0413c9c25a74530b5df,6546e91f85b753f77d5231418dc2d08191e680cb..c5b520bf610e29149baf6aff3b2f3f5a66ecafa2
--- 1/drivers/staging/rdma/hfi1/diag.c
--- 2/drivers/staging/rdma/hfi1/diag.c
+++ b/drivers/staging/rdma/hfi1/diag.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -70,6 -67,7 +67,7 @@@
   #include "hfi.h"
   #include "device.h"
   #include "common.h"
+ #include "verbs_txreq.h"
   #include "trace.h"
   
   #undef pr_fmt
@@@ -80,15 -78,15 +78,15 @@@
   /* Snoop option mask */
   #define SNOOP_DROP_SEND               BIT(0)
   #define SNOOP_USE_METADATA    BIT(1)
+ #define SNOOP_SET_VL0TOVL15     BIT(2)
   
   static u8 snoop_flags;
   
   /*
    * Extract packet length from LRH header.
-  * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the
-  * bogus bits away. This is in Dwords so multiply by 4 to get size in bytes
+  * This is in Dwords so multiply by 4 to get size in bytes
    */
- #define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
+ #define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
   
   enum hfi1_filter_status {
         HFI1_FILTER_HIT,
@@@ -257,7 -255,7 +255,7 @@@ static int hfi1_filter_ib_service_level
   static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
   static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value);
   
- -static struct hfi1_filter_array hfi1_filters[] = {
+ +static const struct hfi1_filter_array hfi1_filters[] = {
         { hfi1_filter_lid },
         { hfi1_filter_dlid },
         { hfi1_filter_mad_mgmt_class },
@@@ -860,7 -858,7 +858,7 @@@ static ssize_t hfi1_snoop_write(struct 
                         vl = sc4;
                 } else {
                         sl = (byte_two >> 4) & 0xf;
-                       ibp = to_iport(&dd->verbs_dev.ibdev, 1);
+                       ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
                         sc5 = ibp->sl_to_sc[sl];
                         vl = sc_to_vlt(dd, sc5);
                         if (vl != sc4) {
@@@ -966,6 -964,65 +964,65 @@@ static ssize_t hfi1_snoop_read(struct f
         return ret;
   }
   
+ /**
+  * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
+  * @ppd : ptr to hfi1 port data
+  * @value : options from user space
+  *
+  * Assumes the rest of the CM credit registers are zero from a
+  * previous global or credit reset.
+  * Leave shared count at zero for both global and all vls.
+  * In snoop mode, ideally we don't use shared credits.
+  * Reserve 8.5k for VL15.
+  * If total credits are less than 8.5 kbytes, return an error.
+  * Divide the rest of the credits across VL0 to VL7 and if
+  * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
+  * return with an error.
+  * The credit registers will be reset to zero on link negotiation or link up
+  * so this function should be activated from user space only if the port has
+  * gone past link negotiation and link up.
+  *
+  * Return -- 0 if successful else error condition
+  *
+  */
+ static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
+                                          int value)
+ {
+ #define  OPA_MIN_PER_VL_CREDITS  34  /* 2048 + 128 bytes */
+       struct buffer_control t;
+       int i;
+       struct hfi1_devdata *dd = ppd->dd;
+       u16  total_credits = (value >> 16) & 0xffff;
+       u16  vl15_credits = dd->vl15_init / 2;
+       u16  per_vl_credits;
+       __be16 be_per_vl_credits;
+ 
+       if (!(ppd->host_link_state & HLS_UP))
+               goto err_exit;
+       if (total_credits  <  vl15_credits)
+               goto err_exit;
+ 
+       per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
+ 
+       if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
+               goto err_exit;
+ 
+       memset(&t, 0, sizeof(t));
+       be_per_vl_credits = cpu_to_be16(per_vl_credits);
+ 
+       for (i = 0; i < TXE_NUM_DATA_VL; i++)
+               t.vl[i].dedicated = be_per_vl_credits;
+ 
+       t.vl[15].dedicated  = cpu_to_be16(vl15_credits);
+       return set_buffer_control(ppd, &t);
+ 
+ err_exit:
+       snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
+                 ppd->host_link_state, total_credits, vl15_credits);
+ 
+       return -EINVAL;
+ }
+ 
   static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
   {
         struct hfi1_devdata *dd;
@@@ -1192,6 -1249,10 +1249,10 @@@
                         snoop_flags |= SNOOP_DROP_SEND;
                 if (value & SNOOP_USE_METADATA)
                         snoop_flags |= SNOOP_USE_METADATA;
+               if (value & (SNOOP_SET_VL0TOVL15)) {
+                       ppd = &dd->pport[0];  /* first port will do */
+                       ret = hfi1_assign_snoop_link_credits(ppd, value);
+               }
                 break;
         default:
                 return -ENOTTY;
@@@ -1603,7 -1664,7 +1664,7 @@@ int snoop_recv_handler(struct hfi1_pack
   /*
    * Handle snooping and capturing packets when sdma is being used.
    */
- int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+ int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                            u64 pbc)
   {
         pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
@@@ -1616,20 -1677,19 +1677,19 @@@
    * bypass packets. The only way to send a bypass packet currently is to use the
    * diagpkt interface. When that interface is enabled, snoop/capture is not.
    */
- int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                            u64 pbc)
   {
-       struct ahg_ib_header *ahdr = qp->s_hdr;
         u32 hdrwords = qp->s_hdrwords;
-       struct hfi1_sge_state *ss = qp->s_cur_sge;
+       struct rvt_sge_state *ss = qp->s_cur_sge;
         u32 len = qp->s_cur_size;
         u32 dwords = (len + 3) >> 2;
         u32 plen = hdrwords + dwords + 2; /* includes pbc */
         struct hfi1_pportdata *ppd = ps->ppd;
         struct snoop_packet *s_packet = NULL;
-       u32 *hdr = (u32 *)&ahdr->ibh;
+       u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
         u32 length = 0;
-       struct hfi1_sge_state temp_ss;
+       struct rvt_sge_state temp_ss;
         void *data = NULL;
         void *data_start = NULL;
         int ret;
@@@ -1638,7 -1698,7 +1698,7 @@@
         struct capture_md md;
         u32 vl;
         u32 hdr_len = hdrwords << 2;
-       u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh);
+       u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
   
         md.u.pbc = 0;
   
@@@ -1665,7 -1725,7 +1725,7 @@@
                 md.port = 1;
                 md.dir = PKT_DIR_EGRESS;
                 if (likely(pbc == 0)) {
-                       vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12;
+                       vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
                         md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
                 } else {
                         md.u.pbc = 0;
@@@ -1727,7 -1787,7 +1787,7 @@@
                 ret = HFI1_FILTER_HIT;
         } else {
                 ret = ppd->dd->hfi1_snoop.filter_callback(
-                                       &ahdr->ibh,
+                                       &ps->s_txreq->phdr.hdr,
                                         NULL,
                                         ppd->dd->hfi1_snoop.filter_value);
         }
@@@ -1759,9 -1819,16 +1819,16 @@@
                                 spin_unlock_irqrestore(&qp->s_lock, flags);
                         } else if (qp->ibqp.qp_type == IB_QPT_RC) {
                                 spin_lock_irqsave(&qp->s_lock, flags);
-                               hfi1_rc_send_complete(qp, &ahdr->ibh);
+                               hfi1_rc_send_complete(qp,
+                                                     &ps->s_txreq->phdr.hdr);
                                 spin_unlock_irqrestore(&qp->s_lock, flags);
                         }
+ 
+                       /*
+                        * If snoop is dropping the packet we need to put the
+                        * txreq back because no one else will.
+                        */
+                       hfi1_put_txreq(ps->s_txreq);
                         return 0;
                 }
                 break;
diff --combined drivers/staging/rdma/hfi1/driver.c

index ee50bbf64d39603939bfbedff8ae7965e7678769,914beedb556b42eabd4bbdfc5680efd286655858..34511e5df1d56e7765c5d11d4e07d68514cf3894
--- 1/drivers/staging/rdma/hfi1/driver.c
--- 2/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -56,6 -53,7 +53,7 @@@
   #include <linux/vmalloc.h>
   #include <linux/module.h>
   #include <linux/prefetch.h>
+ #include <rdma/ib_verbs.h>
   
   #include "hfi.h"
   #include "trace.h"
@@@ -162,6 -160,22 +160,22 @@@ const char *get_unit_name(int unit
         return iname;
   }
   
+ const char *get_card_name(struct rvt_dev_info *rdi)
+ {
+       struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+       struct hfi1_devdata *dd = container_of(ibdev,
+                                              struct hfi1_devdata, verbs_dev);
+       return get_unit_name(dd->unit);
+ }
+ 
+ struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
+ {
+       struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+       struct hfi1_devdata *dd = container_of(ibdev,
+                                              struct hfi1_devdata, verbs_dev);
+       return dd->pcidev;
+ }
+ 
   /*
    * Return count of units with at least one port ACTIVE.
    */
@@@ -246,7 -260,7 +260,7 @@@ static inline void *get_egrbuf(const st
    */
   inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
   {
- -      if (unlikely(!IS_ALIGNED(size, PAGE_SIZE)))
+ +      if (unlikely(!PAGE_ALIGNED(size)))
                 return 0;
         if (unlikely(size < MIN_EAGER_BUFFER))
                 return 0;
@@@ -265,6 -279,8 +279,8 @@@ static void rcv_hdrerr(struct hfi1_ctxt
         u32 rte = rhf_rcv_type_err(packet->rhf);
         int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
         struct hfi1_ibport *ibp = &ppd->ibport_data;
+       struct hfi1_devdata *dd = ppd->dd;
+       struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
   
         if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
                 return;
@@@ -283,9 -299,9 +299,9 @@@
                         goto drop;
   
                 /* Check for GRH */
-               if (lnh == HFI1_LRH_BTH)
+               if (lnh == HFI1_LRH_BTH) {
                         ohdr = &hdr->u.oth;
-               else if (lnh == HFI1_LRH_GRH) {
+               } else if (lnh == HFI1_LRH_GRH) {
                         u32 vtf;
   
                         ohdr = &hdr->u.l.oth;
@@@ -295,17 -311,17 +311,17 @@@
                         if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
                                 goto drop;
                         rcv_flags |= HFI1_HAS_GRH;
-               } else
+               } else {
                         goto drop;
- 
+               }
                 /* Get the destination QP number. */
-               qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-               if (lid < HFI1_MULTICAST_LID_BASE) {
-                       struct hfi1_qp *qp;
+               qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+               if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+                       struct rvt_qp *qp;
                         unsigned long flags;
   
                         rcu_read_lock();
-                       qp = hfi1_lookup_qpn(ibp, qp_num);
+                       qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
                         if (!qp) {
                                 rcu_read_unlock();
                                 goto drop;
@@@ -318,9 -334,9 +334,9 @@@
                         spin_lock_irqsave(&qp->r_lock, flags);
   
                         /* Check for valid receive state. */
-                       if (!(ib_hfi1_state_ops[qp->state] &
-                             HFI1_PROCESS_RECV_OK)) {
-                               ibp->n_pkt_drops++;
+                       if (!(ib_rvt_state_ops[qp->state] &
+                             RVT_PROCESS_RECV_OK)) {
+                               ibp->rvp.n_pkt_drops++;
                         }
   
                         switch (qp->ibqp.qp_type) {
@@@ -352,7 -368,7 +368,7 @@@
                 if (rhf_use_egr_bfr(packet->rhf))
                         ebuf = packet->ebuf;
   
-               if (ebuf == NULL)
+               if (!ebuf)
                         goto drop; /* this should never happen */
   
                 if (lnh == HFI1_LRH_BTH)
@@@ -370,7 -386,7 +386,7 @@@
                          * Only in pre-B0 h/w is the CNP_OPCODE handled
                          * via this code path.
                          */
-                       struct hfi1_qp *qp = NULL;
+                       struct rvt_qp *qp = NULL;
                         u32 lqpn, rqpn;
                         u16 rlid;
                         u8 svc_type, sl, sc5;
@@@ -380,10 -396,10 +396,10 @@@
                                 sc5 |= 0x10;
                         sl = ibp->sc_to_sl[sc5];
   
-                       lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK;
+                       lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
                         rcu_read_lock();
-                       qp = hfi1_lookup_qpn(ibp, lqpn);
-                       if (qp == NULL) {
+                       qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
+                       if (!qp) {
                                 rcu_read_unlock();
                                 goto drop;
                         }
@@@ -419,9 -435,8 +435,8 @@@ drop
   }
   
   static inline void init_packet(struct hfi1_ctxtdata *rcd,
-                             struct hfi1_packet *packet)
+                              struct hfi1_packet *packet)
   {
- 
         packet->rsize = rcd->rcvhdrqentsize; /* words */
         packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
         packet->rcd = rcd;
@@@ -434,12 -449,7 +449,7 @@@
         packet->rcv_flags = 0;
   }
   
- #ifndef CONFIG_PRESCAN_RXQ
- static void prescan_rxq(struct hfi1_packet *packet) {}
- #else /* !CONFIG_PRESCAN_RXQ */
- static int prescan_receive_queue;
- 
- static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
+ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
                         struct hfi1_other_headers *ohdr,
                         u64 rhf, u32 bth1, struct ib_grh *grh)
   {
@@@ -453,7 -463,7 +463,7 @@@
         case IB_QPT_GSI:
         case IB_QPT_UD:
                 rlid = be16_to_cpu(hdr->lrh[3]);
-               rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+               rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
                 svc_type = IB_CC_SVCTYPE_UD;
                 break;
         case IB_QPT_UC:
@@@ -483,7 -493,7 +493,7 @@@
   
         if (bth1 & HFI1_BECN_SMASK) {
                 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-               u32 lqpn = bth1 & HFI1_QPN_MASK;
+               u32 lqpn = bth1 & RVT_QPN_MASK;
                 u8 sl = ibp->sc_to_sl[sc5];
   
                 process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
@@@ -562,26 -572,31 +572,31 @@@ static inline void update_ps_mdata(stru
    * containing Explicit Congestion Notifications (FECNs, or BECNs).
    * When an ECN is found, process the Congestion Notification, and toggle
    * it off.
+  * This is declared as a macro to allow quick checking of the port to avoid
+  * the overhead of a function call if not enabled.
    */
- static void prescan_rxq(struct hfi1_packet *packet)
+ #define prescan_rxq(rcd, packet) \
+       do { \
+               if (rcd->ppd->cc_prescan) \
+                       __prescan_rxq(packet); \
+       } while (0)
+ static void __prescan_rxq(struct hfi1_packet *packet)
   {
         struct hfi1_ctxtdata *rcd = packet->rcd;
         struct ps_mdata mdata;
   
-       if (!prescan_receive_queue)
-               return;
- 
         init_ps_mdata(&mdata, packet);
   
         while (1) {
                 struct hfi1_devdata *dd = rcd->dd;
                 struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
-               __le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head +
+               __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
                                          dd->rhf_offset;
-               struct hfi1_qp *qp;
+               struct rvt_qp *qp;
                 struct hfi1_ib_header *hdr;
                 struct hfi1_other_headers *ohdr;
                 struct ib_grh *grh = NULL;
+               struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
                 u64 rhf = rhf_to_cpu(rhf_addr);
                 u32 etype = rhf_rcv_type(rhf), qpn, bth1;
                 int is_ecn = 0;
@@@ -600,25 -615,25 +615,25 @@@
                         hfi1_get_msgheader(dd, rhf_addr);
                 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
   
-               if (lnh == HFI1_LRH_BTH)
+               if (lnh == HFI1_LRH_BTH) {
                         ohdr = &hdr->u.oth;
-               else if (lnh == HFI1_LRH_GRH) {
+               } else if (lnh == HFI1_LRH_GRH) {
                         ohdr = &hdr->u.l.oth;
                         grh = &hdr->u.l.grh;
-               } else
+               } else {
                         goto next; /* just in case */
- 
+               }
                 bth1 = be32_to_cpu(ohdr->bth[1]);
                 is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
   
                 if (!is_ecn)
                         goto next;
   
-               qpn = bth1 & HFI1_QPN_MASK;
+               qpn = bth1 & RVT_QPN_MASK;
                 rcu_read_lock();
-               qp = hfi1_lookup_qpn(ibp, qpn);
+               qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
   
-               if (qp == NULL) {
+               if (!qp) {
                         rcu_read_unlock();
                         goto next;
                 }
@@@ -633,7 -648,6 +648,6 @@@ next
                 update_ps_mdata(&mdata, rcd);
         }
   }
- #endif /* CONFIG_PRESCAN_RXQ */
   
   static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
   {
@@@ -683,8 -697,9 +697,9 @@@ static inline int process_rcv_packet(st
                  * The +2 is the size of the RHF.
                  */
                 prefetch_range(packet->ebuf,
-                       packet->tlen - ((packet->rcd->rcvhdrqentsize -
-                                 (rhf_hdrq_offset(packet->rhf)+2)) * 4));
+                              packet->tlen - ((packet->rcd->rcvhdrqentsize -
+                                              (rhf_hdrq_offset(packet->rhf)
+                                               + 2)) * 4));
         }
   
         /*
@@@ -712,7 -727,7 +727,7 @@@
                 }
         }
   
-       packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
+       packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                                       packet->rcd->dd->rhf_offset;
         packet->rhf = rhf_to_cpu(packet->rhf_addr);
   
@@@ -737,7 -752,6 +752,6 @@@ static inline void process_rcv_update(i
   
   static inline void finish_packet(struct hfi1_packet *packet)
   {
- 
         /*
          * Nothing we need to free for the packet.
          *
@@@ -746,14 -760,12 +760,12 @@@
          */
         update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
                        packet->etail, rcv_intr_dynamic, packet->numpkt);
- 
   }
   
   static inline void process_rcv_qp_work(struct hfi1_packet *packet)
   {
- 
         struct hfi1_ctxtdata *rcd;
-       struct hfi1_qp *qp, *nqp;
+       struct rvt_qp *qp, *nqp;
   
         rcd = packet->rcd;
         rcd->head = packet->rhqoff;
@@@ -764,17 -776,17 +776,17 @@@
          */
         list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
                 list_del_init(&qp->rspwait);
-               if (qp->r_flags & HFI1_R_RSP_DEFERED_ACK) {
-                       qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+               if (qp->r_flags & RVT_R_RSP_NAK) {
+                       qp->r_flags &= ~RVT_R_RSP_NAK;
                         hfi1_send_rc_ack(rcd, qp, 0);
                 }
-               if (qp->r_flags & HFI1_R_RSP_SEND) {
+               if (qp->r_flags & RVT_R_RSP_SEND) {
                         unsigned long flags;
   
-                       qp->r_flags &= ~HFI1_R_RSP_SEND;
+                       qp->r_flags &= ~RVT_R_RSP_SEND;
                         spin_lock_irqsave(&qp->s_lock, flags);
-                       if (ib_hfi1_state_ops[qp->state] &
-                                       HFI1_PROCESS_OR_FLUSH_SEND)
+                       if (ib_rvt_state_ops[qp->state] &
+                                       RVT_PROCESS_OR_FLUSH_SEND)
                                 hfi1_schedule_send(qp);
                         spin_unlock_irqrestore(&qp->s_lock, flags);
                 }
@@@ -799,7 -811,7 +811,7 @@@ int handle_receive_interrupt_nodma_rtai
                 goto bail;
         }
   
-       prescan_rxq(&packet);
+       prescan_rxq(rcd, &packet);
   
         while (last == RCV_PKT_OK) {
                 last = process_rcv_packet(&packet, thread);
@@@ -830,7 -842,7 +842,7 @@@ int handle_receive_interrupt_dma_rtail(
         }
         smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
   
-       prescan_rxq(&packet);
+       prescan_rxq(rcd, &packet);
   
         while (last == RCV_PKT_OK) {
                 last = process_rcv_packet(&packet, thread);
@@@ -862,6 -874,37 +874,37 @@@ static inline void set_all_dma_rtail(st
                         &handle_receive_interrupt_dma_rtail;
   }
   
+ void set_all_slowpath(struct hfi1_devdata *dd)
+ {
+       int i;
+ 
+       /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
+       for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+               dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ }
+ 
+ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
+                                     struct hfi1_packet packet,
+                                     struct hfi1_devdata *dd)
+ {
+       struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
+       struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
+                                                            packet.rhf_addr);
+ 
+       if (hdr2sc(hdr, packet.rhf) != 0xf) {
+               int hwstate = read_logical_state(dd);
+ 
+               if (hwstate != LSTATE_ACTIVE) {
+                       dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+                       return 0;
+               }
+ 
+               queue_work(rcd->ppd->hfi1_wq, lsaw);
+               return 1;
+       }
+       return 0;
+ }
+ 
   /*
    * handle_receive_interrupt - receive a packet
    * @rcd: the context
@@@ -910,17 -953,17 +953,17 @@@ int handle_receive_interrupt(struct hfi
                 }
         }
   
-       prescan_rxq(&packet);
+       prescan_rxq(rcd, &packet);
   
         while (last == RCV_PKT_OK) {
- 
-               if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
-                       DROP_PACKET_OFF) == DROP_PACKET_ON)) {
+               if (unlikely(dd->do_drop &&
+                            atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+                            DROP_PACKET_ON)) {
                         dd->do_drop = 0;
   
                         /* On to the next packet */
                         packet.rhqoff += packet.rsize;
-                       packet.rhf_addr = (__le32 *) rcd->rcvhdrq +
+                       packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
                                           packet.rhqoff +
                                           dd->rhf_offset;
                         packet.rhf = rhf_to_cpu(packet.rhf_addr);
@@@ -929,6 -972,11 +972,11 @@@
                         last = skip_rcv_packet(&packet, thread);
                         skip_pkt = 0;
                 } else {
+                       /* Auto activate link on non-SC15 packet receive */
+                       if (unlikely(rcd->ppd->host_link_state ==
+                                    HLS_UP_ARMED) &&
+                           set_armed_to_active(rcd, packet, dd))
+                               goto bail;
                         last = process_rcv_packet(&packet, thread);
                 }
   
@@@ -940,8 -988,7 +988,7 @@@
                         if (seq != rcd->seq_cnt)
                                 last = RCV_PKT_DONE;
                         if (needset) {
-                               dd_dev_info(dd,
-                                       "Switching to NO_DMA_RTAIL\n");
+                               dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
                                 set_all_nodma_rtail(dd);
                                 needset = 0;
                         }
@@@ -983,6 -1030,42 +1030,42 @@@ bail
         return last;
   }
   
+ /*
+  * We may discover in the interrupt that the hardware link state has
+  * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
+  * and we need to update the driver's notion of the link state.  We cannot
+  * run set_link_state from interrupt context, so we queue this function on
+  * a workqueue.
+  *
+  * We delay the regular interrupt processing until after the state changes
+  * so that the link will be in the correct state by the time any application
+  * we wake up attempts to send a reply to any message it received.
+  * (Subsequent receive interrupts may possibly force the wakeup before we
+  * update the link state.)
+  *
+  * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
+  * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
+  * so we're safe from use-after-free of the rcd.
+  */
+ void receive_interrupt_work(struct work_struct *work)
+ {
+       struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+                                                 linkstate_active_work);
+       struct hfi1_devdata *dd = ppd->dd;
+       int i;
+ 
+       /* Received non-SC15 packet implies neighbor_normal */
+       ppd->neighbor_normal = 1;
+       set_link_state(ppd, HLS_UP_ACTIVE);
+ 
+       /*
+        * Interrupt all kernel contexts that could have had an
+        * interrupt during auto activation.
+        */
+       for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+               force_recv_intr(dd->rcd[i]);
+ }
+ 
   /*
    * Convert a given MTU size to the on-wire MAD packet enumeration.
    * Return -1 if the size is invalid.
@@@ -1037,9 -1120,9 +1120,9 @@@ int set_mtu(struct hfi1_pportdata *ppd
         ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
   
         mutex_lock(&ppd->hls_lock);
-       if (ppd->host_link_state == HLS_UP_INIT
-                       || ppd->host_link_state == HLS_UP_ARMED
-                       || ppd->host_link_state == HLS_UP_ACTIVE)
+       if (ppd->host_link_state == HLS_UP_INIT ||
+           ppd->host_link_state == HLS_UP_ARMED ||
+           ppd->host_link_state == HLS_UP_ACTIVE)
                 is_up = 1;
   
         drain = !is_ax(dd) && is_up;
@@@ -1082,79 -1165,80 +1165,80 @@@ int hfi1_set_lid(struct hfi1_pportdata 
         return 0;
   }
   
- /*
-  * Following deal with the "obviously simple" task of overriding the state
-  * of the LEDs, which normally indicate link physical and logical status.
-  * The complications arise in dealing with different hardware mappings
-  * and the board-dependent routine being called from interrupts.
-  * and then there's the requirement to _flash_ them.
-  */
- #define LED_OVER_FREQ_SHIFT 8
- #define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
- /* Below is "non-zero" to force override, but both actual LEDs are off */
- #define LED_OVER_BOTH_OFF (8)
+ void shutdown_led_override(struct hfi1_pportdata *ppd)
+ {
+       struct hfi1_devdata *dd = ppd->dd;
+ 
+       /*
+        * This pairs with the memory barrier in hfi1_start_led_override to
+        * ensure that we read the correct state of LED beaconing represented
+        * by led_override_timer_active
+        */
+       smp_rmb();
+       if (atomic_read(&ppd->led_override_timer_active)) {
+               del_timer_sync(&ppd->led_override_timer);
+               atomic_set(&ppd->led_override_timer_active, 0);
+               /* Ensure the atomic_set is visible to all CPUs */
+               smp_wmb();
+       }
+ 
+       /* Hand control of the LED to the DC for normal operation */
+       write_csr(dd, DCC_CFG_LED_CNTRL, 0);
+ }
   
   static void run_led_override(unsigned long opaque)
   {
         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
         struct hfi1_devdata *dd = ppd->dd;
-       int timeoff;
-       int ph_idx;
+       unsigned long timeout;
+       int phase_idx;
   
         if (!(dd->flags & HFI1_INITTED))
                 return;
   
-       ph_idx = ppd->led_override_phase++ & 1;
-       ppd->led_override = ppd->led_override_vals[ph_idx];
-       timeoff = ppd->led_override_timeoff;
+       phase_idx = ppd->led_override_phase & 1;
   
-       /*
-        * don't re-fire the timer if user asked for it to be off; we let
-        * it fire one more time after they turn it off to simplify
-        */
-       if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-               mod_timer(&ppd->led_override_timer, jiffies + timeoff);
+       setextled(dd, phase_idx);
+ 
+       timeout = ppd->led_override_vals[phase_idx];
+ 
+       /* Set up for next phase */
+       ppd->led_override_phase = !ppd->led_override_phase;
+ 
+       mod_timer(&ppd->led_override_timer, jiffies + timeout);
   }
   
- void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val)
+ /*
+  * To have the LED blink in a particular pattern, provide timeon and timeoff
+  * in milliseconds.
+  * To turn off custom blinking and return to normal operation, use
+  * shutdown_led_override()
+  */
+ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+                            unsigned int timeoff)
   {
-       struct hfi1_devdata *dd = ppd->dd;
-       int timeoff, freq;
- 
-       if (!(dd->flags & HFI1_INITTED))
+       if (!(ppd->dd->flags & HFI1_INITTED))
                 return;
   
-       /* First check if we are blinking. If not, use 1HZ polling */
-       timeoff = HZ;
-       freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+       /* Convert to jiffies for direct use in timer */
+       ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
+       ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
   
-       if (freq) {
-               /* For blink, set each phase from one nybble of val */
-               ppd->led_override_vals[0] = val & 0xF;
-               ppd->led_override_vals[1] = (val >> 4) & 0xF;
-               timeoff = (HZ << 4)/freq;
-       } else {
-               /* Non-blink set both phases the same. */
-               ppd->led_override_vals[0] = val & 0xF;
-               ppd->led_override_vals[1] = val & 0xF;
-       }
-       ppd->led_override_timeoff = timeoff;
+       /* Arbitrarily start from LED on phase */
+       ppd->led_override_phase = 1;
   
         /*
          * If the timer has not already been started, do so. Use a "quick"
-        * timeout so the function will be called soon, to look at our request.
+        * timeout so the handler will be called soon to look at our request.
          */
-       if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
-               /* Need to start timer */
+       if (!timer_pending(&ppd->led_override_timer)) {
                 setup_timer(&ppd->led_override_timer, run_led_override,
-                               (unsigned long)ppd);
- 
+                           (unsigned long)ppd);
                 ppd->led_override_timer.expires = jiffies + 1;
                 add_timer(&ppd->led_override_timer);
-       } else {
-               if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-                       mod_timer(&ppd->led_override_timer, jiffies + 1);
-               atomic_dec(&ppd->led_override_timer_active);
+               atomic_set(&ppd->led_override_timer_active, 1);
+               /* Ensure the atomic_set is visible to all CPUs */
+               smp_wmb();
         }
   }
   
@@@ -1184,8 -1268,8 +1268,8 @@@ int hfi1_reset_device(int unit
   
         if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
                 dd_dev_info(dd,
-                       "Invalid unit number %u or not initialized or not present\n",
-                       unit);
+                           "Invalid unit number %u or not initialized or not present\n",
+                           unit);
                 ret = -ENXIO;
                 goto bail;
         }
@@@ -1203,14 -1287,8 +1287,8 @@@
   
         for (pidx = 0; pidx < dd->num_pports; ++pidx) {
                 ppd = dd->pport + pidx;
-               if (atomic_read(&ppd->led_override_timer_active)) {
-                       /* Need to stop LED timer, _then_ shut off LEDs */
-                       del_timer_sync(&ppd->led_override_timer);
-                       atomic_set(&ppd->led_override_timer_active, 0);
-               }
   
-               /* Shut off LEDs after we are sure timer is not running */
-               ppd->led_override = LED_OVER_BOTH_OFF;
+               shutdown_led_override(ppd);
         }
         if (dd->flags & HFI1_HAS_SEND_DMA)
                 sdma_exit(dd);
@@@ -1221,11 -1299,11 +1299,11 @@@
   
         if (ret)
                 dd_dev_err(dd,
-                       "Reinitialize unit %u after reset failed with %d\n",
-                       unit, ret);
+                          "Reinitialize unit %u after reset failed with %d\n",
+                          unit, ret);
         else
                 dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
-                       unit);
+                           unit);
   
   bail:
         return ret;
@@@ -1282,7 -1360,7 +1360,7 @@@ int process_receive_bypass(struct hfi1_
                 handle_eflags(packet);
   
         dd_dev_err(packet->rcd->dd,
-          "Bypass packets are not supported in normal operation. Dropping\n");
+                  "Bypass packets are not supported in normal operation. Dropping\n");
         return RHF_RCV_CONTINUE;
   }
   
@@@ -1320,6 -1398,6 +1398,6 @@@ int kdeth_process_eager(struct hfi1_pac
   int process_receive_invalid(struct hfi1_packet *packet)
   {
         dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
-               rhf_rcv_type(packet->rhf));
+                  rhf_rcv_type(packet->rhf));
         return RHF_RCV_CONTINUE;
   }
diff --combined drivers/staging/rdma/hfi1/efivar.c

index 47dfe25847605ac2903d07f98f52bd227079534c,3f014f96f9e0a2b2a5f3d89b82e7f89ee586a371..106349fc1fb9bf5777284e15cfc83e9393ae380c
--- 1/drivers/staging/rdma/hfi1/efivar.c
--- 2/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/staging/rdma/hfi1/efivar.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -83,7 -80,8 +80,7 @@@ static int read_efi_var(const char *nam
         if (!efi_enabled(EFI_RUNTIME_SERVICES))
                 return -EOPNOTSUPP;
   
- -      uni_name = kzalloc(sizeof(efi_char16_t) * (strlen(name) + 1),
- -                         GFP_KERNEL);
+ +      uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL);
         temp_buffer = kzalloc(EFI_DATA_SIZE, GFP_KERNEL);
   
         if (!uni_name || !temp_buffer) {
diff --combined drivers/staging/rdma/hfi1/file_ops.c

index 8b911e8bf0df5edfe312d9343647d655ccc73466,e460261f94b7e39b04323609ead6ecbfb75d90ee..8396dc5fb6c1899bc32b775e19877b00bfd0151f
--- 1/drivers/staging/rdma/hfi1/file_ops.c
--- 2/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -60,6 -57,8 +57,8 @@@
   #include "user_sdma.h"
   #include "user_exp_rcv.h"
   #include "eprom.h"
+ #include "aspm.h"
+ #include "mmu_rb.h"
   
   #undef pr_fmt
   #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@@ -96,9 -95,6 +95,6 @@@ static int user_event_ack(struct hfi1_c
   static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
   static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
   static int vma_fault(struct vm_area_struct *, struct vm_fault *);
- static int exp_tid_setup(struct file *, struct hfi1_tid_info *);
- static int exp_tid_free(struct file *, struct hfi1_tid_info *);
- static void unlock_exp_tids(struct hfi1_ctxtdata *);
   
   static const struct file_operations hfi1_file_ops = {
         .owner = THIS_MODULE,
@@@ -164,7 -160,6 +160,6 @@@ enum mmap_types 
   #define dbg(fmt, ...)                         \
         pr_info(fmt, ##__VA_ARGS__)
   
- 
   static inline int is_valid_mmap(u64 token)
   {
         return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
@@@ -188,6 -183,7 +183,7 @@@ static ssize_t hfi1_file_write(struct f
         struct hfi1_cmd cmd;
         struct hfi1_user_info uinfo;
         struct hfi1_tid_info tinfo;
+       unsigned long addr;
         ssize_t consumed = 0, copy = 0, ret = 0;
         void *dest = NULL;
         __u64 user_val = 0;
@@@ -219,6 -215,7 +215,7 @@@
                 break;
         case HFI1_CMD_TID_UPDATE:
         case HFI1_CMD_TID_FREE:
+       case HFI1_CMD_TID_INVAL_READ:
                 copy = sizeof(tinfo);
                 dest = &tinfo;
                 break;
@@@ -294,9 -291,8 +291,8 @@@
                         sc_return_credits(uctxt->sc);
                 break;
         case HFI1_CMD_TID_UPDATE:
-               ret = exp_tid_setup(fp, &tinfo);
+               ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
                 if (!ret) {
-                       unsigned long addr;
                         /*
                          * Copy the number of tidlist entries we used
                          * and the length of the buffer we registered.
@@@ -311,8 -307,25 +307,25 @@@
                                 ret = -EFAULT;
                 }
                 break;
+       case HFI1_CMD_TID_INVAL_READ:
+               ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+               if (ret)
+                       break;
+               addr = (unsigned long)cmd.addr +
+                       offsetof(struct hfi1_tid_info, tidcnt);
+               if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+                                sizeof(tinfo.tidcnt)))
+                       ret = -EFAULT;
+               break;
         case HFI1_CMD_TID_FREE:
-               ret = exp_tid_free(fp, &tinfo);
+               ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+               if (ret)
+                       break;
+               addr = (unsigned long)cmd.addr +
+                       offsetof(struct hfi1_tid_info, tidcnt);
+               if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+                                sizeof(tinfo.tidcnt)))
+                       ret = -EFAULT;
                 break;
         case HFI1_CMD_RECV_CTRL:
                 ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
@@@ -373,8 -386,10 +386,10 @@@
                                 break;
                         }
                         if (dd->flags & HFI1_FORCED_FREEZE) {
-                               /* Don't allow context reset if we are into
-                                * forced freeze */
+                               /*
+                                * Don't allow context reset if we are into
+                                * forced freeze
+                                */
                                 ret = -ENODEV;
                                 break;
                         }
@@@ -382,8 -397,9 +397,9 @@@
                         ret = sc_enable(sc);
                         hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
                                      uctxt->ctxt);
-               } else
+               } else {
                         ret = sc_restart(sc);
+               }
                 if (!ret)
                         sc_return_credits(sc);
                 break;
@@@ -393,7 -409,7 +409,7 @@@
         case HFI1_CMD_EP_ERASE_RANGE:
         case HFI1_CMD_EP_READ_RANGE:
         case HFI1_CMD_EP_WRITE_RANGE:
-               ret = handle_eprom_command(&cmd);
+               ret = handle_eprom_command(fp, &cmd);
                 break;
         }
   
@@@ -487,7 -503,8 +503,7 @@@ static int hfi1_file_mmap(struct file *
                  * Map only the amount allocated to the context, not the
                  * entire available context's PIO space.
                  */
- -              memlen = ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE,
- -                             PAGE_SIZE);
+ +              memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
                 flags &= ~VM_MAYREAD;
                 flags |= VM_DONTCOPY | VM_DONTEXPAND;
                 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
@@@ -637,7 -654,7 +653,7 @@@
                         goto done;
                 }
                 memaddr = (u64)cq->comps;
- -              memlen = ALIGN(sizeof(*cq->comps) * cq->nentries, PAGE_SIZE);
+ +              memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
                 flags |= VM_IO | VM_DONTEXPAND;
                 vmf = 1;
                 break;
@@@ -732,6 -749,9 +748,9 @@@ static int hfi1_file_close(struct inod
         /* drain user sdma queue */
         hfi1_user_sdma_free_queues(fdata);
   
+       /* release the cpu */
+       hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+ 
         /*
          * Clear any left over, unhandled events so the next process that
          * gets this context doesn't get confused.
@@@ -755,6 -775,7 +774,7 @@@
         hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
                      HFI1_RCVCTRL_TIDFLOW_DIS |
                      HFI1_RCVCTRL_INTRAVAIL_DIS |
+                    HFI1_RCVCTRL_TAILUPD_DIS |
                      HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
                      HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
                      HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
@@@ -777,14 -798,12 +797,12 @@@
         uctxt->pionowait = 0;
         uctxt->event_flags = 0;
   
-       hfi1_clear_tids(uctxt);
+       hfi1_user_exp_rcv_free(fdata);
         hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
   
-       if (uctxt->tid_pg_list)
-               unlock_exp_tids(uctxt);
- 
         hfi1_stats.sps_ctxts--;
-       dd->freectxts++;
+       if (++dd->freectxts == dd->num_user_contexts)
+               aspm_enable_all(dd);
         mutex_unlock(&hfi1_mutex);
         hfi1_free_ctxtdata(dd, uctxt);
   done:
@@@ -826,8 -845,16 +844,16 @@@ static int assign_ctxt(struct file *fp
   
         mutex_lock(&hfi1_mutex);
         /* First, lets check if we need to setup a shared context? */
-       if (uinfo->subctxt_cnt)
+       if (uinfo->subctxt_cnt) {
+               struct hfi1_filedata *fd = fp->private_data;
+ 
                 ret = find_shared_ctxt(fp, uinfo);
+               if (ret < 0)
+                       goto done_unlock;
+               if (ret)
+                       fd->rec_cpu_num = hfi1_get_proc_affinity(
+                               fd->uctxt->dd, fd->uctxt->numa_id);
+       }
   
         /*
          * We execute the following block if we couldn't find a
@@@ -837,6 -864,7 +863,7 @@@
                 i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
                 ret = get_user_context(fp, uinfo, i_minor - 1, alg);
         }
+ done_unlock:
         mutex_unlock(&hfi1_mutex);
   done:
         return ret;
@@@ -962,7 -990,7 +989,7 @@@ static int allocate_ctxt(struct file *f
         struct hfi1_filedata *fd = fp->private_data;
         struct hfi1_ctxtdata *uctxt;
         unsigned ctxt;
-       int ret;
+       int ret, numa;
   
         if (dd->flags & HFI1_FROZEN) {
                 /*
@@@ -982,17 -1010,26 +1009,26 @@@
         if (ctxt == dd->num_rcv_contexts)
                 return -EBUSY;
   
-       uctxt = hfi1_create_ctxtdata(dd->pport, ctxt);
+       fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+       if (fd->rec_cpu_num != -1)
+               numa = cpu_to_node(fd->rec_cpu_num);
+       else
+               numa = numa_node_id();
+       uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
         if (!uctxt) {
                 dd_dev_err(dd,
                            "Unable to allocate ctxtdata memory, failing open\n");
                 return -ENOMEM;
         }
+       hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
+                 uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
+                 uctxt->numa_id);
+ 
         /*
          * Allocate and enable a PIO send context.
          */
         uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
-                            uctxt->numa_id);
+                            uctxt->dd->node);
         if (!uctxt->sc)
                 return -ENOMEM;
   
@@@ -1026,7 -1063,12 +1062,12 @@@
         INIT_LIST_HEAD(&uctxt->sdma_queues);
         spin_lock_init(&uctxt->sdma_qlock);
         hfi1_stats.sps_ctxts++;
-       dd->freectxts--;
+       /*
+        * Disable ASPM when there are open user/PSM contexts to avoid
+        * issues with ASPM L1 exit latency
+        */
+       if (dd->freectxts-- == dd->num_user_contexts)
+               aspm_disable_all(dd);
         fd->uctxt = uctxt;
   
         return 0;
@@@ -1035,22 -1077,19 +1076,19 @@@
   static int init_subctxts(struct hfi1_ctxtdata *uctxt,
                          const struct hfi1_user_info *uinfo)
   {
-       int ret = 0;
         unsigned num_subctxts;
   
         num_subctxts = uinfo->subctxt_cnt;
-       if (num_subctxts > HFI1_MAX_SHARED_CTXTS) {
-               ret = -EINVAL;
-               goto bail;
-       }
+       if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
+               return -EINVAL;
   
         uctxt->subctxt_cnt = uinfo->subctxt_cnt;
         uctxt->subctxt_id = uinfo->subctxt_id;
         uctxt->active_slaves = 1;
         uctxt->redirect_seq_cnt = 1;
         set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
- bail:
-       return ret;
+ 
+       return 0;
   }
   
   static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
@@@ -1105,10 -1144,10 +1143,10 @@@ static int user_init(struct file *fp
          * has done it.
          */
         if (fd->subctxt) {
-               ret = wait_event_interruptible(uctxt->wait,
-                       !test_bit(HFI1_CTXT_MASTER_UNINIT,
-                       &uctxt->event_flags));
-               goto done;
+               ret = wait_event_interruptible(uctxt->wait, !test_bit(
+                                              HFI1_CTXT_MASTER_UNINIT,
+                                              &uctxt->event_flags));
+               goto expected;
         }
   
         /* initialize poll variables... */
@@@ -1146,8 -1185,16 +1184,16 @@@
                 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
         if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
                 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+       /*
+        * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
+        * We can't rely on the correct value to be set from prior
+        * uses of the chip or ctxt. Therefore, add the rcvctrl op
+        * for both cases.
+        */
         if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
                 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+       else
+               rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
         hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
   
         /* Notify any waiting slaves */
@@@ -1155,8 -1202,18 +1201,18 @@@
                 clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
                 wake_up(&uctxt->wait);
         }
-       ret = 0;
   
+ expected:
+       /*
+        * Expected receive has to be set up for all processes (including
+        * shared contexts). However, it has to be done after the master
+        * context has been fully configured as it depends on the
+        * eager/expected split of the RcvArray entries.
+        * Setting it up here ensures that the subcontexts will be waiting
+        * (due to the above wait_event_interruptible()) until the master
+        * is set up.
+        */
+       ret = hfi1_user_exp_rcv_init(fp);
   done:
         return ret;
   }
@@@ -1226,46 -1283,6 +1282,6 @@@ static int setup_ctxt(struct file *fp
                         if (ret)
                                 goto done;
                 }
-               /* Setup Expected Rcv memories */
-               uctxt->tid_pg_list = vzalloc(uctxt->expected_count *
-                                            sizeof(struct page **));
-               if (!uctxt->tid_pg_list) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-               uctxt->physshadow = vzalloc(uctxt->expected_count *
-                                           sizeof(*uctxt->physshadow));
-               if (!uctxt->physshadow) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-               /* allocate expected TID map and initialize the cursor */
-               atomic_set(&uctxt->tidcursor, 0);
-               uctxt->numtidgroups = uctxt->expected_count /
-                       dd->rcv_entries.group_size;
-               uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG +
-                       !!(uctxt->numtidgroups % BITS_PER_LONG);
-               uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt *
-                                               sizeof(*uctxt->tidusemap),
-                                               GFP_KERNEL, uctxt->numa_id);
-               if (!uctxt->tidusemap) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-               /*
-                * In case that the number of groups is not a multiple of
-                * 64 (the number of groups in a tidusemap element), mark
-                * the extra ones as used. This will effectively make them
-                * permanently used and should never be assigned. Otherwise,
-                * the code which checks how many free groups we have will
-                * get completely confused about the state of the bits.
-                */
-               if (uctxt->numtidgroups % BITS_PER_LONG)
-                       uctxt->tidusemap[uctxt->tidmapcnt - 1] =
-                               ~((1ULL << (uctxt->numtidgroups %
-                                           BITS_PER_LONG)) - 1);
-               trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0,
-                                      uctxt->tidusemap, uctxt->tidmapcnt);
         }
         ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
         if (ret)
@@@ -1391,8 -1408,9 +1407,9 @@@ static unsigned int poll_next(struct fi
                 set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
                 pollflag = 0;
-       } else
+       } else {
                 pollflag = POLLIN | POLLRDNORM;
+       }
         spin_unlock_irq(&dd->uctxt_lock);
   
         return pollflag;
@@@ -1470,8 -1488,9 +1487,9 @@@ static int manage_rcvq(struct hfi1_ctxt
                 if (uctxt->rcvhdrtail_kvaddr)
                         clear_rcvhdrtail(uctxt);
                 rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
-       } else
+       } else {
                 rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
+       }
         hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
         /* always; new head should be equal to new tail; see above */
   bail:
@@@ -1504,367 -1523,6 +1522,6 @@@ static int user_event_ack(struct hfi1_c
         return 0;
   }
   
- #define num_user_pages(vaddr, len)                                    \
-       (1 + (((((unsigned long)(vaddr) +                               \
-                (unsigned long)(len) - 1) & PAGE_MASK) -               \
-              ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
- 
- /**
-  * tzcnt - count the number of trailing zeros in a 64bit value
-  * @value: the value to be examined
-  *
-  * Returns the number of trailing least significant zeros in the
-  * the input value. If the value is zero, return the number of
-  * bits of the value.
-  */
- static inline u8 tzcnt(u64 value)
- {
-       return value ? __builtin_ctzl(value) : sizeof(value) * 8;
- }
- 
- static inline unsigned num_free_groups(unsigned long map, u16 *start)
- {
-       unsigned free;
-       u16 bitidx = *start;
- 
-       if (bitidx >= BITS_PER_LONG)
-               return 0;
-       /* "Turn off" any bits set before our bit index */
-       map &= ~((1ULL << bitidx) - 1);
-       free = tzcnt(map) - bitidx;
-       while (!free && bitidx < BITS_PER_LONG) {
-               /* Zero out the last set bit so we look at the rest */
-               map &= ~(1ULL << bitidx);
-               /*
-                * Account for the previously checked bits and advance
-                * the bit index. We don't have to check for bitidx
-                * getting bigger than BITS_PER_LONG here as it would
-                * mean extra instructions that we don't need. If it
-                * did happen, it would push free to a negative value
-                * which will break the loop.
-                */
-               free = tzcnt(map) - ++bitidx;
-       }
-       *start = bitidx;
-       return free;
- }
- 
- static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo)
- {
-       int ret = 0;
-       struct hfi1_filedata *fd = fp->private_data;
-       struct hfi1_ctxtdata *uctxt = fd->uctxt;
-       struct hfi1_devdata *dd = uctxt->dd;
-       unsigned tid, mapped = 0, npages, ngroups, exp_groups,
-               tidpairs = uctxt->expected_count / 2;
-       struct page **pages;
-       unsigned long vaddr, tidmap[uctxt->tidmapcnt];
-       dma_addr_t *phys;
-       u32 tidlist[tidpairs], pairidx = 0, tidcursor;
-       u16 useidx, idx, bitidx, tidcnt = 0;
- 
-       vaddr = tinfo->vaddr;
- 
-       if (offset_in_page(vaddr)) {
-               ret = -EINVAL;
-               goto bail;
-       }
- 
-       npages = num_user_pages(vaddr, tinfo->length);
-       if (!npages) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-                      npages * PAGE_SIZE)) {
-               dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-                          (void *)vaddr, npages);
-               ret = -EFAULT;
-               goto bail;
-       }
- 
-       memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt);
-       memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs);
- 
-       exp_groups = uctxt->expected_count / dd->rcv_entries.group_size;
-       /* which group set do we look at first? */
-       tidcursor = atomic_read(&uctxt->tidcursor);
-       useidx = (tidcursor >> 16) & 0xffff;
-       bitidx = tidcursor & 0xffff;
- 
-       /*
-        * Keep going until we've mapped all pages or we've exhausted all
-        * RcvArray entries.
-        * This iterates over the number of tidmaps + 1
-        * (idx <= uctxt->tidmapcnt) so we check the bitmap which we
-        * started from one more time for any free bits before the
-        * starting point bit.
-        */
-       for (mapped = 0, idx = 0;
-            mapped < npages && idx <= uctxt->tidmapcnt;) {
-               u64 i, offset = 0;
-               unsigned free, pinned, pmapped = 0, bits_used;
-               u16 grp;
- 
-               /*
-                * "Reserve" the needed group bits under lock so other
-                * processes can't step in the middle of it. Once
-                * reserved, we don't need the lock anymore since we
-                * are guaranteed the groups.
-                */
-               spin_lock(&uctxt->exp_lock);
-               if (uctxt->tidusemap[useidx] == -1ULL ||
-                   bitidx >= BITS_PER_LONG) {
-                       /* no free groups in the set, use the next */
-                       useidx = (useidx + 1) % uctxt->tidmapcnt;
-                       idx++;
-                       bitidx = 0;
-                       spin_unlock(&uctxt->exp_lock);
-                       continue;
-               }
-               ngroups = ((npages - mapped) / dd->rcv_entries.group_size) +
-                       !!((npages - mapped) % dd->rcv_entries.group_size);
- 
-               /*
-                * If we've gotten here, the current set of groups does have
-                * one or more free groups.
-                */
-               free = num_free_groups(uctxt->tidusemap[useidx], &bitidx);
-               if (!free) {
-                       /*
-                        * Despite the check above, free could still come back
-                        * as 0 because we don't check the entire bitmap but
-                        * we start from bitidx.
-                        */
-                       spin_unlock(&uctxt->exp_lock);
-                       continue;
-               }
-               bits_used = min(free, ngroups);
-               tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx;
-               uctxt->tidusemap[useidx] |= tidmap[useidx];
-               spin_unlock(&uctxt->exp_lock);
- 
-               /*
-                * At this point, we know where in the map we have free bits.
-                * properly offset into the various "shadow" arrays and compute
-                * the RcvArray entry index.
-                */
-               offset = ((useidx * BITS_PER_LONG) + bitidx) *
-                       dd->rcv_entries.group_size;
-               pages = uctxt->tid_pg_list + offset;
-               phys = uctxt->physshadow + offset;
-               tid = uctxt->expected_base + offset;
- 
-               /* Calculate how many pages we can pin based on free bits */
-               pinned = min((bits_used * dd->rcv_entries.group_size),
-                            (npages - mapped));
-               /*
-                * Now that we know how many free RcvArray entries we have,
-                * we can pin that many user pages.
-                */
-               ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE),
-                                             pinned, true, pages);
-               if (ret) {
-                       /*
-                        * We can't continue because the pages array won't be
-                        * initialized. This should never happen,
-                        * unless perhaps the user has mpin'ed the pages
-                        * themselves.
-                        */
-                       dd_dev_info(dd,
-                                   "Failed to lock addr %p, %u pages: errno %d\n",
-                                   (void *) vaddr, pinned, -ret);
-                       /*
-                        * Let go of the bits that we reserved since we are not
-                        * going to use them.
-                        */
-                       spin_lock(&uctxt->exp_lock);
-                       uctxt->tidusemap[useidx] &=
-                               ~(((1ULL << bits_used) - 1) << bitidx);
-                       spin_unlock(&uctxt->exp_lock);
-                       goto done;
-               }
-               /*
-                * How many groups do we need based on how many pages we have
-                * pinned?
-                */
-               ngroups = (pinned / dd->rcv_entries.group_size) +
-                       !!(pinned % dd->rcv_entries.group_size);
-               /*
-                * Keep programming RcvArray entries for all the <ngroups> free
-                * groups.
-                */
-               for (i = 0, grp = 0; grp < ngroups; i++, grp++) {
-                       unsigned j;
-                       u32 pair_size = 0, tidsize;
-                       /*
-                        * This inner loop will program an entire group or the
-                        * array of pinned pages (which ever limit is hit
-                        * first).
-                        */
-                       for (j = 0; j < dd->rcv_entries.group_size &&
-                                    pmapped < pinned; j++, pmapped++, tid++) {
-                               tidsize = PAGE_SIZE;
-                               phys[pmapped] = hfi1_map_page(dd->pcidev,
-                                                  pages[pmapped], 0,
-                                                  tidsize, PCI_DMA_FROMDEVICE);
-                               trace_hfi1_exp_rcv_set(uctxt->ctxt,
-                                                      fd->subctxt,
-                                                      tid, vaddr,
-                                                      phys[pmapped],
-                                                      pages[pmapped]);
-                               /*
-                                * Each RcvArray entry is programmed with one
-                                * page * worth of memory. This will handle
-                                * the 8K MTU as well as anything smaller
-                                * due to the fact that both entries in the
-                                * RcvTidPair are programmed with a page.
-                                * PSM currently does not handle anything
-                                * bigger than 8K MTU, so should we even worry
-                                * about 10K here?
-                                */
-                               hfi1_put_tid(dd, tid, PT_EXPECTED,
-                                            phys[pmapped],
-                                            ilog2(tidsize >> PAGE_SHIFT) + 1);
-                               pair_size += tidsize >> PAGE_SHIFT;
-                               EXP_TID_RESET(tidlist[pairidx], LEN, pair_size);
-                               if (!(tid % 2)) {
-                                       tidlist[pairidx] |=
-                                          EXP_TID_SET(IDX,
-                                               (tid - uctxt->expected_base)
-                                                      / 2);
-                                       tidlist[pairidx] |=
-                                               EXP_TID_SET(CTRL, 1);
-                                       tidcnt++;
-                               } else {
-                                       tidlist[pairidx] |=
-                                               EXP_TID_SET(CTRL, 2);
-                                       pair_size = 0;
-                                       pairidx++;
-                               }
-                       }
-                       /*
-                        * We've programmed the entire group (or as much of the
-                        * group as we'll use. Now, it's time to push it out...
-                        */
-                       flush_wc();
-               }
-               mapped += pinned;
-               atomic_set(&uctxt->tidcursor,
-                          (((useidx & 0xffffff) << 16) |
-                           ((bitidx + bits_used) & 0xffffff)));
-       }
-       trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap,
-                              uctxt->tidmapcnt);
- 
- done:
-       /* If we've mapped anything, copy relevant info to user */
-       if (mapped) {
-               if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
-                                tidlist, sizeof(tidlist[0]) * tidcnt)) {
-                       ret = -EFAULT;
-                       goto done;
-               }
-               /* copy TID info to user */
-               if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap,
-                                tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt))
-                       ret = -EFAULT;
-       }
- bail:
-       /*
-        * Calculate mapped length. New Exp TID protocol does not "unwind" and
-        * report an error if it can't map the entire buffer. It just reports
-        * the length that was mapped.
-        */
-       tinfo->length = mapped * PAGE_SIZE;
-       tinfo->tidcnt = tidcnt;
-       return ret;
- }
- 
- static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo)
- {
-       struct hfi1_filedata *fd = fp->private_data;
-       struct hfi1_ctxtdata *uctxt = fd->uctxt;
-       struct hfi1_devdata *dd = uctxt->dd;
-       unsigned long tidmap[uctxt->tidmapcnt];
-       struct page **pages;
-       dma_addr_t *phys;
-       u16 idx, bitidx, tid;
-       int ret = 0;
- 
-       if (copy_from_user(&tidmap, (void __user *)(unsigned long)
-                          tinfo->tidmap,
-                          sizeof(tidmap[0]) * uctxt->tidmapcnt)) {
-               ret = -EFAULT;
-               goto done;
-       }
-       for (idx = 0; idx < uctxt->tidmapcnt; idx++) {
-               unsigned long map;
- 
-               bitidx = 0;
-               if (!tidmap[idx])
-                       continue;
-               map = tidmap[idx];
-               while ((bitidx = tzcnt(map)) < BITS_PER_LONG) {
-                       int i, pcount = 0;
-                       struct page *pshadow[dd->rcv_entries.group_size];
-                       unsigned offset = ((idx * BITS_PER_LONG) + bitidx) *
-                               dd->rcv_entries.group_size;
- 
-                       pages = uctxt->tid_pg_list + offset;
-                       phys = uctxt->physshadow + offset;
-                       tid = uctxt->expected_base + offset;
-                       for (i = 0; i < dd->rcv_entries.group_size;
-                            i++, tid++) {
-                               if (pages[i]) {
-                                       hfi1_put_tid(dd, tid, PT_INVALID,
-                                                     0, 0);
-                                       trace_hfi1_exp_rcv_free(uctxt->ctxt,
-                                                               fd->subctxt,
-                                                               tid, phys[i],
-                                                               pages[i]);
-                                       pci_unmap_page(dd->pcidev, phys[i],
-                                             PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                                       pshadow[pcount] = pages[i];
-                                       pages[i] = NULL;
-                                       pcount++;
-                                       phys[i] = 0;
-                               }
-                       }
-                       flush_wc();
-                       hfi1_release_user_pages(pshadow, pcount, true);
-                       clear_bit(bitidx, &uctxt->tidusemap[idx]);
-                       map &= ~(1ULL<<bitidx);
-               }
-       }
-       trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap,
-                              uctxt->tidmapcnt);
- done:
-       return ret;
- }
- 
- static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt)
- {
-       struct hfi1_devdata *dd = uctxt->dd;
-       unsigned tid;
- 
-       dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n",
-                   uctxt->ctxt);
-       for (tid = 0; tid < uctxt->expected_count; tid++) {
-               struct page *p = uctxt->tid_pg_list[tid];
-               dma_addr_t phys;
- 
-               if (!p)
-                       continue;
- 
-               phys = uctxt->physshadow[tid];
-               uctxt->physshadow[tid] = 0;
-               uctxt->tid_pg_list[tid] = NULL;
-               pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-               hfi1_release_user_pages(&p, 1, true);
-       }
- }
- 
   static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
                          u16 pkey)
   {
@@@ -1933,10 -1591,9 +1590,9 @@@ static loff_t ui_lseek(struct file *fil
         return filp->f_pos;
   }
   
- 
   /* NOTE: assumes unsigned long is 8 bytes */
   static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
-                       loff_t *f_pos)
+                      loff_t *f_pos)
   {
         struct hfi1_devdata *dd = filp->private_data;
         void __iomem *base = dd->kregbase;
@@@ -1972,12 -1629,12 +1628,12 @@@
                  * them.  These registers are defined as having a read value
                  * of 0.
                  */
-               else if (csr_off == ASIC_GPIO_CLEAR
-                               || csr_off == ASIC_GPIO_FORCE
-                               || csr_off == ASIC_QSFP1_CLEAR
-                               || csr_off == ASIC_QSFP1_FORCE
-                               || csr_off == ASIC_QSFP2_CLEAR
-                               || csr_off == ASIC_QSFP2_FORCE)
+               else if (csr_off == ASIC_GPIO_CLEAR ||
+                        csr_off == ASIC_GPIO_FORCE ||
+                        csr_off == ASIC_QSFP1_CLEAR ||
+                        csr_off == ASIC_QSFP1_FORCE ||
+                        csr_off == ASIC_QSFP2_CLEAR ||
+                        csr_off == ASIC_QSFP2_FORCE)
                         data = 0;
                 else if (csr_off >= barlen) {
                         /*
diff --combined drivers/staging/rdma/hfi1/init.c

index 02df291eb172c23b44b9a44adc16421107326ff8,deabb0812023e8c899fc12368228c6fdc39f27bc..cfcdc16b41c371a18a4e1cec24b51c4c421b7e80
--- 1/drivers/staging/rdma/hfi1/init.c
--- 2/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -56,6 -53,7 +53,7 @@@
   #include <linux/module.h>
   #include <linux/printk.h>
   #include <linux/hrtimer.h>
+ #include <rdma/rdma_vt.h>
   
   #include "hfi.h"
   #include "device.h"
@@@ -65,6 -63,7 +63,7 @@@
   #include "sdma.h"
   #include "debugfs.h"
   #include "verbs.h"
+ #include "aspm.h"
   
   #undef pr_fmt
   #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@@ -75,6 -74,7 +74,7 @@@
   #define HFI1_MIN_USER_CTXT_BUFCNT 7
   
   #define HFI1_MIN_HDRQ_EGRBUF_CNT 2
+ #define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
   #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
   #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
   
@@@ -87,9 -87,9 +87,9 @@@ module_param_named(num_user_contexts, n
   MODULE_PARM_DESC(
         num_user_contexts, "Set max number of user contexts to use");
   
- u8 krcvqs[RXE_NUM_DATA_VL];
+ uint krcvqs[RXE_NUM_DATA_VL];
   int krcvqsset;
- module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO);
+ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
   MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
   
   /* computed based on above array */
@@@ -128,16 -128,12 +128,12 @@@ int hfi1_create_ctxts(struct hfi1_devda
   {
         unsigned i;
         int ret;
-       int local_node_id = pcibus_to_node(dd->pcidev->bus);
   
         /* Control context has to be always 0 */
         BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
   
-       if (local_node_id < 0)
-               local_node_id = numa_node_id();
-       dd->assigned_node_id = local_node_id;
- 
-       dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL);
+       dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
+                              GFP_KERNEL, dd->node);
         if (!dd->rcd)
                 goto nomem;
   
@@@ -147,10 -143,10 +143,10 @@@
                 struct hfi1_ctxtdata *rcd;
   
                 ppd = dd->pport + (i % dd->num_pports);
-               rcd = hfi1_create_ctxtdata(ppd, i);
+               rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
                 if (!rcd) {
                         dd_dev_err(dd,
-                               "Unable to allocate kernel receive context, failing\n");
+                                  "Unable to allocate kernel receive context, failing\n");
                         goto nomem;
                 }
                 /*
@@@ -171,7 -167,7 +167,7 @@@
                 rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
                 if (!rcd->sc) {
                         dd_dev_err(dd,
-                               "Unable to allocate kernel send context, failing\n");
+                                  "Unable to allocate kernel send context, failing\n");
                         dd->rcd[rcd->ctxt] = NULL;
                         hfi1_free_ctxtdata(dd, rcd);
                         goto nomem;
@@@ -189,6 -185,12 +185,12 @@@
                 }
         }
   
+       /*
+        * Initialize aspm, to be done after gen3 transition and setting up
+        * contexts and before enabling interrupts
+        */
+       aspm_init(dd);
+ 
         return 0;
   nomem:
         ret = -ENOMEM;
@@@ -201,7 -203,8 +203,8 @@@ bail
   /*
    * Common code for user and kernel context setup.
    */
- struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
+ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+                                          int numa)
   {
         struct hfi1_devdata *dd = ppd->dd;
         struct hfi1_ctxtdata *rcd;
@@@ -224,10 -227,10 +227,10 @@@
                 rcd->cnt = 1;
                 rcd->ctxt = ctxt;
                 dd->rcd[ctxt] = rcd;
-               rcd->numa_id = numa_node_id();
+               rcd->numa_id = numa;
                 rcd->rcv_array_groups = dd->rcv_entries.ngroups;
   
-               spin_lock_init(&rcd->exp_lock);
+               mutex_init(&rcd->exp_lock);
   
                 /*
                  * Calculate the context's RcvArray entry starting point.
@@@ -260,7 -263,7 +263,7 @@@
                 /* Validate and initialize Rcv Hdr Q variables */
                 if (rcvhdrcnt % HDRQ_INCREMENT) {
                         dd_dev_err(dd,
-                                  "ctxt%u: header queue count %d must be divisible by %d\n",
+                                  "ctxt%u: header queue count %d must be divisible by %lu\n",
                                    rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
                         goto bail;
                 }
@@@ -332,6 -335,7 +335,6 @@@
         }
         return rcd;
   bail:
- -      kfree(rcd->opstats);
         kfree(rcd->egrbufs.rcvtids);
         kfree(rcd->egrbufs.buffers);
         kfree(rcd);
@@@ -379,7 -383,7 +382,7 @@@ void set_link_ipg(struct hfi1_pportdat
   
         cc_state = get_cc_state(ppd);
   
-       if (cc_state == NULL)
+       if (!cc_state)
                 /*
                  * This should _never_ happen - rcu_read_lock() is held,
                  * and set_link_ipg() should not be called if cc_state
@@@ -431,7 -435,7 +434,7 @@@ static enum hrtimer_restart cca_timer_f
   
         cc_state = get_cc_state(ppd);
   
-       if (cc_state == NULL) {
+       if (!cc_state) {
                 rcu_read_unlock();
                 return HRTIMER_NORESTART;
         }
@@@ -493,14 -497,19 +496,19 @@@ void hfi1_init_pportdata(struct pci_de
         INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
         INIT_WORK(&ppd->link_up_work, handle_link_up);
         INIT_WORK(&ppd->link_down_work, handle_link_down);
+       INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
         INIT_WORK(&ppd->freeze_work, handle_freeze);
         INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
         INIT_WORK(&ppd->sma_message_work, handle_sma_message);
         INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+       INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
+       INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
+ 
         mutex_init(&ppd->hls_lock);
         spin_lock_init(&ppd->sdma_alllock);
         spin_lock_init(&ppd->qsfp_info.qsfp_lock);
   
+       ppd->qsfp_info.ppd = ppd;
         ppd->sm_trap_qp = 0x0;
         ppd->sa_qp = 0x1;
   
@@@ -582,8 -591,8 +590,8 @@@ static void enable_chip(struct hfi1_dev
          * Enable kernel ctxts' receive and receive interrupt.
          * Other ctxts done as user opens and initializes them.
          */
-       rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
         for (i = 0; i < dd->first_user_ctxt; ++i) {
+               rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
                 rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
                         HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
                 if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR))
@@@ -729,14 -738,14 +737,14 @@@ int hfi1_init(struct hfi1_devdata *dd, 
                         lastfail = hfi1_setup_eagerbufs(rcd);
                 if (lastfail)
                         dd_dev_err(dd,
-                               "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+                                  "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
         }
         if (lastfail)
                 ret = lastfail;
   
         /* Allocate enough memory for user event notification. */
- -      len = ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
- -                  sizeof(*dd->events), PAGE_SIZE);
+ +      len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
+ +                       sizeof(*dd->events));
         dd->events = vmalloc_user(len);
         if (!dd->events)
                 dd_dev_err(dd, "Failed to allocate user events page\n");
@@@ -762,7 -771,6 +770,6 @@@
         /* enable chip even if we have an error, so we can debug cause */
         enable_chip(dd);
   
-       ret = hfi1_cq_init(dd);
   done:
         /*
          * Set status even if port serdes is not initialized
@@@ -779,20 -787,15 +786,15 @@@
                 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
                         ppd = dd->pport + pidx;
   
-                       /* initialize the qsfp if it exists
-                        * Requires interrupts to be enabled so we are notified
-                        * when the QSFP completes reset, and has
-                        * to be done before bringing up the SERDES
+                       /*
+                        * start the serdes - must be after interrupts are
+                        * enabled so we are notified when the link goes up
                          */
-                       init_qsfp(ppd);
- 
-                       /* start the serdes - must be after interrupts are
-                          enabled so we are notified when the link goes up */
                         lastfail = bringup_serdes(ppd);
                         if (lastfail)
                                 dd_dev_info(dd,
-                                       "Failed to bring up port %u\n",
-                                       ppd->port);
+                                           "Failed to bring up port %u\n",
+                                           ppd->port);
   
                         /*
                          * Set status even if port serdes is not initialized
@@@ -904,6 -907,8 +906,8 @@@ static void shutdown_device(struct hfi1
                 /* disable the send device */
                 pio_send_control(dd, PSC_GLOBAL_DISABLE);
   
+               shutdown_led_override(ppd);
+ 
                 /*
                  * Clear SerdesEnable.
                  * We can't count on interrupts since we are stopping.
@@@ -961,17 -966,33 +965,33 @@@ void hfi1_free_ctxtdata(struct hfi1_dev
         kfree(rcd->egrbufs.buffers);
   
         sc_free(rcd->sc);
-       vfree(rcd->physshadow);
-       vfree(rcd->tid_pg_list);
         vfree(rcd->user_event_mask);
         vfree(rcd->subctxt_uregbase);
         vfree(rcd->subctxt_rcvegrbuf);
         vfree(rcd->subctxt_rcvhdr_base);
-       kfree(rcd->tidusemap);
         kfree(rcd->opstats);
         kfree(rcd);
   }
   
+ /*
+  * Release our hold on the shared asic data.  If we are the last one,
+  * free the structure.  Must be holding hfi1_devs_lock.
+  */
+ static void release_asic_data(struct hfi1_devdata *dd)
+ {
+       int other;
+ 
+       if (!dd->asic_data)
+               return;
+       dd->asic_data->dds[dd->hfi1_id] = NULL; /* drop this device's own slot */
+       other = dd->hfi1_id ? 0 : 1; /* the other slot; assumes hfi1_id is 0 or 1 - TODO confirm */
+       if (!dd->asic_data->dds[other]) {
+               /* we are the last holder, free it */
+               kfree(dd->asic_data);
+       }
+       dd->asic_data = NULL; /* this dd no longer references the shared data either way */
+ }
+ 
   void hfi1_free_devdata(struct hfi1_devdata *dd)
   {
         unsigned long flags;
@@@ -979,12 -1000,15 +999,15 @@@
         spin_lock_irqsave(&hfi1_devs_lock, flags);
         idr_remove(&hfi1_unit_table, dd->unit);
         list_del(&dd->list);
+       release_asic_data(dd);
         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-       hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+       free_platform_config(dd);
         rcu_barrier(); /* wait for rcu callbacks to complete */
         free_percpu(dd->int_counter);
         free_percpu(dd->rcv_limit);
-       ib_dealloc_device(&dd->verbs_dev.ibdev);
+       hfi1_dev_affinity_free(dd);
+       free_percpu(dd->send_schedule);
+       ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
   }
   
   /*
@@@ -999,19 -1023,19 +1022,19 @@@ struct hfi1_devdata *hfi1_alloc_devdata
   {
         unsigned long flags;
         struct hfi1_devdata *dd;
-       int ret;
+       int ret, nports;
   
-       dd = (struct hfi1_devdata *)ib_alloc_device(sizeof(*dd) + extra);
+       /* extra is sizeof(struct hfi1_pportdata) * number of ports */
+       nports = extra / sizeof(struct hfi1_pportdata);
+ 
+       dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+                                                    nports);
         if (!dd)
                 return ERR_PTR(-ENOMEM);
-       /* extra is * number of ports */
-       dd->num_pports = extra / sizeof(struct hfi1_pportdata);
+       dd->num_pports = nports;
         dd->pport = (struct hfi1_pportdata *)(dd + 1);
   
         INIT_LIST_HEAD(&dd->list);
-       dd->node = dev_to_node(&pdev->dev);
-       if (dd->node < 0)
-               dd->node = 0;
         idr_preload(GFP_KERNEL);
         spin_lock_irqsave(&hfi1_devs_lock, flags);
   
@@@ -1041,9 -1065,9 +1064,9 @@@
         spin_lock_init(&dd->sc_init_lock);
         spin_lock_init(&dd->dc8051_lock);
         spin_lock_init(&dd->dc8051_memlock);
-       mutex_init(&dd->qsfp_i2c_mutex);
         seqlock_init(&dd->sc2vl_lock);
         spin_lock_init(&dd->sde_map_lock);
+       spin_lock_init(&dd->pio_map_lock);
         init_waitqueue_head(&dd->event_queue);
   
         dd->int_counter = alloc_percpu(u64);
@@@ -1062,6 -1086,14 +1085,14 @@@
                 goto bail;
         }
   
+       dd->send_schedule = alloc_percpu(u64); /* released via free_percpu() in hfi1_free_devdata() */
+       if (!dd->send_schedule) {
+               ret = -ENOMEM;
+               hfi1_early_err(&pdev->dev,
+                              "Could not allocate per-cpu send_schedule\n");
+               goto bail;
+       }
+       }
+ 
         if (!hfi1_cpulist_count) {
                 u32 count = num_online_cpus();
   
@@@ -1074,13 -1106,12 +1105,12 @@@
                         &pdev->dev,
                         "Could not alloc cpulist info, cpu affinity might be wrong\n");
         }
-       hfi1_dbg_ibdev_init(&dd->verbs_dev);
         return dd;
   
   bail:
         if (!list_empty(&dd->list))
                 list_del_init(&dd->list);
-       ib_dealloc_device(&dd->verbs_dev.ibdev);
+       ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
         return ERR_PTR(ret);
   }
   
@@@ -1173,8 -1204,10 +1203,10 @@@ static int __init hfi1_mod_init(void
                 user_credit_return_threshold = 100;
   
         compute_krcvqs();
-       /* sanitize receive interrupt count, time must wait until after
-          the hardware type is known */
+       /*
+        * sanitize receive interrupt count, time must wait until after
+        * the hardware type is known
+        */
         if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
                 rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
         /* reject invalid combinations */
@@@ -1209,6 -1242,9 +1241,9 @@@
         idr_init(&hfi1_unit_table);
   
         hfi1_dbg_init();
+       ret = hfi1_wss_init();
+       if (ret < 0)
+               goto bail_wss;
         ret = pci_register_driver(&hfi1_pci_driver);
         if (ret < 0) {
                 pr_err("Unable to register driver: error %d\n", -ret);
@@@ -1217,6 -1253,8 +1252,8 @@@
         goto bail; /* all OK */
   
   bail_dev:
+       hfi1_wss_exit();
+ bail_wss:
         hfi1_dbg_exit();
         idr_destroy(&hfi1_unit_table);
         dev_cleanup();
@@@ -1232,6 -1270,7 +1269,7 @@@ module_init(hfi1_mod_init)
   static void __exit hfi1_mod_cleanup(void)
   {
         pci_unregister_driver(&hfi1_pci_driver);
+       hfi1_wss_exit();
         hfi1_dbg_exit();
         hfi1_cpulist_count = 0;
         kfree(hfi1_cpulist);
@@@ -1303,16 -1342,18 +1341,18 @@@ static void cleanup_device_data(struct 
                 }
         }
         kfree(tmp);
+       free_pio_map(dd);
         /* must follow rcv context free - need to remove rcv's hooks */
         for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
                 sc_free(dd->send_contexts[ctxt].sc);
         dd->num_send_contexts = 0;
         kfree(dd->send_contexts);
         dd->send_contexts = NULL;
+       kfree(dd->hw_to_sw);
+       dd->hw_to_sw = NULL;
         kfree(dd->boardname);
         vfree(dd->events);
         vfree(dd->status);
-       hfi1_cq_exit(dd);
   }
   
   /*
@@@ -1346,6 -1387,13 +1386,13 @@@ static int init_one(struct pci_dev *pde
                 ret = -EINVAL;
                 goto bail;
         }
+       if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+               hfi1_early_err(&pdev->dev,
+                              "Receive header queue count cannot be greater than %u\n",
+                              HFI1_MAX_HDRQ_EGRBUF_CNT);
+               ret = -EINVAL;
+               goto bail;
+       }
         /* use the encoding function as a sanitization check */
         if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
                 hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@@ -1422,8 -1470,11 +1469,11 @@@
          * we still create devices, so diags, etc. can be used
          * to determine cause of problem.
          */
-       if (!initfail && !ret)
+       if (!initfail && !ret) {
                 dd->flags |= HFI1_INITTED;
+               /* create debugfs files after init and ib register */
+               hfi1_dbg_ibdev_init(&dd->verbs_dev);
+       }
   
         j = hfi1_device_create(dd);
         if (j)
@@@ -1464,6 -1515,8 +1514,8 @@@ static void remove_one(struct pci_dev *
   {
         struct hfi1_devdata *dd = pci_get_drvdata(pdev);
   
+       /* close debugfs files before ib unregister */
+       hfi1_dbg_ibdev_exit(&dd->verbs_dev);
         /* unregister from IB core */
         hfi1_unregister_ib_device(dd);
   
@@@ -1505,8 -1558,8 +1557,8 @@@ int hfi1_create_rcvhdrq(struct hfi1_dev
                  * rcvhdrqentsize is in DWs, so we have to convert to bytes
                  * (* sizeof(u32)).
                  */
- -              amt = ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
- -                          sizeof(u32), PAGE_SIZE);
+ +              amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
+ +                               sizeof(u32));
   
                 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
                         GFP_USER : GFP_KERNEL;
@@@ -1516,18 -1569,11 +1568,11 @@@
   
                 if (!rcd->rcvhdrq) {
                         dd_dev_err(dd,
-                               "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
-                               amt, rcd->ctxt);
+                                  "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+                                  amt, rcd->ctxt);
                         goto bail;
                 }
   
-               /* Event mask is per device now and is in hfi1_devdata */
-               /*if (rcd->ctxt >= dd->first_user_ctxt) {
-                       rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
-                       if (!rcd->user_event_mask)
-                               goto bail_free_hdrq;
-                               }*/
- 
                 if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
                         rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
                                 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
@@@ -1568,8 -1614,8 +1613,8 @@@
   
   bail_free:
         dd_dev_err(dd,
-               "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
-               rcd->ctxt);
+                  "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+                  rcd->ctxt);
         vfree(rcd->user_event_mask);
         rcd->user_event_mask = NULL;
         dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
@@@ -1659,7 -1705,7 +1704,7 @@@ int hfi1_setup_eagerbufs(struct hfi1_ct
                         if (rcd->egrbufs.rcvtid_size == round_mtu ||
                             !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
                                 dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
-                                       rcd->ctxt);
+                                          rcd->ctxt);
                                 goto bail_rcvegrbuf_phys;
                         }
   
@@@ -1694,8 -1740,9 +1739,9 @@@
                                      rcd->egrbufs.buffers[j].len)) {
                                         j++;
                                         offset = 0;
-                               } else
+                               } else {
                                         offset += new_size;
+                               }
                         }
                         rcd->egrbufs.rcvtid_size = new_size;
                 }
@@@ -1708,7 -1755,6 +1754,6 @@@
                   rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
                   rcd->egrbufs.size);
   
- 
         /*
          * Set the contexts rcv array head update threshold to the closest
          * power of 2 (so we can use a mask instead of modulo) below half
@@@ -1742,14 -1788,14 +1787,14 @@@
   
         for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
                 hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
-                             rcd->egrbufs.rcvtids[idx].phys, order);
+                            rcd->egrbufs.rcvtids[idx].phys, order);
                 cond_resched();
         }
         goto bail;
   
   bail_rcvegrbuf_phys:
         for (idx = 0; idx < rcd->egrbufs.alloced &&
-                    rcd->egrbufs.buffers[idx].addr;
+            rcd->egrbufs.buffers[idx].addr;
              idx++) {
                 dma_free_coherent(&dd->pcidev->dev,
                                   rcd->egrbufs.buffers[idx].len,
diff --combined drivers/staging/rdma/hfi1/mad.c

index 77700b818e3d89fecf428756bba1886b7ce8ce28,0ec748e7e7b649419732351fc2e61b7ba7813355..d1e7f4d7cf6fdf3fb32d699af45348201511b38d
--- 1/drivers/staging/rdma/hfi1/mad.c
--- 2/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/staging/rdma/hfi1/mad.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -55,6 -52,7 +52,7 @@@
   #include "hfi.h"
   #include "mad.h"
   #include "trace.h"
+ #include "qp.h"
   
   /* the reset value from the FM is supposed to be 0xffff, handle both */
   #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@@ -91,7 -89,7 +89,7 @@@ static void send_trap(struct hfi1_ibpor
         int pkey_idx;
         u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
   
-       agent = ibp->send_agent;
+       agent = ibp->rvp.send_agent;
         if (!agent)
                 return;
   
@@@ -100,7 -98,8 +98,8 @@@
                 return;
   
         /* o14-2 */
-       if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+       if (ibp->rvp.trap_timeout && time_before(jiffies,
+                                                ibp->rvp.trap_timeout))
                 return;
   
         pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@@ -121,42 -120,43 +120,43 @@@
         smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
         smp->class_version = OPA_SMI_CLASS_VERSION;
         smp->method = IB_MGMT_METHOD_TRAP;
-       ibp->tid++;
-       smp->tid = cpu_to_be64(ibp->tid);
+       ibp->rvp.tid++;
+       smp->tid = cpu_to_be64(ibp->rvp.tid);
         smp->attr_id = IB_SMP_ATTR_NOTICE;
         /* o14-1: smp->mkey = 0; */
         memcpy(smp->route.lid.data, data, len);
   
-       spin_lock_irqsave(&ibp->lock, flags);
-       if (!ibp->sm_ah) {
-               if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+       spin_lock_irqsave(&ibp->rvp.lock, flags);
+       if (!ibp->rvp.sm_ah) {
+               if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
                         struct ib_ah *ah;
   
-                       ah = hfi1_create_qp0_ah(ibp, ibp->sm_lid);
-                       if (IS_ERR(ah))
+                       ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
+                       if (IS_ERR(ah)) {
                                 ret = PTR_ERR(ah);
-                       else {
+                       } else {
                                 send_buf->ah = ah;
-                               ibp->sm_ah = to_iah(ah);
+                               ibp->rvp.sm_ah = ibah_to_rvtah(ah);
                                 ret = 0;
                         }
-               } else
+               } else {
                         ret = -EINVAL;
+               }
         } else {
-               send_buf->ah = &ibp->sm_ah->ibah;
+               send_buf->ah = &ibp->rvp.sm_ah->ibah;
                 ret = 0;
         }
-       spin_unlock_irqrestore(&ibp->lock, flags);
+       spin_unlock_irqrestore(&ibp->rvp.lock, flags);
   
         if (!ret)
                 ret = ib_post_send_mad(send_buf, NULL);
         if (!ret) {
                 /* 4.096 usec. */
-               timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-               ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+               timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+               ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
         } else {
                 ib_free_send_mad(send_buf);
-               ibp->trap_timeout = 0;
+               ibp->rvp.trap_timeout = 0;
         }
   }
   
@@@ -174,10 -174,10 +174,10 @@@ void hfi1_bad_pqkey(struct hfi1_ibport 
         memset(&data, 0, sizeof(data));
   
         if (trap_num == OPA_TRAP_BAD_P_KEY)
-               ibp->pkey_violations++;
+               ibp->rvp.pkey_violations++;
         else
-               ibp->qkey_violations++;
-       ibp->n_pkt_drops++;
+               ibp->rvp.qkey_violations++;
+       ibp->rvp.n_pkt_drops++;
   
         /* Send violation trap */
         data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@@ -233,9 -233,12 +233,12 @@@ static void bad_mkey(struct hfi1_ibpor
   /*
    * Send a Port Capability Mask Changed trap (ch. 14.3.11).
    */
- void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
+ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
   {
         struct opa_mad_notice_attr data;
+       struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+       struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+       struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
         u32 lid = ppd_from_ibp(ibp)->lid;
   
         memset(&data, 0, sizeof(data));
@@@ -245,7 -248,7 +248,7 @@@
         data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
         data.issuer_lid = cpu_to_be32(lid);
         data.ntc_144.lid = data.issuer_lid;
-       data.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
+       data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
   
         send_trap(ibp, &data, sizeof(data));
   }
@@@ -407,37 -410,38 +410,38 @@@ static int check_mkey(struct hfi1_ibpor
         int ret = 0;
   
         /* Is the mkey in the process of expiring? */
-       if (ibp->mkey_lease_timeout &&
-           time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+       if (ibp->rvp.mkey_lease_timeout &&
+           time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
                 /* Clear timeout and mkey protection field. */
-               ibp->mkey_lease_timeout = 0;
-               ibp->mkeyprot = 0;
+               ibp->rvp.mkey_lease_timeout = 0;
+               ibp->rvp.mkeyprot = 0;
         }
   
-       if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-           ibp->mkey == mkey)
+       if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+           ibp->rvp.mkey == mkey)
                 valid_mkey = 1;
   
         /* Unset lease timeout on any valid Get/Set/TrapRepress */
-       if (valid_mkey && ibp->mkey_lease_timeout &&
+       if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
             (mad->method == IB_MGMT_METHOD_GET ||
              mad->method == IB_MGMT_METHOD_SET ||
              mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
-               ibp->mkey_lease_timeout = 0;
+               ibp->rvp.mkey_lease_timeout = 0;
   
         if (!valid_mkey) {
                 switch (mad->method) {
                 case IB_MGMT_METHOD_GET:
                         /* Bad mkey not a violation below level 2 */
-                       if (ibp->mkeyprot < 2)
+                       if (ibp->rvp.mkeyprot < 2)
                                 break;
                 case IB_MGMT_METHOD_SET:
                 case IB_MGMT_METHOD_TRAP_REPRESS:
-                       if (ibp->mkey_violations != 0xFFFF)
-                               ++ibp->mkey_violations;
-                       if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-                               ibp->mkey_lease_timeout = jiffies +
-                                       ibp->mkey_lease_period * HZ;
+                       if (ibp->rvp.mkey_violations != 0xFFFF)
+                               ++ibp->rvp.mkey_violations;
+                       if (!ibp->rvp.mkey_lease_timeout &&
+                           ibp->rvp.mkey_lease_period)
+                               ibp->rvp.mkey_lease_timeout = jiffies +
+                                       ibp->rvp.mkey_lease_period * HZ;
                         /* Generate a trap notice. */
                         bad_mkey(ibp, mad, mkey, dr_slid, return_path,
                                  hop_cnt);
@@@ -501,16 -505,6 +505,6 @@@ void read_ltp_rtt(struct hfi1_devdata *
                 write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
   }
   
- static u8 __opa_porttype(struct hfi1_pportdata *ppd)
- {
-       if (qsfp_mod_present(ppd)) {
-               if (ppd->qsfp_info.cache_valid)
-                       return OPA_PORT_TYPE_STANDARD;
-               return OPA_PORT_TYPE_DISCONNECTED;
-       }
-       return OPA_PORT_TYPE_UNKNOWN;
- }
- 
   static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
                                    u32 *resp_len)
@@@ -522,6 -516,7 +516,7 @@@
         struct opa_port_info *pi = (struct opa_port_info *)data;
         u8 mtu;
         u8 credit_rate;
+       u8 is_beaconing_active;
         u32 state;
         u32 num_ports = OPA_AM_NPORT(am);
         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@@ -538,8 -533,8 +533,8 @@@
         ppd = dd->pport + (port - 1);
         ibp = &ppd->ibport_data;
   
-       if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-               ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+       if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+           ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)smp);
         }
@@@ -548,14 -543,14 +543,14 @@@
   
         /* Only return the mkey if the protection field allows it. */
         if (!(smp->method == IB_MGMT_METHOD_GET &&
-             ibp->mkey != smp->mkey &&
-             ibp->mkeyprot == 1))
-               pi->mkey = ibp->mkey;
- 
-       pi->subnet_prefix = ibp->gid_prefix;
-       pi->sm_lid = cpu_to_be32(ibp->sm_lid);
-       pi->ib_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-       pi->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+             ibp->rvp.mkey != smp->mkey &&
+             ibp->rvp.mkeyprot == 1))
+               pi->mkey = ibp->rvp.mkey;
+ 
+       pi->subnet_prefix = ibp->rvp.gid_prefix;
+       pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
+       pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+       pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
         pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
         pi->sa_qp = cpu_to_be32(ppd->sa_qp);
   
@@@ -581,38 -576,45 +576,45 @@@
         if (start_of_sm_config && (state == IB_PORT_INIT))
                 ppd->is_sm_config_started = 1;
   
-       pi->port_phys_conf = __opa_porttype(ppd) & 0xf;
+       pi->port_phys_conf = (ppd->port_type & 0xf);
   
   #if PI_LED_ENABLE_SUP
         pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
         pi->port_states.ledenable_offlinereason |=
                 ppd->is_sm_config_started << 5;
+       /*
+        * This pairs with the memory barrier in hfi1_start_led_override to
+        * ensure that we read the correct state of LED beaconing represented
+        * by led_override_timer_active
+        */
+       smp_rmb();
+       is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+       pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
         pi->port_states.ledenable_offlinereason |=
-               ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+               ppd->offline_disabled_reason;
   #else
         pi->port_states.offline_reason = ppd->neighbor_normal << 4;
         pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-       pi->port_states.offline_reason |= ppd->offline_disabled_reason &
-                                               OPA_PI_MASK_OFFLINE_REASON;
+       pi->port_states.offline_reason |= ppd->offline_disabled_reason;
   #endif /* PI_LED_ENABLE_SUP */
   
         pi->port_states.portphysstate_portstate =
                 (hfi1_ibphys_portstate(ppd) << 4) | state;
   
-       pi->mkeyprotect_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+       pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
   
         memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
         for (i = 0; i < ppd->vls_supported; i++) {
                 mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
                 if ((i % 2) == 0)
-                       pi->neigh_mtu.pvlx_to_mtu[i/2] |= (mtu << 4);
+                       pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
                 else
-                       pi->neigh_mtu.pvlx_to_mtu[i/2] |= mtu;
+                       pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
         }
         /* don't forget VL 15 */
         mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
-       pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu;
-       pi->smsl = ibp->sm_sl & OPA_PI_MASK_SMSL;
+       pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
+       pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
         pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
         pi->partenforce_filterraw |=
                 (ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
@@@ -620,17 -622,17 +622,17 @@@
                 pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
         if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
                 pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
-       pi->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+       pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
         /* P_KeyViolations are counted by hardware. */
-       pi->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-       pi->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+       pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+       pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
   
         pi->vl.cap = ppd->vls_supported;
-       pi->vl.high_limit = cpu_to_be16(ibp->vl_high_limit);
+       pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
         pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
         pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
   
-       pi->clientrereg_subnettimeout = ibp->subnet_timeout;
+       pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
   
         pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
                                           OPA_PORT_LINK_MODE_OPA << 5 |
@@@ -701,8 -703,10 +703,10 @@@
         /* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
         read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
   
-       /* this counter is 16 bits wide, but the replay_depth.wire
-        * variable is only 8 bits */
+       /*
+        * this counter is 16 bits wide, but the replay_depth.wire
+        * variable is only 8 bits
+        */
         if (tmp > 0xff)
                 tmp = 0xff;
         pi->replay_depth.wire = tmp;
@@@ -749,7 -753,7 +753,7 @@@ static int __subn_get_opa_pkeytable(str
                 return reply((struct ib_mad_hdr *)smp);
         }
   
-       n_blocks_avail = (u16) (npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+       n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
   
         size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
   
@@@ -763,7 -767,7 +767,7 @@@
                 return reply((struct ib_mad_hdr *)smp);
         }
   
-       p = (__be16 *) data;
+       p = (__be16 *)data;
         q = (u16 *)data;
         /* get the real pkeys if we are requesting the first block */
         if (start_block == 0) {
@@@ -772,9 -776,9 +776,9 @@@
                         p[i] = cpu_to_be16(q[i]);
                 if (resp_len)
                         *resp_len += size;
-       } else
+       } else {
                 smp->status |= IB_SMP_INVALID_FIELD;
- 
+       }
         return reply((struct ib_mad_hdr *)smp);
   }
   
@@@ -901,8 -905,8 +905,8 @@@ static int port_states_transition_allow
         u32 logical_old = driver_logical_state(ppd);
         int ret, logical_allowed, physical_allowed;
   
-       logical_allowed = ret =
-               logical_transition_allowed(logical_old, logical_new);
+       ret = logical_transition_allowed(logical_old, logical_new);
+       logical_allowed = ret;
   
         if (ret == HFI_TRANSITION_DISALLOWED ||
             ret == HFI_TRANSITION_UNDEFINED) {
@@@ -912,8 -916,8 +916,8 @@@
                 return ret;
         }
   
-       physical_allowed = ret =
-               physical_transition_allowed(physical_old, physical_new);
+       ret = physical_transition_allowed(physical_old, physical_new);
+       physical_allowed = ret;
   
         if (ret == HFI_TRANSITION_DISALLOWED ||
             ret == HFI_TRANSITION_UNDEFINED) {
@@@ -927,6 -931,14 +931,14 @@@
             physical_allowed == HFI_TRANSITION_IGNORED)
                 return HFI_TRANSITION_IGNORED;
   
+       /*
+        * A change request of Physical Port State from
+        * 'Offline' to 'Polling' should be ignored.
+        */
+       if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
+           (physical_new == IB_PORTPHYSSTATE_POLLING))
+               return HFI_TRANSITION_IGNORED;
+ 
         /*
          * Either physical_allowed or logical_allowed is
          * HFI_TRANSITION_ALLOWED.
@@@ -972,16 -984,15 +984,15 @@@ static int set_port_states(struct hfi1_
                         break;
                 /* FALLTHROUGH */
         case IB_PORT_DOWN:
-               if (phys_state == IB_PORTPHYSSTATE_NOP)
+               if (phys_state == IB_PORTPHYSSTATE_NOP) {
                         link_state = HLS_DN_DOWNDEF;
-               else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
+               } else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
                         link_state = HLS_DN_POLL;
-                       set_link_down_reason(ppd,
-                            OPA_LINKDOWN_REASON_FM_BOUNCE, 0,
-                            OPA_LINKDOWN_REASON_FM_BOUNCE);
-               } else if (phys_state == IB_PORTPHYSSTATE_DISABLED)
+                       set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
+                                            0, OPA_LINKDOWN_REASON_FM_BOUNCE);
+               } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
                         link_state = HLS_DN_DISABLE;
-               else {
+               } else {
                         pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
                                 phys_state);
                         smp->status |= IB_SMP_INVALID_FIELD;
@@@ -991,11 -1002,11 +1002,11 @@@
                 set_link_state(ppd, link_state);
                 if (link_state == HLS_DN_DISABLE &&
                     (ppd->offline_disabled_reason >
-                    OPA_LINKDOWN_REASON_SMA_DISABLED ||
+                    HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
                      ppd->offline_disabled_reason ==
-                    OPA_LINKDOWN_REASON_NONE))
+                    HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
                         ppd->offline_disabled_reason =
-                       OPA_LINKDOWN_REASON_SMA_DISABLED;
+                       HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
                 /*
                  * Don't send a reply if the response would be sent
                  * through the disabled port.
@@@ -1091,13 -1102,13 +1102,13 @@@ static int __subn_set_opa_portinfo(stru
   
         ls_old = driver_lstate(ppd);
   
-       ibp->mkey = pi->mkey;
-       ibp->gid_prefix = pi->subnet_prefix;
-       ibp->mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
+       ibp->rvp.mkey = pi->mkey;
+       ibp->rvp.gid_prefix = pi->subnet_prefix;
+       ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
   
         /* Must be a valid unicast LID address. */
         if ((lid == 0 && ls_old > IB_PORT_INIT) ||
-            lid >= HFI1_MULTICAST_LID_BASE) {
+           lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
                         lid);
@@@ -1130,23 -1141,23 +1141,23 @@@
   
         /* Must be a valid unicast LID address. */
         if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
-            smlid >= HFI1_MULTICAST_LID_BASE) {
+           smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
-       } else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+       } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
                 pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
-               spin_lock_irqsave(&ibp->lock, flags);
-               if (ibp->sm_ah) {
-                       if (smlid != ibp->sm_lid)
-                               ibp->sm_ah->attr.dlid = smlid;
-                       if (msl != ibp->sm_sl)
-                               ibp->sm_ah->attr.sl = msl;
+               spin_lock_irqsave(&ibp->rvp.lock, flags);
+               if (ibp->rvp.sm_ah) {
+                       if (smlid != ibp->rvp.sm_lid)
+                               ibp->rvp.sm_ah->attr.dlid = smlid;
+                       if (msl != ibp->rvp.sm_sl)
+                               ibp->rvp.sm_ah->attr.sl = msl;
                 }
-               spin_unlock_irqrestore(&ibp->lock, flags);
-               if (smlid != ibp->sm_lid)
-                       ibp->sm_lid = smlid;
-               if (msl != ibp->sm_sl)
-                       ibp->sm_sl = msl;
+               spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+               if (smlid != ibp->rvp.sm_lid)
+                       ibp->rvp.sm_lid = smlid;
+               if (msl != ibp->rvp.sm_sl)
+                       ibp->rvp.sm_sl = msl;
                 event.event = IB_EVENT_SM_CHANGE;
                 ib_dispatch_event(&event);
         }
@@@ -1167,8 -1178,8 +1178,8 @@@
         ppd->port_error_action = be32_to_cpu(pi->port_error_action);
         lwe = be16_to_cpu(pi->link_width.enabled);
         if (lwe) {
-               if (lwe == OPA_LINK_WIDTH_RESET
-                               || lwe == OPA_LINK_WIDTH_RESET_OLD)
+               if (lwe == OPA_LINK_WIDTH_RESET ||
+                   lwe == OPA_LINK_WIDTH_RESET_OLD)
                         set_link_width_enabled(ppd, ppd->link_width_supported);
                 else if ((lwe & ~ppd->link_width_supported) == 0)
                         set_link_width_enabled(ppd, lwe);
@@@ -1177,19 -1188,21 +1188,21 @@@
         }
         lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
         /* LWD.E is always applied - 0 means "disabled" */
-       if (lwe == OPA_LINK_WIDTH_RESET
-                       || lwe == OPA_LINK_WIDTH_RESET_OLD) {
+       if (lwe == OPA_LINK_WIDTH_RESET ||
+           lwe == OPA_LINK_WIDTH_RESET_OLD) {
                 set_link_width_downgrade_enabled(ppd,
-                               ppd->link_width_downgrade_supported);
+                                                ppd->
+                                                link_width_downgrade_supported
+                                                );
         } else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
                 /* only set and apply if something changed */
                 if (lwe != ppd->link_width_downgrade_enabled) {
                         set_link_width_downgrade_enabled(ppd, lwe);
                         call_link_downgrade_policy = 1;
                 }
-       } else
+       } else {
                 smp->status |= IB_SMP_INVALID_FIELD;
- 
+       }
         lse = be16_to_cpu(pi->link_speed.enabled);
         if (lse) {
                 if (lse & be16_to_cpu(pi->link_speed.supported))
@@@ -1198,22 -1211,24 +1211,24 @@@
                         smp->status |= IB_SMP_INVALID_FIELD;
         }
   
-       ibp->mkeyprot = (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
-       ibp->vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
+       ibp->rvp.mkeyprot =
+               (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
+       ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
         (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
-                                   ibp->vl_high_limit);
+                                   ibp->rvp.vl_high_limit);
   
-       if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-               ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+       if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+           ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)smp);
         }
         for (i = 0; i < ppd->vls_supported; i++) {
                 if ((i % 2) == 0)
-                       mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i/2] >> 4)
-                                         & 0xF);
+                       mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
+                                          4) & 0xF);
                 else
-                       mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i/2] & 0xF);
+                       mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
+                                         0xF);
                 if (mtu == 0xffff) {
                         pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
                                 mtu,
@@@ -1223,8 -1238,8 +1238,8 @@@
                 }
                 if (dd->vld[i].mtu != mtu) {
                         dd_dev_info(dd,
-                               "MTU change on vl %d from %d to %d\n",
-                               i, dd->vld[i].mtu, mtu);
+                                   "MTU change on vl %d from %d to %d\n",
+                                   i, dd->vld[i].mtu, mtu);
                         dd->vld[i].mtu = mtu;
                         call_set_mtu++;
                 }
@@@ -1232,13 -1247,13 +1247,13 @@@
         /* As per OPAV1 spec: VL15 must support and be configured
          * for operation with a 2048 or larger MTU.
          */
-       mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15/2] & 0xF);
+       mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
         if (mtu < 2048 || mtu == 0xffff)
                 mtu = 2048;
         if (dd->vld[15].mtu != mtu) {
                 dd_dev_info(dd,
-                       "MTU change on vl 15 from %d to %d\n",
-                       dd->vld[15].mtu, mtu);
+                           "MTU change on vl 15 from %d to %d\n",
+                           dd->vld[15].mtu, mtu);
                 dd->vld[15].mtu = mtu;
                 call_set_mtu++;
         }
@@@ -1254,21 -1269,21 +1269,21 @@@
                         smp->status |= IB_SMP_INVALID_FIELD;
                 } else {
                         if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
-                                               vls) == -EINVAL)
+                                           vls) == -EINVAL)
                                 smp->status |= IB_SMP_INVALID_FIELD;
                 }
         }
   
         if (pi->mkey_violations == 0)
-               ibp->mkey_violations = 0;
+               ibp->rvp.mkey_violations = 0;
   
         if (pi->pkey_violations == 0)
-               ibp->pkey_violations = 0;
+               ibp->rvp.pkey_violations = 0;
   
         if (pi->qkey_violations == 0)
-               ibp->qkey_violations = 0;
+               ibp->rvp.qkey_violations = 0;
   
-       ibp->subnet_timeout =
+       ibp->rvp.subnet_timeout =
                 pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
   
         crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
@@@ -1388,7 -1403,7 +1403,7 @@@ static int set_pkeys(struct hfi1_devdat
                 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
   
                 event.event = IB_EVENT_PKEY_CHANGE;
-               event.device = &dd->verbs_dev.ibdev;
+               event.device = &dd->verbs_dev.rdi.ibdev;
                 event.element.port_num = port;
                 ib_dispatch_event(&event);
         }
@@@ -1402,7 -1417,7 +1417,7 @@@ static int __subn_set_opa_pkeytable(str
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         u32 n_blocks_sent = OPA_AM_NBLK(am);
         u32 start_block = am & 0x7ff;
-       u16 *p = (u16 *) data;
+       u16 *p = (u16 *)data;
         __be16 *q = (__be16 *)data;
         int i;
         u16 n_blocks_avail;
@@@ -1415,7 -1430,7 +1430,7 @@@
                 return reply((struct ib_mad_hdr *)smp);
         }
   
-       n_blocks_avail = (u16)(npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+       n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
   
         if (start_block + n_blocks_sent > n_blocks_avail ||
             n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
@@@ -1514,14 -1529,22 +1529,22 @@@ static int __subn_set_opa_sl_to_sc(stru
         struct hfi1_ibport *ibp = to_iport(ibdev, port);
         u8 *p = data;
         int i;
+       u8 sc;
   
         if (am) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)smp);
         }
   
-       for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++)
-               ibp->sl_to_sc[i] = *p++;
+       for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
+               sc = *p++;
+               if (ibp->sl_to_sc[i] != sc) {
+                       ibp->sl_to_sc[i] = sc;
+ 
+                       /* Put all stale qps into error state */
+                       hfi1_error_port_qps(ibp, i);
+               }
+       }
   
         return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
   }
@@@ -1574,7 -1597,7 +1597,7 @@@ static int __subn_get_opa_sc_to_vlt(str
   {
         u32 n_blocks = OPA_AM_NBLK(am);
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-       void *vp = (void *) data;
+       void *vp = (void *)data;
         size_t size = 4 * sizeof(u64);
   
         if (n_blocks != 1) {
@@@ -1597,7 -1620,7 +1620,7 @@@ static int __subn_set_opa_sc_to_vlt(str
         u32 n_blocks = OPA_AM_NBLK(am);
         int async_update = OPA_AM_ASYNC(am);
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-       void *vp = (void *) data;
+       void *vp = (void *)data;
         struct hfi1_pportdata *ppd;
         int lstate;
   
@@@ -1609,8 -1632,10 +1632,10 @@@
         /* IB numbers ports from 1, hw from 0 */
         ppd = dd->pport + (port - 1);
         lstate = driver_lstate(ppd);
-       /* it's known that async_update is 0 by this point, but include
-        * the explicit check for clarity */
+       /*
+        * it's known that async_update is 0 by this point, but include
+        * the explicit check for clarity
+        */
         if (!async_update &&
             (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
                 smp->status |= IB_SMP_INVALID_FIELD;
@@@ -1629,7 -1654,7 +1654,7 @@@ static int __subn_get_opa_sc_to_vlnt(st
         u32 n_blocks = OPA_AM_NPORT(am);
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         struct hfi1_pportdata *ppd;
-       void *vp = (void *) data;
+       void *vp = (void *)data;
         int size;
   
         if (n_blocks != 1) {
@@@ -1654,7 -1679,7 +1679,7 @@@ static int __subn_set_opa_sc_to_vlnt(st
         u32 n_blocks = OPA_AM_NPORT(am);
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         struct hfi1_pportdata *ppd;
-       void *vp = (void *) data;
+       void *vp = (void *)data;
         int lstate;
   
         if (n_blocks != 1) {
@@@ -1687,7 -1712,7 +1712,7 @@@ static int __subn_get_opa_psi(struct op
         u32 lstate;
         struct hfi1_ibport *ibp;
         struct hfi1_pportdata *ppd;
-       struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+       struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
   
         if (nports != 1) {
                 smp->status |= IB_SMP_INVALID_FIELD;
@@@ -1707,12 -1732,11 +1732,11 @@@
         psi->port_states.ledenable_offlinereason |=
                 ppd->is_sm_config_started << 5;
         psi->port_states.ledenable_offlinereason |=
-               ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+               ppd->offline_disabled_reason;
   #else
         psi->port_states.offline_reason = ppd->neighbor_normal << 4;
         psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-       psi->port_states.offline_reason |= ppd->offline_disabled_reason &
-                               OPA_PI_MASK_OFFLINE_REASON;
+       psi->port_states.offline_reason |= ppd->offline_disabled_reason;
   #endif /* PI_LED_ENABLE_SUP */
   
         psi->port_states.portphysstate_portstate =
@@@ -1737,7 -1761,7 +1761,7 @@@ static int __subn_set_opa_psi(struct op
         u8 ls_new, ps_new;
         struct hfi1_ibport *ibp;
         struct hfi1_pportdata *ppd;
-       struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+       struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
         int ret, invalid = 0;
   
         if (nports != 1) {
@@@ -1782,14 -1806,16 +1806,16 @@@ static int __subn_get_opa_cable_info(st
         u32 len = OPA_AM_CI_LEN(am) + 1;
         int ret;
   
- #define __CI_PAGE_SIZE (1 << 7) /* 128 bytes */
+ #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
   #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
   #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
   
-       /* check that addr is within spec, and
-        * addr and (addr + len - 1) are on the same "page" */
+       /*
+        * check that addr is within spec, and
+        * addr and (addr + len - 1) are on the same "page"
+        */
         if (addr >= 4096 ||
-               (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
+           (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)smp);
         }
@@@ -1823,7 -1849,7 +1849,7 @@@ static int __subn_get_opa_bct(struct op
         u32 num_ports = OPA_AM_NPORT(am);
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         struct hfi1_pportdata *ppd;
-       struct buffer_control *p = (struct buffer_control *) data;
+       struct buffer_control *p = (struct buffer_control *)data;
         int size;
   
         if (num_ports != 1) {
@@@ -1846,7 -1872,7 +1872,7 @@@ static int __subn_set_opa_bct(struct op
         u32 num_ports = OPA_AM_NPORT(am);
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         struct hfi1_pportdata *ppd;
-       struct buffer_control *p = (struct buffer_control *) data;
+       struct buffer_control *p = (struct buffer_control *)data;
   
         if (num_ports != 1) {
                 smp->status |= IB_SMP_INVALID_FIELD;
@@@ -1919,13 -1945,15 +1945,15 @@@ static int __subn_set_opa_vl_arb(struc
   
         switch (section) {
         case OPA_VLARB_LOW_ELEMENTS:
-               (void) fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
+               (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
                 break;
         case OPA_VLARB_HIGH_ELEMENTS:
-               (void) fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+               (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
                 break;
-       /* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
-        * can be changed from the default values */
+       /*
+        * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
+        * can be changed from the default values
+        */
         case OPA_VLARB_PREEMPT_ELEMENTS:
                 /* FALLTHROUGH */
         case OPA_VLARB_PREEMPT_MATRIX:
@@@ -2137,8 -2165,10 +2165,10 @@@ struct opa_port_data_counters_msg 
   };
   
   struct opa_port_error_counters64_msg {
-       /* Request contains first two fields, response contains the
-        * whole magilla */
+       /*
+        * Request contains first two fields, response contains the
+        * whole magilla
+        */
         __be64 port_select_mask[4];
         __be32 vl_select_mask;
   
@@@ -2172,7 -2202,6 +2202,6 @@@ struct opa_port_error_info_msg 
         __be32 error_info_select_mask;
         __be32 reserved1;
         struct _port_ei {
- 
                 u8 port_number;
                 u8 reserved2[7];
   
@@@ -2251,7 -2280,7 +2280,7 @@@ enum error_info_selects 
   };
   
   static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
-                               struct ib_device *ibdev, u32 *resp_len)
+                                    struct ib_device *ibdev, u32 *resp_len)
   {
         struct opa_class_port_info *p =
                 (struct opa_class_port_info *)pmp->data;
@@@ -2299,9 -2328,9 +2328,9 @@@ static void a0_portstatus(struct hfi1_p
         }
   }
   
- 
   static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
-                       struct ib_device *ibdev, u8 port, u32 *resp_len)
+                                 struct ib_device *ibdev,
+                                 u8 port, u32 *resp_len)
   {
         struct opa_port_status_req *req =
                 (struct opa_port_status_req *)pmp->data;
@@@ -2326,8 -2355,8 +2355,8 @@@
                 return reply((struct ib_mad_hdr *)pmp);
         }
   
-       if (nports != 1 || (port_num && port_num != port)
-           || num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
+       if (nports != 1 || (port_num && port_num != port) ||
+           num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)pmp);
         }
@@@ -2357,7 -2386,7 +2386,7 @@@
                                          CNTR_INVALID_VL));
         rsp->port_multicast_xmit_pkts =
                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-                                       CNTR_INVALID_VL));
+                                         CNTR_INVALID_VL));
         rsp->port_multicast_rcv_pkts =
                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
                                           CNTR_INVALID_VL));
@@@ -2386,7 -2415,7 +2415,7 @@@
         }
         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-                                       CNTR_INVALID_VL);
+                                  CNTR_INVALID_VL);
         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
                 /* overflow/wrapped */
                 rsp->link_error_recovery = cpu_to_be32(~0);
@@@ -2401,13 -2430,13 +2430,13 @@@
                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
                                           CNTR_INVALID_VL));
         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-                                         CNTR_INVALID_VL));
+                                                     CNTR_INVALID_VL));
   
         /* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
   
-       vlinfo = &(rsp->vls[0]);
+       vlinfo = &rsp->vls[0];
         vfi = 0;
         /* The vl_select_mask has been checked above, and we know
          * that it contains only entries which represent valid VLs.
@@@ -2423,27 -2452,27 +2452,27 @@@
   
                 rsp->vls[vfi].port_vl_rcv_pkts =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_xmit_data =
                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-                                       idx_from_vl(vl)));
+                                                  idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_xmit_pkts =
                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-                                       idx_from_vl(vl)));
+                                                  idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_xmit_wait =
                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-                                       idx_from_vl(vl)));
+                                                  idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_rcv_fecn =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_rcv_becn =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
   
                 vlinfo++;
                 vfi++;
@@@ -2473,7 -2502,7 +2502,7 @@@ static u64 get_error_counter_summary(st
         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
                                                 CNTR_INVALID_VL);
         error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-                                               CNTR_INVALID_VL);
+                                              CNTR_INVALID_VL);
         /* local link integrity must be right-shifted by the lli resolution */
         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
         tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
@@@ -2483,10 -2512,10 +2512,10 @@@
         tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
         error_counter_summary += (tmp >> res_ler);
         error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
-                                               CNTR_INVALID_VL);
+                                              CNTR_INVALID_VL);
         error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
         error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-                                               CNTR_INVALID_VL);
+                                              CNTR_INVALID_VL);
         /* ppd->link_downed is a 32-bit value */
         error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
                                                 CNTR_INVALID_VL);
@@@ -2512,7 -2541,7 +2541,7 @@@ static void a0_datacounters(struct hfi1
                                                  idx_from_vl(vl));
                         if (tmp < sum_vl_xmit_wait) {
                                 /* we wrapped */
-                               sum_vl_xmit_wait = (u64) ~0;
+                               sum_vl_xmit_wait = (u64)~0;
                                 break;
                         }
                         sum_vl_xmit_wait = tmp;
@@@ -2522,8 -2551,30 +2551,30 @@@
         }
   }
   
+ static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
+                                  struct _port_dctrs *rsp)
+ {
+       struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ 
+       rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
+                                               CNTR_INVALID_VL));
+       rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
+                                               CNTR_INVALID_VL));
+       rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
+                                               CNTR_INVALID_VL));
+       rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
+                                               CNTR_INVALID_VL));
+       rsp->port_multicast_xmit_pkts =
+               cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
+                                         CNTR_INVALID_VL));
+       rsp->port_multicast_rcv_pkts =
+               cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
+                                         CNTR_INVALID_VL));
+ }
+ 
   static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
-                       struct ib_device *ibdev, u8 port, u32 *resp_len)
+                                   struct ib_device *ibdev,
+                                   u8 port, u32 *resp_len)
   {
         struct opa_port_data_counters_msg *req =
                 (struct opa_port_data_counters_msg *)pmp->data;
@@@ -2579,7 -2630,7 +2630,7 @@@
                 return reply((struct ib_mad_hdr *)pmp);
         }
   
- -      rsp = (struct _port_dctrs *)&req->port[0];
+ +      rsp = &req->port[0];
         memset(rsp, 0, sizeof(*rsp));
   
         rsp->port_number = port;
@@@ -2590,39 -2641,19 +2641,19 @@@
          */
         hfi1_read_link_quality(dd, &lq);
         rsp->link_quality_indicator = cpu_to_be32((u32)lq);
+       pma_get_opa_port_dctrs(ibdev, rsp);
   
-       /* rsp->sw_port_congestion is 0 for HFIs */
-       /* rsp->port_xmit_time_cong is 0 for HFIs */
-       /* rsp->port_xmit_wasted_bw ??? */
-       /* rsp->port_xmit_wait_data ??? */
-       /* rsp->port_mark_fecn is 0 for HFIs */
- 
-       rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
-                                               CNTR_INVALID_VL));
-       rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
-                                               CNTR_INVALID_VL));
-       rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
-                                               CNTR_INVALID_VL));
-       rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
-                                               CNTR_INVALID_VL));
-       rsp->port_multicast_xmit_pkts =
-               cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-                                               CNTR_INVALID_VL));
-       rsp->port_multicast_rcv_pkts =
-               cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
-                                               CNTR_INVALID_VL));
         rsp->port_xmit_wait =
                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
         rsp->port_rcv_fecn =
                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
         rsp->port_rcv_becn =
                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
- 
         rsp->port_error_counter_summary =
                 cpu_to_be64(get_error_counter_summary(ibdev, port,
                                                       res_lli, res_ler));
   
-       vlinfo = &(rsp->vls[0]);
+       vlinfo = &rsp->vls[0];
         vfi = 0;
         /* The vl_select_mask has been checked above, and we know
          * that it contains only entries which represent valid VLs.
@@@ -2630,44 -2661,45 +2661,45 @@@
          * any additional checks for vl.
          */
         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-                8 * sizeof(req->vl_select_mask)) {
+                        8 * sizeof(req->vl_select_mask)) {
                 memset(vlinfo, 0, sizeof(*vlinfo));
   
                 rsp->vls[vfi].port_vl_xmit_data =
                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-                                                       idx_from_vl(vl)));
+                                                  idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_rcv_data =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
-                                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_xmit_pkts =
                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-                                                       idx_from_vl(vl)));
+                                                  idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_rcv_pkts =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-                                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_xmit_wait =
                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-                                                       idx_from_vl(vl)));
+                                                  idx_from_vl(vl)));
   
                 rsp->vls[vfi].port_vl_rcv_fecn =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-                                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
                 rsp->vls[vfi].port_vl_rcv_becn =
                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-                                                       idx_from_vl(vl)));
+                                                 idx_from_vl(vl)));
   
                 /* rsp->port_vl_xmit_time_cong is 0 for HFIs */
                 /* rsp->port_vl_xmit_wasted_bw ??? */
                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
-                * does this differ from rsp->vls[vfi].port_vl_xmit_wait */
+                * does this differ from rsp->vls[vfi].port_vl_xmit_wait
+                */
                 /*rsp->vls[vfi].port_vl_mark_fecn =
-                       cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
-                               + offset));
-               */
+                *      cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
+                *              + offset));
+                */
                 vlinfo++;
                 vfi++;
         }
@@@ -2680,12 -2712,88 +2712,88 @@@
         return reply((struct ib_mad_hdr *)pmp);
   }
   
+ static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
+                                      struct ib_device *ibdev, u8 port)
+ {
+       struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
+                                               pmp->data;
+       struct _port_dctrs rsp;
+ 
+       if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+               goto bail;
+       }
+ 
+       memset(&rsp, 0, sizeof(rsp));
+       pma_get_opa_port_dctrs(ibdev, &rsp);
+ 
+       p->port_xmit_data = rsp.port_xmit_data;
+       p->port_rcv_data = rsp.port_rcv_data;
+       p->port_xmit_packets = rsp.port_xmit_pkts;
+       p->port_rcv_packets = rsp.port_rcv_pkts;
+       p->port_unicast_xmit_packets = 0;
+       p->port_unicast_rcv_packets =  0;
+       p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
+       p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
+ 
+ bail:
+       return reply((struct ib_mad_hdr *)pmp);
+ }
+ 
+ static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
+                                  struct _port_ectrs *rsp, u8 port)
+ {
+       u64 tmp, tmp2;
+       struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+       struct hfi1_ibport *ibp = to_iport(ibdev, port);
+       struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ 
+       tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
+       tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
+                                       CNTR_INVALID_VL);
+       if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
+               /* overflow/wrapped */
+               rsp->link_error_recovery = cpu_to_be32(~0);
+       } else {
+               rsp->link_error_recovery = cpu_to_be32(tmp2);
+       }
+ 
+       rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
+                                               CNTR_INVALID_VL));
+       rsp->port_rcv_errors =
+               cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
+       rsp->port_rcv_remote_physical_errors =
+               cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
+                                         CNTR_INVALID_VL));
+       rsp->port_rcv_switch_relay_errors = 0;
+       rsp->port_xmit_discards =
+               cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
+                                          CNTR_INVALID_VL));
+       rsp->port_xmit_constraint_errors =
+               cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
+                                          CNTR_INVALID_VL));
+       rsp->port_rcv_constraint_errors =
+               cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
+                                          CNTR_INVALID_VL));
+       tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
+       tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
+       if (tmp2 < tmp) {
+               /* overflow/wrapped */
+               rsp->local_link_integrity_errors = cpu_to_be64(~0);
+       } else {
+               rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
+       }
+       rsp->excessive_buffer_overruns =
+               cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+ }
+ 
   static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
-                       struct ib_device *ibdev, u8 port, u32 *resp_len)
+                                 struct ib_device *ibdev,
+                                 u8 port, u32 *resp_len)
   {
         size_t response_data_size;
         struct _port_ectrs *rsp;
-       unsigned long port_num;
+       u8 port_num;
         struct opa_port_error_counters64_msg *req;
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         u32 num_ports;
@@@ -2695,7 -2803,7 +2803,7 @@@
         struct hfi1_pportdata *ppd;
         struct _vls_ectrs *vlinfo;
         unsigned long vl;
-       u64 port_mask, tmp, tmp2;
+       u64 port_mask, tmp;
         u32 vl_select_mask;
         int vfi;
   
@@@ -2724,62 -2832,34 +2832,34 @@@
          */
         port_mask = be64_to_cpu(req->port_select_mask[3]);
         port_num = find_first_bit((unsigned long *)&port_mask,
-                                       sizeof(port_mask));
+                                 sizeof(port_mask));
   
-       if ((u8)port_num != port) {
+       if (port_num != port) {
                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)pmp);
         }
   
- -      rsp = (struct _port_ectrs *)&req->port[0];
+ +      rsp = &req->port[0];
   
         ibp = to_iport(ibdev, port_num);
         ppd = ppd_from_ibp(ibp);
   
         memset(rsp, 0, sizeof(*rsp));
-       rsp->port_number = (u8)port_num;
+       rsp->port_number = port_num;
+ 
+       pma_get_opa_port_ectrs(ibdev, rsp, port_num);
   
-       rsp->port_rcv_constraint_errors =
-               cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
-                                          CNTR_INVALID_VL));
-       /* port_rcv_switch_relay_errors is 0 for HFIs */
-       rsp->port_xmit_discards =
-               cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
-                                               CNTR_INVALID_VL));
         rsp->port_rcv_remote_physical_errors =
                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-                                               CNTR_INVALID_VL));
-       tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
-       tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
-       if (tmp2 < tmp) {
-               /* overflow/wrapped */
-               rsp->local_link_integrity_errors = cpu_to_be64(~0);
-       } else {
-               rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
-       }
-       tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
-       tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-                                       CNTR_INVALID_VL);
-       if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
-               /* overflow/wrapped */
-               rsp->link_error_recovery = cpu_to_be32(~0);
-       } else {
-               rsp->link_error_recovery = cpu_to_be32(tmp2);
-       }
-       rsp->port_xmit_constraint_errors =
-               cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
-                                          CNTR_INVALID_VL));
-       rsp->excessive_buffer_overruns =
-               cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+                                         CNTR_INVALID_VL));
         rsp->fm_config_errors =
                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-                                               CNTR_INVALID_VL));
-       rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-                                               CNTR_INVALID_VL));
+                                         CNTR_INVALID_VL));
         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
+ 
         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
   
- -      vlinfo = (struct _vls_ectrs *)&rsp->vls[0];
+ +      vlinfo = &rsp->vls[0];
         vfi = 0;
         vl_select_mask = be32_to_cpu(req->vl_select_mask);
         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
@@@ -2796,8 -2876,94 +2876,94 @@@
         return reply((struct ib_mad_hdr *)pmp);
   }
   
+ static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
+                                  struct ib_device *ibdev, u8 port)
+ {
+       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+               pmp->data;
+       struct _port_ectrs rsp;
+       u64 temp_link_overrun_errors;
+       u64 temp_64;
+       u32 temp_32;
+ 
+       memset(&rsp, 0, sizeof(rsp));
+       pma_get_opa_port_ectrs(ibdev, &rsp, port);
+ 
+       if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+               goto bail;
+       }
+ 
+       p->symbol_error_counter = 0; /* N/A for OPA */
+ 
+       temp_32 = be32_to_cpu(rsp.link_error_recovery);
+       if (temp_32 > 0xFFUL)
+               p->link_error_recovery_counter = 0xFF;
+       else
+               p->link_error_recovery_counter = (u8)temp_32;
+ 
+       temp_32 = be32_to_cpu(rsp.link_downed);
+       if (temp_32 > 0xFFUL)
+               p->link_downed_counter = 0xFF;
+       else
+               p->link_downed_counter = (u8)temp_32;
+ 
+       temp_64 = be64_to_cpu(rsp.port_rcv_errors);
+       if (temp_64 > 0xFFFFUL)
+               p->port_rcv_errors = cpu_to_be16(0xFFFF);
+       else
+               p->port_rcv_errors = cpu_to_be16((u16)temp_64);
+ 
+       temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
+       if (temp_64 > 0xFFFFUL)
+               p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+       else
+               p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
+ 
+       temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
+       p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
+ 
+       temp_64 = be64_to_cpu(rsp.port_xmit_discards);
+       if (temp_64 > 0xFFFFUL)
+               p->port_xmit_discards = cpu_to_be16(0xFFFF);
+       else
+               p->port_xmit_discards = cpu_to_be16((u16)temp_64);
+ 
+       temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
+       if (temp_64 > 0xFFUL)
+               p->port_xmit_constraint_errors = 0xFF;
+       else
+               p->port_xmit_constraint_errors = (u8)temp_64;
+ 
+       temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
+       if (temp_64 > 0xFFUL)
+               p->port_rcv_constraint_errors = 0xFFUL;
+       else
+               p->port_rcv_constraint_errors = (u8)temp_64;
+ 
+       /* LocalLink: 7:4, BufferOverrun: 3:0 */
+       temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
+       if (temp_64 > 0xFUL)
+               temp_64 = 0xFUL;
+ 
+       temp_link_overrun_errors = temp_64 << 4;
+ 
+       temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
+       if (temp_64 > 0xFUL)
+               temp_64 = 0xFUL;
+       temp_link_overrun_errors |= temp_64;
+ 
+       p->link_overrun_errors = (u8)temp_link_overrun_errors;
+ 
+       p->vl15_dropped = 0; /* N/A for OPA */
+ 
+ bail:
+       return reply((struct ib_mad_hdr *)pmp);
+ }
+ 
   static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
-                       struct ib_device *ibdev, u8 port, u32 *resp_len)
+                                struct ib_device *ibdev,
+                                u8 port, u32 *resp_len)
   {
         size_t response_data_size;
         struct _port_ei *rsp;
@@@ -2805,12 -2971,12 +2971,12 @@@
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         u64 port_mask;
         u32 num_ports;
-       unsigned long port_num;
+       u8 port_num;
         u8 num_pslm;
         u64 reg;
   
         req = (struct opa_port_error_info_msg *)pmp->data;
- -      rsp = (struct _port_ei *)&req->port[0];
+ +      rsp = &req->port[0];
   
         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@@ -2838,7 -3004,7 +3004,7 @@@
         port_num = find_first_bit((unsigned long *)&port_mask,
                                   sizeof(port_mask));
   
-       if ((u8)port_num != port) {
+       if (port_num != port) {
                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)pmp);
         }
@@@ -2847,15 -3013,17 +3013,17 @@@
         rsp->port_rcv_ei.status_and_code =
                 dd->err_info_rcvport.status_and_code;
         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
-               &dd->err_info_rcvport.packet_flit1, sizeof(u64));
+              &dd->err_info_rcvport.packet_flit1, sizeof(u64));
         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
-               &dd->err_info_rcvport.packet_flit2, sizeof(u64));
+              &dd->err_info_rcvport.packet_flit2, sizeof(u64));
   
         /* ExcessiverBufferOverrunInfo */
         reg = read_csr(dd, RCV_ERR_INFO);
         if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
-               /* if the RcvExcessBufferOverrun bit is set, save SC of
-                * first pkt that encountered an excess buffer overrun */
+               /*
+                * if the RcvExcessBufferOverrun bit is set, save SC of
+                * first pkt that encountered an excess buffer overrun
+                */
                 u8 tmp = (u8)reg;
   
                 tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
@@@ -2892,7 -3060,8 +3060,8 @@@
   }
   
   static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
-                       struct ib_device *ibdev, u8 port, u32 *resp_len)
+                                 struct ib_device *ibdev,
+                                 u8 port, u32 *resp_len)
   {
         struct opa_clear_port_status *req =
                 (struct opa_clear_port_status *)pmp->data;
@@@ -2951,8 -3120,9 +3120,9 @@@
                 write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
   
         /* Only applicable for switch */
-       /*if (counter_select & CS_PORT_MARK_FECN)
-               write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);*/
+       /* if (counter_select & CS_PORT_MARK_FECN)
+        *      write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
+        */
   
         if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
                 write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
@@@ -2975,7 -3145,7 +3145,7 @@@
         if (counter_select & CS_LINK_ERROR_RECOVERY) {
                 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
                 write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-                                               CNTR_INVALID_VL, 0);
+                              CNTR_INVALID_VL, 0);
         }
   
         if (counter_select & CS_PORT_RCV_ERRORS)
@@@ -2997,7 -3167,6 +3167,6 @@@
   
         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
                          8 * sizeof(vl_select_mask)) {
- 
                 if (counter_select & CS_PORT_XMIT_DATA)
                         write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
   
@@@ -3026,9 -3195,9 +3195,9 @@@
                 if (counter_select & CS_PORT_RCV_BUBBLE)
                         write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
   
-               /*if (counter_select & CS_PORT_MARK_FECN)
-                    write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
-               */
+               /* if (counter_select & CS_PORT_MARK_FECN)
+                *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
+                */
                 /* port_vl_xmit_discards ??? */
         }
   
@@@ -3039,19 -3208,20 +3208,20 @@@
   }
   
   static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
-                       struct ib_device *ibdev, u8 port, u32 *resp_len)
+                                struct ib_device *ibdev,
+                                u8 port, u32 *resp_len)
   {
         struct _port_ei *rsp;
         struct opa_port_error_info_msg *req;
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
         u64 port_mask;
         u32 num_ports;
-       unsigned long port_num;
+       u8 port_num;
         u8 num_pslm;
         u32 error_info_select;
   
         req = (struct opa_port_error_info_msg *)pmp->data;
- -      rsp = (struct _port_ei *)&req->port[0];
+ +      rsp = &req->port[0];
   
         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@@ -3071,7 -3241,7 +3241,7 @@@
         port_num = find_first_bit((unsigned long *)&port_mask,
                                   sizeof(port_mask));
   
-       if ((u8)port_num != port) {
+       if (port_num != port) {
                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)pmp);
         }
@@@ -3085,8 -3255,10 +3255,10 @@@
   
         /* ExcessiverBufferOverrunInfo */
         if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
-               /* status bit is essentially kept in the h/w - bit 5 of
-                * RCV_ERR_INFO */
+               /*
+                * status bit is essentially kept in the h/w - bit 5 of
+                * RCV_ERR_INFO
+                */
                 write_csr(dd, RCV_ERR_INFO,
                           RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
   
@@@ -3138,13 -3310,12 +3310,12 @@@ static int __subn_get_opa_cong_info(str
   }
   
   static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
-                                            u8 *data,
-                                            struct ib_device *ibdev,
-                                            u8 port, u32 *resp_len)
+                                      u8 *data, struct ib_device *ibdev,
+                                      u8 port, u32 *resp_len)
   {
         int i;
         struct opa_congestion_setting_attr *p =
-               (struct opa_congestion_setting_attr *) data;
+               (struct opa_congestion_setting_attr *)data;
         struct hfi1_ibport *ibp = to_iport(ibdev, port);
         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
         struct opa_congestion_setting_entry_shadow *entries;
@@@ -3154,7 -3325,7 +3325,7 @@@
   
         cc_state = get_cc_state(ppd);
   
-       if (cc_state == NULL) {
+       if (!cc_state) {
                 rcu_read_unlock();
                 return reply((struct ib_mad_hdr *)smp);
         }
@@@ -3183,7 -3354,7 +3354,7 @@@ static int __subn_set_opa_cong_setting(
                                        u32 *resp_len)
   {
         struct opa_congestion_setting_attr *p =
-               (struct opa_congestion_setting_attr *) data;
+               (struct opa_congestion_setting_attr *)data;
         struct hfi1_ibport *ibp = to_iport(ibdev, port);
         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
         struct opa_congestion_setting_entry_shadow *entries;
@@@ -3245,7 -3416,7 +3416,7 @@@ static int __subn_get_opa_hfi1_cong_log
                         continue;
                 memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
                 memcpy(cong_log->events[i].remote_qp_number_cn_entry,
-                       &cce->rqpn, 3);
+                      &cce->rqpn, 3);
                 cong_log->events[i].sl_svc_type_cn_entry =
                         ((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
                 cong_log->events[i].remote_lid_cn_entry =
@@@ -3275,7 -3446,7 +3446,7 @@@ static int __subn_get_opa_cc_table(stru
                                    u32 *resp_len)
   {
         struct ib_cc_table_attr *cc_table_attr =
-               (struct ib_cc_table_attr *) data;
+               (struct ib_cc_table_attr *)data;
         struct hfi1_ibport *ibp = to_iport(ibdev, port);
         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
         u32 start_block = OPA_AM_START_BLK(am);
@@@ -3296,7 -3467,7 +3467,7 @@@
   
         cc_state = get_cc_state(ppd);
   
-       if (cc_state == NULL) {
+       if (!cc_state) {
                 rcu_read_unlock();
                 return reply((struct ib_mad_hdr *)smp);
         }
@@@ -3316,7 -3487,7 +3487,7 @@@
         rcu_read_unlock();
   
         if (resp_len)
-               *resp_len += sizeof(u16)*(IB_CCT_ENTRIES * n_blocks + 1);
+               *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
   
         return reply((struct ib_mad_hdr *)smp);
   }
@@@ -3332,7 -3503,7 +3503,7 @@@ static int __subn_set_opa_cc_table(stru
                                    struct ib_device *ibdev, u8 port,
                                    u32 *resp_len)
   {
-       struct ib_cc_table_attr *p = (struct ib_cc_table_attr *) data;
+       struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
         struct hfi1_ibport *ibp = to_iport(ibdev, port);
         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
         u32 start_block = OPA_AM_START_BLK(am);
@@@ -3362,14 -3533,14 +3533,14 @@@
         }
   
         new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
-       if (new_cc_state == NULL)
+       if (!new_cc_state)
                 goto getit;
   
         spin_lock(&ppd->cc_state_lock);
   
         old_cc_state = get_cc_state(ppd);
   
-       if (old_cc_state == NULL) {
+       if (!old_cc_state) {
                 spin_unlock(&ppd->cc_state_lock);
                 kfree(new_cc_state);
                 return reply((struct ib_mad_hdr *)smp);
@@@ -3409,26 -3580,31 +3580,31 @@@ struct opa_led_info 
   };
   
   #define OPA_LED_SHIFT 31
- #define OPA_LED_MASK  (1 << OPA_LED_SHIFT)
+ #define OPA_LED_MASK  BIT(OPA_LED_SHIFT)
   
   static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
                                    u32 *resp_len)
   {
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-       struct opa_led_info *p = (struct opa_led_info *) data;
+       struct hfi1_pportdata *ppd = dd->pport;
+       struct opa_led_info *p = (struct opa_led_info *)data;
         u32 nport = OPA_AM_NPORT(am);
-       u64 reg;
+       u32 is_beaconing_active;
   
         if (nport != 1) {
                 smp->status |= IB_SMP_INVALID_FIELD;
                 return reply((struct ib_mad_hdr *)smp);
         }
   
-       reg = read_csr(dd, DCC_CFG_LED_CNTRL);
-       if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) &&
-               ((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf))
-                       p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK);
+       /*
+        * This pairs with the memory barrier in hfi1_start_led_override to
+        * ensure that we read the correct state of LED beaconing represented
+        * by led_override_timer_active
+        */
+       smp_rmb();
+       is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+       p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
   
         if (resp_len)
                 *resp_len += sizeof(struct opa_led_info);
@@@ -3441,7 -3617,7 +3617,7 @@@ static int __subn_set_opa_led_info(stru
                                    u32 *resp_len)
   {
         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-       struct opa_led_info *p = (struct opa_led_info *) data;
+       struct opa_led_info *p = (struct opa_led_info *)data;
         u32 nport = OPA_AM_NPORT(am);
         int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
   
@@@ -3450,7 -3626,10 +3626,10 @@@
                 return reply((struct ib_mad_hdr *)smp);
         }
   
-       setextled(dd, on);
+       if (on)
+               hfi1_start_led_override(dd->pport, 2000, 1500);
+       else
+               shutdown_led_override(dd->pport);
   
         return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
   }
@@@ -3493,7 -3672,7 +3672,7 @@@ static int subn_get_opa_sma(__be16 attr
                 break;
         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
                 ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-                                              resp_len);
+                                               resp_len);
                 break;
         case OPA_ATTRIB_ID_PORT_STATE_INFO:
                 ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
@@@ -3532,9 -3711,9 +3711,9 @@@
                                               resp_len);
                 break;
         case IB_SMP_ATTR_SM_INFO:
-               if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+               if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-               if (ibp->port_cap_flags & IB_PORT_SM)
+               if (ibp->rvp.port_cap_flags & IB_PORT_SM)
                         return IB_MAD_RESULT_SUCCESS;
                 /* FALLTHROUGH */
         default:
@@@ -3575,7 -3754,7 +3754,7 @@@ static int subn_set_opa_sma(__be16 attr
                 break;
         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
                 ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-                                              resp_len);
+                                               resp_len);
                 break;
         case OPA_ATTRIB_ID_PORT_STATE_INFO:
                 ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
@@@ -3602,9 -3781,9 +3781,9 @@@
                                               resp_len);
                 break;
         case IB_SMP_ATTR_SM_INFO:
-               if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+               if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-               if (ibp->port_cap_flags & IB_PORT_SM)
+               if (ibp->rvp.port_cap_flags & IB_PORT_SM)
                         return IB_MAD_RESULT_SUCCESS;
                 /* FALLTHROUGH */
         default:
@@@ -3654,14 -3833,13 +3833,13 @@@ static int subn_get_opa_aggregate(struc
                 /* zero the payload for this segment */
                 memset(next_smp + sizeof(*agg), 0, agg_data_len);
   
-               (void) subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
+               (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
                                         ibdev, port, NULL);
                 if (smp->status & ~IB_SMP_DIRECTION) {
                         set_aggr_error(agg);
                         return reply((struct ib_mad_hdr *)smp);
                 }
                 next_smp += agg_size;
- 
         }
   
         return reply((struct ib_mad_hdr *)smp);
@@@ -3698,14 -3876,13 +3876,13 @@@ static int subn_set_opa_aggregate(struc
                         return reply((struct ib_mad_hdr *)smp);
                 }
   
-               (void) subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
+               (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
                                         ibdev, port, NULL);
                 if (smp->status & ~IB_SMP_DIRECTION) {
                         set_aggr_error(agg);
                         return reply((struct ib_mad_hdr *)smp);
                 }
                 next_smp += agg_size;
- 
         }
   
         return reply((struct ib_mad_hdr *)smp);
@@@ -3823,7 -4000,7 +4000,7 @@@ static int process_subn_opa(struct ib_d
         if (smp->class_version != OPA_SMI_CLASS_VERSION) {
                 smp->status |= IB_SMP_UNSUP_VERSION;
                 ret = reply((struct ib_mad_hdr *)smp);
-               goto bail;
+               return ret;
         }
         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
                          smp->route.dr.dr_slid, smp->route.dr.return_path,
@@@ -3843,13 -4020,13 +4020,13 @@@
                      smp->method == IB_MGMT_METHOD_SET) &&
                     port_num && port_num <= ibdev->phys_port_cnt &&
                     port != port_num)
-                       (void) check_mkey(to_iport(ibdev, port_num),
+                       (void)check_mkey(to_iport(ibdev, port_num),
                                           (struct ib_mad_hdr *)smp, 0,
                                           smp->mkey, smp->route.dr.dr_slid,
                                           smp->route.dr.return_path,
                                           smp->hop_cnt);
                 ret = IB_MAD_RESULT_FAILURE;
-               goto bail;
+               return ret;
         }
   
         *resp_len = opa_get_smp_header_size(smp);
@@@ -3861,23 -4038,25 +4038,25 @@@
                         clear_opa_smp_data(smp);
                         ret = subn_get_opa_sma(attr_id, smp, am, data,
                                                ibdev, port, resp_len);
-                       goto bail;
+                       break;
                 case OPA_ATTRIB_ID_AGGREGATE:
                         ret = subn_get_opa_aggregate(smp, ibdev, port,
                                                      resp_len);
-                       goto bail;
+                       break;
                 }
+               break;
         case IB_MGMT_METHOD_SET:
                 switch (attr_id) {
                 default:
                         ret = subn_set_opa_sma(attr_id, smp, am, data,
                                                ibdev, port, resp_len);
-                       goto bail;
+                       break;
                 case OPA_ATTRIB_ID_AGGREGATE:
                         ret = subn_set_opa_aggregate(smp, ibdev, port,
                                                      resp_len);
-                       goto bail;
+                       break;
                 }
+               break;
         case IB_MGMT_METHOD_TRAP:
         case IB_MGMT_METHOD_REPORT:
         case IB_MGMT_METHOD_REPORT_RESP:
@@@ -3888,13 -4067,13 +4067,13 @@@
                  * Just tell the caller to process it normally.
                  */
                 ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
+               break;
         default:
                 smp->status |= IB_SMP_UNSUP_METHOD;
                 ret = reply((struct ib_mad_hdr *)smp);
+               break;
         }
   
- bail:
         return ret;
   }
   
@@@ -3910,7 -4089,7 +4089,7 @@@ static int process_subn(struct ib_devic
         if (smp->class_version != 1) {
                 smp->status |= IB_SMP_UNSUP_VERSION;
                 ret = reply((struct ib_mad_hdr *)smp);
-               goto bail;
+               return ret;
         }
   
         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
@@@ -3931,13 -4110,13 +4110,13 @@@
                      smp->method == IB_MGMT_METHOD_SET) &&
                     port_num && port_num <= ibdev->phys_port_cnt &&
                     port != port_num)
-                       (void) check_mkey(to_iport(ibdev, port_num),
-                                         (struct ib_mad_hdr *)smp, 0,
-                                         smp->mkey,
-                                         (__force __be32)smp->dr_slid,
-                                         smp->return_path, smp->hop_cnt);
+                       (void)check_mkey(to_iport(ibdev, port_num),
+                                        (struct ib_mad_hdr *)smp, 0,
+                                        smp->mkey,
+                                        (__force __be32)smp->dr_slid,
+                                        smp->return_path, smp->hop_cnt);
                 ret = IB_MAD_RESULT_FAILURE;
-               goto bail;
+               return ret;
         }
   
         switch (smp->method) {
@@@ -3945,15 -4124,77 +4124,77 @@@
                 switch (smp->attr_id) {
                 case IB_SMP_ATTR_NODE_INFO:
                         ret = subn_get_nodeinfo(smp, ibdev, port);
-                       goto bail;
+                       break;
                 default:
                         smp->status |= IB_SMP_UNSUP_METH_ATTR;
                         ret = reply((struct ib_mad_hdr *)smp);
-                       goto bail;
+                       break;
                 }
+               break;
+       }
+ 
+       return ret;
+ }
+ 
+ static int process_perf(struct ib_device *ibdev, u8 port,
+                       const struct ib_mad *in_mad,
+                       struct ib_mad *out_mad)
+ {
+       struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
+       struct ib_class_port_info *cpi = (struct ib_class_port_info *)
+                                               &pmp->data;
+       int ret = IB_MAD_RESULT_FAILURE;
+ 
+       *out_mad = *in_mad;
+       if (pmp->mad_hdr.class_version != 1) {
+               pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
+               ret = reply((struct ib_mad_hdr *)pmp);
+               return ret;
+       }
+ 
+       switch (pmp->mad_hdr.method) {
+       case IB_MGMT_METHOD_GET:
+               switch (pmp->mad_hdr.attr_id) {
+               case IB_PMA_PORT_COUNTERS:
+                       ret = pma_get_ib_portcounters(pmp, ibdev, port);
+                       break;
+               case IB_PMA_PORT_COUNTERS_EXT:
+                       ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
+                       break;
+               case IB_PMA_CLASS_PORT_INFO:
+                       cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+                       ret = reply((struct ib_mad_hdr *)pmp);
+                       break;
+               default:
+                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+                       ret = reply((struct ib_mad_hdr *)pmp);
+                       break;
+               }
+               break;
+ 
+       case IB_MGMT_METHOD_SET:
+               if (pmp->mad_hdr.attr_id) {
+                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+                       ret = reply((struct ib_mad_hdr *)pmp);
+               }
+               break;
+ 
+       case IB_MGMT_METHOD_TRAP:
+       case IB_MGMT_METHOD_GET_RESP:
+               /*
+                * The ib_mad module will call us to process responses
+                * before checking for other consumers.
+                * Just tell the caller to process it normally.
+                */
+               ret = IB_MAD_RESULT_SUCCESS;
+               break;
+ 
+       default:
+               pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
+               ret = reply((struct ib_mad_hdr *)pmp);
+               break;
         }
   
- bail:
         return ret;
   }
   
@@@ -3978,44 -4219,46 +4219,46 @@@ static int process_perf_opa(struct ib_d
                 switch (pmp->mad_hdr.attr_id) {
                 case IB_PMA_CLASS_PORT_INFO:
                         ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
-                       goto bail;
+                       break;
                 case OPA_PM_ATTRIB_ID_PORT_STATUS:
                         ret = pma_get_opa_portstatus(pmp, ibdev, port,
-                                                               resp_len);
-                       goto bail;
+                                                    resp_len);
+                       break;
                 case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
                         ret = pma_get_opa_datacounters(pmp, ibdev, port,
-                                                               resp_len);
-                       goto bail;
+                                                      resp_len);
+                       break;
                 case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
                         ret = pma_get_opa_porterrors(pmp, ibdev, port,
-                                                               resp_len);
-                       goto bail;
+                                                    resp_len);
+                       break;
                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
                         ret = pma_get_opa_errorinfo(pmp, ibdev, port,
-                                                               resp_len);
-                       goto bail;
+                                                   resp_len);
+                       break;
                 default:
                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
                         ret = reply((struct ib_mad_hdr *)pmp);
-                       goto bail;
+                       break;
                 }
+               break;
   
         case IB_MGMT_METHOD_SET:
                 switch (pmp->mad_hdr.attr_id) {
                 case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
                         ret = pma_set_opa_portstatus(pmp, ibdev, port,
-                                                               resp_len);
-                       goto bail;
+                                                    resp_len);
+                       break;
                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
                         ret = pma_set_opa_errorinfo(pmp, ibdev, port,
-                                                               resp_len);
-                       goto bail;
+                                                   resp_len);
+                       break;
                 default:
                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
                         ret = reply((struct ib_mad_hdr *)pmp);
-                       goto bail;
+                       break;
                 }
+               break;
   
         case IB_MGMT_METHOD_TRAP:
         case IB_MGMT_METHOD_GET_RESP:
@@@ -4025,14 -4268,14 +4268,14 @@@
                  * Just tell the caller to process it normally.
                  */
                 ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
+               break;
   
         default:
                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
                 ret = reply((struct ib_mad_hdr *)pmp);
+               break;
         }
   
- bail:
         return ret;
   }
   
@@@ -4097,12 -4340,15 +4340,15 @@@ static int hfi1_process_ib_mad(struct i
         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
                 ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
-               goto bail;
+               break;
+       case IB_MGMT_CLASS_PERF_MGMT:
+               ret = process_perf(ibdev, port, in_mad, out_mad);
+               break;
         default:
                 ret = IB_MAD_RESULT_SUCCESS;
+               break;
         }
   
- bail:
         return ret;
   }
   
@@@ -4154,66 -4400,3 +4400,3 @@@ int hfi1_process_mad(struct ib_device *
   
         return IB_MAD_RESULT_FAILURE;
   }
- 
- static void send_handler(struct ib_mad_agent *agent,
-                        struct ib_mad_send_wc *mad_send_wc)
- {
-       ib_free_send_mad(mad_send_wc->send_buf);
- }
- 
- int hfi1_create_agents(struct hfi1_ibdev *dev)
- {
-       struct hfi1_devdata *dd = dd_from_dev(dev);
-       struct ib_mad_agent *agent;
-       struct hfi1_ibport *ibp;
-       int p;
-       int ret;
- 
-       for (p = 0; p < dd->num_pports; p++) {
-               ibp = &dd->pport[p].ibport_data;
-               agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-                                             NULL, 0, send_handler,
-                                             NULL, NULL, 0);
-               if (IS_ERR(agent)) {
-                       ret = PTR_ERR(agent);
-                       goto err;
-               }
- 
-               ibp->send_agent = agent;
-       }
- 
-       return 0;
- 
- err:
-       for (p = 0; p < dd->num_pports; p++) {
-               ibp = &dd->pport[p].ibport_data;
-               if (ibp->send_agent) {
-                       agent = ibp->send_agent;
-                       ibp->send_agent = NULL;
-                       ib_unregister_mad_agent(agent);
-               }
-       }
- 
-       return ret;
- }
- 
- void hfi1_free_agents(struct hfi1_ibdev *dev)
- {
-       struct hfi1_devdata *dd = dd_from_dev(dev);
-       struct ib_mad_agent *agent;
-       struct hfi1_ibport *ibp;
-       int p;
- 
-       for (p = 0; p < dd->num_pports; p++) {
-               ibp = &dd->pport[p].ibport_data;
-               if (ibp->send_agent) {
-                       agent = ibp->send_agent;
-                       ibp->send_agent = NULL;
-                       ib_unregister_mad_agent(agent);
-               }
-               if (ibp->sm_ah) {
-                       ib_destroy_ah(&ibp->sm_ah->ibah);
-                       ibp->sm_ah = NULL;
-               }
-       }
- }
diff --combined drivers/staging/rdma/hfi1/pcie.c

index 47ca6314e3284dd6d67ef5d61013894fd22756ff,42a409f1644955a449dd58f08630aafcb02583e6..0bac21e6a658ca242b856910e06be7a91cbf7284
--- 1/drivers/staging/rdma/hfi1/pcie.c
--- 2/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/staging/rdma/hfi1/pcie.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -57,6 -54,7 +54,7 @@@
   
   #include "hfi.h"
   #include "chip_registers.h"
+ #include "aspm.h"
   
   /* link speed vector for Gen3 speed - not in Linux headers */
   #define GEN1_SPEED_VECTOR 0x1
@@@ -122,8 -120,9 +120,9 @@@ int hfi1_pcie_init(struct pci_dev *pdev
                         goto bail;
                 }
                 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       } else
+       } else {
                 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       }
         if (ret) {
                 hfi1_early_err(&pdev->dev,
                                "Unable to set DMA consistent mask: %d\n", ret);
@@@ -131,13 -130,7 +130,7 @@@
         }
   
         pci_set_master(pdev);
-       ret = pci_enable_pcie_error_reporting(pdev);
-       if (ret) {
-               hfi1_early_err(&pdev->dev,
-                              "Unable to enable pcie error reporting: %d\n",
-                             ret);
-               ret = 0;
-       }
+       (void)pci_enable_pcie_error_reporting(pdev);
         goto done;
   
   bail:
@@@ -222,10 -215,9 +215,9 @@@ int hfi1_pcie_ddinit(struct hfi1_devdat
         pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
         pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
         pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
-                                                       &dd->pcie_devctl2);
+                                 &dd->pcie_devctl2);
         pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
-       pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                                                       &dd->pci_lnkctl3);
+       pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
         pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
   
         return 0;
@@@ -238,7 -230,7 +230,7 @@@
    */
   void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
   {
-       u64 __iomem *base = (void __iomem *) dd->kregbase;
+       u64 __iomem *base = (void __iomem *)dd->kregbase;
   
         dd->flags &= ~HFI1_PRESENT;
         dd->kregbase = NULL;
@@@ -247,6 -239,8 +239,6 @@@
                 iounmap(dd->rcvarray_wc);
         if (dd->piobase)
                 iounmap(dd->piobase);
- -
- -      pci_set_drvdata(dd->pcidev, NULL);
   }
   
   /*
@@@ -274,7 -268,7 +266,7 @@@ void hfi1_pcie_flr(struct hfi1_devdata 
   
   clear:
         pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
-                                               PCI_EXP_DEVCTL_BCR_FLR);
+                                PCI_EXP_DEVCTL_BCR_FLR);
         /* PCIe spec requires the function to be back within 100ms */
         msleep(100);
   }
@@@ -287,9 -281,11 +279,11 @@@ static void msix_setup(struct hfi1_devd
         struct msix_entry *msix_entry;
         int i;
   
-       /* We can't pass hfi1_msix_entry array to msix_setup
+       /*
+        * We can't pass hfi1_msix_entry array to msix_setup
          * so use a dummy msix_entry array and copy the allocated
-        * irq back to the hfi1_msix_entry array. */
+        * irq back to the hfi1_msix_entry array.
+        */
         msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
         if (!msix_entry) {
                 ret = -ENOMEM;
@@@ -319,7 -315,6 +313,6 @@@ do_intx
                    nvec, ret);
         *msixcnt = 0;
         hfi1_enable_intx(dd->pcidev);
- 
   }
   
   /* return the PCIe link speed from the given link status */
@@@ -367,6 -362,7 +360,7 @@@ static void update_lbus_info(struct hfi
   int pcie_speeds(struct hfi1_devdata *dd)
   {
         u32 linkcap;
+       struct pci_dev *parent = dd->pcidev->bus->self;
   
         if (!pci_is_pcie(dd->pcidev)) {
                 dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@@ -379,15 -375,15 +373,15 @@@
         pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
         if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
                 dd_dev_info(dd,
-                       "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
-                       linkcap & PCI_EXP_LNKCAP_SLS);
+                           "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
+                           linkcap & PCI_EXP_LNKCAP_SLS);
                 dd->link_gen3_capable = 0;
         }
   
         /*
          * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
          */
-       if (dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+       if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
                 dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
                 dd->link_gen3_capable = 0;
         }
@@@ -395,9 -391,7 +389,7 @@@
         /* obtain the link width and current speed */
         update_lbus_info(dd);
   
-       /* check against expected pcie width and complain if "wrong" */
-       if (dd->lbus_width < 16)
-               dd_dev_err(dd, "PCIe width %u (x16 HFI)\n", dd->lbus_width);
+       dd_dev_info(dd, "%s\n", dd->lbus_info);
   
         return 0;
   }
@@@ -436,23 -430,18 +428,18 @@@ void hfi1_enable_intx(struct pci_dev *p
   void restore_pci_variables(struct hfi1_devdata *dd)
   {
         pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
-       pci_write_config_dword(dd->pcidev,
-                               PCI_BASE_ADDRESS_0, dd->pcibar0);
-       pci_write_config_dword(dd->pcidev,
-                               PCI_BASE_ADDRESS_1, dd->pcibar1);
-       pci_write_config_dword(dd->pcidev,
-                               PCI_ROM_ADDRESS, dd->pci_rom);
+       pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
+       pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
+       pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
         pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
         pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
         pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
-                                                       dd->pcie_devctl2);
+                                  dd->pcie_devctl2);
         pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
-       pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                                                       dd->pci_lnkctl3);
+       pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
         pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
   }
   
- 
   /*
    * BIOS may not set PCIe bus-utilization parameters for best performance.
    * Check and optionally adjust them to maximize our throughput.
@@@ -461,6 -450,10 +448,10 @@@ static int hfi1_pcie_caps
   module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
   MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
   
+ uint aspm_mode = ASPM_MODE_DISABLED;
+ module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+ MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+ 
   static void tune_pcie_caps(struct hfi1_devdata *dd)
   {
         struct pci_dev *parent;
@@@ -479,6 -472,12 +470,12 @@@
         }
         /* Find out supported and configured values for parent (root) */
         parent = dd->pcidev->bus->self;
+       /*
+        * The driver cannot perform the tuning if it does not have
+        * access to the upstream component.
+        */
+       if (!parent)
+               return;
         if (!pci_is_root_bus(parent->bus)) {
                 dd_dev_info(dd, "Parent not root\n");
                 return;
@@@ -532,6 -531,7 +529,7 @@@
                 pcie_set_readrq(dd->pcidev, ep_mrrs);
         }
   }
+ 
   /* End of PCIe capability tuning */
   
   /*
@@@ -746,21 -746,22 +744,22 @@@ static int load_eq_table(struct hfi1_de
                 c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div);
                 c_plus1 = eq[i][POST] / div;
                 pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
-                       eq_value(c_minus1, c0, c_plus1));
+                                      eq_value(c_minus1, c0, c_plus1));
                 /* check if these coefficients violate EQ rules */
                 pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
-                                                               &violation);
+                                     &violation);
                 if (violation
                     & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
                         if (hit_error == 0) {
                                 dd_dev_err(dd,
-                                       "Gen3 EQ Table Coefficient rule violations\n");
+                                          "Gen3 EQ Table Coefficient rule violations\n");
                                 dd_dev_err(dd, "         prec   attn   post\n");
                         }
                         dd_dev_err(dd, "   p%02d:   %02x     %02x     %02x\n",
-                               i, (u32)eq[i][0], (u32)eq[i][1], (u32)eq[i][2]);
+                                  i, (u32)eq[i][0], (u32)eq[i][1],
+                                  (u32)eq[i][2]);
                         dd_dev_err(dd, "            %02x     %02x     %02x\n",
-                               (u32)c_minus1, (u32)c0, (u32)c_plus1);
+                                  (u32)c_minus1, (u32)c0, (u32)c_plus1);
                         hit_error = 1;
                 }
         }
@@@ -772,7 -773,7 +771,7 @@@
   /*
    * Steps to be done after the PCIe firmware is downloaded and
    * before the SBR for the Pcie Gen3.
-  * The hardware mutex is already being held.
+  * The SBus resource is already being held.
    */
   static void pcie_post_steps(struct hfi1_devdata *dd)
   {
@@@ -815,8 -816,8 +814,8 @@@ static int trigger_sbr(struct hfi1_devd
         list_for_each_entry(pdev, &dev->bus->devices, bus_list)
                 if (pdev != dev) {
                         dd_dev_err(dd,
-                               "%s: another device is on the same bus\n",
-                               __func__);
+                                  "%s: another device is on the same bus\n",
+                                  __func__);
                         return -ENOTTY;
                 }
   
@@@ -840,8 -841,8 +839,8 @@@ static void write_gasket_interrupt(stru
                                    u16 code, u16 data)
   {
         write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8),
-           (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT)
-           |((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
+                 (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) |
+                  ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
   }
   
   /*
@@@ -851,14 -852,13 +850,13 @@@ static void arm_gasket_logic(struct hfi
   {
         u64 reg;
   
-       reg = (((u64)1 << dd->hfi1_id)
-                       << ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT)
-               | ((u64)pcie_serdes_broadcast[dd->hfi1_id]
-                       << ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT
-               | ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK
-               | ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK)
-                       << ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT
-               );
+       reg = (((u64)1 << dd->hfi1_id) <<
+              ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) |
+             ((u64)pcie_serdes_broadcast[dd->hfi1_id] <<
+              ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT |
+              ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK |
+              ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) <<
+              ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT);
         write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg);
         /* read back to push the write */
         read_csr(dd, ASIC_PCIE_SD_HOST_CMD);
@@@ -946,7 -946,7 +944,7 @@@ static void write_xmt_margin(struct hfi
    */
   int do_pcie_gen3_transition(struct hfi1_devdata *dd)
   {
-       struct pci_dev *parent;
+       struct pci_dev *parent = dd->pcidev->bus->self;
         u64 fw_ctrl;
         u64 reg, therm;
         u32 reg32, fs, lf;
@@@ -955,8 -955,7 +953,7 @@@
         int do_retry, retry_count = 0;
         uint default_pset;
         u16 target_vector, target_speed;
-       u16 lnkctl, lnkctl2, vendor;
-       u8 nsbr = 1;
+       u16 lnkctl2, vendor;
         u8 div;
         const u8 (*eq)[3];
         int return_error = 0;
@@@ -983,17 -982,21 +980,21 @@@
         /* if already at target speed, done (unless forced) */
         if (dd->lbus_speed == target_speed) {
                 dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__,
-                       pcie_target,
-                       pcie_force ? "re-doing anyway" : "skipping");
+                           pcie_target,
+                           pcie_force ? "re-doing anyway" : "skipping");
                 if (!pcie_force)
                         return 0;
         }
   
         /*
-        * A0 needs an additional SBR
+        * The driver cannot do the transition if it has no access to the
+        * upstream component
          */
-       if (is_ax(dd))
-               nsbr++;
+       if (!parent) {
+               dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n",
+                           __func__);
+               return 0;
+       }
   
         /*
          * Do the Gen3 transition.  Steps are those of the PCIe Gen3
@@@ -1009,10 -1012,13 +1010,13 @@@
                 goto done_no_mutex;
         }
   
-       /* hold the HW mutex across the firmware download and SBR */
-       ret = acquire_hw_mutex(dd);
-       if (ret)
+       /* hold the SBus resource across the firmware download and SBR */
+       ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+       if (ret) {
+               dd_dev_err(dd, "%s: unable to acquire SBus resource\n",
+                          __func__);
                 return ret;
+       }
   
         /* make sure thermal polling is not causing interrupts */
         therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN);
@@@ -1030,8 -1036,11 +1034,11 @@@ retry
         /* step 4: download PCIe Gen3 SerDes firmware */
         dd_dev_info(dd, "%s: downloading firmware\n", __func__);
         ret = load_pcie_firmware(dd);
-       if (ret)
+       if (ret) {
+               /* do not proceed if the firmware cannot be downloaded */
+               return_error = 1;
                 goto done;
+       }
   
         /* step 5: set up device parameter settings */
         dd_dev_info(dd, "%s: setting PCIe registers\n", __func__);
@@@ -1091,8 -1100,10 +1098,10 @@@
                 default_pset = DEFAULT_MCP_PSET;
         }
         pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
-               (fs << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT)
-               | (lf << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
+                              (fs <<
+                               PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) |
+                              (lf <<
+                               PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
         ret = load_eq_table(dd, eq, fs, div);
         if (ret)
                 goto done;
@@@ -1106,15 -1117,15 +1115,15 @@@
                 pcie_pset = default_pset;
         if (pcie_pset > 10) {   /* valid range is 0-10, inclusive */
                 dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
-                       __func__, pcie_pset, default_pset);
+                          __func__, pcie_pset, default_pset);
                 pcie_pset = default_pset;
         }
         dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset);
         pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
-               ((1 << pcie_pset)
-                       << PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT)
-               | PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK
-               | PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
+                              ((1 << pcie_pset) <<
+                       PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
+                       PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
+                       PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
   
         /*
          * step 5b: Do post firmware download steps via SBus
@@@ -1142,11 -1153,12 +1151,12 @@@
          */
         write_xmt_margin(dd, __func__);
   
-       /* step 5e: disable active state power management (ASPM) */
+       /*
+        * step 5e: disable active state power management (ASPM). It
+        * will be enabled if required later
+        */
         dd_dev_info(dd, "%s: clearing ASPM\n", __func__);
-       pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &lnkctl);
-       lnkctl &= ~PCI_EXP_LNKCTL_ASPMC;
-       pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, lnkctl);
+       aspm_hw_disable_l1(dd);
   
         /*
          * step 5f: clear DirectSpeedChange
@@@ -1165,16 -1177,15 +1175,15 @@@
          * that it is Gen3 capable earlier.
          */
         dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
-       parent = dd->pcidev->bus->self;
         pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
         dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-               (u32)lnkctl2);
+                   (u32)lnkctl2);
         /* only write to parent if target is not as high as ours */
         if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
                 lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
                 lnkctl2 |= target_vector;
                 dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-                       (u32)lnkctl2);
+                           (u32)lnkctl2);
                 pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
         } else {
                 dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
@@@ -1183,17 -1194,17 +1192,17 @@@
         dd_dev_info(dd, "%s: setting target link speed\n", __func__);
         pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
         dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-               (u32)lnkctl2);
+                   (u32)lnkctl2);
         lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
         lnkctl2 |= target_vector;
         dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-               (u32)lnkctl2);
+                   (u32)lnkctl2);
         pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
   
         /* step 5h: arm gasket logic */
         /* hold DC in reset across the SBR */
         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-       (void) read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
+       (void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
         /* save firmware control across the SBR */
         fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL);
   
@@@ -1224,8 -1235,8 +1233,8 @@@
         ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor);
         if (ret) {
                 dd_dev_info(dd,
-                       "%s: read of VendorID failed after SBR, err %d\n",
-                       __func__, ret);
+                           "%s: read of VendorID failed after SBR, err %d\n",
+                           __func__, ret);
                 return_error = 1;
                 goto done;
         }
@@@ -1265,8 -1276,7 +1274,7 @@@
         write_csr(dd, CCE_DC_CTRL, 0);
   
         /* Set the LED off */
-       if (is_ax(dd))
-               setextled(dd, 0);
+       setextled(dd, 0);
   
         /* check for any per-lane errors */
         pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
@@@ -1277,8 -1287,8 +1285,8 @@@
                         & ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK;
         if ((status & (1 << dd->hfi1_id)) == 0) {
                 dd_dev_err(dd,
-                       "%s: gasket status 0x%x, expecting 0x%x\n",
-                       __func__, status, 1 << dd->hfi1_id);
+                          "%s: gasket status 0x%x, expecting 0x%x\n",
+                          __func__, status, 1 << dd->hfi1_id);
                 ret = -EIO;
                 goto done;
         }
@@@ -1295,13 -1305,13 +1303,13 @@@
         /* update our link information cache */
         update_lbus_info(dd);
         dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
-               dd->lbus_info);
+                   dd->lbus_info);
   
         if (dd->lbus_speed != target_speed) { /* not target */
                 /* maybe retry */
                 do_retry = retry_count < pcie_retry;
                 dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
-                       pcie_target, do_retry ? ", retrying" : "");
+                          pcie_target, do_retry ? ", retrying" : "");
                 retry_count++;
                 if (do_retry) {
                         msleep(100); /* allow time to settle */
@@@ -1317,7 -1327,7 +1325,7 @@@ done
                 dd_dev_info(dd, "%s: Re-enable therm polling\n",
                             __func__);
         }
-       release_hw_mutex(dd);
+       release_chip_resource(dd, CR_SBUS);
   done_no_mutex:
         /* return no error if it is OK to be at current speed */
         if (ret && !return_error) {
diff --combined drivers/staging/rdma/hfi1/pio_copy.c

index 64bef6c266539f75349300812f84ace27cff492e,228e9fb76e08045d85ee15ab23ea2089e2fc6b69..8c25e1b58849a17a22a96d6ca8c4f54de4992098
--- 1/drivers/staging/rdma/hfi1/pio_copy.c
--- 2/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/staging/rdma/hfi1/pio_copy.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -52,9 -49,9 +49,9 @@@
   
   /* additive distance between non-SOP and SOP space */
   #define SOP_DISTANCE (TXE_PIO_SIZE / 2)
- #define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
+ #define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
   /* number of QUADWORDs in a block */
- #define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
+ #define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
   
   /**
    * pio_copy - copy data block to MMIO space
@@@ -83,11 -80,13 +80,13 @@@ void pio_copy(struct hfi1_devdata *dd, 
         dest += sizeof(u64);
   
         /* calculate where the QWORD data ends - in SOP=1 space */
-       dend = dest + ((count>>1) * sizeof(u64));
+       dend = dest + ((count >> 1) * sizeof(u64));
   
         if (dend < send) {
-               /* all QWORD data is within the SOP block, does *not*
-                  reach the end of the SOP block */
+               /*
+                * all QWORD data is within the SOP block, does *not*
+                * reach the end of the SOP block
+                */
   
                 while (dest < dend) {
                         writeq(*(u64 *)from, dest);
@@@ -152,8 -151,10 +151,10 @@@
                 writeq(val.val64, dest);
                 dest += sizeof(u64);
         }
-       /* fill in rest of block, no need to check pbuf->end
-          as we only wrap on a block boundary */
+       /*
+        * fill in rest of block, no need to check pbuf->end
+        * as we only wrap on a block boundary
+        */
         while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
                 writeq(0, dest);
                 dest += sizeof(u64);
@@@ -177,7 -178,7 +178,7 @@@
    * "zero" shift - bit shift used to zero out upper bytes.  Input is
    * the count of LSB bytes to preserve.
    */
- #define zshift(x) (8 * (8-(x)))
+ #define zshift(x) (8 * (8 - (x)))
   
   /*
    * "merge" shift - bit shift used to merge with carry bytes.  Input is
@@@ -196,7 -197,7 +197,7 @@@
    * o nbytes must not span a QW boundary
    */
   static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-                                                       unsigned int nbytes)
+                                 unsigned int nbytes)
   {
         unsigned long off;
   
@@@ -223,7 -224,7 +224,7 @@@
    * o nbytes may span a QW boundary
    */
   static inline void read_extra_bytes(struct pio_buf *pbuf,
-                                       const void *from, unsigned int nbytes)
+                                   const void *from, unsigned int nbytes)
   {
         unsigned long off = (unsigned long)from & 0x7;
         unsigned int room, xbytes;
@@@ -235,7 -236,7 +236,7 @@@
         while (nbytes) {
                 /* find the number of bytes in this u64 */
                 room = 8 - off; /* this u64 has room for this many bytes */
- -              xbytes = nbytes > room ? room : nbytes;
+ +              xbytes = min(room, nbytes);
   
                 /*
                  * shift down to zero lower bytes, shift up to zero upper
@@@ -244,7 -245,7 +245,7 @@@
                 pbuf->carry.val64 |= (((*(u64 *)from)
                                         >> mshift(off))
                                         << zshift(xbytes))
-                                       >> zshift(xbytes+pbuf->carry_bytes);
+                                       >> zshift(xbytes + pbuf->carry_bytes);
                 off = 0;
                 pbuf->carry_bytes += xbytes;
                 nbytes -= xbytes;
@@@ -362,7 -363,7 +363,7 @@@ static inline void jcopy(u8 *dest, cons
    * o from may _not_ be u64 aligned.
    */
   static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-                                                       unsigned int nbytes)
+                                 unsigned int nbytes)
   {
         jcopy(&pbuf->carry.val8[0], from, nbytes);
         pbuf->carry_bytes = nbytes;
@@@ -377,7 -378,7 +378,7 @@@
    * o nbytes may span a QW boundary
    */
   static inline void read_extra_bytes(struct pio_buf *pbuf,
-                                       const void *from, unsigned int nbytes)
+                                   const void *from, unsigned int nbytes)
   {
         jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
         pbuf->carry_bytes += nbytes;
@@@ -411,7 -412,7 +412,7 @@@ static inline void merge_write8
   
         jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
         writeq(pbuf->carry.val64, dest);
-       jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
+       jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
   }
   
   /*
@@@ -433,7 -434,7 +434,7 @@@ static inline int carry_write8(struct p
                 u64 zero = 0;
   
                 jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
-                                               8 - pbuf->carry_bytes);
+                     8 - pbuf->carry_bytes);
                 writeq(pbuf->carry.val64, dest);
                 return 1;
         }
@@@ -453,7 -454,7 +454,7 @@@
    * @nbytes: bytes to copy
    */
   void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-                               const void *from, size_t nbytes)
+                       const void *from, size_t nbytes)
   {
         void __iomem *dest = pbuf->start + SOP_DISTANCE;
         void __iomem *send = dest + PIO_BLOCK_SIZE;
@@@ -463,11 -464,13 +464,13 @@@
         dest += sizeof(u64);
   
         /* calculate where the QWORD data ends - in SOP=1 space */
-       dend = dest + ((nbytes>>3) * sizeof(u64));
+       dend = dest + ((nbytes >> 3) * sizeof(u64));
   
         if (dend < send) {
-               /* all QWORD data is within the SOP block, does *not*
-                  reach the end of the SOP block */
+               /*
+                * all QWORD data is within the SOP block, does *not*
+                * reach the end of the SOP block
+                */
   
                 while (dest < dend) {
                         writeq(*(u64 *)from, dest);
@@@ -562,10 -565,12 +565,12 @@@ static void mid_copy_mix(struct pio_bu
                 void __iomem *send;             /* SOP end */
                 void __iomem *xend;
   
-               /* calculate the end of data or end of block, whichever
-                  comes first */
+               /*
+                * calculate the end of data or end of block, whichever
+                * comes first
+                */
                 send = pbuf->start + PIO_BLOCK_SIZE;
- -              xend = send < dend ? send : dend;
+ +              xend = min(send, dend);
   
                 /* shift up to SOP=1 space */
                 dest += SOP_DISTANCE;
@@@ -639,13 -644,13 +644,13 @@@
    * Must handle nbytes < 8.
    */
   static void mid_copy_straight(struct pio_buf *pbuf,
-                                               const void *from, size_t nbytes)
+                             const void *from, size_t nbytes)
   {
         void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
         void __iomem *dend;                     /* 8-byte data end */
   
         /* calculate 8-byte data end */
-       dend = dest + ((nbytes>>3) * sizeof(u64));
+       dend = dest + ((nbytes >> 3) * sizeof(u64));
   
         if (pbuf->qw_written < PIO_BLOCK_QWS) {
                 /*
@@@ -656,10 -661,12 +661,12 @@@
                 void __iomem *send;             /* SOP end */
                 void __iomem *xend;
   
-               /* calculate the end of data or end of block, whichever
-                  comes first */
+               /*
+                * calculate the end of data or end of block, whichever
+                * comes first
+                */
                 send = pbuf->start + PIO_BLOCK_SIZE;
- -              xend = send < dend ? send : dend;
+ +              xend = min(send, dend);
   
                 /* shift up to SOP=1 space */
                 dest += SOP_DISTANCE;
@@@ -713,7 -720,7 +720,7 @@@
         /* we know carry_bytes was zero on entry to this routine */
         read_low_bytes(pbuf, from, nbytes & 0x7);
   
-       pbuf->qw_written += nbytes>>3;
+       pbuf->qw_written += nbytes >> 3;
   }
   
   /*
diff --combined drivers/staging/rdma/hfi1/user_sdma.c

index 9d4f5d6aaf33ebf1c76544311f7d1095336d1dc0,46e254d52dadd514e9fa7abcc433f4f6cbfdc313..ab6b6a42000f709020a001a2aa9594d0f2f5b851
--- 1/drivers/staging/rdma/hfi1/user_sdma.c
--- 2/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@@ -1,12 -1,11 +1,11 @@@
   /*
+  * Copyright(c) 2015, 2016 Intel Corporation.
    *
    * This file is provided under a dual BSD/GPLv2 license.  When using or
    * redistributing this file, you may do so under either license.
    *
    * GPL LICENSE SUMMARY
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of version 2 of the GNU General Public License as
    * published by the Free Software Foundation.
@@@ -18,8 -17,6 +17,6 @@@
    *
    * BSD LICENSE
    *
-  * Copyright(c) 2015 Intel Corporation.
-  *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
@@@ -67,9 -64,11 +64,10 @@@
   #include "hfi.h"
   #include "sdma.h"
   #include "user_sdma.h"
- -#include "sdma.h"
   #include "verbs.h"  /* for the headers */
   #include "common.h" /* for struct hfi1_tid_info */
   #include "trace.h"
+ #include "mmu_rb.h"
   
   static uint hfi1_sdma_comp_ring_size = 128;
   module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
@@@ -146,7 -145,6 +144,6 @@@ MODULE_PARM_DESC(sdma_comp_size, "Size 
   
   /* Last packet in the request */
   #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
- #define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
   
   #define SDMA_REQ_IN_USE     0
   #define SDMA_REQ_FOR_THREAD 1
@@@ -170,16 -168,28 +167,28 @@@ static unsigned initial_pkt_count = 8
   #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
   
   struct user_sdma_iovec {
+       struct list_head list;
         struct iovec iov;
         /* number of pages in this vector */
         unsigned npages;
         /* array of pinned pages for this vector */
         struct page **pages;
-       /* offset into the virtual address space of the vector at
-        * which we last left off. */
+       /*
+        * offset into the virtual address space of the vector at
+        * which we last left off.
+        */
         u64 offset;
   };
   
+ struct sdma_mmu_node {
+       struct mmu_rb_node rb;
+       struct list_head list;
+       struct hfi1_user_sdma_pkt_q *pq;
+       atomic_t refcount;
+       struct page **pages;
+       unsigned npages;
+ };
+ 
   struct user_sdma_request {
         struct sdma_req_info info;
         struct hfi1_user_sdma_pkt_q *pq;
@@@ -212,15 -222,6 +221,6 @@@
          * to 0.
          */
         u8 omfactor;
-       /*
-        * pointer to the user's mm_struct. We are going to
-        * get a reference to it so it doesn't get freed
-        * since we might not be in process context when we
-        * are processing the iov's.
-        * Using this mm_struct, we can get vma based on the
-        * iov's address (find_vma()).
-        */
-       struct mm_struct *user_mm;
         /*
          * We copy the iovs for this request (based on
          * info.iovcnt). These are only the data vectors
@@@ -238,13 -239,12 +238,12 @@@
         u16 tididx;
         u32 sent;
         u64 seqnum;
+       u64 seqcomp;
+       u64 seqsubmitted;
         struct list_head txps;
-       spinlock_t txcmp_lock;  /* protect txcmp list */
-       struct list_head txcmp;
         unsigned long flags;
         /* status of the last txreq completed */
         int status;
-       struct work_struct worker;
   };
   
   /*
@@@ -259,11 -259,6 +258,6 @@@ struct user_sdma_txreq 
         struct sdma_txreq txreq;
         struct list_head list;
         struct user_sdma_request *req;
-       struct {
-               struct user_sdma_iovec *vec;
-               u8 flags;
-       } iovecs[3];
-       int idx;
         u16 flags;
         unsigned busycount;
         u64 seqnum;
@@@ -279,21 -274,21 +273,21 @@@
   
   static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
   static int num_user_pages(const struct iovec *);
- static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
- static void user_sdma_delayed_completion(struct work_struct *);
- static void user_sdma_free_request(struct user_sdma_request *);
+ static void user_sdma_txreq_cb(struct sdma_txreq *, int);
+ static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+ static void user_sdma_free_request(struct user_sdma_request *, bool);
   static int pin_vector_pages(struct user_sdma_request *,
                             struct user_sdma_iovec *);
- static void unpin_vector_pages(struct user_sdma_request *,
-                              struct user_sdma_iovec *);
+ static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
   static int check_header_template(struct user_sdma_request *,
                                  struct hfi1_pkt_header *, u32, u32);
   static int set_txreq_header(struct user_sdma_request *,
                             struct user_sdma_txreq *, u32);
   static int set_txreq_header_ahg(struct user_sdma_request *,
                                 struct user_sdma_txreq *, u32);
- static inline void set_comp_state(struct user_sdma_request *,
-                                       enum hfi1_sdma_comp_state, int);
+ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+                                 struct hfi1_user_sdma_comp_q *,
+                                 u16, enum hfi1_sdma_comp_state, int);
   static inline u32 set_pkt_bth_psn(__be32, u8, u32);
   static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
   
@@@ -303,6 -298,17 +297,17 @@@ static int defer_packet_queue
         struct sdma_txreq *,
         unsigned seq);
   static void activate_packet_queue(struct iowait *, int);
+ static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
+ static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
+ static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+ static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+ 
+ static struct mmu_rb_ops sdma_rb_ops = {
+       .filter = sdma_rb_filter,
+       .insert = sdma_rb_insert,
+       .remove = sdma_rb_remove,
+       .invalidate = sdma_rb_invalidate
+ };
   
   static int defer_packet_queue(
         struct sdma_engine *sde,
@@@ -345,7 -351,7 +350,7 @@@ static void activate_packet_queue(struc
   
   static void sdma_kmem_cache_ctor(void *obj)
   {
- -      struct user_sdma_txreq *tx = (struct user_sdma_txreq *)obj;
+ +      struct user_sdma_txreq *tx = obj;
   
         memset(tx, 0, sizeof(*tx));
   }
@@@ -380,7 -386,7 +385,7 @@@ int hfi1_user_sdma_alloc_queues(struct 
                 goto pq_nomem;
   
         memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
-       pq->reqs = kmalloc(memsize, GFP_KERNEL);
+       pq->reqs = kzalloc(memsize, GFP_KERNEL);
         if (!pq->reqs)
                 goto pq_reqs_nomem;
   
@@@ -392,9 -398,12 +397,12 @@@
         pq->state = SDMA_PKT_Q_INACTIVE;
         atomic_set(&pq->n_reqs, 0);
         init_waitqueue_head(&pq->wait);
+       pq->sdma_rb_root = RB_ROOT;
+       INIT_LIST_HEAD(&pq->evict);
+       spin_lock_init(&pq->evict_lock);
   
         iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
-                   activate_packet_queue);
+                   activate_packet_queue, NULL);
         pq->reqidx = 0;
         snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
                  fd->subctxt);
@@@ -413,7 -422,8 +421,7 @@@
         if (!cq)
                 goto cq_nomem;
   
- -      memsize = ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size,
- -                      PAGE_SIZE);
+ +      memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
         cq->comps = vmalloc_user(memsize);
         if (!cq->comps)
                 goto cq_comps_nomem;
@@@ -421,6 -431,12 +429,12 @@@
         cq->nentries = hfi1_sdma_comp_ring_size;
         fd->cq = cq;
   
+       ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+       if (ret) {
+               dd_dev_err(dd, "Failed to register with MMU %d", ret);
+               goto done;
+       }
+ 
         spin_lock_irqsave(&uctxt->sdma_qlock, flags);
         list_add(&pq->list, &uctxt->sdma_queues);
         spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
@@@ -450,6 -466,7 +464,7 @@@ int hfi1_user_sdma_free_queues(struct h
         hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
                   uctxt->ctxt, fd->subctxt);
         pq = fd->pq;
+       hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
         if (pq) {
                 spin_lock_irqsave(&uctxt->sdma_qlock, flags);
                 if (!list_empty(&pq->list))
@@@ -466,7 -483,8 +481,7 @@@
                 fd->pq = NULL;
         }
         if (fd->cq) {
- -              if (fd->cq->comps)
- -                      vfree(fd->cq->comps);
+ +              vfree(fd->cq->comps);
                 kfree(fd->cq);
                 fd->cq = NULL;
         }
@@@ -476,7 -494,7 +491,7 @@@
   int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
                                    unsigned long dim, unsigned long *count)
   {
-       int ret = 0, i = 0, sent;
+       int ret = 0, i = 0;
         struct hfi1_filedata *fd = fp->private_data;
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
         struct hfi1_user_sdma_pkt_q *pq = fd->pq;
@@@ -502,9 -520,11 +517,11 @@@
                           dd->unit, uctxt->ctxt, fd->subctxt, ret);
                 return -EFAULT;
         }
+ 
         trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
                                      (u16 *)&info);
-       if (cq->comps[info.comp_idx].status == QUEUED) {
+       if (cq->comps[info.comp_idx].status == QUEUED ||
+           test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
                 hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
                           dd->unit, uctxt->ctxt, fd->subctxt,
                           info.comp_idx);
@@@ -531,10 -551,7 +548,7 @@@
         req->cq = cq;
         req->status = -1;
         INIT_LIST_HEAD(&req->txps);
-       INIT_LIST_HEAD(&req->txcmp);
-       INIT_WORK(&req->worker, user_sdma_delayed_completion);
   
-       spin_lock_init(&req->txcmp_lock);
         memcpy(&req->info, &info, sizeof(info));
   
         if (req_opcode(info.ctrl) == EXPECTED)
@@@ -593,8 -610,10 +607,10 @@@
         }
   
         req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
-       /* Calculate the initial TID offset based on the values of
-          KDETH.OFFSET and KDETH.OM that are passed in. */
+       /*
+        * Calculate the initial TID offset based on the values of
+        * KDETH.OFFSET and KDETH.OM that are passed in.
+        */
         req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
                 (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
                  KDETH_OM_LARGE : KDETH_OM_SMALL);
@@@ -603,8 -622,13 +619,13 @@@
   
         /* Save all the IO vector structures */
         while (i < req->data_iovs) {
+               INIT_LIST_HEAD(&req->iovs[i].list);
                 memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
-               req->iovs[i].offset = 0;
+               ret = pin_vector_pages(req, &req->iovs[i]);
+               if (ret) {
+                       req->status = ret;
+                       goto free_req;
+               }
                 req->data_len += req->iovs[i++].iov.iov_len;
         }
         SDMA_DBG(req, "total data length %u", req->data_len);
@@@ -668,52 -692,59 +689,59 @@@
                 }
         }
   
-       set_comp_state(req, QUEUED, 0);
+       set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+       atomic_inc(&pq->n_reqs);
         /* Send the first N packets in the request to buy us some time */
-       sent = user_sdma_send_pkts(req, pcount);
-       if (unlikely(sent < 0)) {
-               if (sent != -EBUSY) {
-                       req->status = sent;
-                       set_comp_state(req, ERROR, req->status);
-                       return sent;
-               } else
-                       sent = 0;
+       ret = user_sdma_send_pkts(req, pcount);
+       if (unlikely(ret < 0 && ret != -EBUSY)) {
+               req->status = ret;
+               goto free_req;
         }
-       atomic_inc(&pq->n_reqs);
-       xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
   
-       if (sent < req->info.npkts) {
-               /*
-                * This is a somewhat blocking send implementation.
-                * The driver will block the caller until all packets of the
-                * request have been submitted to the SDMA engine. However, it
-                * will not wait for send completions.
-                */
-               while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
-                       ret = user_sdma_send_pkts(req, pcount);
-                       if (ret < 0) {
-                               if (ret != -EBUSY) {
-                                       req->status = ret;
-                                       return ret;
-                               }
-                               wait_event_interruptible_timeout(
-                                       pq->busy.wait_dma,
-                                       (pq->state == SDMA_PKT_Q_ACTIVE),
-                                       msecs_to_jiffies(
-                                               SDMA_IOWAIT_TIMEOUT));
+       /*
+        * It is possible that the SDMA engine would have processed all the
+        * submitted packets by the time we get here. Therefore, only set
+        * packet queue state to ACTIVE if there are still uncompleted
+        * requests.
+        */
+       if (atomic_read(&pq->n_reqs))
+               xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+ 
+       /*
+        * This is a somewhat blocking send implementation.
+        * The driver will block the caller until all packets of the
+        * request have been submitted to the SDMA engine. However, it
+        * will not wait for send completions.
+        */
+       while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+               ret = user_sdma_send_pkts(req, pcount);
+               if (ret < 0) {
+                       if (ret != -EBUSY) {
+                               req->status = ret;
+                               set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+                               if (ACCESS_ONCE(req->seqcomp) ==
+                                   req->seqsubmitted - 1)
+                                       goto free_req;
+                               return ret;
                         }
+                       wait_event_interruptible_timeout(
+                               pq->busy.wait_dma,
+                               (pq->state == SDMA_PKT_Q_ACTIVE),
+                               msecs_to_jiffies(
+                                       SDMA_IOWAIT_TIMEOUT));
                 }
- 
         }
         *count += idx;
         return 0;
   free_req:
-       user_sdma_free_request(req);
+       user_sdma_free_request(req, true);
+       pq_update(pq);
+       set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
         return ret;
   }
   
   static inline u32 compute_data_length(struct user_sdma_request *req,
-                                           struct user_sdma_txreq *tx)
+                                     struct user_sdma_txreq *tx)
   {
         /*
          * Determine the proper size of the packet data.
@@@ -731,8 -762,10 +759,10 @@@
         } else if (req_opcode(req->info.ctrl) == EXPECTED) {
                 u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
                         PAGE_SIZE;
-               /* Get the data length based on the remaining space in the
-                * TID pair. */
+               /*
+                * Get the data length based on the remaining space in the
+                * TID pair.
+                */
                 len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
                 /* If we've filled up the TID pair, move to the next one. */
                 if (unlikely(!len) && ++req->tididx < req->n_tids &&
@@@ -742,12 -775,15 +772,15 @@@
                         req->tidoffset = 0;
                         len = min_t(u32, tidlen, req->info.fragsize);
                 }
-               /* Since the TID pairs map entire pages, make sure that we
+               /*
+                * Since the TID pairs map entire pages, make sure that we
                  * are not going to try to send more data that we have
-                * remaining. */
+                * remaining.
+                */
                 len = min(len, req->data_len - req->sent);
-       } else
+       } else {
                 len = min(req->data_len - req->sent, (u32)req->info.fragsize);
+       }
         SDMA_DBG(req, "Data Length = %u", len);
         return len;
   }
@@@ -810,9 -846,7 +843,7 @@@ static int user_sdma_send_pkts(struct u
                 tx->flags = 0;
                 tx->req = req;
                 tx->busycount = 0;
-               tx->idx = -1;
                 INIT_LIST_HEAD(&tx->list);
-               memset(tx->iovecs, 0, sizeof(tx->iovecs));
   
                 if (req->seqnum == req->info.npkts - 1)
                         tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
@@@ -833,18 -867,6 +864,6 @@@
                                 WARN_ON(iovec->offset);
                         }
   
-                       /*
-                        * This request might include only a header and no user
-                        * data, so pin pages only if there is data and it the
-                        * pages have not been pinned already.
-                        */
-                       if (unlikely(!iovec->pages && iovec->iov.iov_len)) {
-                               ret = pin_vector_pages(req, iovec);
-                               if (ret)
-                                       goto free_tx;
-                       }
- 
-                       tx->iovecs[++tx->idx].vec = iovec;
                         datalen = compute_data_length(req, tx);
                         if (!datalen) {
                                 SDMA_DBG(req,
@@@ -923,8 -945,8 +942,8 @@@
                         unsigned pageidx, len;
   
                         base = (unsigned long)iovec->iov.iov_base;
- -                      offset = ((base + iovec->offset + iov_offset) &
- -                                ~PAGE_MASK);
+ +                      offset = offset_in_page(base + iovec->offset +
+ +                                              iov_offset);
                         pageidx = (((iovec->offset + iov_offset +
                                      base) - (base & PAGE_MASK)) >> PAGE_SHIFT);
                         len = offset + req->info.fragsize > PAGE_SIZE ?
@@@ -934,16 -956,8 +953,8 @@@
                                               iovec->pages[pageidx],
                                               offset, len);
                         if (ret) {
-                               int i;
- 
                                 SDMA_DBG(req, "SDMA txreq add page failed %d\n",
                                          ret);
-                               /* Mark all assigned vectors as complete so they
-                                * are unpinned in the callback. */
-                               for (i = tx->idx; i >= 0; i--) {
-                                       tx->iovecs[i].flags |=
-                                               TXREQ_FLAGS_IOVEC_LAST_PKT;
-                               }
                                 goto free_txreq;
                         }
                         iov_offset += len;
@@@ -951,19 -965,10 +962,10 @@@
                         data_sent += len;
                         if (unlikely(queued < datalen &&
                                      pageidx == iovec->npages &&
-                                    req->iov_idx < req->data_iovs - 1 &&
-                                    tx->idx < ARRAY_SIZE(tx->iovecs))) {
+                                    req->iov_idx < req->data_iovs - 1)) {
                                 iovec->offset += iov_offset;
-                               tx->iovecs[tx->idx].flags |=
-                                       TXREQ_FLAGS_IOVEC_LAST_PKT;
                                 iovec = &req->iovs[++req->iov_idx];
-                               if (!iovec->pages) {
-                                       ret = pin_vector_pages(req, iovec);
-                                       if (ret)
-                                               goto free_txreq;
-                               }
                                 iov_offset = 0;
-                               tx->iovecs[++tx->idx].vec = iovec;
                         }
                 }
                 /*
@@@ -974,28 -979,21 +976,21 @@@
                 if (req_opcode(req->info.ctrl) == EXPECTED)
                         req->tidoffset += datalen;
                 req->sent += data_sent;
-               if (req->data_len) {
-                       tx->iovecs[tx->idx].vec->offset += iov_offset;
-                       /* If we've reached the end of the io vector, mark it
-                        * so the callback can unpin the pages and free it. */
-                       if (tx->iovecs[tx->idx].vec->offset ==
-                           tx->iovecs[tx->idx].vec->iov.iov_len)
-                               tx->iovecs[tx->idx].flags |=
-                                       TXREQ_FLAGS_IOVEC_LAST_PKT;
-               }
- 
+               if (req->data_len)
+                       iovec->offset += iov_offset;
+               list_add_tail(&tx->txreq.list, &req->txps);
                 /*
                  * It is important to increment this here as it is used to
                  * generate the BTH.PSN and, therefore, can't be bulk-updated
                  * outside of the loop.
                  */
                 tx->seqnum = req->seqnum++;
-               list_add_tail(&tx->txreq.list, &req->txps);
                 npkts++;
         }
   dosend:
         ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
-       if (list_empty(&req->txps))
+       if (list_empty(&req->txps)) {
+               req->seqsubmitted = req->seqnum;
                 if (req->seqnum == req->info.npkts) {
                         set_bit(SDMA_REQ_SEND_DONE, &req->flags);
                         /*
@@@ -1007,6 -1005,10 +1002,10 @@@
                         if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
                                 sdma_ahg_free(req->sde, req->ahg_idx);
                 }
+       } else if (ret > 0) {
+               req->seqsubmitted += ret;
+               ret = 0;
+       }
         return ret;
   
   free_txreq:
@@@ -1021,7 -1023,7 +1020,7 @@@ free_tx
    */
   static inline int num_user_pages(const struct iovec *iov)
   {
-       const unsigned long addr  = (unsigned long) iov->iov_base;
+       const unsigned long addr  = (unsigned long)iov->iov_base;
         const unsigned long len   = iov->iov_len;
         const unsigned long spage = addr & PAGE_MASK;
         const unsigned long epage = (addr + len - 1) & PAGE_MASK;
@@@ -1029,64 -1031,129 +1028,129 @@@
         return 1 + ((epage - spage) >> PAGE_SHIFT);
   }
   
- static int pin_vector_pages(struct user_sdma_request *req,
-                           struct user_sdma_iovec *iovec) {
-       int pinned, npages;
+ /* Caller must hold pq->evict_lock */
+ static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+ {
+       u32 cleared = 0;
+       struct sdma_mmu_node *node, *ptr;
   
-       npages = num_user_pages(&iovec->iov);
-       iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL);
-       if (!iovec->pages) {
-               SDMA_DBG(req, "Failed page array alloc");
-               return -ENOMEM;
+       list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
+               /* Make sure that no one is still using the node. */
+               if (!atomic_read(&node->refcount)) {
+                       /*
+                        * Need to use the page count now as the remove callback
+                        * will free the node.
+                        */
+                       cleared += node->npages;
+                       spin_unlock(&pq->evict_lock);
+                       hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+                       spin_lock(&pq->evict_lock);
+                       if (cleared >= npages)
+                               break;
+               }
         }
+       return cleared;
+ }
   
-       /*
-        * Get a reference to the process's mm so we can use it when
-        * unpinning the io vectors.
-        */
-       req->pq->user_mm = get_task_mm(current);
+ static int pin_vector_pages(struct user_sdma_request *req,
+                           struct user_sdma_iovec *iovec) {
+       int ret = 0, pinned, npages, cleared;
+       struct page **pages;
+       struct hfi1_user_sdma_pkt_q *pq = req->pq;
+       struct sdma_mmu_node *node = NULL;
+       struct mmu_rb_node *rb_node;
+ 
+       rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
+                                    (unsigned long)iovec->iov.iov_base,
+                                    iovec->iov.iov_len);
+       if (rb_node)
+               node = container_of(rb_node, struct sdma_mmu_node, rb);
+ 
+       if (!node) {
+               node = kzalloc(sizeof(*node), GFP_KERNEL);
+               if (!node)
+                       return -ENOMEM;
   
-       pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
-                                        npages, 0, iovec->pages);
+               node->rb.addr = (unsigned long)iovec->iov.iov_base;
+               node->rb.len = iovec->iov.iov_len;
+               node->pq = pq;
+               atomic_set(&node->refcount, 0);
+               INIT_LIST_HEAD(&node->list);
+       }
   
-       if (pinned < 0)
-               return pinned;
+       npages = num_user_pages(&iovec->iov);
+       if (node->npages < npages) {
+               pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+               if (!pages) {
+                       SDMA_DBG(req, "Failed page array alloc");
+                       ret = -ENOMEM;
+                       goto bail;
+               }
+               memcpy(pages, node->pages, node->npages * sizeof(*pages));
+ 
+               npages -= node->npages;
+ retry:
+               if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+                       spin_lock(&pq->evict_lock);
+                       cleared = sdma_cache_evict(pq, npages);
+                       spin_unlock(&pq->evict_lock);
+                       if (cleared >= npages)
+                               goto retry;
+               }
+               pinned = hfi1_acquire_user_pages(
+                       ((unsigned long)iovec->iov.iov_base +
+                        (node->npages * PAGE_SIZE)), npages, 0,
+                       pages + node->npages);
+               if (pinned < 0) {
+                       kfree(pages);
+                       ret = pinned;
+                       goto bail;
+               }
+               if (pinned != npages) {
+                       unpin_vector_pages(current->mm, pages, pinned);
+                       ret = -EFAULT;
+                       goto bail;
+               }
+               kfree(node->pages);
+               node->pages = pages;
+               node->npages += pinned;
+               npages = node->npages;
+               spin_lock(&pq->evict_lock);
+               if (!rb_node)
+                       list_add(&node->list, &pq->evict);
+               else
+                       list_move(&node->list, &pq->evict);
+               pq->n_locked += pinned;
+               spin_unlock(&pq->evict_lock);
+       }
+       iovec->pages = node->pages;
+       iovec->npages = npages;
   
-       iovec->npages = pinned;
-       if (pinned != npages) {
-               SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
-               unpin_vector_pages(req, iovec);
-               return -EFAULT;
+       if (!rb_node) {
+               ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+               if (ret) {
+                       spin_lock(&pq->evict_lock);
+                       list_del(&node->list);
+                       pq->n_locked -= node->npages;
+                       spin_unlock(&pq->evict_lock);
+                       ret = 0;
+                       goto bail;
+               }
+       } else {
+               atomic_inc(&node->refcount);
         }
         return 0;
+ bail:
+       if (!rb_node)
+               kfree(node);
+       return ret;
   }
   
- static void unpin_vector_pages(struct user_sdma_request *req,
-                              struct user_sdma_iovec *iovec)
+ static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+                              unsigned npages)
   {
-       /*
-        * Unpinning is done through the workqueue so use the
-        * process's mm if we have a reference to it.
-        */
-       if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
-               use_mm(req->pq->user_mm);
- 
-       hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
- 
-       /*
-        * Unuse the user's mm (see above) and release the
-        * reference to it.
-        */
-       if (req->pq->user_mm) {
-               if (current->flags & PF_KTHREAD)
-                       unuse_mm(req->pq->user_mm);
-               mmput(req->pq->user_mm);
-       }
- 
-       kfree(iovec->pages);
-       iovec->pages = NULL;
-       iovec->npages = 0;
-       iovec->offset = 0;
+       hfi1_release_user_pages(mm, pages, npages, 0);
+       kfree(pages);
   }
   
   static int check_header_template(struct user_sdma_request *req,
@@@ -1209,7 -1276,6 +1273,6 @@@ static int set_txreq_header(struct user
                 if (ret)
                         return ret;
                 goto done;
- 
         }
   
         hdr->bth[2] = cpu_to_be32(
@@@ -1219,7 -1285,7 +1282,7 @@@
   
         /* Set ACK request on last packet */
         if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
-               hdr->bth[2] |= cpu_to_be32(1UL<<31);
+               hdr->bth[2] |= cpu_to_be32(1UL << 31);
   
         /* Set the new offset */
         hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
@@@ -1233,8 -1299,10 +1296,10 @@@
                 if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
                                          PAGE_SIZE)) {
                         req->tidoffset = 0;
-                       /* Since we don't copy all the TIDs, all at once,
-                        * we have to check again. */
+                       /*
+                        * Since we don't copy all the TIDs, all at once,
+                        * we have to check again.
+                        */
                         if (++req->tididx > req->n_tids - 1 ||
                             !req->tids[req->tididx]) {
                                 return -EINVAL;
@@@ -1315,8 -1383,10 +1380,10 @@@ static int set_txreq_header_ahg(struct 
                 if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
                                          PAGE_SIZE)) {
                         req->tidoffset = 0;
-                       /* Since we don't copy all the TIDs, all at once,
-                        * we have to check again. */
+                       /*
+                        * Since we don't copy all the TIDs, all at once,
+                        * we have to check again.
+                        */
                         if (++req->tididx > req->n_tids - 1 ||
                             !req->tids[req->tididx]) {
                                 return -EINVAL;
@@@ -1340,8 -1410,9 +1407,9 @@@
                                                                 INTR) >> 16);
                         val &= cpu_to_le16(~(1U << 13));
                         AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
-               } else
+               } else {
                         AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+               }
         }
   
         trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
@@@ -1356,113 -1427,62 +1424,62 @@@
    * tx request have been processed by the DMA engine. Called in
    * interrupt context.
    */
- static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
-                              int drain)
+ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
   {
         struct user_sdma_txreq *tx =
                 container_of(txreq, struct user_sdma_txreq, txreq);
         struct user_sdma_request *req;
-       bool defer;
-       int i;
+       struct hfi1_user_sdma_pkt_q *pq;
+       struct hfi1_user_sdma_comp_q *cq;
+       u16 idx;
   
         if (!tx->req)
                 return;
   
         req = tx->req;
-       /*
-        * If this is the callback for the last packet of the request,
-        * queue up the request for clean up.
-        */
-       defer = (tx->seqnum == req->info.npkts - 1);
- 
-       /*
-        * If we have any io vectors associated with this txreq,
-        * check whether they need to be 'freed'. We can't free them
-        * here because the unpin function needs to be able to sleep.
-        */
-       for (i = tx->idx; i >= 0; i--) {
-               if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
-                       defer = true;
-                       break;
-               }
-       }
+       pq = req->pq;
+       cq = req->cq;
   
-       req->status = status;
         if (status != SDMA_TXREQ_S_OK) {
                 SDMA_DBG(req, "SDMA completion with error %d",
                          status);
                 set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
-               defer = true;
         }
   
-       /*
-        * Defer the clean up of the iovectors and the request until later
-        * so it can be done outside of interrupt context.
-        */
-       if (defer) {
-               spin_lock(&req->txcmp_lock);
-               list_add_tail(&tx->list, &req->txcmp);
-               spin_unlock(&req->txcmp_lock);
-               schedule_work(&req->worker);
+       req->seqcomp = tx->seqnum;
+       kmem_cache_free(pq->txreq_cache, tx);
+       tx = NULL;
+ 
+       idx = req->info.comp_idx;
+       if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+               if (req->seqcomp == req->info.npkts - 1) {
+                       req->status = 0;
+                       user_sdma_free_request(req, false);
+                       pq_update(pq);
+                       set_comp_state(pq, cq, idx, COMPLETE, 0);
+               }
         } else {
-               kmem_cache_free(req->pq->txreq_cache, tx);
+               if (status != SDMA_TXREQ_S_OK)
+                       req->status = status;
+               if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+                   (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+                    test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+                       user_sdma_free_request(req, false);
+                       pq_update(pq);
+                       set_comp_state(pq, cq, idx, ERROR, req->status);
+               }
         }
   }
   
- static void user_sdma_delayed_completion(struct work_struct *work)
+ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
   {
-       struct user_sdma_request *req =
-               container_of(work, struct user_sdma_request, worker);
-       struct hfi1_user_sdma_pkt_q *pq = req->pq;
-       struct user_sdma_txreq *tx = NULL;
-       unsigned long flags;
-       u64 seqnum;
-       int i;
- 
-       while (1) {
-               spin_lock_irqsave(&req->txcmp_lock, flags);
-               if (!list_empty(&req->txcmp)) {
-                       tx = list_first_entry(&req->txcmp,
-                                             struct user_sdma_txreq, list);
-                       list_del(&tx->list);
-               }
-               spin_unlock_irqrestore(&req->txcmp_lock, flags);
-               if (!tx)
-                       break;
- 
-               for (i = tx->idx; i >= 0; i--)
-                       if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
-                               unpin_vector_pages(req, tx->iovecs[i].vec);
- 
-               seqnum = tx->seqnum;
-               kmem_cache_free(pq->txreq_cache, tx);
-               tx = NULL;
- 
-               if (req->status != SDMA_TXREQ_S_OK) {
-                       if (seqnum == ACCESS_ONCE(req->seqnum) &&
-                           test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
-                               atomic_dec(&pq->n_reqs);
-                               set_comp_state(req, ERROR, req->status);
-                               user_sdma_free_request(req);
-                               break;
-                       }
-               } else {
-                       if (seqnum == req->info.npkts - 1) {
-                               atomic_dec(&pq->n_reqs);
-                               set_comp_state(req, COMPLETE, 0);
-                               user_sdma_free_request(req);
-                               break;
-                       }
-               }
-       }
- 
-       if (!atomic_read(&pq->n_reqs)) {
+       if (atomic_dec_and_test(&pq->n_reqs)) {
                 xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
                 wake_up(&pq->wait);
         }
   }
   
- static void user_sdma_free_request(struct user_sdma_request *req)
+ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
   {
         if (!list_empty(&req->txps)) {
                 struct sdma_txreq *t, *p;
@@@ -1476,25 -1496,87 +1493,87 @@@
                 }
         }
         if (req->data_iovs) {
+               struct sdma_mmu_node *node;
+               struct mmu_rb_node *mnode;
                 int i;
   
-               for (i = 0; i < req->data_iovs; i++)
-                       if (req->iovs[i].npages && req->iovs[i].pages)
-                               unpin_vector_pages(req, &req->iovs[i]);
+               for (i = 0; i < req->data_iovs; i++) {
+                       mnode = hfi1_mmu_rb_search(
+                               &req->pq->sdma_rb_root,
+                               (unsigned long)req->iovs[i].iov.iov_base,
+                               req->iovs[i].iov.iov_len);
+                       if (!mnode)
+                               continue;
+ 
+                       node = container_of(mnode, struct sdma_mmu_node, rb);
+                       if (unpin)
+                               hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+                                                  &node->rb);
+                       else
+                               atomic_dec(&node->refcount);
+               }
         }
         kfree(req->tids);
         clear_bit(SDMA_REQ_IN_USE, &req->flags);
   }
   
- static inline void set_comp_state(struct user_sdma_request *req,
-                                       enum hfi1_sdma_comp_state state,
-                                       int ret)
+ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+                                 struct hfi1_user_sdma_comp_q *cq,
+                                 u16 idx, enum hfi1_sdma_comp_state state,
+                                 int ret)
   {
-       SDMA_DBG(req, "Setting completion status %u %d", state, ret);
-       req->cq->comps[req->info.comp_idx].status = state;
+       hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
+                 pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+       cq->comps[idx].status = state;
         if (state == ERROR)
-               req->cq->comps[req->info.comp_idx].errcode = -ret;
-       trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
-                                       req->pq->subctxt, req->info.comp_idx,
-                                       state, ret);
+               cq->comps[idx].errcode = -ret;
+       trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
+                                       idx, state, ret);
+ }
+ 
+ static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+                          unsigned long len)
+ {
+       return (bool)(node->addr == addr);
+ }
+ 
+ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+ {
+       struct sdma_mmu_node *node =
+               container_of(mnode, struct sdma_mmu_node, rb);
+ 
+       atomic_inc(&node->refcount);
+       return 0;
+ }
+ 
+ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
+                          bool notifier)
+ {
+       struct sdma_mmu_node *node =
+               container_of(mnode, struct sdma_mmu_node, rb);
+ 
+       spin_lock(&node->pq->evict_lock);
+       list_del(&node->list);
+       node->pq->n_locked -= node->npages;
+       spin_unlock(&node->pq->evict_lock);
+ 
+       unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+                          node->npages);
+       /*
+        * If called by the MMU notifier, we have to adjust the pinned
+        * page count ourselves.
+        */
+       if (notifier)
+               current->mm->pinned_vm -= node->npages;
+       kfree(node);
+ }
+ 
+ static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+ {
+       struct sdma_mmu_node *node =
+               container_of(mnode, struct sdma_mmu_node, rb);
+ 
+       if (!atomic_read(&node->refcount))
+               return 1;
+       return 0;
   }
diff --combined include/linux/mlx5/device.h

index 02ac3000ee3c14e83550e2a9481f5bb82df91422,4b531c44b3c7767660375515f093242394a37e84..8156e3c9239ce6a5c6883040b1b6d1b619ad1169
--- 1/include/linux/mlx5/device.h
--- 2/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@@ -373,12 -373,6 +373,12 @@@ enum 
         MLX5_SET_PORT_PKEY_TABLE        = 20,
   };
   
+ +enum {
+ +      MLX5_BW_NO_LIMIT   = 0,
+ +      MLX5_100_MBPS_UNIT = 3,
+ +      MLX5_GBPS_UNIT     = 4,
+ +};
+ +
   enum {
         MLX5_MAX_PAGE_SHIFT             = 31
   };
@@@ -1206,17 -1200,6 +1206,17 @@@ enum 
         MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM    = 0x1,
   };
   
+ +enum mlx5_wol_mode {
+ +      MLX5_WOL_DISABLE        = 0,
+ +      MLX5_WOL_SECURED_MAGIC  = 1 << 1,
+ +      MLX5_WOL_MAGIC          = 1 << 2,
+ +      MLX5_WOL_ARP            = 1 << 3,
+ +      MLX5_WOL_BROADCAST      = 1 << 4,
+ +      MLX5_WOL_MULTICAST      = 1 << 5,
+ +      MLX5_WOL_UNICAST        = 1 << 6,
+ +      MLX5_WOL_PHY_ACTIVITY   = 1 << 7,
+ +};
+ +
   /* MLX5 DEV CAPs */
   
   /* TODO: EAT.ME */
@@@ -1236,6 -1219,8 +1236,8 @@@ enum mlx5_cap_type 
         MLX5_CAP_FLOW_TABLE,
         MLX5_CAP_ESWITCH_FLOW_TABLE,
         MLX5_CAP_ESWITCH,
+       MLX5_CAP_RESERVED,
+       MLX5_CAP_VECTOR_CALC,
         /* NUM OF CAP Types */
         MLX5_CAP_NUM
   };
@@@ -1298,6 -1283,10 +1300,10 @@@
   #define MLX5_CAP_ODP(mdev, cap)\
         MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
   
+ #define MLX5_CAP_VECTOR_CALC(mdev, cap) \
+       MLX5_GET(vector_calc_cap, \
+                mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap)
+ 
   enum {
         MLX5_CMD_STAT_OK                        = 0x0,
         MLX5_CMD_STAT_INT_ERR                   = 0x1,
diff --combined include/linux/mlx5/driver.h

index 3a954465b2bfc00d81f7fff8d87a4ce038995cb6,e1d987fb49b2789e618a0dd94c413c296d4e45d0..dcd5ac8d3b1403875bce11aeddaf106acc0cd218
--- 1/include/linux/mlx5/driver.h
--- 2/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@@ -54,7 -54,7 +54,7 @@@ enum 
         /* one minute for the sake of bringup. Generally, commands must always
          * complete and we may need to increase this timeout value
          */
- -      MLX5_CMD_TIMEOUT_MSEC   = 7200 * 1000,
+ +      MLX5_CMD_TIMEOUT_MSEC   = 60 * 1000,
         MLX5_CMD_WQ_MAX_NAME    = 32,
   };
   
@@@ -99,8 -99,6 +99,8 @@@ enum 
   };
   
   enum {
+ +      MLX5_REG_QETCR           = 0x4005,
+ +      MLX5_REG_QTCT            = 0x400a,
         MLX5_REG_PCAP            = 0x5001,
         MLX5_REG_PMTU            = 0x5003,
         MLX5_REG_PTYS            = 0x5004,
@@@ -460,6 -458,8 +460,6 @@@ struct mlx5_priv 
         struct mlx5_uuar_info   uuari;
         MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
   
- -      struct io_mapping       *bf_mapping;
- -
         /* pages stuff */
         struct workqueue_struct *pg_wq;
         struct rb_root          page_root;
@@@ -613,7 -613,10 +613,10 @@@ struct mlx5_pas 
   };
   
   enum port_state_policy {
-       MLX5_AAA_000
+       MLX5_POLICY_DOWN        = 0,
+       MLX5_POLICY_UP          = 1,
+       MLX5_POLICY_FOLLOW      = 2,
+       MLX5_POLICY_INVALID     = 0xffffffff
   };
   
   enum phy_port_state {
@@@ -706,8 -709,7 +709,7 @@@ void mlx5_cmd_use_events(struct mlx5_co
   void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
   int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
   int mlx5_cmd_status_to_err_v2(void *ptr);
- int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
-                      enum mlx5_cap_mode cap_mode);
+ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
   int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                   int out_size);
   int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
@@@ -717,8 -719,7 +719,8 @@@ int mlx5_cmd_alloc_uar(struct mlx5_core
   int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
   int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
   int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
- -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
+ +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar,
+ +                     bool map_wc);
   void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
   void mlx5_health_cleanup(struct mlx5_core_dev *dev);
   int mlx5_health_init(struct mlx5_core_dev *dev);
@@@ -797,6 -798,37 +799,6 @@@ int mlx5_core_access_reg(struct mlx5_co
                          int size_in, void *data_out, int size_out,
                          u16 reg_num, int arg, int write);
   
- -int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
- -int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
- -                       int ptys_size, int proto_mask, u8 local_port);
- -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
- -                            u32 *proto_cap, int proto_mask);
- -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
- -                              u32 *proto_admin, int proto_mask);
- -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- -                                  u8 *link_width_oper, u8 local_port);
- -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
- -                             u8 *proto_oper, int proto_mask,
- -                             u8 local_port);
- -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin,
- -                      int proto_mask);
- -int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
- -                             enum mlx5_port_status status);
- -int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
- -                               enum mlx5_port_status *status);
- -
- -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port);
- -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port);
- -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
- -                            u8 port);
- -
- -int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
- -                            u8 *vl_hw_cap, u8 local_port);
- -
- -int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause);
- -int mlx5_query_port_pause(struct mlx5_core_dev *dev,
- -                        u32 *rx_pause, u32 *tx_pause);
- -
   int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
   void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
   int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
diff --combined include/linux/mlx5/mlx5_ifc.h

index e52730e01ed68bca8863cab4cca05e339b50cf87,bb9e07ca65345ac0b739b5a2b87c5f6c711c21a1..c15b8a8649377ea401a556c0f709b3bfe112f30f
--- 1/include/linux/mlx5/mlx5_ifc.h
--- 2/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@@ -166,8 -166,6 +166,8 @@@ enum 
         MLX5_CMD_OP_SET_L2_TABLE_ENTRY            = 0x829,
         MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY          = 0x82a,
         MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY         = 0x82b,
+ +      MLX5_CMD_OP_SET_WOL_ROL                   = 0x830,
+ +      MLX5_CMD_OP_QUERY_WOL_ROL                 = 0x831,
         MLX5_CMD_OP_CREATE_TIR                    = 0x900,
         MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
         MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
@@@ -618,6 -616,33 +618,33 @@@ struct mlx5_ifc_odp_cap_bits 
         u8         reserved_at_e0[0x720];
   };
   
+ struct mlx5_ifc_calc_op {
+       u8        reserved_at_0[0x10];
+       u8        reserved_at_10[0x9];
+       u8        op_swap_endianness[0x1];
+       u8        op_min[0x1];
+       u8        op_xor[0x1];
+       u8        op_or[0x1];
+       u8        op_and[0x1];
+       u8        op_max[0x1];
+       u8        op_add[0x1];
+ };
+ 
+ struct mlx5_ifc_vector_calc_cap_bits {
+       u8         calc_matrix[0x1];
+       u8         reserved_at_1[0x1f];
+       u8         reserved_at_20[0x8];
+       u8         max_vec_count[0x8];
+       u8         reserved_at_30[0xd];
+       u8         max_chunk_size[0x3];
+       struct mlx5_ifc_calc_op calc0;
+       struct mlx5_ifc_calc_op calc1;
+       struct mlx5_ifc_calc_op calc2;
+       struct mlx5_ifc_calc_op calc3;
+ 
+       u8         reserved_at_e0[0x720];
+ };
+ 
   enum {
         MLX5_WQ_TYPE_LINKED_LIST  = 0x0,
         MLX5_WQ_TYPE_CYCLIC       = 0x1,
@@@ -732,19 -757,7 +759,19 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
   
         u8         reserved_at_1bf[0x3];
         u8         log_max_msg[0x5];
- -      u8         reserved_at_1c7[0x18];
+ +      u8         reserved_at_1c7[0x4];
+ +      u8         max_tc[0x4];
+ +      u8         reserved_at_1cf[0x6];
+ +      u8         rol_s[0x1];
+ +      u8         rol_g[0x1];
+ +      u8         reserved_at_1d7[0x1];
+ +      u8         wol_s[0x1];
+ +      u8         wol_g[0x1];
+ +      u8         wol_a[0x1];
+ +      u8         wol_b[0x1];
+ +      u8         wol_m[0x1];
+ +      u8         wol_u[0x1];
+ +      u8         wol_p[0x1];
   
         u8         stat_rate_support[0x10];
         u8         reserved_at_1ef[0xc];
@@@ -784,7 -797,8 +811,8 @@@
         u8         cd[0x1];
         u8         reserved_at_22c[0x1];
         u8         apm[0x1];
-       u8         reserved_at_22e[0x2];
+       u8         vector_calc[0x1];
+       u8         reserved_at_22f[0x1];
         u8         imaicl[0x1];
         u8         reserved_at_231[0x4];
         u8         qkv[0x1];
@@@ -1954,6 -1968,7 +1982,7 @@@ union mlx5_ifc_hca_cap_union_bits 
         struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
         struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
         struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
+       struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
         u8         reserved_at_0[0x8000];
   };
   
@@@ -3681,6 -3696,12 +3710,12 @@@ struct mlx5_ifc_query_hca_vport_pkey_in
         u8         pkey_index[0x10];
   };
   
+ enum {
+       MLX5_HCA_VPORT_SEL_PORT_GUID    = 1 << 0,
+       MLX5_HCA_VPORT_SEL_NODE_GUID    = 1 << 1,
+       MLX5_HCA_VPORT_SEL_STATE_POLICY = 1 << 2,
+ };
+ 
   struct mlx5_ifc_query_hca_vport_gid_out_bits {
         u8         status[0x8];
         u8         reserved_at_8[0x18];
@@@ -4297,9 -4318,7 +4332,9 @@@ struct mlx5_ifc_modify_tir_bitmask_bit
   
         u8         reserved_at_20[0x1b];
         u8         self_lb_en[0x1];
- -      u8         reserved_at_3c[0x3];
+ +      u8         reserved_at_3c[0x1];
+ +      u8         hash[0x1];
+ +      u8         reserved_at_3e[0x1];
         u8         lro[0x1];
   };
   
@@@ -6925,54 -6944,6 +6960,54 @@@ struct mlx5_ifc_mtt_bits 
         u8         rd_en[0x1];
   };
   
+ +struct mlx5_ifc_query_wol_rol_out_bits {
+ +      u8         status[0x8];
+ +      u8         reserved_at_8[0x18];
+ +
+ +      u8         syndrome[0x20];
+ +
+ +      u8         reserved_at_40[0x10];
+ +      u8         rol_mode[0x8];
+ +      u8         wol_mode[0x8];
+ +
+ +      u8         reserved_at_60[0x20];
+ +};
+ +
+ +struct mlx5_ifc_query_wol_rol_in_bits {
+ +      u8         opcode[0x10];
+ +      u8         reserved_at_10[0x10];
+ +
+ +      u8         reserved_at_20[0x10];
+ +      u8         op_mod[0x10];
+ +
+ +      u8         reserved_at_40[0x40];
+ +};
+ +
+ +struct mlx5_ifc_set_wol_rol_out_bits {
+ +      u8         status[0x8];
+ +      u8         reserved_at_8[0x18];
+ +
+ +      u8         syndrome[0x20];
+ +
+ +      u8         reserved_at_40[0x40];
+ +};
+ +
+ +struct mlx5_ifc_set_wol_rol_in_bits {
+ +      u8         opcode[0x10];
+ +      u8         reserved_at_10[0x10];
+ +
+ +      u8         reserved_at_20[0x10];
+ +      u8         op_mod[0x10];
+ +
+ +      u8         rol_mode_valid[0x1];
+ +      u8         wol_mode_valid[0x1];
+ +      u8         reserved_at_42[0xe];
+ +      u8         rol_mode[0x8];
+ +      u8         wol_mode[0x8];
+ +
+ +      u8         reserved_at_60[0x20];
+ +};
+ +
   enum {
         MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER  = 0x0,
         MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED     = 0x1,
@@@ -7164,49 -7135,4 +7199,49 @@@ struct mlx5_ifc_modify_flow_table_in_bi
         u8         reserved_at_100[0x100];
   };
   
+ +struct mlx5_ifc_ets_tcn_config_reg_bits {
+ +      u8         g[0x1];
+ +      u8         b[0x1];
+ +      u8         r[0x1];
+ +      u8         reserved_at_3[0x9];
+ +      u8         group[0x4];
+ +      u8         reserved_at_10[0x9];
+ +      u8         bw_allocation[0x7];
+ +
+ +      u8         reserved_at_20[0xc];
+ +      u8         max_bw_units[0x4];
+ +      u8         reserved_at_30[0x8];
+ +      u8         max_bw_value[0x8];
+ +};
+ +
+ +struct mlx5_ifc_ets_global_config_reg_bits {
+ +      u8         reserved_at_0[0x2];
+ +      u8         r[0x1];
+ +      u8         reserved_at_3[0x1d];
+ +
+ +      u8         reserved_at_20[0xc];
+ +      u8         max_bw_units[0x4];
+ +      u8         reserved_at_30[0x8];
+ +      u8         max_bw_value[0x8];
+ +};
+ +
+ +struct mlx5_ifc_qetc_reg_bits {
+ +      u8                                         reserved_at_0[0x8];
+ +      u8                                         port_number[0x8];
+ +      u8                                         reserved_at_10[0x30];
+ +
+ +      struct mlx5_ifc_ets_tcn_config_reg_bits    tc_configuration[0x8];
+ +      struct mlx5_ifc_ets_global_config_reg_bits global_configuration;
+ +};
+ +
+ +struct mlx5_ifc_qtct_reg_bits {
+ +      u8         reserved_at_0[0x8];
+ +      u8         port_number[0x8];
+ +      u8         reserved_at_10[0xd];
+ +      u8         prio[0x3];
+ +
+ +      u8         reserved_at_20[0x1d];
+ +      u8         tclass[0x3];
+ +};
+ +
   #endif /* MLX5_IFC_H */
diff --combined include/linux/netdevice.h

index be693b34662f9c95ec4de34ed4cf4b558a1e9738,7b4ae218b90bcfe6eeef660fe34972a3bddc3800..009c85adae4c5036be4351c2bd020753af5befc5
--- 1/include/linux/netdevice.h
--- 2/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@@ -51,7 -51,6 +51,7 @@@
   #include <linux/neighbour.h>
   #include <uapi/linux/netdevice.h>
   #include <uapi/linux/if_bonding.h>
+ +#include <uapi/linux/pkt_cls.h>
   
   struct netpoll_info;
   struct device;
@@@ -268,7 -267,6 +268,7 @@@ struct header_ops 
         void    (*cache_update)(struct hh_cache *hh,
                                 const struct net_device *dev,
                                 const unsigned char *haddr);
+ +      bool    (*validate)(const char *ll_header, unsigned int len);
   };
   
   /* These flag bits are private to the generic network queueing
@@@ -780,27 -778,6 +780,27 @@@ static inline bool netdev_phys_item_id_
   typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
                                        struct sk_buff *skb);
   
+ +/* These structures hold the attributes of qdisc and classifiers
+ + * that are being passed to the netdevice through the setup_tc op.
+ + */
+ +enum {
+ +      TC_SETUP_MQPRIO,
+ +      TC_SETUP_CLSU32,
+ +      TC_SETUP_CLSFLOWER,
+ +};
+ +
+ +struct tc_cls_u32_offload;
+ +
+ +struct tc_to_netdev {
+ +      unsigned int type;
+ +      union {
+ +              u8 tc;
+ +              struct tc_cls_u32_offload *cls_u32;
+ +              struct tc_cls_flower_offload *cls_flower;
+ +      };
+ +};
+ +
+ +
   /*
    * This structure defines the management hooks for network devices.
    * The following hooks can be defined; unless noted otherwise, they are
@@@ -1096,12 -1073,6 +1096,12 @@@
    *    This function is used to get egress tunnel information for given skb.
    *    This is useful for retrieving outer tunnel header parameters while
    *    sampling packet.
+ + * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom);
+ + *    This function is used to specify the headroom that the skb must
+ + *    consider when allocation skb during packet reception. Setting
+ + *    appropriate rx headroom value allows avoiding skb head copy on
+ + *    forward. Setting a negative value reset the rx headroom to the
+ + *    default value.
    *
    */
   struct net_device_ops {
@@@ -1176,13 -1147,13 +1176,16 @@@
                                                    struct nlattr *port[]);
         int                     (*ndo_get_vf_port)(struct net_device *dev,
                                                    int vf, struct sk_buff *skb);
+       int                     (*ndo_set_vf_guid)(struct net_device *dev,
+                                                  int vf, u64 guid,
+                                                  int guid_type);
         int                     (*ndo_set_vf_rss_query_en)(
                                                    struct net_device *dev,
                                                    int vf, bool setting);
- -      int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
+ +      int                     (*ndo_setup_tc)(struct net_device *dev,
+ +                                              u32 handle,
+ +                                              __be16 protocol,
+ +                                              struct tc_to_netdev *tc);
   #if IS_ENABLED(CONFIG_FCOE)
         int                     (*ndo_fcoe_enable)(struct net_device *dev);
         int                     (*ndo_fcoe_disable)(struct net_device *dev);
@@@ -1287,8 -1258,6 +1290,8 @@@
                                                          bool proto_down);
         int                     (*ndo_fill_metadata_dst)(struct net_device *dev,
                                                        struct sk_buff *skb);
+ +      void                    (*ndo_set_rx_headroom)(struct net_device *dev,
+ +                                                     int needed_headroom);
   };
   
   /**
@@@ -1325,10 -1294,6 +1328,10 @@@
    * @IFF_OPENVSWITCH: device is a Open vSwitch master
    * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
    * @IFF_TEAM: device is a team device
+ + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured
+ + * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
+ + *    entity (i.e. the master device for bridged veth)
+ + * @IFF_MACSEC: device is a MACsec device
    */
   enum netdev_priv_flags {
         IFF_802_1Q_VLAN                 = 1<<0,
@@@ -1356,9 -1321,6 +1359,9 @@@
         IFF_OPENVSWITCH                 = 1<<22,
         IFF_L3MDEV_SLAVE                = 1<<23,
         IFF_TEAM                        = 1<<24,
+ +      IFF_RXFH_CONFIGURED             = 1<<25,
+ +      IFF_PHONY_HEADROOM              = 1<<26,
+ +      IFF_MACSEC                      = 1<<27,
   };
   
   #define IFF_802_1Q_VLAN                       IFF_802_1Q_VLAN
@@@ -1386,8 -1348,6 +1389,8 @@@
   #define IFF_OPENVSWITCH                       IFF_OPENVSWITCH
   #define IFF_L3MDEV_SLAVE              IFF_L3MDEV_SLAVE
   #define IFF_TEAM                      IFF_TEAM
+ +#define IFF_RXFH_CONFIGURED           IFF_RXFH_CONFIGURED
+ +#define IFF_MACSEC                    IFF_MACSEC
   
   /**
    *    struct net_device - The DEVICE structure.
@@@ -1440,8 -1400,6 +1443,8 @@@
    *                    do not use this in drivers
    *    @tx_dropped:    Dropped packets by core network,
    *                    do not use this in drivers
+ + *    @rx_nohandler:  nohandler dropped packets by core network on
+ + *                    inactive devices, do not use this in drivers
    *
    *    @wireless_handlers:     List of functions to handle Wireless Extensions,
    *                            instead of ioctl,
@@@ -1465,7 -1423,8 +1468,7 @@@
    *    @dma:           DMA channel
    *    @mtu:           Interface MTU value
    *    @type:          Interface hardware type
- - *    @hard_header_len: Hardware header length, which means that this is the
- - *                      minimum size of a packet.
+ + *    @hard_header_len: Maximum hardware header length.
    *
    *    @needed_headroom: Extra headroom the hardware may need, but not in all
    *                      cases can this be guaranteed
@@@ -1655,7 -1614,6 +1658,7 @@@ struct net_device 
   
         atomic_long_t           rx_dropped;
         atomic_long_t           tx_dropped;
+ +      atomic_long_t           rx_nohandler;
   
   #ifdef CONFIG_WIRELESS_EXT
         const struct iw_handler_def *   wireless_handlers;
@@@ -1953,26 -1911,6 +1956,26 @@@ struct netdev_queue *netdev_pick_tx(str
                                     struct sk_buff *skb,
                                     void *accel_priv);
   
+ +/* returns the headroom that the master device needs to take in account
+ + * when forwarding to this dev
+ + */
+ +static inline unsigned netdev_get_fwd_headroom(struct net_device *dev)
+ +{
+ +      return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom;
+ +}
+ +
+ +static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr)
+ +{
+ +      if (dev->netdev_ops->ndo_set_rx_headroom)
+ +              dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr);
+ +}
+ +
+ +/* set the device rx headroom to the dev's default */
+ +static inline void netdev_reset_rx_headroom(struct net_device *dev)
+ +{
+ +      netdev_set_rx_headroom(dev, -1);
+ +}
+ +
   /*
    * Net namespace inlines
    */
@@@ -2692,24 -2630,6 +2695,24 @@@ static inline int dev_parse_header(cons
         return dev->header_ops->parse(skb, haddr);
   }
   
+ +/* ll_header must have at least hard_header_len allocated */
+ +static inline bool dev_validate_header(const struct net_device *dev,
+ +                                     char *ll_header, int len)
+ +{
+ +      if (likely(len >= dev->hard_header_len))
+ +              return true;
+ +
+ +      if (capable(CAP_SYS_RAWIO)) {
+ +              memset(ll_header + len, 0, dev->hard_header_len - len);
+ +              return true;
+ +      }
+ +
+ +      if (dev->header_ops && dev->header_ops->validate)
+ +              return dev->header_ops->validate(ll_header, len);
+ +
+ +      return false;
+ +}
+ +
   typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
   int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
   static inline int unregister_gifconf(unsigned int family)
@@@ -3824,7 -3744,7 +3827,7 @@@ void netdev_lower_state_changed(struct 
   
   /* RSS keys are 40 or 52 bytes long */
   #define NETDEV_RSS_KEY_LEN 52
- -extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN];
+ +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
   void netdev_rss_key_fill(void *buffer, size_t len);
   
   int dev_get_nest_level(struct net_device *dev,
@@@ -4048,11 -3968,6 +4051,11 @@@ static inline void skb_gso_error_unwind
         skb->mac_len = mac_len;
   }
   
+ +static inline bool netif_is_macsec(const struct net_device *dev)
+ +{
+ +      return dev->priv_flags & IFF_MACSEC;
+ +}
+ +
   static inline bool netif_is_macvlan(const struct net_device *dev)
   {
         return dev->priv_flags & IFF_MACVLAN;
@@@ -4133,11 -4048,6 +4136,11 @@@ static inline bool netif_is_lag_port(co
         return netif_is_bond_slave(dev) || netif_is_team_port(dev);
   }
   
+ +static inline bool netif_is_rxfh_configured(const struct net_device *dev)
+ +{
+ +      return dev->priv_flags & IFF_RXFH_CONFIGURED;
+ +}
+ +
   /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
   static inline void netif_keep_dst(struct net_device *dev)
   {
diff --combined include/uapi/linux/if_link.h

index 8e3f88fa5b59056cb29e7736b5d4d89e08964321,1d01e8a4e5dd3ab4d052f234a541c7d7bc778c02..a62a0129d614049293e585a871ecc956afc17ad0
--- 1/include/uapi/linux/if_link.h
--- 2/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@@ -35,8 -35,6 +35,8 @@@ struct rtnl_link_stats 
         /* for cslip etc */
         __u32   rx_compressed;
         __u32   tx_compressed;
+ +
+ +      __u32   rx_nohandler;           /* dropped, no handler found    */
   };
   
   /* The main device statistics structure */
@@@ -70,8 -68,6 +70,8 @@@ struct rtnl_link_stats64 
         /* for cslip etc */
         __u64   rx_compressed;
         __u64   tx_compressed;
+ +
+ +      __u64   rx_nohandler;           /* dropped, no handler found    */
   };
   
   /* The struct should be in sync with struct ifmap */
@@@ -405,43 -401,6 +405,43 @@@ enum 
   
   #define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
   
+ +enum {
+ +      IFLA_VRF_PORT_UNSPEC,
+ +      IFLA_VRF_PORT_TABLE,
+ +      __IFLA_VRF_PORT_MAX
+ +};
+ +
+ +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+ +
+ +/* MACSEC section */
+ +enum {
+ +      IFLA_MACSEC_UNSPEC,
+ +      IFLA_MACSEC_SCI,
+ +      IFLA_MACSEC_PORT,
+ +      IFLA_MACSEC_ICV_LEN,
+ +      IFLA_MACSEC_CIPHER_SUITE,
+ +      IFLA_MACSEC_WINDOW,
+ +      IFLA_MACSEC_ENCODING_SA,
+ +      IFLA_MACSEC_ENCRYPT,
+ +      IFLA_MACSEC_PROTECT,
+ +      IFLA_MACSEC_INC_SCI,
+ +      IFLA_MACSEC_ES,
+ +      IFLA_MACSEC_SCB,
+ +      IFLA_MACSEC_REPLAY_PROTECT,
+ +      IFLA_MACSEC_VALIDATION,
+ +      __IFLA_MACSEC_MAX,
+ +};
+ +
+ +#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+ +
+ +enum macsec_validation_type {
+ +      MACSEC_VALIDATE_DISABLED = 0,
+ +      MACSEC_VALIDATE_CHECK = 1,
+ +      MACSEC_VALIDATE_STRICT = 2,
+ +      __MACSEC_VALIDATE_END,
+ +      MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+ +};
+ +
   /* IPVLAN section */
   enum {
         IFLA_IPVLAN_UNSPEC,
@@@ -485,7 -444,6 +485,7 @@@ enum 
         IFLA_VXLAN_GBP,
         IFLA_VXLAN_REMCSUM_NOPARTIAL,
         IFLA_VXLAN_COLLECT_METADATA,
+ +      IFLA_VXLAN_LABEL,
         __IFLA_VXLAN_MAX
   };
   #define IFLA_VXLAN_MAX        (__IFLA_VXLAN_MAX - 1)
@@@ -508,7 -466,6 +508,7 @@@ enum 
         IFLA_GENEVE_UDP_CSUM,
         IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
         IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ +      IFLA_GENEVE_LABEL,
         __IFLA_GENEVE_MAX
   };
   #define IFLA_GENEVE_MAX       (__IFLA_GENEVE_MAX - 1)
@@@ -599,6 -556,8 +599,8 @@@ enum 
                                  */
         IFLA_VF_STATS,          /* network device statistics */
         IFLA_VF_TRUST,          /* Trust VF */
+       IFLA_VF_IB_NODE_GUID,   /* VF Infiniband node GUID */
+       IFLA_VF_IB_PORT_GUID,   /* VF Infiniband port GUID */
         __IFLA_VF_MAX,
   };
   
@@@ -631,6 -590,11 +633,11 @@@ struct ifla_vf_spoofchk 
         __u32 setting;
   };
   
+ struct ifla_vf_guid {
+       __u32 vf;
+       __u64 guid;
+ };
+ 
   enum {
         IFLA_VF_LINK_STATE_AUTO,        /* link state of the uplink */
         IFLA_VF_LINK_STATE_ENABLE,      /* link always up */
diff --combined net/core/rtnetlink.c

index d2d9e5ebf58ea827f8e0b5aaa85cea23cd3b77dd,4b6f3db9f8afb8589be7ec4363911d6770abae2c..167883e0931735a58d823a4efc46776b9c50a071
--- 1/net/core/rtnetlink.c
--- 2/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@@ -804,8 -804,6 +804,8 @@@ static void copy_rtnl_link_stats(struc
   
         a->rx_compressed = b->rx_compressed;
         a->tx_compressed = b->tx_compressed;
+ +
+ +      a->rx_nohandler = b->rx_nohandler;
   }
   
   static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
@@@ -1389,8 -1387,19 +1389,10 @@@ static const struct nla_policy ifla_vf_
         [IFLA_VF_RSS_QUERY_EN]  = { .len = sizeof(struct ifla_vf_rss_query_en) },
         [IFLA_VF_STATS]         = { .type = NLA_NESTED },
         [IFLA_VF_TRUST]         = { .len = sizeof(struct ifla_vf_trust) },
+       [IFLA_VF_IB_NODE_GUID]  = { .len = sizeof(struct ifla_vf_guid) },
+       [IFLA_VF_IB_PORT_GUID]  = { .len = sizeof(struct ifla_vf_guid) },
   };
   
- -static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
- -      [IFLA_VF_STATS_RX_PACKETS]      = { .type = NLA_U64 },
- -      [IFLA_VF_STATS_TX_PACKETS]      = { .type = NLA_U64 },
- -      [IFLA_VF_STATS_RX_BYTES]        = { .type = NLA_U64 },
- -      [IFLA_VF_STATS_TX_BYTES]        = { .type = NLA_U64 },
- -      [IFLA_VF_STATS_BROADCAST]       = { .type = NLA_U64 },
- -      [IFLA_VF_STATS_MULTICAST]       = { .type = NLA_U64 },
- -};
- -
   static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
         [IFLA_PORT_VF]          = { .type = NLA_U32 },
         [IFLA_PORT_PROFILE]     = { .type = NLA_STRING,
@@@ -1405,58 -1414,6 +1407,58 @@@
         [IFLA_PORT_RESPONSE]    = { .type = NLA_U16, },
   };
   
+ +static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
+ +{
+ +      const struct rtnl_link_ops *ops = NULL;
+ +      struct nlattr *linfo[IFLA_INFO_MAX + 1];
+ +
+ +      if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
+ +              return NULL;
+ +
+ +      if (linfo[IFLA_INFO_KIND]) {
+ +              char kind[MODULE_NAME_LEN];
+ +
+ +              nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
+ +              ops = rtnl_link_ops_get(kind);
+ +      }
+ +
+ +      return ops;
+ +}
+ +
+ +static bool link_master_filtered(struct net_device *dev, int master_idx)
+ +{
+ +      struct net_device *master;
+ +
+ +      if (!master_idx)
+ +              return false;
+ +
+ +      master = netdev_master_upper_dev_get(dev);
+ +      if (!master || master->ifindex != master_idx)
+ +              return true;
+ +
+ +      return false;
+ +}
+ +
+ +static bool link_kind_filtered(const struct net_device *dev,
+ +                             const struct rtnl_link_ops *kind_ops)
+ +{
+ +      if (kind_ops && dev->rtnl_link_ops != kind_ops)
+ +              return true;
+ +
+ +      return false;
+ +}
+ +
+ +static bool link_dump_filtered(struct net_device *dev,
+ +                             int master_idx,
+ +                             const struct rtnl_link_ops *kind_ops)
+ +{
+ +      if (link_master_filtered(dev, master_idx) ||
+ +          link_kind_filtered(dev, kind_ops))
+ +              return true;
+ +
+ +      return false;
+ +}
+ +
   static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
   {
         struct net *net = sock_net(skb->sk);
@@@ -1466,9 -1423,6 +1468,9 @@@
         struct hlist_head *head;
         struct nlattr *tb[IFLA_MAX+1];
         u32 ext_filter_mask = 0;
+ +      const struct rtnl_link_ops *kind_ops = NULL;
+ +      unsigned int flags = NLM_F_MULTI;
+ +      int master_idx = 0;
         int err;
         int hdrlen;
   
@@@ -1491,29 -1445,18 +1493,29 @@@
   
                 if (tb[IFLA_EXT_MASK])
                         ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ +
+ +              if (tb[IFLA_MASTER])
+ +                      master_idx = nla_get_u32(tb[IFLA_MASTER]);
+ +
+ +              if (tb[IFLA_LINKINFO])
+ +                      kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
+ +
+ +              if (master_idx || kind_ops)
+ +                      flags |= NLM_F_DUMP_FILTERED;
         }
   
         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                 idx = 0;
                 head = &net->dev_index_head[h];
                 hlist_for_each_entry(dev, head, index_hlist) {
+ +                      if (link_dump_filtered(dev, master_idx, kind_ops))
+ +                              continue;
                         if (idx < s_idx)
                                 goto cont;
                         err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
                                                NETLINK_CB(cb->skb).portid,
                                                cb->nlh->nlmsg_seq, 0,
- -                                             NLM_F_MULTI,
+ +                                             flags,
                                                ext_filter_mask);
                         /* If we ran out of room on the first message,
                          * we're in trouble
@@@ -1593,6 -1536,22 +1595,22 @@@ static int validate_linkmsg(struct net_
         return 0;
   }
   
+ static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
+                                 int guid_type)
+ {
+       const struct net_device_ops *ops = dev->netdev_ops;
+ 
+       return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
+ }
+ 
+ static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
+ {
+       if (dev->type != ARPHRD_INFINIBAND)
+               return -EOPNOTSUPP;
+ 
+       return handle_infiniband_guid(dev, ivt, guid_type);
+ }
+ 
   static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
   {
         const struct net_device_ops *ops = dev->netdev_ops;
@@@ -1695,6 -1654,24 +1713,24 @@@
                         return err;
         }
   
+       if (tb[IFLA_VF_IB_NODE_GUID]) {
+               struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
+ 
+               if (!ops->ndo_set_vf_guid)
+                       return -EOPNOTSUPP;
+ 
+               return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
+       }
+ 
+       if (tb[IFLA_VF_IB_PORT_GUID]) {
+               struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
+ 
+               if (!ops->ndo_set_vf_guid)
+                       return -EOPNOTSUPP;
+ 
+               return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
+       }
+ 
         return err;
   }
   
@@@ -2970,7 -2947,6 +3006,7 @@@ int ndo_dflt_fdb_dump(struct sk_buff *s
         nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
   out:
         netif_addr_unlock_bh(dev);
+ +      cb->args[1] = err;
         return idx;
   }
   EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@@ -3004,7 -2980,6 +3040,7 @@@ static int rtnl_fdb_dump(struct sk_buf
                 ops = br_dev->netdev_ops;
         }
   
+ +      cb->args[1] = 0;
         for_each_netdev(net, dev) {
                 if (brport_idx && (dev->ifindex != brport_idx))
                         continue;
@@@ -3032,16 -3007,12 +3068,16 @@@
                                 idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
                                                          idx);
                 }
+ +              if (cb->args[1] == -EMSGSIZE)
+ +                      break;
   
                 if (dev->netdev_ops->ndo_fdb_dump)
                         idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
                                                             idx);
                 else
                         idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+ +              if (cb->args[1] == -EMSGSIZE)
+ +                      break;
   
                 cops = NULL;
         }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 22 Mar 2016 22:48:44 +0000 (15:48 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 22 Mar 2016 22:48:44 +0000 (15:48 -0700)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/infiniband/core/sa_query.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/infiniband/hw/mlx5/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/infiniband/ulp/srpt/ib_srpt.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_type.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/mellanox/mlx5/core/cmd.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/mellanox/mlx5/core/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/chip.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/diag.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/driver.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/efivar.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/file_ops.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/init.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/mad.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/pcie.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/pio_copy.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/rdma/hfi1/user_sdma.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mlx5/device.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mlx5/driver.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mlx5/mlx5_ifc.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/netdevice.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/if_link.h	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/rtnetlink.c	patch \|	diff1 \|	diff2 \|	blob \| history