Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 00:04:33 +0000 (17:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 00:04:33 +0000 (17:04 -0700)
Pull main rdma updates from Doug Ledford:
 "This is the main pull request for the rdma stack this release.  The
  code has been through 0day and I had it tagged for linux-next testing
  for a couple of days.

  Summary:

   - updates to mlx5

   - updates to mlx4 (two conflicts, both minor and easily resolved)

   - updates to iw_cxgb4 (one conflict, not so obvious to resolve,
     proper resolution is to keep the code in cxgb4_main.c as it is in
     Linus' tree as attach_uld was refactored and moved into
     cxgb4_uld.c)

   - improvements to uAPI (moved vendor specific API elements to uAPI
     area)

   - add hns-roce driver and hns and hns-roce ACPI reset support

   - conversion of all rdma code away from deprecated
     create_singlethread_workqueue (see the workqueue sketch below)

   - security improvement: remove unsafe ib_get_dma_mr (breaks lustre in
     staging)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (75 commits)
  staging/lustre: Disable InfiniBand support
  iw_cxgb4: add fast-path for small REG_MR operations
  cxgb4: advertise support for FR_NSMR_TPTE_WR
  IB/core: correctly handle rdma_rw_init_mrs() failure
  IB/srp: Fix infinite loop when FMR sg[0].offset != 0
  IB/srp: Remove an unused argument
  IB/core: Improve ib_map_mr_sg() documentation
  IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets
  IB/mthca: Move user vendor structures
  IB/nes: Move user vendor structures
  IB/ocrdma: Move user vendor structures
  IB/mlx4: Move user vendor structures
  IB/cxgb4: Move user vendor structures
  IB/cxgb3: Move user vendor structures
  IB/mlx5: Move and decouple user vendor structures
  IB/{core,hw}: Add constant for node_desc
  ipoib: Make ipoib_warn ratelimited
  IB/mlx4/alias_GUID: Remove deprecated create_singlethread_workqueue
  IB/ipoib_verbs: Remove deprecated create_singlethread_workqueue
  IB/ipoib: Remove deprecated create_singlethread_workqueue
  ...

40 files changed:
MAINTAINERS
drivers/infiniband/core/cma.c
drivers/infiniband/core/multicast.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mcg.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/nvme/host/rdma.c
drivers/nvme/target/rdma.c
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
include/linux/mlx4/device.h
include/rdma/ib_verbs.h
net/sunrpc/xprtrdma/verbs.c

diff --combined MAINTAINERS
index 5327bbe00d13b4ab961710308f3a75d940878066,cc77b9ab9d526f96e53bee5f811d9a3f6fc049c0..a009e004f8f776abbd705e3a2984066e6e0b79ef
@@@ -807,7 -807,6 +807,7 @@@ M: Laura Abbott <labbott@redhat.com
  M:    Sumit Semwal <sumit.semwal@linaro.org>
  L:    devel@driverdev.osuosl.org
  S:    Supported
 +F:    Documentation/devicetree/bindings/staging/ion/
  F:    drivers/staging/android/ion
  F:    drivers/staging/android/uapi/ion.h
  F:    drivers/staging/android/uapi/ion_test.h
@@@ -819,11 -818,11 +819,11 @@@ L:      alsa-devel@alsa-project.org (moderat
  S:    Maintained
  F:    sound/aoa/
  
 -APEX EMBEDDED SYSTEMS STX104 DAC DRIVER
 +APEX EMBEDDED SYSTEMS STX104 IIO DRIVER
  M:    William Breathitt Gray <vilhelm.gray@gmail.com>
  L:    linux-iio@vger.kernel.org
  S:    Maintained
 -F:    drivers/iio/dac/stx104.c
 +F:    drivers/iio/adc/stx104.c
  
  APM DRIVER
  M:    Jiri Kosina <jikos@kernel.org>
@@@ -866,13 -865,6 +866,13 @@@ F:       drivers/net/phy/mdio-xgene.
  F:    Documentation/devicetree/bindings/net/apm-xgene-enet.txt
  F:    Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
  
 +APPLIED MICRO (APM) X-GENE SOC PMU
 +M:    Tai Nguyen <ttnguyen@apm.com>
 +S:    Supported
 +F:    drivers/perf/xgene_pmu.c
 +F:    Documentation/perf/xgene-pmu.txt
 +F:    Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt
 +
  APTINA CAMERA SENSOR PLL
  M:    Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
  L:    linux-media@vger.kernel.org
@@@ -898,15 -890,6 +898,15 @@@ S:       Supporte
  F:    drivers/gpu/drm/arc/
  F:    Documentation/devicetree/bindings/display/snps,arcpgu.txt
  
 +ARM ARCHITECTED TIMER DRIVER
 +M:    Mark Rutland <mark.rutland@arm.com>
 +M:    Marc Zyngier <marc.zyngier@arm.com>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +F:    arch/arm/include/asm/arch_timer.h
 +F:    arch/arm64/include/asm/arch_timer.h
 +F:    drivers/clocksource/arm_arch_timer.c
 +
  ARM HDLCD DRM DRIVER
  M:    Liviu Dudau <liviu.dudau@arm.com>
  S:    Supported
@@@ -929,17 -912,15 +929,17 @@@ F:      arch/arm/include/asm/floppy.
  
  ARM PMU PROFILING AND DEBUGGING
  M:    Will Deacon <will.deacon@arm.com>
 -R:    Mark Rutland <mark.rutland@arm.com>
 +M:    Mark Rutland <mark.rutland@arm.com>
  S:    Maintained
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  F:    arch/arm*/kernel/perf_*
  F:    arch/arm/oprofile/common.c
  F:    arch/arm*/kernel/hw_breakpoint.c
  F:    arch/arm*/include/asm/hw_breakpoint.h
  F:    arch/arm*/include/asm/perf_event.h
 -F:    drivers/perf/arm_pmu.c
 +F:    drivers/perf/*
  F:    include/linux/perf/arm_pmu.h
 +F:    Documentation/devicetree/bindings/arm/pmu.txt
  
  ARM PORT
  M:    Russell King <linux@armlinux.org.uk>
@@@ -1010,7 -991,6 +1010,7 @@@ M:       Chen-Yu Tsai <wens@csie.org
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  N:    sun[x456789]i
 +F:    arch/arm/boot/dts/ntc-gr8*
  
  ARM/Allwinner SoC Clock Support
  M:    Emilio López <emilio@elopez.com.ar>
@@@ -1142,11 -1122,6 +1142,11 @@@ F:    drivers/hwtracing/coresight/
  F:    Documentation/trace/coresight.txt
  F:    Documentation/devicetree/bindings/arm/coresight.txt
  F:    Documentation/ABI/testing/sysfs-bus-coresight-devices-*
 +F:    tools/perf/arch/arm/util/pmu.c
 +F:    tools/perf/arch/arm/util/auxtrace.c
 +F:    tools/perf/arch/arm/util/cs-etm.c
 +F:    tools/perf/arch/arm/util/cs-etm.h
 +F:    tools/perf/util/cs-etm.h
  
  ARM/CORGI MACHINE SUPPORT
  M:    Richard Purdie <rpurdie@rpsys.net>
@@@ -1467,7 -1442,6 +1467,7 @@@ F:      arch/arm/mach-orion5x/ts78xx-
  ARM/OXNAS platform support
  M:    Neil Armstrong <narmstrong@baylibre.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +L:    linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
  S:    Maintained
  F:    arch/arm/mach-oxnas/
  F:    arch/arm/boot/dts/oxnas*
@@@ -1649,8 -1623,7 +1649,8 @@@ N:      rockchi
  
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <kgene@kernel.org>
 -M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
 +R:    Javier Martinez Canillas <javier@osg.samsung.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  S:    Maintained
@@@ -1670,6 -1643,7 +1670,6 @@@ F:      drivers/*/*s3c64xx
  F:    drivers/*/*s5pv210*
  F:    drivers/memory/samsung/*
  F:    drivers/soc/samsung/*
 -F:    drivers/spi/spi-s3c*
  F:    Documentation/arm/Samsung/
  F:    Documentation/devicetree/bindings/arm/samsung/
  F:    Documentation/devicetree/bindings/sram/samsung-sram.txt
@@@ -1857,7 -1831,6 +1857,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  ARM/UNIPHIER ARCHITECTURE
  M:    Masahiro Yamada <yamada.masahiro@socionext.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
  S:    Maintained
  F:    arch/arm/boot/dts/uniphier*
  F:    arch/arm/include/asm/hardware/cache-uniphier.h
@@@ -1865,10 -1838,8 +1865,10 @@@ F:    arch/arm/mach-uniphier
  F:    arch/arm/mm/cache-uniphier.c
  F:    arch/arm64/boot/dts/socionext/
  F:    drivers/bus/uniphier-system-bus.c
 +F:    drivers/clk/uniphier/
  F:    drivers/i2c/busses/i2c-uniphier*
  F:    drivers/pinctrl/uniphier/
 +F:    drivers/reset/reset-uniphier.c
  F:    drivers/tty/serial/8250/8250_uniphier.c
  N:    uniphier
  
@@@ -2003,13 -1974,6 +2003,13 @@@ S:    Maintaine
  F:    drivers/media/i2c/as3645a.c
  F:    include/media/i2c/as3645a.h
  
 +ASAHI KASEI AK8974 DRIVER
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +L:    linux-iio@vger.kernel.org
 +W:    http://www.akm.com/
 +S:    Supported
 +F:    drivers/iio/magnetometer/ak8974.c
 +
  ASC7621 HARDWARE MONITOR DRIVER
  M:    George Joseph <george.joseph@fairview5.com>
  L:    linux-hwmon@vger.kernel.org
@@@ -2148,6 -2112,11 +2148,6 @@@ M:     Ludovic Desroches <ludovic.desroches
  S:    Maintained
  F:    drivers/mmc/host/atmel-mci.c
  
 -ATMEL AT91 / AT32 SERIAL DRIVER
 -M:    Nicolas Ferre <nicolas.ferre@atmel.com>
 -S:    Supported
 -F:    drivers/tty/serial/atmel_serial.c
 -
  ATMEL AT91 SAMA5D2-Compatible Shutdown Controller
  M:    Nicolas Ferre <nicolas.ferre@atmel.com>
  S:    Supported
@@@ -2482,7 -2451,6 +2482,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  S:    Maintained
  F:    block/
  F:    kernel/trace/blktrace.c
 +F:    lib/sbitmap.c
  
  BLOCK2MTD DRIVER
  M:    Joern Engel <joern@lazybastard.org>
@@@ -2516,7 -2484,7 +2516,7 @@@ F:      include/net/bluetooth
  BONDING DRIVER
  M:    Jay Vosburgh <j.vosburgh@gmail.com>
  M:    Veaceslav Falico <vfalico@gmail.com>
 -M:    Andy Gospodarek <gospo@cumulusnetworks.com>
 +M:    Andy Gospodarek <andy@greyhouse.net>
  L:    netdev@vger.kernel.org
  W:    http://sourceforge.net/projects/bonding/
  S:    Supported
@@@ -2531,7 -2499,7 +2531,7 @@@ S:      Supporte
  F:    kernel/bpf/
  
  BROADCOM B44 10/100 ETHERNET DRIVER
 -M:    Gary Zambrano <zambrano@broadcom.com>
 +M:    Michael Chan <michael.chan@broadcom.com>
  L:    netdev@vger.kernel.org
  S:    Supported
  F:    drivers/net/ethernet/broadcom/b44.*
@@@ -2606,13 -2574,6 +2606,13 @@@ F:    arch/arm/mach-bcm/bcm_5301x.
  F:    arch/arm/boot/dts/bcm5301x*.dtsi
  F:    arch/arm/boot/dts/bcm470*
  
 +BROADCOM BCM53573 ARM ARCHITECTURE
 +M:    Rafał Miłecki <rafal@milecki.pl>
 +L:    linux-arm-kernel@lists.infradead.org
 +S:    Maintained
 +F:    arch/arm/boot/dts/bcm53573*
 +F:    arch/arm/boot/dts/bcm47189*
 +
  BROADCOM BCM63XX ARM ARCHITECTURE
  M:    Florian Fainelli <f.fainelli@gmail.com>
  M:    bcm-kernel-feedback-list@broadcom.com
@@@ -2926,14 -2887,6 +2926,14 @@@ S:    Maintaine
  F:    drivers/iio/light/cm*
  F:    Documentation/devicetree/bindings/i2c/trivial-devices.txt
  
 +CAVIUM I2C DRIVER
 +M:    Jan Glauber <jglauber@cavium.com>
 +M:    David Daney <david.daney@cavium.com>
 +W:    http://www.cavium.com
 +S:    Supported
 +F:    drivers/i2c/busses/i2c-octeon*
 +F:    drivers/i2c/busses/i2c-thunderx*
 +
  CAVIUM LIQUIDIO NETWORK DRIVER
  M:     Derek Chickles <derek.chickles@caviumnetworks.com>
  M:     Satanand Burla <satananda.burla@caviumnetworks.com>
@@@ -3181,7 -3134,7 +3181,7 @@@ L:      cocci@systeme.lip6.fr (moderated fo
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
  W:    http://coccinelle.lip6.fr/
  S:    Supported
 -F:    Documentation/coccinelle.txt
 +F:    Documentation/dev-tools/coccinelle.rst
  F:    scripts/coccinelle/
  F:    scripts/coccicheck
  
@@@ -3207,7 -3160,6 +3207,7 @@@ COMMON CLK FRAMEWOR
  M:    Michael Turquette <mturquette@baylibre.com>
  M:    Stephen Boyd <sboyd@codeaurora.org>
  L:    linux-clk@vger.kernel.org
 +Q:    http://patchwork.kernel.org/project/linux-clk/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
  S:    Maintained
  F:    Documentation/devicetree/bindings/clock/
@@@ -3295,7 -3247,7 +3295,7 @@@ F:      kernel/cpuset.
  CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
  M:    Johannes Weiner <hannes@cmpxchg.org>
  M:    Michal Hocko <mhocko@kernel.org>
 -M:    Vladimir Davydov <vdavydov@virtuozzo.com>
 +M:    Vladimir Davydov <vdavydov.dev@gmail.com>
  L:    cgroups@vger.kernel.org
  L:    linux-mm@kvack.org
  S:    Maintained
@@@ -3316,7 -3268,7 +3316,7 @@@ S:      Maintaine
  F:    drivers/net/wan/cosa*
  
  CPMAC ETHERNET DRIVER
 -M:    Florian Fainelli <florian@openwrt.org>
 +M:    Florian Fainelli <f.fainelli@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/ethernet/ti/cpmac.c
@@@ -3328,7 -3280,6 +3328,7 @@@ L:      linux-pm@vger.kernel.or
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
  T:    git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
 +F:    Documentation/cpu-freq/
  F:    drivers/cpufreq/
  F:    include/linux/cpufreq.h
  
@@@ -3509,6 -3460,7 +3509,7 @@@ L:      linux-rdma@vger.kernel.or
  W:    http://www.openfabrics.org
  S:    Supported
  F:    drivers/infiniband/hw/cxgb3/
+ F:    include/uapi/rdma/cxgb3-abi.h
  
  CXGB4 ETHERNET DRIVER (CXGB4)
  M:    Hariprasad S <hariprasad@chelsio.com>
@@@ -3530,6 -3482,7 +3531,7 @@@ L:      linux-rdma@vger.kernel.or
  W:    http://www.openfabrics.org
  S:    Supported
  F:    drivers/infiniband/hw/cxgb4/
+ F:    include/uapi/rdma/cxgb4-abi.h
  
  CXGB4VF ETHERNET DRIVER (CXGB4VF)
  M:    Casey Leedom <leedom@chelsio.com>
@@@ -3540,14 -3493,14 +3542,14 @@@ F:   drivers/net/ethernet/chelsio/cxgb4vf
  
  CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER
  M:    Ian Munsie <imunsie@au1.ibm.com>
 -M:    Michael Neuling <mikey@neuling.org>
 +M:    Frederic Barrat <fbarrat@linux.vnet.ibm.com>
  L:    linuxppc-dev@lists.ozlabs.org
  S:    Supported
 +F:    arch/powerpc/platforms/powernv/pci-cxl.c
  F:    drivers/misc/cxl/
  F:    include/misc/cxl*
  F:    include/uapi/misc/cxl.h
  F:    Documentation/powerpc/cxl.txt
 -F:    Documentation/powerpc/cxl.txt
  F:    Documentation/ABI/testing/sysfs-class-cxl
  
  CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER
@@@ -3810,8 -3763,8 +3812,8 @@@ F:      drivers/leds/leds-da90??.
  F:    drivers/mfd/da903x.c
  F:    drivers/mfd/da90??-*.c
  F:    drivers/mfd/da91??-*.c
 -F:    drivers/power/da9052-battery.c
 -F:    drivers/power/da91??-*.c
 +F:    drivers/power/supply/da9052-battery.c
 +F:    drivers/power/supply/da91??-*.c
  F:    drivers/regulator/da903x.c
  F:    drivers/regulator/da9???-regulator.[ch]
  F:    drivers/rtc/rtc-da90??.c
@@@ -3827,12 -3780,6 +3829,12 @@@ F:    include/linux/regulator/da9211.
  F:    include/sound/da[79]*.h
  F:    sound/soc/codecs/da[79]*.[ch]
  
 +DIAMOND SYSTEMS GPIO-MM GPIO DRIVER
 +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
 +L:    linux-gpio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/gpio/gpio-gpio-mm.c
 +
  DIGI NEO AND CLASSIC PCI PRODUCTS
  M:    Lidza Louina <lidza.louina@gmail.com>
  M:    Mark Hounschell <markh@compro.net>
@@@ -4458,6 -4405,7 +4460,6 @@@ F:      Documentation/filesystems/ecryptfs.t
  F:    fs/ecryptfs/
  
  EDAC-CORE
 -M:    Doug Thompson <dougthompson@xmission.com>
  M:    Borislav Petkov <bp@alien8.de>
  M:    Mauro Carvalho Chehab <mchehab@s-opensource.com>
  M:    Mauro Carvalho Chehab <mchehab@kernel.org>
@@@ -4470,12 -4418,14 +4472,12 @@@ F:   drivers/edac
  F:    include/linux/edac.h
  
  EDAC-AMD64
 -M:    Doug Thompson <dougthompson@xmission.com>
  M:    Borislav Petkov <bp@alien8.de>
  L:    linux-edac@vger.kernel.org
  S:    Maintained
  F:    drivers/edac/amd64_edac*
  
  EDAC-CALXEDA
 -M:    Doug Thompson <dougthompson@xmission.com>
  M:    Robert Richter <rric@kernel.org>
  L:    linux-edac@vger.kernel.org
  S:    Maintained
@@@ -4491,21 -4441,17 +4493,21 @@@ F:   drivers/edac/octeon_edac
  
  EDAC-E752X
  M:    Mark Gross <mark.gross@intel.com>
 -M:    Doug Thompson <dougthompson@xmission.com>
  L:    linux-edac@vger.kernel.org
  S:    Maintained
  F:    drivers/edac/e752x_edac.c
  
  EDAC-E7XXX
 -M:    Doug Thompson <dougthompson@xmission.com>
  L:    linux-edac@vger.kernel.org
  S:    Maintained
  F:    drivers/edac/e7xxx_edac.c
  
 +EDAC-FSL_DDR
 +M:    York Sun <york.sun@nxp.com>
 +L:    linux-edac@vger.kernel.org
 +S:    Maintained
 +F:    drivers/edac/fsl_ddr_edac.*
 +
  EDAC-GHES
  M:    Mauro Carvalho Chehab <mchehab@s-opensource.com>
  M:    Mauro Carvalho Chehab <mchehab@kernel.org>
@@@ -4520,11 -4466,13 +4522,11 @@@ S:   Maintaine
  F:    drivers/edac/i82443bxgx_edac.c
  
  EDAC-I3000
 -M:    Jason Uhlenkott <juhlenko@akamai.com>
  L:    linux-edac@vger.kernel.org
 -S:    Maintained
 +S:    Orphan
  F:    drivers/edac/i3000_edac.c
  
  EDAC-I5000
 -M:    Doug Thompson <dougthompson@xmission.com>
  L:    linux-edac@vger.kernel.org
  S:    Maintained
  F:    drivers/edac/i5000_edac.c
@@@ -4588,12 -4536,6 +4590,12 @@@ L:    linux-edac@vger.kernel.or
  S:    Maintained
  F:    drivers/edac/sb_edac.c
  
 +EDAC-SKYLAKE
 +M:    Tony Luck <tony.luck@intel.com>
 +L:    linux-edac@vger.kernel.org
 +S:    Maintained
 +F:    drivers/edac/skx_edac.c
 +
  EDAC-XGENE
  APPLIED MICRO (APM) X-GENE SOC EDAC
  M:     Loc Ho <lho@apm.com>
@@@ -4636,13 -4578,6 +4638,13 @@@ M:    Peter Jones <pjones@redhat.com
  S:    Maintained
  F:    drivers/video/fbdev/efifb.c
  
 +EFI TEST DRIVER
 +L:    linux-efi@vger.kernel.org
 +M:    Ivan Hu <ivan.hu@canonical.com>
 +M:    Matt Fleming <matt@codeblueprint.co.uk>
 +S:    Maintained
 +F:    drivers/firmware/efi/test/
 +
  EFS FILESYSTEM
  W:    http://aeschi.ch.eu.org/efs/
  S:    Orphan
@@@ -4910,7 -4845,6 +4912,7 @@@ F:      tools/firewire
  
  FIRMWARE LOADER (request_firmware)
  M:    Ming Lei <ming.lei@canonical.com>
 +M:    Luis R. Rodriguez <mcgrof@kernel.org>
  L:    linux-kernel@vger.kernel.org
  S:    Maintained
  F:    Documentation/firmware_class/
@@@ -5131,9 -5065,10 +5133,9 @@@ F:     include/linux/fscrypto.
  
  F2FS FILE SYSTEM
  M:    Jaegeuk Kim <jaegeuk@kernel.org>
 -M:    Changman Lee <cm224.lee@samsung.com>
 -R:    Chao Yu <yuchao0@huawei.com>
 +M:    Chao Yu <yuchao0@huawei.com>
  L:    linux-f2fs-devel@lists.sourceforge.net
 -W:    http://en.wikipedia.org/wiki/F2FS
 +W:    https://f2fs.wiki.kernel.org/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
  S:    Maintained
  F:    Documentation/filesystems/f2fs.txt
@@@ -5195,7 -5130,7 +5197,7 @@@ GCOV BASED KERNEL PROFILIN
  M:    Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
  S:    Maintained
  F:    kernel/gcov/
 -F:    Documentation/gcov.txt
 +F:    Documentation/dev-tools/gcov.rst
  
  GDT SCSI DISK ARRAY CONTROLLER DRIVER
  M:    Achim Leubner <achim_leubner@adaptec.com>
@@@ -5312,13 -5247,6 +5314,13 @@@ L:    linux-input@vger.kernel.or
  S:    Maintained
  F:    drivers/input/touchscreen/goodix.c
  
 +GPIO MOCKUP DRIVER
 +M:    Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
 +L:    linux-gpio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/gpio/gpio-mockup.c
 +F:    tools/testing/selftests/gpio/
 +
  GPIO SUBSYSTEM
  M:    Linus Walleij <linus.walleij@linaro.org>
  M:    Alexandre Courbot <gnurou@gmail.com>
@@@ -5350,77 -5278,6 +5352,77 @@@ L:    netdev@vger.kernel.or
  S:    Maintained
  F:    drivers/net/ethernet/aeroflex/
  
 +GREYBUS SUBSYSTEM
 +M:    Johan Hovold <johan@kernel.org>
 +M:    Alex Elder <elder@kernel.org>
 +M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 +S:    Maintained
 +F:    drivers/staging/greybus/
 +
 +GREYBUS AUDIO PROTOCOLS DRIVERS
 +M:    Vaibhav Agarwal <vaibhav.sr@gmail.com>
 +M:    Mark Greer <mgreer@animalcreek.com>
 +S:    Maintained
 +F:    drivers/staging/greybus/audio_apbridgea.c
 +F:    drivers/staging/greybus/audio_apbridgea.h
 +F:    drivers/staging/greybus/audio_codec.c
 +F:    drivers/staging/greybus/audio_codec.h
 +F:    drivers/staging/greybus/audio_gb.c
 +F:    drivers/staging/greybus/audio_manager.c
 +F:    drivers/staging/greybus/audio_manager.h
 +F:    drivers/staging/greybus/audio_manager_module.c
 +F:    drivers/staging/greybus/audio_manager_private.h
 +F:    drivers/staging/greybus/audio_manager_sysfs.c
 +F:    drivers/staging/greybus/audio_module.c
 +F:    drivers/staging/greybus/audio_topology.c
 +
 +GREYBUS PROTOCOLS DRIVERS
 +M:    Rui Miguel Silva <rmfrfs@gmail.com>
 +S:    Maintained
 +F:    drivers/staging/greybus/sdio.c
 +F:    drivers/staging/greybus/light.c
 +F:    drivers/staging/greybus/gpio.c
 +F:    drivers/staging/greybus/power_supply.c
 +F:    drivers/staging/greybus/spi.c
 +F:    drivers/staging/greybus/spilib.c
 +
 +GREYBUS PROTOCOLS DRIVERS
 +M:    Bryan O'Donoghue <pure.logic@nexus-software.ie>
 +S:    Maintained
 +F:    drivers/staging/greybus/loopback.c
 +F:    drivers/staging/greybus/timesync.c
 +F:    drivers/staging/greybus/timesync_platform.c
 +
 +GREYBUS PROTOCOLS DRIVERS
 +M:    Viresh Kumar <vireshk@kernel.org>
 +S:    Maintained
 +F:    drivers/staging/greybus/authentication.c
 +F:    drivers/staging/greybus/bootrom.c
 +F:    drivers/staging/greybus/firmware.h
 +F:    drivers/staging/greybus/fw-core.c
 +F:    drivers/staging/greybus/fw-download.c
 +F:    drivers/staging/greybus/fw-managament.c
 +F:    drivers/staging/greybus/greybus_authentication.h
 +F:    drivers/staging/greybus/greybus_firmware.h
 +F:    drivers/staging/greybus/hid.c
 +F:    drivers/staging/greybus/i2c.c
 +F:    drivers/staging/greybus/spi.c
 +F:    drivers/staging/greybus/spilib.c
 +F:    drivers/staging/greybus/spilib.h
 +
 +GREYBUS PROTOCOLS DRIVERS
 +M:    David Lin <dtwlin@gmail.com>
 +S:    Maintained
 +F:    drivers/staging/greybus/uart.c
 +F:    drivers/staging/greybus/log.c
 +
 +GREYBUS PLATFORM DRIVERS
 +M:    Vaibhav Hiremath <hvaibhav.linux@gmail.com>
 +S:    Maintained
 +F:    drivers/staging/greybus/arche-platform.c
 +F:    drivers/staging/greybus/arche-apb-ctrl.c
 +F:    drivers/staging/greybus/arche_platform.h
 +
  GSPCA FINEPIX SUBDRIVER
  M:    Frank Zago <frank@zago.net>
  L:    linux-media@vger.kernel.org
@@@ -5712,6 -5569,14 +5714,14 @@@ S:    Maintaine
  F:    drivers/net/ethernet/hisilicon/
  F:    Documentation/devicetree/bindings/net/hisilicon*.txt
  
+ HISILICON ROCE DRIVER
+ M:    Lijun Ou <oulijun@huawei.com>
+ M:    Wei Hu(Xavier) <xavier.huwei@huawei.com>
+ L:    linux-rdma@vger.kernel.org
+ S:    Maintained
+ F:    drivers/infiniband/hw/hns/
+ F:    Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
  HISILICON SAS Controller
  M:    John Garry <john.garry@huawei.com>
  W:    http://www.hisilicon.com
@@@ -5721,9 -5586,10 +5731,9 @@@ F:     Documentation/devicetree/bindings/sc
  
  HOST AP DRIVER
  M:    Jouni Malinen <j@w1.fi>
 -L:    hostap@shmoo.com (subscribers-only)
  L:    linux-wireless@vger.kernel.org
 -W:    http://hostap.epitest.fi/
 -S:    Maintained
 +W:    http://w1.fi/hostap-driver.html
 +S:    Obsolete
  F:    drivers/net/wireless/intersil/hostap/
  
  HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
@@@ -5760,7 -5626,7 +5770,7 @@@ M:      Sebastian Reichel <sre@kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-hsi.git
  S:    Maintained
  F:    Documentation/ABI/testing/sysfs-bus-hsi
 -F:    Documentation/hsi.txt
 +F:    Documentation/device-drivers/serial-interfaces.rst
  F:    drivers/hsi/
  F:    include/linux/hsi/
  F:    include/uapi/linux/hsi/
@@@ -5814,8 -5680,6 +5824,8 @@@ S:      Maintaine
  F:    Documentation/i2c/i2c-topology
  F:    Documentation/i2c/muxes/
  F:    Documentation/devicetree/bindings/i2c/i2c-mux*
 +F:    Documentation/devicetree/bindings/i2c/i2c-arb*
 +F:    Documentation/devicetree/bindings/i2c/i2c-gate*
  F:    drivers/i2c/i2c-mux.c
  F:    drivers/i2c/muxes/
  F:    include/linux/i2c-mux.h
@@@ -6233,13 -6097,6 +6243,13 @@@ T:    git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/idle/intel_idle.c
  
 +INTEL INTEGRATED SENSOR HUB DRIVER
 +M:    Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 +M:    Jiri Kosina <jikos@kernel.org>
 +L:    linux-input@vger.kernel.org
 +S:    Maintained
 +F:    drivers/hid/intel-ish-hid/
 +
  INTEL PSTATE DRIVER
  M:    Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  M:    Len Brown <lenb@kernel.org>
@@@ -6248,7 -6105,7 +6258,7 @@@ S:      Supporte
  F:    drivers/cpufreq/intel_pstate.c
  
  INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 -M:    Maik Broemme <mbroemme@plusserver.de>
 +M:    Maik Broemme <mbroemme@libmpq.org>
  L:    linux-fbdev@vger.kernel.org
  S:    Maintained
  F:    Documentation/fb/intelfb.txt
@@@ -6750,7 -6607,7 +6760,7 @@@ L:      kasan-dev@googlegroups.co
  S:    Maintained
  F:    arch/*/include/asm/kasan.h
  F:    arch/*/mm/kasan_init*
 -F:    Documentation/kasan.txt
 +F:    Documentation/dev-tools/kasan.rst
  F:    include/linux/kasan*.h
  F:    lib/test_kasan.c
  F:    mm/kasan/
@@@ -6966,7 -6823,7 +6976,7 @@@ KMEMCHEC
  M:    Vegard Nossum <vegardno@ifi.uio.no>
  M:    Pekka Enberg <penberg@kernel.org>
  S:    Maintained
 -F:    Documentation/kmemcheck.txt
 +F:    Documentation/dev-tools/kmemcheck.rst
  F:    arch/x86/include/asm/kmemcheck.h
  F:    arch/x86/mm/kmemcheck/
  F:    include/linux/kmemcheck.h
@@@ -6975,7 -6832,7 +6985,7 @@@ F:      mm/kmemcheck.
  KMEMLEAK
  M:    Catalin Marinas <catalin.marinas@arm.com>
  S:    Maintained
 -F:    Documentation/kmemleak.txt
 +F:    Documentation/dev-tools/kmemleak.rst
  F:    include/linux/kmemleak.h
  F:    mm/kmemleak.c
  F:    mm/kmemleak-test.c
@@@ -7588,8 -7445,9 +7598,8 @@@ F:      Documentation/hwmon/max2075
  F:    drivers/hwmon/max20751.c
  
  MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 -M:    "Hans J. Koch" <hjk@hansjkoch.de>
  L:    linux-hwmon@vger.kernel.org
 -S:    Maintained
 +S:    Orphan
  F:    Documentation/hwmon/max6650
  F:    drivers/hwmon/max6650.c
  
@@@ -7610,12 -7468,11 +7620,12 @@@ F:   Documentation/devicetree/bindings/so
  F:    sound/soc/codecs/max9860.*
  
  MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
 -M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
 +M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-pm@vger.kernel.org
  S:    Supported
 -F:    drivers/power/max14577_charger.c
 -F:    drivers/power/max77693_charger.c
 +F:    drivers/power/supply/max14577_charger.c
 +F:    drivers/power/supply/max77693_charger.c
  
  MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Javier Martinez Canillas <javier@osg.samsung.com>
@@@ -7627,8 -7484,7 +7637,8 @@@ F:      include/dt-bindings/*/*max77802.
  
  MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
  M:    Chanwoo Choi <cw00.choi@samsung.com>
 -M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
 +M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-kernel@vger.kernel.org
  S:    Supported
  F:    drivers/*/max14577*.c
@@@ -7660,12 -7516,6 +7670,12 @@@ L:    linux-iio@vger.kernel.or
  S:    Maintained
  F:    drivers/iio/potentiometer/mcp4531.c
  
 +MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
 +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/iio/dac/cio-dac.c
 +
  MEDIA DRIVERS FOR RENESAS - FCP
  M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  L:    linux-media@vger.kernel.org
@@@ -7818,26 -7668,13 +7828,26 @@@ W:   http://www.mellanox.co
  Q:    http://patchwork.ozlabs.org/project/netdev/list/
  F:    drivers/net/ethernet/mellanox/mlxsw/
  
 +MELLANOX MLXCPLD LED DRIVER
 +M:    Vadim Pasternak <vadimp@mellanox.com>
 +L:    linux-leds@vger.kernel.org
 +S:    Supported
 +F:    drivers/leds/leds-mlxcpld.c
 +F:    Documentation/leds/leds-mlxcpld.txt
 +
 +MELLANOX PLATFORM DRIVER
 +M:      Vadim Pasternak <vadimp@mellanox.com>
 +L:      platform-driver-x86@vger.kernel.org
 +S:      Supported
 +F:      arch/x86/platform/mellanox/mlx-platform.c
 +
  SOFT-ROCE DRIVER (rxe)
  M:    Moni Shoua <monis@mellanox.com>
  L:    linux-rdma@vger.kernel.org
  S:    Supported
  W:    https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
  Q:    http://patchwork.kernel.org/project/linux-rdma/list/
 -F:    drivers/infiniband/hw/rxe/
 +F:    drivers/infiniband/sw/rxe/
  F:    include/uapi/rdma/rdma_user_rxe.h
  
  MEMBARRIER SUPPORT
@@@ -7916,12 -7753,6 +7926,12 @@@ T:    git git://git.monstr.eu/linux-2.6-mi
  S:    Supported
  F:    arch/microblaze/
  
 +MICROCHIP / ATMEL AT91 / AT32 SERIAL DRIVER
 +M:    Richard Genoud <richard.genoud@gmail.com>
 +S:    Maintained
 +F:    drivers/tty/serial/atmel_serial.c
 +F:    include/linux/atmel_serial.h
 +
  MICROSOFT SURFACE PRO 3 BUTTON DRIVER
  M:    Chen Yu <yu.c.chen@intel.com>
  L:    platform-driver-x86@vger.kernel.org
@@@ -7970,6 -7801,7 +7980,7 @@@ Q:      http://patchwork.ozlabs.org/project/
  S:    Supported
  F:    drivers/net/ethernet/mellanox/mlx4/
  F:    include/linux/mlx4/
+ F:    include/uapi/rdma/mlx4-abi.h
  
  MELLANOX MLX4 IB driver
  M:    Yishai Hadas <yishaih@mellanox.com>
@@@ -7990,6 -7822,7 +8001,7 @@@ Q:      http://patchwork.ozlabs.org/project/
  S:    Supported
  F:    drivers/net/ethernet/mellanox/mlx5/core/
  F:    include/linux/mlx5/
+ F:    include/uapi/rdma/mlx5-abi.h
  
  MELLANOX MLX5 IB driver
  M:    Matan Barak <matanb@mellanox.com>
@@@ -8008,18 -7841,6 +8020,18 @@@ W:    http://www.melexis.co
  S:    Supported
  F:    drivers/iio/temperature/mlx90614.c
  
 +MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
 +M:    Don Brace <don.brace@microsemi.com>
 +L:    esc.storagedev@microsemi.com
 +L:    linux-scsi@vger.kernel.org
 +S:    Supported
 +F:    drivers/scsi/smartpqi/smartpqi*.[ch]
 +F:    drivers/scsi/smartpqi/Kconfig
 +F:    drivers/scsi/smartpqi/Makefile
 +F:    include/linux/cciss*.h
 +F:    include/uapi/linux/cciss*.h
 +F:    Documentation/scsi/smartpqi.txt
 +
  MN88472 MEDIA DRIVER
  M:    Antti Palosaari <crope@iki.fi>
  L:    linux-media@vger.kernel.org
@@@ -8145,7 -7966,6 +8157,7 @@@ MULTIFUNCTION DEVICES (MFD
  M:    Lee Jones <lee.jones@linaro.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git
  S:    Supported
 +F:    Documentation/devicetree/bindings/mfd/
  F:    drivers/mfd/
  F:    include/linux/mfd/
  
@@@ -8232,16 -8052,20 +8244,16 @@@ M:   Michael Schmitz <schmitzmic@gmail.co
  L:    linux-scsi@vger.kernel.org
  S:    Maintained
  F:    Documentation/scsi/g_NCR5380.txt
 -F:    Documentation/scsi/dtc3x80.txt
  F:    drivers/scsi/NCR5380.*
  F:    drivers/scsi/arm/cumana_1.c
  F:    drivers/scsi/arm/oak.c
  F:    drivers/scsi/atari_scsi.*
  F:    drivers/scsi/dmx3191d.c
 -F:    drivers/scsi/dtc.*
  F:    drivers/scsi/g_NCR5380.*
  F:    drivers/scsi/g_NCR5380_mmio.c
  F:    drivers/scsi/mac_scsi.*
 -F:    drivers/scsi/pas16.*
  F:    drivers/scsi/sun3_scsi.*
  F:    drivers/scsi/sun3_scsi_vme.c
 -F:    drivers/scsi/t128.*
  
  NCR DUAL 700 SCSI DRIVER (MICROCHANNEL)
  M:    "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
@@@ -8262,6 -8086,7 +8274,7 @@@ L:      linux-rdma@vger.kernel.or
  W:    http://www.intel.com/Products/Server/Adapters/Server-Cluster/Server-Cluster-overview.htm
  S:    Supported
  F:    drivers/infiniband/hw/nes/
+ F:    include/uapi/rdma/nes-abi.h
  
  NETEM NETWORK EMULATOR
  M:    Stephen Hemminger <stephen@networkplumber.org>
@@@ -8339,15 -8164,6 +8352,15 @@@ S:    Maintaine
  W:    https://fedorahosted.org/dropwatch/
  F:    net/core/drop_monitor.c
  
 +NETWORKING [DSA]
 +M:    Andrew Lunn <andrew@lunn.ch>
 +M:    Vivien Didelot <vivien.didelot@savoirfairelinux.com>
 +M:    Florian Fainelli <f.fainelli@gmail.com>
 +S:    Maintained
 +F:    net/dsa/
 +F:    include/net/dsa.h
 +F:    drivers/net/dsa/
 +
  NETWORKING [GENERAL]
  M:    "David S. Miller" <davem@davemloft.net>
  L:    netdev@vger.kernel.org
@@@ -8530,11 -8346,11 +8543,11 @@@ R:   Pali Rohár <pali.rohar@gmail.com
  F:    include/linux/power/bq2415x_charger.h
  F:    include/linux/power/bq27xxx_battery.h
  F:    include/linux/power/isp1704_charger.h
 -F:    drivers/power/bq2415x_charger.c
 -F:    drivers/power/bq27xxx_battery.c
 -F:    drivers/power/bq27xxx_battery_i2c.c
 -F:    drivers/power/isp1704_charger.c
 -F:    drivers/power/rx51_battery.c
 +F:    drivers/power/supply/bq2415x_charger.c
 +F:    drivers/power/supply/bq27xxx_battery.c
 +F:    drivers/power/supply/bq27xxx_battery_i2c.c
 +F:    drivers/power/supply/isp1704_charger.c
 +F:    drivers/power/supply/rx51_battery.c
  
  NTB DRIVER CORE
  M:    Jon Mason <jdmason@kudzu.us>
@@@ -8923,7 -8739,7 +8936,7 @@@ F:      drivers/oprofile
  F:    include/linux/oprofile.h
  
  ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
 -M:    Mark Fasheh <mfasheh@suse.com>
 +M:    Mark Fasheh <mfasheh@versity.com>
  M:    Joel Becker <jlbec@evilplan.org>
  L:    ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
  W:    http://ocfs2.wiki.kernel.org
@@@ -9035,7 -8851,6 +9048,7 @@@ S:      Supporte
  F:    Documentation/virtual/paravirt_ops.txt
  F:    arch/*/kernel/paravirt*
  F:    arch/*/include/asm/paravirt.h
 +F:    include/linux/hypervisor.h
  
  PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
  M:    Tim Waugh <tim@cyberelk.net>
@@@ -9291,15 -9106,6 +9304,15 @@@ S:    Maintaine
  F:    Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
  F:    drivers/pci/host/pcie-hisi.c
  
 +PCIE DRIVER FOR ROCKCHIP
 +M:    Shawn Lin <shawn.lin@rock-chips.com>
 +M:    Wenrui Li <wenrui.li@rock-chips.com>
 +L:    linux-pci@vger.kernel.org
 +L:    linux-rockchip@lists.infradead.org
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/pci/rockchip-pcie.txt
 +F:    drivers/pci/host/pcie-rockchip.c
 +
  PCIE DRIVER FOR QUALCOMM MSM
  M:     Stanimir Varbanov <svarbanov@mm-sol.com>
  L:     linux-pci@vger.kernel.org
@@@ -9447,14 -9253,12 +9460,14 @@@ F:   drivers/pinctrl/sh-pfc
  
  PIN CONTROLLER - SAMSUNG
  M:    Tomasz Figa <tomasz.figa@gmail.com>
 -M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
  M:    Sylwester Nawrocki <s.nawrocki@samsung.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  S:    Maintained
  F:    drivers/pinctrl/samsung/
 +F:    include/dt-bindings/pinctrl/samsung.h
 +F:    Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
  
  PIN CONTROLLER - SINGLE
  M:    Tony Lindgren <tony@atomide.com>
@@@ -9545,12 -9349,16 +9558,12 @@@ F:   drivers/powercap
  
  POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
  M:    Sebastian Reichel <sre@kernel.org>
 -M:    Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
 -M:    David Woodhouse <dwmw2@infradead.org>
  L:    linux-pm@vger.kernel.org
 -T:    git git://git.infradead.org/battery-2.6.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
  S:    Maintained
 -F:    Documentation/devicetree/bindings/power/
 -F:    Documentation/devicetree/bindings/power_supply/
 +F:    Documentation/devicetree/bindings/power/supply/
  F:    include/linux/power_supply.h
 -F:    drivers/power/
 -X:    drivers/power/avs/
 +F:    drivers/power/supply/
  
  POWER STATE COORDINATION INTERFACE (PSCI)
  M:    Mark Rutland <mark.rutland@arm.com>
@@@ -9886,12 -9694,6 +9899,12 @@@ T:    git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/net/wireless/ath/ath10k/
  
 +QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 +M:    Timur Tabi <timur@codeaurora.org>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    drivers/net/ethernet/qualcomm/emac/
 +
  QUALCOMM HEXAGON ARCHITECTURE
  M:    Richard Kuo <rkuo@codeaurora.org>
  L:    linux-hexagon@vger.kernel.org
@@@ -10112,12 -9914,6 +10125,12 @@@ F:  drivers/rpmsg
  F:    Documentation/rpmsg.txt
  F:    include/linux/rpmsg.h
  
 +RENESAS CLOCK DRIVERS
 +M:    Geert Uytterhoeven <geert+renesas@glider.be>
 +L:    linux-renesas-soc@vger.kernel.org
 +S:    Supported
 +F:    drivers/clk/renesas/
 +
  RENESAS ETHERNET DRIVERS
  R:    Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
  L:    netdev@vger.kernel.org
@@@ -10297,8 -10093,8 +10310,8 @@@ S:   Supporte
  F:    drivers/s390/cio/
  
  S390 DASD DRIVER
 -M:    Stefan Weinhuber <wein@de.ibm.com>
 -M:    Stefan Haberland <stefan.haberland@de.ibm.com>
 +M:    Stefan Haberland <sth@linux.vnet.ibm.com>
 +M:    Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
  L:    linux-s390@vger.kernel.org
  W:    http://www.ibm.com/developerworks/linux/linux390/
  S:    Supported
@@@ -10391,7 -10187,7 +10404,7 @@@ S:   Maintaine
  F:    drivers/platform/x86/samsung-laptop.c
  
  SAMSUNG AUDIO (ASoC) DRIVERS
 -M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
  M:    Sangbeom Kim <sbkim73@samsung.com>
  M:    Sylwester Nawrocki <s.nawrocki@samsung.com>
  L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
@@@ -10406,8 -10202,7 +10419,8 @@@ F:   drivers/video/fbdev/s3c-fb.
  
  SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Sangbeom Kim <sbkim73@samsung.com>
 -M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
 +M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-kernel@vger.kernel.org
  L:    linux-samsung-soc@vger.kernel.org
  S:    Supported
@@@ -10462,23 -10257,9 +10475,23 @@@ F: drivers/nfc/s3fwrn
  SAMSUNG SOC CLOCK DRIVERS
  M:    Sylwester Nawrocki <s.nawrocki@samsung.com>
  M:    Tomasz Figa <tomasz.figa@gmail.com>
 +M:    Chanwoo Choi <cw00.choi@samsung.com>
  S:    Supported
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  F:    drivers/clk/samsung/
 +F:    include/dt-bindings/clock/exynos*.h
 +F:    Documentation/devicetree/bindings/clock/exynos*.txt
 +
 +SAMSUNG SPI DRIVERS
 +M:    Kukjin Kim <kgene@kernel.org>
 +M:    Krzysztof Kozlowski <krzk@kernel.org>
 +M:    Andi Shyti <andi.shyti@samsung.com>
 +L:    linux-spi@vger.kernel.org
 +L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/spi/spi-samsung.txt
 +F:    drivers/spi/spi-s3c*
 +F:    include/linux/platform_data/spi-s3c64xx.h
  
  SAMSUNG SXGBE DRIVERS
  M:    Byungho An <bh74.an@samsung.com>
@@@ -10565,8 -10346,8 +10578,8 @@@ F:   drivers/thunderbolt
  TI BQ27XXX POWER SUPPLY DRIVER
  R:    Andrew F. Davis <afd@ti.com>
  F:    include/linux/power/bq27xxx_battery.h
 -F:    drivers/power/bq27xxx_battery.c
 -F:    drivers/power/bq27xxx_battery_i2c.c
 +F:    drivers/power/supply/bq27xxx_battery.c
 +F:    drivers/power/supply/bq27xxx_battery_i2c.c
  
  TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
  M:    John Stultz <john.stultz@linaro.org>
@@@ -10794,12 -10575,12 +10807,12 @@@ S:        Maintaine
  F:    drivers/misc/phantom.c
  F:    include/uapi/linux/phantom.h
  
 -SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
 -M:    Jayamohan Kallickal <jayamohan.kallickal@avagotech.com>
 -M:    Ketan Mukadam <ketan.mukadam@avagotech.com>
 -M:    John Soni Jose <sony.john@avagotech.com>
 +Emulex 10Gbps iSCSI - OneConnect DRIVER
 +M:    Subbu Seetharaman <subbu.seetharaman@broadcom.com>
 +M:    Ketan Mukadam <ketan.mukadam@broadcom.com>
 +M:    Jitendra Bhivare <jitendra.bhivare@broadcom.com>
  L:    linux-scsi@vger.kernel.org
 -W:    http://www.avagotech.com
 +W:    http://www.broadcom.com
  S:    Supported
  F:    drivers/scsi/be2iscsi/
  
@@@ -10821,6 -10602,7 +10834,7 @@@ L:   linux-rdma@vger.kernel.or
  W:    http://www.emulex.com
  S:    Supported
  F:    drivers/infiniband/hw/ocrdma/
+ F:    include/uapi/rdma/ocrdma-abi.h
  
  SFC NETWORK DRIVER
  M:    Solarflare linux maintainers <linux-net-drivers@solarflare.com>
@@@ -11326,7 -11108,6 +11340,7 @@@ F:   Documentation/spi
  F:    drivers/spi/
  F:    include/linux/spi/
  F:    include/uapi/linux/spi/
 +F:    tools/spi/
  
  SPIDERNET NETWORK DRIVER for CELL
  M:    Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
@@@ -11397,7 -11178,6 +11411,7 @@@ F:   drivers/staging/media/lirc
  STAGING - LUSTRE PARALLEL FILESYSTEM
  M:    Oleg Drokin <oleg.drokin@intel.com>
  M:    Andreas Dilger <andreas.dilger@intel.com>
 +M:    James Simmons <jsimmons@infradead.org>
  L:    lustre-devel@lists.lustre.org (moderated for non-subscribers)
  W:    http://wiki.lustre.org/
  S:    Maintained
@@@ -11424,6 -11204,13 +11438,6 @@@ M:  Florian Schilhabel <florian.c.schilh
  S:    Odd Fixes
  F:    drivers/staging/rtl8712/
  
 -STAGING - REALTEK RTL8723U WIRELESS DRIVER
 -M:    Larry Finger <Larry.Finger@lwfinger.net>
 -M:    Jes Sorensen <Jes.Sorensen@redhat.com>
 -L:    linux-wireless@vger.kernel.org
 -S:    Maintained
 -F:    drivers/staging/rtl8723au/
 -
  STAGING - SILICON MOTION SM750 FRAME BUFFER DRIVER
  M:    Sudip Mukherjee <sudipm.mukherjee@gmail.com>
  M:    Teddy Wang <teddy.wang@siliconmotion.com>
@@@ -11454,8 -11241,12 +11468,8 @@@ S:  Odd Fixe
  F:    drivers/staging/vt665?/
  
  STAGING - WILC1000 WIFI DRIVER
 -M:    Johnny Kim <johnny.kim@atmel.com>
 -M:    Austin Shin <austin.shin@atmel.com>
 -M:    Chris Park <chris.park@atmel.com>
 -M:    Tony Cho <tony.cho@atmel.com>
 -M:    Glen Lee <glen.lee@atmel.com>
 -M:    Leo Kim <leo.kim@atmel.com>
 +M:    Aditya Shankar <aditya.shankar@microchip.com>
 +M:    Ganesh Krishna <ganesh.krishna@microchip.com>
  L:    linux-wireless@vger.kernel.org
  S:    Supported
  F:    drivers/staging/wilc1000/
@@@ -11563,14 -11354,6 +11577,14 @@@ T: git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/mfd/syscon.c
  
 +SYSTEM RESET/SHUTDOWN DRIVERS
 +M:    Sebastian Reichel <sre@kernel.org>
 +L:    linux-pm@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/power/reset/
 +F:    drivers/power/reset/
 +
  SYSV FILESYSTEM
  M:    Christoph Hellwig <hch@infradead.org>
  S:    Maintained
@@@ -11831,7 -11614,7 +11845,7 @@@ F:   Documentation/devicetree/bindings/th
  THERMAL/CPU_COOLING
  M:    Amit Daniel Kachhap <amit.kachhap@gmail.com>
  M:    Viresh Kumar <viresh.kumar@linaro.org>
 -M:    Javi Merino <javi.merino@arm.com>
 +M:    Javi Merino <javi.merino@kernel.org>
  L:    linux-pm@vger.kernel.org
  S:    Supported
  F:    Documentation/thermal/cpu-cooling-api.txt
@@@ -11919,7 -11702,7 +11933,7 @@@ F:   include/linux/platform_data/lp855x.
  TI LP8727 CHARGER DRIVER
  M:    Milo Kim <milo.kim@ti.com>
  S:    Maintained
 -F:    drivers/power/lp8727_charger.c
 +F:    drivers/power/supply/lp8727_charger.c
  F:    include/linux/platform_data/lp8727.h
  
  TI LP8788 MFD DRIVER
@@@ -11928,7 -11711,7 +11942,7 @@@ S:   Maintaine
  F:    drivers/iio/adc/lp8788_adc.c
  F:    drivers/leds/leds-lp8788.c
  F:    drivers/mfd/lp8788*.c
 -F:    drivers/power/lp8788-charger.c
 +F:    drivers/power/supply/lp8788-charger.c
  F:    drivers/regulator/lp8788-*.c
  F:    include/linux/mfd/lp8788*.h
  
@@@ -12197,6 -11980,12 +12211,6 @@@ S:  Maintaine
  F:    drivers/tc/
  F:    include/linux/tc.h
  
 -U14-34F SCSI DRIVER
 -M:    Dario Ballabio <ballabio_dario@emc.com>
 -L:    linux-scsi@vger.kernel.org
 -S:    Maintained
 -F:    drivers/scsi/u14-34f.c
 -
  UBI FILE SYSTEM (UBIFS)
  M:    Richard Weinberger <richard@nod.at>
  M:    Artem Bityutskiy <dedekind1@gmail.com>
@@@ -12392,7 -12181,7 +12406,7 @@@ S:   Maintaine
  F:    drivers/net/usb/lan78xx.*
  
  USB MASS STORAGE DRIVER
 -M:    Matthew Dharm <mdharm-usb@one-eyed-alien.net>
 +M:    Alan Stern <stern@rowland.harvard.edu>
  L:    linux-usb@vger.kernel.org
  L:    usb-storage@lists.one-eyed-alien.net
  S:    Maintained
@@@ -12476,7 -12265,6 +12490,7 @@@ F:   drivers/net/usb/rtl8150.
  USB SERIAL SUBSYSTEM
  M:    Johan Hovold <johan@kernel.org>
  L:    linux-usb@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git
  S:    Maintained
  F:    Documentation/usb/usb-serial.txt
  F:    drivers/usb/serial/
@@@ -12490,7 -12278,6 +12504,7 @@@ F:   drivers/net/usb/smsc75xx.
  
  USB SMSC95XX ETHERNET DRIVER
  M:    Steve Glendinning <steve.glendinning@shawell.net>
 +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/usb/smsc95xx.*
@@@ -12597,6 -12384,7 +12611,6 @@@ F:   fs/hostfs
  F:    fs/hppfs/
  
  USERSPACE I/O (UIO)
 -M:    "Hans J. Koch" <hjk@hansjkoch.de>
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
@@@ -12778,7 -12566,7 +12792,7 @@@ F:   include/linux/if_*vlan.
  F:    net/8021q/
  
  VLYNQ BUS
 -M:    Florian Fainelli <florian@openwrt.org>
 +M:    Florian Fainelli <f.fainelli@gmail.com>
  L:    openwrt-devel@lists.openwrt.org (subscribers-only)
  S:    Maintained
  F:    drivers/vlynq/vlynq.c
@@@ -12924,6 -12712,12 +12938,6 @@@ F:  drivers/watchdog
  F:    include/linux/watchdog.h
  F:    include/uapi/linux/watchdog.h
  
 -WD7000 SCSI DRIVER
 -M:    Miroslav Zagorac <zaga@fly.cc.fer.hr>
 -L:    linux-scsi@vger.kernel.org
 -S:    Maintained
 -F:    drivers/scsi/wd7000.c
 -
  WIIMOTE HID DRIVER
  M:    David Herrmann <dh.herrmann@googlemail.com>
  L:    linux-input@vger.kernel.org
@@@ -12993,7 -12787,7 +13007,7 @@@ F:   drivers/input/touchscreen/wm97*.
  F:    drivers/mfd/arizona*
  F:    drivers/mfd/wm*.c
  F:    drivers/mfd/cs47l24*
 -F:    drivers/power/wm83*.c
 +F:    drivers/power/supply/wm83*.c
  F:    drivers/rtc/rtc-wm83*.c
  F:    drivers/regulator/wm8*.c
  F:    drivers/video/backlight/wm83*_bl.c
@@@ -13145,10 -12939,11 +13159,10 @@@ F:        arch/x86/xen/*swiotlb
  F:    drivers/xen/*swiotlb*
  
  XFS FILESYSTEM
 -P:    Silicon Graphics Inc
  M:    Dave Chinner <david@fromorbit.com>
 -M:    xfs@oss.sgi.com
 -L:    xfs@oss.sgi.com
 -W:    http://oss.sgi.com/projects/xfs
 +M:    linux-xfs@vger.kernel.org
 +L:    linux-xfs@vger.kernel.org
 +W:    http://xfs.org/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git
  S:    Supported
  F:    Documentation/filesystems/xfs.txt
diff --combined drivers/infiniband/core/cma.c
index 5f65a78b27c9c56c52414dddbe2d16e12f123905,8954792f1acc6317cb28f215797bc02848184ce9..36bf50ebb187eb40195f0ede0d2d3abcc47d8ccb
@@@ -2462,24 -2462,18 +2462,24 @@@ static int cma_resolve_iboe_route(struc
  
        if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
 -              if (!ndev)
 -                      return -ENODEV;
 +              if (!ndev) {
 +                      ret = -ENODEV;
 +                      goto err2;
 +              }
  
                if (ndev->flags & IFF_LOOPBACK) {
                        dev_put(ndev);
 -                      if (!id_priv->id.device->get_netdev)
 -                              return -EOPNOTSUPP;
 +                      if (!id_priv->id.device->get_netdev) {
 +                              ret = -EOPNOTSUPP;
 +                              goto err2;
 +                      }
  
                        ndev = id_priv->id.device->get_netdev(id_priv->id.device,
                                                              id_priv->id.port_num);
 -                      if (!ndev)
 -                              return -ENODEV;
 +                      if (!ndev) {
 +                              ret = -ENODEV;
 +                              goto err2;
 +                      }
                }
  
                route->path_rec->net = &init_net;
@@@ -4369,7 -4363,7 +4369,7 @@@ static int __init cma_init(void
  {
        int ret;
  
-       cma_wq = create_singlethread_workqueue("rdma_cm");
+       cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
        if (!cma_wq)
                return -ENOMEM;
  
diff --combined drivers/infiniband/core/multicast.c
index 51c79b2fb0b837f4fbfe5a9672e98dd1dd304f61,49ecde98a3d90af3d8ee2e55db755247d15a9ba0..e51b739f6ea3d2e9bd6d8b776d5ea5d8e49844ba
@@@ -106,6 -106,7 +106,6 @@@ struct mcast_group 
        atomic_t                refcount;
        enum mcast_group_state  state;
        struct ib_sa_query      *query;
 -      int                     query_id;
        u16                     pkey_index;
        u8                      leave_state;
        int                     retries;
@@@ -339,7 -340,11 +339,7 @@@ static int send_join(struct mcast_grou
                                       member->multicast.comp_mask,
                                       3000, GFP_KERNEL, join_handler, group,
                                       &group->query);
 -      if (ret >= 0) {
 -              group->query_id = ret;
 -              ret = 0;
 -      }
 -      return ret;
 +      return (ret > 0) ? 0 : ret;
  }
  
  static int send_leave(struct mcast_group *group, u8 leave_state)
                                       IB_SA_MCMEMBER_REC_JOIN_STATE,
                                       3000, GFP_KERNEL, leave_handler,
                                       group, &group->query);
 -      if (ret >= 0) {
 -              group->query_id = ret;
 -              ret = 0;
 -      }
 -      return ret;
 +      return (ret > 0) ? 0 : ret;
  }
  
  static void join_group(struct mcast_group *group, struct mcast_member *member,
@@@ -873,7 -882,7 +873,7 @@@ int mcast_init(void
  {
        int ret;
  
-       mcast_wq = create_singlethread_workqueue("ib_mcast");
+       mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM);
        if (!mcast_wq)
                return -ENOMEM;
  
diff --combined drivers/infiniband/hw/cxgb4/cm.c
index 71c8867ef66bf6c10e4eb94300fb4ca58a286629,c9661d8f11dcc381b71e53f6714c5a761a278c04..f1510cc76d2dbe7027e81f5495f53bd7b6588536
@@@ -49,7 -49,6 +49,7 @@@
  
  #include <rdma/ib_addr.h>
  
 +#include <libcxgb_cm.h>
  #include "iw_cxgb4.h"
  #include "clip_tbl.h"
  
@@@ -240,13 -239,15 +240,13 @@@ int c4iw_ofld_send(struct c4iw_rdev *rd
  
  static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
  {
 -      struct cpl_tid_release *req;
 +      u32 len = roundup(sizeof(struct cpl_tid_release), 16);
  
 -      skb = get_skb(skb, sizeof *req, GFP_KERNEL);
 +      skb = get_skb(skb, len, GFP_KERNEL);
        if (!skb)
                return;
 -      req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
 -      INIT_TP_WR(req, hwtid);
 -      OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
 -      set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
 +
 +      cxgb_mk_tid_release(skb, len, hwtid, 0);
        c4iw_ofld_send(rdev, skb);
        return;
  }
@@@ -332,8 -333,6 +332,8 @@@ static void remove_ep_tid(struct c4iw_e
  
        spin_lock_irqsave(&ep->com.dev->lock, flags);
        _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
 +      if (idr_is_empty(&ep->com.dev->hwtid_idr))
 +              wake_up(&ep->com.dev->wait);
        spin_unlock_irqrestore(&ep->com.dev->lock, flags);
  }
  
@@@ -465,6 -464,72 +465,6 @@@ static struct net_device *get_real_dev(
        return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
  }
  
 -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
 -{
 -      int i;
 -
 -      egress_dev = get_real_dev(egress_dev);
 -      for (i = 0; i < dev->rdev.lldi.nports; i++)
 -              if (dev->rdev.lldi.ports[i] == egress_dev)
 -                      return 1;
 -      return 0;
 -}
 -
 -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
 -                                   __u8 *peer_ip, __be16 local_port,
 -                                   __be16 peer_port, u8 tos,
 -                                   __u32 sin6_scope_id)
 -{
 -      struct dst_entry *dst = NULL;
 -
 -      if (IS_ENABLED(CONFIG_IPV6)) {
 -              struct flowi6 fl6;
 -
 -              memset(&fl6, 0, sizeof(fl6));
 -              memcpy(&fl6.daddr, peer_ip, 16);
 -              memcpy(&fl6.saddr, local_ip, 16);
 -              if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
 -                      fl6.flowi6_oif = sin6_scope_id;
 -              dst = ip6_route_output(&init_net, NULL, &fl6);
 -              if (!dst)
 -                      goto out;
 -              if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
 -                  !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
 -                      dst_release(dst);
 -                      dst = NULL;
 -              }
 -      }
 -
 -out:
 -      return dst;
 -}
 -
 -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
 -                               __be32 peer_ip, __be16 local_port,
 -                               __be16 peer_port, u8 tos)
 -{
 -      struct rtable *rt;
 -      struct flowi4 fl4;
 -      struct neighbour *n;
 -
 -      rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
 -                                 peer_port, local_port, IPPROTO_TCP,
 -                                 tos, 0);
 -      if (IS_ERR(rt))
 -              return NULL;
 -      n = dst_neigh_lookup(&rt->dst, &peer_ip);
 -      if (!n)
 -              return NULL;
 -      if (!our_interface(dev, n->dev) &&
 -          !(n->dev->flags & IFF_LOOPBACK)) {
 -              neigh_release(n);
 -              dst_release(&rt->dst);
 -              return NULL;
 -      }
 -      neigh_release(n);
 -      return &rt->dst;
 -}
 -
  static void arp_failure_discard(void *handle, struct sk_buff *skb)
  {
        pr_err(MOD "ARP failure\n");
@@@ -639,32 -704,56 +639,32 @@@ static int send_flowc(struct c4iw_ep *e
  
  static int send_halfclose(struct c4iw_ep *ep)
  {
 -      struct cpl_close_con_req *req;
        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 -      int wrlen = roundup(sizeof *req, 16);
 +      u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
  
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        if (WARN_ON(!skb))
                return -ENOMEM;
  
 -      set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 -      t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
 -      req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
 -      memset(req, 0, wrlen);
 -      INIT_TP_WR(req, ep->hwtid);
 -      OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
 -                                                  ep->hwtid));
 +      cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
 +                            NULL, arp_failure_discard);
 +
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
  }
  
  static int send_abort(struct c4iw_ep *ep)
  {
 -      struct cpl_abort_req *req;
 -      int wrlen = roundup(sizeof *req, 16);
 +      u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
        struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
  
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        if (WARN_ON(!req_skb))
                return -ENOMEM;
  
 -      set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
 -      t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
 -      req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
 -      memset(req, 0, wrlen);
 -      INIT_TP_WR(req, ep->hwtid);
 -      OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
 -      req->cmd = CPL_ABORT_SEND_RST;
 -      return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 -}
 +      cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
 +                        ep, abort_arp_failure);
  
 -static void best_mtu(const unsigned short *mtus, unsigned short mtu,
 -                   unsigned int *idx, int use_ts, int ipv6)
 -{
 -      unsigned short hdr_size = (ipv6 ?
 -                                 sizeof(struct ipv6hdr) :
 -                                 sizeof(struct iphdr)) +
 -                                sizeof(struct tcphdr) +
 -                                (use_ts ?
 -                                 round_up(TCPOLEN_TIMESTAMP, 4) : 0);
 -      unsigned short data_size = mtu - hdr_size;
 -
 -      cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
 +      return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
  }
  
  static int send_connect(struct c4iw_ep *ep)
        u64 opt0;
        u32 opt2;
        unsigned int mtu_idx;
 -      int wscale;
 +      u32 wscale;
        int win, sizev4, sizev6, wrlen;
        struct sockaddr_in *la = (struct sockaddr_in *)
                                 &ep->com.local_addr;
        }
        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
  
 -      best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 -               enable_tcp_timestamps,
 -               (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
 -      wscale = compute_wscale(rcv_win);
 +      cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 +                    enable_tcp_timestamps,
 +                    (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 +      wscale = cxgb_compute_wscale(rcv_win);
  
        /*
         * Specify the largest window that will fit in opt0. The
@@@ -1356,9 -1445,9 +1356,9 @@@ static void established_upcall(struct c
  
  static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
  {
 -      struct cpl_rx_data_ack *req;
        struct sk_buff *skb;
 -      int wrlen = roundup(sizeof *req, 16);
 +      u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
 +      u32 credit_dack;
  
        PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
        skb = get_skb(NULL, wrlen, GFP_KERNEL);
        if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
                credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
  
 -      req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
 -      memset(req, 0, wrlen);
 -      INIT_TP_WR(req, ep->hwtid);
 -      OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
 -                                                  ep->hwtid));
 -      req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
 -                                     RX_DACK_CHANGE_F |
 -                                     RX_DACK_MODE_V(dack_mode));
 -      set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
 +      credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
 +                    RX_DACK_MODE_V(dack_mode);
 +
 +      cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
 +                          credit_dack);
 +
        c4iw_ofld_send(&ep->com.dev->rdev, skb);
        return credits;
  }
@@@ -1735,12 -1827,8 +1735,12 @@@ static int process_mpa_request(struct c
                                (ep->mpa_pkt + sizeof(*mpa));
                        ep->ird = ntohs(mpa_v2_params->ird) &
                                MPA_V2_IRD_ORD_MASK;
 +                      ep->ird = min_t(u32, ep->ird,
 +                                      cur_max_read_depth(ep->com.dev));
                        ep->ord = ntohs(mpa_v2_params->ord) &
                                MPA_V2_IRD_ORD_MASK;
 +                      ep->ord = min_t(u32, ep->ord,
 +                                      cur_max_read_depth(ep->com.dev));
                        PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
                             ep->ord);
                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
@@@ -1878,7 -1966,7 +1878,7 @@@ static int send_fw_act_open_req(struct 
        struct sk_buff *skb;
        struct fw_ofld_connection_wr *req;
        unsigned int mtu_idx;
 -      int wscale;
 +      u32 wscale;
        struct sockaddr_in *sin;
        int win;
  
                        htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
        req->tcb.tx_max = (__force __be32) jiffies;
        req->tcb.rcv_adv = htons(1);
 -      best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 -               enable_tcp_timestamps,
 -               (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
 -      wscale = compute_wscale(rcv_win);
 +      cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 +                    enable_tcp_timestamps,
 +                    (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 +      wscale = cxgb_compute_wscale(rcv_win);
  
        /*
         * Specify the largest window that will fit in opt0. The
@@@ -1960,6 -2048,15 +1960,6 @@@ static inline int act_open_has_tid(int 
                status != CPL_ERR_CONN_EXIST);
  }
  
 -/* Returns whether a CPL status conveys negative advice.
 - */
 -static int is_neg_adv(unsigned int status)
 -{
 -      return status == CPL_ERR_RTX_NEG_ADVICE ||
 -             status == CPL_ERR_PERSIST_NEG_ADVICE ||
 -             status == CPL_ERR_KEEPALV_NEG_ADVICE;
 -}
 -
  static char *neg_adv_str(unsigned int status)
  {
        switch (status) {
@@@ -2016,10 -2113,8 +2016,10 @@@ static int import_ep(struct c4iw_ep *ep
                }
                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
                                        n, pdev, rt_tos2priority(tos));
 -              if (!ep->l2t)
 +              if (!ep->l2t) {
 +                      dev_put(pdev);
                        goto out;
 +              }
                ep->mtu = pdev->mtu;
                ep->tx_chan = cxgb4_port_chan(pdev);
                ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
@@@ -2115,21 -2210,16 +2115,21 @@@ static int c4iw_reconnect(struct c4iw_e
  
        /* find a route */
        if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
 -              ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
 -                                   raddr->sin_addr.s_addr, laddr->sin_port,
 -                                   raddr->sin_port, ep->com.cm_id->tos);
 +              ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
 +                                        laddr->sin_addr.s_addr,
 +                                        raddr->sin_addr.s_addr,
 +                                        laddr->sin_port,
 +                                        raddr->sin_port, ep->com.cm_id->tos);
                iptype = 4;
                ra = (__u8 *)&raddr->sin_addr;
        } else {
 -              ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
 -                                    raddr6->sin6_addr.s6_addr,
 -                                    laddr6->sin6_port, raddr6->sin6_port, 0,
 -                                    raddr6->sin6_scope_id);
 +              ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
 +                                         get_real_dev,
 +                                         laddr6->sin6_addr.s6_addr,
 +                                         raddr6->sin6_addr.s6_addr,
 +                                         laddr6->sin6_port,
 +                                         raddr6->sin6_port, 0,
 +                                         raddr6->sin6_scope_id);
                iptype = 6;
                ra = (__u8 *)&raddr6->sin6_addr;
        }
@@@ -2201,7 -2291,7 +2201,7 @@@ static int act_open_rpl(struct c4iw_de
        PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
             status, status2errno(status));
  
 -      if (is_neg_adv(status)) {
 +      if (cxgb_is_neg_adv(status)) {
                PDBG("%s Connection problems for atid %u status %u (%s)\n",
                     __func__, atid, status, neg_adv_str(status));
                ep->stats.connect_neg_adv++;
@@@ -2328,7 -2418,7 +2328,7 @@@ static int accept_cr(struct c4iw_ep *ep
        unsigned int mtu_idx;
        u64 opt0;
        u32 opt2;
 -      int wscale;
 +      u32 wscale;
        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
        int win;
        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
                                                    ep->hwtid));
  
 -      best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 -               enable_tcp_timestamps && req->tcpopt.tstamp,
 -               (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
 -      wscale = compute_wscale(rcv_win);
 +      cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 +                    enable_tcp_timestamps && req->tcpopt.tstamp,
 +                    (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 +      wscale = cxgb_compute_wscale(rcv_win);
  
        /*
         * Specify the largest window that will fit in opt0. The
@@@ -2424,6 -2514,42 +2424,6 @@@ static void reject_cr(struct c4iw_dev *
        return;
  }
  
 -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
 -                     int *iptype, __u8 *local_ip, __u8 *peer_ip,
 -                     __be16 *local_port, __be16 *peer_port)
 -{
 -      int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
 -                    ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
 -                    T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
 -      int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
 -                   IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
 -                   T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
 -      struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
 -      struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
 -      struct tcphdr *tcp = (struct tcphdr *)
 -                           ((u8 *)(req + 1) + eth_len + ip_len);
 -
 -      if (ip->version == 4) {
 -              PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
 -                   ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
 -                   ntohs(tcp->dest));
 -              *iptype = 4;
 -              memcpy(peer_ip, &ip->saddr, 4);
 -              memcpy(local_ip, &ip->daddr, 4);
 -      } else {
 -              PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
 -                   ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
 -                   ntohs(tcp->dest));
 -              *iptype = 6;
 -              memcpy(peer_ip, ip6->saddr.s6_addr, 16);
 -              memcpy(local_ip, ip6->daddr.s6_addr, 16);
 -      }
 -      *peer_port = tcp->source;
 -      *local_port = tcp->dest;
 -
 -      return;
 -}
 -
  static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
  {
        struct c4iw_ep *child_ep = NULL, *parent_ep;
                goto reject;
        }
  
 -      get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
 -                 local_ip, peer_ip, &local_port, &peer_port);
 +      cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
 +                      &iptype, local_ip, peer_ip, &local_port, &peer_port);
  
        /* Find output route */
        if (iptype == 4)  {
                     , __func__, parent_ep, hwtid,
                     local_ip, peer_ip, ntohs(local_port),
                     ntohs(peer_port), peer_mss);
 -              dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
 -                               local_port, peer_port,
 -                               tos);
 +              dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
 +                                    *(__be32 *)local_ip, *(__be32 *)peer_ip,
 +                                    local_port, peer_port, tos);
        } else {
                PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
                     , __func__, parent_ep, hwtid,
                     local_ip, peer_ip, ntohs(local_port),
                     ntohs(peer_port), peer_mss);
 -              dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
 -                                PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
 -                                ((struct sockaddr_in6 *)
 -                                &parent_ep->com.local_addr)->sin6_scope_id);
 +              dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
 +                              local_ip, peer_ip, local_port, peer_port,
 +                              PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
 +                              ((struct sockaddr_in6 *)
 +                               &parent_ep->com.local_addr)->sin6_scope_id);
        }
        if (!dst) {
                printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
@@@ -2706,18 -2831,18 +2706,18 @@@ static int peer_abort(struct c4iw_dev *
  {
        struct cpl_abort_req_rss *req = cplhdr(skb);
        struct c4iw_ep *ep;
 -      struct cpl_abort_rpl *rpl;
        struct sk_buff *rpl_skb;
        struct c4iw_qp_attributes attrs;
        int ret;
        int release = 0;
        unsigned int tid = GET_TID(req);
 +      u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
  
        ep = get_ep_from_tid(dev, tid);
        if (!ep)
                return 0;
  
 -      if (is_neg_adv(req->status)) {
 +      if (cxgb_is_neg_adv(req->status)) {
                PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
                     __func__, ep->hwtid, req->status,
                     neg_adv_str(req->status));
                release = 1;
                goto out;
        }
 -      set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 -      rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
 -      INIT_TP_WR(rpl, ep->hwtid);
 -      OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
 -      rpl->cmd = CPL_ABORT_NO_RST;
 +
 +      cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
 +
        c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
  out:
        if (release)
@@@ -3009,7 -3136,7 +3009,7 @@@ int c4iw_accept_cr(struct iw_cm_id *cm_
        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
                if (conn_param->ord > ep->ird) {
                        if (RELAXED_IRD_NEGOTIATION) {
 -                              ep->ord = ep->ird;
 +                              conn_param->ord = ep->ird;
                        } else {
                                ep->ird = conn_param->ird;
                                ep->ord = conn_param->ord;
@@@ -3244,11 -3371,9 +3244,11 @@@ int c4iw_connect(struct iw_cm_id *cm_id
                PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
                     __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
                     ra, ntohs(raddr->sin_port));
 -              ep->dst = find_route(dev, laddr->sin_addr.s_addr,
 -                                   raddr->sin_addr.s_addr, laddr->sin_port,
 -                                   raddr->sin_port, cm_id->tos);
 +              ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
 +                                        laddr->sin_addr.s_addr,
 +                                        raddr->sin_addr.s_addr,
 +                                        laddr->sin_port,
 +                                        raddr->sin_port, cm_id->tos);
        } else {
                iptype = 6;
                ra = (__u8 *)&raddr6->sin6_addr;
                     __func__, laddr6->sin6_addr.s6_addr,
                     ntohs(laddr6->sin6_port),
                     raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
 -              ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
 -                                    raddr6->sin6_addr.s6_addr,
 -                                    laddr6->sin6_port, raddr6->sin6_port, 0,
 -                                    raddr6->sin6_scope_id);
 +              ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
 +                                         laddr6->sin6_addr.s6_addr,
 +                                         raddr6->sin6_addr.s6_addr,
 +                                         laddr6->sin6_port,
 +                                         raddr6->sin6_port, 0,
 +                                         raddr6->sin6_scope_id);
        }
        if (!ep->dst) {
                printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
@@@ -3914,9 -4037,8 +3914,9 @@@ static int rx_pkt(struct c4iw_dev *dev
             ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
             ntohs(tcph->source), iph->tos);
  
 -      dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
 -                       iph->tos);
 +      dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
 +                            iph->daddr, iph->saddr, tcph->dest,
 +                            tcph->source, iph->tos);
        if (!dst) {
                pr_err("%s - failed to find dst entry!\n",
                       __func__);
@@@ -4191,7 -4313,7 +4191,7 @@@ static int peer_abort_intr(struct c4iw_
                kfree_skb(skb);
                return 0;
        }
 -      if (is_neg_adv(req->status)) {
 +      if (cxgb_is_neg_adv(req->status)) {
                PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
                     __func__, ep->hwtid, req->status,
                     neg_adv_str(req->status));
@@@ -4235,7 -4357,7 +4235,7 @@@ int __init c4iw_cm_init(void
        spin_lock_init(&timeout_lock);
        skb_queue_head_init(&rxq);
  
-       workq = create_singlethread_workqueue("iw_cxgb4");
+       workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
        if (!workq)
                return -ENOMEM;
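
The cm.c changes above drop the driver-local find_route()/find_route6(), best_mtu(), compute_wscale(), is_neg_adv() and get_4tuple() in favour of shared cxgb_* helpers, so iw_cxgb4 no longer carries its own copy of this connection-management boilerplate. A minimal sketch of the two smallest helpers, assuming they simply mirror the removed driver-local versions (is_neg_adv() is removed above; compute_wscale() goes away in the iw_cxgb4.h hunk below; the CPL_ERR_* codes come from the existing Chelsio headers):

/* sketch of the shared helpers, mirroring the removed driver-local code */
static inline bool cxgb_is_neg_adv(unsigned int status)
{
	return status == CPL_ERR_RTX_NEG_ADVICE ||
	       status == CPL_ERR_PERSIST_NEG_ADVICE ||
	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
}

static inline u32 cxgb_compute_wscale(u32 rcv_win)
{
	u32 wscale = 0;

	/* TCP window scaling: shift of at most 14, enough to cover rcv_win */
	while (wscale < 14 && (65535 << wscale) < rcv_win)
		wscale++;
	return wscale;
}
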
  
index ac926c942fee7bc72036e13f269e84156ded95d4,bc522a9b2bfa4b07285ce0d31b6dbd1db1c9f314..867b8cf82be8eb092f62c464802bedae0c3efe3a
@@@ -666,6 -666,18 +666,18 @@@ skip_cqe
        return ret;
  }
  
+ static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
+ {
+       struct c4iw_mr *mhp;
+       unsigned long flags;
+       spin_lock_irqsave(&rhp->lock, flags);
+       mhp = get_mhp(rhp, rkey >> 8);
+       if (mhp)
+               mhp->attr.state = 0;
+       spin_unlock_irqrestore(&rhp->lock, flags);
+ }
  /*
   * Get one cq entry from c4iw and map it to openib.
   *
@@@ -721,6 -733,7 +733,7 @@@ static int c4iw_poll_cq_one(struct c4iw
                    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
                        wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+                       invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
                }
        } else {
                switch (CQE_OPCODE(&cqe)) {
                        break;
                case FW_RI_FAST_REGISTER:
                        wc->opcode = IB_WC_REG_MR;
+                       /* Invalidate the MR if the fastreg failed */
+                       if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
+                               invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe));
                        break;
                default:
                        printk(KERN_ERR MOD "Unexpected opcode %d "
@@@ -1016,15 -1033,15 +1033,15 @@@ int c4iw_resize_cq(struct ib_cq *cq, in
  int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
  {
        struct c4iw_cq *chp;
 -      int ret;
 +      int ret = 0;
        unsigned long flag;
  
        chp = to_c4iw_cq(ibcq);
        spin_lock_irqsave(&chp->lock, flag);
 -      ret = t4_arm_cq(&chp->cq,
 -                      (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
 +      t4_arm_cq(&chp->cq,
 +                (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
 +      if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 +              ret = t4_cq_notempty(&chp->cq);
        spin_unlock_irqrestore(&chp->lock, flag);
 -      if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
 -              ret = 0;
        return ret;
  }
index cdcf3eeb6f4ab45c6c21575ae2dedce93cfff0bb,f83604b2f82da488ca0fa4ba79466652c4b381fc..7e7f79e5500654f6aa8b1c42b29c20f9491b3c1a
@@@ -58,7 -58,7 +58,7 @@@
  #include "cxgb4.h"
  #include "cxgb4_uld.h"
  #include "l2t.h"
- #include "user.h"
+ #include <rdma/cxgb4-abi.h>
  
  #define DRV_NAME "iw_cxgb4"
  #define MOD DRV_NAME ":"
@@@ -263,7 -263,6 +263,7 @@@ struct c4iw_dev 
        struct idr stid_idr;
        struct list_head db_fc_list;
        u32 avail_ird;
 +      wait_queue_head_t wait;
  };
  
  static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@@ -882,6 -881,15 +882,6 @@@ static inline struct c4iw_listen_ep *to
        return cm_id->provider_data;
  }
  
 -static inline int compute_wscale(int win)
 -{
 -      int wscale = 0;
 -
 -      while (wscale < 14 && (65535<<wscale) < win)
 -              wscale++;
 -      return wscale;
 -}
 -
  static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
  {
  #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
index 690435229be7a9afc17db2eb8c9a220b59546456,3467b906cff88e80c15c0226a53db8a007576313..f57deba6717ce69d08b12842ea7ff5fa8514c67a
@@@ -609,10 -609,42 +609,42 @@@ static int build_rdma_recv(struct c4iw_
        return 0;
  }
  
+ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
+                             struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+                             u8 *len16)
+ {
+       __be64 *p = (__be64 *)fr->pbl;
+       fr->r2 = cpu_to_be32(0);
+       fr->stag = cpu_to_be32(mhp->ibmr.rkey);
+       fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+               FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) |
+               FW_RI_TPTE_STAGSTATE_V(1) |
+               FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) |
+               FW_RI_TPTE_PDID_V(mhp->attr.pdid));
+       fr->tpte.locread_to_qpid = cpu_to_be32(
+               FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) |
+               FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) |
+               FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12));
+       fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V(
+               PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3));
+       fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0);
+       fr->tpte.len_hi = cpu_to_be32(0);
+       fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length);
+       fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
+       fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
+       p[0] = cpu_to_be64((u64)mhp->mpl[0]);
+       p[1] = cpu_to_be64((u64)mhp->mpl[1]);
+       *len16 = DIV_ROUND_UP(sizeof(*fr), 16);
+ }
  static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
-                       struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported)
+                       struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16,
+                       bool dsgl_supported)
  {
-       struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
        struct fw_ri_immd *imdp;
        __be64 *p;
        int i;
        return 0;
  }
  
- static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
-                         u8 *len16)
+ static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
+                         struct ib_send_wr *wr, u8 *len16)
  {
+       struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8);
+       mhp->attr.state = 0;
        wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
        wqe->inv.r2 = 0;
        *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
        return 0;
  }
  
 -void _free_qp(struct kref *kref)
 +static void _free_qp(struct kref *kref)
  {
        struct c4iw_qp *qhp;
  
@@@ -816,18 -851,32 +851,32 @@@ int c4iw_post_send(struct ib_qp *ibqp, 
                        if (!qhp->wq.sq.oldest_read)
                                qhp->wq.sq.oldest_read = swsqe;
                        break;
-               case IB_WR_REG_MR:
-                       fw_opcode = FW_RI_FR_NSMR_WR;
+               case IB_WR_REG_MR: {
+                       struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
                        swsqe->opcode = FW_RI_FAST_REGISTER;
-                       err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
-                               qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
+                       if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
+                           !mhp->attr.state && mhp->mpl_len <= 2) {
+                               fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
+                               build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
+                                                 mhp, &len16);
+                       } else {
+                               fw_opcode = FW_RI_FR_NSMR_WR;
+                               err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
+                                      mhp, &len16,
+                                      qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
+                               if (err)
+                                       break;
+                       }
+                       mhp->attr.state = 1;
                        break;
+               }
                case IB_WR_LOCAL_INV:
                        if (wr->send_flags & IB_SEND_FENCE)
                                fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
                        fw_opcode = FW_RI_INV_LSTAG_WR;
                        swsqe->opcode = FW_RI_LOCAL_INV;
-                       err = build_inv_stag(wqe, wr, &len16);
+                       err = build_inv_stag(qhp->rhp, wqe, wr, &len16);
                        break;
                default:
                        PDBG("%s post of type=%d TBD!\n", __func__,
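
The post-send hunk above adds a fast path for small fast-register work requests: when the adapter reports fr_nsmr_tpte_wr_support and the MR maps at most two pages (mpl_len <= 2) and is not currently valid, the TPT entry is written inline with FW_RI_FR_NSMR_TPTE_WR instead of the generic FW_RI_FR_NSMR_WR. A hedged, consumer-side sketch of a registration that would take this path; qp, pd, sgl and sg_nents are assumed locals and the access flags are illustrative only:

/* sketch: a <=2-page MR registration posted by a ULP */
struct ib_send_wr *bad_wr;
struct ib_reg_wr reg_wr;
struct ib_mr *mr;
int n;

mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 2);		/* at most two pages */
if (IS_ERR(mr))
	return PTR_ERR(mr);

n = ib_map_mr_sg(mr, sgl, sg_nents, NULL, PAGE_SIZE);	/* sg_nents <= 2 */
if (n < sg_nents)
	return n < 0 ? n : -EINVAL;

memset(&reg_wr, 0, sizeof(reg_wr));
reg_wr.wr.opcode     = IB_WR_REG_MR;
reg_wr.wr.send_flags = IB_SEND_SIGNALED;
reg_wr.mr            = mr;
reg_wr.key           = mr->rkey;
reg_wr.access        = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

return ib_post_send(qp, &reg_wr.wr, &bad_wr);
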
index 02173f4315fa62bec5e67171a45ab864a1e096da,b2bfbb1eef1a2a3d91ecdd8aaa085e88dd5e88d9..862381aa83c824bb8712e408df39e49f47f11975
@@@ -95,6 -95,7 +95,7 @@@ union t4_wr 
        struct fw_ri_rdma_read_wr read;
        struct fw_ri_bind_mw_wr bind;
        struct fw_ri_fr_nsmr_wr fr;
+       struct fw_ri_fr_nsmr_tpte_wr fr_tpte;
        struct fw_ri_inv_lstag_wr inv;
        struct t4_status_page status;
        __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
@@@ -170,7 -171,7 +171,7 @@@ struct t4_cqe 
                        __be32 msn;
                } rcqe;
                struct {
-                       u32 nada1;
+                       u32 stag;
                        u16 nada2;
                        u16 cidx;
                } scqe;
  
  /* used for SQ completion processing */
  #define CQE_WRID_SQ_IDX(x)    ((x)->u.scqe.cidx)
+ #define CQE_WRID_FR_STAG(x)     (be32_to_cpu((x)->u.scqe.stag))
  
  /* generic accessor macros */
  #define CQE_WRID_HI(x)                (be32_to_cpu((x)->u.gen.wrid_hi))
@@@ -634,11 -636,6 +636,11 @@@ static inline int t4_valid_cqe(struct t
        return (CQE_GENBIT(cqe) == cq->gen);
  }
  
 +static inline int t4_cq_notempty(struct t4_cq *cq)
 +{
 +      return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]);
 +}
 +
  static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
  {
        int ret;
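
The new t4_cq_notempty() above is what lets c4iw_arm_cq() (earlier cq.c hunk) honour IB_CQ_REPORT_MISSED_EVENTS: a positive return now means completions may still be pending, per the verbs contract. The usual consumer re-arm loop that relies on this, sketched with an assumed handle_completion() callback:

/* standard CQ re-arm loop that depends on IB_CQ_REPORT_MISSED_EVENTS */
struct ib_wc wc;

do {
	while (ib_poll_cq(cq, 1, &wc) > 0)
		handle_completion(&wc);		/* consumer-specific */
} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
			       IB_CQ_REPORT_MISSED_EVENTS) > 0);
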
index f2f6b5a78e0e99af55a29b9812fafcbce70783b4,f803f7b5ef5db1446ba4009a07945350b3e7edde..4b7a16ceb3629cad0cc8d8b5322e997ec4ed4486
@@@ -76,7 -76,7 +76,7 @@@ static unsigned int hfi1_max_ahs = 0xFF
  module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
  MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  
 -unsigned int hfi1_max_cqes = 0x2FFFF;
 +unsigned int hfi1_max_cqes = 0x2FFFFF;
  module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
  MODULE_PARM_DESC(max_cqes,
                 "Maximum number of completion queue entries to support");
@@@ -89,7 -89,7 +89,7 @@@ unsigned int hfi1_max_qp_wrs = 0x3FFF
  module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
  MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  
 -unsigned int hfi1_max_qps = 16384;
 +unsigned int hfi1_max_qps = 32768;
  module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
  MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  
@@@ -335,7 -335,7 +335,7 @@@ const u8 hdr_len_by_opcode[256] = 
        [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST]        = 12 + 8 + 4,
        [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY]        = 12 + 8 + 4,
        [IB_OPCODE_RC_ACKNOWLEDGE]                    = 12 + 8 + 4,
 -      [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4,
 +      [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4 + 8,
        [IB_OPCODE_RC_COMPARE_SWAP]                   = 12 + 8 + 28,
        [IB_OPCODE_RC_FETCH_ADD]                      = 12 + 8 + 28,
        [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE]      = 12 + 8 + 4,
@@@ -403,28 -403,6 +403,28 @@@ static const opcode_handler opcode_hand
        [IB_OPCODE_CNP]                               = &hfi1_cnp_rcv
  };
  
 +#define OPMASK 0x1f
 +
 +static const u32 pio_opmask[BIT(3)] = {
 +      /* RC */
 +      [IB_OPCODE_RC >> 5] =
 +              BIT(RC_OP(SEND_ONLY) & OPMASK) |
 +              BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
 +              BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
 +              BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
 +              BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
 +              BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
 +              BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
 +              BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
 +              BIT(RC_OP(FETCH_ADD) & OPMASK),
 +      /* UC */
 +      [IB_OPCODE_UC >> 5] =
 +              BIT(UC_OP(SEND_ONLY) & OPMASK) |
 +              BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
 +              BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
 +              BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
 +};
 +
  /*
   * System image GUID.
   */
@@@ -589,7 -567,7 +589,7 @@@ static inline opcode_handler qp_ok(int 
  void hfi1_ib_rcv(struct hfi1_packet *packet)
  {
        struct hfi1_ctxtdata *rcd = packet->rcd;
 -      struct hfi1_ib_header *hdr = packet->hdr;
 +      struct ib_header *hdr = packet->hdr;
        u32 tlen = packet->tlen;
        struct hfi1_pportdata *ppd = rcd->ppd;
        struct hfi1_ibport *ibp = &ppd->ibport_data;
@@@ -741,7 -719,7 +741,7 @@@ static void verbs_sdma_complete
        if (tx->wqe) {
                hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
        } else if (qp->ibqp.qp_type == IB_QPT_RC) {
 -              struct hfi1_ib_header *hdr;
 +              struct ib_header *hdr;
  
                hdr = &tx->phdr.hdr;
                hfi1_rc_send_complete(qp, hdr);
@@@ -770,7 -748,7 +770,7 @@@ static int wait_kmem(struct hfi1_ibdev 
                        qp->s_flags |= RVT_S_WAIT_KMEM;
                        list_add_tail(&priv->s_iowait.list, &dev->memwait);
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
 -                      atomic_inc(&qp->refcount);
 +                      rvt_get_qp(qp);
                }
                write_sequnlock(&dev->iowait_lock);
                qp->s_flags &= ~RVT_S_BUSY;
@@@ -981,7 -959,7 +981,7 @@@ static int pio_wait(struct rvt_qp *qp
                        was_empty = list_empty(&sc->piowait);
                        list_add_tail(&priv->s_iowait.list, &sc->piowait);
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
 -                      atomic_inc(&qp->refcount);
 +                      rvt_get_qp(qp);
                        /* counting: only call wantpiobuf_intr if first user */
                        if (was_empty)
                                hfi1_sc_wantpiobuf_intr(sc, 1);
@@@ -1222,7 -1200,7 +1222,7 @@@ static inline send_routine get_send_rou
  {
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct hfi1_qp_priv *priv = qp->priv;
 -      struct hfi1_ib_header *h = &tx->phdr.hdr;
 +      struct ib_header *h = &tx->phdr.hdr;
  
        if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
                return dd->process_pio_send;
        case IB_QPT_GSI:
        case IB_QPT_UD:
                break;
 -      case IB_QPT_RC:
 -              if (piothreshold &&
 -                  qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
 -                  (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
 -                  iowait_sdma_pending(&priv->s_iowait) == 0 &&
 -                  !sdma_txreq_built(&tx->txreq))
 -                      return dd->process_pio_send;
 -              break;
        case IB_QPT_UC:
 +      case IB_QPT_RC: {
 +              u8 op = get_opcode(h);
 +
                if (piothreshold &&
                    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
 -                  (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
 +                  (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
                    iowait_sdma_pending(&priv->s_iowait) == 0 &&
                    !sdma_txreq_built(&tx->txreq))
                        return dd->process_pio_send;
                break;
 +      }
        default:
                break;
        }
@@@ -1262,8 -1244,8 +1262,8 @@@ int hfi1_verbs_send(struct rvt_qp *qp, 
  {
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct hfi1_qp_priv *priv = qp->priv;
 -      struct hfi1_other_headers *ohdr;
 -      struct hfi1_ib_header *hdr;
 +      struct ib_other_headers *ohdr;
 +      struct ib_header *hdr;
        send_routine sr;
        int ret;
        u8 lnh;
@@@ -1441,7 -1423,8 +1441,8 @@@ static int modify_device(struct ib_devi
        }
  
        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
-               memcpy(device->node_desc, device_modify->node_desc, 64);
+               memcpy(device->node_desc, device_modify->node_desc,
+                      IB_DEVICE_NODE_DESC_MAX);
                for (i = 0; i < dd->num_pports; i++) {
                        struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;
  
@@@ -1772,7 -1755,7 +1773,7 @@@ void hfi1_cnp_rcv(struct hfi1_packet *p
  {
        struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 -      struct hfi1_ib_header *hdr = packet->hdr;
 +      struct ib_header *hdr = packet->hdr;
        struct rvt_qp *qp = packet->qp;
        u32 lqpn, rqpn = 0;
        u16 rlid = 0;
                return;
        }
  
 -      sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
 +      sc5 = hdr2sc(hdr, packet->rhf);
        sl = ibp->sc_to_sl[sc5];
        lqpn = qp->ibqp.qp_num;
  
index 7ca0638579c0b31d9cd0fdc71ea6858b77dba7ea,c490f8d498644103180bbe43fb97215af26f8fb2..85637696f6e9600a638e78a48aa9272d18c08883
@@@ -535,8 -535,8 +535,8 @@@ static struct i40iw_puda_buf *i40iw_for
                buf += hdr_len;
        }
  
 -      if (pd_len)
 -              memcpy(buf, pdata->addr, pd_len);
 +      if (pdata && pdata->addr)
 +              memcpy(buf, pdata->addr, pdata->size);
  
        atomic_set(&sqbuf->refcount, 1);
  
@@@ -3166,8 -3166,11 +3166,11 @@@ void i40iw_setup_cm_core(struct i40iw_d
        spin_lock_init(&cm_core->ht_lock);
        spin_lock_init(&cm_core->listen_list_lock);
  
-       cm_core->event_wq = create_singlethread_workqueue("iwewq");
-       cm_core->disconn_wq = create_singlethread_workqueue("iwdwq");
+       cm_core->event_wq = alloc_ordered_workqueue("iwewq",
+                                                   WQ_MEM_RECLAIM);
+       cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq",
+                                                     WQ_MEM_RECLAIM);
  }
  
  /**
@@@ -3346,6 -3349,26 +3349,6 @@@ int i40iw_cm_disconn(struct i40iw_qp *i
        return 0;
  }
  
 -/**
 - * i40iw_loopback_nop - Send a nop
 - * @qp: associated hw qp
 - */
 -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
 -{
 -      u64 *wqe;
 -      u64 header;
 -
 -      wqe = qp->qp_uk.sq_base->elem;
 -      set_64bit_val(wqe, 0, 0);
 -      set_64bit_val(wqe, 8, 0);
 -      set_64bit_val(wqe, 16, 0);
 -
 -      header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
 -          LS_64(0, I40IWQPSQ_SIGCOMPL) |
 -          LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
 -      set_64bit_val(wqe, 24, header);
 -}
 -
  /**
   * i40iw_qp_disconnect - free qp and close cm
   * @iwqp: associate qp for the connection
@@@ -3618,7 -3641,7 +3621,7 @@@ int i40iw_accept(struct iw_cm_id *cm_id
        } else {
                if (iwqp->page)
                        iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
 -              i40iw_loopback_nop(&iwqp->sc_qp);
 +              dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
        }
  
        if (iwqp->page)
index 445e230d5ff88f6de4e1b0e5d0d81160028842cf,798335fa3105e201d3a93f110c245d8ffbb29d48..ac2f3cd9478c9bd15581a3d488da592edfacf002
@@@ -100,7 -100,7 +100,7 @@@ static struct notifier_block i40iw_net_
        .notifier_call = i40iw_net_event
  };
  
 -static int i40iw_notifiers_registered;
 +static atomic_t i40iw_notifiers_registered;
  
  /**
   * i40iw_find_i40e_handler - find a handler given a client info
@@@ -1342,11 -1342,12 +1342,11 @@@ exit
   */
  static void i40iw_register_notifiers(void)
  {
 -      if (!i40iw_notifiers_registered) {
 +      if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
                register_inetaddr_notifier(&i40iw_inetaddr_notifier);
                register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
                register_netevent_notifier(&i40iw_net_notifier);
        }
 -      i40iw_notifiers_registered++;
  }
  
  /**
@@@ -1428,7 -1429,8 +1428,7 @@@ static void i40iw_deinit_device(struct 
                        i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
                /* fallthrough */
        case INET_NOTIFIER:
 -              if (i40iw_notifiers_registered > 0) {
 -                      i40iw_notifiers_registered--;
 +              if (!atomic_dec_return(&i40iw_notifiers_registered)) {
                        unregister_netevent_notifier(&i40iw_net_notifier);
                        unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
                        unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
@@@ -1556,10 -1558,6 +1556,10 @@@ static int i40iw_open(struct i40e_info 
        enum i40iw_status_code status;
        struct i40iw_handler *hdl;
  
 +      hdl = i40iw_find_netdev(ldev->netdev);
 +      if (hdl)
 +              return 0;
 +
        hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
        if (!hdl)
                return -ENOMEM;
                status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
                if (status)
                        break;
-               iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch");
+               iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
                i40iw_register_notifiers();
                iwdev->init_state = INET_NOTIFIER;
                status = i40iw_add_mac_ip(iwdev);
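
The workqueue hunks here follow the tree-wide replacement of the deprecated create_singlethread_workqueue(): alloc_ordered_workqueue() with WQ_MEM_RECLAIM keeps the strict one-at-a-time ordering while also giving the queue a rescuer for the memory-reclaim path. The general shape of the conversion, using the "iwvch" queue above as the example:

/* before */
wq = create_singlethread_workqueue("iwvch");

/* after: same one-at-a-time ordering, plus a rescuer for memory reclaim */
wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
if (!wq)
	return -ENOMEM;
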
index 5df63dacaaa32f2b0a4944559181e3f877bb94b5,2f0b4eed7eaeaef28d08e47a18df89906dcb1da0..1ea686b9e0f963cbfb49dc22fa4333f30922431e
@@@ -37,7 -37,7 +37,7 @@@
  #include <linux/slab.h>
  
  #include "mlx4_ib.h"
- #include "user.h"
+ #include <rdma/mlx4-abi.h>
  
  static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
  {
@@@ -576,8 -576,8 +576,8 @@@ static int mlx4_ib_ipoib_csum_ok(__be1
                checksum == cpu_to_be16(0xffff);
  }
  
 -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
 -                         unsigned tail, struct mlx4_cqe *cqe, int is_eth)
 +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
 +                          unsigned tail, struct mlx4_cqe *cqe, int is_eth)
  {
        struct mlx4_ib_proxy_sqp_hdr *hdr;
  
                wc->slid        = be16_to_cpu(hdr->tun.slid_mac_47_32);
                wc->sl          = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
        }
 -
 -      return 0;
  }
  
  static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
@@@ -687,6 -689,12 +687,6 @@@ repoll
        is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
                MLX4_CQE_OPCODE_ERROR;
  
 -      if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
 -                   is_send)) {
 -              pr_warn("Completion for NOP opcode detected!\n");
 -              return -EINVAL;
 -      }
 -
        /* Resize CQ in progress */
        if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
                if (cq->resize_buf) {
                 */
                mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
                                       be32_to_cpu(cqe->vlan_my_qpn));
 -              if (unlikely(!mqp)) {
 -                      pr_warn("CQ %06x with entry for unknown QPN %06x\n",
 -                             cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
 -                      return -EINVAL;
 -              }
 -
                *cur_qp = to_mibqp(mqp);
        }
  
                /* SRQ is also in the radix tree */
                msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
                                       srq_num);
 -              if (unlikely(!msrq)) {
 -                      pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
 -                              cq->mcq.cqn, srq_num);
 -                      return -EINVAL;
 -              }
        }
  
        if (is_send) {
                if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
                        if ((*cur_qp)->mlx4_ib_qp_type &
                            (MLX4_IB_QPT_PROXY_SMI_OWNER |
 -                           MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
 -                              return use_tunnel_data(*cur_qp, cq, wc, tail,
 -                                                     cqe, is_eth);
 +                           MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
 +                              use_tunnel_data(*cur_qp, cq, wc, tail, cqe,
 +                                              is_eth);
 +                              return 0;
 +                      }
                }
  
                wc->slid           = be16_to_cpu(cqe->rlid);
@@@ -874,6 -891,7 +874,6 @@@ int mlx4_ib_poll_cq(struct ib_cq *ibcq
        struct mlx4_ib_qp *cur_qp = NULL;
        unsigned long flags;
        int npolled;
 -      int err = 0;
        struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
  
        spin_lock_irqsave(&cq->lock, flags);
        }
  
        for (npolled = 0; npolled < num_entries; ++npolled) {
 -              err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
 -              if (err)
 +              if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
                        break;
        }
  
  out:
        spin_unlock_irqrestore(&cq->lock, flags);
  
 -      if (err == 0 || err == -EAGAIN)
 -              return npolled;
 -      else
 -              return err;
 +      return npolled;
  }
  
  int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
index 0f21c3a25552d47b7305c0df2980a75b36a55970,1301a1db958c1721ccf5d141931215851be7f847..1672907ff219771fa0e479853176db0a988ea8c1
@@@ -230,6 -230,8 +230,8 @@@ static void smp_snoop(struct ib_device 
            mad->mad_hdr.method == IB_MGMT_METHOD_SET)
                switch (mad->mad_hdr.attr_id) {
                case IB_SMP_ATTR_PORT_INFO:
+                       if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
+                               return;
                        pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
                        lid = be16_to_cpu(pinfo->lid);
  
                        break;
  
                case IB_SMP_ATTR_PKEY_TABLE:
+                       if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
+                               return;
                        if (!mlx4_is_mfunc(dev->dev)) {
                                mlx4_ib_dispatch_event(dev, port_num,
                                                       IB_EVENT_PKEY_CHANGE);
                        break;
  
                case IB_SMP_ATTR_GUID_INFO:
+                       if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
+                               return;
                        /* paravirtualized master's guid is guid 0 -- does not change */
                        if (!mlx4_is_master(dev->dev))
                                mlx4_ib_dispatch_event(dev, port_num,
                        }
                        break;
  
+               case IB_SMP_ATTR_SL_TO_VL_TABLE:
+                       /* cache sl to vl mapping changes for use in
+                        * filling QP1 LRH VL field when sending packets
+                        */
+                       if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV &&
+                           dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)
+                               return;
+                       if (!mlx4_is_slave(dev->dev)) {
+                               union sl2vl_tbl_to_u64 sl2vl64;
+                               int jj;
+                               for (jj = 0; jj < 8; jj++) {
+                                       sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj];
+                                       pr_debug("port %u, sl2vl[%d] = %02x\n",
+                                                port_num, jj, sl2vl64.sl8[jj]);
+                               }
+                               atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64);
+                       }
+                       break;
                default:
                        break;
                }
@@@ -345,7 -371,8 +371,8 @@@ static void node_desc_override(struct i
            mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
            mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
                spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
-               memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+               memcpy(((struct ib_smp *) mad)->data, dev->node_desc,
+                      IB_DEVICE_NODE_DESC_MAX);
                spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
        }
  }
@@@ -805,8 -832,7 +832,7 @@@ static int ib_process_mad(struct ib_dev
                return IB_MAD_RESULT_FAILURE;
  
        if (!out_mad->mad_hdr.status) {
-               if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
-                       smp_snoop(ibdev, port_num, in_mad, prev_lid);
+               smp_snoop(ibdev, port_num, in_mad, prev_lid);
                /* slaves get node desc from FW */
                if (!mlx4_is_slave(to_mdev(ibdev)->dev))
                        node_desc_override(ibdev, out_mad);
@@@ -1037,6 -1063,23 +1063,23 @@@ static void handle_client_rereg_event(s
                                                    MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
                }
        }
+       /* Update the sl to vl table from inside client rereg
+        * only if in secure-host mode (snooping is not possible)
+        * and the sl-to-vl change event is not generated by FW.
+        */
+       if (!mlx4_is_slave(dev->dev) &&
+           dev->dev->flags & MLX4_FLAG_SECURE_HOST &&
+           !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) {
+               if (mlx4_is_master(dev->dev))
+                       /* already in work queue from mlx4_ib_event queueing
+                        * mlx4_handle_port_mgmt_change_event, which calls
+                        * this procedure. Therefore, call sl2vl_update directly.
+                        */
+                       mlx4_ib_sl2vl_update(dev, port_num);
+               else
+                       mlx4_sched_ib_sl2vl_update_work(dev, port_num);
+       }
        mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
  }
  
@@@ -1128,27 -1171,6 +1171,27 @@@ void handle_port_mgmt_change_event(stru
  
                /* Generate GUID changed event */
                if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
 +                      if (mlx4_is_master(dev->dev)) {
 +                              union ib_gid gid;
 +                              int err = 0;
 +
 +                              if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
 +                                      err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
 +                              else
 +                                      gid.global.subnet_prefix =
 +                                              eqe->event.port_mgmt_change.params.port_info.gid_prefix;
 +                              if (err) {
 +                                      pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
 +                                              port, err);
 +                              } else {
 +                                      pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
 +                                               port,
 +                                               (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
 +                                               be64_to_cpu(gid.global.subnet_prefix));
 +                                      atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
 +                                                   be64_to_cpu(gid.global.subnet_prefix));
 +                              }
 +                      }
                        mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
                        /*if master, notify all slaves*/
                        if (mlx4_is_master(dev->dev))
                        handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
                }
                break;
+       case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP:
+               /* cache sl to vl mapping changes for use in
+                * filling QP1 LRH VL field when sending packets
+                */
+               if (!mlx4_is_slave(dev->dev)) {
+                       union sl2vl_tbl_to_u64 sl2vl64;
+                       int jj;
+                       for (jj = 0; jj < 8; jj++) {
+                               sl2vl64.sl8[jj] =
+                                       eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj];
+                               pr_debug("port %u, sl2vl[%d] = %02x\n",
+                                        port, jj, sl2vl64.sl8[jj]);
+                       }
+                       atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64);
+               }
+               break;
        default:
                pr_warn("Unsupported subtype 0x%x for "
                        "Port Management Change event\n", eqe->subtype);
@@@ -1918,7 -1958,7 +1979,7 @@@ static int create_pv_resources(struct i
                goto err_buf;
        }
  
-       ctx->pd = ib_alloc_pd(ctx->ib_dev);
+       ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
        if (IS_ERR(ctx->pd)) {
                ret = PTR_ERR(ctx->pd);
                pr_err("Couldn't create tunnel PD (%d)\n", ret);
@@@ -2091,7 -2131,7 +2152,7 @@@ static int mlx4_ib_alloc_demux_ctx(stru
        }
  
        snprintf(name, sizeof name, "mlx4_ibt%d", port);
-       ctx->wq = create_singlethread_workqueue(name);
+       ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
        if (!ctx->wq) {
                pr_err("Failed to create tunnelling WQ for port %d\n", port);
                ret = -ENOMEM;
        }
  
        snprintf(name, sizeof name, "mlx4_ibud%d", port);
-       ctx->ud_wq = create_singlethread_workqueue(name);
+       ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
        if (!ctx->ud_wq) {
                pr_err("Failed to create up/down WQ for port %d\n", port);
                ret = -ENOMEM;
@@@ -2223,8 -2263,6 +2284,8 @@@ int mlx4_ib_init_sriov(struct mlx4_ib_d
                if (err)
                        goto demux_err;
                dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
 +              atomic64_set(&dev->sriov.demux[i].subnet_prefix,
 +                           be64_to_cpu(gid.global.subnet_prefix));
                err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
                                      &dev->sriov.sqps[i]);
                if (err)
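
The mad.c and main.c changes cache each port's SL-to-VL mapping so the QP1 transmit path can fill the LRH VL field without a firmware query; the whole 8-byte SLtoVLMappingTable is moved in and out of an atomic64_t so readers need no lock. The union used throughout these hunks is presumably declared along these lines in mlx4_ib.h:

/* moves the 8-byte SLtoVLMappingTable in and out of an atomic64_t;
 * each byte holds two 4-bit VL entries, covering all 16 SLs.
 */
union sl2vl_tbl_to_u64 {
	u8  sl8[8];
	u64 sl64;
};
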
index 87ba9bca4181c2b5115b0c455a691c2e9f66d649,1811eb5b6aabfca9246bd6271f17dab27c9358dd..b597e822759139f2de50c70dc41394e2a93998ab
@@@ -55,7 -55,7 +55,7 @@@
  #include <linux/mlx4/qp.h>
  
  #include "mlx4_ib.h"
- #include "user.h"
+ #include <rdma/mlx4-abi.h>
  
  #define DRV_NAME      MLX4_IB_DRV_NAME
  #define DRV_VERSION   "2.2-1"
@@@ -832,6 -832,66 +832,66 @@@ static int mlx4_ib_query_gid(struct ib_
        return ret;
  }
  
+ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
+ {
+       union sl2vl_tbl_to_u64 sl2vl64;
+       struct ib_smp *in_mad  = NULL;
+       struct ib_smp *out_mad = NULL;
+       int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
+       int err = -ENOMEM;
+       int jj;
+       if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
+               *sl2vl_tbl = 0;
+               return 0;
+       }
+       in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+       out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+       if (!in_mad || !out_mad)
+               goto out;
+       init_query_mad(in_mad);
+       in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
+       in_mad->attr_mod = 0;
+       if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
+               mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+       err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+                          in_mad, out_mad);
+       if (err)
+               goto out;
+       for (jj = 0; jj < 8; jj++)
+               sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
+       *sl2vl_tbl = sl2vl64.sl64;
+ out:
+       kfree(in_mad);
+       kfree(out_mad);
+       return err;
+ }
+ static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
+ {
+       u64 sl2vl;
+       int i;
+       int err;
+       for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
+               if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+                       continue;
+               err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
+               if (err) {
+                       pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
+                              i, err);
+                       sl2vl = 0;
+               }
+               atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
+       }
+ }
  int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                         u16 *pkey, int netw_view)
  {
@@@ -886,7 -946,7 +946,7 @@@ static int mlx4_ib_modify_device(struc
                return -EOPNOTSUPP;
  
        spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
-       memcpy(ibdev->node_desc, props->node_desc, 64);
+       memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
        spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
  
        /*
        if (IS_ERR(mailbox))
                return 0;
  
-       memcpy(mailbox->buf, props->node_desc, 64);
+       memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
        mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
                 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
  
@@@ -1259,7 -1319,7 +1319,7 @@@ static struct ib_xrcd *mlx4_ib_alloc_xr
        if (err)
                goto err1;
  
-       xrcd->pd = ib_alloc_pd(ibdev);
+       xrcd->pd = ib_alloc_pd(ibdev, 0);
        if (IS_ERR(xrcd->pd)) {
                err = PTR_ERR(xrcd->pd);
                goto err2;
@@@ -1361,6 -1421,19 +1421,19 @@@ struct mlx4_ib_steering 
        union ib_gid gid;
  };
  
+ #define LAST_ETH_FIELD vlan_tag
+ #define LAST_IB_FIELD sl
+ #define LAST_IPV4_FIELD dst_ip
+ #define LAST_TCP_UDP_FIELD src_port
+ /* Field is the last supported field */
+ #define FIELDS_NOT_SUPPORTED(filter, field)\
+       memchr_inv((void *)&filter.field  +\
+                  sizeof(filter.field), 0,\
+                  sizeof(filter) -\
+                  offsetof(typeof(filter), field) -\
+                  sizeof(filter.field))
  static int parse_flow_attr(struct mlx4_dev *dev,
                           u32 qp_num,
                           union ib_flow_spec *ib_spec,
  
        switch (ib_spec->type) {
        case IB_FLOW_SPEC_ETH:
+               if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+                       return -ENOTSUPP;
                type = MLX4_NET_TRANS_RULE_ID_ETH;
                memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
                       ETH_ALEN);
                mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
                break;
        case IB_FLOW_SPEC_IB:
+               if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
+                       return -ENOTSUPP;
                type = MLX4_NET_TRANS_RULE_ID_IB;
                mlx4_spec->ib.l3_qpn =
                        cpu_to_be32(qp_num);
  
  
        case IB_FLOW_SPEC_IPV4:
+               if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+                       return -ENOTSUPP;
                type = MLX4_NET_TRANS_RULE_ID_IPV4;
                mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
                mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
  
        case IB_FLOW_SPEC_TCP:
        case IB_FLOW_SPEC_UDP:
+               if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
+                       return -ENOTSUPP;
                type = ib_spec->type == IB_FLOW_SPEC_TCP ?
                                        MLX4_NET_TRANS_RULE_ID_TCP :
                                        MLX4_NET_TRANS_RULE_ID_UDP;
@@@ -2000,7 -2085,7 +2085,7 @@@ static int init_node_data(struct mlx4_i
        if (err)
                goto out;
  
-       memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+       memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
  
        in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
  
@@@ -2202,9 -2287,6 +2287,9 @@@ static int mlx4_ib_alloc_diag_counters(
        bool per_port = !!(ibdev->dev->caps.flags2 &
                MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
  
 +      if (mlx4_is_slave(ibdev->dev))
 +              return 0;
 +
        for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
                /* i == 1 means we are building port counters */
                if (i && !per_port)
@@@ -2653,6 -2735,7 +2738,7 @@@ static void *mlx4_ib_add(struct mlx4_de
  
        if (init_node_data(ibdev))
                goto err_map;
+       mlx4_init_sl2vl_tbl(ibdev);
  
        for (i = 0; i < ibdev->num_ports; ++i) {
                mutex_init(&ibdev->counters_table[i].mutex);
@@@ -3101,6 -3184,47 +3187,47 @@@ static void handle_bonded_port_state_ev
        ib_dispatch_event(&ibev);
  }
  
+ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
+ {
+       u64 sl2vl;
+       int err;
+       err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
+       if (err) {
+               pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
+                      port, err);
+               sl2vl = 0;
+       }
+       atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
+ }
+ static void ib_sl2vl_update_work(struct work_struct *work)
+ {
+       struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
+       struct mlx4_ib_dev *mdev = ew->ib_dev;
+       int port = ew->port;
+       mlx4_ib_sl2vl_update(mdev, port);
+       kfree(ew);
+ }
+ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
+                                    int port)
+ {
+       struct ib_event_work *ew;
+       ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
+       if (ew) {
+               INIT_WORK(&ew->work, ib_sl2vl_update_work);
+               ew->port = port;
+               ew->ib_dev = ibdev;
+               queue_work(wq, &ew->work);
+       } else {
+               pr_err("failed to allocate memory for sl2vl update work\n");
+       }
+ }
  static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
                          enum mlx4_dev_event event, unsigned long param)
  {
        case MLX4_DEV_EVENT_PORT_UP:
                if (p > ibdev->num_ports)
                        return;
-               if (mlx4_is_master(dev) &&
+               if (!mlx4_is_slave(dev) &&
                    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
                        IB_LINK_LAYER_INFINIBAND) {
-                       mlx4_ib_invalidate_all_guid_record(ibdev, p);
+                       if (mlx4_is_master(dev))
+                               mlx4_ib_invalidate_all_guid_record(ibdev, p);
+                       if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
+                           !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
+                               mlx4_sched_ib_sl2vl_update_work(ibdev, p);
                }
                ibev.event = IB_EVENT_PORT_ACTIVE;
                break;
@@@ -3222,7 -3350,7 +3353,7 @@@ static int __init mlx4_ib_init(void
  {
        int err;
  
-       wq = create_singlethread_workqueue("mlx4_ib");
+       wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
        if (!wq)
                return -ENOMEM;
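Note: the workqueue changes in this file (and in mcg.c below) swap the deprecated create_singlethread_workqueue() for alloc_ordered_workqueue() with WQ_MEM_RECLAIM: work items still run one at a time in queueing order, and the flag guarantees a rescuer thread so the queue keeps making progress under memory pressure. The conversion pattern, sketched:

    /* Same ordering guarantee as the old single-threaded queue; the flag adds
     * forward progress on memory-reclaim-related paths.
     */
    wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
    if (!wq)
            return -ENOMEM;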
  
index 097bfcc4ee997eaab178f8d8353efe79fa00d0a7,7d30be0f287bb2b2816a9f9d39bc33330d29e1a9..a21d37f02f354e953294f590c20947df03fbda4f
@@@ -489,7 -489,7 +489,7 @@@ static u8 get_leave_state(struct mcast_
                if (!group->members[i])
                        leave_state |= (1 << i);
  
 -      return leave_state & (group->rec.scope_join_state & 7);
 +      return leave_state & (group->rec.scope_join_state & 0xf);
  }
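Note: the 0x7 to 0xf mask widening here and in the hunks below extends the MCMemberRecord join_state handling from three states to the full low nibble, so the newer send-only full-member state is no longer silently dropped when proxying multicast joins for VFs. Assuming the usual IBA bit assignment, the nibble decodes roughly as:

    /* Assumed IBA join_state bits (low nibble of scope_join_state); the old
     * 0x7 mask ignored bit 3.
     */
    enum {
            JOIN_STATE_FULL_MEMBER           = 1 << 0,
            JOIN_STATE_NON_MEMBER            = 1 << 1,
            JOIN_STATE_SEND_ONLY_NON_MEMBER  = 1 << 2,
            JOIN_STATE_SEND_ONLY_FULL_MEMBER = 1 << 3,   /* newly handled */
    };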
  
  static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@@ -564,8 -564,8 +564,8 @@@ static void mlx4_ib_mcg_timeout_handler
                } else
                        mcg_warn_group(group, "DRIVER BUG\n");
        } else if (group->state == MCAST_LEAVE_SENT) {
 -              if (group->rec.scope_join_state & 7)
 -                      group->rec.scope_join_state &= 0xf8;
 +              if (group->rec.scope_join_state & 0xf)
 +                      group->rec.scope_join_state &= 0xf0;
                group->state = MCAST_IDLE;
                mutex_unlock(&group->lock);
                if (release_group(group, 1))
@@@ -605,7 -605,7 +605,7 @@@ static int handle_leave_req(struct mcas
  static int handle_join_req(struct mcast_group *group, u8 join_mask,
                           struct mcast_req *req)
  {
 -      u8 group_join_state = group->rec.scope_join_state & 7;
 +      u8 group_join_state = group->rec.scope_join_state & 0xf;
        int ref = 0;
        u16 status;
        struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@@ -690,8 -690,8 +690,8 @@@ static void mlx4_ib_mcg_work_handler(st
                        u8 cur_join_state;
  
                        resp_join_state = ((struct ib_sa_mcmember_data *)
 -                                              group->response_sa_mad.data)->scope_join_state & 7;
 -                      cur_join_state = group->rec.scope_join_state & 7;
 +                                              group->response_sa_mad.data)->scope_join_state & 0xf;
 +                      cur_join_state = group->rec.scope_join_state & 0xf;
  
                        if (method == IB_MGMT_METHOD_GET_RESP) {
                                /* successful join */
@@@ -710,7 -710,7 +710,7 @@@ process_requests
                req = list_first_entry(&group->pending_list, struct mcast_req,
                                       group_list);
                sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
 -              req_join_state = sa_data->scope_join_state & 0x7;
 +              req_join_state = sa_data->scope_join_state & 0xf;
  
                /* For a leave request, we will immediately answer the VF, and
                 * update our internal counters. The actual leave will be sent
@@@ -1045,7 -1045,7 +1045,7 @@@ int mlx4_ib_mcg_port_init(struct mlx4_i
  
        atomic_set(&ctx->tid, 0);
        sprintf(name, "mlx4_ib_mcg%d", ctx->port);
-       ctx->mcg_wq = create_singlethread_workqueue(name);
+       ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
        if (!ctx->mcg_wq)
                return -ENOMEM;
  
@@@ -1246,7 -1246,7 +1246,7 @@@ void clean_vf_mcast(struct mlx4_ib_demu
  
  int mlx4_ib_mcg_init(void)
  {
-       clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
+       clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM);
        if (!clean_wq)
                return -ENOMEM;
  
index 686ab48ff644163879049533985e81b33e4a054f,8db7cb1a3716a0b8a6a37ab1b3641a50fbdec928..35141f451e5c75f01c9670432cf395c7cfcbbe77
@@@ -448,7 -448,7 +448,7 @@@ struct mlx4_ib_demux_ctx 
        struct workqueue_struct *wq;
        struct workqueue_struct *ud_wq;
        spinlock_t ud_lock;
 -      __be64 subnet_prefix;
 +      atomic64_t subnet_prefix;
        __be64 guid_cache[128];
        struct mlx4_ib_dev *dev;
        /* the following lock protects both mcg_table and mcg_mgid0_list */
@@@ -570,6 -570,7 +570,7 @@@ struct mlx4_ib_dev 
        struct ib_mad_agent    *send_agent[MLX4_MAX_PORTS][2];
        struct ib_ah           *sm_ah[MLX4_MAX_PORTS];
        spinlock_t              sm_lock;
+       atomic64_t              sl2vl[MLX4_MAX_PORTS];
        struct mlx4_ib_sriov    sriov;
  
        struct mutex            cap_mask_mutex;
@@@ -600,6 -601,7 +601,7 @@@ struct ib_event_work 
        struct work_struct      work;
        struct mlx4_ib_dev      *ib_dev;
        struct mlx4_eqe         ib_eqe;
+       int                     port;
  };
  
  struct mlx4_ib_qp_tunnel_init_attr {
@@@ -883,4 -885,9 +885,9 @@@ int mlx4_ib_rereg_user_mr(struct ib_mr 
  int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
                                    u8 port_num, int index);
  
+ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
+                                    int port);
+ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
  #endif /* MLX4_IB_H */
index 7fb9629bd12b9736c181d7c5e0935ff5b7524d9a,16f654dc8a4659aaadb5e30827ef15abfda39fc3..570bc866b1d62251f0ac50dc8c99606d57beedfd
@@@ -47,7 -47,7 +47,7 @@@
  #include <linux/mlx4/qp.h>
  
  #include "mlx4_ib.h"
- #include "user.h"
+ #include <rdma/mlx4-abi.h>
  
  static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
                             struct mlx4_ib_cq *recv_cq);
@@@ -2405,6 -2405,22 +2405,22 @@@ static int build_sriov_qp0_header(struc
        return 0;
  }
  
+ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
+ {
+       union sl2vl_tbl_to_u64 tmp_vltab;
+       u8 vl;
+       if (sl > 15)
+               return 0xf;
+       tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]);
+       vl = tmp_vltab.sl8[sl >> 1];
+       if (sl & 1)
+               vl &= 0x0f;
+       else
+               vl >>= 4;
+       return vl;
+ }
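Note: sl_to_vl() reads the cached table back out: byte sl/2 holds two mappings, the even SL in the high nibble and the odd SL in the low nibble, and 0xf means no VL is assigned. For example, if sl8[0] were 0x23, SL 0 would map to VL 2 and SL 1 to VL 3. A restatement of the extraction as a sketch:

    /* Illustration only: mirrors the nibble extraction in sl_to_vl() above. */
    static u8 nibble_vl(const u8 sl8[8], u8 sl)
    {
            u8 b = sl8[sl >> 1];                  /* two SLs per byte */

            return (sl & 1) ? (b & 0x0f) : (b >> 4);
    }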
  #define MLX4_ROCEV2_QP1_SPORT 0xC000
  static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
                sqp->ud_header.grh.flow_label    =
                        ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
                sqp->ud_header.grh.hop_limit     = ah->av.ib.hop_limit;
 -              if (is_eth)
 +              if (is_eth) {
                        memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
 -              else {
 -              if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 -                      /* When multi-function is enabled, the ib_core gid
 -                       * indexes don't necessarily match the hw ones, so
 -                       * we must use our own cache */
 -                      sqp->ud_header.grh.source_gid.global.subnet_prefix =
 -                              to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
 -                                                     subnet_prefix;
 -                      sqp->ud_header.grh.source_gid.global.interface_id =
 -                              to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
 -                                             guid_cache[ah->av.ib.gid_index];
 -              } else
 -                      ib_get_cached_gid(ib_dev,
 -                                        be32_to_cpu(ah->av.ib.port_pd) >> 24,
 -                                        ah->av.ib.gid_index,
 -                                        &sqp->ud_header.grh.source_gid, NULL);
 +              } else {
 +                      if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 +                              /* When multi-function is enabled, the ib_core gid
 +                               * indexes don't necessarily match the hw ones, so
 +                               * we must use our own cache
 +                               */
 +                              sqp->ud_header.grh.source_gid.global.subnet_prefix =
 +                                      cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
 +                                                                  demux[sqp->qp.port - 1].
 +                                                                  subnet_prefix)));
 +                              sqp->ud_header.grh.source_gid.global.interface_id =
 +                                      to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
 +                                                     guid_cache[ah->av.ib.gid_index];
 +                      } else {
 +                              ib_get_cached_gid(ib_dev,
 +                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
 +                                                ah->av.ib.gid_index,
 +                                                &sqp->ud_header.grh.source_gid, NULL);
 +                      }
                }
                memcpy(sqp->ud_header.grh.destination_gid.raw,
                       ah->av.ib.dgid, 16);
                        sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
                }
        } else {
-               sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
+               sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 :
+                                                       sl_to_vl(to_mdev(ib_dev),
+                                                                sqp->ud_header.lrh.service_level,
+                                                                sqp->qp.port);
+               if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
+                       return -EINVAL;
                if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                        sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        }
index 5de9a65f53bc9afc20bd3f52b95f83a9176c1f4d,1188fef08450dd5e4696956139965bfe83c9ab7b..79d017baf6f49dac160090d1e75ec75fd8b3cf2c
@@@ -35,7 -35,6 +35,6 @@@
  #include <rdma/ib_user_verbs.h>
  #include <rdma/ib_cache.h>
  #include "mlx5_ib.h"
- #include "user.h"
  
  static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
  {
@@@ -553,6 -552,12 +552,6 @@@ repoll
                 * from the table.
                 */
                mqp = __mlx5_qp_lookup(dev->mdev, qpn);
 -              if (unlikely(!mqp)) {
 -                      mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
 -                                   cq->mcq.cqn, qpn);
 -                      return -EINVAL;
 -              }
 -
                *cur_qp = to_mibqp(mqp);
        }
  
                read_lock(&dev->mdev->priv.mkey_table.lock);
                mmkey = __mlx5_mr_lookup(dev->mdev,
                                         mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
 -              if (unlikely(!mmkey)) {
 -                      read_unlock(&dev->mdev->priv.mkey_table.lock);
 -                      mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
 -                                   cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
 -                      return -EINVAL;
 -              }
 -
                mr = to_mibmr(mmkey);
                get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
                mr->sig->sig_err_exists = true;
@@@ -663,6 -675,7 +662,6 @@@ int mlx5_ib_poll_cq(struct ib_cq *ibcq
        unsigned long flags;
        int soft_polled = 0;
        int npolled;
 -      int err = 0;
  
        spin_lock_irqsave(&cq->lock, flags);
        if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                soft_polled = poll_soft_wc(cq, num_entries, wc);
  
        for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
 -              err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
 -              if (err)
 +              if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
                        break;
        }
  
  out:
        spin_unlock_irqrestore(&cq->lock, flags);
  
 -      if (err == 0 || err == -EAGAIN)
 -              return soft_polled + npolled;
 -      else
 -              return err;
 +      return soft_polled + npolled;
  }
  
  int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
index 551aa0e789aabee95ca6a2bb15d7c6d619f9b15f,f4160d56dc4f3b43f65c25969c8bd34c1f371d43..22174774dbb8c392709936b0eb225d3e6768d2c4
@@@ -37,6 -37,7 +37,6 @@@
  #include <linux/pci.h>
  #include <linux/dma-mapping.h>
  #include <linux/slab.h>
 -#include <linux/io-mapping.h>
  #if defined(CONFIG_X86)
  #include <asm/pat.h>
  #endif
@@@ -53,7 -54,6 +53,6 @@@
  #include <linux/in.h>
  #include <linux/etherdevice.h>
  #include <linux/mlx5/fs.h>
- #include "user.h"
  #include "mlx5_ib.h"
  
  #define DRIVER_NAME "mlx5_ib"
@@@ -106,13 -106,42 +105,42 @@@ static int mlx5_netdev_event(struct not
        struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
                                                 roce.nb);
  
-       if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
-               return NOTIFY_DONE;
+       switch (event) {
+       case NETDEV_REGISTER:
+       case NETDEV_UNREGISTER:
+               write_lock(&ibdev->roce.netdev_lock);
+               if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+                       ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
+                                            NULL : ndev;
+               write_unlock(&ibdev->roce.netdev_lock);
+               break;
+       case NETDEV_UP:
+       case NETDEV_DOWN: {
+               struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
+               struct net_device *upper = NULL;
+               if (lag_ndev) {
+                       upper = netdev_master_upper_dev_get(lag_ndev);
+                       dev_put(lag_ndev);
+               }
+               if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
+                   && ibdev->ib_active) {
+                       struct ib_event ibev = {0};
+                       ibev.device = &ibdev->ib_dev;
+                       ibev.event = (event == NETDEV_UP) ?
+                                    IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+                       ibev.element.port_num = 1;
+                       ib_dispatch_event(&ibev);
+               }
+               break;
+       }
  
-       write_lock(&ibdev->roce.netdev_lock);
-       if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
-               ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
-       write_unlock(&ibdev->roce.netdev_lock);
+       default:
+               break;
+       }
  
        return NOTIFY_DONE;
  }
@@@ -123,6 -152,10 +151,10 @@@ static struct net_device *mlx5_ib_get_n
        struct mlx5_ib_dev *ibdev = to_mdev(device);
        struct net_device *ndev;
  
+       ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
+       if (ndev)
+               return ndev;
        /* Ensure ndev does not disappear before we invoke dev_hold()
         */
        read_lock(&ibdev->roce.netdev_lock);
@@@ -138,7 -171,7 +170,7 @@@ static int mlx5_query_port_roce(struct 
                                struct ib_port_attr *props)
  {
        struct mlx5_ib_dev *dev = to_mdev(device);
-       struct net_device *ndev;
+       struct net_device *ndev, *upper;
        enum ib_mtu ndev_ib_mtu;
        u16 qkey_viol_cntr;
  
        if (!ndev)
                return 0;
  
+       if (mlx5_lag_is_active(dev->mdev)) {
+               rcu_read_lock();
+               upper = netdev_master_upper_dev_get_rcu(ndev);
+               if (upper) {
+                       dev_put(ndev);
+                       ndev = upper;
+                       dev_hold(ndev);
+               }
+               rcu_read_unlock();
+       }
        if (netif_running(ndev) && netif_carrier_ok(ndev)) {
                props->state      = IB_PORT_ACTIVE;
                props->phys_state = 5;
@@@ -284,9 -328,7 +327,9 @@@ __be16 mlx5_get_roce_udp_sport(struct m
  
  static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
  {
 -      return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 +      if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
 +              return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 +      return 0;
  }
  
  enum {
@@@ -429,7 -471,7 +472,7 @@@ static int mlx5_query_node_guid(struct 
  }
  
  struct mlx5_reg_node_desc {
-       u8      desc[64];
+       u8      desc[IB_DEVICE_NODE_DESC_MAX];
  };
  
  static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
@@@ -532,6 -574,26 +575,26 @@@ static int mlx5_ib_query_device(struct 
                                resp.response_length += sizeof(resp.tso_caps);
                        }
                }
+               if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+                       resp.rss_caps.rx_hash_function =
+                                               MLX5_RX_HASH_FUNC_TOEPLITZ;
+                       resp.rss_caps.rx_hash_fields_mask =
+                                               MLX5_RX_HASH_SRC_IPV4 |
+                                               MLX5_RX_HASH_DST_IPV4 |
+                                               MLX5_RX_HASH_SRC_IPV6 |
+                                               MLX5_RX_HASH_DST_IPV6 |
+                                               MLX5_RX_HASH_SRC_PORT_TCP |
+                                               MLX5_RX_HASH_DST_PORT_TCP |
+                                               MLX5_RX_HASH_SRC_PORT_UDP |
+                                               MLX5_RX_HASH_DST_PORT_UDP;
+                       resp.response_length += sizeof(resp.rss_caps);
+               }
+       } else {
+               if (field_avail(typeof(resp), tso_caps, uhw->outlen))
+                       resp.response_length += sizeof(resp.tso_caps);
+               if (field_avail(typeof(resp), rss_caps, uhw->outlen))
+                       resp.response_length += sizeof(resp.rss_caps);
        }
  
        if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
        if (!mlx5_core_is_pf(mdev))
                props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
  
+       if (mlx5_ib_port_link_layer(ibdev, 1) ==
+           IB_LINK_LAYER_ETHERNET) {
+               props->rss_caps.max_rwq_indirection_tables =
+                       1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
+               props->rss_caps.max_rwq_indirection_table_size =
+                       1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
+               props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
+               props->max_wq_type_rq =
+                       1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
+       }
        if (uhw->outlen) {
                err = ib_copy_to_udata(uhw, &resp, resp.response_length);
  
@@@ -846,13 -919,13 +920,13 @@@ static int mlx5_ib_modify_device(struc
         * If possible, pass node desc to FW, so it can generate
         * a 144 trap.  If cmd fails, just ignore.
         */
-       memcpy(&in, props->node_desc, 64);
+       memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
        err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
                                   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
        if (err)
                return err;
  
-       memcpy(ibdev->node_desc, props->node_desc, 64);
+       memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
  
        return err;
  }
@@@ -1395,28 -1468,77 +1469,77 @@@ static int mlx5_ib_dealloc_pd(struct ib
        return 0;
  }
  
- static bool outer_header_zero(u32 *match_criteria)
+ enum {
+       MATCH_CRITERIA_ENABLE_OUTER_BIT,
+       MATCH_CRITERIA_ENABLE_MISC_BIT,
+       MATCH_CRITERIA_ENABLE_INNER_BIT
+ };
+ #define HEADER_IS_ZERO(match_criteria, headers)                                  \
+       !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+                   0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))       \
+ static u8 get_match_criteria_enable(u32 *match_criteria)
  {
-       int size = MLX5_ST_SZ_BYTES(fte_match_param);
-       char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
-                                            outer_headers);
+       u8 match_criteria_enable;
  
-       return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
-                                                 outer_headers_c + 1,
-                                                 size - 1);
+       match_criteria_enable =
+               (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+               MATCH_CRITERIA_ENABLE_OUTER_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+               MATCH_CRITERIA_ENABLE_MISC_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+               MATCH_CRITERIA_ENABLE_INNER_BIT;
+       return match_criteria_enable;
+ }
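Note: get_match_criteria_enable() replaces the old outer-headers-only test: each region of the match criteria (outer headers, misc parameters, inner headers) contributes one enable bit, set only when that region's mask is not entirely zero. A rule matching outer headers plus the IPv6 flow label (a misc field) therefore ends up with bits 0 and 1 set. Sketched in isolation, using the enum above:

    /* Illustration of the enable-bit composition above. */
    static u8 criteria_enable(bool outer_nonzero, bool misc_nonzero,
                              bool inner_nonzero)
    {
            return (outer_nonzero << MATCH_CRITERIA_ENABLE_OUTER_BIT) |
                   (misc_nonzero  << MATCH_CRITERIA_ENABLE_MISC_BIT)  |
                   (inner_nonzero << MATCH_CRITERIA_ENABLE_INNER_BIT);
    }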
+ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+ {
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
  }
  
+ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+ {
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
+ }
+ #define LAST_ETH_FIELD vlan_tag
+ #define LAST_IB_FIELD sl
+ #define LAST_IPV4_FIELD tos
+ #define LAST_IPV6_FIELD traffic_class
+ #define LAST_TCP_UDP_FIELD src_port
+ /* Field is the last supported field */
+ #define FIELDS_NOT_SUPPORTED(filter, field)\
+       memchr_inv((void *)&filter.field  +\
+                  sizeof(filter.field), 0,\
+                  sizeof(filter) -\
+                  offsetof(typeof(filter), field) -\
+                  sizeof(filter.field))
  static int parse_flow_attr(u32 *match_c, u32 *match_v,
-                          union ib_flow_spec *ib_spec)
+                          const union ib_flow_spec *ib_spec)
  {
        void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                             outer_headers);
        void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                             outer_headers);
+       void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                          misc_parameters);
+       void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                          misc_parameters);
        switch (ib_spec->type) {
        case IB_FLOW_SPEC_ETH:
-               if (ib_spec->size != sizeof(ib_spec->eth))
-                       return -EINVAL;
+               if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+                       return -ENOTSUPP;
  
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
                                             dmac_47_16),
                                             dmac_47_16),
                                ib_spec->eth.val.dst_mac);
  
 +              ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 +                                           smac_47_16),
 +                              ib_spec->eth.mask.src_mac);
 +              ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 +                                           smac_47_16),
 +                              ib_spec->eth.val.src_mac);
 +
                if (ib_spec->eth.mask.vlan_tag) {
                        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
                                 vlan_tag, 1);
                         ethertype, ntohs(ib_spec->eth.val.ether_type));
                break;
        case IB_FLOW_SPEC_IPV4:
-               if (ib_spec->size != sizeof(ib_spec->ipv4))
-                       return -EINVAL;
+               if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+                       return -ENOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
                         ethertype, 0xffff);
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &ib_spec->ipv4.val.dst_ip,
                       sizeof(ib_spec->ipv4.val.dst_ip));
+               set_tos(outer_headers_c, outer_headers_v,
+                       ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+               set_proto(outer_headers_c, outer_headers_v,
+                         ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
                break;
        case IB_FLOW_SPEC_IPV6:
-               if (ib_spec->size != sizeof(ib_spec->ipv6))
-                       return -EINVAL;
+               if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
+                       return -ENOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
                         ethertype, 0xffff);
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &ib_spec->ipv6.val.dst_ip,
                       sizeof(ib_spec->ipv6.val.dst_ip));
+               set_tos(outer_headers_c, outer_headers_v,
+                       ib_spec->ipv6.mask.traffic_class,
+                       ib_spec->ipv6.val.traffic_class);
+               set_proto(outer_headers_c, outer_headers_v,
+                         ib_spec->ipv6.mask.next_hdr,
+                         ib_spec->ipv6.val.next_hdr);
+               MLX5_SET(fte_match_set_misc, misc_params_c,
+                        outer_ipv6_flow_label,
+                        ntohl(ib_spec->ipv6.mask.flow_label));
+               MLX5_SET(fte_match_set_misc, misc_params_v,
+                        outer_ipv6_flow_label,
+                        ntohl(ib_spec->ipv6.val.flow_label));
                break;
        case IB_FLOW_SPEC_TCP:
-               if (ib_spec->size != sizeof(ib_spec->tcp_udp))
-                       return -EINVAL;
+               if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+                                        LAST_TCP_UDP_FIELD))
+                       return -ENOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
                         0xff);
                         ntohs(ib_spec->tcp_udp.val.dst_port));
                break;
        case IB_FLOW_SPEC_UDP:
-               if (ib_spec->size != sizeof(ib_spec->tcp_udp))
-                       return -EINVAL;
+               if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+                                        LAST_TCP_UDP_FIELD))
+                       return -ENOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
                         0xff);
@@@ -1582,7 -1720,7 +1728,7 @@@ static bool flow_is_multicast_only(stru
               is_multicast_ether_addr(eth_spec->val.dst_mac);
  }
  
- static bool is_valid_attr(struct ib_flow_attr *flow_attr)
+ static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
  {
        union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
        bool has_ipv4_spec = false;
@@@ -1626,12 -1764,13 +1772,13 @@@ static int mlx5_ib_destroy_flow(struct 
  
        list_for_each_entry_safe(iter, tmp, &handler->list, list) {
                mlx5_del_flow_rule(iter->rule);
+               put_flow_table(dev, iter->prio, true);
                list_del(&iter->list);
                kfree(iter);
        }
  
        mlx5_del_flow_rule(handler->rule);
-       put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
+       put_flow_table(dev, handler->prio, true);
        mutex_unlock(&dev->flow_db.lock);
  
        kfree(handler);
@@@ -1647,10 -1786,16 +1794,16 @@@ static int ib_prio_to_core_prio(unsigne
        return priority;
  }
  
+ enum flow_table_type {
+       MLX5_IB_FT_RX,
+       MLX5_IB_FT_TX
+ };
  #define MLX5_FS_MAX_TYPES      10
  #define MLX5_FS_MAX_ENTRIES    32000UL
  static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
-                                               struct ib_flow_attr *flow_attr)
+                                               struct ib_flow_attr *flow_attr,
+                                               enum flow_table_type ft_type)
  {
        bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
        struct mlx5_flow_namespace *ns = NULL;
                                         &num_entries,
                                         &num_groups);
                prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+       } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+               if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+                                       allow_sniffer_and_nic_rx_shared_tir))
+                       return ERR_PTR(-ENOTSUPP);
+               ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
+                                            MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+                                            MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+               prio = &dev->flow_db.sniffer[ft_type];
+               priority = 0;
+               num_entries = 1;
+               num_groups = 1;
        }
  
        if (!ns)
  
  static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
                                                     struct mlx5_ib_flow_prio *ft_prio,
-                                                    struct ib_flow_attr *flow_attr,
+                                                    const struct ib_flow_attr *flow_attr,
                                                     struct mlx5_flow_destination *dst)
  {
        struct mlx5_flow_table  *ft = ft_prio->flow_table;
        struct mlx5_ib_flow_handler *handler;
        struct mlx5_flow_spec *spec;
-       void *ib_flow = flow_attr + 1;
+       const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
        unsigned int spec_index;
        u32 action;
        int err = 0;
                ib_flow += ((union ib_flow_spec *)ib_flow)->size;
        }
  
-       /* Outer header support only */
-       spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria))
-               << 0;
+       spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
        action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
                MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        handler->rule = mlx5_add_flow_rule(ft, spec,
                goto free;
        }
  
-       handler->prio = ft_prio - dev->flow_db.prios;
+       ft_prio->refcount++;
+       handler->prio = ft_prio;
  
        ft_prio->flow_table = ft;
  free:
@@@ -1777,6 -1934,7 +1942,7 @@@ static struct mlx5_ib_flow_handler *cre
                                               flow_attr, dst);
                if (IS_ERR(handler_dst)) {
                        mlx5_del_flow_rule(handler->rule);
+                       ft_prio->refcount--;
                        kfree(handler);
                        handler = handler_dst;
                } else {
@@@ -1838,6 -1996,8 +2004,8 @@@ static struct mlx5_ib_flow_handler *cre
                                                 &leftovers_specs[LEFTOVERS_UC].flow_attr,
                                                 dst);
                if (IS_ERR(handler_ucast)) {
+                       mlx5_del_flow_rule(handler->rule);
+                       ft_prio->refcount--;
                        kfree(handler);
                        handler = handler_ucast;
                } else {
        return handler;
  }
  
+ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
+                                                       struct mlx5_ib_flow_prio *ft_rx,
+                                                       struct mlx5_ib_flow_prio *ft_tx,
+                                                       struct mlx5_flow_destination *dst)
+ {
+       struct mlx5_ib_flow_handler *handler_rx;
+       struct mlx5_ib_flow_handler *handler_tx;
+       int err;
+       static const struct ib_flow_attr flow_attr  = {
+               .num_of_specs = 0,
+               .size = sizeof(flow_attr)
+       };
+       handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
+       if (IS_ERR(handler_rx)) {
+               err = PTR_ERR(handler_rx);
+               goto err;
+       }
+       handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
+       if (IS_ERR(handler_tx)) {
+               err = PTR_ERR(handler_tx);
+               goto err_tx;
+       }
+       list_add(&handler_tx->list, &handler_rx->list);
+       return handler_rx;
+ err_tx:
+       mlx5_del_flow_rule(handler_rx->rule);
+       ft_rx->refcount--;
+       kfree(handler_rx);
+ err:
+       return ERR_PTR(err);
+ }
  static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                                           struct ib_flow_attr *flow_attr,
                                           int domain)
  {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
 +      struct mlx5_ib_qp *mqp = to_mqp(qp);
        struct mlx5_ib_flow_handler *handler = NULL;
        struct mlx5_flow_destination *dst = NULL;
+       struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
        struct mlx5_ib_flow_prio *ft_prio;
        int err;
  
  
        mutex_lock(&dev->flow_db.lock);
  
-       ft_prio = get_flow_table(dev, flow_attr);
+       ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
        if (IS_ERR(ft_prio)) {
                err = PTR_ERR(ft_prio);
                goto unlock;
        }
+       if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+               ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
+               if (IS_ERR(ft_prio_tx)) {
+                       err = PTR_ERR(ft_prio_tx);
+                       ft_prio_tx = NULL;
+                       goto destroy_ft;
+               }
+       }
  
        dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 -      dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
 +      if (mqp->flags & MLX5_IB_QP_RSS)
 +              dst->tir_num = mqp->rss_qp.tirn;
 +      else
 +              dst->tir_num = mqp->raw_packet_qp.rq.tirn;
  
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
                if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
                handler = create_leftovers_rule(dev, ft_prio, flow_attr,
                                                dst);
+       } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+               handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
        } else {
                err = -EINVAL;
                goto destroy_ft;
                goto destroy_ft;
        }
  
-       ft_prio->refcount++;
        mutex_unlock(&dev->flow_db.lock);
        kfree(dst);
  
  
  destroy_ft:
        put_flow_table(dev, ft_prio, false);
+       if (ft_prio_tx)
+               put_flow_table(dev, ft_prio_tx, false);
  unlock:
        mutex_unlock(&dev->flow_db.lock);
        kfree(dst);
@@@ -2105,14 -2310,19 +2322,19 @@@ static void mlx5_ib_event(struct mlx5_c
                break;
  
        case MLX5_DEV_EVENT_PORT_UP:
-               ibev.event = IB_EVENT_PORT_ACTIVE;
-               port = (u8)param;
-               break;
        case MLX5_DEV_EVENT_PORT_DOWN:
        case MLX5_DEV_EVENT_PORT_INITIALIZED:
-               ibev.event = IB_EVENT_PORT_ERR;
                port = (u8)param;
+               /* In RoCE, port up/down events are handled in
+                * mlx5_netdev_event().
+                */
+               if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+                       IB_LINK_LAYER_ETHERNET)
+                       return;
+               ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
+                            IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
                break;
  
        case MLX5_DEV_EVENT_LID_CHANGE:
@@@ -2235,7 -2445,7 +2457,7 @@@ static int create_umr_res(struct mlx5_i
                goto error_0;
        }
  
-       pd = ib_alloc_pd(&dev->ib_dev);
+       pd = ib_alloc_pd(&dev->ib_dev, 0);
        if (IS_ERR(pd)) {
                mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
                ret = PTR_ERR(pd);
@@@ -2517,30 -2727,88 +2739,88 @@@ static void get_dev_fw_str(struct ib_de
                       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
  }
  
+ static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
+ {
+       struct mlx5_core_dev *mdev = dev->mdev;
+       struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
+                                                                MLX5_FLOW_NAMESPACE_LAG);
+       struct mlx5_flow_table *ft;
+       int err;
+       if (!ns || !mlx5_lag_is_active(mdev))
+               return 0;
+       err = mlx5_cmd_create_vport_lag(mdev);
+       if (err)
+               return err;
+       ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
+       if (IS_ERR(ft)) {
+               err = PTR_ERR(ft);
+               goto err_destroy_vport_lag;
+       }
+       dev->flow_db.lag_demux_ft = ft;
+       return 0;
+ err_destroy_vport_lag:
+       mlx5_cmd_destroy_vport_lag(mdev);
+       return err;
+ }
+ static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
+ {
+       struct mlx5_core_dev *mdev = dev->mdev;
+       if (dev->flow_db.lag_demux_ft) {
+               mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
+               dev->flow_db.lag_demux_ft = NULL;
+               mlx5_cmd_destroy_vport_lag(mdev);
+       }
+ }
+ static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
+ {
+       if (dev->roce.nb.notifier_call) {
+               unregister_netdevice_notifier(&dev->roce.nb);
+               dev->roce.nb.notifier_call = NULL;
+       }
+ }
  static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
  {
        int err;
  
        dev->roce.nb.notifier_call = mlx5_netdev_event;
        err = register_netdevice_notifier(&dev->roce.nb);
-       if (err)
+       if (err) {
+               dev->roce.nb.notifier_call = NULL;
                return err;
+       }
  
        err = mlx5_nic_vport_enable_roce(dev->mdev);
        if (err)
                goto err_unregister_netdevice_notifier;
  
+       err = mlx5_roce_lag_init(dev);
+       if (err)
+               goto err_disable_roce;
        return 0;
  
+ err_disable_roce:
+       mlx5_nic_vport_disable_roce(dev->mdev);
  err_unregister_netdevice_notifier:
-       unregister_netdevice_notifier(&dev->roce.nb);
+       mlx5_remove_roce_notifier(dev);
        return err;
  }
  
  static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
  {
+       mlx5_roce_lag_cleanup(dev);
        mlx5_nic_vport_disable_roce(dev->mdev);
-       unregister_netdevice_notifier(&dev->roce.nb);
  }
  
  static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
@@@ -2655,6 -2923,7 +2935,7 @@@ static void *mlx5_ib_add(struct mlx5_co
        struct mlx5_ib_dev *dev;
        enum rdma_link_layer ll;
        int port_type_cap;
+       const char *name;
        int err;
        int i;
  
  
        MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
  
-       strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
+       if (!mlx5_lag_is_active(mdev))
+               name = "mlx5_%d";
+       else
+               name = "mlx5_bond_%d";
+       strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner               = THIS_MODULE;
        dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
        dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
@@@ -2889,8 -3163,10 +3175,10 @@@ err_rsrc
        destroy_dev_resources(&dev->devr);
  
  err_disable_roce:
-       if (ll == IB_LINK_LAYER_ETHERNET)
+       if (ll == IB_LINK_LAYER_ETHERNET) {
                mlx5_disable_roce(dev);
+               mlx5_remove_roce_notifier(dev);
+       }
  
  err_free_port:
        kfree(dev->port);
@@@ -2906,6 -3182,7 +3194,7 @@@ static void mlx5_ib_remove(struct mlx5_
        struct mlx5_ib_dev *dev = context;
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
  
+       mlx5_remove_roce_notifier(dev);
        ib_unregister_device(&dev->ib_dev);
        mlx5_ib_dealloc_q_counters(dev);
        destroy_umrc_res(dev);
index 67cc7416fdff70f943407e821f0847cc998bd0f5,1df8a67d4f02328a6a2e9e47094ffd0dbfe6fbfd..dcdcd195fe53a4dd003b0d22426d146bb81c0380
@@@ -44,6 -44,7 +44,7 @@@
  #include <linux/types.h>
  #include <linux/mlx5/transobj.h>
  #include <rdma/ib_user_verbs.h>
+ #include <rdma/mlx5-abi.h>
  
  #define mlx5_ib_dbg(dev, format, arg...)                              \
  pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,  \
@@@ -142,6 -143,7 +143,7 @@@ struct mlx5_ib_pd 
  #define MLX5_IB_FLOW_LEFTOVERS_PRIO   (MLX5_IB_FLOW_MCAST_PRIO + 1)
  
  #define MLX5_IB_NUM_FLOW_FT           (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
+ #define MLX5_IB_NUM_SNIFFER_FTS               2
  struct mlx5_ib_flow_prio {
        struct mlx5_flow_table          *flow_table;
        unsigned int                    refcount;
  struct mlx5_ib_flow_handler {
        struct list_head                list;
        struct ib_flow                  ibflow;
-       unsigned int                    prio;
+       struct mlx5_ib_flow_prio        *prio;
        struct mlx5_flow_rule   *rule;
  };
  
  struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        prios[MLX5_IB_NUM_FLOW_FT];
+       struct mlx5_ib_flow_prio        sniffer[MLX5_IB_NUM_SNIFFER_FTS];
+       struct mlx5_flow_table          *lag_demux_ft;
        /* Protect flow steering bypass flow tables
         * when add/del flow rules.
         * only single add/removal of flow steering rule could be done
@@@ -225,7 -229,7 +229,7 @@@ struct mlx5_ib_wq 
  
  struct mlx5_ib_rwq {
        struct ib_wq            ibwq;
-       u32                     rqn;
+       struct mlx5_core_qp     core_qp;
        u32                     rq_num_pas;
        u32                     log_rq_stride;
        u32                     log_rq_size;
@@@ -402,7 -406,6 +406,7 @@@ enum mlx5_ib_qp_flags 
        /* QP uses 1 as its source QP number */
        MLX5_IB_QP_SQPN_QP1                     = 1 << 6,
        MLX5_IB_QP_CAP_SCATTER_FCS              = 1 << 7,
 +      MLX5_IB_QP_RSS                          = 1 << 8,
  };
  
  struct mlx5_umr_wr {
@@@ -603,6 -606,7 +607,7 @@@ struct mlx5_roce 
        rwlock_t                netdev_lock;
        struct net_device       *netdev;
        struct notifier_block   nb;
+       atomic_t                next_port;
  };
  
  struct mlx5_ib_dev {
@@@ -663,6 -667,11 +668,11 @@@ static inline struct mlx5_ib_qp *to_mib
        return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
  }
  
+ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
+ {
+       return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
+ }
  static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
  {
        return container_of(mmkey, struct mlx5_ib_mr, mmkey);
@@@ -947,4 -956,40 +957,40 @@@ static inline int verify_assign_uidx(u
  
        return 0;
  }
+ static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+                                   struct mlx5_ib_create_qp *ucmd,
+                                   int inlen,
+                                   u32 *user_index)
+ {
+       u8 cqe_version = ucontext->cqe_version;
+       if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+       if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+ }
+ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+                                    struct mlx5_ib_create_srq *ucmd,
+                                    int inlen,
+                                    u32 *user_index)
+ {
+       u8 cqe_version = ucontext->cqe_version;
+       if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+       if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+ }
  #endif /* MLX5_IB_H */
index 9529b464fbdc1962f44e7978b097669f38d67e6b,9d97a71a13353073893439af11efec77282540f5..41f4c2afbcdd6264a05c38d9c0cd2ce7d807bccc
@@@ -35,7 -35,6 +35,6 @@@
  #include <rdma/ib_cache.h>
  #include <rdma/ib_user_verbs.h>
  #include "mlx5_ib.h"
- #include "user.h"
  
  /* not supported currently */
  static int wq_signature;
@@@ -77,6 -76,17 +76,17 @@@ struct mlx5_wqe_eth_pad 
        u8 rsvd0[16];
  };
  
+ enum raw_qp_set_mask_map {
+       MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID         = 1UL << 0,
+ };
+ struct mlx5_modify_raw_qp_param {
+       u16 operation;
+       u32 set_mask; /* raw_qp_set_mask_map */
+       u8 rq_q_ctr_id;
+ };
  static void get_cqs(enum ib_qp_type qp_type,
                    struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
                    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
@@@ -1457,7 -1467,6 +1467,7 @@@ create_tir
        kvfree(in);
        /* qpn is reserved for that QP */
        qp->trans_qp.base.mqp.qpn = 0;
 +      qp->flags |= MLX5_IB_QP_RSS;
        return 0;
  
  err:
@@@ -1863,7 -1872,8 +1873,8 @@@ static void get_cqs(enum ib_qp_type qp_
  }
  
  static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                               u16 operation);
+                               const struct mlx5_modify_raw_qp_param *raw_qp_param,
+                               u8 lag_tx_affinity);
  
  static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
  {
                                                  MLX5_CMD_OP_2RST_QP, 0,
                                                  NULL, &base->mqp);
                } else {
-                       err = modify_raw_packet_qp(dev, qp,
-                                                  MLX5_CMD_OP_2RST_QP);
+                       struct mlx5_modify_raw_qp_param raw_qp_param = {
+                               .operation = MLX5_CMD_OP_2RST_QP
+                       };
+                       err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
                }
                if (err)
                        mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
@@@ -2153,6 -2166,31 +2167,31 @@@ static int modify_raw_packet_eth_prio(s
        return err;
  }
  
+ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
+                                        struct mlx5_ib_sq *sq, u8 tx_affinity)
+ {
+       void *in;
+       void *tisc;
+       int inlen;
+       int err;
+       inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+       MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
+       tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+       MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
+       err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+       kvfree(in);
+       return err;
+ }
  static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                         const struct ib_ah_attr *ah,
                         struct mlx5_qp_path *path, u8 port, int attr_mask,
@@@ -2363,8 -2401,9 +2402,9 @@@ static int ib_mask_to_mlx5_opt(int ib_m
        return result;
  }
  
- static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
-                                  struct mlx5_ib_rq *rq, int new_state)
+ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_rq *rq, int new_state,
+                                  const struct mlx5_modify_raw_qp_param *raw_qp_param)
  {
        void *in;
        void *rqc;
        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
        MLX5_SET(rqc, rqc, state, new_state);
  
-       err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+       if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
+               if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+                       MLX5_SET64(modify_rq_in, in, modify_bitmask,
+                                  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+                       MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
+               } else
+                       pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
+                                    dev->ib_dev.name);
+       }
+       err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
        if (err)
                goto out;
  
@@@ -2422,7 -2471,8 +2472,8 @@@ out
  }
  
  static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                               u16 operation)
+                               const struct mlx5_modify_raw_qp_param *raw_qp_param,
+                               u8 tx_affinity)
  {
        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
        int sq_state;
        int err;
  
-       switch (operation) {
+       switch (raw_qp_param->operation) {
        case MLX5_CMD_OP_RST2INIT_QP:
                rq_state = MLX5_RQC_STATE_RDY;
                sq_state = MLX5_SQC_STATE_RDY;
        case MLX5_CMD_OP_INIT2RTR_QP:
        case MLX5_CMD_OP_RTR2RTS_QP:
        case MLX5_CMD_OP_RTS2RTS_QP:
-               /* Nothing to do here... */
-               return 0;
+               if (raw_qp_param->set_mask)
+                       return -EINVAL;
+               else
+                       return 0;
        default:
                WARN_ON(1);
                return -EINVAL;
        }
  
        if (qp->rq.wqe_cnt) {
-               err =  modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+               err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
                if (err)
                        return err;
        }
  
-       if (qp->sq.wqe_cnt)
+       if (qp->sq.wqe_cnt) {
+               if (tx_affinity) {
+                       err = modify_raw_packet_tx_affinity(dev->mdev, sq,
+                                                           tx_affinity);
+                       if (err)
+                               return err;
+               }
                return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+       }
  
        return 0;
  }
@@@ -2514,12 -2574,14 +2575,14 @@@ static int __mlx5_ib_modify_qp(struct i
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
        struct mlx5_ib_pd *pd;
+       struct mlx5_ib_port *mibport = NULL;
        enum mlx5_qp_state mlx5_cur, mlx5_new;
        enum mlx5_qp_optpar optpar;
        int sqd_event;
        int mlx5_st;
        int err;
        u16 op;
+       u8 tx_affinity = 0;
  
        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
                }
        }
  
+       if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
+               if ((ibqp->qp_type == IB_QPT_RC) ||
+                   (ibqp->qp_type == IB_QPT_UD &&
+                    !(qp->flags & MLX5_IB_QP_SQPN_QP1)) ||
+                   (ibqp->qp_type == IB_QPT_UC) ||
+                   (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
+                   (ibqp->qp_type == IB_QPT_XRC_INI) ||
+                   (ibqp->qp_type == IB_QPT_XRC_TGT)) {
+                       if (mlx5_lag_is_active(dev->mdev)) {
+                               tx_affinity = (unsigned int)atomic_add_return(1,
+                                               &dev->roce.next_port) %
+                                               MLX5_MAX_PORTS + 1;
+                               context->flags |= cpu_to_be32(tx_affinity << 24);
+                       }
+               }
+       }
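Note: with LAG active, every new RC/UC/UD (non-QP1)/raw-packet/XRC QP gets a transmit port affinity assigned round-robin from a shared atomic counter; because '%' binds tighter than '+', the expression above yields values in 1..MLX5_MAX_PORTS, so with two ports new QPs simply alternate between port 1 and port 2. The value lands in the top byte of the QP context flags word (and, for raw packet QPs, is programmed into the TIS further down). A tiny sketch of the arithmetic, assuming MLX5_MAX_PORTS == 2:

    /* Sketch of the round-robin pick (assuming MLX5_MAX_PORTS == 2):
     * counter 1 -> port 2, counter 2 -> port 1, counter 3 -> port 2, and so on.
     */
    static unsigned int pick_tx_port(unsigned int counter)
    {
            return counter % 2 + 1;   /* '%' binds before '+' */
    }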
        if (is_sqp(ibqp->qp_type)) {
                context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
        } else if (ibqp->qp_type == IB_QPT_UD ||
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
                u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
                               qp->port) - 1;
-               struct mlx5_ib_port *mibport = &dev->port[port_num];
+               mibport = &dev->port[port_num];
                context->qp_counter_set_usr_page |=
                        cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
        }
        optpar = ib_mask_to_mlx5_opt(attr_mask);
        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
  
-       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
-               err = modify_raw_packet_qp(dev, qp, op);
-       else
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               struct mlx5_modify_raw_qp_param raw_qp_param = {};
+               raw_qp_param.operation = op;
+               if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+                       raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+                       raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
+               }
+               err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
+       } else {
                err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
                                          &base->mqp);
+       }
        if (err)
                goto out;
  
@@@ -3657,8 -3744,12 +3745,8 @@@ static int begin_wqe(struct mlx5_ib_qp 
                     struct ib_send_wr *wr, unsigned *idx,
                     int *size, int nreq)
  {
 -      int err = 0;
 -
 -      if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
 -              err = -ENOMEM;
 -              return err;
 -      }
 +      if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
 +              return -ENOMEM;
  
        *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
        *seg = mlx5_get_send_wqe(qp, *idx);
        *seg += sizeof(**ctrl);
        *size = sizeof(**ctrl) / 16;
  
 -      return err;
 +      return 0;
  }
  
  static void finish_wqe(struct mlx5_ib_qp *qp,
@@@ -3753,7 -3844,7 +3841,7 @@@ int mlx5_ib_post_send(struct ib_qp *ibq
                num_sge = wr->num_sge;
                if (unlikely(num_sge > qp->sq.max_gs)) {
                        mlx5_ib_warn(dev, "\n");
 -                      err = -ENOMEM;
 +                      err = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }
@@@ -4497,6 -4588,28 +4585,28 @@@ int mlx5_ib_dealloc_xrcd(struct ib_xrc
        return 0;
  }
  
+ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
+ {
+       struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp);
+       struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device);
+       struct ib_event event;
+
+       if (rwq->ibwq.event_handler) {
+               event.device     = rwq->ibwq.device;
+               event.element.wq = &rwq->ibwq;
+               switch (type) {
+               case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+                       event.event = IB_EVENT_WQ_FATAL;
+                       break;
+               default:
+                       mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn);
+                       return;
+               }
+               rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context);
+       }
+ }
+
  static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
                      struct ib_wq_init_attr *init_attr)
  {
        MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
        rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
        mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
-       err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn);
+       err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
        kvfree(in);
        return err;
  }
@@@ -4650,7 -4763,7 +4760,7 @@@ struct ib_wq *mlx5_ib_create_wq(struct 
                return ERR_PTR(-EINVAL);
        }
  
-       rwq->ibwq.wq_num = rwq->rqn;
+       rwq->ibwq.wq_num = rwq->core_qp.qpn;
        rwq->ibwq.state = IB_WQS_RESET;
        if (udata->outlen) {
                resp.response_length = offsetof(typeof(resp), response_length) +
                        goto err_copy;
        }
  
+       rwq->core_qp.event = mlx5_ib_wq_event;
+       rwq->ibwq.event_handler = init_attr->event_handler;
        return &rwq->ibwq;
  
  err_copy:
-       mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
+       mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
  err_user_rq:
        destroy_user_rq(pd, rwq);
  err:
@@@ -4676,7 -4791,7 +4788,7 @@@ int mlx5_ib_destroy_wq(struct ib_wq *wq
        struct mlx5_ib_dev *dev = to_mdev(wq->device);
        struct mlx5_ib_rwq *rwq = to_mrwq(wq);
  
-       mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
+       mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
        destroy_user_rq(wq->pd, rwq);
        kfree(rwq);
  
@@@ -4808,7 -4923,7 +4920,7 @@@ int mlx5_ib_modify_wq(struct ib_wq *wq
        MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
        MLX5_SET(rqc, rqc, state, wq_state);
  
-       err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen);
+       err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
        kvfree(in);
        if (!err)
                rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
index 0aa854737e74e98b871a2b714977c4d327f7cd17,71d0534960d613b4e37e0e355e4f2cfa234c372b..6af44f8db3d5499d46c584ac7fb386c1263f033d
@@@ -51,7 -51,7 +51,7 @@@
  #include "ocrdma.h"
  #include "ocrdma_hw.h"
  #include "ocrdma_verbs.h"
- #include "ocrdma_abi.h"
+ #include <rdma/ocrdma-abi.h>
  
  int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
  {
@@@ -125,8 -125,8 +125,8 @@@ int ocrdma_query_device(struct ib_devic
                                        IB_DEVICE_SYS_IMAGE_GUID |
                                        IB_DEVICE_LOCAL_DMA_LKEY |
                                        IB_DEVICE_MEM_MGT_EXTENSIONS;
 -      attr->max_sge = dev->attr.max_send_sge;
 -      attr->max_sge_rd = attr->max_sge;
 +      attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
 +      attr->max_sge_rd = dev->attr.max_rdma_sge;
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
index 876ebb442d3806d555d662805ee2ee4c01eda544,2d7e52619b554e89ed90da73aed07f83485eea45..954f15064514e1ba9dbf49f719a8999443bf09ac
@@@ -313,7 -313,7 +313,7 @@@ static void qib_copy_from_sge(void *dat
   * for the given QP.
   * Called at interrupt level.
   */
 -static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
                       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
  {
        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
@@@ -366,10 -366,10 +366,10 @@@ void qib_ib_rcv(struct qib_ctxtdata *rc
  {
        struct qib_pportdata *ppd = rcd->ppd;
        struct qib_ibport *ibp = &ppd->ibport_data;
 -      struct qib_ib_header *hdr = rhdr;
 +      struct ib_header *hdr = rhdr;
        struct qib_devdata *dd = ppd->dd;
        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 -      struct qib_other_headers *ohdr;
 +      struct ib_other_headers *ohdr;
        struct rvt_qp *qp;
        u32 qp_num;
        int lnh;
@@@ -841,7 -841,7 +841,7 @@@ static void sdma_complete(struct qib_sd
        if (tx->wqe)
                qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
        else if (qp->ibqp.qp_type == IB_QPT_RC) {
 -              struct qib_ib_header *hdr;
 +              struct ib_header *hdr;
  
                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
                        hdr = &tx->align_buf->hdr;
@@@ -889,7 -889,7 +889,7 @@@ static int wait_kmem(struct qib_ibdev *
        return ret;
  }
  
 -static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
 +static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
                              u32 plen, u32 dwords)
  {
@@@ -1025,7 -1025,7 +1025,7 @@@ static int no_bufs_available(struct rvt
        return ret;
  }
  
 -static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
 +static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
                              u32 plen, u32 dwords)
  {
@@@ -1133,7 -1133,7 +1133,7 @@@ done
   * Return zero if packet is sent or queued OK.
   * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
   */
 -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
 +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
                   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
  {
        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@@ -1370,7 -1370,8 +1370,8 @@@ static int qib_modify_device(struct ib_
        }
  
        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
-               memcpy(device->node_desc, device_modify->node_desc, 64);
+               memcpy(device->node_desc, device_modify->node_desc,
+                      IB_DEVICE_NODE_DESC_MAX);
                for (i = 0; i < dd->num_pports; i++) {
                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
  
@@@ -1606,6 -1607,8 +1607,6 @@@ int qib_register_ib_device(struct qib_d
        /* Only need to initialize non-zero fields. */
        setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
  
 -      qpt_mask = dd->qpn_mask;
 -
        INIT_LIST_HEAD(&dev->piowait);
        INIT_LIST_HEAD(&dev->dmawait);
        INIT_LIST_HEAD(&dev->txwait);
index 9dbfcc0ab577694c71f853f48d784ae5b9c4592a,7899167536e381e93a9dc392fc8e50c91ecef5b3..7b8d2d9e22633f140b601d0056438bd7d9ca3e68
@@@ -478,7 -478,6 +478,7 @@@ void ipoib_send(struct net_device *dev
                struct ipoib_ah *address, u32 qpn);
  void ipoib_reap_ah(struct work_struct *work);
  
 +struct ipoib_path *__path_find(struct net_device *dev, void *gid);
  void ipoib_mark_paths_invalid(struct net_device *dev);
  void ipoib_flush_paths(struct net_device *dev);
  int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
@@@ -772,7 -771,13 +772,13 @@@ static inline void ipoib_unregister_deb
  #define ipoib_printk(level, priv, format, arg...)     \
        printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
  #define ipoib_warn(priv, format, arg...)              \
-       ipoib_printk(KERN_WARNING, priv, format , ## arg)
+ do {                                                  \
+       static DEFINE_RATELIMIT_STATE(_rs,              \
+               10 * HZ /*10 seconds */,                \
+               100);           \
+       if (__ratelimit(&_rs))                          \
+               ipoib_printk(KERN_WARNING, priv, format , ## arg);\
+ } while (0)
  
  extern int ipoib_sendq_size;
  extern int ipoib_recvq_size;
index cc1c1b062ea58d530756586ee1ec00144c79b38e,e95c02ee05c0e22735f83ba84e747dd122fe8ee2..5636fc3da6b867aaabe5c1ff7f197d3f0077df76
@@@ -485,7 -485,7 +485,7 @@@ int ipoib_set_mode(struct net_device *d
        return -EINVAL;
  }
  
 -static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 +struct ipoib_path *__path_find(struct net_device *dev, void *gid)
  {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node *n = priv->path_tree.rb_node;
@@@ -2196,7 -2196,8 +2196,8 @@@ static int __init ipoib_init_module(voi
         * its private workqueue, and we only queue up flush events
         * on our global flush workqueue.  This avoids the deadlocks.
         */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
+       ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush",
+                                                 WQ_MEM_RECLAIM);
        if (!ipoib_workqueue) {
                ret = -ENOMEM;
                goto err_fs;
index cae9bbcc27e7c81f2b3dfedc3e55987e428f43ed,8df608ede3668e94a28f9a47aba3aaa3021dec35..6dd43f63238e8e68cbcd97b37947af039589a207
@@@ -309,7 -309,7 +309,7 @@@ isert_create_device_ib_res(struct isert
        if (ret)
                goto out;
  
-       device->pd = ib_alloc_pd(ib_dev);
+       device->pd = ib_alloc_pd(ib_dev, 0);
        if (IS_ERR(device->pd)) {
                ret = PTR_ERR(device->pd);
                isert_err("failed to allocate pd, device %p, ret=%d\n",
@@@ -403,7 -403,6 +403,7 @@@ isert_init_conn(struct isert_conn *iser
        INIT_LIST_HEAD(&isert_conn->node);
        init_completion(&isert_conn->login_comp);
        init_completion(&isert_conn->login_req_comp);
 +      init_waitqueue_head(&isert_conn->rem_wait);
        kref_init(&isert_conn->kref);
        mutex_init(&isert_conn->mutex);
        INIT_WORK(&isert_conn->release_work, isert_release_work);
@@@ -449,7 -448,7 +449,7 @@@ isert_alloc_login_buf(struct isert_con
  
        isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL);
        if (!isert_conn->login_rsp_buf) {
 -              isert_err("Unable to allocate isert_conn->login_rspbuf\n");
 +              ret = -ENOMEM;
                goto out_unmap_login_req_buf;
        }
  
@@@ -579,8 -578,7 +579,8 @@@ isert_connect_release(struct isert_con
        BUG_ON(!device);
  
        isert_free_rx_descriptors(isert_conn);
 -      if (isert_conn->cm_id)
 +      if (isert_conn->cm_id &&
 +          !isert_conn->dev_removed)
                rdma_destroy_id(isert_conn->cm_id);
  
        if (isert_conn->qp) {
  
        isert_device_put(device);
  
 -      kfree(isert_conn);
 +      if (isert_conn->dev_removed)
 +              wake_up_interruptible(&isert_conn->rem_wait);
 +      else
 +              kfree(isert_conn);
  }
  
  static void
@@@ -758,7 -753,6 +758,7 @@@ static in
  isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
  {
        struct isert_np *isert_np = cma_id->context;
 +      struct isert_conn *isert_conn;
        int ret = 0;
  
        isert_info("%s (%d): status %d id %p np %p\n",
                break;
        case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
        case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
 -      case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
                ret = isert_disconnected_handler(cma_id, event->event);
                break;
 +      case RDMA_CM_EVENT_DEVICE_REMOVAL:
 +              isert_conn = cma_id->qp->qp_context;
 +              isert_conn->dev_removed = true;
 +              isert_disconnected_handler(cma_id, event->event);
 +              wait_event_interruptible(isert_conn->rem_wait,
 +                                       isert_conn->state == ISER_CONN_DOWN);
 +              kfree(isert_conn);
 +              /*
 +               * return non-zero from the callback to destroy
 +               * the rdma cm id
 +               */
 +              return 1;
        case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
        case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
        case RDMA_CM_EVENT_CONNECT_ERROR:
index 883bbfe08e0efa64d87ab2652b0e773516d54450,48a44af740a6c50103b329bc9871bf3c3fa5c142..0b1f69ed2e92ff41bbf7c809608ca6bcd3984948
@@@ -522,11 -522,6 +522,11 @@@ static int srpt_refresh_port(struct srp
        if (ret)
                goto err_query_port;
  
 +      snprintf(sport->port_guid, sizeof(sport->port_guid),
 +              "0x%016llx%016llx",
 +              be64_to_cpu(sport->gid.global.subnet_prefix),
 +              be64_to_cpu(sport->gid.global.interface_id));
 +
        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof(reg_req));
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
@@@ -2480,7 -2475,7 +2480,7 @@@ static void srpt_add_one(struct ib_devi
        init_waitqueue_head(&sdev->ch_releaseQ);
        mutex_init(&sdev->mutex);
  
-       sdev->pd = ib_alloc_pd(device);
+       sdev->pd = ib_alloc_pd(device, 0);
        if (IS_ERR(sdev->pd))
                goto free_dev;
  
                               sdev->device->name, i);
                        goto err_ring;
                }
 -              snprintf(sport->port_guid, sizeof(sport->port_guid),
 -                      "0x%016llx%016llx",
 -                      be64_to_cpu(sport->gid.global.subnet_prefix),
 -                      be64_to_cpu(sport->gid.global.interface_id));
        }
  
        spin_lock(&srpt_dev_lock);
index 28e653e9c85612094ab57bdad6008c1414880219,f6099d0c63515f2c8387385e67733142377b1fe0..2125903043fbb12c08f29d0f6f9e3f56e81e222b
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
 - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
 + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -347,9 -347,9 +347,10 @@@ struct adapter_params 
        unsigned int ofldq_wr_cred;
        bool ulptx_memwrite_dsgl;          /* use of T5 DSGL allowed */
  
 +      unsigned int nsched_cls;          /* number of traffic classes */
        unsigned int max_ordird_qp;       /* Max read depth per RDMA QP */
        unsigned int max_ird_adapter;     /* Max read depth per adapter */
+       bool fr_nsmr_tpte_wr_support;     /* FW support for FR_NSMR_TPTE_WR */
  };
  
  /* State needed to monitor the forward progress of SGE Ingress DMA activities
@@@ -422,8 -422,8 +423,8 @@@ struct link_config 
        unsigned short supported;        /* link capabilities */
        unsigned short advertising;      /* advertised capabilities */
        unsigned short lp_advertising;   /* peer advertised capabilities */
 -      unsigned short requested_speed;  /* speed user has requested */
 -      unsigned short speed;            /* actual link speed */
 +      unsigned int   requested_speed;  /* speed user has requested */
 +      unsigned int   speed;            /* actual link speed */
        unsigned char  requested_fc;     /* flow control user has requested */
        unsigned char  fc;               /* actual link flow control */
        unsigned char  autoneg;          /* autonegotiating? */
@@@ -437,6 -437,11 +438,6 @@@ enum 
        MAX_ETH_QSETS = 32,           /* # of Ethernet Tx/Rx queue sets */
        MAX_OFLD_QSETS = 16,          /* # of offload Tx, iscsi Rx queue sets */
        MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
 -      MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
 -      MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
 -
 -      /* # of streaming iSCSIT Rx queues */
 -      MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
  };
  
  enum {
  enum {
        INGQ_EXTRAS = 2,        /* firmware event queue and */
                                /*   forwarded interrupts */
 -      MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
 -                 MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
 +      MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
  };
  
  struct adapter;
@@@ -490,7 -496,6 +491,7 @@@ struct port_info 
  #endif /* CONFIG_CHELSIO_T4_FCOE */
        bool rxtstamp;  /* Enable TS */
        struct hwtstamp_config tstamp_config;
 +      struct sched_table *sched_tbl;
  };
  
  struct dentry;
@@@ -698,6 -703,10 +699,6 @@@ struct sge 
        struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
  
        struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
 -      struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
 -      struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
 -      struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
 -      struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
        struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
        struct sge_uld_rxq_info **uld_rxq_info;
  
        u16 max_ethqsets;           /* # of available Ethernet queue sets */
        u16 ethqsets;               /* # of active Ethernet queue sets */
        u16 ethtxq_rover;           /* Tx queue to clean up next */
 -      u16 iscsiqsets;              /* # of active iSCSI queue sets */
 -      u16 niscsitq;               /* # of available iSCST Rx queues */
 -      u16 rdmaqs;                 /* # of available RDMA Rx queues */
 -      u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
 +      u16 ofldqsets;              /* # of active ofld queue sets */
        u16 nqs_per_uld;            /* # of Rx queues per ULD */
 -      u16 iscsi_rxq[MAX_OFLD_QSETS];
 -      u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
 -      u16 rdma_rxq[MAX_RDMA_QUEUES];
 -      u16 rdma_ciq[MAX_RDMA_CIQS];
        u16 timer_val[SGE_NTIMERS];
        u8 counter_val[SGE_NCOUNTERS];
        u32 fl_pg_order;            /* large page allocation size */
  };
  
  #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
 -#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
 -#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
 -#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
 -#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
 +#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++)
  
  struct l2t_data;
  
@@@ -766,12 -785,6 +767,12 @@@ struct uld_msix_bmap 
  struct uld_msix_info {
        unsigned short vec;
        char desc[IFNAMSIZ + 10];
 +      unsigned int idx;
 +};
 +
 +struct vf_info {
 +      unsigned char vf_mac_addr[ETH_ALEN];
 +      bool pf_set_mac;
  };
  
  struct adapter {
        unsigned int mbox;
        unsigned int pf;
        unsigned int flags;
 +      unsigned int adap_idx;
        enum chip_type chip;
  
        int msg_enable;
        } msix_info[MAX_INGQ + 1];
        struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
        struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
 -      unsigned int msi_idx;
 +      int msi_idx;
  
        struct doorbell_stats db_stats;
        struct sge sge;
        struct net_device *port[MAX_NPORTS];
        u8 chan_map[NCHAN];                   /* channel -> port map */
  
 +      struct vf_info *vfinfo;
 +      u8 num_vfs;
 +
        u32 filter_mode;
        unsigned int l2t_start;
        unsigned int l2t_end;
        unsigned int clipt_start;
        unsigned int clipt_end;
        struct clip_tbl *clipt;
 -      struct cxgb4_pci_uld_info *uld;
 +      struct cxgb4_uld_info *uld;
        void *uld_handle[CXGB4_ULD_MAX];
        unsigned int num_uld;
 +      unsigned int num_ofld_uld;
        struct list_head list_node;
        struct list_head rcu_node;
        struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
  #define T4_OS_LOG_MBOX_CMDS 256
        struct mbox_cmd_log *mbox_log;
  
 +      struct mutex uld_mutex;
 +
        struct dentry *debugfs_root;
        bool use_bd;     /* Use SGE Back Door intfc for reading SGE Contexts */
        bool trace_rss; /* 1 implies that different RSS flit per filter is
  
        spinlock_t stats_lock;
        spinlock_t win0_lock ____cacheline_aligned_in_smp;
 +
 +      /* TC u32 offload */
 +      struct cxgb4_tc_u32_table *tc_u32;
 +};
 +
 +/* Support for "sched-class" command to allow a TX Scheduling Class to be
 + * programmed with various parameters.
 + */
 +struct ch_sched_params {
 +      s8   type;                     /* packet or flow */
 +      union {
 +              struct {
 +                      s8   level;    /* scheduler hierarchy level */
 +                      s8   mode;     /* per-class or per-flow */
 +                      s8   rateunit; /* bit or packet rate */
 +                      s8   ratemode; /* %port relative or kbps absolute */
 +                      s8   channel;  /* scheduler channel [0..N] */
 +                      s8   class;    /* scheduler class [0..N] */
 +                      s32  minrate;  /* minimum rate */
 +                      s32  maxrate;  /* maximum rate */
 +                      s16  weight;   /* percent weight */
 +                      s16  pktsize;  /* average packet size */
 +              } params;
 +      } u;
 +};
 +
 +enum {
 +      SCHED_CLASS_TYPE_PACKET = 0,    /* class type */
 +};
 +
 +enum {
 +      SCHED_CLASS_LEVEL_CL_RL = 0,    /* class rate limiter */
 +};
 +
 +enum {
 +      SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
 +};
 +
 +enum {
 +      SCHED_CLASS_RATEUNIT_BITS = 0,  /* bit rate scheduling */
 +};
 +
 +enum {
 +      SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
 +};
 +
 +/* Support for "sched_queue" command to allow one or more NIC TX Queues
 + * to be bound to a TX Scheduling Class.
 + */
 +struct ch_sched_queue {
 +      s8   queue;    /* queue index */
 +      s8   class;    /* class index */
  };
  
  /* Defined bit width of user definable filter tuples
@@@ -1028,32 -982,6 +1029,32 @@@ enum 
        VLAN_REWRITE
  };
  
 +/* Host shadow copy of ingress filter entry.  This is in host native format
 + * and doesn't match the ordering or bit order, etc. of the hardware of the
 + * firmware command.  The use of bit-field structure elements is purely to
 + * remind ourselves of the field size limitations and save memory in the case
 + * where the filter table is large.
 + */
 +struct filter_entry {
 +      /* Administrative fields for filter. */
 +      u32 valid:1;            /* filter allocated and valid */
 +      u32 locked:1;           /* filter is administratively locked */
 +
 +      u32 pending:1;          /* filter action is pending firmware reply */
 +      u32 smtidx:8;           /* Source MAC Table index for smac */
 +      struct filter_ctx *ctx; /* Caller's completion hook */
 +      struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
 +      struct net_device *dev; /* Associated net device */
 +      u32 tid;                /* This will store the actual tid */
 +
 +      /* The filter itself.  Most of this is a straight copy of information
 +       * provided by the extended ioctl().  Some fields are translated to
 +       * internal forms -- for instance the Ingress Queue ID passed in from
 +       * the ioctl() is translated into the Absolute Ingress Queue ID.
 +       */
 +      struct ch_filter_specification fs;
 +};
 +
  static inline int is_offload(const struct adapter *adap)
  {
        return adap->params.offload;
@@@ -1064,11 -992,6 +1065,11 @@@ static inline int is_pci_uld(const stru
        return adap->params.crypto;
  }
  
 +static inline int is_uld(const struct adapter *adap)
 +{
 +      return (adap->params.offload || adap->params.crypto);
 +}
 +
  static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
  {
        return readl(adap->regs + reg_addr);
@@@ -1295,8 -1218,6 +1296,8 @@@ int t4_sge_alloc_eth_txq(struct adapte
  int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
                          struct net_device *dev, unsigned int iqid,
                          unsigned int cmplqid);
 +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 +                      unsigned int cmplqid);
  int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
                          struct net_device *dev, unsigned int iqid);
  irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
@@@ -1643,9 -1564,6 +1644,9 @@@ void t4_get_trace_filter(struct adapte
                         int filter_index, int *enabled);
  int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
                         u32 addr, u32 val);
 +int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 +                  int rateunit, int ratemode, int channel, int class,
 +                  int minrate, int maxrate, int weight, int pktsize);
  void t4_sge_decode_idma_state(struct adapter *adapter, int state);
  void t4_free_mem(void *addr);
  void t4_idma_monitor_init(struct adapter *adapter,
@@@ -1655,9 -1573,7 +1656,9 @@@ void t4_idma_monitor(struct adapter *ad
                     int hz, int ticks);
  int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
                      unsigned int naddr, u8 *addr);
 -void uld_mem_free(struct adapter *adap);
 -int uld_mem_alloc(struct adapter *adap);
 +void t4_uld_mem_free(struct adapter *adap);
 +int t4_uld_mem_alloc(struct adapter *adap);
 +void t4_uld_clean_up(struct adapter *adap);
 +void t4_register_netevent_notifier(void);
  void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
  #endif /* __CXGB4_H__ */
index cf147ca419a8c7e842a21889e23ccd8b0318f031,7e858b2768b71a4bb0ed44a649ebbfa48faf0028..f320497368f401deb36a3c1b93d0e288208a7fec
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
 - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
 + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -67,7 -67,6 +67,7 @@@
  #include <linux/crash_dump.h>
  
  #include "cxgb4.h"
 +#include "cxgb4_filter.h"
  #include "t4_regs.h"
  #include "t4_values.h"
  #include "t4_msg.h"
@@@ -77,8 -76,6 +77,8 @@@
  #include "cxgb4_debugfs.h"
  #include "clip_tbl.h"
  #include "l2t.h"
 +#include "sched.h"
 +#include "cxgb4_tc_u32.h"
  
  char cxgb4_driver_name[] = KBUILD_MODNAME;
  
  const char cxgb4_driver_version[] = DRV_VERSION;
  #define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
  
 -/* Host shadow copy of ingress filter entry.  This is in host native format
 - * and doesn't match the ordering or bit order, etc. of the hardware of the
 - * firmware command.  The use of bit-field structure elements is purely to
 - * remind ourselves of the field size limitations and save memory in the case
 - * where the filter table is large.
 - */
 -struct filter_entry {
 -      /* Administrative fields for filter.
 -       */
 -      u32 valid:1;            /* filter allocated and valid */
 -      u32 locked:1;           /* filter is administratively locked */
 -
 -      u32 pending:1;          /* filter action is pending firmware reply */
 -      u32 smtidx:8;           /* Source MAC Table index for smac */
 -      struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
 -
 -      /* The filter itself.  Most of this is a straight copy of information
 -       * provided by the extended ioctl().  Some fields are translated to
 -       * internal forms -- for instance the Ingress Queue ID passed in from
 -       * the ioctl() is translated into the Absolute Ingress Queue ID.
 -       */
 -      struct ch_filter_specification fs;
 -};
 -
  #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
@@@ -204,6 -225,11 +204,6 @@@ static struct dentry *cxgb4_debugfs_roo
  
  LIST_HEAD(adapter_list);
  DEFINE_MUTEX(uld_mutex);
 -/* Adapter list to be accessed from atomic context */
 -static LIST_HEAD(adap_rcu_list);
 -static DEFINE_SPINLOCK(adap_rcu_lock);
 -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
 -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
  
  static void link_report(struct net_device *dev)
  {
@@@ -277,9 -303,11 +277,9 @@@ static void dcb_tx_queue_prio_enable(st
                        txq->dcb_prio = value;
        }
  }
 -#endif /* CONFIG_CHELSIO_T4_DCB */
  
 -int cxgb4_dcb_enabled(const struct net_device *dev)
 +static int cxgb4_dcb_enabled(const struct net_device *dev)
  {
 -#ifdef CONFIG_CHELSIO_T4_DCB
        struct port_info *pi = netdev_priv(dev);
  
        if (!pi->dcb.enabled)
  
        return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) ||
                (pi->dcb.state == CXGB4_DCB_STATE_HOST));
 -#else
 -      return 0;
 -#endif
  }
 -EXPORT_SYMBOL(cxgb4_dcb_enabled);
 +#endif /* CONFIG_CHELSIO_T4_DCB */
  
  void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
  {
@@@ -500,6 -531,66 +500,6 @@@ static void dcb_rpl(struct adapter *ada
  }
  #endif /* CONFIG_CHELSIO_T4_DCB */
  
 -/* Clear a filter and release any of its resources that we own.  This also
 - * clears the filter's "pending" status.
 - */
 -static void clear_filter(struct adapter *adap, struct filter_entry *f)
 -{
 -      /* If the new or old filter have loopback rewriteing rules then we'll
 -       * need to free any existing Layer Two Table (L2T) entries of the old
 -       * filter rule.  The firmware will handle freeing up any Source MAC
 -       * Table (SMT) entries used for rewriting Source MAC Addresses in
 -       * loopback rules.
 -       */
 -      if (f->l2t)
 -              cxgb4_l2t_release(f->l2t);
 -
 -      /* The zeroing of the filter rule below clears the filter valid,
 -       * pending, locked flags, l2t pointer, etc. so it's all we need for
 -       * this operation.
 -       */
 -      memset(f, 0, sizeof(*f));
 -}
 -
 -/* Handle a filter write/deletion reply.
 - */
 -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
 -{
 -      unsigned int idx = GET_TID(rpl);
 -      unsigned int nidx = idx - adap->tids.ftid_base;
 -      unsigned int ret;
 -      struct filter_entry *f;
 -
 -      if (idx >= adap->tids.ftid_base && nidx <
 -         (adap->tids.nftids + adap->tids.nsftids)) {
 -              idx = nidx;
 -              ret = TCB_COOKIE_G(rpl->cookie);
 -              f = &adap->tids.ftid_tab[idx];
 -
 -              if (ret == FW_FILTER_WR_FLT_DELETED) {
 -                      /* Clear the filter when we get confirmation from the
 -                       * hardware that the filter has been deleted.
 -                       */
 -                      clear_filter(adap, f);
 -              } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
 -                      dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
 -                              idx);
 -                      clear_filter(adap, f);
 -              } else if (ret == FW_FILTER_WR_FLT_ADDED) {
 -                      f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
 -                      f->pending = 0;  /* asynchronous setup completed */
 -                      f->valid = 1;
 -              } else {
 -                      /* Something went wrong.  Issue a warning about the
 -                       * problem and clear everything out.
 -                       */
 -                      dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
 -                              idx, ret);
 -                      clear_filter(adap, f);
 -              }
 -      }
 -}
 -
  /* Response queue handler for the FW event queue.
   */
  static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
@@@ -586,6 -677,56 +586,6 @@@ out
        return 0;
  }
  
 -/* Flush the aggregated lro sessions */
 -static void uldrx_flush_handler(struct sge_rspq *q)
 -{
 -      if (ulds[q->uld].lro_flush)
 -              ulds[q->uld].lro_flush(&q->lro_mgr);
 -}
 -
 -/**
 - *    uldrx_handler - response queue handler for ULD queues
 - *    @q: the response queue that received the packet
 - *    @rsp: the response queue descriptor holding the offload message
 - *    @gl: the gather list of packet fragments
 - *
 - *    Deliver an ingress offload packet to a ULD.  All processing is done by
 - *    the ULD, we just maintain statistics.
 - */
 -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
 -                       const struct pkt_gl *gl)
 -{
 -      struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
 -      int ret;
 -
 -      /* FW can send CPLs encapsulated in a CPL_FW4_MSG.
 -       */
 -      if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
 -          ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
 -              rsp += 2;
 -
 -      if (q->flush_handler)
 -              ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
 -                                                rsp, gl, &q->lro_mgr,
 -                                                &q->napi);
 -      else
 -              ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
 -                                            rsp, gl);
 -
 -      if (ret) {
 -              rxq->stats.nomem++;
 -              return -1;
 -      }
 -
 -      if (gl == NULL)
 -              rxq->stats.imm++;
 -      else if (gl == CXGB4_MSG_AN)
 -              rxq->stats.an++;
 -      else
 -              rxq->stats.pkts++;
 -      return 0;
 -}
 -
  static void disable_msi(struct adapter *adapter)
  {
        if (adapter->flags & USING_MSIX) {
@@@ -637,12 -778,30 +637,12 @@@ static void name_msix_vecs(struct adapt
                        snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
                                 d->name, i);
        }
 -
 -      /* offload queues */
 -      for_each_iscsirxq(&adap->sge, i)
 -              snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
 -                       adap->port[0]->name, i);
 -
 -      for_each_iscsitrxq(&adap->sge, i)
 -              snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
 -                       adap->port[0]->name, i);
 -
 -      for_each_rdmarxq(&adap->sge, i)
 -              snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
 -                       adap->port[0]->name, i);
 -
 -      for_each_rdmaciq(&adap->sge, i)
 -              snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
 -                       adap->port[0]->name, i);
  }
  
  static int request_msix_queue_irqs(struct adapter *adap)
  {
        struct sge *s = &adap->sge;
 -      int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
 -      int iscsitqidx = 0;
 +      int err, ethqidx;
        int msi_index = 2;
  
        err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
                        goto unwind;
                msi_index++;
        }
 -      for_each_iscsirxq(s, iscsiqidx) {
 -              err = request_irq(adap->msix_info[msi_index].vec,
 -                                t4_sge_intr_msix, 0,
 -                                adap->msix_info[msi_index].desc,
 -                                &s->iscsirxq[iscsiqidx].rspq);
 -              if (err)
 -                      goto unwind;
 -              msi_index++;
 -      }
 -      for_each_iscsitrxq(s, iscsitqidx) {
 -              err = request_irq(adap->msix_info[msi_index].vec,
 -                                t4_sge_intr_msix, 0,
 -                                adap->msix_info[msi_index].desc,
 -                                &s->iscsitrxq[iscsitqidx].rspq);
 -              if (err)
 -                      goto unwind;
 -              msi_index++;
 -      }
 -      for_each_rdmarxq(s, rdmaqidx) {
 -              err = request_irq(adap->msix_info[msi_index].vec,
 -                                t4_sge_intr_msix, 0,
 -                                adap->msix_info[msi_index].desc,
 -                                &s->rdmarxq[rdmaqidx].rspq);
 -              if (err)
 -                      goto unwind;
 -              msi_index++;
 -      }
 -      for_each_rdmaciq(s, rdmaciqqidx) {
 -              err = request_irq(adap->msix_info[msi_index].vec,
 -                                t4_sge_intr_msix, 0,
 -                                adap->msix_info[msi_index].desc,
 -                                &s->rdmaciq[rdmaciqqidx].rspq);
 -              if (err)
 -                      goto unwind;
 -              msi_index++;
 -      }
        return 0;
  
  unwind:
 -      while (--rdmaciqqidx >= 0)
 -              free_irq(adap->msix_info[--msi_index].vec,
 -                       &s->rdmaciq[rdmaciqqidx].rspq);
 -      while (--rdmaqidx >= 0)
 -              free_irq(adap->msix_info[--msi_index].vec,
 -                       &s->rdmarxq[rdmaqidx].rspq);
 -      while (--iscsitqidx >= 0)
 -              free_irq(adap->msix_info[--msi_index].vec,
 -                       &s->iscsitrxq[iscsitqidx].rspq);
 -      while (--iscsiqidx >= 0)
 -              free_irq(adap->msix_info[--msi_index].vec,
 -                       &s->iscsirxq[iscsiqidx].rspq);
        while (--ethqidx >= 0)
                free_irq(adap->msix_info[--msi_index].vec,
                         &s->ethrxq[ethqidx].rspq);
@@@ -677,6 -884,16 +677,6 @@@ static void free_msix_queue_irqs(struc
        free_irq(adap->msix_info[1].vec, &s->fw_evtq);
        for_each_ethrxq(s, i)
                free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
 -      for_each_iscsirxq(s, i)
 -              free_irq(adap->msix_info[msi_index++].vec,
 -                       &s->iscsirxq[i].rspq);
 -      for_each_iscsitrxq(s, i)
 -              free_irq(adap->msix_info[msi_index++].vec,
 -                       &s->iscsitrxq[i].rspq);
 -      for_each_rdmarxq(s, i)
 -              free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
 -      for_each_rdmaciq(s, i)
 -              free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
  }
  
  /**
@@@ -815,11 -1032,42 +815,11 @@@ static void enable_rx(struct adapter *a
        }
  }
  
 -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
 -                         unsigned int nq, unsigned int per_chan, int msi_idx,
 -                         u16 *ids, bool lro)
 -{
 -      int i, err;
 -
 -      for (i = 0; i < nq; i++, q++) {
 -              if (msi_idx > 0)
 -                      msi_idx++;
 -              err = t4_sge_alloc_rxq(adap, &q->rspq, false,
 -                                     adap->port[i / per_chan],
 -                                     msi_idx, q->fl.size ? &q->fl : NULL,
 -                                     uldrx_handler,
 -                                     lro ? uldrx_flush_handler : NULL,
 -                                     0);
 -              if (err)
 -                      return err;
 -              memset(&q->stats, 0, sizeof(q->stats));
 -              if (ids)
 -                      ids[i] = q->rspq.abs_id;
 -      }
 -      return 0;
 -}
  
 -/**
 - *    setup_sge_queues - configure SGE Tx/Rx/response queues
 - *    @adap: the adapter
 - *
 - *    Determines how many sets of SGE queues to use and initializes them.
 - *    We support multiple queue sets per port if we have MSI-X, otherwise
 - *    just one queue set per port.
 - */
 -static int setup_sge_queues(struct adapter *adap)
 +static int setup_fw_sge_queues(struct adapter *adap)
  {
 -      int err, i, j;
        struct sge *s = &adap->sge;
 +      int err = 0;
  
        bitmap_zero(s->starving_fl, s->egr_sz);
        bitmap_zero(s->txq_maperr, s->egr_sz);
                adap->msi_idx = -((int)s->intrq.abs_id + 1);
        }
  
 -      /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
 -       * don't forget to update the following which need to be
 -       * synchronized to and changes here.
 -       *
 -       * 1. The calculations of MAX_INGQ in cxgb4.h.
 -       *
 -       * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
 -       *    to accommodate any new/deleted Ingress Queues
 -       *    which need MSI-X Vectors.
 -       *
 -       * 3. Update sge_qinfo_show() to include information on the
 -       *    new/deleted queues.
 -       */
        err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
                               adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
 -      if (err) {
 -freeout:      t4_free_sge_resources(adap);
 -              return err;
 -      }
 +      if (err)
 +              t4_free_sge_resources(adap);
 +      return err;
 +}
 +
 +/**
 + *    setup_sge_queues - configure SGE Tx/Rx/response queues
 + *    @adap: the adapter
 + *
 + *    Determines how many sets of SGE queues to use and initializes them.
 + *    We support multiple queue sets per port if we have MSI-X, otherwise
 + *    just one queue set per port.
 + */
 +static int setup_sge_queues(struct adapter *adap)
 +{
 +      int err, i, j;
 +      struct sge *s = &adap->sge;
 +      struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
 +      unsigned int cmplqid = 0;
  
        for_each_port(adap, i) {
                struct net_device *dev = adap->port[i];
                }
        }
  
 -      j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */
 -      for_each_iscsirxq(s, i) {
 +      j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */
 +      for_each_ofldtxq(s, i) {
                err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
                                            adap->port[i / j],
                                            s->fw_evtq.cntxt_id);
                        goto freeout;
        }
  
 -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
 -      err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \
 -      if (err) \
 -              goto freeout; \
 -      if (adap->msi_idx > 0) \
 -              adap->msi_idx += nq; \
 -} while (0)
 -
 -      ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
 -      ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
 -      ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
 -      j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
 -      ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
 -
 -#undef ALLOC_OFLD_RXQS
 -
        for_each_port(adap, i) {
 -              /*
 -               * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
 +              /* Note that cmplqid below is 0 if we don't
                 * have RDMA queues, and that's the right value.
                 */
 +              if (rxq_info)
 +                      cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id;
 +
                err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
 -                                          s->fw_evtq.cntxt_id,
 -                                          s->rdmarxq[i].rspq.cntxt_id);
 +                                          s->fw_evtq.cntxt_id, cmplqid);
                if (err)
                        goto freeout;
        }
                     RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
                     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
        return 0;
 +freeout:
 +      t4_free_sge_resources(adap);
 +      return err;
  }
  
  /*
@@@ -939,6 -1197,151 +939,6 @@@ void t4_free_mem(void *addr
        kvfree(addr);
  }
  
 -/* Send a Work Request to write the filter at a specified index.  We construct
 - * a Firmware Filter Work Request to have the work done and put the indicated
 - * filter into "pending" mode which will prevent any further actions against
 - * it till we get a reply from the firmware on the completion status of the
 - * request.
 - */
 -static int set_filter_wr(struct adapter *adapter, int fidx)
 -{
 -      struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
 -      struct sk_buff *skb;
 -      struct fw_filter_wr *fwr;
 -      unsigned int ftid;
 -
 -      skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
 -      if (!skb)
 -              return -ENOMEM;
 -
 -      /* If the new filter requires loopback Destination MAC and/or VLAN
 -       * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
 -       * the filter.
 -       */
 -      if (f->fs.newdmac || f->fs.newvlan) {
 -              /* allocate L2T entry for new filter */
 -              f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
 -                                              f->fs.eport, f->fs.dmac);
 -              if (f->l2t == NULL) {
 -                      kfree_skb(skb);
 -                      return -ENOMEM;
 -              }
 -      }
 -
 -      ftid = adapter->tids.ftid_base + fidx;
 -
 -      fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
 -      memset(fwr, 0, sizeof(*fwr));
 -
 -      /* It would be nice to put most of the following in t4_hw.c but most
 -       * of the work is translating the cxgbtool ch_filter_specification
 -       * into the Work Request and the definition of that structure is
 -       * currently in cxgbtool.h which isn't appropriate to pull into the
 -       * common code.  We may eventually try to come up with a more neutral
 -       * filter specification structure but for now it's easiest to simply
 -       * put this fairly direct code in line ...
 -       */
 -      fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
 -      fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
 -      fwr->tid_to_iq =
 -              htonl(FW_FILTER_WR_TID_V(ftid) |
 -                    FW_FILTER_WR_RQTYPE_V(f->fs.type) |
 -                    FW_FILTER_WR_NOREPLY_V(0) |
 -                    FW_FILTER_WR_IQ_V(f->fs.iq));
 -      fwr->del_filter_to_l2tix =
 -              htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
 -                    FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
 -                    FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
 -                    FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
 -                    FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
 -                    FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
 -                    FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
 -                    FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
 -                    FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
 -                                           f->fs.newvlan == VLAN_REWRITE) |
 -                    FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
 -                                          f->fs.newvlan == VLAN_REWRITE) |
 -                    FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
 -                    FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
 -                    FW_FILTER_WR_PRIO_V(f->fs.prio) |
 -                    FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
 -      fwr->ethtype = htons(f->fs.val.ethtype);
 -      fwr->ethtypem = htons(f->fs.mask.ethtype);
 -      fwr->frag_to_ovlan_vldm =
 -              (FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
 -               FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
 -               FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
 -               FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
 -               FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
 -               FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
 -      fwr->smac_sel = 0;
 -      fwr->rx_chan_rx_rpl_iq =
 -              htons(FW_FILTER_WR_RX_CHAN_V(0) |
 -                    FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
 -      fwr->maci_to_matchtypem =
 -              htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
 -                    FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
 -                    FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
 -                    FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
 -                    FW_FILTER_WR_PORT_V(f->fs.val.iport) |
 -                    FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
 -                    FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
 -                    FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
 -      fwr->ptcl = f->fs.val.proto;
 -      fwr->ptclm = f->fs.mask.proto;
 -      fwr->ttyp = f->fs.val.tos;
 -      fwr->ttypm = f->fs.mask.tos;
 -      fwr->ivlan = htons(f->fs.val.ivlan);
 -      fwr->ivlanm = htons(f->fs.mask.ivlan);
 -      fwr->ovlan = htons(f->fs.val.ovlan);
 -      fwr->ovlanm = htons(f->fs.mask.ovlan);
 -      memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
 -      memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
 -      memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
 -      memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
 -      fwr->lp = htons(f->fs.val.lport);
 -      fwr->lpm = htons(f->fs.mask.lport);
 -      fwr->fp = htons(f->fs.val.fport);
 -      fwr->fpm = htons(f->fs.mask.fport);
 -      if (f->fs.newsmac)
 -              memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
 -
 -      /* Mark the filter as "pending" and ship off the Filter Work Request.
 -       * When we get the Work Request Reply we'll clear the pending status.
 -       */
 -      f->pending = 1;
 -      set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
 -      t4_ofld_send(adapter, skb);
 -      return 0;
 -}
 -
 -/* Delete the filter at a specified index.
 - */
 -static int del_filter_wr(struct adapter *adapter, int fidx)
 -{
 -      struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
 -      struct sk_buff *skb;
 -      struct fw_filter_wr *fwr;
 -      unsigned int len, ftid;
 -
 -      len = sizeof(*fwr);
 -      ftid = adapter->tids.ftid_base + fidx;
 -
 -      skb = alloc_skb(len, GFP_KERNEL);
 -      if (!skb)
 -              return -ENOMEM;
 -
 -      fwr = (struct fw_filter_wr *)__skb_put(skb, len);
 -      t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
 -
 -      /* Mark the filter as "pending" and ship off the Filter Work Request.
 -       * When we get the Work Request Reply we'll clear the pending status.
 -       */
 -      f->pending = 1;
 -      t4_mgmt_tx(adapter, skb);
 -      return 0;
 -}
 -
  static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
                             void *accel_priv, select_queue_fallback_t fallback)
  {
@@@ -1320,22 -1723,19 +1320,22 @@@ EXPORT_SYMBOL(cxgb4_remove_tid)
   */
  static int tid_init(struct tid_info *t)
  {
 -      size_t size;
 -      unsigned int stid_bmap_size;
 -      unsigned int natids = t->natids;
        struct adapter *adap = container_of(t, struct adapter, tids);
 +      unsigned int max_ftids = t->nftids + t->nsftids;
 +      unsigned int natids = t->natids;
 +      unsigned int stid_bmap_size;
 +      unsigned int ftid_bmap_size;
 +      size_t size;
  
        stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
 +      ftid_bmap_size = BITS_TO_LONGS(t->nftids);
        size = t->ntids * sizeof(*t->tid_tab) +
               natids * sizeof(*t->atid_tab) +
               t->nstids * sizeof(*t->stid_tab) +
               t->nsftids * sizeof(*t->stid_tab) +
               stid_bmap_size * sizeof(long) +
 -             t->nftids * sizeof(*t->ftid_tab) +
 -             t->nsftids * sizeof(*t->ftid_tab);
 +             max_ftids * sizeof(*t->ftid_tab) +
 +             ftid_bmap_size * sizeof(long);
  
        t->tid_tab = t4_alloc_mem(size);
        if (!t->tid_tab)
        t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
        t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
        t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
 +      t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
        spin_lock_init(&t->stid_lock);
        spin_lock_init(&t->atid_lock);
 +      spin_lock_init(&t->ftid_lock);
  
        t->stids_in_use = 0;
        t->sftids_in_use = 0;
                        t->atid_tab[natids - 1].next = &t->atid_tab[natids];
                t->afree = t->atid_tab;
        }
 -      bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
 -      /* Reserve stid 0 for T4/T5 adapters */
 -      if (!t->stid_base &&
 -          (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5))
 -              __set_bit(0, t->stid_bmap);
  
 +      if (is_offload(adap)) {
 +              bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
 +              /* Reserve stid 0 for T4/T5 adapters */
 +              if (!t->stid_base &&
 +                  CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
 +                      __set_bit(0, t->stid_bmap);
 +      }
 +
 +      bitmap_zero(t->ftid_bmap, t->nftids);
        return 0;
  }
  
@@@ -1922,7 -2316,7 +1922,7 @@@ static void disable_dbs(struct adapter 
  
        for_each_ethrxq(&adap->sge, i)
                disable_txq_db(&adap->sge.ethtxq[i].q);
 -      for_each_iscsirxq(&adap->sge, i)
 +      for_each_ofldtxq(&adap->sge, i)
                disable_txq_db(&adap->sge.ofldtxq[i].q);
        for_each_port(adap, i)
                disable_txq_db(&adap->sge.ctrlq[i].q);
@@@ -1934,7 -2328,7 +1934,7 @@@ static void enable_dbs(struct adapter *
  
        for_each_ethrxq(&adap->sge, i)
                enable_txq_db(adap, &adap->sge.ethtxq[i].q);
 -      for_each_iscsirxq(&adap->sge, i)
 +      for_each_ofldtxq(&adap->sge, i)
                enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
        for_each_port(adap, i)
                enable_txq_db(adap, &adap->sge.ctrlq[i].q);
  
  static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
  {
 -      if (adap->uld_handle[CXGB4_ULD_RDMA])
 -              ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
 -                              cmd);
 +      enum cxgb4_uld type = CXGB4_ULD_RDMA;
 +
 +      if (adap->uld && adap->uld[type].handle)
 +              adap->uld[type].control(adap->uld[type].handle, cmd);
  }
  
  static void process_db_full(struct work_struct *work)
        if (ret)
                CH_WARN(adap, "DB drop recovery failed.\n");
  }
 +
  static void recover_all_queues(struct adapter *adap)
  {
        int i;
  
        for_each_ethrxq(&adap->sge, i)
                sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
 -      for_each_iscsirxq(&adap->sge, i)
 +      for_each_ofldtxq(&adap->sge, i)
                sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
        for_each_port(adap, i)
                sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
@@@ -2071,12 -2463,95 +2071,12 @@@ void t4_db_dropped(struct adapter *adap
        queue_work(adap->workq, &adap->db_drop_task);
  }
  
 -static void uld_attach(struct adapter *adap, unsigned int uld)
 -{
 -      void *handle;
 -      struct cxgb4_lld_info lli;
 -      unsigned short i;
 -
 -      lli.pdev = adap->pdev;
 -      lli.pf = adap->pf;
 -      lli.l2t = adap->l2t;
 -      lli.tids = &adap->tids;
 -      lli.ports = adap->port;
 -      lli.vr = &adap->vres;
 -      lli.mtus = adap->params.mtus;
 -      if (uld == CXGB4_ULD_RDMA) {
 -              lli.rxq_ids = adap->sge.rdma_rxq;
 -              lli.ciq_ids = adap->sge.rdma_ciq;
 -              lli.nrxq = adap->sge.rdmaqs;
 -              lli.nciq = adap->sge.rdmaciqs;
 -      } else if (uld == CXGB4_ULD_ISCSI) {
 -              lli.rxq_ids = adap->sge.iscsi_rxq;
 -              lli.nrxq = adap->sge.iscsiqsets;
 -      } else if (uld == CXGB4_ULD_ISCSIT) {
 -              lli.rxq_ids = adap->sge.iscsit_rxq;
 -              lli.nrxq = adap->sge.niscsitq;
 -      }
 -      lli.ntxq = adap->sge.iscsiqsets;
 -      lli.nchan = adap->params.nports;
 -      lli.nports = adap->params.nports;
 -      lli.wr_cred = adap->params.ofldq_wr_cred;
 -      lli.adapter_type = adap->params.chip;
 -      lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
 -      lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
 -      lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
 -      lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
 -      lli.iscsi_ppm = &adap->iscsi_ppm;
 -      lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
 -      lli.udb_density = 1 << adap->params.sge.eq_qpp;
 -      lli.ucq_density = 1 << adap->params.sge.iq_qpp;
 -      lli.filt_mode = adap->params.tp.vlan_pri_map;
 -      /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
 -      for (i = 0; i < NCHAN; i++)
 -              lli.tx_modq[i] = i;
 -      lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
 -      lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
 -      lli.fw_vers = adap->params.fw_vers;
 -      lli.dbfifo_int_thresh = dbfifo_int_thresh;
 -      lli.sge_ingpadboundary = adap->sge.fl_align;
 -      lli.sge_egrstatuspagesize = adap->sge.stat_len;
 -      lli.sge_pktshift = adap->sge.pktshift;
 -      lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
 -      lli.max_ordird_qp = adap->params.max_ordird_qp;
 -      lli.max_ird_adapter = adap->params.max_ird_adapter;
 -      lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
 -      lli.nodeid = dev_to_node(adap->pdev_dev);
 -      lli.fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
 -
 -      handle = ulds[uld].add(&lli);
 -      if (IS_ERR(handle)) {
 -              dev_warn(adap->pdev_dev,
 -                       "could not attach to the %s driver, error %ld\n",
 -                       uld_str[uld], PTR_ERR(handle));
 -              return;
 -      }
 -
 -      adap->uld_handle[uld] = handle;
 -
 +void t4_register_netevent_notifier(void)
 +{
        if (!netevent_registered) {
                register_netevent_notifier(&cxgb4_netevent_nb);
                netevent_registered = true;
        }
 -
 -      if (adap->flags & FULL_INIT_DONE)
 -              ulds[uld].state_change(handle, CXGB4_STATE_UP);
 -}
 -
 -static void attach_ulds(struct adapter *adap)
 -{
 -      unsigned int i;
 -
 -      spin_lock(&adap_rcu_lock);
 -      list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
 -      spin_unlock(&adap_rcu_lock);
 -
 -      mutex_lock(&uld_mutex);
 -      list_add_tail(&adap->list_node, &adapter_list);
 -      for (i = 0; i < CXGB4_ULD_MAX; i++)
 -              if (ulds[i].add)
 -                      uld_attach(adap, i);
 -      mutex_unlock(&uld_mutex);
  }
  
  static void detach_ulds(struct adapter *adap)
        mutex_lock(&uld_mutex);
        list_del(&adap->list_node);
        for (i = 0; i < CXGB4_ULD_MAX; i++)
 -              if (adap->uld_handle[i]) {
 -                      ulds[i].state_change(adap->uld_handle[i],
 -                                           CXGB4_STATE_DETACH);
 -                      adap->uld_handle[i] = NULL;
 -              }
 -      for (i = 0; i < CXGB4_PCI_ULD_MAX; i++)
                if (adap->uld && adap->uld[i].handle) {
                        adap->uld[i].state_change(adap->uld[i].handle,
                                             CXGB4_STATE_DETACH);
                netevent_registered = false;
        }
        mutex_unlock(&uld_mutex);
 -
 -      spin_lock(&adap_rcu_lock);
 -      list_del_rcu(&adap->rcu_node);
 -      spin_unlock(&adap_rcu_lock);
  }
  
  static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
  
        mutex_lock(&uld_mutex);
        for (i = 0; i < CXGB4_ULD_MAX; i++)
 -              if (adap->uld_handle[i])
 -                      ulds[i].state_change(adap->uld_handle[i], new_state);
 -      for (i = 0; i < CXGB4_PCI_ULD_MAX; i++)
                if (adap->uld && adap->uld[i].handle)
                        adap->uld[i].state_change(adap->uld[i].handle,
                                                  new_state);
        mutex_unlock(&uld_mutex);
  }
  
 -/**
 - *    cxgb4_register_uld - register an upper-layer driver
 - *    @type: the ULD type
 - *    @p: the ULD methods
 - *
 - *    Registers an upper-layer driver with this driver and notifies the ULD
 - *    about any presently available devices that support its type.  Returns
 - *    %-EBUSY if a ULD of the same type is already registered.
 - */
 -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
 -{
 -      int ret = 0;
 -      struct adapter *adap;
 -
 -      if (type >= CXGB4_ULD_MAX)
 -              return -EINVAL;
 -      mutex_lock(&uld_mutex);
 -      if (ulds[type].add) {
 -              ret = -EBUSY;
 -              goto out;
 -      }
 -      ulds[type] = *p;
 -      list_for_each_entry(adap, &adapter_list, list_node)
 -              uld_attach(adap, type);
 -out:  mutex_unlock(&uld_mutex);
 -      return ret;
 -}
 -EXPORT_SYMBOL(cxgb4_register_uld);
 -
 -/**
 - *    cxgb4_unregister_uld - unregister an upper-layer driver
 - *    @type: the ULD type
 - *
 - *    Unregisters an existing upper-layer driver.
 - */
 -int cxgb4_unregister_uld(enum cxgb4_uld type)
 -{
 -      struct adapter *adap;
 -
 -      if (type >= CXGB4_ULD_MAX)
 -              return -EINVAL;
 -      mutex_lock(&uld_mutex);
 -      list_for_each_entry(adap, &adapter_list, list_node)
 -              adap->uld_handle[type] = NULL;
 -      ulds[type].add = NULL;
 -      mutex_unlock(&uld_mutex);
 -      return 0;
 -}
 -EXPORT_SYMBOL(cxgb4_unregister_uld);
 -
  #if IS_ENABLED(CONFIG_IPV6)
  static int cxgb4_inet6addr_handler(struct notifier_block *this,
                                   unsigned long event, void *data)
@@@ -2214,6 -2752,7 +2214,6 @@@ static int cxgb_up(struct adapter *adap
                                  adap->msix_info[0].desc, adap);
                if (err)
                        goto irq_err;
 -
                err = request_msix_queue_irqs(adap);
                if (err) {
                        free_irq(adap->msix_info[0].vec, adap);
@@@ -2291,6 -2830,40 +2291,6 @@@ static int cxgb_close(struct net_devic
        return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
  }
  
 -/* Return an error number if the indicated filter isn't writable ...
 - */
 -static int writable_filter(struct filter_entry *f)
 -{
 -      if (f->locked)
 -              return -EPERM;
 -      if (f->pending)
 -              return -EBUSY;
 -
 -      return 0;
 -}
 -
 -/* Delete the filter at the specified index (if valid).  The checks for all
 - * the common problems with doing this like the filter being locked, currently
 - * pending in another operation, etc.
 - */
 -static int delete_filter(struct adapter *adapter, unsigned int fidx)
 -{
 -      struct filter_entry *f;
 -      int ret;
 -
 -      if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
 -              return -EINVAL;
 -
 -      f = &adapter->tids.ftid_tab[fidx];
 -      ret = writable_filter(f);
 -      if (ret)
 -              return ret;
 -      if (f->valid)
 -              return del_filter_wr(adapter, fidx);
 -
 -      return 0;
 -}
 -
  int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
                __be32 sip, __be16 sport, __be16 vlan,
                unsigned int queue, unsigned char port, unsigned char mask)
@@@ -2512,53 -3085,10 +2512,53 @@@ static int cxgb_change_mtu(struct net_d
  }
  
  #ifdef CONFIG_PCI_IOV
 +static int dummy_open(struct net_device *dev)
 +{
 +      /* Turn carrier off since we don't have to transmit anything on this
 +       * interface.
 +       */
 +      netif_carrier_off(dev);
 +      return 0;
 +}
 +
 +/* Fill MAC address that will be assigned by the FW */
 +static void fill_vf_station_mac_addr(struct adapter *adap)
 +{
 +      unsigned int i;
 +      u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
 +      int err;
 +      u8 *na;
 +      u16 a, b;
 +
 +      err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
 +      if (!err) {
 +              na = adap->params.vpd.na;
 +              for (i = 0; i < ETH_ALEN; i++)
 +                      hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
 +                                    hex2val(na[2 * i + 1]));
 +              a = (hw_addr[0] << 8) | hw_addr[1];
 +              b = (hw_addr[1] << 8) | hw_addr[2];
 +              a ^= b;
 +              a |= 0x0200;    /* locally assigned Ethernet MAC address */
 +              a &= ~0x0100;   /* not a multicast Ethernet MAC address */
 +              macaddr[0] = a >> 8;
 +              macaddr[1] = a & 0xff;
 +
 +              for (i = 2; i < 5; i++)
 +                      macaddr[i] = hw_addr[i + 1];
 +
 +              for (i = 0; i < adap->num_vfs; i++) {
 +                      macaddr[5] = adap->pf * 16 + i;
 +                      ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr);
 +              }
 +      }
 +}
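fill_vf_station_mac_addr() above derives per-VF MAC addresses from the adapter's VPD network-address bytes and then forces the result to be a locally administered, unicast address via a |= 0x0200 / a &= ~0x0100 (both bits land in the first octet once a is split back into macaddr[0] and macaddr[1]). A tiny standalone illustration of the same two bit operations applied directly to a 6-byte MAC (the sample address is a placeholder):

#include <stdio.h>

static void make_locally_administered(unsigned char mac[6])
{
	mac[0] |= 0x02;		/* locally assigned Ethernet MAC address */
	mac[0] &= ~0x01;	/* not a multicast Ethernet MAC address */
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x07, 0x43, 0x12, 0x34, 0x56 };

	make_locally_administered(mac);
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}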
 +
  static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
  {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
 +      int ret;
  
        /* verify MAC addr is valid */
        if (!is_valid_ether_addr(mac)) {
  
        dev_info(pi->adapter->pdev_dev,
                 "Setting MAC %pM on VF %d\n", mac, vf);
 -      return t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
 +      ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
 +      if (!ret)
 +              ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
 +      return ret;
 +}
 +
 +static int cxgb_get_vf_config(struct net_device *dev,
 +                            int vf, struct ifla_vf_info *ivi)
 +{
 +      struct port_info *pi = netdev_priv(dev);
 +      struct adapter *adap = pi->adapter;
 +
 +      if (vf >= adap->num_vfs)
 +              return -EINVAL;
 +      ivi->vf = vf;
 +      ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
 +      return 0;
  }
  #endif
  
@@@ -2626,116 -3140,6 +2626,116 @@@ static void cxgb_netpoll(struct net_dev
  }
  #endif
  
 +static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 +{
 +      struct port_info *pi = netdev_priv(dev);
 +      struct adapter *adap = pi->adapter;
 +      struct sched_class *e;
 +      struct ch_sched_params p;
 +      struct ch_sched_queue qe;
 +      u32 req_rate;
 +      int err = 0;
 +
 +      if (!can_sched(dev))
 +              return -ENOTSUPP;
 +
 +      if (index < 0 || index > pi->nqsets - 1)
 +              return -EINVAL;
 +
 +      if (!(adap->flags & FULL_INIT_DONE)) {
 +              dev_err(adap->pdev_dev,
 +                      "Failed to rate limit on queue %d. Link Down?\n",
 +                      index);
 +              return -EINVAL;
 +      }
 +
 +      /* Convert from Mbps to Kbps */
 +      req_rate = rate << 10;
 +
 +      /* Max rate is 10 Gbps */
 +      if (req_rate >= SCHED_MAX_RATE_KBPS) {
 +              dev_err(adap->pdev_dev,
 +                      "Invalid rate %u Mbps, Max rate is %u Kbps\n",
 +                      rate, SCHED_MAX_RATE_KBPS);
 +              return -ERANGE;
 +      }
 +
 +      /* First unbind the queue from any existing class */
 +      memset(&qe, 0, sizeof(qe));
 +      qe.queue = index;
 +      qe.class = SCHED_CLS_NONE;
 +
 +      err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE);
 +      if (err) {
 +              dev_err(adap->pdev_dev,
 +                      "Unbinding Queue %d on port %d fail. Err: %d\n",
 +                      index, pi->port_id, err);
 +              return err;
 +      }
 +
 +      /* Queue already unbound */
 +      if (!req_rate)
 +              return 0;
 +
 +      /* Fetch any available unused or matching scheduling class */
 +      memset(&p, 0, sizeof(p));
 +      p.type = SCHED_CLASS_TYPE_PACKET;
 +      p.u.params.level    = SCHED_CLASS_LEVEL_CL_RL;
 +      p.u.params.mode     = SCHED_CLASS_MODE_CLASS;
 +      p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
 +      p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
 +      p.u.params.channel  = pi->tx_chan;
 +      p.u.params.class    = SCHED_CLS_NONE;
 +      p.u.params.minrate  = 0;
 +      p.u.params.maxrate  = req_rate;
 +      p.u.params.weight   = 0;
 +      p.u.params.pktsize  = dev->mtu;
 +
 +      e = cxgb4_sched_class_alloc(dev, &p);
 +      if (!e)
 +              return -ENOMEM;
 +
 +      /* Bind the queue to a scheduling class */
 +      memset(&qe, 0, sizeof(qe));
 +      qe.queue = index;
 +      qe.class = e->idx;
 +
 +      err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE);
 +      if (err)
 +              dev_err(adap->pdev_dev,
 +                      "Queue rate limiting failed. Err: %d\n", err);
 +      return err;
 +}
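cxgb_set_tx_maxrate() implements .ndo_set_tx_maxrate by unbinding the queue from any scheduling class, allocating a class-rate-limit class capped at the requested rate, and binding the queue to it. Assuming the hook is driven, as usual, by the per-queue tx_maxrate sysfs attribute, a userspace caller would look roughly like this (interface, queue and rate are placeholders; the value is interpreted in Mbps):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/net/eth0/queues/tx-0/tx_maxrate", "w");

	if (!f)
		return 1;
	fprintf(f, "1000\n");	/* cap TX queue 0 of eth0 at 1000 Mbps */
	return fclose(f) ? 1 : 0;
}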
 +
 +static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
 +                       struct tc_to_netdev *tc)
 +{
 +      struct port_info *pi = netdev2pinfo(dev);
 +      struct adapter *adap = netdev2adap(dev);
 +
 +      if (!(adap->flags & FULL_INIT_DONE)) {
 +              dev_err(adap->pdev_dev,
 +                      "Failed to setup tc on port %d. Link Down?\n",
 +                      pi->port_id);
 +              return -EINVAL;
 +      }
 +
 +      if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
 +          tc->type == TC_SETUP_CLSU32) {
 +              switch (tc->cls_u32->command) {
 +              case TC_CLSU32_NEW_KNODE:
 +              case TC_CLSU32_REPLACE_KNODE:
 +                      return cxgb4_config_knode(dev, proto, tc->cls_u32);
 +              case TC_CLSU32_DELETE_KNODE:
 +                      return cxgb4_delete_knode(dev, proto, tc->cls_u32);
 +              default:
 +                      return -EOPNOTSUPP;
 +              }
 +      }
 +
 +      return -EOPNOTSUPP;
 +}
 +
  static const struct net_device_ops cxgb4_netdev_ops = {
        .ndo_open             = cxgb_open,
        .ndo_stop             = cxgb_close,
  #ifdef CONFIG_NET_RX_BUSY_POLL
        .ndo_busy_poll        = cxgb_busy_poll,
  #endif
 +      .ndo_set_tx_maxrate   = cxgb_set_tx_maxrate,
 +      .ndo_setup_tc         = cxgb_setup_tc,
  };
  
 -static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
  #ifdef CONFIG_PCI_IOV
 +static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
 +      .ndo_open             = dummy_open,
        .ndo_set_vf_mac       = cxgb_set_vf_mac,
 -#endif
 +      .ndo_get_vf_config    = cxgb_get_vf_config,
  };
 +#endif
  
  static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
  {
@@@ -3625,12 -4025,6 +3625,12 @@@ static int adap_init0(struct adapter *a
        adap->clipt_start = val[0];
        adap->clipt_end = val[1];
  
 +      /* We don't yet have a PARAMs call to retrieve the number of Traffic
 +       * Classes supported by the hardware/firmware so we hard code it here
 +       * for now.
 +       */
 +      adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
 +
        /* query params related to active filter region */
        params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
        params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
                adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0);
        }
  
+       /* See if FW supports FW_RI_FR_NSMR_TPTE_WR work request */
+       params[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR);
+       ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+                             1, params, val);
+       adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0);
        /*
         * Get device capabilities so we can determine what resources we need
         * to manage.
                adap->params.ofldq_wr_cred = val[5];
  
                adap->params.offload = 1;
 +              adap->num_ofld_uld += 1;
        }
        if (caps_cmd.rdmacaps) {
                params[0] = FW_PARAM_PFVF(STAG_START);
                         "max_ordird_qp %d max_ird_adapter %d\n",
                         adap->params.max_ordird_qp,
                         adap->params.max_ird_adapter);
 +              adap->num_ofld_uld += 2;
        }
        if (caps_cmd.iscsicaps) {
                params[0] = FW_PARAM_PFVF(ISCSI_START);
                        goto bye;
                adap->vres.iscsi.start = val[0];
                adap->vres.iscsi.size = val[1] - val[0] + 1;
 +              /* LIO target and cxgb4i initiator */
 +              adap->num_ofld_uld += 2;
        }
        if (caps_cmd.cryptocaps) {
                /* Should query params here...TODO */
@@@ -3966,17 -4362,10 +3972,17 @@@ static const struct pci_error_handlers 
        .resume         = eeh_resume,
  };
  
 +/* Return true if the Link Configuration supports "High Speeds" (those greater
 + * than 1Gb/s).
 + */
  static inline bool is_x_10g_port(const struct link_config *lc)
  {
 -      return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
 -             (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
 +      unsigned int speeds, high_speeds;
 +
 +      speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
 +      high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
 +
 +      return high_speeds != 0;
  }
  
  /*
@@@ -3991,14 -4380,14 +3997,14 @@@ static void cfg_queues(struct adapter *
  #ifndef CONFIG_CHELSIO_T4_DCB
        int q10g = 0;
  #endif
 -      int ciq_size;
  
        /* Reduce memory usage in kdump environment, disable all offload.
         */
        if (is_kdump_kernel()) {
                adap->params.offload = 0;
                adap->params.crypto = 0;
 -      } else if (adap->num_uld && uld_mem_alloc(adap)) {
 +      } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) {
 +              adap->params.offload = 0;
                adap->params.crypto = 0;
        }
  
        s->ethqsets = qidx;
        s->max_ethqsets = qidx;   /* MSI-X may lower it later */
  
 -      if (is_offload(adap)) {
 +      if (is_uld(adap)) {
                /*
                 * For offload we use 1 queue/channel if all ports are up to 1G,
                 * otherwise we divide all available queues amongst the channels
                 * capped by the number of available cores.
                 */
                if (n10g) {
 -                      i = min_t(int, ARRAY_SIZE(s->iscsirxq),
 -                                num_online_cpus());
 -                      s->iscsiqsets = roundup(i, adap->params.nports);
 -              } else
 -                      s->iscsiqsets = adap->params.nports;
 -              /* For RDMA one Rx queue per channel suffices */
 -              s->rdmaqs = adap->params.nports;
 -              /* Try and allow at least 1 CIQ per cpu rounding down
 -               * to the number of ports, with a minimum of 1 per port.
 -               * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
 -               * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
 -               * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
 -               */
 -              s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
 -              s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
 -                              adap->params.nports;
 -              s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
 -
 -              if (!is_t4(adap->params.chip))
 -                      s->niscsitq = s->iscsiqsets;
 +                      i = num_online_cpus();
 +                      s->ofldqsets = roundup(i, adap->params.nports);
 +              } else {
 +                      s->ofldqsets = adap->params.nports;
 +              }
        }
  
        for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
        for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
                s->ofldtxq[i].q.size = 1024;
  
 -      for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) {
 -              struct sge_ofld_rxq *r = &s->iscsirxq[i];
 -
 -              init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
 -              r->rspq.uld = CXGB4_ULD_ISCSI;
 -              r->fl.size = 72;
 -      }
 -
 -      if (!is_t4(adap->params.chip)) {
 -              for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
 -                      struct sge_ofld_rxq *r = &s->iscsitrxq[i];
 -
 -                      init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
 -                      r->rspq.uld = CXGB4_ULD_ISCSIT;
 -                      r->fl.size = 72;
 -              }
 -      }
 -
 -      for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
 -              struct sge_ofld_rxq *r = &s->rdmarxq[i];
 -
 -              init_rspq(adap, &r->rspq, 5, 1, 511, 64);
 -              r->rspq.uld = CXGB4_ULD_RDMA;
 -              r->fl.size = 72;
 -      }
 -
 -      ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
 -      if (ciq_size > SGE_MAX_IQ_SIZE) {
 -              CH_WARN(adap, "CIQ size too small for available IQs\n");
 -              ciq_size = SGE_MAX_IQ_SIZE;
 -      }
 -
 -      for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
 -              struct sge_ofld_rxq *r = &s->rdmaciq[i];
 -
 -              init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
 -              r->rspq.uld = CXGB4_ULD_RDMA;
 -      }
 -
        init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
 -      init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
 +      init_rspq(adap, &s->intrq, 0, 1, 512, 64);
  }
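As a worked example of the offload queue sizing above (numbers are hypothetical): a 4-port adapter with at least one high-speed port and 6 online CPUs gets s->ofldqsets = roundup(6, 4) = 8 queue sets shared across the channels, while an all-1G configuration falls back to one queue set per port, i.e. 4.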
  
  /*
@@@ -4109,15 -4552,7 +4115,15 @@@ static void reduce_ethqs(struct adapte
  static int get_msix_info(struct adapter *adap)
  {
        struct uld_msix_info *msix_info;
 -      int max_ingq = (MAX_OFLD_QSETS * adap->num_uld);
 +      unsigned int max_ingq = 0;
 +
 +      if (is_offload(adap))
 +              max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
 +      if (is_pci_uld(adap))
 +              max_ingq += MAX_OFLD_QSETS * adap->num_uld;
 +
 +      if (!max_ingq)
 +              goto out;
  
        msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
        if (!msix_info)
        }
        spin_lock_init(&adap->msix_bmap_ulds.lock);
        adap->msix_info_ulds = msix_info;
 +out:
        return 0;
  }
  
  static void free_msix_info(struct adapter *adap)
  {
 -      if (!adap->num_uld)
 +      if (!(adap->num_uld && adap->num_ofld_uld))
                return;
  
        kfree(adap->msix_info_ulds);
@@@ -4156,32 -4590,32 +4162,32 @@@ static int enable_msix(struct adapter *
        struct msix_entry *entries;
        int max_ingq = MAX_INGQ;
  
 -      max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
 +      if (is_pci_uld(adap))
 +              max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
 +      if (is_offload(adap))
 +              max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
        entries = kmalloc(sizeof(*entries) * (max_ingq + 1),
                          GFP_KERNEL);
        if (!entries)
                return -ENOMEM;
  
        /* map for msix */
 -      if (is_pci_uld(adap) && get_msix_info(adap))
 +      if (get_msix_info(adap)) {
 +              adap->params.offload = 0;
                adap->params.crypto = 0;
 +      }
  
        for (i = 0; i < max_ingq + 1; ++i)
                entries[i].entry = i;
  
        want = s->max_ethqsets + EXTRA_VECS;
        if (is_offload(adap)) {
 -              want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets +
 -                      s->niscsitq;
 -              /* need nchan for each possible ULD */
 -              if (is_t4(adap->params.chip))
 -                      ofld_need = 3 * nchan;
 -              else
 -                      ofld_need = 4 * nchan;
 +              want += adap->num_ofld_uld * s->ofldqsets;
 +              ofld_need = adap->num_ofld_uld * nchan;
        }
        if (is_pci_uld(adap)) {
 -              want += netif_get_num_default_rss_queues() * nchan;
 -              uld_need = nchan;
 +              want += adap->num_uld * s->ofldqsets;
 +              uld_need = adap->num_uld * nchan;
        }
  #ifdef CONFIG_CHELSIO_T4_DCB
        /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
                if (i < s->ethqsets)
                        reduce_ethqs(adap, i);
        }
 -      if (is_pci_uld(adap)) {
 +      if (is_uld(adap)) {
                if (allocated < want)
                        s->nqs_per_uld = nchan;
                else
 -                      s->nqs_per_uld = netif_get_num_default_rss_queues() *
 -                                      nchan;
 -      }
 -
 -      if (is_offload(adap)) {
 -              if (allocated < want) {
 -                      s->rdmaqs = nchan;
 -                      s->rdmaciqs = nchan;
 -
 -                      if (!is_t4(adap->params.chip))
 -                              s->niscsitq = nchan;
 -              }
 -
 -              /* leftovers go to OFLD */
 -              i = allocated - EXTRA_VECS - s->max_ethqsets -
 -                      s->rdmaqs - s->rdmaciqs - s->niscsitq;
 -              if (is_pci_uld(adap))
 -                      i -= s->nqs_per_uld * adap->num_uld;
 -              s->iscsiqsets = (i / nchan) * nchan;  /* round down */
 -
 +                      s->nqs_per_uld = s->ofldqsets;
        }
  
 -      for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i)
 +      for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
                adap->msix_info[i].vec = entries[i].vector;
 -      if (is_pci_uld(adap)) {
 -              for (j = 0 ; i < allocated; ++i, j++)
 +      if (is_uld(adap)) {
 +              for (j = 0 ; i < allocated; ++i, j++) {
                        adap->msix_info_ulds[j].vec = entries[i].vector;
 +                      adap->msix_info_ulds[j].idx = i;
 +              }
                adap->msix_bmap_ulds.mapsize = j;
        }
        dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
 -               "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n",
 -               allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs,
 -               s->rdmaciqs, s->nqs_per_uld);
 +               "nic %d per uld %d\n",
 +               allocated, s->max_ethqsets, s->nqs_per_uld);
  
        kfree(entries);
        return 0;
@@@ -4410,12 -4862,8 +4416,12 @@@ static void print_port_info(const struc
                bufp += sprintf(bufp, "1000/");
        if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
                bufp += sprintf(bufp, "10G/");
 +      if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
 +              bufp += sprintf(bufp, "25G/");
        if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
                bufp += sprintf(bufp, "40G/");
 +      if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
 +              bufp += sprintf(bufp, "100G/");
        if (bufp != buf)
                --bufp;
        sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
@@@ -4441,9 -4889,7 +4447,9 @@@ static void free_some_resources(struct 
        unsigned int i;
  
        t4_free_mem(adapter->l2t);
 +      t4_cleanup_sched(adapter);
        t4_free_mem(adapter->tids.tid_tab);
 +      cxgb4_cleanup_tc_u32(adapter);
        kfree(adapter->sge.egr_map);
        kfree(adapter->sge.ingr_map);
        kfree(adapter->sge.starving_fl);
@@@ -4494,51 -4940,6 +4500,51 @@@ static int get_chip_type(struct pci_de
  }
  
  #ifdef CONFIG_PCI_IOV
 +static void dummy_setup(struct net_device *dev)
 +{
 +      dev->type = ARPHRD_NONE;
 +      dev->mtu = 0;
 +      dev->hard_header_len = 0;
 +      dev->addr_len = 0;
 +      dev->tx_queue_len = 0;
 +      dev->flags |= IFF_NOARP;
 +      dev->priv_flags |= IFF_NO_QUEUE;
 +
 +      /* Initialize the device structure. */
 +      dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
 +      dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
 +      dev->destructor = free_netdev;
 +}
 +
 +static int config_mgmt_dev(struct pci_dev *pdev)
 +{
 +      struct adapter *adap = pci_get_drvdata(pdev);
 +      struct net_device *netdev;
 +      struct port_info *pi;
 +      char name[IFNAMSIZ];
 +      int err;
 +
 +      snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf);
 +      netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup);
 +      if (!netdev)
 +              return -ENOMEM;
 +
 +      pi = netdev_priv(netdev);
 +      pi->adapter = adap;
 +      SET_NETDEV_DEV(netdev, &pdev->dev);
 +
 +      adap->port[0] = netdev;
 +
 +      err = register_netdev(adap->port[0]);
 +      if (err) {
 +              pr_info("Unable to register VF mgmt netdev %s\n", name);
 +              free_netdev(adap->port[0]);
 +              adap->port[0] = NULL;
 +              return err;
 +      }
 +      return 0;
 +}
 +
  static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
  {
        struct adapter *adap = pci_get_drvdata(pdev);
         */
        if (!num_vfs) {
                pci_disable_sriov(pdev);
 -              if (adap->port[0]->reg_state == NETREG_REGISTERED)
 +              if (adap->port[0]) {
                        unregister_netdev(adap->port[0]);
 +                      adap->port[0] = NULL;
 +              }
 +              /* free VF resources */
 +              kfree(adap->vfinfo);
 +              adap->vfinfo = NULL;
 +              adap->num_vfs = 0;
                return num_vfs;
        }
  
                if (err)
                        return err;
  
 -              if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) {
 -                      err = register_netdev(adap->port[0]);
 -                      if (err < 0)
 -                              pr_info("Unable to register VF mgmt netdev\n");
 -              }
 +              adap->num_vfs = num_vfs;
 +              err = config_mgmt_dev(pdev);
 +              if (err)
 +                      return err;
        }
 +
 +      adap->vfinfo = kcalloc(adap->num_vfs,
 +                             sizeof(struct vf_info), GFP_KERNEL);
 +      if (adap->vfinfo)
 +              fill_vf_station_mac_addr(adap);
        return num_vfs;
  }
  #endif
@@@ -4610,6 -5001,9 +4616,6 @@@ static int init_one(struct pci_dev *pde
        bool highdma = false;
        struct adapter *adapter = NULL;
        struct net_device *netdev;
 -#ifdef CONFIG_PCI_IOV
 -      char name[IFNAMSIZ];
 -#endif
        void __iomem *regs;
        u32 whoami, pl_rev;
        enum chip_type chip;
                netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                        NETIF_F_RXCSUM | NETIF_F_RXHASH |
 -                      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 +                      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 +                      NETIF_F_HW_TC;
                if (highdma)
                        netdev->hw_features |= NETIF_F_HIGHDMA;
                netdev->features |= netdev->hw_features;
                }
        }
  #endif
 -      if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
 +
 +      for_each_port(adapter, i) {
 +              pi = adap2pinfo(adapter, i);
 +              pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls);
 +              if (!pi->sched_tbl)
 +                      dev_warn(&pdev->dev,
 +                               "could not activate scheduling on port %d\n",
 +                               i);
 +      }
 +
 +      if (tid_init(&adapter->tids) < 0) {
                dev_warn(&pdev->dev, "could not allocate TID table, "
                         "continuing\n");
                adapter->params.offload = 0;
 +      } else {
 +              adapter->tc_u32 = cxgb4_init_tc_u32(adapter,
 +                                                  CXGB4_MAX_LINK_HANDLE);
 +              if (!adapter->tc_u32)
 +                      dev_warn(&pdev->dev,
 +                               "could not offload tc u32, continuing\n");
        }
  
        if (is_offload(adapter)) {
        /* PCIe EEH recovery on powerpc platforms needs fundamental reset */
        pdev->needs_freset = 1;
  
 -      if (is_offload(adapter))
 -              attach_ulds(adapter);
 +      if (is_uld(adapter)) {
 +              mutex_lock(&uld_mutex);
 +              list_add_tail(&adapter->list_node, &adapter_list);
 +              mutex_unlock(&uld_mutex);
 +      }
  
        print_adapter_info(adapter);
 +      setup_fw_sge_queues(adapter);
        return 0;
  
  sriov:
                goto free_pci_region;
        }
  
 -      snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func);
 -      netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup);
 -      if (!netdev) {
 -              err = -ENOMEM;
 -              goto free_adapter;
 -      }
 -
        adapter->pdev = pdev;
        adapter->pdev_dev = &pdev->dev;
        adapter->name = pci_name(pdev);
        adapter->mbox = func;
        adapter->pf = func;
        adapter->regs = regs;
 +      adapter->adap_idx = adap_idx;
        adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
                                    (sizeof(struct mbox_cmd) *
                                     T4_OS_LOG_MBOX_CMDS),
                                    GFP_KERNEL);
        if (!adapter->mbox_log) {
                err = -ENOMEM;
 -              goto free_netdevice;
 +              goto free_adapter;
        }
 -      pi = netdev_priv(netdev);
 -      pi->adapter = adapter;
 -      SET_NETDEV_DEV(netdev, &pdev->dev);
        pci_set_drvdata(pdev, adapter);
 -
 -      adapter->port[0] = netdev;
 -      netdev->netdev_ops = &cxgb4_mgmt_netdev_ops;
 -      netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
 -
        return 0;
  
 - free_netdevice:
 -      free_netdev(adapter->port[0]);
   free_adapter:
        kfree(adapter);
   free_pci_region:
        free_some_resources(adapter);
        if (adapter->flags & USING_MSIX)
                free_msix_info(adapter);
 -      if (adapter->num_uld)
 -              uld_mem_free(adapter);
 +      if (adapter->num_uld || adapter->num_ofld_uld)
 +              t4_uld_mem_free(adapter);
   out_unmap_bar:
        if (!is_t4(adapter->params.chip))
                iounmap(adapter->bar2);
@@@ -5052,7 -5441,7 +5058,7 @@@ static void remove_one(struct pci_dev *
                 */
                destroy_workqueue(adapter->workq);
  
 -              if (is_offload(adapter))
 +              if (is_uld(adapter))
                        detach_ulds(adapter);
  
                disable_interrupts(adapter);
                /* If we allocated filters, free up state associated with any
                 * valid filters ...
                 */
 -              if (adapter->tids.ftid_tab) {
 -                      struct filter_entry *f = &adapter->tids.ftid_tab[0];
 -                      for (i = 0; i < (adapter->tids.nftids +
 -                                      adapter->tids.nsftids); i++, f++)
 -                              if (f->valid)
 -                                      clear_filter(adapter, f);
 -              }
 +              clear_all_filters(adapter);
  
                if (adapter->flags & FULL_INIT_DONE)
                        cxgb_down(adapter);
  
                if (adapter->flags & USING_MSIX)
                        free_msix_info(adapter);
 -              if (adapter->num_uld)
 -                      uld_mem_free(adapter);
 +              if (adapter->num_uld || adapter->num_ofld_uld)
 +                      t4_uld_mem_free(adapter);
                free_some_resources(adapter);
  #if IS_ENABLED(CONFIG_IPV6)
                t4_cleanup_clip_tbl(adapter);
        }
  #ifdef CONFIG_PCI_IOV
        else {
 -              if (adapter->port[0]->reg_state == NETREG_REGISTERED)
 +              if (adapter->port[0])
 +                      unregister_netdev(adapter->port[0]);
 +              iounmap(adapter->regs);
 +              kfree(adapter->vfinfo);
 +              kfree(adapter);
 +              pci_disable_sriov(pdev);
 +              pci_release_regions(pdev);
 +      }
 +#endif
 +}
 +
 +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
 + * delivery.  This is essentially a stripped down version of the PCI remove()
 + * function where we do the minimal amount of work necessary to shut down any
 + * further activity.
 + */
 +static void shutdown_one(struct pci_dev *pdev)
 +{
 +      struct adapter *adapter = pci_get_drvdata(pdev);
 +
 +      /* As with remove_one() above (see extended comment), we only want to
 +       * do cleanup on PCI Devices which went all the way through init_one()
 +       * ...
 +       */
 +      if (!adapter) {
 +              pci_release_regions(pdev);
 +              return;
 +      }
 +
 +      if (adapter->pf == 4) {
 +              int i;
 +
 +              for_each_port(adapter, i)
 +                      if (adapter->port[i]->reg_state == NETREG_REGISTERED)
 +                              cxgb_close(adapter->port[i]);
 +
 +              t4_uld_clean_up(adapter);
 +              disable_interrupts(adapter);
 +              disable_msi(adapter);
 +
 +              t4_sge_stop(adapter);
 +              if (adapter->flags & FW_OK)
 +                      t4_fw_bye(adapter, adapter->mbox);
 +      }
 +#ifdef CONFIG_PCI_IOV
 +      else {
 +              if (adapter->port[0])
                        unregister_netdev(adapter->port[0]);
 -              free_netdev(adapter->port[0]);
                iounmap(adapter->regs);
 +              kfree(adapter->vfinfo);
                kfree(adapter);
                pci_disable_sriov(pdev);
                pci_release_regions(pdev);
@@@ -5156,7 -5505,7 +5162,7 @@@ static struct pci_driver cxgb4_driver 
        .id_table = cxgb4_pci_tbl,
        .probe    = init_one,
        .remove   = remove_one,
 -      .shutdown = remove_one,
 +      .shutdown = shutdown_one,
  #ifdef CONFIG_PCI_IOV
        .sriov_configure = cxgb4_iov_configure,
  #endif
index b4b2d20aab3caf07cc92abd152bde317b73af15c,aac6e444abf239a0c71712a4612890fd1741e0d7..0945fa49a5dd83251af4083535b27f081ae277b0
@@@ -36,6 -36,7 +36,6 @@@
   */
  
  #include <linux/kernel.h>
 -#include <linux/version.h>
  #include <linux/module.h>
  #include <linux/errno.h>
  #include <linux/types.h>
@@@ -82,24 -83,6 +82,24 @@@ static void free_msix_idx_in_bmap(struc
        spin_unlock_irqrestore(&bmap->lock, flags);
  }
  
 +/* Flush the aggregated lro sessions */
 +static void uldrx_flush_handler(struct sge_rspq *q)
 +{
 +      struct adapter *adap = q->adap;
 +
 +      if (adap->uld[q->uld].lro_flush)
 +              adap->uld[q->uld].lro_flush(&q->lro_mgr);
 +}
 +
 +/**
 + *    uldrx_handler - response queue handler for ULD queues
 + *    @q: the response queue that received the packet
 + *    @rsp: the response queue descriptor holding the offload message
 + *    @gl: the gather list of packet fragments
 + *
 + *    Deliver an ingress offload packet to a ULD.  All processing is done by
 + *    the ULD; we just maintain statistics.
 + */
  static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
                         const struct pkt_gl *gl)
  {
@@@ -142,8 -125,8 +142,8 @@@ static int alloc_uld_rxqs(struct adapte
        struct sge_ofld_rxq *q = rxq_info->uldrxq + offset;
        unsigned short *ids = rxq_info->rspq_id + offset;
        unsigned int per_chan = nq / adap->params.nports;
 -      unsigned int msi_idx, bmap_idx;
 -      int i, err;
 +      unsigned int bmap_idx = 0;
 +      int i, err, msi_idx;
  
        if (adap->flags & USING_MSIX)
                msi_idx = 1;
        for (i = 0; i < nq; i++, q++) {
                if (msi_idx >= 0) {
                        bmap_idx = get_msix_idx_from_bmap(adap);
 -                      adap->msi_idx++;
 +                      msi_idx = adap->msix_info_ulds[bmap_idx].idx;
                }
                err = t4_sge_alloc_rxq(adap, &q->rspq, false,
                                       adap->port[i / per_chan],
 -                                     adap->msi_idx,
 +                                     msi_idx,
                                       q->fl.size ? &q->fl : NULL,
                                       uldrx_handler,
 -                                     NULL,
 +                                     lro ? uldrx_flush_handler : NULL,
                                       0);
                if (err)
                        goto freeout;
@@@ -177,6 -160,7 +177,6 @@@ freeout
                if (q->rspq.desc)
                        free_rspq_fl(adap, &q->rspq,
                                     q->fl.size ? &q->fl : NULL);
 -              adap->msi_idx--;
        }
  
        /* We need to free rxq also in case of ciq allocation failure */
                        if (q->rspq.desc)
                                free_rspq_fl(adap, &q->rspq,
                                             q->fl.size ? &q->fl : NULL);
 -                      adap->msi_idx--;
                }
        }
        return err;
  }
  
 -int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
 +static int
 +setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 +      int i, ret = 0;
  
        if (adap->flags & USING_MSIX) {
 -              rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq,
 +              rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq),
 +                                           sizeof(unsigned short),
                                             GFP_KERNEL);
                if (!rxq_info->msix_tbl)
                        return -ENOMEM;
        }
  
 -      return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
 +      ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
                 !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq,
                                 rxq_info->nrxq, lro));
 +
 +      /* Tell uP to route control queue completions to rdma rspq */
 +      if (adap->flags & FULL_INIT_DONE &&
 +          !ret && uld_type == CXGB4_ULD_RDMA) {
 +              struct sge *s = &adap->sge;
 +              unsigned int cmplqid;
 +              u32 param, cmdop;
 +
 +              cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL;
 +              for_each_port(adap, i) {
 +                      cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id;
 +                      param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
 +                               FW_PARAMS_PARAM_X_V(cmdop) |
 +                               FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id));
 +                      ret = t4_set_params(adap, adap->mbox, adap->pf,
 +                                          0, 1, &param, &cmplqid);
 +              }
 +      }
 +      return ret;
  }
  
  static void t4_free_uld_rxqs(struct adapter *adap, int n,
                if (q->rspq.desc)
                        free_rspq_fl(adap, &q->rspq,
                                     q->fl.size ? &q->fl : NULL);
 -              adap->msi_idx--;
        }
  }
  
 -void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
 +static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
  
 +      if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) {
 +              struct sge *s = &adap->sge;
 +              u32 param, cmdop, cmplqid = 0;
 +              int i;
 +
 +              cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL;
 +              for_each_port(adap, i) {
 +                      param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
 +                               FW_PARAMS_PARAM_X_V(cmdop) |
 +                               FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id));
 +                      t4_set_params(adap, adap->mbox, adap->pf,
 +                                    0, 1, &param, &cmplqid);
 +              }
 +      }
 +
        if (rxq_info->nciq)
                t4_free_uld_rxqs(adap, rxq_info->nciq,
                                 rxq_info->uldrxq + rxq_info->nrxq);
                kfree(rxq_info->msix_tbl);
  }
  
 -int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
 -                 const struct cxgb4_pci_uld_info *uld_info)
 +static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
 +                        const struct cxgb4_uld_info *uld_info)
  {
        struct sge *s = &adap->sge;
        struct sge_uld_rxq_info *rxq_info;
 -      int i, nrxq;
 +      int i, nrxq, ciq_size;
  
        rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL);
        if (!rxq_info)
                return -ENOMEM;
  
 -      if (uld_info->nrxq > s->nqs_per_uld)
 -              rxq_info->nrxq = s->nqs_per_uld;
 -      else
 -              rxq_info->nrxq = uld_info->nrxq;
 -      if (!uld_info->nciq)
 +      if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) {
 +              i = s->nqs_per_uld;
 +              rxq_info->nrxq = roundup(i, adap->params.nports);
 +      } else {
 +              i = min_t(int, uld_info->nrxq,
 +                        num_online_cpus());
 +              rxq_info->nrxq = roundup(i, adap->params.nports);
 +      }
 +      if (!uld_info->ciq) {
                rxq_info->nciq = 0;
 -      else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld)
 -              rxq_info->nciq = s->nqs_per_uld;
 -      else
 -              rxq_info->nciq = uld_info->nciq;
 +      } else  {
 +              if (adap->flags & USING_MSIX)
 +                      rxq_info->nciq = min_t(int, s->nqs_per_uld,
 +                                             num_online_cpus());
 +              else
 +                      rxq_info->nciq = min_t(int, MAX_OFLD_QSETS,
 +                                             num_online_cpus());
 +              rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) *
 +                                adap->params.nports);
 +              rxq_info->nciq = max_t(int, rxq_info->nciq,
 +                                     adap->params.nports);
 +      }
  
        nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */
        rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq),
        }
  
        rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL);
 -      if (!rxq_info->uldrxq) {
 +      if (!rxq_info->rspq_id) {
                kfree(rxq_info->uldrxq);
                kfree(rxq_info);
                return -ENOMEM;
                r->fl.size = 72;
        }
  
 +      ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
 +      if (ciq_size > SGE_MAX_IQ_SIZE) {
 +              dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n");
 +              ciq_size = SGE_MAX_IQ_SIZE;
 +      }
 +
        for (i = rxq_info->nrxq; i < nrxq; i++) {
                struct sge_ofld_rxq *r = &rxq_info->uldrxq[i];
  
 -              init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64);
 +              init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
                r->rspq.uld = uld_type;
 -              r->fl.size = 72;
        }
  
        memcpy(rxq_info->name, uld_info->name, IFNAMSIZ);
        return 0;
  }
  
 -void free_queues_uld(struct adapter *adap, unsigned int uld_type)
 +static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
  
        kfree(rxq_info);
  }
  
 -int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 +static int
 +request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 -      int idx, bmap_idx, err = 0;
 +      int err = 0;
 +      unsigned int idx, bmap_idx;
  
        for_each_uldrxq(rxq_info, idx) {
                bmap_idx = rxq_info->msix_tbl[idx];
        }
        return 0;
  unwind:
 -      while (--idx >= 0) {
 +      while (idx-- > 0) {
                bmap_idx = rxq_info->msix_tbl[idx];
                free_msix_idx_in_bmap(adap, bmap_idx);
                free_irq(adap->msix_info_ulds[bmap_idx].vec,
        return err;
  }
  
 -void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 +static void
 +free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 -      int idx;
 +      unsigned int idx, bmap_idx;
  
        for_each_uldrxq(rxq_info, idx) {
 -              unsigned int bmap_idx = rxq_info->msix_tbl[idx];
 +              bmap_idx = rxq_info->msix_tbl[idx];
  
                free_msix_idx_in_bmap(adap, bmap_idx);
                free_irq(adap->msix_info_ulds[bmap_idx].vec,
        }
  }
  
 -void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
 +static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
        int n = sizeof(adap->msix_info_ulds[0].desc);
 -      int idx;
 +      unsigned int idx, bmap_idx;
  
        for_each_uldrxq(rxq_info, idx) {
 -              unsigned int bmap_idx = rxq_info->msix_tbl[idx];
 +              bmap_idx = rxq_info->msix_tbl[idx];
  
                snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d",
                         adap->port[0]->name, rxq_info->name, idx);
@@@ -433,7 -362,7 +433,7 @@@ static void quiesce_rx(struct adapter *
        }
  }
  
 -void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
 +static void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
        int idx;
                enable_rx(adap, &rxq_info->uldrxq[idx].rspq);
  }
  
 -void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
 +static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
  {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
        int idx;
@@@ -462,15 -391,15 +462,15 @@@ static void uld_queue_init(struct adapt
        lli->nciq = rxq_info->nciq;
  }
  
 -int uld_mem_alloc(struct adapter *adap)
 +int t4_uld_mem_alloc(struct adapter *adap)
  {
        struct sge *s = &adap->sge;
  
 -      adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL);
 +      adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL);
        if (!adap->uld)
                return -ENOMEM;
  
 -      s->uld_rxq_info = kzalloc(adap->num_uld *
 +      s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX *
                                  sizeof(struct sge_uld_rxq_info *),
                                  GFP_KERNEL);
        if (!s->uld_rxq_info)
@@@ -482,7 -411,7 +482,7 @@@ err_uld
        return -ENOMEM;
  }
  
 -void uld_mem_free(struct adapter *adap)
 +void t4_uld_mem_free(struct adapter *adap)
  {
        struct sge *s = &adap->sge;
  
        kfree(adap->uld);
  }
  
 +void t4_uld_clean_up(struct adapter *adap)
 +{
 +      struct sge_uld_rxq_info *rxq_info;
 +      unsigned int i;
 +
 +      if (!adap->uld)
 +              return;
 +      for (i = 0; i < CXGB4_ULD_MAX; i++) {
 +              if (!adap->uld[i].handle)
 +                      continue;
 +              rxq_info = adap->sge.uld_rxq_info[i];
 +              if (adap->flags & FULL_INIT_DONE)
 +                      quiesce_rx_uld(adap, i);
 +              if (adap->flags & USING_MSIX)
 +                      free_msix_queue_irqs_uld(adap, i);
 +              free_sge_queues_uld(adap, i);
 +              free_queues_uld(adap, i);
 +      }
 +}
 +
  static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
  {
        int i;
        lld->ports = adap->port;
        lld->vr = &adap->vres;
        lld->mtus = adap->params.mtus;
 -      lld->ntxq = adap->sge.iscsiqsets;
 +      lld->ntxq = adap->sge.ofldqsets;
        lld->nchan = adap->params.nports;
        lld->nports = adap->params.nports;
        lld->wr_cred = adap->params.ofldq_wr_cred;
 +      lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
 +      lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
 +      lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
 +      lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
 +      lld->iscsi_ppm = &adap->iscsi_ppm;
        lld->adapter_type = adap->params.chip;
        lld->cclk_ps = 1000000000 / adap->params.vpd.cclk;
        lld->udb_density = 1 << adap->params.sge.eq_qpp;
        lld->max_ird_adapter = adap->params.max_ird_adapter;
        lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
        lld->nodeid = dev_to_node(adap->pdev_dev);
++      lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
  }
  
  static void uld_attach(struct adapter *adap, unsigned int uld)
        }
  
        adap->uld[uld].handle = handle;
 +      t4_register_netevent_notifier();
  
        if (adap->flags & FULL_INIT_DONE)
                adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
  }
  
 -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
 -                         struct cxgb4_pci_uld_info *p)
 +/**
 + *    cxgb4_register_uld - register an upper-layer driver
 + *    @type: the ULD type
 + *    @p: the ULD methods
 + *
 + *    Registers an upper-layer driver with this driver and notifies the ULD
 + *    about any presently available devices that support its type.  Returns
 + *    %-EBUSY if a ULD of the same type is already registered.
 + */
 +int cxgb4_register_uld(enum cxgb4_uld type,
 +                     const struct cxgb4_uld_info *p)
  {
        int ret = 0;
 +      unsigned int adap_idx = 0;
        struct adapter *adap;
  
 -      if (type >= CXGB4_PCI_ULD_MAX)
 +      if (type >= CXGB4_ULD_MAX)
                return -EINVAL;
  
        mutex_lock(&uld_mutex);
        list_for_each_entry(adap, &adapter_list, list_node) {
 -              if (!is_pci_uld(adap))
 +              if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
 +                  (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
 +                      continue;
 +              if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
                        continue;
                ret = cfg_queues_uld(adap, type, p);
                if (ret)
                }
                adap->uld[type] = *p;
                uld_attach(adap, type);
 +              adap_idx++;
        }
        mutex_unlock(&uld_mutex);
        return 0;
  
  free_irq:
 +      if (adap->flags & FULL_INIT_DONE)
 +              quiesce_rx_uld(adap, type);
        if (adap->flags & USING_MSIX)
                free_msix_queue_irqs_uld(adap, type);
  free_rxq:
  free_queues:
        free_queues_uld(adap, type);
  out:
 +
 +      list_for_each_entry(adap, &adapter_list, list_node) {
 +              if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
 +                  (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
 +                      continue;
 +              if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
 +                      continue;
 +              if (!adap_idx)
 +                      break;
 +              adap->uld[type].handle = NULL;
 +              adap->uld[type].add = NULL;
 +              if (adap->flags & FULL_INIT_DONE)
 +                      quiesce_rx_uld(adap, type);
 +              if (adap->flags & USING_MSIX)
 +                      free_msix_queue_irqs_uld(adap, type);
 +              free_sge_queues_uld(adap, type);
 +              free_queues_uld(adap, type);
 +              adap_idx--;
 +      }
        mutex_unlock(&uld_mutex);
        return ret;
  }
 -EXPORT_SYMBOL(cxgb4_register_pci_uld);
 +EXPORT_SYMBOL(cxgb4_register_uld);
  
 -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type)
 +/**
 + *    cxgb4_unregister_uld - unregister an upper-layer driver
 + *    @type: the ULD type
 + *
 + *    Unregisters an existing upper-layer driver.
 + */
 +int cxgb4_unregister_uld(enum cxgb4_uld type)
  {
        struct adapter *adap;
  
 -      if (type >= CXGB4_PCI_ULD_MAX)
 +      if (type >= CXGB4_ULD_MAX)
                return -EINVAL;
  
        mutex_lock(&uld_mutex);
        list_for_each_entry(adap, &adapter_list, list_node) {
 -              if (!is_pci_uld(adap))
 +              if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
 +                  (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
 +                      continue;
 +              if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
                        continue;
                adap->uld[type].handle = NULL;
                adap->uld[type].add = NULL;
  
        return 0;
  }
 -EXPORT_SYMBOL(cxgb4_unregister_pci_uld);
 +EXPORT_SYMBOL(cxgb4_unregister_uld);
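
Illustrative only, not part of the merge: a minimal sketch of how an upper-layer driver could use the unified cxgb4_register_uld()/cxgb4_unregister_uld() API documented above. The my_uld_* names, the queue sizing, and the use of CXGB4_ULD_RDMA as the slot are assumptions for the example; the remaining cxgb4_uld_info callbacks are kept from the pre-existing interface.

static void *my_uld_add(const struct cxgb4_lld_info *lld)
{
	/* allocate per-adapter ULD state here; the returned pointer is the
	 * handle passed back to every other callback
	 */
	return (void *)lld;
}

static int my_uld_rx_handler(void *handle, const __be64 *rsp,
			     const struct pkt_gl *gl)
{
	return 0;	/* CPL message consumed */
}

static int my_uld_state_change(void *handle, enum cxgb4_state new_state)
{
	return 0;
}

static const struct cxgb4_uld_info my_uld_info = {
	.name		= "my_uld",
	.nrxq		= 4,		/* capped by MAX_ULD_QSETS */
	.rxq_size	= 1024,
	.ciq		= false,
	.lro		= false,
	.add		= my_uld_add,
	.rx_handler	= my_uld_rx_handler,
	.state_change	= my_uld_state_change,
};

static int __init my_uld_init(void)
{
	return cxgb4_register_uld(CXGB4_ULD_RDMA, &my_uld_info);
}

static void __exit my_uld_exit(void)
{
	cxgb4_unregister_uld(CXGB4_ULD_RDMA);
}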
index 47bd14f602db4bb93c13ab883a04c66885c4ae11,93b1550ac7e6bb48433080dd0822a17009cf8c1b..2996793b1aaa3e463d8e11083d858c20acb91d15
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
 - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
 + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -42,8 -42,6 +42,8 @@@
  #include <linux/atomic.h>
  #include "cxgb4.h"
  
 +#define MAX_ULD_QSETS 16
 +
  /* CPL message priority levels */
  enum {
        CPL_PRIORITY_DATA     = 0,  /* data messages */
@@@ -106,7 -104,6 +106,7 @@@ struct tid_info 
        unsigned int atid_base;
  
        struct filter_entry *ftid_tab;
 +      unsigned long *ftid_bmap;
        unsigned int nftids;
        unsigned int ftid_base;
        unsigned int aftid_base;
        atomic_t tids_in_use;
        /* TIDs in the HASH */
        atomic_t hash_tids_in_use;
 +      /* lock for setting/clearing filter bitmap */
 +      spinlock_t ftid_lock;
  };
  
  static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
@@@ -188,38 -183,15 +188,38 @@@ int cxgb4_create_server_filter(const st
  int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
                               unsigned int queue, bool ipv6);
  
 +/* Filter operation context to allow callers of cxgb4_set_filter() and
 + * cxgb4_del_filter() to wait for an asynchronous completion.
 + */
 +struct filter_ctx {
 +      struct completion completion;   /* completion rendezvous */
 +      void *closure;                  /* caller's opaque information */
 +      int result;                     /* result of operation */
 +      u32 tid;                        /* to store tid */
 +};
 +
 +struct ch_filter_specification;
 +
 +int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 +                     struct ch_filter_specification *fs,
 +                     struct filter_ctx *ctx);
 +int __cxgb4_del_filter(struct net_device *dev, int filter_id,
 +                     struct filter_ctx *ctx);
 +int cxgb4_set_filter(struct net_device *dev, int filter_id,
 +                   struct ch_filter_specification *fs);
 +int cxgb4_del_filter(struct net_device *dev, int filter_id);
 +
  static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
  {
        skb_set_queue_mapping(skb, (queue << 1) | prio);
  }
  
  enum cxgb4_uld {
 +      CXGB4_ULD_INIT,
        CXGB4_ULD_RDMA,
        CXGB4_ULD_ISCSI,
        CXGB4_ULD_ISCSIT,
 +      CXGB4_ULD_CRYPTO,
        CXGB4_ULD_MAX
  };
  
@@@ -308,15 -280,36 +308,16 @@@ struct cxgb4_lld_info 
        unsigned int iscsi_llimit;           /* chip's iscsi region llimit */
        void **iscsi_ppm;                    /* iscsi page pod manager */
        int nodeid;                          /* device numa node id */
+       bool fr_nsmr_tpte_wr_support;        /* FW supports FR_NSMR_TPTE_WR */
  };
  
  struct cxgb4_uld_info {
        const char *name;
 -      void *(*add)(const struct cxgb4_lld_info *p);
 -      int (*rx_handler)(void *handle, const __be64 *rsp,
 -                        const struct pkt_gl *gl);
 -      int (*state_change)(void *handle, enum cxgb4_state new_state);
 -      int (*control)(void *handle, enum cxgb4_control control, ...);
 -      int (*lro_rx_handler)(void *handle, const __be64 *rsp,
 -                            const struct pkt_gl *gl,
 -                            struct t4_lro_mgr *lro_mgr,
 -                            struct napi_struct *napi);
 -      void (*lro_flush)(struct t4_lro_mgr *);
 -};
 -
 -enum cxgb4_pci_uld {
 -      CXGB4_PCI_ULD1,
 -      CXGB4_PCI_ULD_MAX
 -};
 -
 -struct cxgb4_pci_uld_info {
 -      const char *name;
 -      bool lro;
        void *handle;
        unsigned int nrxq;
 -      unsigned int nciq;
        unsigned int rxq_size;
 -      unsigned int ciq_size;
 +      bool ciq;
 +      bool lro;
        void *(*add)(const struct cxgb4_lld_info *p);
        int (*rx_handler)(void *handle, const __be64 *rsp,
                          const struct pkt_gl *gl);
  
  int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
  int cxgb4_unregister_uld(enum cxgb4_uld type);
 -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
 -                         struct cxgb4_pci_uld_info *p);
 -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type);
  int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
  unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
  unsigned int cxgb4_port_chan(const struct net_device *dev);
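
Illustrative only: a hedged sketch of how a caller might use the filter_ctx completion machinery declared above to wait for an asynchronous __cxgb4_set_filter() to finish. The timeout value and the assumption that the driver's filter-reply handler fills in ctx.result/ctx.tid and completes ctx.completion are part of the example, not of this header.

static int my_install_filter(struct net_device *dev, int filter_id,
			     struct ch_filter_specification *fs)
{
	struct filter_ctx ctx;
	int ret;

	init_completion(&ctx.completion);

	ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx);
	if (ret)
		return ret;

	/* the firmware reply path completes ctx.completion and fills in
	 * ctx.result (and ctx.tid on success)
	 */
	if (!wait_for_completion_timeout(&ctx.completion, 10 * HZ))
		return -ETIMEDOUT;

	return ctx.result;
}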
index 4b58b32105f7c5675751f220896e2afc6a315853,985a521ac5114cc0b18f6b67502f48ab817b1564..8d9e4b7a8e8439c3214cc7a6d760b471eeb6be57
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
 - * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved.
 + * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -100,6 -100,7 +100,7 @@@ enum fw_wr_opcodes 
        FW_RI_RECV_WR                  = 0x17,
        FW_RI_BIND_MW_WR               = 0x18,
        FW_RI_FR_NSMR_WR               = 0x19,
+       FW_RI_FR_NSMR_TPTE_WR          = 0x20,
        FW_RI_INV_LSTAG_WR             = 0x1a,
        FW_ISCSI_TX_DATA_WR            = 0x45,
        FW_CRYPTO_LOOKASIDE_WR         = 0X6d,
@@@ -681,7 -682,6 +682,7 @@@ enum fw_cmd_opcodes 
        FW_RSS_IND_TBL_CMD             = 0x20,
        FW_RSS_GLB_CONFIG_CMD          = 0x22,
        FW_RSS_VI_CONFIG_CMD           = 0x23,
 +      FW_SCHED_CMD                   = 0x24,
        FW_DEVLOG_CMD                  = 0x25,
        FW_CLIP_CMD                    = 0x28,
        FW_LASTC2E_CMD                 = 0x40,
@@@ -1121,6 -1121,7 +1122,7 @@@ enum fw_params_param_dev 
        FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */
        FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17,
        FW_PARAMS_PARAM_DEV_FWCACHE = 0x18,
+       FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR  = 0x1C,
  };
  
  /*
@@@ -2267,12 -2268,6 +2269,12 @@@ enum fw_port_cap 
        FW_PORT_CAP_802_3_ASM_DIR       = 0x8000,
  };
  
 +#define FW_PORT_CAP_SPEED_S     0
 +#define FW_PORT_CAP_SPEED_M     0x3f
 +#define FW_PORT_CAP_SPEED_V(x)  ((x) << FW_PORT_CAP_SPEED_S)
 +#define FW_PORT_CAP_SPEED_G(x) \
 +      (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M)
 +
  enum fw_port_mdi {
        FW_PORT_CAP_MDI_UNCHANGED,
        FW_PORT_CAP_MDI_AUTO,
@@@ -2969,41 -2964,6 +2971,41 @@@ struct fw_rss_vi_config_cmd 
  #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x)       ((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S)
  #define FW_RSS_VI_CONFIG_CMD_UDPEN_F  FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U)
  
 +enum fw_sched_sc {
 +      FW_SCHED_SC_PARAMS              = 1,
 +};
 +
 +struct fw_sched_cmd {
 +      __be32 op_to_write;
 +      __be32 retval_len16;
 +      union fw_sched {
 +              struct fw_sched_config {
 +                      __u8   sc;
 +                      __u8   type;
 +                      __u8   minmaxen;
 +                      __u8   r3[5];
 +                      __u8   nclasses[4];
 +                      __be32 r4;
 +              } config;
 +              struct fw_sched_params {
 +                      __u8   sc;
 +                      __u8   type;
 +                      __u8   level;
 +                      __u8   mode;
 +                      __u8   unit;
 +                      __u8   rate;
 +                      __u8   ch;
 +                      __u8   cl;
 +                      __be32 min;
 +                      __be32 max;
 +                      __be16 weight;
 +                      __be16 pktsize;
 +                      __be16 burstsize;
 +                      __be16 r4;
 +              } params;
 +      } u;
 +};
 +
  struct fw_clip_cmd {
        __be32 op_to_write;
        __be32 alloc_to_len16;
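
Illustrative only: a hedged sketch of how the new FW_SCHED_CMD could be issued through the firmware mailbox to program one scheduling class. The helper name, parameter list and use of t4_wr_mbox() are assumptions for the example; only the command layout comes from the structure above.

static int my_set_sched_params(struct adapter *adap, u8 type, u8 level,
			       u8 mode, u8 ch, u8 cl, u32 min_rate,
			       u32 max_rate)
{
	struct fw_sched_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) |
				      FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
	cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));

	cmd.u.params.sc    = FW_SCHED_SC_PARAMS;
	cmd.u.params.type  = type;
	cmd.u.params.level = level;
	cmd.u.params.mode  = mode;
	cmd.u.params.ch    = ch;
	cmd.u.params.cl    = cl;
	cmd.u.params.min   = cpu_to_be32(min_rate);
	cmd.u.params.max   = cpu_to_be32(max_rate);

	return t4_wr_mbox(adap, adap->mbox, &cmd, sizeof(cmd), NULL);
}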
index eb448dff75643e6856d0fad2a71be64e4eba628a,9283bc60bb2405ec910ab7afd90c38f16d7bec30..8e5b3f51b47b80201d7c447a9f061116b69e141c
@@@ -116,8 -116,10 +116,8 @@@ int hns_dsaf_get_cfg(struct dsaf_devic
  
                        dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev,
                                                                  res);
 -                      if (IS_ERR(dsaf_dev->sc_base)) {
 -                              dev_err(dsaf_dev->dev, "subctrl can not map!\n");
 +                      if (IS_ERR(dsaf_dev->sc_base))
                                return PTR_ERR(dsaf_dev->sc_base);
 -                      }
  
                        res = platform_get_resource(pdev, IORESOURCE_MEM,
                                                    res_idx++);
  
                        dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev,
                                                                   res);
 -                      if (IS_ERR(dsaf_dev->sds_base)) {
 -                              dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n");
 +                      if (IS_ERR(dsaf_dev->sds_base))
                                return PTR_ERR(dsaf_dev->sds_base);
 -                      }
                } else {
                        dsaf_dev->sub_ctrl = syscon;
                }
                }
        }
        dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res);
 -      if (IS_ERR(dsaf_dev->ppe_base)) {
 -              dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n");
 +      if (IS_ERR(dsaf_dev->ppe_base))
                return PTR_ERR(dsaf_dev->ppe_base);
 -      }
        dsaf_dev->ppe_paddr = res->start;
  
        if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
                        }
                }
                dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res);
 -              if (IS_ERR(dsaf_dev->io_base)) {
 -                      dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n");
 +              if (IS_ERR(dsaf_dev->io_base))
                        return PTR_ERR(dsaf_dev->io_base);
 -              }
        }
  
        ret = device_property_read_u32(dsaf_dev->dev, "desc-num", &desc_num);
@@@ -2780,7 -2788,7 +2780,7 @@@ module_platform_driver(g_dsaf_driver)
   * @dereset: false - request reset, true - drop reset
   * return 0 - success, negative - fail
   */
- int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable)
+ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
  {
        struct dsaf_device *dsaf_dev;
        struct platform_device *pdev;
                {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3},
        };
  
-       if (!is_of_node(dsaf_fwnode)) {
-               pr_err("hisi_dsaf: Only support DT node!\n");
+       /* find the platform device corresponding to fwnode */
+       if (is_of_node(dsaf_fwnode)) {
+               pdev = of_find_device_by_node(to_of_node(dsaf_fwnode));
+       } else if (is_acpi_device_node(dsaf_fwnode)) {
+               pdev = hns_dsaf_find_platform_device(dsaf_fwnode);
+       } else {
+               pr_err("fwnode is neither OF nor ACPI type\n");
                return -EINVAL;
        }
-       pdev = of_find_device_by_node(to_of_node(dsaf_fwnode));
+       /* check that we actually found the platform device */
+       if (!pdev) {
+               pr_err("couldn't find platform device for node\n");
+               return -ENODEV;
+       }
+       /* retrieve the dsaf_device from the driver data */
        dsaf_dev = dev_get_drvdata(&pdev->dev);
+       if (!dsaf_dev) {
+               dev_err(&pdev->dev, "dsaf_dev is NULL\n");
+               return -ENODEV;
+       }
+       /* now, make sure we are running on a compatible SoC */
        if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
                dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n",
                        dsaf_dev->ae_dev.name);
                return -ENODEV;
        }
  
-       if (!enable) {
-               /* Reset rocee-channels in dsaf and rocee */
-               hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false);
-               hns_dsaf_roce_srst(dsaf_dev, false);
+       /* do reset or de-reset according to the flag */
+       if (!dereset) {
+               /* reset rocee-channels in dsaf and rocee */
+               dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK,
+                                                     false);
+               dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, false);
        } else {
-               /* Configure dsaf tx roce correspond to port map and sl map */
+               /* configure dsaf tx roce according to port map and sl map */
                mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG);
                for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++)
                        dsaf_set_field(mp, 7 << i * 3, i * 3,
                                       sl_map[i][DSAF_ROCE_6PORT_MODE]);
                dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl);
  
-               /* De-reset rocee-channels in dsaf and rocee */
-               hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true);
+               /* de-reset rocee-channels in dsaf and rocee */
+               dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK,
+                                                     true);
                msleep(SRST_TIME_INTERVAL);
-               hns_dsaf_roce_srst(dsaf_dev, true);
+               dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, true);
  
-               /* Eanble dsaf channel rocee credit */
+               /* enable dsaf channel rocee credit */
                credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG);
                dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0);
                dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit);
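
Illustrative only: a hedged sketch of a full reset cycle as a RoCE driver might drive it through hns_dsaf_roce_reset(), matching the false = assert / true = de-assert semantics above. The fwnode source and the delay between the two calls are assumptions.

static int my_roce_reset_cycle(struct fwnode_handle *dsaf_fwnode)
{
	int ret;

	/* dereset == false: assert reset on the dsaf/rocee channels */
	ret = hns_dsaf_roce_reset(dsaf_fwnode, false);
	if (ret)
		return ret;

	msleep(20);	/* illustrative settle time */

	/* dereset == true: release reset and re-enable rocee credits */
	return hns_dsaf_roce_reset(dsaf_fwnode, true);
}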
index f9cbc67f1694283c21a393b7f43b615e79a152e5,d87bbe65c2b0ed29cd669028bea4e2c036c65153..c41ab31a39f8c93d3caba280510b8a73413c7485
@@@ -159,7 -159,7 +159,8 @@@ static void dump_dev_cap_flags2(struct 
                [32] = "Loopback source checks support",
                [33] = "RoCEv2 support",
                [34] = "DMFS Sniffer support (UC & MC)",
 +              [35] = "QinQ VST mode support",
+               [36] = "sl to vl mapping table change event support"
        };
        int i;
  
@@@ -249,72 -249,6 +250,72 @@@ out
        return err;
  }
  
 +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port)
 +{
 +      struct mlx4_vport_oper_state *vp_oper;
 +      struct mlx4_vport_state *vp_admin;
 +      int err;
 +
 +      vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 +      vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
 +
 +      if (vp_admin->default_vlan != vp_oper->state.default_vlan) {
 +              err = __mlx4_register_vlan(&priv->dev, port,
 +                                         vp_admin->default_vlan,
 +                                         &vp_oper->vlan_idx);
 +              if (err) {
 +                      vp_oper->vlan_idx = NO_INDX;
 +                      mlx4_warn(&priv->dev,
 +                                "No vlan resources slave %d, port %d\n",
 +                                slave, port);
 +                      return err;
 +              }
 +              mlx4_dbg(&priv->dev, "alloc vlan %d idx  %d slave %d port %d\n",
 +                       (int)(vp_oper->state.default_vlan),
 +                       vp_oper->vlan_idx, slave, port);
 +      }
 +      vp_oper->state.vlan_proto   = vp_admin->vlan_proto;
 +      vp_oper->state.default_vlan = vp_admin->default_vlan;
 +      vp_oper->state.default_qos  = vp_admin->default_qos;
 +
 +      return 0;
 +}
 +
 +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port)
 +{
 +      struct mlx4_vport_oper_state *vp_oper;
 +      struct mlx4_slave_state *slave_state;
 +      struct mlx4_vport_state *vp_admin;
 +      int err;
 +
 +      vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 +      vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
 +      slave_state = &priv->mfunc.master.slave_state[slave];
 +
 +      if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) ||
 +          (!slave_state->active))
 +              return 0;
 +
 +      if (vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
 +          vp_oper->state.default_vlan == vp_admin->default_vlan &&
 +          vp_oper->state.default_qos == vp_admin->default_qos)
 +              return 0;
 +
 +      if (!slave_state->vst_qinq_supported) {
 +              /* Warn and revert the request to set vst QinQ mode */
 +              vp_admin->vlan_proto   = vp_oper->state.vlan_proto;
 +              vp_admin->default_vlan = vp_oper->state.default_vlan;
 +              vp_admin->default_qos  = vp_oper->state.default_qos;
 +
 +              mlx4_warn(&priv->dev,
 +                        "Slave %d does not support VST QinQ mode\n", slave);
 +              return 0;
 +      }
 +
 +      err = mlx4_activate_vst_qinq(priv, slave, port);
 +      return err;
 +}
 +
  int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
                                struct mlx4_vhcr *vhcr,
                                struct mlx4_cmd_mailbox *inbox,
  #define QUERY_FUNC_CAP_VF_ENABLE_QP0          0x08
  
  #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
 -#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31)
  #define QUERY_FUNC_CAP_PHV_BIT                        0x40
 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE   0x20
 +
 +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ      BIT(30)
 +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31)
  
        if (vhcr->op_modifier == 1) {
                struct mlx4_active_ports actv_ports =
                        mlx4_get_active_ports(dev, slave);
                int converted_port = mlx4_slave_convert_port(
                                dev, slave, vhcr->in_modifier);
 +              struct mlx4_vport_oper_state *vp_oper;
  
                if (converted_port < 0)
                        return -EINVAL;
                MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
                         QUERY_FUNC_CAP_PHYS_PORT_ID);
  
 -              if (dev->caps.phv_bit[port]) {
 -                      field = QUERY_FUNC_CAP_PHV_BIT;
 -                      MLX4_PUT(outbox->buf, field,
 -                               QUERY_FUNC_CAP_FLAGS0_OFFSET);
 -              }
 +              vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 +              err = mlx4_handle_vst_qinq(priv, slave, port);
 +              if (err)
 +                      return err;
 +
 +              field = 0;
 +              if (dev->caps.phv_bit[port])
 +                      field |= QUERY_FUNC_CAP_PHV_BIT;
 +              if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
 +                      field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE;
 +              MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET);
  
        } else if (vhcr->op_modifier == 0) {
                struct mlx4_active_ports actv_ports =
                        mlx4_get_active_ports(dev, slave);
 +              struct mlx4_slave_state *slave_state =
 +                      &priv->mfunc.master.slave_state[slave];
 +
                /* enable rdma and ethernet interfaces, new quota locations,
                 * and reserved lkey
                 */
  
                size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00);
                MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET);
 +
 +              if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ)
 +                      slave_state->vst_qinq_supported = true;
 +
        } else
                err = -EINVAL;
  
@@@ -538,12 -455,10 +539,12 @@@ int mlx4_QUERY_FUNC_CAP(struct mlx4_de
        u32                     size, qkey;
        int                     err = 0, quotas = 0;
        u32                     in_modifier;
 +      u32                     slave_caps;
  
        op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
 -      in_modifier = op_modifier ? gen_or_port :
 +      slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ |
                QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS;
 +      in_modifier = op_modifier ? gen_or_port : slave_caps;
  
        mailbox = mlx4_alloc_cmd_mailbox(dev);
        if (IS_ERR(mailbox))
                MLX4_GET(func_cap->phys_port_id, outbox,
                         QUERY_FUNC_CAP_PHYS_PORT_ID);
  
 -      MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
 -      func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT);
 +      MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
  
        /* All other resources are allocated by the master, but we still report
         * 'num' and 'reserved' capabilities as follows:
@@@ -775,7 -691,6 +776,7 @@@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev 
  #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET   0x52
  #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET                0x55
  #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET   0x56
 +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET      0x5D
  #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET               0x61
  #define QUERY_DEV_CAP_RSVD_MCG_OFFSET         0x62
  #define QUERY_DEV_CAP_MAX_MCG_OFFSET          0x63
  #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET      0x74
  #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET   0x76
  #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET     0x77
+ #define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET      0x78
  #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a
  #define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET      0x7b
  #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET  0x80
        dev_cap->max_eqs = 1 << (field & 0xf);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
        dev_cap->reserved_mtts = 1 << (field >> 4);
 -      MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET);
 -      dev_cap->max_mrw_sz = 1 << field;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET);
        dev_cap->reserved_mrws = 1 << (field & 0xf);
 -      MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET);
 -      dev_cap->max_mtt_seg = 1 << (field & 0x3f);
        MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET);
        dev_cap->num_sys_eqs = size & 0xfff;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
        dev_cap->fs_max_num_qp_per_entry = field;
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET);
+       if (field & (1 << 5))
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
        if (field & 0x1)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN;
        MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
        dev_cap->max_sq_desc_sz = size;
  
 +      MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET);
 +      if (field & 0x1)
 +              dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET);
        dev_cap->max_qp_per_mcg = 1 << field;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET);
@@@ -2783,7 -2703,6 +2788,6 @@@ static int mlx4_check_smp_firewall_acti
  int mlx4_config_mad_demux(struct mlx4_dev *dev)
  {
        struct mlx4_cmd_mailbox *mailbox;
-       int secure_host_active;
        int err;
  
        /* Check if mad_demux is supported */
                goto out;
        }
  
-       secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
+       if (mlx4_check_smp_firewall_active(dev, mailbox))
+               dev->flags |= MLX4_FLAG_SECURE_HOST;
  
        /* Config mad_demux to handle all MADs returned by the query above */
        err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
                goto out;
        }
  
-       if (secure_host_active)
+       if (dev->flags & MLX4_FLAG_SECURE_HOST)
                mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
  out:
        mlx4_free_cmd_mailbox(dev, mailbox);
@@@ -2999,7 -2919,7 +3004,7 @@@ int get_phv_bit(struct mlx4_dev *dev, u
        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
        if (!err)
 -              *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT;
 +              *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT;
        return err;
  }
  EXPORT_SYMBOL(get_phv_bit);
@@@ -3023,22 -2943,6 +3028,22 @@@ int set_phv_bit(struct mlx4_dev *dev, u
  }
  EXPORT_SYMBOL(set_phv_bit);
  
 +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
 +                                    bool *vlan_offload_disabled)
 +{
 +      struct mlx4_func_cap func_cap;
 +      int err;
 +
 +      memset(&func_cap, 0, sizeof(func_cap));
 +      err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
 +      if (!err)
 +              *vlan_offload_disabled =
 +                      !!(func_cap.flags0 &
 +                         QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE);
 +      return err;
 +}
 +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled);
 +
  void mlx4_replace_zero_macs(struct mlx4_dev *dev)
  {
        int i;
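
Illustrative only: a hedged sketch of a consumer of the new mlx4_get_is_vlan_offload_disabled() export, e.g. a VF netdev clearing CVLAN offloads when the PF has forced VST QinQ mode. The feature-mask handling and helper name are assumptions for the example.

static void my_check_vlan_offload(struct mlx4_dev *dev, u8 port,
				  netdev_features_t *features)
{
	bool disabled = false;

	if (!mlx4_get_is_vlan_offload_disabled(dev, port, &disabled) &&
	    disabled)
		*features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
			       NETIF_F_HW_VLAN_CTAG_TX);
}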
diff --combined drivers/nvme/host/rdma.c
index fbdb2267e4603499021b39382d932c571874c505,1a1854796797097f911bc1f236fc8ed6534d71b6..28632292e85e49b5f2b83207c0f5341961449d18
  
  #define NVME_RDMA_MAX_INLINE_SEGMENTS 1
  
 -#define NVME_RDMA_MAX_PAGES_PER_MR    512
 -
 -#define NVME_RDMA_DEF_RECONNECT_DELAY 20
 -
  /*
   * We handle AEN commands ourselves and don't even let the
   * block layer know about them.
@@@ -54,7 -58,6 +54,6 @@@
  struct nvme_rdma_device {
        struct ib_device       *dev;
        struct ib_pd           *pd;
-       struct ib_mr           *mr;
        struct kref             ref;
        struct list_head        entry;
  };
@@@ -73,6 -76,7 +72,6 @@@ struct nvme_rdma_request 
        u32                     num_sge;
        int                     nents;
        bool                    inline_data;
 -      bool                    need_inval;
        struct ib_reg_wr        reg_wr;
        struct ib_cqe           reg_cqe;
        struct nvme_rdma_queue  *queue;
@@@ -82,8 -86,6 +81,8 @@@
  
  enum nvme_rdma_queue_flags {
        NVME_RDMA_Q_CONNECTED = (1 << 0),
 +      NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
 +      NVME_RDMA_Q_DELETING = (1 << 2),
  };
  
  struct nvme_rdma_queue {
@@@ -283,7 -285,7 +282,7 @@@ static int nvme_rdma_reinit_request(voi
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        int ret = 0;
  
 -      if (!req->need_inval)
 +      if (!req->mr->need_inval)
                goto out;
  
        ib_dereg_mr(req->mr);
        if (IS_ERR(req->mr)) {
                ret = PTR_ERR(req->mr);
                req->mr = NULL;
 +              goto out;
        }
  
 -      req->need_inval = false;
 +      req->mr->need_inval = false;
  
  out:
        return ret;
@@@ -408,10 -409,7 +407,7 @@@ static void nvme_rdma_free_dev(struct k
        list_del(&ndev->entry);
        mutex_unlock(&device_list_mutex);
  
-       if (!register_always)
-               ib_dereg_mr(ndev->mr);
        ib_dealloc_pd(ndev->pd);
        kfree(ndev);
  }
  
@@@ -444,24 -442,16 +440,16 @@@ nvme_rdma_find_get_device(struct rdma_c
        ndev->dev = cm_id->device;
        kref_init(&ndev->ref);
  
-       ndev->pd = ib_alloc_pd(ndev->dev);
+       ndev->pd = ib_alloc_pd(ndev->dev,
+               register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
        if (IS_ERR(ndev->pd))
                goto out_free_dev;
  
-       if (!register_always) {
-               ndev->mr = ib_get_dma_mr(ndev->pd,
-                                           IB_ACCESS_LOCAL_WRITE |
-                                           IB_ACCESS_REMOTE_READ |
-                                           IB_ACCESS_REMOTE_WRITE);
-               if (IS_ERR(ndev->mr))
-                       goto out_free_pd;
-       }
        if (!(ndev->dev->attrs.device_cap_flags &
              IB_DEVICE_MEM_MGT_EXTENSIONS)) {
                dev_err(&ndev->dev->dev,
                        "Memory registrations not supported.\n");
-               goto out_free_mr;
+               goto out_free_pd;
        }
  
        list_add(&ndev->entry, &device_list);
@@@ -469,9 -459,6 +457,6 @@@ out_unlock
        mutex_unlock(&device_list_mutex);
        return ndev;
  
- out_free_mr:
-       if (!register_always)
-               ib_dereg_mr(ndev->mr);
  out_free_pd:
        ib_dealloc_pd(ndev->pd);
  out_free_dev:
@@@ -483,14 -470,9 +468,14 @@@ out_err
  
  static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
  {
 -      struct nvme_rdma_device *dev = queue->device;
 -      struct ib_device *ibdev = dev->dev;
 +      struct nvme_rdma_device *dev;
 +      struct ib_device *ibdev;
 +
 +      if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
 +              return;
  
 +      dev = queue->device;
 +      ibdev = dev->dev;
        rdma_destroy_qp(queue->cm_id);
        ib_free_cq(queue->ib_cq);
  
@@@ -541,7 -523,6 +526,7 @@@ static int nvme_rdma_create_queue_ib(st
                ret = -ENOMEM;
                goto out_destroy_qp;
        }
 +      set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
  
        return 0;
  
@@@ -594,13 -575,11 +579,13 @@@ static int nvme_rdma_init_queue(struct 
                goto out_destroy_cm_id;
        }
  
 +      clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
        set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
  
        return 0;
  
  out_destroy_cm_id:
 +      nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
        return ret;
  }
@@@ -619,7 -598,7 +604,7 @@@ static void nvme_rdma_free_queue(struc
  
  static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue)
  {
 -      if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
 +      if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
                return;
        nvme_rdma_stop_queue(queue);
        nvme_rdma_free_queue(queue);
@@@ -651,8 -630,7 +636,8 @@@ static int nvme_rdma_init_io_queues(str
        int i, ret;
  
        for (i = 1; i < ctrl->queue_count; i++) {
 -              ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.sqsize);
 +              ret = nvme_rdma_init_queue(ctrl, i,
 +                                         ctrl->ctrl.opts->queue_size);
                if (ret) {
                        dev_info(ctrl->ctrl.device,
                                "failed to initialize i/o queue: %d\n", ret);
        return 0;
  
  out_free_queues:
 -      for (; i >= 1; i--)
 +      for (i--; i >= 1; i--)
                nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
  
        return ret;
@@@ -772,13 -750,8 +757,13 @@@ static void nvme_rdma_error_recovery_wo
  {
        struct nvme_rdma_ctrl *ctrl = container_of(work,
                        struct nvme_rdma_ctrl, err_work);
 +      int i;
  
        nvme_stop_keep_alive(&ctrl->ctrl);
 +
 +      for (i = 0; i < ctrl->queue_count; i++)
 +              clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
 +
        if (ctrl->queue_count > 1)
                nvme_stop_queues(&ctrl->ctrl);
        blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@@ -861,7 -834,7 +846,7 @@@ static void nvme_rdma_unmap_data(struc
        if (!blk_rq_bytes(rq))
                return;
  
 -      if (req->need_inval) {
 +      if (req->mr->need_inval) {
                res = nvme_rdma_inv_rkey(queue, req);
                if (res < 0) {
                        dev_err(ctrl->ctrl.device,
@@@ -915,7 -888,7 +900,7 @@@ static int nvme_rdma_map_sg_single(stru
  
        sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
        put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
-       put_unaligned_le32(queue->device->mr->rkey, sg->key);
+       put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
        sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
        return 0;
  }
@@@ -947,7 -920,7 +932,7 @@@ static int nvme_rdma_map_sg_fr(struct n
                             IB_ACCESS_REMOTE_READ |
                             IB_ACCESS_REMOTE_WRITE;
  
 -      req->need_inval = true;
 +      req->mr->need_inval = true;
  
        sg->addr = cpu_to_le64(req->mr->iova);
        put_unaligned_le24(req->mr->length, sg->length);
@@@ -970,7 -943,7 +955,7 @@@ static int nvme_rdma_map_data(struct nv
  
        req->num_sge = 1;
        req->inline_data = false;
 -      req->need_inval = false;
 +      req->mr->need_inval = false;
  
        c->common.flags |= NVME_CMD_SGL_METABUF;
  
                    nvme_rdma_queue_idx(queue))
                        return nvme_rdma_map_sg_inline(queue, req, c);
  
-               if (!register_always)
+               if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
                        return nvme_rdma_map_sg_single(queue, req, c);
        }
  
@@@ -1157,7 -1130,7 +1142,7 @@@ static int nvme_rdma_process_nvme_rsp(s
  
        if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
            wc->ex.invalidate_rkey == req->mr->rkey)
 -              req->need_inval = false;
 +              req->mr->need_inval = false;
  
        blk_mq_complete_request(rq, status);
  
@@@ -1290,22 -1263,8 +1275,22 @@@ static int nvme_rdma_route_resolved(str
  
        priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
        priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue));
 -      priv.hrqsize = cpu_to_le16(queue->queue_size);
 -      priv.hsqsize = cpu_to_le16(queue->queue_size);
 +      /*
 +       * set the admin queue depth to the minimum size
 +       * specified by the Fabrics standard.
 +       */
 +      if (priv.qid == 0) {
 +              priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
 +              priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
 +      } else {
 +              /*
 +               * The current interpretation of the fabrics spec is
 +               * that hrqsize must be at least sqsize + 1, i.e. the
 +               * 1's based representation of sqsize.
 +               */
 +              priv.hrqsize = cpu_to_le16(queue->queue_size);
 +              priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
 +      }
  
        ret = rdma_connect(queue->cm_id, &param);
        if (ret) {
@@@ -1321,6 -1280,58 +1306,6 @@@ out_destroy_queue_ib
        return ret;
  }
  
 -/**
 - * nvme_rdma_device_unplug() - Handle RDMA device unplug
 - * @queue:      Queue that owns the cm_id that caught the event
 - *
 - * DEVICE_REMOVAL event notifies us that the RDMA device is about
 - * to unplug so we should take care of destroying our RDMA resources.
 - * This event will be generated for each allocated cm_id.
 - *
 - * In our case, the RDMA resources are managed per controller and not
 - * only per queue. So the way we handle this is we trigger an implicit
 - * controller deletion upon the first DEVICE_REMOVAL event we see, and
 - * hold the event inflight until the controller deletion is completed.
 - *
 - * One exception that we need to handle is the destruction of the cm_id
 - * that caught the event. Since we hold the callout until the controller
 - * deletion is completed, we'll deadlock if the controller deletion will
 - * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
 - * of destroying this queue before-hand, destroy the queue resources,
 - * then queue the controller deletion which won't destroy this queue and
 - * we destroy the cm_id implicitely by returning a non-zero rc to the callout.
 - */
 -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
 -{
 -      struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 -      int ret;
 -
 -      /* Own the controller deletion */
 -      if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 -              return 0;
 -
 -      dev_warn(ctrl->ctrl.device,
 -              "Got rdma device removal event, deleting ctrl\n");
 -
 -      /* Get rid of reconnect work if its running */
 -      cancel_delayed_work_sync(&ctrl->reconnect_work);
 -
 -      /* Disable the queue so ctrl delete won't free it */
 -      if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
 -              /* Free this queue ourselves */
 -              nvme_rdma_stop_queue(queue);
 -              nvme_rdma_destroy_queue_ib(queue);
 -
 -              /* Return non-zero so the cm_id will destroy implicitly */
 -              ret = 1;
 -      }
 -
 -      /* Queue controller deletion */
 -      queue_work(nvme_rdma_wq, &ctrl->delete_work);
 -      flush_work(&ctrl->delete_work);
 -      return ret;
 -}
 -
  static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                struct rdma_cm_event *ev)
  {
                nvme_rdma_error_recovery(queue->ctrl);
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
 -              /* return 1 means impliciy CM ID destroy */
 -              return nvme_rdma_device_unplug(queue);
 +              /* device removal is handled via the ib_client API */
 +              break;
        default:
                dev_err(queue->ctrl->ctrl.device,
                        "Unexpected RDMA CM event (%d)\n", ev->event);
@@@ -1435,7 -1446,7 +1420,7 @@@ static int nvme_rdma_queue_rq(struct bl
        if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH)
                flush = true;
        ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
 -                      req->need_inval ? &req->reg_wr.wr : NULL, flush);
 +                      req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
        if (ret) {
                nvme_rdma_unmap_data(queue, rq);
                goto err;
@@@ -1664,19 -1675,15 +1649,19 @@@ static int __nvme_rdma_del_ctrl(struct 
  static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
  {
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
 -      int ret;
 +      int ret = 0;
  
 +      /*
 +       * Keep a reference until all work is flushed since
 +       * __nvme_rdma_del_ctrl can free the ctrl mem
 +       */
 +      if (!kref_get_unless_zero(&ctrl->ctrl.kref))
 +              return -EBUSY;
        ret = __nvme_rdma_del_ctrl(ctrl);
 -      if (ret)
 -              return ret;
 -
 -      flush_work(&ctrl->delete_work);
 -
 -      return 0;
 +      if (!ret)
 +              flush_work(&ctrl->delete_work);
 +      nvme_put_ctrl(&ctrl->ctrl);
 +      return ret;
  }
  
  static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
@@@ -1794,7 -1801,7 +1779,7 @@@ static int nvme_rdma_create_io_queues(s
  
        memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
        ctrl->tag_set.ops = &nvme_rdma_mq_ops;
 -      ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
 +      ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
        ctrl->tag_set.reserved_tags = 1; /* fabric connect */
        ctrl->tag_set.numa_node = NUMA_NO_NODE;
        ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@@ -1892,7 -1899,7 +1877,7 @@@ static struct nvme_ctrl *nvme_rdma_crea
        spin_lock_init(&ctrl->lock);
  
        ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 -      ctrl->ctrl.sqsize = opts->queue_size;
 +      ctrl->ctrl.sqsize = opts->queue_size - 1;
        ctrl->ctrl.kato = opts->kato;
  
        ret = -ENOMEM;
@@@ -1973,57 -1980,27 +1958,57 @@@ static struct nvmf_transport_ops nvme_r
        .create_ctrl    = nvme_rdma_create_ctrl,
  };
  
 +static void nvme_rdma_add_one(struct ib_device *ib_device)
 +{
 +}
 +
 +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 +{
 +      struct nvme_rdma_ctrl *ctrl;
 +
 +      /* Delete all controllers using this device */
 +      mutex_lock(&nvme_rdma_ctrl_mutex);
 +      list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
 +              if (ctrl->device->dev != ib_device)
 +                      continue;
 +              dev_info(ctrl->ctrl.device,
 +                      "Removing ctrl: NQN \"%s\", addr %pISp\n",
 +                      ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
 +              __nvme_rdma_del_ctrl(ctrl);
 +      }
 +      mutex_unlock(&nvme_rdma_ctrl_mutex);
 +
 +      flush_workqueue(nvme_rdma_wq);
 +}
 +
 +static struct ib_client nvme_rdma_ib_client = {
 +      .name   = "nvme_rdma",
 +      .add = nvme_rdma_add_one,
 +      .remove = nvme_rdma_remove_one
 +};
 +
  static int __init nvme_rdma_init_module(void)
  {
 +      int ret;
 +
        nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
        if (!nvme_rdma_wq)
                return -ENOMEM;
  
 +      ret = ib_register_client(&nvme_rdma_ib_client);
 +      if (ret) {
 +              destroy_workqueue(nvme_rdma_wq);
 +              return ret;
 +      }
 +
        nvmf_register_transport(&nvme_rdma_transport);
        return 0;
  }
  
  static void __exit nvme_rdma_cleanup_module(void)
  {
 -      struct nvme_rdma_ctrl *ctrl;
 -
        nvmf_unregister_transport(&nvme_rdma_transport);
 -
 -      mutex_lock(&nvme_rdma_ctrl_mutex);
 -      list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
 -              __nvme_rdma_del_ctrl(ctrl);
 -      mutex_unlock(&nvme_rdma_ctrl_mutex);
 -
 +      ib_unregister_client(&nvme_rdma_ib_client);
        destroy_workqueue(nvme_rdma_wq);
  }
  
index 1cbe6e053b5b89f2187ae520e9a7fe792dd8cafa,187763a773555601c3adcc3b02db26958b5feb1f..f8d23999e0f2c28b98934c6a790b3a6c2bde26d1
@@@ -848,7 -848,7 +848,7 @@@ nvmet_rdma_find_get_device(struct rdma_
        ndev->device = cm_id->device;
        kref_init(&ndev->ref);
  
-       ndev->pd = ib_alloc_pd(ndev->device);
+       ndev->pd = ib_alloc_pd(ndev->device, 0);
        if (IS_ERR(ndev->pd))
                goto out_free_dev;
  
@@@ -978,11 -978,10 +978,11 @@@ static void nvmet_rdma_release_queue_wo
                container_of(w, struct nvmet_rdma_queue, release_work);
        struct rdma_cm_id *cm_id = queue->cm_id;
        struct nvmet_rdma_device *dev = queue->dev;
 +      enum nvmet_rdma_queue_state state = queue->state;
  
        nvmet_rdma_free_queue(queue);
  
 -      if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
 +      if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
                rdma_destroy_id(cm_id);
  
        kref_put(&dev->ref, nvmet_rdma_free_dev);
@@@ -1004,10 -1003,10 +1004,10 @@@ nvmet_rdma_parse_cm_connect_req(struct 
        queue->host_qid = le16_to_cpu(req->qid);
  
        /*
 -       * req->hsqsize corresponds to our recv queue size
 +       * req->hsqsize corresponds to our recv queue size plus 1
         * req->hrqsize corresponds to our send queue size
         */
 -      queue->recv_queue_size = le16_to_cpu(req->hsqsize);
 +      queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
        queue->send_queue_size = le16_to_cpu(req->hrqsize);
  
        if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
index c7a5d49e487f0ca6b65653226127d38d26cee696,0e4c6090bf625dd1deaeea83db12f3ab0db39146..9e8802181452831c52b9a87428ccec02c353a806
@@@ -128,7 -128,6 +128,7 @@@ static int kiblnd_msgtype2size(int type
  static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
  {
        struct kib_rdma_desc *rd;
 +      int msg_size;
        int nob;
        int n;
        int i;
  
        n = rd->rd_nfrags;
  
 -      if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
 -              CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
 -                     n, IBLND_MAX_RDMA_FRAGS);
 -              return 1;
 -      }
 -
        nob = offsetof(struct kib_msg, ibm_u) +
              kiblnd_rd_msg_size(rd, msg->ibm_type, n);
  
                return 1;
        }
  
 +      msg_size = kiblnd_rd_size(rd);
 +      if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
 +              CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
 +                     msg_size, LNET_MAX_PAYLOAD);
 +              return 1;
 +      }
 +
        if (!flip)
                return 0;
  
@@@ -620,7 -618,7 +620,7 @@@ static int kiblnd_get_completion_vector
  }
  
  struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
 -                             int state, int version)
 +                                  int state, int version)
  {
        /*
         * CAVEAT EMPTOR:
@@@ -2467,7 -2465,7 +2467,7 @@@ int kiblnd_dev_failover(struct kib_dev 
        hdev->ibh_cmid  = cmid;
        hdev->ibh_ibdev = cmid->device;
  
-       pd = ib_alloc_pd(cmid->device);
+       pd = ib_alloc_pd(cmid->device, 0);
        if (IS_ERR(pd)) {
                rc = PTR_ERR(pd);
                CERROR("Can't allocate PD: %d\n", rc);
index 59b50d3eedb46aa2db7d52dae8921790952bcebb,062d10aaf5cb65bf12752fe07cd750d9c9829a29..f6a16429735812f678f96595dff75e603504a8af
@@@ -71,7 -71,8 +71,8 @@@ enum 
        MLX4_FLAG_SLAVE         = 1 << 3,
        MLX4_FLAG_SRIOV         = 1 << 4,
        MLX4_FLAG_OLD_REG_MAC   = 1 << 6,
-       MLX4_FLAG_BONDED        = 1 << 7
+       MLX4_FLAG_BONDED        = 1 << 7,
+       MLX4_FLAG_SECURE_HOST   = 1 << 8,
  };
  
  enum {
@@@ -221,7 -222,7 +222,8 @@@ enum 
        MLX4_DEV_CAP_FLAG2_ROCE_V1_V2           = 1ULL <<  33,
        MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER   = 1ULL <<  34,
        MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT        = 1ULL <<  35,
 -      MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 36,
 +      MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP          = 1ULL <<  36,
++      MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
  };
  
  enum {
@@@ -449,6 -450,7 +451,7 @@@ enum 
        MLX4_DEV_PMC_SUBTYPE_GUID_INFO   = 0x14,
        MLX4_DEV_PMC_SUBTYPE_PORT_INFO   = 0x15,
        MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE  = 0x16,
+       MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP = 0x17,
  };
  
  /* Port mgmt change event handling */
@@@ -460,6 -462,11 +463,11 @@@ enum 
        MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK        = 1 << 4,
  };
  
+ union sl2vl_tbl_to_u64 {
+       u8      sl8[8];
+       u64     sl64;
+ };
  enum {
        MLX4_DEVICE_STATE_UP                    = 1 << 0,
        MLX4_DEVICE_STATE_INTERNAL_ERROR        = 1 << 1,
@@@ -946,6 -953,9 +954,9 @@@ struct mlx4_eqe 
                                        __be32 block_ptr;
                                        __be32 tbl_entries_mask;
                                } __packed tbl_change_info;
+                               struct {
+                                       u8 sl2vl_table[8];
+                               } __packed sl2vl_tbl_change_info;
                        } params;
                } __packed port_mgmt_change;
                struct {
@@@ -1372,8 -1382,6 +1383,8 @@@ int mlx4_SET_PORT_fcs_check(struct mlx4
  int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
  int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
  int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
 +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
 +                                    bool *vlan_offload_disabled);
  int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
  int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
  int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
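
Illustrative only: a hedged sketch of how an EQE consumer could use the new sl2vl_tbl_to_u64 union to fold the 8-byte table carried by the sl2vl port-management sub-event into a single u64; the helper name is an assumption.

static u64 my_sl2vl_from_eqe(const struct mlx4_eqe *eqe)
{
	union sl2vl_tbl_to_u64 sl2vl64;
	int i;

	for (i = 0; i < ARRAY_SIZE(sl2vl64.sl8); i++)
		sl2vl64.sl8[i] = eqe->event.port_mgmt_change.params.
				 sl2vl_tbl_change_info.sl2vl_table[i];

	return sl2vl64.sl64;
}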
diff --combined include/rdma/ib_verbs.h
index 9e935655fccb6a6431474e37ed50b9a41b6bf754,d3fba0a56e1707ef843ce3cbd24c9c178ddaeded..5ad43a487745d93103c27b68333ab507eab652e1
@@@ -261,6 -261,16 +261,16 @@@ struct ib_odp_caps 
        } per_transport_caps;
  };
  
+ struct ib_rss_caps {
+       /* Corresponding bit will be set if qp type from
+        * 'enum ib_qp_type' is supported, e.g.
+        * supported_qpts |= 1 << IB_QPT_UD
+        */
+       u32 supported_qpts;
+       u32 max_rwq_indirection_tables;
+       u32 max_rwq_indirection_table_size;
+ };
  enum ib_cq_creation_flags {
        IB_CQ_FLAGS_TIMESTAMP_COMPLETION   = 1 << 0,
        IB_CQ_FLAGS_IGNORE_OVERRUN         = 1 << 1,
@@@ -318,6 -328,8 +328,8 @@@ struct ib_device_attr 
        struct ib_odp_caps      odp_caps;
        uint64_t                timestamp_mask;
        uint64_t                hca_core_clock; /* in KHZ */
+       struct ib_rss_caps      rss_caps;
+       u32                     max_wq_type_rq;
  };
  
  enum ib_mtu {
@@@ -525,9 -537,11 +537,11 @@@ enum ib_device_modify_flags 
        IB_DEVICE_MODIFY_NODE_DESC      = 1 << 1
  };
  
+ #define IB_DEVICE_NODE_DESC_MAX 64
  struct ib_device_modify {
        u64     sys_image_guid;
-       char    node_desc[64];
+       char    node_desc[IB_DEVICE_NODE_DESC_MAX];
  };
  
  enum ib_port_modify_flags {
@@@ -1370,10 -1384,17 +1384,17 @@@ struct ib_udata 
  
  struct ib_pd {
        u32                     local_dma_lkey;
+       u32                     flags;
        struct ib_device       *device;
        struct ib_uobject      *uobject;
        atomic_t                usecnt; /* count all resources */
-       struct ib_mr           *local_mr;
+       u32                     unsafe_global_rkey;
+       /*
+        * Implementation details of the RDMA core, don't use in drivers:
+        */
+       struct ib_mr           *__internal_mr;
  };
  
  struct ib_xrcd {
@@@ -1604,6 -1625,8 +1625,8 @@@ struct ib_flow_eth_filter 
        u8      src_mac[6];
        __be16  ether_type;
        __be16  vlan_tag;
+       /* Must be last */
+       u8      real_sz[0];
  };
  
  struct ib_flow_spec_eth {
  struct ib_flow_ib_filter {
        __be16 dlid;
        __u8   sl;
+       /* Must be last */
+       u8      real_sz[0];
  };
  
  struct ib_flow_spec_ib {
        struct ib_flow_ib_filter mask;
  };
  
+ /* IPv4 header flags */
+ enum ib_ipv4_flags {
+       IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */
+       IB_IPV4_MORE_FRAG = 0x4  /* Set on all fragmented packets except
+                                   the last one */
+ };
  struct ib_flow_ipv4_filter {
        __be32  src_ip;
        __be32  dst_ip;
+       u8      proto;
+       u8      tos;
+       u8      ttl;
+       u8      flags;
+       /* Must be last */
+       u8      real_sz[0];
  };
  
  struct ib_flow_spec_ipv4 {
  struct ib_flow_ipv6_filter {
        u8      src_ip[16];
        u8      dst_ip[16];
+       __be32  flow_label;
+       u8      next_hdr;
+       u8      traffic_class;
+       u8      hop_limit;
+       /* Must be last */
+       u8      real_sz[0];
  };
  
  struct ib_flow_spec_ipv6 {
  struct ib_flow_tcp_udp_filter {
        __be16  dst_port;
        __be16  src_port;
+       /* Must be last */
+       u8      real_sz[0];
  };
  
  struct ib_flow_spec_tcp_udp {
@@@ -1739,14 -1785,6 +1785,14 @@@ struct ib_dma_mapping_ops 
        void            (*unmap_sg)(struct ib_device *dev,
                                    struct scatterlist *sg, int nents,
                                    enum dma_data_direction direction);
 +      int             (*map_sg_attrs)(struct ib_device *dev,
 +                                      struct scatterlist *sg, int nents,
 +                                      enum dma_data_direction direction,
 +                                      unsigned long attrs);
 +      void            (*unmap_sg_attrs)(struct ib_device *dev,
 +                                        struct scatterlist *sg, int nents,
 +                                        enum dma_data_direction direction,
 +                                        unsigned long attrs);
        void            (*sync_single_for_cpu)(struct ib_device *dev,
                                               u64 dma_handle,
                                               size_t size,
@@@ -2041,7 -2079,7 +2087,7 @@@ struct ib_device 
        u64                          uverbs_cmd_mask;
        u64                          uverbs_ex_cmd_mask;
  
-       char                         node_desc[64];
+       char                         node_desc[IB_DEVICE_NODE_DESC_MAX];
        __be64                       node_guid;
        u32                          local_dma_lkey;
        u16                          is_switch:1;
@@@ -2123,17 -2161,22 +2169,17 @@@ static inline bool ib_is_udata_cleared(
                                       size_t len)
  {
        const void __user *p = udata->inbuf + offset;
 -      bool ret = false;
 +      bool ret;
        u8 *buf;
  
        if (len > USHRT_MAX)
                return false;
  
 -      buf = kmalloc(len, GFP_KERNEL);
 -      if (!buf)
 +      buf = memdup_user(p, len);
 +      if (IS_ERR(buf))
                return false;
  
 -      if (copy_from_user(buf, p, len))
 -              goto free;
 -
        ret = !memchr_inv(buf, 0, len);
 -
 -free:
        kfree(buf);
        return ret;
  }
@@@ -2505,8 -2548,23 +2551,23 @@@ int ib_find_gid(struct ib_device *devic
  int ib_find_pkey(struct ib_device *device,
                 u8 port_num, u16 pkey, u16 *index);
  
- struct ib_pd *ib_alloc_pd(struct ib_device *device);
+ enum ib_pd_flags {
+       /*
+        * Create a memory registration for all memory in the system and place
+        * the rkey for it into pd->unsafe_global_rkey.  This can be used by
+        * ULPs to avoid the overhead of dynamic MRs.
+        *
+        * This flag is generally considered unsafe and must only be used in
+        * extremly trusted environments.  Every use of it will log a warning
+        * extremely trusted environments.  Every use of it will log a warning
+        */
+       IB_PD_UNSAFE_GLOBAL_RKEY        = 0x01,
+ };
  
+ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
+               const char *caller);
+ #define ib_alloc_pd(device, flags) \
+       __ib_alloc_pd((device), (flags), __func__)
  void ib_dealloc_pd(struct ib_pd *pd);
  
  /**
@@@ -2859,18 -2917,6 +2920,6 @@@ static inline int ib_req_ncomp_notif(st
                -ENOSYS;
  }
  
- /**
-  * ib_get_dma_mr - Returns a memory region for system memory that is
-  *   usable for DMA.
-  * @pd: The protection domain associated with the memory region.
-  * @mr_access_flags: Specifies the memory access rights.
-  *
-  * Note that the ib_dma_*() functions defined below must be used
-  * to create/destroy addresses used with the Lkey or Rkey returned
-  * by ib_get_dma_mr().
-  */
- struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
  /**
   * ib_dma_mapping_error - check a DMA addr for error
   * @dev: The device for which the dma_addr was created
@@@ -3008,12 -3054,8 +3057,12 @@@ static inline int ib_dma_map_sg_attrs(s
                                      enum dma_data_direction direction,
                                      unsigned long dma_attrs)
  {
 -      return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
 -                              dma_attrs);
 +      if (dev->dma_ops)
 +              return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction,
 +                                                dma_attrs);
 +      else
 +              return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
 +                                      dma_attrs);
  }
  
  static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
                                         enum dma_data_direction direction,
                                         unsigned long dma_attrs)
  {
 -      dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
 +      if (dev->dma_ops)
 +              return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction,
 +                                                dma_attrs);
 +      else
 +              dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
 +                                 dma_attrs);
  }
  /**
   * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
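
The wrappers above let ULP code stay agnostic about whether a device supplies its own software dma_ops or goes through the regular DMA API. A minimal caller sketch, with ibdev, sgl and nents standing in for an already-built device handle and scatterlist (illustrative only):

	/* Map a scatterlist for a send, post work, then unmap it. */
	count = ib_dma_map_sg_attrs(ibdev, sgl, nents, DMA_TO_DEVICE, 0);
	if (!count)
		return -EIO;

	/* ... post work requests that reference the mapped addresses ... */

	ib_dma_unmap_sg_attrs(ibdev, sgl, nents, DMA_TO_DEVICE, 0);
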
index 799cce6cbe45e89f4030d4cd1a17a15b56cf3cf2,6561d4a35acbe5f7bfeb7198f9f6b58d3bb5048b..be3178e5e2d24c4a46549060f464f3d3b4debd6a
@@@ -51,7 -51,6 +51,7 @@@
  #include <linux/slab.h>
  #include <linux/prefetch.h>
  #include <linux/sunrpc/addr.h>
 +#include <linux/sunrpc/svc_rdma.h>
  #include <asm/bitops.h>
  #include <linux/module.h> /* try_module_get()/module_put() */
  
@@@ -387,7 -386,7 +387,7 @@@ rpcrdma_ia_open(struct rpcrdma_xprt *xp
        }
        ia->ri_device = ia->ri_id->device;
  
-       ia->ri_pd = ib_alloc_pd(ia->ri_device);
+       ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@@ -924,7 -923,7 +924,7 @@@ rpcrdma_buffer_create(struct rpcrdma_xp
        }
  
        INIT_LIST_HEAD(&buf->rb_recv_bufs);
 -      for (i = 0; i < buf->rb_max_requests; i++) {
 +      for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
                struct rpcrdma_rep *rep;
  
                rep = rpcrdma_create_rep(r_xprt);
@@@ -1019,7 -1018,6 +1019,7 @@@ rpcrdma_buffer_destroy(struct rpcrdma_b
                rep = rpcrdma_buffer_get_rep_locked(buf);
                rpcrdma_destroy_rep(ia, rep);
        }
 +      buf->rb_send_count = 0;
  
        spin_lock(&buf->rb_reqslock);
        while (!list_empty(&buf->rb_allreqs)) {
                spin_lock(&buf->rb_reqslock);
        }
        spin_unlock(&buf->rb_reqslock);
 +      buf->rb_recv_count = 0;
  
        rpcrdma_destroy_mrs(buf);
  }
@@@ -1077,27 -1074,8 +1077,27 @@@ rpcrdma_put_mw(struct rpcrdma_xprt *r_x
        spin_unlock(&buf->rb_mwlock);
  }
  
 +static struct rpcrdma_rep *
 +rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
 +{
 +      /* If an RPC previously completed without a reply (say, a
 +       * credential problem or a soft timeout occurs) then hold off
 +       * on supplying more Receive buffers until the number of new
 +       * pending RPCs catches up to the number of posted Receives.
 +       */
 +      if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
 +              return NULL;
 +
 +      if (unlikely(list_empty(&buffers->rb_recv_bufs)))
 +              return NULL;
 +      buffers->rb_recv_count++;
 +      return rpcrdma_buffer_get_rep_locked(buffers);
 +}
 +
  /*
   * Get a set of request/reply buffers.
 + *
 + * Reply buffer (if available) is attached to send buffer upon return.
   */
  struct rpcrdma_req *
  rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        spin_lock(&buffers->rb_lock);
        if (list_empty(&buffers->rb_send_bufs))
                goto out_reqbuf;
 +      buffers->rb_send_count++;
        req = rpcrdma_buffer_get_req_locked(buffers);
 -      if (list_empty(&buffers->rb_recv_bufs))
 -              goto out_repbuf;
 -      req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
 +      req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
        return req;
  
  out_reqbuf:
        spin_unlock(&buffers->rb_lock);
 -      pr_warn("rpcrdma: out of request buffers (%p)\n", buffers);
 -      return NULL;
 -out_repbuf:
 -      list_add(&req->rl_free, &buffers->rb_send_bufs);
 -      spin_unlock(&buffers->rb_lock);
 -      pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers);
 +      pr_warn("RPC:       %s: out of request buffers\n", __func__);
        return NULL;
  }
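
The rb_send_count/rb_recv_count pair introduced above acts as a simple credit check: a reply buffer is handed out only while the requests in flight have caught up with the reply buffers already outstanding. A standalone toy model of that rule, illustrative only and not kernel code:

	struct credit_state {
		unsigned int send_count;   /* request buffers handed out */
		unsigned int recv_count;   /* reply buffers handed out   */
	};

	/* Mirror of the check in rpcrdma_buffer_get_rep(): hold off on
	 * supplying another reply buffer while sends lag behind receives.
	 */
	static bool may_supply_reply_buffer(const struct credit_state *s)
	{
		return s->send_count >= s->recv_count;
	}
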
  
@@@ -1133,12 -1117,9 +1133,12 @@@ rpcrdma_buffer_put(struct rpcrdma_req *
        req->rl_reply = NULL;
  
        spin_lock(&buffers->rb_lock);
 +      buffers->rb_send_count--;
        list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
 -      if (rep)
 +      if (rep) {
 +              buffers->rb_recv_count--;
                list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
 +      }
        spin_unlock(&buffers->rb_lock);
  }
  
@@@ -1152,7 -1133,8 +1152,7 @@@ rpcrdma_recv_buffer_get(struct rpcrdma_
        struct rpcrdma_buffer *buffers = req->rl_buffer;
  
        spin_lock(&buffers->rb_lock);
 -      if (!list_empty(&buffers->rb_recv_bufs))
 -              req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
 +      req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
  }
  
@@@ -1166,7 -1148,6 +1166,7 @@@ rpcrdma_recv_buffer_put(struct rpcrdma_
        struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
  
        spin_lock(&buffers->rb_lock);
 +      buffers->rb_recv_count--;
        list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
        spin_unlock(&buffers->rb_lock);
  }