Merge tag 'wireless-drivers-for-davem-2017-04-03' of git://git.kernel.org/pub/scm...
author David S. Miller <davem@davemloft.net>
Tue, 4 Apr 2017 17:28:38 +0000 (10:28 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 4 Apr 2017 17:28:38 +0000 (10:28 -0700)
Kalle Valo says:

====================
wireless-drivers fixes for 4.11

iwlwifi

* an RCU fix
* a fix for a potential out-of-bounds access crash
* a fix for IBSS which has been broken since DQA was enabled

rtlwifi

* fix a scheduling-while-atomic regression

brcmfmac

* fix a use-after-free bug found by KASAN
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
996 files changed:
Documentation/ABI/testing/sysfs-class-net-qmi
Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
Documentation/devicetree/bindings/net/marvell-pp2.txt
Documentation/devicetree/bindings/net/stmmac.txt
Documentation/networking/i40e.txt
Documentation/networking/ip-sysctl.txt
Documentation/networking/ipvs-sysctl.txt
Documentation/networking/mpls-sysctl.txt
MAINTAINERS
Makefile
arch/alpha/include/uapi/asm/socket.h
arch/arm/tools/syscall.tbl
arch/arm64/Kconfig
arch/arm64/include/asm/cpufeature.h
arch/arm64/kernel/cpuidle.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/mm/kasan_init.c
arch/avr32/include/uapi/asm/socket.h
arch/frv/include/uapi/asm/socket.h
arch/ia64/include/uapi/asm/socket.h
arch/m32r/include/uapi/asm/socket.h
arch/mips/include/uapi/asm/socket.h
arch/mn10300/include/uapi/asm/socket.h
arch/openrisc/include/asm/cmpxchg.h
arch/openrisc/include/asm/uaccess.h
arch/openrisc/kernel/or32_ksyms.c
arch/openrisc/kernel/process.c
arch/parisc/include/asm/cacheflush.h
arch/parisc/include/asm/uaccess.h
arch/parisc/include/uapi/asm/socket.h
arch/parisc/include/uapi/asm/unistd.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/module.c
arch/parisc/kernel/perf.c
arch/parisc/kernel/process.c
arch/parisc/kernel/syscall_table.S
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/uapi/asm/socket.h
arch/powerpc/include/uapi/asm/unistd.h
arch/powerpc/platforms/pseries/lpar.c
arch/s390/include/uapi/asm/socket.h
arch/sparc/include/uapi/asm/socket.h
arch/x86/events/core.c
arch/x86/include/asm/pgtable-3level.h
arch/x86/include/asm/pgtable.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
arch/x86/kernel/head64.c
arch/x86/kernel/nmi.c
arch/x86/kernel/tsc.c
arch/x86/kernel/unwind_frame.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/mpx.c
arch/x86/platform/intel-mid/device_libs/Makefile
arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c [new file with mode: 0644]
arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
arch/x86/platform/intel-mid/mfld.c
arch/xtensa/include/uapi/asm/socket.h
block/bio.c
block/blk-core.c
block/blk-mq-tag.c
block/blk-mq.c
drivers/acpi/acpi_processor.c
drivers/acpi/bus.c
drivers/acpi/processor_core.c
drivers/atm/ambassador.c
drivers/base/core.c
drivers/bluetooth/Kconfig
drivers/bluetooth/btqcomsmd.c
drivers/clocksource/tcb_clksrc.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/dax/dax.c
drivers/gpio/gpio-altera-a10sr.c
drivers/gpio/gpio-altera.c
drivers/gpio/gpio-mcp23s08.c
drivers/gpio/gpio-mockup.c
drivers/gpio/gpio-xgene.c
drivers/gpu/drm/amd/acp/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/si_dpm.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
drivers/gpu/drm/arm/malidp_crtc.c
drivers/gpu/drm/arm/malidp_hw.c
drivers/gpu/drm/arm/malidp_planes.c
drivers/gpu/drm/arm/malidp_regs.h
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_evict.c
drivers/gpu/drm/i915/i915_gem_object.h
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_fbdev.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
drivers/gpu/drm/radeon/si_dpm.c
drivers/gpu/drm/tilcdc/tilcdc_crtc.c
drivers/hid/Kconfig
drivers/hid/hid-chicony.c
drivers/hid/hid-core.c
drivers/hid/hid-corsair.c
drivers/hid/hid-ids.h
drivers/hid/hid-sony.c
drivers/hid/usbhid/hid-quirks.c
drivers/hid/wacom_sys.c
drivers/hid/wacom_wac.c
drivers/hv/ring_buffer.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qedr/qedr.h
drivers/infiniband/hw/qedr/qedr_cm.c
drivers/infiniband/hw/qedr/qedr_hsi.h [deleted file]
drivers/infiniband/hw/qedr/verbs.c
drivers/isdn/divert/isdn_divert.c
drivers/isdn/hardware/eicon/divasi.c
drivers/isdn/hardware/mISDN/Kconfig
drivers/isdn/hardware/mISDN/hfc_multi_8xx.h
drivers/isdn/hardware/mISDN/hfcmulti.c
drivers/isdn/hardware/mISDN/hfcpci.c
drivers/isdn/hardware/mISDN/mISDNipac.c
drivers/isdn/hardware/mISDN/mISDNisar.c
drivers/isdn/hardware/mISDN/w6692.c
drivers/isdn/hisax/amd7930_fn.c
drivers/isdn/hisax/arcofi.c
drivers/isdn/hisax/diva.c
drivers/isdn/hisax/elsa.c
drivers/isdn/hisax/fsm.c
drivers/isdn/hisax/hfc4s8s_l1.c
drivers/isdn/hisax/hfc_2bds0.c
drivers/isdn/hisax/hfc_pci.c
drivers/isdn/hisax/hfc_sx.c
drivers/isdn/hisax/hfc_usb.c
drivers/isdn/hisax/hfcscard.c
drivers/isdn/hisax/icc.c
drivers/isdn/hisax/ipacx.c
drivers/isdn/hisax/isac.c
drivers/isdn/hisax/isar.c
drivers/isdn/hisax/isdnl3.c
drivers/isdn/hisax/teleint.c
drivers/isdn/hisax/w6692.c
drivers/isdn/i4l/isdn_ppp.c
drivers/isdn/i4l/isdn_tty.c
drivers/isdn/mISDN/dsp_core.c
drivers/isdn/mISDN/fsm.c
drivers/isdn/mISDN/l1oip_core.c
drivers/md/dm.c
drivers/md/md-cluster.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/net/Makefile
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_main.c
drivers/net/cris/eth_v10.c
drivers/net/dsa/Kconfig
drivers/net/dsa/Makefile
drivers/net/dsa/bcm_sf2_cfp.c
drivers/net/dsa/dsa_loop.c [new file with mode: 0644]
drivers/net/dsa/dsa_loop.h [new file with mode: 0644]
drivers/net/dsa/dsa_loop_bdinfo.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/Makefile
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/global1.c
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global1_atu.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/dsa/mv88e6xxx/port.h
drivers/net/dummy.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/adi/bfin_mac.h
drivers/net/ethernet/aeroflex/greth.c
drivers/net/ethernet/amd/xgbe/xgbe-common.h
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
drivers/net/ethernet/apm/Kconfig
drivers/net/ethernet/apm/Makefile
drivers/net/ethernet/apm/xgene-v2/Kconfig [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/Makefile [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/enet.c [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/enet.h [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/ethtool.c [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/mac.c [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/mac.h [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/main.c [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/main.h [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/mdio.c [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/ring.c [new file with mode: 0644]
drivers/net/ethernet/apm/xgene-v2/ring.h [new file with mode: 0644]
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
drivers/net/ethernet/aquantia/atlantic/aq_main.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bcmsysport.h
drivers/net/ethernet/broadcom/bgmac-bcma.c
drivers/net/ethernet/broadcom/bgmac-platform.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bgmac.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmgenet.h
drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/brocade/bna/bnad_debugfs.c
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
drivers/net/ethernet/cavium/liquidio/lio_core.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/liquidio_common.h
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_device.h
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.h
drivers/net/ethernet/cavium/liquidio/octeon_iq.h
drivers/net/ethernet/cavium/liquidio/octeon_network.h
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.h
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_values.h
drivers/net/ethernet/ethoc.c
drivers/net/ethernet/ezchip/nps_enet.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
drivers/net/ethernet/freescale/fman/fman.c
drivers/net/ethernet/freescale/fman/fman.h
drivers/net/ethernet/freescale/fman/fman_dtsec.c
drivers/net/ethernet/freescale/fman/fman_memac.c
drivers/net/ethernet/freescale/fman/fman_memac.h
drivers/net/ethernet/freescale/fman/fman_port.c
drivers/net/ethernet/freescale/fs_enet/mac-fec.c
drivers/net/ethernet/freescale/fs_enet/mac-scc.c
drivers/net/ethernet/hisilicon/hns/hnae.c
drivers/net/ethernet/hisilicon/hns/hnae.h
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_enet.h
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/hisilicon/hns_mdio.c
drivers/net/ethernet/ibm/ibmveth.h
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/Kconfig
drivers/net/ethernet/intel/e1000/e1000_ethtool.c
drivers/net/ethernet/intel/e1000e/ethtool.c
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/i40e/Makefile
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_client.c
drivers/net/ethernet/intel/i40e/i40e_client.h
drivers/net/ethernet/intel/i40e/i40e_common.c
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/i40e/i40e_osdep.h
drivers/net/ethernet/intel/i40e/i40e_prototype.h
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
drivers/net/ethernet/intel/i40evf/Makefile
drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40evf/i40e_common.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_client.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40evf/i40evf_client.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igbvf/ethtool.c
drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/marvell/Kconfig
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_selftest.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/cmd.h
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/port.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/resources.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/switchx2.c
drivers/net/ethernet/micrel/ks8851.c
drivers/net/ethernet/netronome/nfp/Makefile
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_cxt.h
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.h
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_dev_api.h
drivers/net/ethernet/qlogic/qed/qed_fcoe.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qed/qed_hw.c
drivers/net/ethernet/qlogic/qed/qed_hw.h
drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
drivers/net/ethernet/qlogic/qed/qed_init_ops.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_iscsi.c
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_ll2.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_ptp.c
drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
drivers/net/ethernet/qlogic/qed/qed_roce.c
drivers/net/ethernet/qlogic/qed/qed_roce.h
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qed/qed_sriov.h
drivers/net/ethernet/qlogic/qed/qed_vf.c
drivers/net/ethernet/qlogic/qed/qed_vf.h
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
drivers/net/ethernet/realtek/8139cp.c
drivers/net/ethernet/realtek/8139too.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/falcon/tx.c
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sgi/ioc3-eth.c
drivers/net/ethernet/silan/sc92031.c
drivers/net/ethernet/sis/sis190.c
drivers/net/ethernet/sis/sis900.c
drivers/net/ethernet/smsc/epic100.c
drivers/net/ethernet/smsc/smc911x.c
drivers/net/ethernet/smsc/smc91c92_cs.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/sun/cassini.c
drivers/net/ethernet/sun/ldmvsw.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/sun/sungem.c
drivers/net/ethernet/sun/sunhme.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/sun/sunvnet_common.c
drivers/net/ethernet/sun/sunvnet_common.h
drivers/net/ethernet/synopsys/Kconfig [new file with mode: 0644]
drivers/net/ethernet/synopsys/Makefile [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac-common.c [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac-net.c [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac-pci.c [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac-reg.h [new file with mode: 0644]
drivers/net/ethernet/synopsys/dwc-xlgmac.h [new file with mode: 0644]
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/ti/Kconfig
drivers/net/ethernet/ti/Makefile
drivers/net/ethernet/ti/netcp_core.c
drivers/net/ethernet/toshiba/ps3_gelic_net.c
drivers/net/ethernet/toshiba/spider_net_ethtool.c
drivers/net/ethernet/tundra/tsi108_eth.c
drivers/net/ethernet/via/via-rhine.c
drivers/net/ethernet/via/via-velocity.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/fjes/fjes_ethtool.c
drivers/net/fjes/fjes_main.c
drivers/net/gtp.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ieee802154/mrf24j40.c
drivers/net/loopback.c
drivers/net/ntb_netdev.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/bcm-phy-lib.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/dp83867.c
drivers/net/phy/intel-xway.c
drivers/net/phy/mdio-bcm-unimac.c
drivers/net/phy/mdio-boardinfo.c
drivers/net/phy/mdio-boardinfo.h
drivers/net/phy/mdio-xgene.c
drivers/net/phy/mdio_bus.c
drivers/net/phy/micrel.c
drivers/net/phy/microchip.c
drivers/net/phy/phy-core.c [new file with mode: 0644]
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/smsc.c
drivers/net/tun.c
drivers/net/usb/asix_devices.c
drivers/net/usb/ax88172a.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/catc.c
drivers/net/usb/cdc_mbim.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/dm9601.c
drivers/net/usb/int51x1.c
drivers/net/usb/lan78xx.c
drivers/net/usb/mcs7830.c
drivers/net/usb/pegasus.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/r8152.c
drivers/net/usb/rndis_host.c
drivers/net/usb/rtl8150.c
drivers/net/usb/sierra_net.c
drivers/net/usb/smsc75xx.c
drivers/net/usb/smsc95xx.c
drivers/net/usb/sr9700.c
drivers/net/usb/sr9800.c
drivers/net/usb/usbnet.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_ethtool.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wireless/ath/wcn36xx/Kconfig
drivers/net/wireless/ath/wcn36xx/main.c
drivers/net/wireless/ath/wcn36xx/smd.c
drivers/net/wireless/ath/wcn36xx/smd.h
drivers/net/wireless/ath/wcn36xx/wcn36xx.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
drivers/net/wireless/rndis_wlan.c
drivers/net/wireless/st/cw1200/cw1200_sdio.c
drivers/remoteproc/Kconfig
drivers/rpmsg/Kconfig
drivers/scsi/Kconfig
drivers/scsi/hpsa.c
drivers/scsi/hpsa.h
drivers/scsi/hpsa_cmd.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/megaraid/megaraid_sas.h
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/qedf/Makefile
drivers/scsi/qedf/drv_fcoe_fw_funcs.c [new file with mode: 0644]
drivers/scsi/qedf/drv_fcoe_fw_funcs.h [new file with mode: 0644]
drivers/scsi/qedf/drv_scsi_fw_funcs.c [new file with mode: 0644]
drivers/scsi/qedf/drv_scsi_fw_funcs.h [new file with mode: 0644]
drivers/scsi/qedf/qedf.h
drivers/scsi/qedf/qedf_els.c
drivers/scsi/qedf/qedf_io.c
drivers/scsi/qedi/Makefile
drivers/scsi/qedi/qedi_fw.c
drivers/scsi/qedi/qedi_fw_api.c [new file with mode: 0644]
drivers/scsi/qedi/qedi_fw_iscsi.h [new file with mode: 0644]
drivers/scsi/qedi/qedi_fw_scsi.h [new file with mode: 0644]
drivers/scsi/qedi/qedi_iscsi.c
drivers/scsi/qedi/qedi_iscsi.h
drivers/scsi/qedi/qedi_version.h
drivers/scsi/qla2xxx/Kconfig
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_dbg.h
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_dfs.c
drivers/scsi/qla2xxx/qla_gbl.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_mid.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/qla_target.h
drivers/scsi/qla2xxx/qla_version.h
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/ufs/ufshcd.c
drivers/soc/qcom/Kconfig
drivers/soc/qcom/Makefile
drivers/soc/qcom/smd-rpm.c
drivers/soc/qcom/smd.c [deleted file]
drivers/soc/qcom/wcnss_ctrl.c
drivers/target/target_core_alua.c
drivers/target/target_core_configfs.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_sbc.c
drivers/target/target_core_tpg.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/tty/serial/st-asc.c
drivers/usb/gadget/function/f_ncm.c
drivers/vhost/vsock.c
drivers/xen/gntdev.c
fs/afs/callback.c
fs/afs/cmservice.c
fs/afs/file.c
fs/afs/fsclient.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/misc.c
fs/afs/mntpt.c
fs/afs/rxrpc.c
fs/afs/security.c
fs/afs/server.c
fs/afs/vlocation.c
fs/afs/write.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/cifs/cifsfs.c
fs/cifs/connect.c
fs/cifs/smb2pdu.c
fs/eventpoll.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/f2fs.h
fs/f2fs/node.c
fs/f2fs/segment.c
fs/fs-writeback.c
fs/nfs/callback.c
fs/nfs/client.c
fs/nfs/filelayout/filelayoutdev.c
fs/nfs/flexfilelayout/flexfilelayout.h
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/internal.h
fs/nfs/nfs4client.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4xdr.c
fs/nfs/pnfs.h
fs/nfs/pnfs_nfs.c
fs/nfs/write.c
fs/select.c
fs/xfs/libxfs/xfs_dir2_priv.h
fs/xfs/libxfs/xfs_dir2_sf.c
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_inode_fork.h
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_inode.c
include/linux/acpi.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/brcmphy.h
include/linux/device.h
include/linux/errqueue.h
include/linux/etherdevice.h
include/linux/ethtool.h
include/linux/filter.h
include/linux/gpio/consumer.h
include/linux/hyperv.h
include/linux/inetdevice.h
include/linux/ipv6.h
include/linux/kasan.h
include/linux/mlx4/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/netdevice.h
include/linux/of_mdio.h
include/linux/phy.h
include/linux/qed/common_hsi.h
include/linux/qed/eth_common.h
include/linux/qed/fcoe_common.h
include/linux/qed/iscsi_common.h
include/linux/qed/qed_if.h
include/linux/qed/rdma_common.h
include/linux/qed/roce_common.h
include/linux/qed/storage_common.h
include/linux/qed/tcp_common.h
include/linux/rhashtable.h
include/linux/rpmsg/qcom_smd.h
include/linux/soc/qcom/smd.h [deleted file]
include/linux/soc/qcom/wcnss_ctrl.h
include/linux/sock_diag.h
include/linux/stmmac.h
include/linux/udp.h
include/linux/usb/usbnet.h
include/linux/virtio_vsock.h
include/net/addrconf.h
include/net/af_vsock.h
include/net/bonding.h
include/net/busy_poll.h
include/net/devlink.h
include/net/dsa.h
include/net/fib_rules.h
include/net/flow.h
include/net/flowcache.h
include/net/ip_fib.h
include/net/ip_vs.h
include/net/mpls_iptunnel.h
include/net/ndisc.h
include/net/neighbour.h
include/net/netfilter/nf_conntrack_expect.h
include/net/netfilter/nf_conntrack_timeout.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nft_fib.h
include/net/netns/ipv4.h
include/net/netns/mpls.h
include/net/pkt_sched.h
include/net/protocol.h
include/net/route.h
include/net/sch_generic.h
include/net/sctp/sm.h
include/net/sctp/structs.h
include/net/sctp/ulpevent.h
include/net/secure_seq.h
include/net/sock.h
include/net/tc_act/tc_pedit.h
include/net/tc_act/tc_vlan.h
include/net/tcp.h
include/net/udp.h
include/target/target_core_backend.h
include/target/target_core_base.h
include/uapi/asm-generic/socket.h
include/uapi/drm/omap_drm.h
include/uapi/linux/bpf.h
include/uapi/linux/btrfs.h
include/uapi/linux/devlink.h
include/uapi/linux/gtp.h
include/uapi/linux/if_link.h
include/uapi/linux/ipv6.h
include/uapi/linux/mpls_iptunnel.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/openvswitch.h
include/uapi/linux/pkt_sched.h
include/uapi/linux/rtnetlink.h
include/uapi/linux/sctp.h
include/uapi/linux/snmp.h
include/uapi/linux/sysctl.h
kernel/bpf/Makefile
kernel/bpf/arraymap.c
kernel/bpf/hashtab.c
kernel/bpf/map_in_map.c [new file with mode: 0644]
kernel/bpf/map_in_map.h [new file with mode: 0644]
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/cpu.c
kernel/events/core.c
kernel/futex.c
kernel/locking/rwsem-spinlock.c
kernel/memremap.c
kernel/sched/deadline.c
kernel/sched/loadavg.c
mm/memory_hotplug.c
mm/swap_slots.c
mm/vmalloc.c
mm/z3fold.c
net/8021q/vlan_dev.c
net/Makefile
net/atm/clip.c
net/atm/common.c
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/fragmentation.c
net/batman-adv/gateway_common.c
net/batman-adv/soft-interface.c
net/batman-adv/types.h
net/bpf/Makefile [new file with mode: 0644]
net/bpf/test_run.c [new file with mode: 0644]
net/bridge/br_fdb.c
net/bridge/br_if.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_private.h
net/bridge/netfilter/ebt_log.c
net/bridge/netfilter/nft_reject_bridge.c
net/core/datagram.c
net/core/dev.c
net/core/devlink.c
net/core/drop_monitor.c
net/core/ethtool.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow.c
net/core/flow_dissector.c
net/core/lwtunnel.c
net/core/neighbour.c
net/core/netclassid_cgroup.c
net/core/netprio_cgroup.c
net/core/rtnetlink.c
net/core/secure_seq.c
net/core/skbuff.c
net/core/sock.c
net/core/sock_diag.c
net/core/sock_reuseport.c
net/core/utils.c
net/decnet/af_decnet.c
net/dsa/Kconfig
net/dsa/dsa.c
net/dsa/dsa2.c
net/dsa/slave.c
net/dsa/switch.c
net/dsa/tag_brcm.c
net/dsa/tag_dsa.c
net/dsa/tag_edsa.c
net/dsa/tag_qca.c
net/dsa/tag_trailer.c
net/ipv4/Makefile
net/ipv4/af_inet.c
net/ipv4/arp.c
net/ipv4/devinet.c
net/ipv4/fib_frontend.c
net/ipv4/fib_notifier.c [new file with mode: 0644]
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/icmp.c
net/ipv4/ip_fragment.c
net/ipv4/ip_input.c
net/ipv4/ipconfig.c
net/ipv4/ipmr.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/nf_nat_snmp_basic.c
net/ipv4/netfilter/nf_reject_ipv4.c
net/ipv4/netfilter/nft_fib_ipv4.c
net/ipv4/proc.c
net/ipv4/protocol.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_westwood.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/ip6_input.c
net/ipv6/ip6mr.c
net/ipv6/mcast.c
net/ipv6/ndisc.c
net/ipv6/netfilter/nft_fib_ipv6.c
net/ipv6/protocol.c
net/ipv6/route.c
net/ipv6/seg6_iptunnel.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/mac802154/ieee802154_i.h
net/mpls/af_mpls.c
net/mpls/internal.h
net/mpls/mpls_iptunnel.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_nq.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_rr.c
net/netfilter/ipvs/ip_vs_sed.c
net/netfilter/ipvs/ip_vs_wlc.c
net/netfilter/ipvs/ip_vs_wrr.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nfnetlink_log.c
net/netfilter/nft_compat.c
net/netfilter/nft_counter.c
net/netfilter/nft_ct.c
net/netfilter/nft_dynset.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_fib.c
net/netfilter/nft_hash.c
net/netfilter/nft_limit.c
net/netfilter/nft_lookup.c
net/netfilter/nft_masq.c
net/netfilter/nft_meta.c
net/netfilter/nft_nat.c
net/netfilter/nft_objref.c
net/netfilter/nft_quota.c
net/netfilter/nft_redir.c
net/netfilter/nft_reject.c
net/netfilter/nft_reject_inet.c
net/netfilter/nft_set_rbtree.c
net/netfilter/xt_limit.c
net/netlink/af_netlink.c
net/netlink/genetlink.c
net/openvswitch/actions.c
net/openvswitch/datapath.h
net/openvswitch/flow_netlink.c
net/qrtr/Kconfig
net/qrtr/smd.c
net/rds/connection.c
net/rds/ib_cm.c
net/rds/ib_fmr.c
net/rds/ib_mr.h
net/rds/threads.c
net/rxrpc/conn_event.c
net/sched/act_csum.c
net/sched/act_ife.c
net/sched/cls_flow.c
net/sched/sch_api.c
net/sched/sch_cbq.c
net/sched/sch_choke.c
net/sched/sch_drr.c
net/sched/sch_dsmark.c
net/sched/sch_fq_codel.c
net/sched/sch_generic.c
net/sched/sch_hfsc.c
net/sched/sch_htb.c
net/sched/sch_mq.c
net/sched/sch_mqprio.c
net/sched/sch_multiq.c
net/sched/sch_netem.c
net/sched/sch_prio.c
net/sched/sch_qfq.c
net/sched/sch_red.c
net/sched/sch_sfb.c
net/sched/sch_sfq.c
net/sched/sch_tbf.c
net/sctp/associola.c
net/sctp/chunk.c
net/sctp/output.c
net/sctp/outqueue.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/sctp/stream.c
net/sctp/sysctl.c
net/sctp/ulpevent.c
net/smc/smc_ib.h
net/socket.c
net/sunrpc/xprtrdma/verbs.c
net/tipc/name_table.c
net/tipc/socket.c
net/tipc/subscr.c
net/tipc/subscr.h
net/unix/garbage.c
net/vmw_vsock/af_vsock.c
net/vmw_vsock/virtio_transport.c
net/vmw_vsock/virtio_transport_common.c
net/vmw_vsock/vmci_transport.c
net/wireless/nl80211.c
samples/bpf/Makefile
samples/bpf/bpf_helpers.h
samples/bpf/bpf_load.c
samples/bpf/cookie_uid_helper_example.c [new file with mode: 0644]
samples/bpf/libbpf.h
samples/bpf/map_perf_test_kern.c
samples/bpf/map_perf_test_user.c
samples/bpf/run_cookie_uid_helper_example.sh [new file with mode: 0644]
samples/bpf/test_map_in_map_kern.c [new file with mode: 0644]
samples/bpf/test_map_in_map_user.c [new file with mode: 0644]
security/selinux/nlmsgtab.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_fifo.c
sound/core/seq/seq_memory.c
sound/core/seq/seq_memory.h
sound/pci/ctxfi/cthw20k1.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/x86/Kconfig
tools/hv/bondvf.sh
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/perf/util/symbol.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_iptunnel_common.h [new file with mode: 0644]
tools/testing/selftests/bpf/test_l4lb.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_pkt_access.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_progs.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/test_xdp.c [new file with mode: 0644]

index fa5a00bb114372bfbc67008e8d0ebe8d1566ed6b..7122d6264c49d6c02c2c0074e145f19b93fc63dd 100644 (file)
@@ -21,3 +21,30 @@ Description:
                is responsible for coordination of driver and firmware
                link framing mode, changing this setting to 'Y' if the
                firmware is configured for 'raw-ip' mode.
+
+What:          /sys/class/net/<iface>/qmi/add_mux
+Date:          March 2017
+KernelVersion: 4.11
+Contact:       Bjørn Mork <bjorn@mork.no>
+Description:
+               Unsigned integer.
+
+               Write a number ranging from 1 to 127 to add a qmap mux
+               based network device, supported by recent Qualcomm based
+               modems.
+
+               The network device will be called qmimux.
+
+               Userspace is in charge of managing the qmux network device
+               activation and data stream setup on the modem side by
+               using the proper QMI protocol requests.
+
+What:          /sys/class/net/<iface>/qmi/del_mux
+Date:          March 2017
+KernelVersion: 4.11
+Contact:       Bjørn Mork <bjorn@mork.no>
+Description:
+               Unsigned integer.
+
+               Write a number ranging from 1 to 127 to delete a previously
+               created qmap mux based network device.
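A quick sketch of the intended usage from a shell; the interface name wwan0, mux id 1, and the resulting device name qmimux0 are illustrative assumptions, not taken from the commit:

    # create a qmap mux network device with mux id 1 on wwan0
    echo 1 > /sys/class/net/wwan0/qmi/add_mux
    # the mux device appears alongside the parent interface; the
    # modem-side data stream must still be set up via QMI requests
    ip link show qmimux0
    # delete it again by writing the same mux id to del_mux
    echo 1 > /sys/class/net/wwan0/qmi/del_mux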
index 10587bdadbbe5a8e8fe703e23c194420e3701f9f..26c77d985fafe06092467c5a2ea8a0e176d0d460 100644 (file)
@@ -2,11 +2,14 @@
 
 Required properties:
 - compatible: should contain one of "brcm,genet-v1", "brcm,genet-v2",
-  "brcm,genet-v3", "brcm,genet-v4".
+  "brcm,genet-v3", "brcm,genet-v4", "brcm,genet-v5".
 - reg: address and length of the register set for the device
-- interrupts: must be two cells, the first cell is the general purpose
-  interrupt line, while the second cell is the interrupt for the ring
-  RX and TX queues operating in ring mode
+- interrupts and/or interrupts-extended: must be two cells, the first cell
+  is the general purpose interrupt line, while the second cell is the
+  interrupt for the ring RX and TX queues operating in ring mode.  An
+  optional third interrupt cell for Wake-on-LAN can be specified.
+  See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+  for information on the property specifics.
 - phy-mode: see ethernet.txt file in the same directory
 - #address-cells: should be 1
 - #size-cells: should be 1
@@ -29,15 +32,15 @@ Optional properties:
 
 Required child nodes:
 
-- mdio bus node: this node should always be present regarless of the PHY
+- mdio bus node: this node should always be present regardless of the PHY
   configuration of the GENET instance
 
 MDIO bus node required properties:
 
 - compatible: should contain one of "brcm,genet-mdio-v1", "brcm,genet-mdio-v2"
-  "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", the version has to match the
-  parent node compatible property (e.g: brcm,genet-v4 pairs with
-  brcm,genet-mdio-v4)
+  "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", "brcm,genet-mdio-v5", the version
+  has to match the parent node compatible property (e.g: brcm,genet-v4 pairs
+  with brcm,genet-mdio-v4)
 - reg: address and length relative to the parent node base register address
 - #address-cells: address cell for MDIO bus addressing, should be 1
 - #size-cells: size of the cells for MDIO bus addressing, should be 0
index ab0bb4247d14950959044cc138e71e7c736f85b9..4648948f7c3b8f26391292b06aa01743100e8796 100644 (file)
@@ -2,8 +2,9 @@
 
 Required properties:
 - compatible: should one from "brcm,genet-mdio-v1", "brcm,genet-mdio-v2",
-  "brcm,genet-mdio-v3", "brcm,genet-mdio-v4" or "brcm,unimac-mdio"
-- reg: address and length of the regsiter set for the device, first one is the
+  "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", "brcm,genet-mdio-v5" or
+  "brcm,unimac-mdio"
+- reg: address and length of the register set for the device, first one is the
   base register, and the second one is optional and for indirect accesses to
   larger than 16-bits MDIO transactions
 - reg-names: name(s) of the register must be "mdio" and optional "mdio_indir_rw"
index 4754364df4c66adfc53e2546e31e0d124070dca1..6b4956beff8c42c3214906c83d94072fca8e9083 100644 (file)
@@ -1,17 +1,28 @@
-* Marvell Armada 375 Ethernet Controller (PPv2)
+* Marvell Armada 375 Ethernet Controller (PPv2.1)
+  Marvell Armada 7K/8K Ethernet Controller (PPv2.2)
 
 Required properties:
 
-- compatible: should be "marvell,armada-375-pp2"
+- compatible: should be one of:
+    "marvell,armada-375-pp2"
+    "marvell,armada-7k-pp2"
 - reg: addresses and length of the register sets for the device.
-  Must contain the following register sets:
+  For "marvell,armada-375-pp2", must contain the following register
+  sets:
        - common controller registers
        - LMS registers
-  In addition, at least one port register set is required.
-- clocks: a pointer to the reference clocks for this device, consequently:
-       - main controller clock
-       - GOP clock
-- clock-names: names of used clocks, must be "pp_clk" and "gop_clk".
+       - one register area per Ethernet port
+  For "marvell,armada-7k-pp2", must contain the following register
+  sets:
+       - packet processor registers
+       - networking interfaces registers
+
+- clocks: pointers to the reference clocks for this device, consequently:
+       - main controller clock (for both armada-375-pp2 and armada-7k-pp22)
+       - GOP clock (for both armada-375-pp2 and armada-7k-pp22)
+       - MG clock (only for armada-7k-pp22)
+- clock-names: names of used clocks, must be "pp_clk", "gop_clk" and
+  "mg_clk" (the latter only for armada-7k-pp22).
 
 The ethernet ports are represented by subnodes. At least one port is
 required.
@@ -19,8 +30,10 @@ required.
 Required properties (port):
 
 - interrupts: interrupt for the port
-- port-id: should be '0' or '1' for ethernet ports, and '2' for the
-           loopback port
+- port-id: ID of the port from the MAC point of view
+- gop-port-id: only for marvell,armada-7k-pp22, ID of the port from the
+  GOP (Group Of Ports) point of view. This ID is used to index the
+  per-port registers in the second register area.
 - phy-mode: See ethernet.txt file in the same directory
 
 Optional properties (port):
@@ -29,7 +42,7 @@ Optional properties (port):
 - phy: a phandle to a phy node defining the PHY address (as the reg
   property, a single integer).
 
-Example:
+Example for marvell,armada-375-pp2:
 
 ethernet@f0000 {
        compatible = "marvell,armada-375-pp2";
@@ -57,3 +70,30 @@ ethernet@f0000 {
                phy-mode = "gmii";
        };
 };
+
+Example for marvell,armada-7k-pp22:
+
+cpm_ethernet: ethernet@0 {
+       compatible = "marvell,armada-7k-pp22";
+       reg = <0x0 0x100000>, <0x129000 0xb000>;
+       clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>, <&cpm_syscon0 1 5>;
+       clock-names = "pp_clk", "gop_clk", "gp_clk";
+
+       eth0: eth0 {
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               port-id = <0>;
+               gop-port-id = <0>;
+       };
+
+       eth1: eth1 {
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               port-id = <1>;
+               gop-port-id = <2>;
+       };
+
+       eth2: eth2 {
+               interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+               port-id = <2>;
+               gop-port-id = <3>;
+       };
+};
index d3bfc2b30fb5ecc07493b4c5510d5cdc32b3ff3a..f652b0c384ced4f1a2ed9cc6eeb1f7abb99669c7 100644 (file)
@@ -28,9 +28,9 @@ Optional properties:
   clocks may be specified in derived bindings.
 - clock-names: One name for each entry in the clocks property, the
   first one should be "stmmaceth" and the second one should be "pclk".
-- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is
-  available this clock is used for programming the Timestamp Addend Register.
-  If not passed then the system clock will be used and this is fine on some
+- ptp_ref: this is the PTP reference clock; if PTP is available, this clock
+  is used for programming the Timestamp Addend Register. If it is not
+  passed, the system clock will be used instead, which is fine on some
   platforms.
 - tx-fifo-depth: See ethernet.txt file in the same directory
 - rx-fifo-depth: See ethernet.txt file in the same directory
@@ -72,7 +72,45 @@ Optional properties:
        - snps,mb: mixed-burst
        - snps,rb: rebuild INCRx Burst
 - mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus.
-
+- Multiple RX Queues parameters: below is the list of all the parameters
+                                used to configure the multiple RX queues:
+       - snps,rx-queues-to-use: number of RX queues to be used in the driver
+       - Choose one of these RX scheduling algorithms:
+               - snps,rx-sched-sp: Strict priority
+               - snps,rx-sched-wsp: Weighted Strict priority
+       - For each RX queue
+               - Choose one of these modes:
+                       - snps,dcb-algorithm: Queue to be enabled as DCB
+                       - snps,avb-algorithm: Queue to be enabled as AVB
+               - snps,map-to-dma-channel: Channel to map
+               - Specify specific packet routing:
+                       - snps,route-avcp: AV Untagged Control packets
+                       - snps,route-ptp: PTP Packets
+                       - snps,route-dcbcp: DCB Control Packets
+                       - snps,route-up: Untagged Packets
+                       - snps,route-multi-broad: Multicast & Broadcast Packets
+               - snps,priority: RX queue priority (Range: 0x0 to 0xF)
+- Multiple TX Queues parameters: below is the list of all the parameters
+                                used to configure the multiple TX queues:
+       - snps,tx-queues-to-use: number of TX queues to be used in the driver
+       - Choose one of these TX scheduling algorithms:
+               - snps,tx-sched-wrr: Weighted Round Robin
+               - snps,tx-sched-wfq: Weighted Fair Queuing
+               - snps,tx-sched-dwrr: Deficit Weighted Round Robin
+               - snps,tx-sched-sp: Strict priority
+       - For each TX queue
+               - snps,weight: TX queue weight (if using a DCB weight algorithm)
+               - Choose one of these modes:
+                       - snps,dcb-algorithm: TX queue will be working in DCB
+                       - snps,avb-algorithm: TX queue will be working in AVB
+                         [Attention] Queue 0 is reserved for legacy traffic
+                         and so no AVB is available in this queue.
+               - Configure the Credit-Based Shaper (if AVB mode is selected):
+                       - snps,send_slope: AVB send slope value
+                       - snps,idle_slope: AVB idle slope value
+                       - snps,high_credit: AVB hiCredit value
+                       - snps,low_credit: AVB loCredit value
+               - snps,priority: TX queue priority (Range: 0x0 to 0xF)
 Examples:
 
        stmmac_axi_setup: stmmac-axi-config {
@@ -81,6 +119,35 @@ Examples:
                snps,blen = <256 128 64 32 0 0 0>;
        };
 
+       mtl_rx_setup: rx-queues-config {
+               snps,rx-queues-to-use = <1>;
+               snps,rx-sched-sp;
+               queue0 {
+                       snps,dcb-algorithm;
+                       snps,map-to-dma-channel = <0x0>;
+                       snps,priority = <0x0>;
+               };
+       };
+
+       mtl_tx_setup: tx-queues-config {
+               snps,tx-queues-to-use = <2>;
+               snps,tx-sched-wrr;
+               queue0 {
+                       snps,weight = <0x10>;
+                       snps,dcb-algorithm;
+                       snps,priority = <0x0>;
+               };
+
+               queue1 {
+                       snps,avb-algorithm;
+                       snps,send_slope = <0x1000>;
+                       snps,idle_slope = <0x1000>;
+                       snps,high_credit = <0x3E800>;
+                       snps,low_credit = <0xFFC18000>;
+                       snps,priority = <0x1>;
+               };
+       };
+
        gmac0: ethernet@e0800000 {
                compatible = "st,spear600-gmac";
                reg = <0xe0800000 0x8000>;
@@ -104,4 +171,6 @@ Examples:
                        phy1: ethernet-phy@0 {
                        };
                };
+               snps,mtl-rx-config = <&mtl_rx_setup>;
+               snps,mtl-tx-config = <&mtl_tx_setup>;
        };
index a251bf4fe9c9281baf497f17bff8630b4eb21705..57e616ed10b0b54fc8ca2ad1ea405f742db2ab1e 100644 (file)
@@ -63,6 +63,78 @@ Additional Configurations
   The latest release of ethtool can be found from
   https://www.kernel.org/pub/software/network/ethtool
 
+
+  Flow Director n-tuple traffic filters (FDir)
+  --------------------------------------------
+  The driver utilizes the ethtool interface for configuring ntuple filters,
+  via "ethtool -N <device> <filter>".
+
+  The sctp4, ip4, udp4, and tcp4 flow types are supported with the standard
+  fields including src-ip, dst-ip, src-port and dst-port. The driver only
+  supports fully enabling or fully masking the fields, so use of the mask
+  fields for partial matches is not supported.
+
+  Additionally, the driver supports using the action to specify filters for a
+  Virtual Function. You can specify the action as a 64-bit value, where the
+  lower 32 bits represent the queue number, while the next 8 bits represent
+  which VF. Note that 0 is the PF, so the VF identifier is offset by 1. For
+  example:
+
+    ... action 0x800000002 ...
+
+  would direct traffic for Virtual Function 7 (8 minus 1) to queue 2 of
+  that VF.
+
+  The driver also supports using the user-defined field to specify 2 bytes of
+  arbitrary data to match within the packet payload in addition to the regular
+  fields. The data is specified in the lower 32 bits of the user-def field in
+  the following way:
+
+  +----------------------------+---------------------------+
+  | 31    28    24    20    16 | 15    12     8     4     0|
+  +----------------------------+---------------------------+
+  | offset into packet payload |  2 bytes of flexible data |
+  +----------------------------+---------------------------+
+
+  As an example,
+
+    ... user-def 0x4FFFF ....
+
+  means to match the value 0xFFFF 4 bytes into the packet payload. Note that
+  the offset is based on the beginning of the payload, and not the beginning
+  of the packet. Thus
+
+    flow-type tcp4 ... user-def 0x8BEAF ....
+
+  would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the
+  TCP/IPv4 payload.
+
+  For ICMP, the hardware parses the ICMP header as 4 bytes of header and 4
+  bytes of payload, so if you want to match an ICMP frame's payload you may need
+  to add 4 to the offset in order to match the data.
+
+  Furthermore, the offset can only be up to a value of 64, as the hardware
+  will only read up to 64 bytes of data from the payload. It must also be even
+  as the flexible data is 2 bytes long and must be aligned to byte 0 of the
+  packet payload.
+
+  When programming filters, the hardware is limited to using a single input
+  set for each flow type. This means that it is an error to program two
+  different filters with the same type that don't match on the same fields.
+  Thus the second of the following two commands will fail:
+
+    ethtool -N <device> flow-type tcp4 src-ip 192.168.0.7 action 5
+    ethtool -N <device> flow-type tcp4 dst-ip 192.168.15.18 action 1
+
+  This is because the first filter will be accepted and reprogram the input
+  set for TCPv4 filters, but the second filter will be unable to reprogram the
+  input set until all the conflicting TCPv4 filters are first removed.
+
+  Note that the user-defined flexible offset is also considered part of the
+  input set and cannot be programmed separately for multiple filters of the
+  same type. However, the flexible data is not part of the input set and
+  multiple filters may use the same offset but match against different data.
+
   Data Center Bridging (DCB)
   --------------------------
   DCB configuration is not currently supported.
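Putting the action and user-def encodings from the Flow Director section above together, a hedged example (the device name eth0 and the match values are placeholders):

    # action = (VF id + 1) << 32 | queue number:
    # (7 + 1) << 32 | 2 = 0x800000002 directs matches to queue 2 of VF 7
    ethtool -N eth0 flow-type tcp4 src-ip 192.168.0.7 action 0x800000002

    # user-def = (payload offset << 16) | 2 bytes of flexible data:
    # (8 << 16) | 0xBEAF = 0x8BEAF matches 0xBEAF at payload offset 8
    ethtool -N eth0 flow-type tcp4 src-ip 192.168.0.7 user-def 0x8BEAF \
            action 0x800000002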
index ab02304613771b6f6e120da96fb677c293d032d2..b1c6500e7a8df4d7377b291e9afc09363e66cd17 100644 (file)
@@ -73,6 +73,14 @@ fib_multipath_use_neigh - BOOLEAN
        0 - disabled
        1 - enabled
 
+fib_multipath_hash_policy - INTEGER
+       Controls which hash policy to use for multipath routes. Only valid
+       for kernels built with CONFIG_IP_ROUTE_MULTIPATH enabled.
+       Default: 0 (Layer 3)
+       Possible values:
+       0 - Layer 3
+       1 - Layer 4
+
 route/max_size - INTEGER
        Maximum number of routes allowed in the kernel.  Increase
        this when using large numbers of interfaces and/or routes.
@@ -640,11 +648,6 @@ tcp_tso_win_divisor - INTEGER
        building larger TSO frames.
        Default: 3
 
-tcp_tw_recycle - BOOLEAN
-       Enable fast recycling TIME-WAIT sockets. Default value is 0.
-       It should not be changed without advice/request of technical
-       experts.
-
 tcp_tw_reuse - BOOLEAN
        Allow to reuse TIME-WAIT sockets for new connections when it is
        safe from protocol viewpoint. Default value is 0.
@@ -853,12 +856,21 @@ ip_dynaddr - BOOLEAN
 ip_early_demux - BOOLEAN
        Optimize input packet processing down to one demux for
        certain kinds of local sockets.  Currently we only do this
-       for established TCP sockets.
+       for established TCP and connected UDP sockets.
 
        It may add an additional cost for pure routing workloads that
        reduces overall throughput, in such case you should disable it.
        Default: 1
 
+tcp_early_demux - BOOLEAN
+       Enable early demux for established TCP sockets.
+       Default: 1
+
+udp_early_demux - BOOLEAN
+       Enable early demux for connected UDP sockets. Disable this if
+       your system could experience more unconnected load.
+       Default: 1
+
 icmp_echo_ignore_all - BOOLEAN
        If set non-zero, then the kernel will ignore all ICMP ECHO
        requests sent to it.
@@ -1458,11 +1470,20 @@ accept_ra_pinfo - BOOLEAN
        Functional default: enabled if accept_ra is enabled.
                            disabled if accept_ra is disabled.
 
+accept_ra_rt_info_min_plen - INTEGER
+       Minimum prefix length of Route Information in RA.
+
+       Route Information w/ prefix smaller than this variable shall
+       be ignored.
+
+       Functional default: 0 if accept_ra_rtr_pref is enabled.
+                           -1 if accept_ra_rtr_pref is disabled.
+
 accept_ra_rt_info_max_plen - INTEGER
        Maximum prefix length of Route Information in RA.
 
-       Route Information w/ prefix larger than or equal to this
-       variable shall be ignored.
+       Route Information w/ prefix larger than this variable shall
+       be ignored.
 
        Functional default: 0 if accept_ra_rtr_pref is enabled.
                            -1 if accept_ra_rtr_pref is disabled.
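As an illustrative sketch, the new knobs above are reachable through the usual sysctl paths; the values shown are examples, not recommendations:

    # hash multipath routes on the layer-4 5-tuple instead of layer 3
    sysctl -w net.ipv4.fib_multipath_hash_policy=1
    # keep TCP early demux but disable it for connected UDP sockets
    sysctl -w net.ipv4.udp_early_demux=0
    # ignore RA route information with prefixes shorter than /48
    sysctl -w net.ipv6.conf.all.accept_ra_rt_info_min_plen=48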
index e6b1c025fdd89362a40e7aa676859a3a83646e7a..056898685d408e463f1a191f36e2384e9974ab6d 100644 (file)
@@ -175,6 +175,14 @@ nat_icmp_send - BOOLEAN
         for VS/NAT when the load balancer receives packets from real
         servers but the connection entries don't exist.
 
+pmtu_disc - BOOLEAN
+       0 - disabled
+       not 0 - enabled (default)
+
+       By default, reject with FRAG_NEEDED all DF packets that exceed
+       the PMTU, irrespective of the forwarding method. For the TUN
+       method the flag can be disabled to fragment such packets.
+
 secure_tcp - INTEGER
         0  - disabled (default)
 
@@ -185,15 +193,59 @@ secure_tcp - INTEGER
         The value definition is the same as that of drop_entry and
         drop_packet.
 
-sync_threshold - INTEGER
-        default 3
+sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period
+       default 3 50
+
+       It sets synchronization threshold, which is the minimum number
+       of incoming packets that a connection needs to receive before
+       the connection will be synchronized. A connection will be
+       synchronized every time the number of its incoming packets
+       modulo sync_period equals the threshold. The range of the
+       threshold is from 0 to sync_period.
+
+       When sync_period and sync_refresh_period are 0, send sync only
+       for state changes or only once when pkts matches sync_threshold
+
+sync_refresh_period - UNSIGNED INTEGER
+       default 0
+
+       In seconds, the difference in reported connection timer that triggers
+       a new sync message. It can be used to avoid sync messages for the
+       specified period (or half of the connection timeout if it is lower)
+       if connection state is not changed since last sync.
+
+       This is useful for normal connections with high traffic to reduce
+       sync rate. Additionally, retry sync_retries times with period of
+       sync_refresh_period/8.
+
+sync_retries - INTEGER
+       default 0
+
+       Defines sync retries with period of sync_refresh_period/8. Useful
+       to protect against loss of sync messages. The range of the
+       sync_retries is from 0 to 3.
+
+sync_qlen_max - UNSIGNED LONG
+
+       Hard limit for queued sync messages that are not sent yet. It
+       defaults to 1/32 of the memory pages but actually represents
+       number of messages. It will protect us from allocating large
+       parts of memory when the sending rate is lower than the queuing
+       rate.
+
+sync_sock_size - INTEGER
+       default 0
+
+       Configuration of SNDBUF (master) or RCVBUF (slave) socket limit.
+       Default value is 0 (preserve system defaults).
+
+sync_ports - INTEGER
+       default 1
 
-        It sets synchronization threshold, which is the minimum number
-        of incoming packets that a connection needs to receive before
-        the connection will be synchronized. A connection will be
-        synchronized, every time the number of its incoming packets
-        modulus 50 equals the threshold. The range of the threshold is
-        from 0 to 49.
+       The number of threads that master and backup servers can use
+       for sync traffic. Every thread will use a single UDP port:
+       thread 0 will use the default port 8848, while the last thread
+       will use port 8848+sync_ports-1.
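
The sync_threshold/sync_period rule above reduces to a small predicate.
A minimal userspace sketch, assuming pkts counts the connection's
incoming packets so far (all names here are illustrative):

    #include <stdio.h>

    /* Nonzero when a connection with 'pkts' incoming packets should be
     * synchronized, per the sync_threshold description above. */
    static int needs_sync(unsigned int pkts, unsigned int threshold,
                          unsigned int period)
    {
            if (period == 0)
                    return pkts == threshold;       /* sync only once */
            return (pkts % period) == threshold;    /* periodic sync */
    }

    int main(void)
    {
            /* Defaults 3 and 50: packets 3, 53, 103, ... trigger sync. */
            unsigned int pkts;

            for (pkts = 0; pkts < 120; pkts++)
                    if (needs_sync(pkts, 3, 50))
                            printf("sync at packet %u\n", pkts);
            return 0;
    }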
 
 snat_reroute - BOOLEAN
        0 - disabled
index 15d8d16934fd13727bb35e9c5078484127bbfa30..2f24a1912a48c73d360416d9889dd47c25bbbe28 100644 (file)
@@ -19,6 +19,25 @@ platform_labels - INTEGER
        Possible values: 0 - 1048575
        Default: 0
 
+ip_ttl_propagate - BOOL
+       Control whether TTL is propagated from the IPv4/IPv6 header to
+       the MPLS header on imposing labels and propagated from the
+       MPLS header to the IPv4/IPv6 header on popping the last label.
+
+       If disabled, the MPLS transport network will appear as a
+       single hop to transit traffic.
+
+       0 - disabled / RFC 3443 [Short] Pipe Model
+       1 - enabled / RFC 3443 Uniform Model (default)
+
+default_ttl - INTEGER
+       Default TTL value to use for MPLS packets where it cannot be
+       propagated from an IP header, either because one is not present
+       or because ip_ttl_propagate has been disabled.
+
+       Possible values: 1 - 255
+       Default: 255
+
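
Taken together, the two knobs above reduce to a two-way TTL choice on
label imposition. A hedged sketch of that decision, with invented
function and parameter names (not the kernel's):

    /* RFC 3443: the Uniform Model copies the IP TTL into the label
     * stack entry; the Pipe Model hides the MPLS hops behind a fixed
     * default TTL. */
    static unsigned char mpls_ttl_on_impose(unsigned char ip_ttl,
                                            int ip_ttl_propagate,
                                            unsigned char default_ttl)
    {
            return ip_ttl_propagate ? ip_ttl : default_ttl;
    }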
 conf/<interface>/input - BOOL
        Control whether packets can be input on this interface.
 
index 078c38217daabf603c82928b0ee684e1167176b4..a1ce88e914444c5c8ff0dc6b3b0dc8c7e5fda46f 100644 (file)
@@ -896,12 +896,19 @@ F:        arch/arm64/boot/dts/apm/
 APPLIED MICRO (APM) X-GENE SOC ETHERNET DRIVER
 M:     Iyappan Subramanian <isubramanian@apm.com>
 M:     Keyur Chudgar <kchudgar@apm.com>
+M:     Quan Nguyen <qnguyen@apm.com>
 S:     Supported
 F:     drivers/net/ethernet/apm/xgene/
 F:     drivers/net/phy/mdio-xgene.c
 F:     Documentation/devicetree/bindings/net/apm-xgene-enet.txt
 F:     Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
 
+APPLIED MICRO (APM) X-GENE SOC ETHERNET (V2) DRIVER
+M:     Iyappan Subramanian <isubramanian@apm.com>
+M:     Keyur Chudgar <kchudgar@apm.com>
+S:     Supported
+F:     drivers/net/ethernet/apm/xgene-v2/
+
 APPLIED MICRO (APM) X-GENE SOC PMU
 M:     Tai Nguyen <ttnguyen@apm.com>
 S:     Supported
@@ -3216,7 +3223,6 @@ F:        drivers/platform/chrome/
 
 CISCO VIC ETHERNET NIC DRIVER
 M:     Christian Benvenuti <benve@cisco.com>
-M:     Sujith Sankar <ssujith@cisco.com>
 M:     Govindarajulu Varadarajan <_govind@gmx.com>
 M:     Neel Patel <neepatel@cisco.com>
 S:     Supported
@@ -11056,6 +11062,12 @@ F:     include/linux/dma/dw.h
 F:     include/linux/platform_data/dma-dw.h
 F:     drivers/dma/dw/
 
+SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
+M:     Jie Deng <jiedeng@synopsys.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     drivers/net/ethernet/synopsys/
+
 SYNOPSYS DESIGNWARE I2C DRIVER
 M:     Jarkko Nikula <jarkko.nikula@linux.intel.com>
 R:     Andy Shevchenko <andriy.shevchenko@linux.intel.com>
index b841fb36beb2b5aa594907a4a5efbb8333e77548..b2faa93193729f43ea80d5fe83f8adf41b220b78 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index afc901b7a6f6e68c819aec1ab9199806f24fc1c1..1bb8cac61a284d7a9ac2150d079170f09044a692 100644 (file)
@@ -99,4 +99,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _UAPI_ASM_SOCKET_H */
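
SO_INCOMING_NAPI_ID, added here and mirrored across the per-arch socket
headers below, is read-only via getsockopt(). A minimal sketch, assuming
a libc whose headers expose the constant; the fallback value 56 matches
the generic definition, while parisc and sparc use different values (see
their hunks later in this diff):

    #include <stdio.h>
    #include <sys/socket.h>

    #ifndef SO_INCOMING_NAPI_ID
    #define SO_INCOMING_NAPI_ID 56  /* generic value; arch-specific on parisc/sparc */
    #endif

    /* Print the NAPI ID of the queue that delivered the last packet
     * on 'fd'; the value reads as 0 until traffic has arrived. */
    int print_napi_id(int fd)
    {
            unsigned int napi_id = 0;
            socklen_t len = sizeof(napi_id);

            if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID,
                           &napi_id, &len) < 0)
                    return -1;
            printf("napi_id=%u\n", napi_id);
            return 0;
    }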
index 3c2cb5d5adfa4f17bab53005b7722ffe8add022e..0bb0e9c6376c4aab7bb1ad43c2bd4fce87cef943 100644 (file)
 394    common  pkey_mprotect           sys_pkey_mprotect
 395    common  pkey_alloc              sys_pkey_alloc
 396    common  pkey_free               sys_pkey_free
+397    common  statx                   sys_statx
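
statx is wired up for several architectures in this merge (arm here;
parisc and powerpc below). Until libc grows a wrapper, it can be invoked
through syscall(2); a minimal sketch assuming 4.11-era uapi headers that
provide __NR_statx, struct statx, and STATX_SIZE:

    #include <fcntl.h>
    #include <linux/stat.h>         /* struct statx, STATX_SIZE (4.11+) */
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            struct statx stx;

            if (argc < 2)
                    return 1;
            if (syscall(__NR_statx, AT_FDCWD, argv[1], 0,
                        STATX_SIZE, &stx) < 0) {
                    perror("statx");
                    return 1;
            }
            printf("%s: %llu bytes\n", argv[1],
                   (unsigned long long)stx.stx_size);
            return 0;
    }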
index 8c7c244247b6b3f6d0a52a4417a802873f53bde0..3741859765cfe050d2c4a174d613ff90e1074be0 100644 (file)
@@ -1073,6 +1073,10 @@ config SYSVIPC_COMPAT
        def_bool y
        depends on COMPAT && SYSVIPC
 
+config KEYS_COMPAT
+       def_bool y
+       depends on COMPAT && KEYS
+
 endmenu
 
 menu "Power management options"
index 05310ad8c5abec54a445cb2dfcd3df5fefcefe3a..f31c48d0cd6873f399a6d8f5f861e98fa3f66e10 100644 (file)
@@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void)
 static inline bool system_uses_ttbr0_pan(void)
 {
        return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
-               !cpus_have_cap(ARM64_HAS_PAN);
+               !cpus_have_const_cap(ARM64_HAS_PAN);
 }
 
 #endif /* __ASSEMBLY__ */
index 75a0f8acef669ce5560f627f516dae54168a898d..fd691087dc9ad58ff0ff007f5ea7191a3f879380 100644 (file)
@@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu)
 }
 
 /**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
  * @arg: argument to pass to CPU suspend operations
  *
  * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
index 2a07aae5b8a26431edcdfd2534a856474fc00b44..c5c45942fb6e6693c5f8c195bb6596e2fa9f6ff2 100644 (file)
@@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
        return 0;
 }
 
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-                                      unsigned long val, void *data)
-{
-       return NOTIFY_DONE;
-}
-
 static void __kprobes kprobe_handler(struct pt_regs *regs)
 {
        struct kprobe *p, *cur_kprobe;
index 55d1e9205543689a6883d983dc82cb8b9eb2be6a..687a358a37337af9cf7a0d50c27b0176cfbd2012 100644 (file)
@@ -162,7 +162,7 @@ void __init kasan_init(void)
        clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
        vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
-                        pfn_to_nid(virt_to_pfn(_text)));
+                        pfn_to_nid(virt_to_pfn(lm_alias(_text))));
 
        /*
         * vmemmap_populate() has populated the shadow region that covers the
index 5a650426f35703e82db1149a4049b8575b21d85f..f824eeb0f2e4c6d8ba639765f1d5394988ffd81e 100644 (file)
@@ -92,4 +92,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _UAPI__ASM_AVR32_SOCKET_H */
index 81e03530ed39ee7e3b25b7442361f64aa883c179..a8ad9bebfc47e908d1b2d0b9d5ff4423ce29f631 100644 (file)
@@ -92,5 +92,9 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _ASM_SOCKET_H */
 
index 57feb0c1f7d707dd51ce20ffba0a418f5b5687ff..6af3253e420952697bd70518a7a1701bcf016a7d 100644 (file)
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _ASM_IA64_SOCKET_H */
index 5853f8e92c20cda02450346d839b3f0b466359ee..e98b6bb897c0d06af2018297b97b9780e404e8e8 100644 (file)
@@ -92,4 +92,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _ASM_M32R_SOCKET_H */
index 566ecdcb5b4bcb2cd4d5888a1ce787b8fcbd0b97..ae2b62e39d4dbe7ba7911322404d84b718e27940 100644 (file)
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 0e12527c4b0e6de154efaa91fe197eb995a1535c..e4ac1843ee0172436ca682d7669c23d763445045 100644 (file)
@@ -92,4 +92,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _ASM_SOCKET_H */
index 5fcb9ac72693850f50060a4822445a09d81b8a80..f0a5d8b844d6b85b16eb6c170f8af86f73ad8440 100644 (file)
@@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
        return val;
 }
 
-#define xchg(ptr, with) \
-       ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, with)                                                \
+       ({                                                              \
+               (__typeof__(*(ptr))) __xchg((unsigned long)(with),      \
+                                           (ptr),                      \
+                                           sizeof(*(ptr)));            \
+       })
 
 #endif /* __ASM_OPENRISC_CMPXCHG_H */
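
The rewrite above is stylistic: a GNU statement expression ({ ... })
evaluates to its last expression, so xchg() still yields the old value
while reading more like kernel style. A non-atomic userspace sketch of
the same macro shape (swap_in is an invented name):

    #include <stdio.h>

    /* Non-atomic illustration of the ({ ... }) macro form used above. */
    #define swap_in(ptr, with)                              \
            ({                                              \
                    __typeof__(*(ptr)) __old = *(ptr);      \
                    *(ptr) = (with);                        \
                    __old;                                  \
            })

    int main(void)
    {
            int v = 1;
            int old = swap_in(&v, 2);

            printf("old=%d new=%d\n", old, v);      /* old=1 new=2 */
            return 0;
    }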
index 140faa16685a2325f3a1b6cbf9cbb9c8e68fc913..1311e6b139916692bb5f81fbfd188a48b844d977 100644 (file)
@@ -211,7 +211,7 @@ do {                                                                        \
        case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break;         \
        case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break;         \
        case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break;         \
-       case 8: __get_user_asm2(x, ptr, retval);                        \
+       case 8: __get_user_asm2(x, ptr, retval); break;                 \
        default: (x) = __get_user_bad();                                \
        }                                                               \
 } while (0)
index 5c4695d13542fc003054995b728ac468e18bd94c..ee3e604959e15c514bc91eb65118d8d04ea20b59 100644 (file)
@@ -30,6 +30,7 @@
 #include <asm/hardirq.h>
 #include <asm/delay.h>
 #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 
 #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
 
@@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3);
 DECLARE_EXPORT(__ashrdi3);
 DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__ucmpdi2);
 
+EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(memset);
index 828a29110459e8cb9f1e85b1b5033f30ef0348dd..f8da545854f979c33a7b3116d26d822caa46c494 100644 (file)
@@ -90,6 +90,7 @@ void arch_cpu_idle(void)
 }
 
 void (*pm_power_off) (void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
 
 /*
  * When a process does an "exec", machine state like FPU and debug
index 19c9c3c5f267eac813edf6c5fc6f358301d2a639..c7e15cc5c6683b423d028b1557fc0dc9b7dd5a16 100644 (file)
@@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 #define flush_kernel_dcache_range(start,size) \
        flush_kernel_dcache_range_asm((start), (start)+(size));
-/* vmap range flushes and invalidates.  Architecturally, we don't need
- * the invalidate, because the CPU should refuse to speculate once an
- * area has been flushed, so invalidate is left empty */
-static inline void flush_kernel_vmap_range(void *vaddr, int size)
-{
-       unsigned long start = (unsigned long)vaddr;
-
-       flush_kernel_dcache_range_asm(start, start + size);
-}
-static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
-{
-       unsigned long start = (unsigned long)vaddr;
-       void *cursor = vaddr;
 
-       for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) {
-               struct page *page = vmalloc_to_page(cursor);
-
-               if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
-                       flush_kernel_dcache_page(page);
-       }
-       flush_kernel_dcache_range_asm(start, start + size);
-}
+void flush_kernel_vmap_range(void *vaddr, int size);
+void invalidate_kernel_vmap_range(void *vaddr, int size);
 
 #define flush_cache_vmap(start, end)           flush_cache_all()
 #define flush_cache_vunmap(start, end)         flush_cache_all()
index fb4382c28259b3ff2f873014fce7e42f1373dac8..edfbf9d6a6dd76adae077d49d76a236d299bceda 100644 (file)
@@ -32,7 +32,8 @@
  * that put_user is the same as __put_user, etc.
  */
 
-#define access_ok(type, uaddr, size) (1)
+#define access_ok(type, uaddr, size)   \
+       ( (uaddr) == (uaddr) )
 
 #define put_user __put_user
 #define get_user __get_user
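
The `( (uaddr) == (uaddr) )` form is still constant-true, but unlike the
bare `(1)` it evaluates its argument, which plausibly silences
set-but-unused warnings in callers; that rationale is an editorial
assumption, not stated in the patch. A userspace sketch:

    #include <stdio.h>

    #define always_ok(x)    ((x) == (x))    /* always 1, but 'x' is used */

    static int check(void)
    {
            int uaddr = 42;         /* only referenced inside the macro */

            return always_ok(uaddr);        /* no set-but-unused warning */
    }

    int main(void)
    {
            printf("%d\n", check());
            return 0;
    }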
index 7a109b73ddf7e814f9c29ffa1ec9b99977ead5d1..f754c793e82a417ef5d50142e6a642059a108771 100644 (file)
@@ -91,4 +91,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     0x402F
 
+#define SO_MEMINFO             0x4030
+
+#define SO_INCOMING_NAPI_ID    0x4031
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 6b0741e7a7ed3ee4060d619a8999b50dab12dac3..667c99421003e4dd07c6d204bef7db08fa905933 100644 (file)
 #define __NR_copy_file_range   (__NR_Linux + 346)
 #define __NR_preadv2           (__NR_Linux + 347)
 #define __NR_pwritev2          (__NR_Linux + 348)
+#define __NR_statx             (__NR_Linux + 349)
 
-#define __NR_Linux_syscalls    (__NR_pwritev2 + 1)
+#define __NR_Linux_syscalls    (__NR_statx + 1)
 
 
 #define __IGNORE_select                /* newselect */
index 0dc72d5de861539e5c16ff2ecd49f205e37775e6..c32a0909521665b5f08c22ef37fa8d8f9c654012 100644 (file)
@@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
                __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
        }
 }
+
+void flush_kernel_vmap_range(void *vaddr, int size)
+{
+       unsigned long start = (unsigned long)vaddr;
+
+       if ((unsigned long)size > parisc_cache_flush_threshold)
+               flush_data_cache();
+       else
+               flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(flush_kernel_vmap_range);
+
+void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+       unsigned long start = (unsigned long)vaddr;
+
+       if ((unsigned long)size > parisc_cache_flush_threshold)
+               flush_data_cache();
+       else
+               flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(invalidate_kernel_vmap_range);
index a0ecdb4abcc878b3805d7a2d0f845272b1fc372d..c66c943d93224f342cb71c97bd6a690bf8fb225b 100644 (file)
@@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
                         */
                        *loc = fsel(val, addend); 
                        break;
+               case R_PARISC_SECREL32:
+                       /* 32-bit section relative address. */
+                       *loc = fsel(val, addend);
+                       break;
                case R_PARISC_DPREL21L:
                        /* left 21 bit of relative address */
                        val = lrsel(val - dp, addend);
@@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
                         */
                        *loc = fsel(val, addend); 
                        break;
+               case R_PARISC_SECREL32:
+                       /* 32-bit section relative address. */
+                       *loc = fsel(val, addend);
+                       break;
                case R_PARISC_FPTR64:
                        /* 64-bit function address */
                        if(in_local(me, (void *)(val + addend))) {
index e282a5131d77e10f62d274d4be426b076f655017..6017a5af2e6e2c8feb45de54adfb36865b65d3ee 100644 (file)
@@ -39,7 +39,7 @@
  *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
  *  in various PDC revisions.  The code is much more maintainable
  *  and reliable this way vs having to debug on every version of PDC
- *  on every box. 
+ *  on every box.
  */
 
 #include <linux/capability.h>
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
 static int perf_release(struct inode *inode, struct file *file);
 static int perf_open(struct inode *inode, struct file *file);
 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-       loff_t *ppos);
+static ssize_t perf_write(struct file *file, const char __user *buf,
+       size_t count, loff_t *ppos);
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 static void perf_start_counters(void);
 static int perf_stop_counters(uint32_t *raddr);
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
 /*
  * configure:
  *
- * Configure the cpu with a given data image.  First turn off the counters, 
+ * Configure the cpu with a given data image.  First turn off the counters,
  * then download the image, then turn the counters back on.
  */
 static int perf_config(uint32_t *image_ptr)
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
        error = perf_stop_counters(raddr);
        if (error != 0) {
                printk("perf_config: perf_stop_counters = %ld\n", error);
-               return -EINVAL; 
+               return -EINVAL;
        }
 
 printk("Preparing to write image\n");
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
        error = perf_write_image((uint64_t *)image_ptr);
        if (error != 0) {
                printk("perf_config: DOWNLOAD = %ld\n", error);
-               return -EINVAL; 
+               return -EINVAL;
        }
 
 printk("Preparing to start counters\n");
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
 }
 
 /*
- * Open the device and initialize all of its memory.  The device is only 
+ * Open the device and initialize all of its memory.  The device is only
  * opened once, but can be "queried" by multiple processes that know its
  * file descriptor.
  */
@@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
  * called on the processor that the download should happen
  * on.
  */
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-       loff_t *ppos)
+static ssize_t perf_write(struct file *file, const char __user *buf,
+       size_t count, loff_t *ppos)
 {
        size_t image_size;
        uint32_t image_type;
        uint32_t interface_type;
        uint32_t test;
 
-       if (perf_processor_interface == ONYX_INTF) 
+       if (perf_processor_interface == ONYX_INTF)
                image_size = PCXU_IMAGE_SIZE;
-       else if (perf_processor_interface == CUDA_INTF) 
+       else if (perf_processor_interface == CUDA_INTF)
                image_size = PCXW_IMAGE_SIZE;
-       else 
+       else
                return -EFAULT;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
 
        /* First check the machine type is correct for
           the requested image */
-        if (((perf_processor_interface == CUDA_INTF) &&
-                      (interface_type != CUDA_INTF)) ||
-           ((perf_processor_interface == ONYX_INTF) &&
-                      (interface_type != ONYX_INTF))) 
+       if (((perf_processor_interface == CUDA_INTF) &&
+                       (interface_type != CUDA_INTF)) ||
+               ((perf_processor_interface == ONYX_INTF) &&
+                       (interface_type != ONYX_INTF)))
                return -EINVAL;
 
        /* Next check to make sure the requested image
           is valid */
-       if (((interface_type == CUDA_INTF) && 
+       if (((interface_type == CUDA_INTF) &&
                       (test >= MAX_CUDA_IMAGES)) ||
-           ((interface_type == ONYX_INTF) && 
-                      (test >= MAX_ONYX_IMAGES))) 
+           ((interface_type == ONYX_INTF) &&
+                      (test >= MAX_ONYX_IMAGES)))
                return -EINVAL;
 
        /* Copy the image into the processor */
-       if (interface_type == CUDA_INTF) 
+       if (interface_type == CUDA_INTF)
                return perf_config(cuda_images[test]);
        else
                return perf_config(onyx_images[test]);
@@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
 static void perf_patch_images(void)
 {
 #if 0 /* FIXME!! */
-/* 
+/*
  * NOTE:  this routine is VERY specific to the current TLB image.
  * If the image is changed, this routine might also need to be changed.
  */
@@ -367,9 +367,9 @@ static void perf_patch_images(void)
        extern void $i_dtlb_miss_2_0();
        extern void PA2_0_iva();
 
-       /* 
+       /*
         * We can only use the lower 32-bits, the upper 32-bits should be 0
-        * anyway given this is in the kernel 
+        * anyway given this is in the kernel
         */
        uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
        uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
@@ -377,21 +377,21 @@ static void perf_patch_images(void)
 
        if (perf_processor_interface == ONYX_INTF) {
                /* clear last 2 bytes */
-               onyx_images[TLBMISS][15] &= 0xffffff00;  
+               onyx_images[TLBMISS][15] &= 0xffffff00;
                /* set 2 bytes */
                onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
                onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
                onyx_images[TLBMISS][17] = itlb_addr;
 
                /* clear last 2 bytes */
-               onyx_images[TLBHANDMISS][15] &= 0xffffff00;  
+               onyx_images[TLBHANDMISS][15] &= 0xffffff00;
                /* set 2 bytes */
                onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
                onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
                onyx_images[TLBHANDMISS][17] = itlb_addr;
 
                /* clear last 2 bytes */
-               onyx_images[BIG_CPI][15] &= 0xffffff00;  
+               onyx_images[BIG_CPI][15] &= 0xffffff00;
                /* set 2 bytes */
                onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
                onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
@@ -404,24 +404,24 @@ static void perf_patch_images(void)
 
        } else if (perf_processor_interface == CUDA_INTF) {
                /* Cuda interface */
-               cuda_images[TLBMISS][16] =  
+               cuda_images[TLBMISS][16] =
                        (cuda_images[TLBMISS][16]&0xffff0000) |
                        ((dtlb_addr >> 8)&0x0000ffff);
-               cuda_images[TLBMISS][17] = 
+               cuda_images[TLBMISS][17] =
                        ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
                cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
 
-               cuda_images[TLBHANDMISS][16] = 
+               cuda_images[TLBHANDMISS][16] =
                        (cuda_images[TLBHANDMISS][16]&0xffff0000) |
                        ((dtlb_addr >> 8)&0x0000ffff);
-               cuda_images[TLBHANDMISS][17] = 
+               cuda_images[TLBHANDMISS][17] =
                        ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
                cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
 
-               cuda_images[BIG_CPI][16] = 
+               cuda_images[BIG_CPI][16] =
                        (cuda_images[BIG_CPI][16]&0xffff0000) |
                        ((dtlb_addr >> 8)&0x0000ffff);
-               cuda_images[BIG_CPI][17] = 
+               cuda_images[BIG_CPI][17] =
                        ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
                cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
        } else {
@@ -433,7 +433,7 @@ static void perf_patch_images(void)
 
 /*
  * ioctl routine
- * All routines effect the processor that they are executed on.  Thus you 
+ * All routines affect the processor that they are executed on.  Thus you
  * must be running on the processor that you wish to change.
  */
 
@@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        }
 
                        /* copy out the Counters */
-                       if (copy_to_user((void __user *)arg, raddr, 
+                       if (copy_to_user((void __user *)arg, raddr,
                                        sizeof (raddr)) != 0) {
                                error =  -EFAULT;
                                break;
@@ -487,7 +487,7 @@ static const struct file_operations perf_fops = {
        .open = perf_open,
        .release = perf_release
 };
-       
+
 static struct miscdevice perf_dev = {
        MISC_DYNAMIC_MINOR,
        PA_PERF_DEV,
@@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr)
                /* OR sticky2 (bit 1496) to counter2 bit 32 */
                tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
                raddr[2] = (uint32_t)tmp64;
-               
+
                /* Counter3 is bits 1497 to 1528 */
                tmp64 =  (userbuf[23] >> 7) & 0x00000000ffffffff;
                /* OR sticky3 (bit 1529) to counter3 bit 32 */
@@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr)
                userbuf[22] = 0;
                userbuf[23] = 0;
 
-               /* 
+               /*
                 * Write back the zeroed bytes + the image given
                 * the read was destructive.
                 */
@@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr)
        } else {
 
                /*
-                * Read RDR-15 which contains the counters and sticky bits 
+                * Read RDR-15 which contains the counters and sticky bits
                 */
                if (!perf_rdr_read_ubuf(15, userbuf)) {
                        return -13;
                }
 
-               /* 
+               /*
                 * Clear out the counters
                 */
                perf_rdr_clear(15);
@@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr)
                raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
                raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
        }
+
        return 0;
 }
 
@@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t      rdr_num, uint64_t *buffer)
        i = tentry->num_words;
        while (i--) {
                buffer[i] = 0;
-       }       
+       }
 
        /* Check for bits an even number of 64 */
        if ((xbits = width & 0x03f) != 0) {
@@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr)
        }
 
        runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+       if (!runway) {
+               pr_err("perf_write_image: ioremap failed!\n");
+               return -ENOMEM;
+       }
 
        /* Merge intrigue bits into Runway STATUS 0 */
        tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
-       __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 
+       __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
                     runway + RUNWAY_STATUS);
-       
+
        /* Write RUNWAY DEBUG registers */
        for (i = 0; i < 8; i++) {
                __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
        }
 
-       return 0; 
+       return 0;
 }
 
 /*
@@ -843,7 +847,7 @@ printk("perf_rdr_write\n");
                        perf_rdr_shift_out_U(rdr_num, buffer[i]);
                } else {
                        perf_rdr_shift_out_W(rdr_num, buffer[i]);
-               }       
+               }
        }
 printk("perf_rdr_write done\n");
 }
index 06f7ca7fe70b616b4d68353ae10dd5d409bbbcab..b76f503eee4a83c14d7f8156f339952e521a26ed 100644 (file)
@@ -142,6 +142,8 @@ void machine_power_off(void)
 
        printk(KERN_EMERG "System shut down completed.\n"
               "Please power this system off now.");
+
+       for (;;);
 }
 
 void (*pm_power_off)(void) = machine_power_off;
index 3cfef1de8061af183820e98ca97467d674a8c463..44aeaa9c039fc421421a5b1b7524495e0d225eba 100644 (file)
        ENTRY_SAME(copy_file_range)
        ENTRY_COMP(preadv2)
        ENTRY_COMP(pwritev2)
+       ENTRY_SAME(statx)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
index 4b369d83fe9ce1ea72b3f2a93590fb132d534512..1c9470881c4abe249fd943294c99e94f1916893b 100644 (file)
@@ -387,3 +387,4 @@ SYSCALL(copy_file_range)
 COMPAT_SYS_SPU(preadv2)
 COMPAT_SYS_SPU(pwritev2)
 SYSCALL(kexec_file_load)
+SYSCALL(statx)
index eb1acee91a2034c30d4277fe040cd797279f13b4..9ba11dbcaca98f88c53ee46c3bd009b22f13df01 100644 (file)
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            383
+#define NR_syscalls            384
 
 #define __NR__exit __NR_exit
 
index 44583a52f882540986928cc48a63971251226a0f..5f84af7dcb2e59482419dc8d72b24c61c80d4818 100644 (file)
@@ -99,4 +99,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _ASM_POWERPC_SOCKET_H */
index 2f26335a3c42a8141d29156f07105ca82761a98c..b85f1422885746d918131216fb45fd76bb99338a 100644 (file)
 #define __NR_preadv2           380
 #define __NR_pwritev2          381
 #define __NR_kexec_file_load   382
+#define __NR_statx             383
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
index 251060cf171364f1dd4dd08452189133dd713265..8b1fe895daa3f076bf57b6b9fe3283a6ba686fad 100644 (file)
@@ -751,7 +751,9 @@ void __init hpte_init_pseries(void)
        mmu_hash_ops.flush_hash_range    = pSeries_lpar_flush_hash_range;
        mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
        mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
-       mmu_hash_ops.resize_hpt          = pseries_lpar_resize_hpt;
+
+       if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+               mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
 }
 
 void radix_init_pseries(void)
index b24a64cbfeb10a91274a59117f2e76ea3c583e00..25ac4960e70758afedeb92bbcb546f6121fccf4b 100644 (file)
@@ -98,4 +98,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define        SO_MEMINFO              55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _ASM_SOCKET_H */
index a25dc32f5d6a163c1b0e7b7ae775f43898ba4d58..b05513acd589ba8d763f5da7bdd7bc16a59ac95c 100644 (file)
 
 #define SCM_TIMESTAMPING_OPT_STATS     0x0038
 
+#define SO_MEMINFO             0x0039
+
+#define SO_INCOMING_NAPI_ID    0x003a
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION             0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT       0x5002
index 349d4d17aa7fbd3a6268be3bd6e7bea909e76ccf..2aa1ad194db21a541f65c30b65fe20f2806fdff3 100644 (file)
@@ -2101,8 +2101,8 @@ static int x86_pmu_event_init(struct perf_event *event)
 
 static void refresh_pce(void *ignored)
 {
-       if (current->mm)
-               load_mm_cr4(current->mm);
+       if (current->active_mm)
+               load_mm_cr4(current->active_mm);
 }
 
 static void x86_pmu_event_mapped(struct perf_event *event)
@@ -2110,6 +2110,18 @@ static void x86_pmu_event_mapped(struct perf_event *event)
        if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return;
 
+       /*
+        * This function relies on not being called concurrently in two
+        * tasks in the same mm.  Otherwise one task could observe
+        * perf_rdpmc_allowed > 1 and return all the way back to
+        * userspace with CR4.PCE clear while another task is still
+        * doing on_each_cpu_mask() to propagate CR4.PCE.
+        *
+        * For now, this can't happen because all callers hold mmap_sem
+        * for write.  If this changes, we'll need a different solution.
+        */
+       lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+
        if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
                on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 }
index 72277b1028a5f54551962555fa56bfd5aebab15c..50d35e3185f553b92ce1eeba2700f13e33e49258 100644 (file)
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
        *(tmp + 1) = 0;
 }
 
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
-               defined(CONFIG_PARAVIRT))
 static inline void native_pud_clear(pud_t *pudp)
 {
 }
-#endif
 
 static inline void pud_clear(pud_t *pudp)
 {
index 1cfb36b8c024ab07b8334121fc56ac79f2a35371..585ee0d42d18fc162601ff0d8a53827f0d011f5e 100644 (file)
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 # define set_pud(pudp, pud)            native_set_pud(pudp, pud)
 #endif
 
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
 #define pud_clear(pud)                 native_pud_clear(pud)
 #endif
 
index ae32838cac5fd2251e1ffa0bbb8b8c629e399a84..b2879cc23db470ec8cc2cbeacdea4ff2b94ec1e3 100644 (file)
@@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
                return -EINVAL;
        }
 
+       if (!enabled) {
+               ++disabled_cpus;
+               return -EINVAL;
+       }
+
        if (boot_cpu_physical_apicid != -1U)
                ver = boot_cpu_apic_version;
 
-       cpu = __generic_processor_info(id, ver, enabled);
+       cpu = generic_processor_info(id, ver);
        if (cpu >= 0)
                early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
 
@@ -710,7 +715,7 @@ static void __init acpi_set_irq_model_ioapic(void)
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 #include <acpi/processor.h>
 
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
        int nid;
index aee7deddabd089b31bef1739c51a7972e04cb10d..8ccb7ef512e05dd9edaa6a3d7a852f70639a54d2 100644 (file)
@@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid)
        return nr_logical_cpuids++;
 }
 
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
 {
        int cpu, max = nr_cpu_ids;
        bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
        if (num_processors >= nr_cpu_ids) {
                int thiscpu = max + disabled_cpus;
 
-               if (enabled) {
-                       pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
-                                  "reached. Processor %d/0x%x ignored.\n",
-                                  max, thiscpu, apicid);
-               }
+               pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+                          "reached. Processor %d/0x%x ignored.\n",
+                          max, thiscpu, apicid);
 
                disabled_cpus++;
                return -EINVAL;
@@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
                apic->x86_32_early_logical_apicid(cpu);
 #endif
        set_cpu_possible(cpu, true);
-
-       if (enabled) {
-               num_processors++;
-               physid_set(apicid, phys_cpu_present_map);
-               set_cpu_present(cpu, true);
-       } else {
-               disabled_cpus++;
-       }
+       physid_set(apicid, phys_cpu_present_map);
+       set_cpu_present(cpu, true);
+       num_processors++;
 
        return cpu;
 }
 
-int generic_processor_info(int apicid, int version)
-{
-       return __generic_processor_info(apicid, version, true);
-}
-
 int hard_smp_processor_id(void)
 {
        return read_apic_id();
index c05509d38b1f1e5ed0f63940dc2c8496b360b032..9ac2a5cdd9c206e83f171847ac04d5bf4f2a3152 100644 (file)
@@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
        if (atomic_dec_and_test(&rdtgrp->waitcount) &&
            (rdtgrp->flags & RDT_DELETED)) {
                kernfs_unbreak_active_protection(kn);
-               kernfs_put(kn);
+               kernfs_put(rdtgrp->kn);
                kfree(rdtgrp);
        } else {
                kernfs_unbreak_active_protection(kn);
index 54a2372f5dbb1eb0598788e944ad28708b638671..b5785c197e534796d5e477b6cd86a502d229db7c 100644 (file)
@@ -4,6 +4,7 @@
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  */
 
+#define DISABLE_BRANCH_PROFILING
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/types.h>
index f088ea4c66e72e5787e6c2052b09bc95291cf131..a723ae9440ab2585303457dac977e53961f3cffd 100644 (file)
@@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
        spin_lock_irqsave(&desc->lock, flags);
 
        /*
-        * most handlers of type NMI_UNKNOWN never return because
-        * they just assume the NMI is theirs.  Just a sanity check
-        * to manage expectations
+        * Indicate if there are multiple registrations on the
+        * internal NMI handler call chains (SERR and IO_CHECK).
         */
-       WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
        WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
        WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
 
index 4f7a9833d8e51f2e023c3a5c0f6b54813c70c4a0..c73a7f9e881aa25852cd4a1aa58950ee9bd79149 100644 (file)
@@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void)
         * the refined calibration and directly register it as a clocksource.
         */
        if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
+               if (boot_cpu_has(X86_FEATURE_ART))
+                       art_related_clocksource = &clocksource_tsc;
                clocksource_register_khz(&clocksource_tsc, tsc_khz);
                return 0;
        }
index 478d15dbaee41b251c8bb28b59183e2b6c733326..08339262b666e56f2623406a10c42f3184c83e29 100644 (file)
@@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs)
        return sizeof(*regs);
 }
 
+#ifdef CONFIG_X86_32
+#define GCC_REALIGN_WORDS 3
+#else
+#define GCC_REALIGN_WORDS 1
+#endif
+
 static bool is_last_task_frame(struct unwind_state *state)
 {
-       unsigned long bp = (unsigned long)state->bp;
-       unsigned long regs = (unsigned long)task_pt_regs(state->task);
+       unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
+       unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
 
        /*
         * We have to check for the last task frame at two different locations
         * because gcc can occasionally decide to realign the stack pointer and
-        * change the offset of the stack frame by a word in the prologue of a
-        * function called by head/entry code.
+        * change the offset of the stack frame in the prologue of a function
+        * called by head/entry code.  Examples:
+        *
+        * <start_secondary>:
+        *      push   %edi
+        *      lea    0x8(%esp),%edi
+        *      and    $0xfffffff8,%esp
+        *      pushl  -0x4(%edi)
+        *      push   %ebp
+        *      mov    %esp,%ebp
+        *
+        * <x86_64_start_kernel>:
+        *      lea    0x8(%rsp),%r10
+        *      and    $0xfffffffffffffff0,%rsp
+        *      pushq  -0x8(%r10)
+        *      push   %rbp
+        *      mov    %rsp,%rbp
+        *
+        * Note that after aligning the stack, it pushes a duplicate copy of
+        * the return address before pushing the frame pointer.
         */
-       return bp == regs - FRAME_HEADER_SIZE ||
-              bp == regs - FRAME_HEADER_SIZE - sizeof(long);
+       return (state->bp == last_bp ||
+               (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
 }
 
 /*
index 8d63d7a104c3c445805dcf24a59fff2756a17b01..4c90cfdc128b832c6065cdb8830f89d16bff63dd 100644 (file)
@@ -1,3 +1,4 @@
+#define DISABLE_BRANCH_PROFILING
 #define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
index 5126dfd52b182dd66471a49a0464eb2411fbc7cd..cd44ae727df7f48ceba7fad00591c48cec151896 100644 (file)
@@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
  * we might run off the end of the bounds table if we are on
  * a 64-bit kernel and try to get 8 bytes.
  */
-int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
                long __user *bd_entry_ptr)
 {
        u32 bd_entry_32;
index a7dbec4dce2758261c6e1680b7ed825e5e44a9d1..3dbde04febdccab382bc47ccba53b422ac7c72ea 100644 (file)
@@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o
 obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o
 obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
new file mode 100644 (file)
index 0000000..a6c3705
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Intel Merrifield power button support
+ *
+ * (C) Copyright 2017 Intel Corporation
+ *
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+
+static struct resource mrfld_power_btn_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device mrfld_power_btn_dev = {
+       .name           = "msic_power_btn",
+       .id             = PLATFORM_DEVID_NONE,
+       .num_resources  = ARRAY_SIZE(mrfld_power_btn_resources),
+       .resource       = mrfld_power_btn_resources,
+};
+
+static int mrfld_power_btn_scu_status_change(struct notifier_block *nb,
+                                            unsigned long code, void *data)
+{
+       if (code == SCU_DOWN) {
+               platform_device_unregister(&mrfld_power_btn_dev);
+               return 0;
+       }
+
+       return platform_device_register(&mrfld_power_btn_dev);
+}
+
+static struct notifier_block mrfld_power_btn_scu_notifier = {
+       .notifier_call  = mrfld_power_btn_scu_status_change,
+};
+
+static int __init register_mrfld_power_btn(void)
+{
+       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+               return -ENODEV;
+
+       /*
+        * We need to be sure that the SCU IPC is ready before the
+        * PMIC power button device can be registered:
+        */
+       intel_scu_notifier_add(&mrfld_power_btn_scu_notifier);
+
+       return 0;
+}
+arch_initcall(register_mrfld_power_btn);
+
+static void __init *mrfld_power_btn_platform_data(void *info)
+{
+       struct resource *res = mrfld_power_btn_resources;
+       struct sfi_device_table_entry *pentry = info;
+
+       res->start = res->end = pentry->irq;
+       return NULL;
+}
+
+static const struct devs_id mrfld_power_btn_dev_id __initconst = {
+       .name                   = "bcove_power_btn",
+       .type                   = SFI_DEV_TYPE_IPC,
+       .delay                  = 1,
+       .msic                   = 1,
+       .get_platform_data      = &mrfld_power_btn_platform_data,
+};
+
+sfi_device(mrfld_power_btn_dev_id);
index 86edd1e941eb07bc46187024ae332409c6924073..9e304e2ea4f55c456e7f0037a8963f6586ad2b19 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
 
-#define TANGIER_EXT_TIMER0_MSI 15
+#define TANGIER_EXT_TIMER0_MSI 12
 
 static struct platform_device wdt_dev = {
        .name = "intel_mid_wdt",
index e793fe509971f49fb2cfa6a12f8b365a937ae206..e42978d4deafeb184ea8595eb0cf3ef54ceb62bc 100644 (file)
 
 #include "intel_mid_weak_decls.h"
 
-static void penwell_arch_setup(void);
-/* penwell arch ops */
-static struct intel_mid_ops penwell_ops = {
-       .arch_setup = penwell_arch_setup,
-};
-
-static void mfld_power_off(void)
-{
-}
-
 static unsigned long __init mfld_calibrate_tsc(void)
 {
        unsigned long fast_calibrate;
@@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void)
 static void __init penwell_arch_setup(void)
 {
        x86_platform.calibrate_tsc = mfld_calibrate_tsc;
-       pm_power_off = mfld_power_off;
 }
 
+static struct intel_mid_ops penwell_ops = {
+       .arch_setup = penwell_arch_setup,
+};
+
 void *get_penwell_ops(void)
 {
        return &penwell_ops;
index 9fdbe1fe0473802caaf04782f9a5c05ca813f013..786606c81edd07af0b68a3c75079f2d80ba8a7c1 100644 (file)
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* _XTENSA_SOCKET_H */
index 5eec5e08417f6ff1989e3e2a07b31c62901953d5..e75878f8b14af8f852d814717c3900759b0ed6fc 100644 (file)
@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
        bio_list_init(&punt);
        bio_list_init(&nopunt);
 
-       while ((bio = bio_list_pop(current->bio_list)))
+       while ((bio = bio_list_pop(&current->bio_list[0])))
                bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[0] = nopunt;
 
-       *current->bio_list = nopunt;
+       bio_list_init(&nopunt);
+       while ((bio = bio_list_pop(&current->bio_list[1])))
+               bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[1] = nopunt;
 
        spin_lock(&bs->rescue_lock);
        bio_list_merge(&bs->rescue_list, &punt);
@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                 * we retry with the original gfp_flags.
                 */
 
-               if (current->bio_list && !bio_list_empty(current->bio_list))
+               if (current->bio_list &&
+                   (!bio_list_empty(&current->bio_list[0]) ||
+                    !bio_list_empty(&current->bio_list[1])))
                        gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
                p = mempool_alloc(bs->bio_pool, gfp_mask);
index 0eeb99ef654f4ad6874cf579883a263c9894ca31..d772c221cc178bf3ecfe448f3367121ec1d077de 100644 (file)
@@ -1973,7 +1973,14 @@ end_io:
  */
 blk_qc_t generic_make_request(struct bio *bio)
 {
-       struct bio_list bio_list_on_stack;
+       /*
+        * bio_list_on_stack[0] contains bios submitted by the current
+        * make_request_fn.
+        * bio_list_on_stack[1] contains bios that were submitted before
+        * the current make_request_fn, but that haven't been processed
+        * yet.
+        */
+       struct bio_list bio_list_on_stack[2];
        blk_qc_t ret = BLK_QC_T_NONE;
 
        if (!generic_make_request_checks(bio))
@@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio)
         * should be added at the tail
         */
        if (current->bio_list) {
-               bio_list_add(current->bio_list, bio);
+               bio_list_add(&current->bio_list[0], bio);
                goto out;
        }
 
@@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio)
         * bio_list, and call into ->make_request() again.
         */
        BUG_ON(bio->bi_next);
-       bio_list_init(&bio_list_on_stack);
-       current->bio_list = &bio_list_on_stack;
+       bio_list_init(&bio_list_on_stack[0]);
+       current->bio_list = bio_list_on_stack;
        do {
                struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
                if (likely(blk_queue_enter(q, false) == 0)) {
-                       struct bio_list hold;
                        struct bio_list lower, same;
 
                        /* Create a fresh bio_list for all subordinate requests */
-                       hold = bio_list_on_stack;
-                       bio_list_init(&bio_list_on_stack);
+                       bio_list_on_stack[1] = bio_list_on_stack[0];
+                       bio_list_init(&bio_list_on_stack[0]);
                        ret = q->make_request_fn(q, bio);
 
                        blk_queue_exit(q);
@@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio)
                         */
                        bio_list_init(&lower);
                        bio_list_init(&same);
-                       while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+                       while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
                                if (q == bdev_get_queue(bio->bi_bdev))
                                        bio_list_add(&same, bio);
                                else
                                        bio_list_add(&lower, bio);
                        /* now assemble so we handle the lowest level first */
-                       bio_list_merge(&bio_list_on_stack, &lower);
-                       bio_list_merge(&bio_list_on_stack, &same);
-                       bio_list_merge(&bio_list_on_stack, &hold);
+                       bio_list_merge(&bio_list_on_stack[0], &lower);
+                       bio_list_merge(&bio_list_on_stack[0], &same);
+                       bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
                } else {
                        bio_io_error(bio);
                }
-               bio = bio_list_pop(current->bio_list);
+               bio = bio_list_pop(&bio_list_on_stack[0]);
        } while (bio);
        current->bio_list = NULL; /* deactivate */
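
The move to a two-element on-stack array lets punt_bios_to_rescuer()
(earlier hunk) see and drain both levels through current->bio_list. A
toy model of the resulting drain order, with invented names:

    #include <stdio.h>

    /* Toy of the merge order above: after make_request_fn runs, newly
     * queued bios are split into 'lower' and 'same' and merged back as
     * lower, same, then the bios parked in bio_list_on_stack[1] -- so
     * lower devices are always handled first. */
    int main(void)
    {
            const char *order[] = {
                    "bio for lower device",
                    "bio for same device",
                    "bio parked from outer caller (list[1])",
            };
            unsigned int i;

            for (i = 0; i < 3; i++)
                    printf("%u: %s\n", i + 1, order[i]);
            return 0;
    }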
 
index e48bc2c72615de016f013a2e98ea72cd49713a04..9d97bfc4d4657b586d1a9b4d077a8e673300d79a 100644 (file)
@@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
        for (i = 0; i < set->nr_hw_queues; i++) {
                struct blk_mq_tags *tags = set->tags[i];
 
+               if (!tags)
+                       continue;
+
                for (j = 0; j < tags->nr_tags; j++) {
                        if (!tags->static_rqs[j])
                                continue;
index 159187a28d66521b4ab0109d3db38e6225ac71b3..a4546f060e80933423638f1399ab1922db8331a9 100644 (file)
@@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
        return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
+static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+                                     bool may_sleep)
 {
        struct request_queue *q = rq->q;
        struct blk_mq_queue_data bd = {
@@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
        }
 
 insert:
-       blk_mq_sched_insert_request(rq, false, true, true, false);
+       blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
 }
 
 /*
@@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
                if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
                        rcu_read_lock();
-                       blk_mq_try_issue_directly(old_rq, &cookie);
+                       blk_mq_try_issue_directly(old_rq, &cookie, false);
                        rcu_read_unlock();
                } else {
                        srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
-                       blk_mq_try_issue_directly(old_rq, &cookie);
+                       blk_mq_try_issue_directly(old_rq, &cookie, true);
                        srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
                }
                goto done;
index 4467a8089ab890695ccf7072220d9c43d1f29c2d..0143135b3abe3749d8a3bab492eb67b2e63a5d01 100644 (file)
@@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu)
 
 void __weak arch_unregister_cpu(int cpu) {}
 
-int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
-{
-       return -ENODEV;
-}
-
 static int acpi_processor_hotadd_init(struct acpi_processor *pr)
 {
        unsigned long long sta;
@@ -285,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device)
                pr->acpi_id = value;
        }
 
+       if (acpi_duplicate_processor_id(pr->acpi_id)) {
+               dev_err(&device->dev,
+                       "Failed to get unique processor _UID (0x%x)\n",
+                       pr->acpi_id);
+               return -ENODEV;
+       }
+
        pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration,
                                        pr->acpi_id);
        if (invalid_phys_cpuid(pr->phys_id))
@@ -585,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = {
 static int nr_unique_ids __initdata;
 
 /* The number of the duplicate processor IDs */
-static int nr_duplicate_ids __initdata;
+static int nr_duplicate_ids;
 
 /* Used to store the unique processor IDs */
 static int unique_processor_ids[] __initdata = {
@@ -593,7 +595,7 @@ static int unique_processor_ids[] __initdata = {
 };
 
 /* Used to store the duplicate processor IDs */
-static int duplicate_processor_ids[] __initdata = {
+static int duplicate_processor_ids[] = {
        [0 ... NR_CPUS - 1] = -1,
 };
 
@@ -638,28 +640,53 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle,
                                                  void **rv)
 {
        acpi_status status;
+       acpi_object_type acpi_type;
+       unsigned long long uid;
        union acpi_object object = { 0 };
        struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
 
-       status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+       status = acpi_get_type(handle, &acpi_type);
        if (ACPI_FAILURE(status))
-               acpi_handle_info(handle, "Not get the processor object\n");
-       else
-               processor_validated_ids_update(object.processor.proc_id);
+               return false;
+
+       switch (acpi_type) {
+       case ACPI_TYPE_PROCESSOR:
+               status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+               if (ACPI_FAILURE(status))
+                       goto err;
+               uid = object.processor.proc_id;
+               break;
+
+       case ACPI_TYPE_DEVICE:
+               status = acpi_evaluate_integer(handle, "_UID", NULL, &uid);
+               if (ACPI_FAILURE(status))
+                       goto err;
+               break;
+       default:
+               goto err;
+       }
+
+       processor_validated_ids_update(uid);
+       return true;
+
+err:
+       acpi_handle_info(handle, "Invalid processor object\n");
+       return false;
 
-       return AE_OK;
 }
 
-static void __init acpi_processor_check_duplicates(void)
+void __init acpi_processor_check_duplicates(void)
 {
-       /* Search all processor nodes in ACPI namespace */
+       /* Check for duplicate IDs for all processors in the ACPI namespace */
        acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
                                                ACPI_UINT32_MAX,
                                                acpi_processor_ids_walk,
                                                NULL, NULL, NULL);
+       acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk,
+                                               NULL, NULL);
 }
 
-bool __init acpi_processor_validate_proc_id(int proc_id)
+bool acpi_duplicate_processor_id(int proc_id)
 {
        int i;
 
index 80cb5eb75b633db8aa278b5e709cfddd697f9a7e..34fbe027e73a26f195f981d2fbd373608f724415 100644 (file)
@@ -1249,7 +1249,6 @@ static int __init acpi_init(void)
        acpi_wakeup_device_init();
        acpi_debugger_init();
        acpi_setup_sb_notify_handler();
-       acpi_set_processor_mapping();
        return 0;
 }
 
index 611a5585a9024a728c71e60ada951b3a73936708..b933061b6b607c467e20317412c63c78728396fc 100644 (file)
@@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void)
 }
 
 static int map_lapic_id(struct acpi_subtable_header *entry,
-                u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled)
+                u32 acpi_id, phys_cpuid_t *apic_id)
 {
        struct acpi_madt_local_apic *lapic =
                container_of(entry, struct acpi_madt_local_apic, header);
 
-       if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED))
+       if (!(lapic->lapic_flags & ACPI_MADT_ENABLED))
                return -ENODEV;
 
        if (lapic->processor_id != acpi_id)
@@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry,
 }
 
 static int map_x2apic_id(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
-               bool ignore_disabled)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
 {
        struct acpi_madt_local_x2apic *apic =
                container_of(entry, struct acpi_madt_local_x2apic, header);
 
-       if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED))
+       if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
                return -ENODEV;
 
        if (device_declaration && (apic->uid == acpi_id)) {
@@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry,
 }
 
 static int map_lsapic_id(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
-               bool ignore_disabled)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
 {
        struct acpi_madt_local_sapic *lsapic =
                container_of(entry, struct acpi_madt_local_sapic, header);
 
-       if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED))
+       if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
                return -ENODEV;
 
        if (device_declaration) {
@@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
  * Retrieve the ARM CPU physical identifier (MPIDR)
  */
 static int map_gicc_mpidr(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr,
-               bool ignore_disabled)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
 {
        struct acpi_madt_generic_interrupt *gicc =
            container_of(entry, struct acpi_madt_generic_interrupt, header);
 
-       if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED))
+       if (!(gicc->flags & ACPI_MADT_ENABLED))
                return -ENODEV;
 
        /* device_declaration means Device object in DSDT, in the
@@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
 }
 
 static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
-                                  int type, u32 acpi_id, bool ignore_disabled)
+                                  int type, u32 acpi_id)
 {
        unsigned long madt_end, entry;
        phys_cpuid_t phys_id = PHYS_CPUID_INVALID;      /* CPU hardware ID */
@@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
                struct acpi_subtable_header *header =
                        (struct acpi_subtable_header *)entry;
                if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
-                       if (!map_lapic_id(header, acpi_id, &phys_id,
-                                         ignore_disabled))
+                       if (!map_lapic_id(header, acpi_id, &phys_id))
                                break;
                } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
-                       if (!map_x2apic_id(header, type, acpi_id, &phys_id,
-                                          ignore_disabled))
+                       if (!map_x2apic_id(header, type, acpi_id, &phys_id))
                                break;
                } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
-                       if (!map_lsapic_id(header, type, acpi_id, &phys_id,
-                                          ignore_disabled))
+                       if (!map_lsapic_id(header, type, acpi_id, &phys_id))
                                break;
                } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
-                       if (!map_gicc_mpidr(header, type, acpi_id, &phys_id,
-                                           ignore_disabled))
+                       if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
                                break;
                }
                entry += header->length;
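
The loop above is the standard MADT traversal: the table body is a sequence of variable-length subtables, each led by a common header carrying its type and length. Stripped of the dispatch, the walk looks roughly like this (a sketch, not the verbatim kernel loop):

    unsigned long entry = (unsigned long)(madt + 1);  /* first subtable */
    unsigned long end = (unsigned long)madt + madt->header.length;

    while (entry + sizeof(struct acpi_subtable_header) < end) {
            struct acpi_subtable_header *header =
                    (struct acpi_subtable_header *)entry;

            /* dispatch on header->type; stop once the ID matches ... */
            entry += header->length;  /* subtables are variable-sized */
    }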
@@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id)
        if (!madt)
                return PHYS_CPUID_INVALID;
 
-       rv = map_madt_entry(madt, 1, acpi_id, true);
+       rv = map_madt_entry(madt, 1, acpi_id);
 
        acpi_put_table((struct acpi_table_header *)madt);
 
        return rv;
 }
 
-static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
-                                 bool ignore_disabled)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
 {
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *obj;
@@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
 
        header = (struct acpi_subtable_header *)obj->buffer.pointer;
        if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
-               map_lapic_id(header, acpi_id, &phys_id, ignore_disabled);
+               map_lapic_id(header, acpi_id, &phys_id);
        else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
-               map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+               map_lsapic_id(header, type, acpi_id, &phys_id);
        else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
-               map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+               map_x2apic_id(header, type, acpi_id, &phys_id);
        else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
-               map_gicc_mpidr(header, type, acpi_id, &phys_id,
-                              ignore_disabled);
+               map_gicc_mpidr(header, type, acpi_id, &phys_id);
 
 exit:
        kfree(buffer.pointer);
        return phys_id;
 }
 
-static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type,
-                                      u32 acpi_id, bool ignore_disabled)
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
 {
        phys_cpuid_t phys_id;
 
-       phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled);
+       phys_id = map_mat_entry(handle, type, acpi_id);
        if (invalid_phys_cpuid(phys_id))
-               phys_id = map_madt_entry(get_madt_table(), type, acpi_id,
-                                          ignore_disabled);
+               phys_id = map_madt_entry(get_madt_table(), type, acpi_id);
 
        return phys_id;
 }
 
-phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
-{
-       return __acpi_get_phys_id(handle, type, acpi_id, true);
-}
-
 int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
 {
 #ifdef CONFIG_SMP
@@ -278,79 +262,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
 }
 EXPORT_SYMBOL_GPL(acpi_get_cpuid);
 
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static bool __init
-map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
-{
-       int type, id;
-       u32 acpi_id;
-       acpi_status status;
-       acpi_object_type acpi_type;
-       unsigned long long tmp;
-       union acpi_object object = { 0 };
-       struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
-       status = acpi_get_type(handle, &acpi_type);
-       if (ACPI_FAILURE(status))
-               return false;
-
-       switch (acpi_type) {
-       case ACPI_TYPE_PROCESSOR:
-               status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-               if (ACPI_FAILURE(status))
-                       return false;
-               acpi_id = object.processor.proc_id;
-
-               /* validate the acpi_id */
-               if(acpi_processor_validate_proc_id(acpi_id))
-                       return false;
-               break;
-       case ACPI_TYPE_DEVICE:
-               status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-               if (ACPI_FAILURE(status))
-                       return false;
-               acpi_id = tmp;
-               break;
-       default:
-               return false;
-       }
-
-       type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-
-       *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
-       id = acpi_map_cpuid(*phys_id, acpi_id);
-
-       if (id < 0)
-               return false;
-       *cpuid = id;
-       return true;
-}
-
-static acpi_status __init
-set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
-                          void **rv)
-{
-       phys_cpuid_t phys_id;
-       int cpu_id;
-
-       if (!map_processor(handle, &phys_id, &cpu_id))
-               return AE_ERROR;
-
-       acpi_map_cpu2node(handle, cpu_id, phys_id);
-       return AE_OK;
-}
-
-void __init acpi_set_processor_mapping(void)
-{
-       /* Set persistent cpu <-> node mapping for all processors. */
-       acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-                           ACPI_UINT32_MAX, set_processor_node_mapping,
-                           NULL, NULL, NULL);
-}
-#else
-void __init acpi_set_processor_mapping(void) {}
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
                         u64 *phys_addr, int *ioapic_id)
index 4a610795b585fd41765676529eaaccc0685cda93..906705e5f7763c5ac91f1f00a140f49c54ee7c2a 100644 (file)
@@ -2267,9 +2267,8 @@ static int amb_probe(struct pci_dev *pci_dev,
        dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
        dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
 
-       init_timer(&dev->housekeeping);
-       dev->housekeeping.function = do_housekeeping;
-       dev->housekeeping.data = (unsigned long) dev;
+       setup_timer(&dev->housekeeping, do_housekeeping,
+                   (unsigned long)dev);
        mod_timer(&dev->housekeeping, jiffies);
 
        // enable host interrupts
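
The conversion above is the standard pre-4.15 timer idiom: setup_timer() collapses init_timer() plus the two hand-written field assignments into one call. A minimal sketch under that era's (unsigned long data) callback convention, with hypothetical names:

    static void my_housekeeping(unsigned long data)
    {
            struct my_dev *dev = (struct my_dev *)data;  /* hypothetical */

            /* ... periodic work ... */
            mod_timer(&dev->housekeeping, jiffies + HZ); /* re-arm in 1s */
    }

    /* in probe(): one call instead of init_timer() plus two assignments */
    setup_timer(&dev->housekeeping, my_housekeeping, (unsigned long)dev);
    mod_timer(&dev->housekeeping, jiffies);              /* fire now */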
index 684bda4d14a187b41ff453bf33ad8df4774c977f..6bb60fb6a30b7b9b4fd42e2872261317b38c22b5 100644 (file)
@@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void)
        return restart_syscall();
 }
 
-void assert_held_device_hotplug(void)
-{
-       lockdep_assert_held(&device_hotplug_lock);
-}
-
 #ifdef CONFIG_BLOCK
 static inline int device_is_not_partition(struct device *dev)
 {
index c2c14a12713b56038c8c21deae2212550d24422b..a6a9dd4d0eeffd66ffa446de2fc726270049b857 100644 (file)
@@ -344,7 +344,8 @@ config BT_WILINK
 
 config BT_QCOMSMD
        tristate "Qualcomm SMD based HCI support"
-       depends on (QCOM_SMD && QCOM_WCNSS_CTRL) || COMPILE_TEST
+       depends on RPMSG || (COMPILE_TEST && RPMSG=n)
+       depends on QCOM_WCNSS_CTRL || (COMPILE_TEST && QCOM_WCNSS_CTRL=n)
        select BT_QCA
        help
          Qualcomm SMD based HCI driver.
index 8d4868af9bbd88fff13eb8525dec5310654f54de..ef730c173d4b875063726ccd398b7cded2e6f660 100644 (file)
@@ -14,7 +14,7 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
 #include <linux/soc/qcom/wcnss_ctrl.h>
 #include <linux/platform_device.h>
 
@@ -26,8 +26,8 @@
 struct btqcomsmd {
        struct hci_dev *hdev;
 
-       struct qcom_smd_channel *acl_channel;
-       struct qcom_smd_channel *cmd_channel;
+       struct rpmsg_endpoint *acl_channel;
+       struct rpmsg_endpoint *cmd_channel;
 };
 
 static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type,
@@ -48,19 +48,19 @@ static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type,
        return hci_recv_frame(hdev, skb);
 }
 
-static int btqcomsmd_acl_callback(struct qcom_smd_channel *channel,
-                                 const void *data, size_t count)
+static int btqcomsmd_acl_callback(struct rpmsg_device *rpdev, void *data,
+                                 int count, void *priv, u32 addr)
 {
-       struct btqcomsmd *btq = qcom_smd_get_drvdata(channel);
+       struct btqcomsmd *btq = priv;
 
        btq->hdev->stat.byte_rx += count;
        return btqcomsmd_recv(btq->hdev, HCI_ACLDATA_PKT, data, count);
 }
 
-static int btqcomsmd_cmd_callback(struct qcom_smd_channel *channel,
-                                 const void *data, size_t count)
+static int btqcomsmd_cmd_callback(struct rpmsg_device *rpdev, void *data,
+                                 int count, void *priv, u32 addr)
 {
-       struct btqcomsmd *btq = qcom_smd_get_drvdata(channel);
+       struct btqcomsmd *btq = priv;
 
        return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count);
 }
@@ -72,12 +72,12 @@ static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb)
 
        switch (hci_skb_pkt_type(skb)) {
        case HCI_ACLDATA_PKT:
-               ret = qcom_smd_send(btq->acl_channel, skb->data, skb->len);
+               ret = rpmsg_send(btq->acl_channel, skb->data, skb->len);
                hdev->stat.acl_tx++;
                hdev->stat.byte_tx += skb->len;
                break;
        case HCI_COMMAND_PKT:
-               ret = qcom_smd_send(btq->cmd_channel, skb->data, skb->len);
+               ret = rpmsg_send(btq->cmd_channel, skb->data, skb->len);
                hdev->stat.cmd_tx++;
                break;
        default:
@@ -114,18 +114,15 @@ static int btqcomsmd_probe(struct platform_device *pdev)
        wcnss = dev_get_drvdata(pdev->dev.parent);
 
        btq->acl_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_ACL",
-                                                  btqcomsmd_acl_callback);
+                                                  btqcomsmd_acl_callback, btq);
        if (IS_ERR(btq->acl_channel))
                return PTR_ERR(btq->acl_channel);
 
        btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD",
-                                                  btqcomsmd_cmd_callback);
+                                                  btqcomsmd_cmd_callback, btq);
        if (IS_ERR(btq->cmd_channel))
                return PTR_ERR(btq->cmd_channel);
 
-       qcom_smd_set_drvdata(btq->acl_channel, btq);
-       qcom_smd_set_drvdata(btq->cmd_channel, btq);
-
        hdev = hci_alloc_dev();
        if (!hdev)
                return -ENOMEM;
@@ -158,6 +155,9 @@ static int btqcomsmd_remove(struct platform_device *pdev)
        hci_unregister_dev(btq->hdev);
        hci_free_dev(btq->hdev);
 
+       rpmsg_destroy_ept(btq->cmd_channel);
+       rpmsg_destroy_ept(btq->acl_channel);
+
        return 0;
 }
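
The SMD-to-rpmsg conversion above replaces per-channel drvdata accessors with the generic rpmsg receive callback, whose priv argument carries whatever context was handed to qcom_wcnss_open_channel() at registration time. The shape of the contract, sketched with hypothetical names (including the channel name):

    static int my_rx_cb(struct rpmsg_device *rpdev, void *data, int count,
                        void *priv, u32 addr)
    {
            struct my_ctx *ctx = priv;  /* context given at channel open */

            /* consume 'count' bytes at 'data' ... */
            return 0;
    }

    /* open: the callback and its context are registered together */
    struct rpmsg_endpoint *ept;

    ept = qcom_wcnss_open_channel(wcnss, "MY_CHANNEL", my_rx_cb, ctx);
    if (IS_ERR(ept))
            return PTR_ERR(ept);
    /* transmit with rpmsg_send(ept, buf, len); on teardown: */
    rpmsg_destroy_ept(ept);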
 
index 745844ee973e1deda08203725d9b9d1b8e412972..d4ca9962a7595a0206710a0dd4a95656f426ae8e 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/atmel_tc.h>
-#include <linux/sched_clock.h>
 
 
 /*
@@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs)
        return (upper << 16) | lower;
 }
 
-static u32 tc_get_cv32(void)
-{
-       return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
-}
-
 static u64 tc_get_cycles32(struct clocksource *cs)
 {
-       return tc_get_cv32();
+       return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
 }
 
 static struct clocksource clksrc = {
@@ -75,11 +69,6 @@ static struct clocksource clksrc = {
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static u64 notrace tc_read_sched_clock(void)
-{
-       return tc_get_cv32();
-}
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 struct tc_clkevt_device {
@@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void)
                clksrc.read = tc_get_cycles32;
                /* setup only channel 0 */
                tcb_setup_single_chan(tc, best_divisor_idx);
-
-               /* register sched_clock on chips with single 32 bit counter */
-               sched_clock_register(tc_read_sched_clock, 32, divided_rate);
        } else {
                /* tclib will give us three clocks no matter what the
                 * underlying platform supports.
index 38b9fdf854a49a7e4ba9950e365904d18b64caf5..b8ff617d449d928f97e2c4785639c07166135627 100644 (file)
@@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
 {
        unsigned int cur_freq = __cpufreq_get(policy);
-       if (!cur_freq)
-               return sprintf(buf, "<unknown>");
-       return sprintf(buf, "%u\n", cur_freq);
+
+       if (cur_freq)
+               return sprintf(buf, "%u\n", cur_freq);
+
+       return sprintf(buf, "<unknown>\n");
 }
 
 /**
index 3d37219a0dd7afc3108b017f1d2960868efb7903..08e134ffba68e28656374fc55fab95345050cf8c 100644 (file)
@@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y)
        return div64_u64(x << EXT_FRAC_BITS, y);
 }
 
+static inline int32_t percent_ext_fp(int percent)
+{
+       return div_ext_fp(percent, 100);
+}
+
 /**
  * struct sample -     Store performance sample
  * @core_avg_perf:     Ratio of APERF/MPERF which is the actual average
@@ -845,12 +850,11 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 
 static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
-       int min, hw_min, max, hw_max, cpu, range, adj_range;
+       int min, hw_min, max, hw_max, cpu;
        struct perf_limits *perf_limits = limits;
        u64 value, cap;
 
        for_each_cpu(cpu, policy->cpus) {
-               int max_perf_pct, min_perf_pct;
                struct cpudata *cpu_data = all_cpu_data[cpu];
                s16 epp;
 
@@ -863,20 +867,15 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
                        hw_max = HWP_GUARANTEED_PERF(cap);
                else
                        hw_max = HWP_HIGHEST_PERF(cap);
-               range = hw_max - hw_min;
 
-               max_perf_pct = perf_limits->max_perf_pct;
-               min_perf_pct = perf_limits->min_perf_pct;
+               min = fp_ext_toint(hw_max * perf_limits->min_perf);
 
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
-               adj_range = min_perf_pct * range / 100;
-               min = hw_min + adj_range;
+
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);
 
-               adj_range = max_perf_pct * range / 100;
-               max = hw_min + adj_range;
-
+               max = fp_ext_toint(hw_max * perf_limits->max_perf);
                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
 
@@ -989,6 +988,7 @@ static void intel_pstate_update_policies(void)
 static int pid_param_set(void *data, u64 val)
 {
        *(u32 *)data = val;
+       pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
        intel_pstate_reset_all_pid();
        return 0;
 }
@@ -1225,7 +1225,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                   limits->max_perf_pct);
        limits->max_perf_pct = max(limits->min_perf_pct,
                                   limits->max_perf_pct);
-       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+       limits->max_perf = percent_ext_fp(limits->max_perf_pct);
 
        intel_pstate_update_policies();
 
@@ -1262,7 +1262,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                   limits->min_perf_pct);
        limits->min_perf_pct = min(limits->max_perf_pct,
                                   limits->min_perf_pct);
-       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+       limits->min_perf = percent_ext_fp(limits->min_perf_pct);
 
        intel_pstate_update_policies();
 
@@ -2080,36 +2080,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
                                            struct perf_limits *limits)
 {
+       int32_t max_policy_perf, min_policy_perf;
 
-       limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
-                                             policy->cpuinfo.max_freq);
-       limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+       max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+       max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
        if (policy->max == policy->min) {
-               limits->min_policy_pct = limits->max_policy_pct;
+               min_policy_perf = max_policy_perf;
        } else {
-               limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
-                                                     policy->cpuinfo.max_freq);
-               limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
-                                                0, 100);
+               min_policy_perf = div_ext_fp(policy->min,
+                                            policy->cpuinfo.max_freq);
+               min_policy_perf = clamp_t(int32_t, min_policy_perf,
+                                         0, max_policy_perf);
        }
 
-       /* Normalize user input to [min_policy_pct, max_policy_pct] */
-       limits->min_perf_pct = max(limits->min_policy_pct,
-                                  limits->min_sysfs_pct);
-       limits->min_perf_pct = min(limits->max_policy_pct,
-                                  limits->min_perf_pct);
-       limits->max_perf_pct = min(limits->max_policy_pct,
-                                  limits->max_sysfs_pct);
-       limits->max_perf_pct = max(limits->min_policy_pct,
-                                  limits->max_perf_pct);
+       /* Normalize user input to [min_perf, max_perf] */
+       limits->min_perf = max(min_policy_perf,
+                              percent_ext_fp(limits->min_sysfs_pct));
+       limits->min_perf = min(limits->min_perf, max_policy_perf);
+       limits->max_perf = min(max_policy_perf,
+                              percent_ext_fp(limits->max_sysfs_pct));
+       limits->max_perf = max(min_policy_perf, limits->max_perf);
 
-       /* Make sure min_perf_pct <= max_perf_pct */
-       limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+       /* Make sure min_perf <= max_perf */
+       limits->min_perf = min(limits->min_perf, limits->max_perf);
 
-       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
-       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
        limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
        limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+       limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
+       limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
 
        pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
                 limits->max_perf_pct, limits->min_perf_pct);
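
The rework above keeps the limits in extended fixed point instead of whole percentages: percent_ext_fp() turns a percentage into a Q-format fraction via div_ext_fp(), and fp_ext_toint() truncates back to an integer. A self-contained userspace sketch of the arithmetic, assuming EXT_FRAC_BITS is 30 purely for illustration (the driver defines its own value):

    #include <stdint.h>
    #include <stdio.h>

    #define EXT_FRAC_BITS 30  /* assumed here purely for the example */

    static int64_t div_ext_fp(int64_t x, int64_t y)
    {
            return (x << EXT_FRAC_BITS) / y;  /* x/y in Q(EXT_FRAC_BITS) */
    }

    static int32_t percent_ext_fp(int percent)
    {
            return div_ext_fp(percent, 100);  /* 75 -> 0.75 fixed point */
    }

    static int fp_ext_toint(int64_t x)
    {
            return x >> EXT_FRAC_BITS;        /* truncate the fraction */
    }

    int main(void)
    {
            /* min = fp_ext_toint(hw_max * min_perf): 75% of hw_max 40 */
            int64_t min_perf = percent_ext_fp(75);

            printf("%d\n", fp_ext_toint(40 * min_perf));  /* prints 30 */
            return 0;
    }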
index 8d9829ff2a784de9490404a86a194e2304ed65c7..80c6db279ae10cb8558b2e90a91a4c4dafa917e0 100644 (file)
@@ -427,6 +427,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
        int rc = VM_FAULT_SIGBUS;
        phys_addr_t phys;
        pfn_t pfn;
+       unsigned int fault_size = PAGE_SIZE;
 
        if (check_vma(dax_dev, vmf->vma, __func__))
                return VM_FAULT_SIGBUS;
@@ -437,9 +438,12 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        }
 
+       if (fault_size != dax_region->align)
+               return VM_FAULT_SIGBUS;
+
        phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
        if (phys == -1) {
-               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                vmf->pgoff);
                return VM_FAULT_SIGBUS;
        }
@@ -464,6 +468,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
        phys_addr_t phys;
        pgoff_t pgoff;
        pfn_t pfn;
+       unsigned int fault_size = PMD_SIZE;
 
        if (check_vma(dax_dev, vmf->vma, __func__))
                return VM_FAULT_SIGBUS;
@@ -480,10 +485,20 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        }
 
+       if (fault_size < dax_region->align)
+               return VM_FAULT_SIGBUS;
+       else if (fault_size > dax_region->align)
+               return VM_FAULT_FALLBACK;
+
+       /* the huge fault must lie entirely inside the VMA */
+       if (pmd_addr < vmf->vma->vm_start ||
+                       (pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
        pgoff = linear_page_index(vmf->vma, pmd_addr);
        phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
        if (phys == -1) {
-               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                pgoff);
                return VM_FAULT_SIGBUS;
        }
@@ -503,6 +518,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
        phys_addr_t phys;
        pgoff_t pgoff;
        pfn_t pfn;
+       unsigned int fault_size = PUD_SIZE;
 
        if (check_vma(dax_dev, vmf->vma, __func__))
                return VM_FAULT_SIGBUS;
@@ -519,10 +536,20 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        }
 
+       if (fault_size < dax_region->align)
+               return VM_FAULT_SIGBUS;
+       else if (fault_size > dax_region->align)
+               return VM_FAULT_FALLBACK;
+
+       /* the huge fault must lie entirely inside the VMA */
+       if (pud_addr < vmf->vma->vm_start ||
+                       (pud_addr + PUD_SIZE) > vmf->vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
        pgoff = linear_page_index(vmf->vma, pud_addr);
        phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
        if (phys == -1) {
-               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                pgoff);
                return VM_FAULT_SIGBUS;
        }
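
All three device-dax fault handlers now apply the same gate before mapping anything: the fault granularity must match the region's configured alignment, and a huge fault must lie entirely inside the VMA. Condensed into one hypothetical helper (note that the PTE path above returns SIGBUS for any mismatch, since there is no smaller size to fall back to):

    static int dax_fault_check(unsigned long fault_size, unsigned long align,
                               unsigned long addr, struct vm_area_struct *vma)
    {
            if (fault_size < align)
                    return VM_FAULT_SIGBUS;    /* region wants larger pages */
            if (fault_size > align)
                    return VM_FAULT_FALLBACK;  /* retry at a smaller size */
            /* a huge mapping must sit entirely inside the VMA */
            if (addr < vma->vm_start || addr + fault_size > vma->vm_end)
                    return VM_FAULT_SIGBUS;
            return 0;
    }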
index 9e1a138fed53372a56dd1b7d2982ec198f46b3f1..16a8951b2beda389368c858848beb2edaf06949f 100644 (file)
@@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev)
        gpio->regmap = a10sr->regmap;
 
        gpio->gp = altr_a10sr_gc;
-
+       gpio->gp.parent = pdev->dev.parent;
        gpio->gp.of_node = pdev->dev.of_node;
 
        ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio);
index 5bddbd507ca9f105aa18cfe5f43b673b676d551d..3fe6a21e05a5718d8769bf2dd505cb5968f41207 100644 (file)
@@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
 
        altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
 
-       if (type == IRQ_TYPE_NONE)
+       if (type == IRQ_TYPE_NONE) {
+               irq_set_handler_locked(d, handle_bad_irq);
                return 0;
-       if (type == IRQ_TYPE_LEVEL_HIGH &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
-               return 0;
-       if (type == IRQ_TYPE_EDGE_RISING &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING)
-               return 0;
-       if (type == IRQ_TYPE_EDGE_FALLING &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
-               return 0;
-       if (type == IRQ_TYPE_EDGE_BOTH &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
+       }
+       if (type == altera_gc->interrupt_trigger) {
+               if (type == IRQ_TYPE_LEVEL_HIGH)
+                       irq_set_handler_locked(d, handle_level_irq);
+               else
+                       irq_set_handler_locked(d, handle_simple_irq);
                return 0;
-
+       }
+       irq_set_handler_locked(d, handle_bad_irq);
        return -EINVAL;
 }
 
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
        chained_irq_exit(chip, desc);
 }
 
-
 static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
 {
        struct altera_gpio_chip *altera_gc;
@@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
        altera_gc->interrupt_trigger = reg;
 
        ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
-               handle_simple_irq, IRQ_TYPE_NONE);
+               handle_bad_irq, IRQ_TYPE_NONE);
 
        if (ret) {
                dev_err(&pdev->dev, "could not add irqchip\n");
index bdb692345428ccc99c8f22bd3b460f25b41e3156..2a57d024481db8c354badd976843f83a365c72a9 100644 (file)
@@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value)
 static irqreturn_t mcp23s08_irq(int irq, void *data)
 {
        struct mcp23s08 *mcp = data;
-       int intcap, intf, i;
+       int intcap, intf, i, gpio, gpio_orig, intcap_mask;
        unsigned int child_irq;
+       bool intf_set, intcap_changed, gpio_bit_changed,
+               defval_changed, gpio_set;
 
        mutex_lock(&mcp->lock);
        if (mcp_read(mcp, MCP_INTF, &intf) < 0) {
@@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data)
        }
 
        mcp->cache[MCP_INTCAP] = intcap;
+
+       /* This clears the interrupt (configurable on the S18) */
+       if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) {
+               mutex_unlock(&mcp->lock);
+               return IRQ_HANDLED;
+       }
+       gpio_orig = mcp->cache[MCP_GPIO];
+       mcp->cache[MCP_GPIO] = gpio;
        mutex_unlock(&mcp->lock);
 
+       if (mcp->cache[MCP_INTF] == 0) {
+               /* There is no interrupt pending */
+               return IRQ_HANDLED;
+       }
+
+       dev_dbg(mcp->chip.parent,
+               "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n",
+               intcap, intf, gpio_orig, gpio);
 
        for (i = 0; i < mcp->chip.ngpio; i++) {
-               if ((BIT(i) & mcp->cache[MCP_INTF]) &&
-                   ((BIT(i) & intcap & mcp->irq_rise) ||
-                    (mcp->irq_fall & ~intcap & BIT(i)) ||
-                    (BIT(i) & mcp->cache[MCP_INTCON]))) {
+               /* We must check all of the inputs on the chip;
+                * otherwise we may not notice a change on >=2 pins.
+                *
+                * On at least the mcp23s17, INTCAP is only updated
+                * one byte at a time (INTCAPA and INTCAPB are not
+                * written at the same time, only on a per-bank
+                * basis).
+                *
+                * Per bank, INTF contains only the single bit that
+                * caused the interrupt.  On the mcp23s17, there is
+                * INTFA and INTFB.  If two pins are changed on the A
+                * side at the same time, INTF will only have one bit
+                * set.  If one pin on the A side and one pin on the B
+                * side are changed at the same time, INTF will have
+                * two bits set.  Thus, INTF can't be the only check
+                * to see if the input has changed.
+                */
+
+               intf_set = BIT(i) & mcp->cache[MCP_INTF];
+               if (i < 8 && intf_set)
+                       intcap_mask = 0x00FF;
+               else if (i >= 8 && intf_set)
+                       intcap_mask = 0xFF00;
+               else
+                       intcap_mask = 0x00;
+
+               intcap_changed = (intcap_mask &
+                       (BIT(i) & mcp->cache[MCP_INTCAP])) !=
+                       (intcap_mask & (BIT(i) & gpio_orig));
+               gpio_set = BIT(i) & mcp->cache[MCP_GPIO];
+               gpio_bit_changed = (BIT(i) & gpio_orig) !=
+                       (BIT(i) & mcp->cache[MCP_GPIO]);
+               defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) &&
+                       ((BIT(i) & mcp->cache[MCP_GPIO]) !=
+                       (BIT(i) & mcp->cache[MCP_DEFVAL]));
+
+               if (((gpio_bit_changed || intcap_changed) &&
+                       (BIT(i) & mcp->irq_rise) && gpio_set) ||
+                   ((gpio_bit_changed || intcap_changed) &&
+                       (BIT(i) & mcp->irq_fall) && !gpio_set) ||
+                   defval_changed) {
                        child_irq = irq_find_mapping(mcp->chip.irqdomain, i);
                        handle_nested_irq(child_irq);
                }
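
The per-pin condition built above can be restated as a pure function over 16-bit register snapshots, which makes the rise, fall, and level (DEFVAL) cases easier to follow; a standalone restatement, not driver code:

    #include <stdbool.h>
    #include <stdint.h>

    static bool pin_should_fire(int i, uint16_t intf, uint16_t intcap,
                                uint16_t intcon, uint16_t defval,
                                uint16_t gpio_old, uint16_t gpio_new,
                                uint16_t irq_rise, uint16_t irq_fall)
    {
            uint16_t bit = 1u << i;
            /* INTCAP is latched one bank at a time, so only compare the
             * bank whose INTF bit is set */
            uint16_t mask = !(intf & bit) ? 0 : (i < 8 ? 0x00ff : 0xff00);
            bool intcap_changed = (mask & intcap & bit) !=
                                  (mask & gpio_old & bit);
            bool gpio_changed = (gpio_old & bit) != (gpio_new & bit);
            bool gpio_set = gpio_new & bit;
            bool defval_changed = (intcon & bit) &&
                                  ((gpio_new & bit) != (defval & bit));

            return ((gpio_changed || intcap_changed) &&
                    (irq_rise & bit) && gpio_set) ||
                   ((gpio_changed || intcap_changed) &&
                    (irq_fall & bit) && !gpio_set) ||
                   defval_changed;
    }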
index 06dac72cb69c0c1c6e9005c748a613985dea111b..d993386892138757b67be09b4df8a822e39e4017 100644 (file)
@@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file,
        struct seq_file *sfile;
        struct gpio_desc *desc;
        struct gpio_chip *gc;
-       int status, val;
+       int val;
        char buf;
 
        sfile = file->private_data;
@@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file,
        chip = priv->chip;
        gc = &chip->gc;
 
-       status = copy_from_user(&buf, usr_buf, 1);
-       if (status)
-               return status;
+       if (copy_from_user(&buf, usr_buf, 1))
+               return -EFAULT;
 
        if (buf == '0')
                val = 0;
index 40a8881c2ce882bc1eef7eb59fff492afa6f378b..f1c6ec17b90a8352ecaf2e350aa8309a317925d8 100644 (file)
@@ -42,9 +42,7 @@ struct xgene_gpio {
        struct gpio_chip        chip;
        void __iomem            *base;
        spinlock_t              lock;
-#ifdef CONFIG_PM
        u32                     set_dr_val[XGENE_MAX_GPIO_BANKS];
-#endif
 };
 
 static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset)
@@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc,
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int xgene_gpio_suspend(struct device *dev)
+static __maybe_unused int xgene_gpio_suspend(struct device *dev)
 {
        struct xgene_gpio *gpio = dev_get_drvdata(dev);
        unsigned long bank_offset;
@@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev)
        return 0;
 }
 
-static int xgene_gpio_resume(struct device *dev)
+static __maybe_unused int xgene_gpio_resume(struct device *dev)
 {
        struct xgene_gpio *gpio = dev_get_drvdata(dev);
        unsigned long bank_offset;
@@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev)
 }
 
 static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume);
-#define XGENE_GPIO_PM_OPS      (&xgene_gpio_pm)
-#else
-#define XGENE_GPIO_PM_OPS      NULL
-#endif
 
 static int xgene_gpio_probe(struct platform_device *pdev)
 {
@@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = {
                .name = "xgene-gpio",
                .of_match_table = xgene_gpio_of_match,
                .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match),
-               .pm     = XGENE_GPIO_PM_OPS,
+               .pm     = &xgene_gpio_pm,
        },
        .probe = xgene_gpio_probe,
 };
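
Dropping the #ifdef CONFIG_PM scaffolding works because SIMPLE_DEV_PM_OPS() expands to an empty ops table when CONFIG_PM_SLEEP is off, and __maybe_unused keeps the then-unreferenced callbacks from triggering compiler warnings. The resulting idiom, sketched with hypothetical names:

    static __maybe_unused int my_suspend(struct device *dev)
    {
            /* save state ... */
            return 0;
    }

    static __maybe_unused int my_resume(struct device *dev)
    {
            /* restore state ... */
            return 0;
    }

    /* empty when CONFIG_PM_SLEEP is disabled, so no #ifdef is needed */
    static SIMPLE_DEV_PM_OPS(my_pm_ops, my_suspend, my_resume);

    static struct platform_driver my_driver = {
            .driver = {
                    .name = "my-driver",
                    .pm = &my_pm_ops,  /* always safe to reference */
            },
    };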
index 8363cb57915b0b726c704b8be37805ecef2a18ee..8a08e81ee90d579774ca96bc70853093ba623f09 100644 (file)
@@ -3,6 +3,4 @@
 # of AMDSOC/AMDGPU drm driver.
 # It provides the HW control for ACP related functionalities.
 
-subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
-
 AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
index d2d0f60ff36d1f2fd4a80ef8b43d2d3d9737e1f9..99424cb8020bdf914b5627bffce01155ba8f6b73 100644 (file)
@@ -240,6 +240,8 @@ free_partial_kdata:
        for (; i >= 0; i--)
                drm_free_large(p->chunks[i].kdata);
        kfree(p->chunks);
+       p->chunks = NULL;
+       p->nchunks = 0;
 put_ctx:
        amdgpu_ctx_put(p->ctx);
 free_chunk:
index 4120b351a8e5cc856492ad628f4d0567614dfe57..a3a105ec99e2d797978c79355f6ea67d4d4df5b4 100644 (file)
@@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
                use_bank = 0;
        }
 
-       *pos &= 0x3FFFF;
+       *pos &= (1UL << 22) - 1;
 
        if (use_bank) {
                if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
@@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
                use_bank = 0;
        }
 
-       *pos &= 0x3FFFF;
+       *pos &= (1UL << 22) - 1;
 
        if (use_bank) {
                if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
index f55e45b52fbce2b658135bc5fc48b084332f811c..33b504bafb8824727f3ba60fe2b103608ffe61c0 100644 (file)
@@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
                    (adev->pdev->device == 0x6667)) {
                        max_sclk = 75000;
                }
+       } else if (adev->asic_type == CHIP_OLAND) {
+               if ((adev->pdev->device == 0x6604) &&
+                   (adev->pdev->subsystem_vendor == 0x1028) &&
+                   (adev->pdev->subsystem_device == 0x066F)) {
+                       max_sclk = 75000;
+               }
        }
 
        if (rps->vce_active) {
index 50bdb24ef8d6e9f7e828ea661d873659beb3ce42..4a785d6acfb9afbde3b4f4b86116512134075759 100644 (file)
@@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle)
                /* rev0 hardware requires workarounds to support PG */
                adev->pg_flags = 0;
                if (adev->rev_id != 0x00) {
-                       adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+                       adev->pg_flags |=
                                AMD_PG_SUPPORT_GFX_SMG |
                                AMD_PG_SUPPORT_GFX_PIPELINE |
                                AMD_PG_SUPPORT_CP |
index 8cf71f3c6d0ea4706096222574c9d85871baba6c..261b828ad59086990f9f054906448a5526f4cbc4 100644 (file)
@@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
        if (bgate) {
                cgs_set_powergating_state(hwmgr->device,
                                                AMD_IP_BLOCK_TYPE_VCE,
-                                               AMD_PG_STATE_UNGATE);
+                                               AMD_PG_STATE_GATE);
                cgs_set_clockgating_state(hwmgr->device,
                                AMD_IP_BLOCK_TYPE_VCE,
                                AMD_CG_STATE_GATE);
index 08e6a71f5d05f412946496f39ee82303d19a56a4..294b53697334cc0855daa73925b8c58a19cf2222 100644 (file)
@@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc)
 
        clk_prepare_enable(hwdev->pxlclk);
 
-       /* mclk needs to be set to the same or higher rate than pxlclk */
-       clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000);
+       /* We rely on firmware to set mclk to a sensible level. */
        clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000);
 
        hwdev->modeset(hwdev, &vm);
index 488aedf5b58d54e7997b2339c75b7a90f30dcfc1..9f5513006eeef8b4e54f6727b44b0e97562935d6 100644 (file)
@@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = {
        { DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
        { DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE },
        { DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
-       { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 },
+       { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE },
 };
 
 #define MALIDP_DE_DEFAULT_PREFETCH_START       5
index 414aada10fe5e7d43392aa835b4c01aba594bcb7..d5aec082294cbdde5a19986a5b1908aef974bb19 100644 (file)
@@ -37,6 +37,8 @@
 #define   LAYER_V_VAL(x)               (((x) & 0x1fff) << 16)
 #define MALIDP_LAYER_COMP_SIZE         0x010
 #define MALIDP_LAYER_OFFSET            0x014
+#define MALIDP550_LS_ENABLE            0x01c
+#define MALIDP550_LS_R1_IN_SIZE                0x020
 
 /*
  * This 4-entry look-up-table is used to determine the full 8-bit alpha value
@@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane,
                        LAYER_V_VAL(plane->state->crtc_y),
                        mp->layer->base + MALIDP_LAYER_OFFSET);
 
+       if (mp->layer->id == DE_SMART)
+               malidp_hw_write(mp->hwdev,
+                               LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h),
+                               mp->layer->base + MALIDP550_LS_R1_IN_SIZE);
+
        /* first clear the rotation bits */
        val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL);
        val &= ~LAYER_ROT_MASK;
@@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm)
                plane->hwdev = malidp->dev;
                plane->layer = &map->layers[i];
 
-               /* Skip the features which the SMART layer doesn't have */
-               if (id == DE_SMART)
+               if (id == DE_SMART) {
+                       /*
+                        * Enable the first rectangle in the SMART layer to be
+                        * able to use it as a drm plane.
+                        */
+                       malidp_hw_write(malidp->dev, 1,
+                                       plane->layer->base + MALIDP550_LS_ENABLE);
+                       /* Skip the features which the SMART layer doesn't have. */
                        continue;
+               }
 
                drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags);
                malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT,
index aff6d4a84e998c6cc1d01e3067d0f52712daa145..b816067a65c5727ab120000c5d5d080e022fee2c 100644 (file)
@@ -84,6 +84,7 @@
 /* Stride register offsets relative to Lx_BASE */
 #define MALIDP_DE_LG_STRIDE            0x18
 #define MALIDP_DE_LV_STRIDE0           0x18
+#define MALIDP550_DE_LS_R1_STRIDE      0x28
 
 /* macros to set values into registers */
 #define MALIDP_DE_H_FRONTPORCH(x)      (((x) & 0xfff) << 0)
index 0a4b42d313912c3c5b56a449cfac33e63afeb16e..7febe6eecf722ad4f89b8484b7d6ce31c4d84c58 100644 (file)
@@ -293,6 +293,7 @@ enum plane_id {
        PLANE_PRIMARY,
        PLANE_SPRITE0,
        PLANE_SPRITE1,
+       PLANE_SPRITE2,
        PLANE_CURSOR,
        I915_MAX_PLANES,
 };
index 6908123162d17cd998c1e7f0bf54a27064e67588..10777da730394f7a63a7c1a551e6cd40b132fd0d 100644 (file)
@@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
+       ret = -ENODEV;
+       if (obj->ops->pwrite)
+               ret = obj->ops->pwrite(obj, args);
+       if (ret != -ENODEV)
+               goto err;
+
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
@@ -2119,6 +2125,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
         */
        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
        obj->mm.madv = __I915_MADV_PURGED;
+       obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
 /* Try to discard unwanted pages */
@@ -2218,7 +2225,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 
        __i915_gem_object_reset_page_iter(obj);
 
-       obj->ops->put_pages(obj, pages);
+       if (!IS_ERR(pages))
+               obj->ops->put_pages(obj, pages);
+
 unlock:
        mutex_unlock(&obj->mm.lock);
 }
@@ -2437,7 +2446,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
        if (err)
                return err;
 
-       if (unlikely(!obj->mm.pages)) {
+       if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
                err = ____i915_gem_object_get_pages(obj);
                if (err)
                        goto unlock;
@@ -2515,7 +2524,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 
        pinned = true;
        if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-               if (unlikely(!obj->mm.pages)) {
+               if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
                        ret = ____i915_gem_object_get_pages(obj);
                        if (ret)
                                goto err_unlock;
@@ -2563,6 +2572,75 @@ err_unlock:
        goto out_unlock;
 }
 
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
+                          const struct drm_i915_gem_pwrite *arg)
+{
+       struct address_space *mapping = obj->base.filp->f_mapping;
+       char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+       u64 remain, offset;
+       unsigned int pg;
+
+       /* Before we instantiate/pin the backing store for our use, we
+        * can prepopulate the shmemfs filp efficiently using a write into
+        * the pagecache. We avoid the penalty of instantiating all the
+        * pages, important if the user is just writing to a few and never
+        * uses the object on the GPU, and using a direct write into shmemfs
+        * allows it to avoid the cost of retrieving a page (either swapin
+        * or clearing-before-use) before it is overwritten.
+        */
+       if (READ_ONCE(obj->mm.pages))
+               return -ENODEV;
+
+       /* Before the pages are instantiated the object is treated as being
+        * in the CPU domain. The pages will be clflushed as required before
+        * use, and we can freely write into the pages directly. If userspace
+        * races pwrite with any other operation, corruption will ensue;
+        * that is userspace's prerogative!
+        */
+
+       remain = arg->size;
+       offset = arg->offset;
+       pg = offset_in_page(offset);
+
+       do {
+               unsigned int len, unwritten;
+               struct page *page;
+               void *data, *vaddr;
+               int err;
+
+               len = PAGE_SIZE - pg;
+               if (len > remain)
+                       len = remain;
+
+               err = pagecache_write_begin(obj->base.filp, mapping,
+                                           offset, len, 0,
+                                           &page, &data);
+               if (err < 0)
+                       return err;
+
+               vaddr = kmap(page);
+               unwritten = copy_from_user(vaddr + pg, user_data, len);
+               kunmap(page);
+
+               err = pagecache_write_end(obj->base.filp, mapping,
+                                         offset, len, len - unwritten,
+                                         page, data);
+               if (err < 0)
+                       return err;
+
+               if (unwritten)
+                       return -EFAULT;
+
+               remain -= len;
+               user_data += len;
+               offset += len;
+               pg = 0;
+       } while (remain);
+
+       return 0;
+}
+
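The hook wired into the ioctl above uses an optional-fast-path convention: a backend returns -ENODEV to mean "not applicable here, fall through to the generic path", while any other value, success or a real error, is treated as final. Schematically:

    int ret = -ENODEV;

    if (obj->ops->pwrite)
            ret = obj->ops->pwrite(obj, args);
    if (ret != -ENODEV)
            goto err;  /* handled (or genuinely failed) by the backend */

    /* ... otherwise continue with the ordinary pwrite path ... */
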
 static bool ban_context(const struct i915_gem_context *ctx)
 {
        return (i915_gem_context_is_bannable(ctx) &&
@@ -3029,6 +3107,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
                if (args->timeout_ns < 0)
                        args->timeout_ns = 0;
+
+               /*
+                * Apparently ktime isn't accurate enough and occasionally has a
+                * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+                * things up to make the test happy. We allow up to 1 jiffy.
+                *
+                * This is a regression from the timespec->ktime conversion.
+                */
+               if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+                       args->timeout_ns = 0;
        }
 
        i915_gem_object_put(obj);
@@ -3974,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE,
+
        .get_pages = i915_gem_object_get_pages_gtt,
        .put_pages = i915_gem_object_put_pages_gtt,
+
+       .pwrite = i915_gem_object_pwrite_gtt,
 };
 
 struct drm_i915_gem_object *
index c181b1bb3d2c9e72addb040ee8a0d5a4b52f06c9..3be2503aa042c0c48cb2745ad26e9316a2409484 100644 (file)
@@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
                 * those as well to make room for our guard pages.
                 */
                if (check_color) {
-                       if (vma->node.start + vma->node.size == node->start) {
-                               if (vma->node.color == node->color)
+                       if (node->start + node->size == target->start) {
+                               if (node->color == target->color)
                                        continue;
                        }
-                       if (vma->node.start == node->start + node->size) {
-                               if (vma->node.color == node->color)
+                       if (node->start == target->start + target->size) {
+                               if (node->color == target->color)
                                        continue;
                        }
                }
index bf90b07163d1266a6bb0c87f036e84fa78181991..76b80a0be79767be189c94694434c338c1f97e6a 100644 (file)
@@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops {
        struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
        void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
 
+       int (*pwrite)(struct drm_i915_gem_object *,
+                     const struct drm_i915_gem_pwrite *);
+
        int (*dmabuf_export)(struct drm_i915_gem_object *);
        void (*release)(struct drm_i915_gem_object *);
 };
index 155906e848120ae2e1de533d81658080c546888d..df20e9bc1c0f3dee67eb555ae20741d907a6b430 100644 (file)
@@ -512,10 +512,36 @@ err_unpin:
        return ret;
 }
 
+static void
+i915_vma_remove(struct i915_vma *vma)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+
+       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+       GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+
+       drm_mm_remove_node(&vma->node);
+       list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
+       /* Since the unbound list is global, only move to that list if
+        * no more VMAs exist.
+        */
+       if (--obj->bind_count == 0)
+               list_move_tail(&obj->global_link,
+                              &to_i915(obj->base.dev)->mm.unbound_list);
+
+       /* And finally now the object is completely decoupled from this vma,
+        * we can drop its hold on the backing storage and allow it to be
+        * reaped by the shrinker.
+        */
+       i915_gem_object_unpin_pages(obj);
+       GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+}
+
 int __i915_vma_do_pin(struct i915_vma *vma,
                      u64 size, u64 alignment, u64 flags)
 {
-       unsigned int bound = vma->flags;
+       const unsigned int bound = vma->flags;
        int ret;
 
        lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 
        if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
                ret = -EBUSY;
-               goto err;
+               goto err_unpin;
        }
 
        if ((bound & I915_VMA_BIND_MASK) == 0) {
                ret = i915_vma_insert(vma, size, alignment, flags);
                if (ret)
-                       goto err;
+                       goto err_unpin;
        }
 
        ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
        if (ret)
-               goto err;
+               goto err_remove;
 
        if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
                __i915_vma_set_map_and_fenceable(vma);
@@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma,
        GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
        return 0;
 
-err:
+err_remove:
+       if ((bound & I915_VMA_BIND_MASK) == 0) {
+               GEM_BUG_ON(vma->pages);
+               i915_vma_remove(vma);
+       }
+err_unpin:
        __i915_vma_unpin(vma);
        return ret;
 }
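
The relabelled error paths above restore the usual staged-unwind discipline: each label undoes exactly the state acquired before its failure point, and the conditional node removal only runs when this call performed the insert. A generic, self-contained illustration of the idiom (all names hypothetical):

    #include <stdbool.h>

    static void take_pin(void) { /* acquire step 1 */ }
    static void drop_pin(void) { /* release step 1 */ }
    static int insert_node(void) { return 0; /* acquire step 2 */ }
    static void remove_node(void) { /* release step 2 */ }
    static int bind_node(void) { return 0; /* step 3, may fail */ }

    static int do_pin(bool already_bound)
    {
            int ret;

            take_pin();                     /* step 1: unconditional */
            if (!already_bound) {
                    ret = insert_node();    /* step 2: conditional */
                    if (ret)
                            goto err_unpin; /* nothing inserted yet */
            }
            ret = bind_node();
            if (ret)
                    goto err_remove;
            return 0;

    err_remove:
            if (!already_bound)
                    remove_node();          /* undo step 2 only if ours */
    err_unpin:
            drop_pin();                     /* undo step 1 */
            return ret;
    }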
@@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma)
        }
        vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
 
-       drm_mm_remove_node(&vma->node);
-       list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
-
        if (vma->pages != obj->mm.pages) {
                GEM_BUG_ON(!vma->pages);
                sg_free_table(vma->pages);
@@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma)
        }
        vma->pages = NULL;
 
-       /* Since the unbound list is global, only move to that list if
-        * no more VMAs exist. */
-       if (--obj->bind_count == 0)
-               list_move_tail(&obj->global_link,
-                              &to_i915(obj->base.dev)->mm.unbound_list);
-
-       /* And finally now the object is completely decoupled from this vma,
-        * we can drop its hold on the backing storage and allow it to be
-        * reaped by the shrinker.
-        */
-       i915_gem_object_unpin_pages(obj);
-       GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+       i915_vma_remove(vma);
 
 destroy:
        if (unlikely(i915_vma_is_closed(vma)))
index 01341670738fbb118d8402bbda62d7234c8c3863..3282b0f4b13412162bfc8500576ab507eac36d14 100644 (file)
@@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc,
        /* drm_atomic_helper_update_legacy_modeset_state might not be called. */
        crtc->base.mode = crtc->base.state->mode;
 
-       DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n",
-                     old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h,
-                     pipe_config->pipe_src_w, pipe_config->pipe_src_h);
-
        /*
         * Update pipe size and adjust fitter if needed: the reason for this is
         * that in compute_mode_changes we check the native mode (not the pfit
@@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc)
        struct intel_crtc_scaler_state *scaler_state =
                &crtc->config->scaler_state;
 
-       DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config);
-
        if (crtc->config->pch_pfit.enabled) {
                int id;
 
-               if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) {
-                       DRM_ERROR("Requesting pfit without getting a scaler first\n");
+               if (WARN_ON(crtc->config->scaler_state.scaler_id < 0))
                        return;
-               }
 
                id = scaler_state->scaler_id;
                I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
                        PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
                I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos);
                I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size);
-
-               DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id);
        }
 }
 
@@ -14379,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state,
        } while (progress);
 }
 
+static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
+{
+       struct intel_atomic_state *state, *next;
+       struct llist_node *freed;
+
+       freed = llist_del_all(&dev_priv->atomic_helper.free_list);
+       llist_for_each_entry_safe(state, next, freed, freed)
+               drm_atomic_state_put(&state->base);
+}
+
+static void intel_atomic_helper_free_state_worker(struct work_struct *work)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(work, typeof(*dev_priv), atomic_helper.free_work);
+
+       intel_atomic_helper_free_state(dev_priv);
+}
+
 static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 {
        struct drm_device *dev = state->dev;
@@ -14545,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
         * can happen also when the device is completely off.
         */
        intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+
+       intel_atomic_helper_free_state(dev_priv);
 }
 
 static void intel_atomic_commit_work(struct work_struct *work)
@@ -14946,17 +14956,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc,
                to_intel_atomic_state(old_crtc_state->state);
        bool modeset = needs_modeset(crtc->state);
 
+       if (!modeset &&
+           (intel_cstate->base.color_mgmt_changed ||
+            intel_cstate->update_pipe)) {
+               intel_color_set_csc(crtc->state);
+               intel_color_load_luts(crtc->state);
+       }
+
        /* Perform vblank evasion around commit operation */
        intel_pipe_update_start(intel_crtc);
 
        if (modeset)
                goto out;
 
-       if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) {
-               intel_color_set_csc(crtc->state);
-               intel_color_load_luts(crtc->state);
-       }
-
        if (intel_cstate->update_pipe)
                intel_update_pipe_config(intel_crtc, old_intel_cstate);
        else if (INTEL_GEN(dev_priv) >= 9)
@@ -16599,18 +16611,6 @@ fail:
        drm_modeset_acquire_fini(&ctx);
 }
 
-static void intel_atomic_helper_free_state(struct work_struct *work)
-{
-       struct drm_i915_private *dev_priv =
-               container_of(work, typeof(*dev_priv), atomic_helper.free_work);
-       struct intel_atomic_state *state, *next;
-       struct llist_node *freed;
-
-       freed = llist_del_all(&dev_priv->atomic_helper.free_list);
-       llist_for_each_entry_safe(state, next, freed, freed)
-               drm_atomic_state_put(&state->base);
-}
-
 int intel_modeset_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -16631,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev)
        dev->mode_config.funcs = &intel_mode_funcs;
 
        INIT_WORK(&dev_priv->atomic_helper.free_work,
-                 intel_atomic_helper_free_state);
+                 intel_atomic_helper_free_state_worker);
 
        intel_init_quirks(dev);
 
index 1b8ba2e77539577f5eb997f9e1eb315f1f7ae078..2d449fb5d1d2b02dc016ebb50a026733b50acbf3 100644 (file)
@@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
                                    bool *enabled, int width, int height)
 {
        struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
-       unsigned long conn_configured, mask;
+       unsigned long conn_configured, conn_seq, mask;
        unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
        int i, j;
        bool *save_enabled;
        bool fallback = true;
        int num_connectors_enabled = 0;
        int num_connectors_detected = 0;
-       int pass = 0;
 
        save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
        if (!save_enabled)
@@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
        mask = BIT(count) - 1;
        conn_configured = 0;
 retry:
+       conn_seq = conn_configured;
        for (i = 0; i < count; i++) {
                struct drm_fb_helper_connector *fb_conn;
                struct drm_connector *connector;
@@ -387,7 +387,7 @@ retry:
                if (conn_configured & BIT(i))
                        continue;
 
-               if (pass == 0 && !connector->has_tile)
+               if (conn_seq == 0 && !connector->has_tile)
                        continue;
 
                if (connector->status == connector_status_connected)
@@ -498,10 +498,8 @@ retry:
                conn_configured |= BIT(i);
        }
 
-       if ((conn_configured & mask) != mask) {
-               pass++;
+       if ((conn_configured & mask) != mask && conn_configured != conn_seq)
                goto retry;
-       }
 
        /*
         * If the BIOS didn't enable everything it could, fall back to have the
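
The replacement of the pass counter with conn_seq turns an unbounded retry into a convergence test: the loop repeats only while a full pass configured at least one new connector, so it terminates even when some connectors can never be configured. The idiom in isolation, with try_configure() as a hypothetical stand-in for the per-connector body:

/* Sketch: retry a bitmask-driven scan only while it makes progress. */
static void demo_scan(int count)
{
	unsigned long configured = 0, seq, mask = BIT(count) - 1;
	int i;

retry:
	seq = configured;
	for (i = 0; i < count; i++) {
		if (configured & BIT(i))
			continue;
		if (try_configure(i))	/* hypothetical per-connector body */
			configured |= BIT(i);
	}
	/* Done, or a full pass changed nothing: either way, stop. */
	if ((configured & mask) != mask && configured != seq)
		goto retry;
}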
index 249623d45be0caa3e891e8a272706dff84dbc4be..940bab22d4649b848259a28f74ec5e77d6715fb6 100644 (file)
@@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                break;
        }
 
+       /* When BYT can survive dynamic sw freq adjustments without a
+        * system hang, this restriction can be lifted.
+        */
+       if (IS_VALLEYVIEW(dev_priv))
+               goto skip_hw_write;
+
        I915_WRITE(GEN6_RP_UP_EI,
                   GT_INTERVAL_FROM_US(dev_priv, ei_up));
        I915_WRITE(GEN6_RP_UP_THRESHOLD,
@@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                   GEN6_RP_UP_BUSY_AVG |
                   GEN6_RP_DOWN_IDLE_AVG);
 
+skip_hw_write:
        dev_priv->rps.power = new_power;
        dev_priv->rps.up_threshold = threshold_up;
        dev_priv->rps.down_threshold = threshold_down;
@@ -7916,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
  * @timeout_base_ms: timeout for polling with preemption enabled
  *
  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+10 ms expires.
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
  * The request is acknowledged once the PCODE reply dword equals @reply after
  * applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 10 ms with
+ * for @timeout_base_ms and if this times out for another 50 ms with
  * preemption disabled.
  *
  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
@@ -7955,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
         * worst case) _and_ PCODE was busy for some reason even after a
         * (queued) request and @timeout_base_ms delay. As a workaround retry
         * the poll with preemption disabled to maximize the number of
-        * requests. Increase the timeout from @timeout_base_ms to 10ms to
+        * requests. Increase the timeout from @timeout_base_ms to 50ms to
         * account for interrupts that could reduce the number of these
-        * requests.
+        * requests, and for any quirks of the PCODE firmware that delay
+        * the request completion.
         */
        DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
        WARN_ON_ONCE(timeout_base_ms > 3);
        preempt_disable();
-       ret = wait_for_atomic(COND, 10);
+       ret = wait_for_atomic(COND, 50);
        preempt_enable();
 
 out:
index 9ef54688872a86a70ab020a64b7209e040de70e0..9481ca9a3ae7e0a342957baf655a34f570a51eae 100644 (file)
@@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane,
                int scaler_id = plane_state->scaler_id;
                const struct intel_scaler *scaler;
 
-               DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n",
-                             plane_id, PS_PLANE_SEL(plane_id));
-
                scaler = &crtc_state->scaler_state.scalers[scaler_id];
 
                I915_WRITE(SKL_PS_CTRL(pipe, scaler_id),
index abe08885a5ba4ef1726d67809544534cf35a57df..b7ff592b14f5e00d68ff1cf6440dd45d6959606d 100644 (file)
@@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
 
        for_each_fw_domain_masked(d, fw_domains, dev_priv)
                fw_domain_wait_ack(d);
+
+       dev_priv->uncore.fw_domains_active |= fw_domains;
 }
 
 static void
@@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
                fw_domain_put(d);
                fw_domain_posting_read(d);
        }
+
+       dev_priv->uncore.fw_domains_active &= ~fw_domains;
 }
 
 static void
@@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
        if (WARN_ON(domain->wake_count == 0))
                domain->wake_count++;
 
-       if (--domain->wake_count == 0) {
+       if (--domain->wake_count == 0)
                dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask);
-               dev_priv->uncore.fw_domains_active &= ~domain->mask;
-       }
 
        spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 
@@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
                        fw_domains &= ~domain->mask;
        }
 
-       if (fw_domains) {
+       if (fw_domains)
                dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-               dev_priv->uncore.fw_domains_active |= fw_domains;
-       }
 }
 
 /**
@@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv,
                fw_domain_arm_timer(domain);
 
        dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-       dev_priv->uncore.fw_domains_active |= fw_domains;
 }
 
 static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
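
The uncore hunks above all make the same move: the fw_domains_active mask is now updated inside fw_domains_get()/fw_domains_put(), the only functions that actually touch the hardware, instead of being mirrored at every caller. That removes the risk of a caller changing the hardware state without the bookkeeping, or vice versa. The shape of the refactor, with illustrative names rather than the i915 API:

/* Sketch: co-locate bookkeeping with the hardware transition. */
struct demo_uncore {
	unsigned int active;	/* mask of awake domains */
};

static void demo_get(struct demo_uncore *u, unsigned int mask)
{
	demo_hw_wake(u, mask);	/* hypothetical register writes */
	u->active |= mask;	/* always updated with the HW poke */
}

static void demo_put(struct demo_uncore *u, unsigned int mask)
{
	demo_hw_sleep(u, mask);	/* hypothetical register writes */
	u->active &= ~mask;
}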
index af267c35d813cc7548f060ef5771d6cd4232b4c9..ee5883f59be5a1992c6bdd20c751285079f5d3c1 100644 (file)
@@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
        struct drm_gem_object *obj = buffer->priv;
        int ret = 0;
 
-       if (WARN_ON(!obj->filp))
-               return -EINVAL;
-
        ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
        if (ret < 0)
                return ret;
index d12b8978142f69b52e19a159f9a628080f7a18e5..72e1588580a1187f8ba0c05fe62fbaf1cde550d1 100644 (file)
@@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                    (rdev->pdev->device == 0x6667)) {
                        max_sclk = 75000;
                }
+       } else if (rdev->family == CHIP_OLAND) {
+               if ((rdev->pdev->device == 0x6604) &&
+                   (rdev->pdev->subsystem_vendor == 0x1028) &&
+                   (rdev->pdev->subsystem_device == 0x066F)) {
+                       max_sclk = 75000;
+               }
        }
 
        if (rps->vce_active) {
index f80bf9385e412db766424bf00cacd76458a64a8e..d745e8b50fb86458d09e400f5c35c9d257f4de2b 100644 (file)
@@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
 {
        struct drm_device *dev = crtc->dev;
        struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
+       unsigned long flags;
 
        WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
        mutex_lock(&tilcdc_crtc->enable_lock);
@@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
        tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG,
                          LCDC_PALETTE_LOAD_MODE(DATA_ONLY),
                          LCDC_PALETTE_LOAD_MODE_MASK);
+
+       /* There is no real chance of a race here, as the timestamp
+        * is taken before the raster DMA is started. The spin-lock is
+        * taken to have a memory barrier after taking the timestamp
+        * and to avoid a context switch between taking the timestamp
+        * and enabling the raster.
+        */
+       spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+       tilcdc_crtc->last_vblank = ktime_get();
        tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE);
+       spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
 
        drm_crtc_vblank_on(crtc);
 
@@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
        }
 
        drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-       tilcdc_crtc->last_vblank = 0;
 
        tilcdc_crtc->enabled = false;
        mutex_unlock(&tilcdc_crtc->enable_lock);
@@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
 {
        struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
        struct drm_device *dev = crtc->dev;
-       unsigned long flags;
 
        WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
@@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
        drm_framebuffer_reference(fb);
 
        crtc->primary->fb = fb;
+       tilcdc_crtc->event = event;
 
-       spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+       mutex_lock(&tilcdc_crtc->enable_lock);
 
-       if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) {
+       if (tilcdc_crtc->enabled) {
+               unsigned long flags;
                ktime_t next_vblank;
                s64 tdiff;
 
-               next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
-                       1000000 / crtc->hwmode.vrefresh);
+               spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
 
+               next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
+                                          1000000 / crtc->hwmode.vrefresh);
                tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get()));
 
                if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US)
                        tilcdc_crtc->next_fb = fb;
-       }
-
-       if (tilcdc_crtc->next_fb != fb)
-               set_scanout(crtc, fb);
+               else
+                       set_scanout(crtc, fb);
 
-       tilcdc_crtc->event = event;
+               spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+       }
 
-       spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+       mutex_unlock(&tilcdc_crtc->enable_lock);
 
        return 0;
 }
@@ -1036,5 +1047,5 @@ int tilcdc_crtc_create(struct drm_device *dev)
 
 fail:
        tilcdc_crtc_destroy(crtc);
-       return -ENOMEM;
+       return ret;
 }
index 1aeb80e5242461830f1d4075f0fb59bcb6ddc898..8c54cb8f5d6d1013ec1f4a39e8f88fcfe3333758 100644 (file)
@@ -175,11 +175,11 @@ config HID_CHERRY
        Support for Cherry Cymotion keyboard.
 
 config HID_CHICONY
-       tristate "Chicony Tactical pad"
+       tristate "Chicony devices"
        depends on HID
        default !EXPERT
        ---help---
-       Support for Chicony Tactical pad.
+       Support for Chicony Tactical pad and special keys on Chicony keyboards.
 
 config HID_CORSAIR
        tristate "Corsair devices"
@@ -190,6 +190,7 @@ config HID_CORSAIR
 
        Supported devices:
        - Vengeance K90
+       - Scimitar PRO RGB
 
 config HID_PRODIKEYS
        tristate "Prodikeys PC-MIDI Keyboard support"
index bc3cec199feefdf437d0c0141c5ff6f73aa10308..f04ed9aabc3f9fea0baf5b074acd83b6d07527c6 100644 (file)
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, ch_devices);
index e9e87d337446918f672771551f41041755d83d22..3ceb4a2af381f03d7f51b40cfe4d262be774de48 100644 (file)
@@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
@@ -1910,6 +1911,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
index c0303f61c26a94f1998f6883d42a0fc8cb41f432..9ba5d98a118042a52dc40b895c3b2e8df67c0b39 100644 (file)
@@ -3,8 +3,10 @@
  *
  * Supported devices:
  *  - Vengeance K90 Keyboard
+ *  - Scimitar PRO RGB Gaming Mouse
  *
  * Copyright (c) 2015 Clement Vuchener
+ * Copyright (c) 2017 Oscar Campos
  */
 
 /*
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev,
        return 0;
 }
 
+/*
+ * The report descriptor of the Corsair Scimitar RGB Pro gaming mouse is
+ * not parseable: it defines two consecutive Logical Minimum items for
+ * the Usage Page (Consumer) at rdesc bytes 75 and 77. Byte 77 is 0x16
+ * but should obviously be 0x26, the 16-bit Logical Maximum. This
+ * prevents proper parsing of the report descriptor because the Logical
+ * Minimum ends up larger than the Logical Maximum.
+ *
+ * This driver fixes the report descriptor for:
+ * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
+ */
+
+static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+        unsigned int *rsize)
+{
+       struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+       if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+               /*
+                * The Corsair Scimitar RGB Pro report descriptor is broken
+                * and defines two different Logical Minimum items for the
+                * Consumer Application. Byte 77 should be 0x26, defining a
+                * 16-bit integer for the Logical Maximum, but it is 0x16
+                * instead (Logical Minimum).
+                */
+               switch (hdev->product) {
+               case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
+                       if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
+                       && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
+                               hid_info(hdev, "Fixing up report descriptor\n");
+                               rdesc[77] = 0x26;
+                       }
+                       break;
+               }
+
+       }
+       return rdesc;
+}
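
The fixup relies on standard HID short-item encoding: in a short item's prefix byte, bits 7..4 are the tag, bits 3..2 the type, and bits 1..0 the data size. Among Global items, Logical Minimum has tag 1 and Logical Maximum tag 2, so 0x16 decodes as "Logical Minimum, 2-byte data" while 0x26 is "Logical Maximum, 2-byte data"; patching byte 77 from 0x16 to 0x26 turns the duplicated minimum into the intended 16-bit maximum (0x0fff from bytes 78 and 79). A small standalone decode of those prefix bytes; this helper is illustrative, not part of the driver:

#include <stdio.h>

/* Decode a HID short-item prefix byte: [tag:4][type:2][size:2]. */
static void decode_prefix(unsigned char b)
{
	static const char *types[] = { "Main", "Global", "Local", "Reserved" };

	printf("0x%02x: tag=%u type=%s size=%u\n",
	       b, b >> 4, types[(b >> 2) & 3], b & 3);
}

int main(void)
{
	decode_prefix(0x15); /* Logical Minimum (Global, tag 1), 1-byte data */
	decode_prefix(0x16); /* Logical Minimum (Global, tag 1), 2-byte data */
	decode_prefix(0x26); /* Logical Maximum (Global, tag 2), 2-byte data */
	return 0;
}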
+
 static const struct hid_device_id corsair_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
                .driver_data = CORSAIR_USE_K90_MACRO |
                               CORSAIR_USE_K90_BACKLIGHT },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
+            USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
        {}
 };
 
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = {
        .event = corsair_event,
        .remove = corsair_remove,
        .input_mapping = corsair_input_mapping,
+       .report_fixup = corsair_mouse_report_fixup,
 };
 
 module_hid_driver(corsair_driver);
 
 MODULE_LICENSE("GPL");
+/* Original K90 driver author */
 MODULE_AUTHOR("Clement Vuchener");
+/* Scimitar PRO RGB driver author */
+MODULE_AUTHOR("Oscar Campos");
 MODULE_DESCRIPTION("HID driver for Corsair devices");
index 86c95d30ac801f2895caef97a575955289d352a4..0e2e7c571d2261a148baec5bcddeb5cc7aa75e56 100644 (file)
 #define USB_DEVICE_ID_CORSAIR_K70RGB    0x1b13
 #define USB_DEVICE_ID_CORSAIR_STRAFE    0x1b15
 #define USB_DEVICE_ID_CORSAIR_K65RGB    0x1b17
+#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE  0x1b38
+#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE  0x1b39
+#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB  0x1b3e
 
 #define USB_VENDOR_ID_CREATIVELABS     0x041e
 #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51     0x322c
 
 #define USB_VENDOR_ID_JESS             0x0c45
 #define USB_DEVICE_ID_JESS_YUREX       0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
 
 #define USB_VENDOR_ID_JESS2            0x0f30
 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
index f405b07d03816506215bd19fe3c878393370484a..740996f9bdd49dde3d26659f68d2addbff291c60 100644 (file)
@@ -2632,6 +2632,8 @@ err_stop:
                sony_leds_remove(sc);
        if (sc->quirks & SONY_BATTERY_SUPPORT)
                sony_battery_remove(sc);
+       if (sc->touchpad)
+               sony_unregister_touchpad(sc);
        sony_cancel_work_sync(sc);
        kfree(sc->output_report_dmabuf);
        sony_remove_dev_list(sc);
index d6847a664446529831395a962aacab7cb49ab8f5..a69a3c88ab29f5fd736ad18a358fc185f63be99c 100644 (file)
@@ -80,6 +80,9 @@ static const struct hid_blacklist {
        { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
        { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
        { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
        { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
index be8f7e2a026f428f51200e395792dd715a612eeb..994bddc55b82272c52d6c3224828f75a42d24dbe 100644 (file)
@@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev)
 
        /* make sure we don't trigger the LEDs */
        wacom_led_groups_release(wacom);
-       wacom_release_resources(wacom);
+
+       if (wacom->wacom_wac.features.type != REMOTE)
+               wacom_release_resources(wacom);
 
        hid_set_drvdata(hdev, NULL);
 }
index 4aa3de9f1163b30eb64b4304f285a4167aef0cf0..94250c293be2a18b247e2be006a0e7e4faf4f6f8 100644 (file)
@@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
                input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH);
                input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL);
                input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH);
-               input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
-               input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+               if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) {
+                       input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
+                       input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+               }
                break;
        case WACOM_HID_WD_FINGERWHEEL:
                wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0);
@@ -4197,10 +4199,10 @@ static const struct wacom_features wacom_features_0x343 =
          WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
 static const struct wacom_features wacom_features_0x360 =
        { "Wacom Intuos Pro M", 44800, 29600, 8191, 63,
-         INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+         INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
 static const struct wacom_features wacom_features_0x361 =
        { "Wacom Intuos Pro L", 62200, 43200, 8191, 63,
-         INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+         INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
 
 static const struct wacom_features wacom_features_HID_ANY_ID =
        { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
index 87799e81af97697cb4879acb227a30ea0bf792bd..c3f1a9e33cef0852a947295fefcb765ffed80a88 100644 (file)
@@ -32,6 +32,8 @@
 
 #include "hyperv_vmbus.h"
 
+#define VMBUS_PKT_TRAILER      8
+
 /*
  * When we write to the ring buffer, check if the host needs to
  * be signaled. Here are the details of this protocol:
@@ -336,6 +338,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
        return 0;
 }
 
+static inline void
+init_cached_read_index(struct hv_ring_buffer_info *rbi)
+{
+       rbi->cached_read_index = rbi->ring_buffer->read_index;
+}
+
 int hv_ringbuffer_read(struct vmbus_channel *channel,
                       void *buffer, u32 buflen, u32 *buffer_actual_len,
                       u64 *requestid, bool raw)
@@ -366,7 +374,8 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
                return ret;
        }
 
-       init_cached_read_index(channel);
+       init_cached_read_index(inring_info);
+
        next_read_location = hv_get_next_read_location(inring_info);
        next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
                                                    sizeof(desc),
@@ -410,3 +419,86 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
 
        return ret;
 }
+
+/*
+ * Determine number of bytes available in ring buffer after
+ * the current iterator (priv_read_index) location.
+ *
+ * This is similar to hv_get_bytes_to_read, but uses the
+ * private read index instead.
+ */
+static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
+{
+       u32 priv_read_loc = rbi->priv_read_index;
+       u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+
+       if (write_loc >= priv_read_loc)
+               return write_loc - priv_read_loc;
+       else
+               return (rbi->ring_datasize - priv_read_loc) + write_loc;
+}
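
The availability helper handles the wraparound case explicitly: when the write pointer has lapped the private read pointer, the readable data is the tail of the ring from the read pointer plus the head up to the write pointer. The same arithmetic as a standalone, checkable example:

#include <assert.h>

/* Sketch of the ring-buffer availability calculation in isolation. */
static unsigned int avail(unsigned int size, unsigned int rd, unsigned int wr)
{
	return wr >= rd ? wr - rd : (size - rd) + wr;
}

int main(void)
{
	assert(avail(4096, 1000, 3000) == 2000); /* no wrap */
	assert(avail(4096, 3000, 1000) == 2096); /* wrapped: 1096 + 1000 */
	return 0;
}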
+
+/*
+ * Get first vmbus packet from ring buffer after read_index
+ *
+ * If the ring buffer is empty, returns NULL and no further action is needed.
+ */
+struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
+{
+       struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+       /* set state for later hv_signal_on_read() */
+       init_cached_read_index(rbi);
+
+       if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+               return NULL;
+
+       return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
+
+/*
+ * Get next vmbus packet from ring buffer.
+ *
+ * Advances the current location (priv_read_index) and checks for more
+ * data. If no more data remains, returns NULL.
+ */
+struct vmpacket_descriptor *
+__hv_pkt_iter_next(struct vmbus_channel *channel,
+                  const struct vmpacket_descriptor *desc)
+{
+       struct hv_ring_buffer_info *rbi = &channel->inbound;
+       u32 packetlen = desc->len8 << 3;
+       u32 dsize = rbi->ring_datasize;
+
+       /* bump offset to next potential packet */
+       rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+       if (rbi->priv_read_index >= dsize)
+               rbi->priv_read_index -= dsize;
+
+       /* more data? */
+       if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+               return NULL;
+       else
+               return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+}
+EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
+
+/*
+ * Update host ring buffer after iterating over packets.
+ */
+void hv_pkt_iter_close(struct vmbus_channel *channel)
+{
+       struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+       /*
+        * Make sure all reads are done before we update the read index since
+        * the writer may start writing to the read area once the read index
+        * is updated.
+        */
+       virt_rmb();
+       rbi->ring_buffer->read_index = rbi->priv_read_index;
+
+       hv_signal_on_read(channel);
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
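
Taken together, the three exports form a read iterator: hv_pkt_iter_first() snapshots the cached index and returns the first descriptor, __hv_pkt_iter_next() advances past the packet body plus the 8-byte trailer, and hv_pkt_iter_close() publishes the new read index back to the host. A hedged sketch of how a consumer might drive it; process_pkt() is a hypothetical handler, and in-tree callers may use a wrapper loop rather than calling __hv_pkt_iter_next() directly:

/* Sketch: draining a VMBus channel with the new iterator API. */
static void demo_drain_channel(struct vmbus_channel *chan)
{
	struct vmpacket_descriptor *desc;

	for (desc = hv_pkt_iter_first(chan);
	     desc;
	     desc = __hv_pkt_iter_next(chan, desc))
		process_pkt(desc);	/* hypothetical per-packet handler */

	hv_pkt_iter_close(chan);	/* publish read_index, maybe signal host */
}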
index 85acd0843b503807e92eafb26bce1cf60abc55a7..3f9e56e8b3796d3f225e89c67df130d32d55d583 100644 (file)
@@ -36,6 +36,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
index b9b47e5cc8b3bde5a053107c0ba6a4997754c1ee..ced0461d6e9ff822633d60505d4b390450f7a082 100644 (file)
@@ -587,9 +587,8 @@ void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
 #define EVENT_TYPE_CQ          1
 #define EVENT_TYPE_QP          2
        struct qedr_dev *dev = (struct qedr_dev *)context;
-       union event_ring_data *data = fw_handle;
-       u64 roce_handle64 = ((u64)data->roce_handle.hi << 32) +
-                           data->roce_handle.lo;
+       struct regpair *async_handle = (struct regpair *)fw_handle;
+       u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo;
        u8 event_type = EVENT_TYPE_NOT_DEFINED;
        struct ib_event event;
        struct ib_cq *ibcq;
index bb32e4792ec9f022d201c0585bcce7a7cbae179c..5cb9195513bdd4c754bf7e19b2f0c7d0ee30dc08 100644 (file)
@@ -38,7 +38,8 @@
 #include <linux/qed/qed_chain.h>
 #include <linux/qed/qed_roce_if.h>
 #include <linux/qed/qede_roce.h>
-#include "qedr_hsi.h"
+#include <linux/qed/roce_common.h>
+#include "qedr_hsi_rdma.h"
 
 #define QEDR_MODULE_VERSION    "8.10.10.0"
 #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
index 699632893dd9842c3a3b47153f631ae85765a531..a6280ce3e2a54c6cef7617c79862680e425f81d4 100644 (file)
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
 
-#include "qedr_hsi.h"
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_roce_if.h>
 #include "qedr.h"
-#include "qedr_hsi.h"
 #include "verbs.h"
 #include <rdma/qedr-abi.h>
-#include "qedr_hsi.h"
 #include "qedr_cm.h"
 
 void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info)
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi.h b/drivers/infiniband/hw/qedr/qedr_hsi.h
deleted file mode 100644 (file)
index 66d2752..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/* QLogic qedr NIC Driver
- * Copyright (c) 2015-2016  QLogic Corporation
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and /or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __QED_HSI_ROCE__
-#define __QED_HSI_ROCE__
-
-#include <linux/qed/common_hsi.h>
-#include <linux/qed/roce_common.h>
-#include "qedr_hsi_rdma.h"
-
-/* Affiliated asynchronous events / errors enumeration */
-enum roce_async_events_type {
-       ROCE_ASYNC_EVENT_NONE = 0,
-       ROCE_ASYNC_EVENT_COMM_EST = 1,
-       ROCE_ASYNC_EVENT_SQ_DRAINED,
-       ROCE_ASYNC_EVENT_SRQ_LIMIT,
-       ROCE_ASYNC_EVENT_LAST_WQE_REACHED,
-       ROCE_ASYNC_EVENT_CQ_ERR,
-       ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR,
-       ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR,
-       ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR,
-       ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR,
-       ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
-       ROCE_ASYNC_EVENT_SRQ_EMPTY,
-       MAX_ROCE_ASYNC_EVENTS_TYPE
-};
-
-#endif /* __QED_HSI_ROCE__ */
index 6b3bb32803bd8661d9efebd14dcefff0b601f6f3..2091902848e6c47bb1f3dfdc4cc08cec99386dc0 100644 (file)
@@ -43,7 +43,8 @@
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
 
-#include "qedr_hsi.h"
+#include <linux/qed/common_hsi.h>
+#include "qedr_hsi_rdma.h"
 #include <linux/qed/qed_if.h>
 #include "qedr.h"
 #include "verbs.h"
index 50749a70c5cacb99ee2dfbb0a1957b6baf336247..060d357f107f8c7720b08e6791dc352893c08fff 100644 (file)
@@ -157,10 +157,8 @@ int cf_command(int drvid, int mode,
        /* allocate mem for information struct */
        if (!(cs = kmalloc(sizeof(struct call_struc), GFP_ATOMIC)))
                return (-ENOMEM); /* no memory */
-       init_timer(&cs->timer);
+       setup_timer(&cs->timer, deflect_timer_expire, (ulong)cs);
        cs->info[0] = '\0';
-       cs->timer.function = deflect_timer_expire;
-       cs->timer.data = (ulong) cs; /* pointer to own structure */
        cs->ics.driver = drvid;
        cs->ics.command = ISDN_CMD_PROT_IO; /* protocol specific io */
        cs->ics.arg = DSS1_CMD_INVOKE; /* invoke supplementary service */
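
The long run of ISDN conversions that follows is mechanical: on kernels of this era (before the timer_setup() API), setup_timer(timer, fn, data) performs exactly the three assignments that the open-coded form did. Side by side:

/* Before: three statements, easy to get partially wrong. */
init_timer(&cs->timer);
cs->timer.function = deflect_timer_expire;
cs->timer.data = (ulong)cs;

/* After: one call, same effect. */
setup_timer(&cs->timer, deflect_timer_expire, (ulong)cs);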
@@ -452,10 +450,9 @@ static int isdn_divert_icall(isdn_ctrl *ic)
                                        return (0); /* no external deflection needed */
                        if (!(cs = kmalloc(sizeof(struct call_struc), GFP_ATOMIC)))
                                return (0); /* no memory */
-                       init_timer(&cs->timer);
+                       setup_timer(&cs->timer, deflect_timer_expire,
+                                   (ulong)cs);
                        cs->info[0] = '\0';
-                       cs->timer.function = deflect_timer_expire;
-                       cs->timer.data = (ulong) cs; /* pointer to own structure */
 
                        cs->ics = *ic; /* copy incoming data */
                        if (!cs->ics.parm.setup.phone[0]) strcpy(cs->ics.parm.setup.phone, "0");
index cb88090f9cea3036af14b8e8bcaa9eaeb4d58434..c61049585cbd7b67f24e057238244b8938500a9e 100644 (file)
@@ -300,9 +300,8 @@ static int um_idi_open_adapter(struct file *file, int adapter_nr)
        p_os = (diva_um_idi_os_context_t *) diva_um_id_get_os_context(e);
        init_waitqueue_head(&p_os->read_wait);
        init_waitqueue_head(&p_os->close_wait);
-       init_timer(&p_os->diva_timer_id);
-       p_os->diva_timer_id.function = (void *) diva_um_timer_function;
-       p_os->diva_timer_id.data = (unsigned long) p_os;
+       setup_timer(&p_os->diva_timer_id, (void *)diva_um_timer_function,
+                   (unsigned long)p_os);
        p_os->aborted = 0;
        p_os->adapter_nr = adapter_nr;
        return (1);
index 09df54fc1fef2162bf06228dddc431f2f7a9feeb..fda912b0833ff9dad7c6e04898487074f094b2a3 100644 (file)
@@ -13,7 +13,7 @@ config MISDN_HFCPCI
 
 config MISDN_HFCMULTI
        tristate "Support for HFC multiport cards (HFC-4S/8S/E1)"
-       depends on PCI || 8xx
+       depends on PCI || CPM1
        depends on MISDN
        help
          Enable support for cards with Cologne Chip AG's HFC multiport
@@ -27,8 +27,8 @@ config MISDN_HFCMULTI_8xx
        bool "Support for XHFC embedded board in HFC multiport driver"
        depends on MISDN
        depends on MISDN_HFCMULTI
-       depends on 8xx
-       default 8xx
+       depends on CPM1
+       default CPM1
        help
          Enable support for the XHFC embedded solution from Speech Design.
 
index 0eafe9f04fca326a40f6e5bb549b78420bd39e26..8a254747768e9de4181d6b133da05b292830e0ae 100644 (file)
@@ -6,7 +6,7 @@
  *
  */
 
-#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
 
 /* Change this to the value used by your board */
 #ifndef IMAP_ADDR
index 480c2d7794ebdcfbcb38667a0807a156a1b2fac3..961c07ee47b7337005a2997b557b0539aebff2fa 100644 (file)
@@ -3878,9 +3878,8 @@ hfcmulti_initmode(struct dchannel *dch)
                if (hc->dnum[pt]) {
                        mode_hfcmulti(hc, dch->slot, dch->dev.D.protocol,
                                      -1, 0, -1, 0);
-                       dch->timer.function = (void *) hfcmulti_dbusy_timer;
-                       dch->timer.data = (long) dch;
-                       init_timer(&dch->timer);
+                       setup_timer(&dch->timer, (void *)hfcmulti_dbusy_timer,
+                                   (long)dch);
                }
                for (i = 1; i <= 31; i++) {
                        if (!((1 << i) & hc->bmask[pt])) /* skip unused chan */
@@ -3986,9 +3985,8 @@ hfcmulti_initmode(struct dchannel *dch)
                hc->chan[i].slot_rx = -1;
                hc->chan[i].conf = -1;
                mode_hfcmulti(hc, i, dch->dev.D.protocol, -1, 0, -1, 0);
-               dch->timer.function = (void *) hfcmulti_dbusy_timer;
-               dch->timer.data = (long) dch;
-               init_timer(&dch->timer);
+               setup_timer(&dch->timer, (void *)hfcmulti_dbusy_timer,
+                           (long)dch);
                hc->chan[i - 2].slot_tx = -1;
                hc->chan[i - 2].slot_rx = -1;
                hc->chan[i - 2].conf = -1;
index ff48da61c94c849bf06cbb9ab9cb149515dcd626..5dc246d71c167d5a69f449603cbd4a329a85fa9c 100644 (file)
@@ -1717,9 +1717,8 @@ static void
 inithfcpci(struct hfc_pci *hc)
 {
        printk(KERN_DEBUG "inithfcpci: entered\n");
-       hc->dch.timer.function = (void *) hfcpci_dbusy_timer;
-       hc->dch.timer.data = (long) &hc->dch;
-       init_timer(&hc->dch.timer);
+       setup_timer(&hc->dch.timer, (void *)hfcpci_dbusy_timer,
+                   (long)&hc->dch);
        hc->chanlimit = 2;
        mode_hfcpci(&hc->bch[0], 1, -1);
        mode_hfcpci(&hc->bch[1], 2, -1);
@@ -2044,9 +2043,7 @@ setup_hw(struct hfc_pci *hc)
        Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
        /* At this point the needed PCI config is done */
        /* fifos are still not enabled */
-       hc->hw.timer.function = (void *) hfcpci_Timer;
-       hc->hw.timer.data = (long) hc;
-       init_timer(&hc->hw.timer);
+       setup_timer(&hc->hw.timer, (void *)hfcpci_Timer, (long)hc);
        /* default PCM master */
        test_and_set_bit(HFC_CFG_MASTER, &hc->cfg);
        return 0;
index 77dec28ba874c7f220944afc07e6ebcb67fa00a0..6742b0dc082115df347955c77c619e5a47ffb2d6 100644 (file)
@@ -796,9 +796,8 @@ isac_init(struct isac_hw *isac)
        }
        isac->mon_tx = NULL;
        isac->mon_rx = NULL;
-       isac->dch.timer.function = (void *) dbusy_timer_handler;
-       isac->dch.timer.data = (long)isac;
-       init_timer(&isac->dch.timer);
+       setup_timer(&isac->dch.timer, (void *)dbusy_timer_handler,
+                   (long)isac);
        isac->mocr = 0xaa;
        if (isac->type & IPAC_TYPE_ISACX) {
                /* Disable all IRQ */
index feafa91c2ed99088c0958059e0de6c04dd832128..5b078591b6ee846455ac25642c25c8d932bde99d 100644 (file)
@@ -1635,13 +1635,11 @@ init_isar(struct isar_hw *isar)
        }
        if (isar->version != 1)
                return -EINVAL;
-       isar->ch[0].ftimer.function = &ftimer_handler;
-       isar->ch[0].ftimer.data = (long)&isar->ch[0];
-       init_timer(&isar->ch[0].ftimer);
+       setup_timer(&isar->ch[0].ftimer, &ftimer_handler,
+                   (long)&isar->ch[0]);
        test_and_set_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags);
-       isar->ch[1].ftimer.function = &ftimer_handler;
-       isar->ch[1].ftimer.data = (long)&isar->ch[1];
-       init_timer(&isar->ch[1].ftimer);
+       setup_timer(&isar->ch[1].ftimer, &ftimer_handler,
+                   (long)&isar->ch[1]);
        test_and_set_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags);
        return 0;
 }
index 3b067ea656bd9260d0172d7e2387d602d459d170..3052c836b89f70bc441520e439635a6e91dd2248 100644 (file)
@@ -852,9 +852,8 @@ static void initW6692(struct w6692_hw *card)
 {
        u8      val;
 
-       card->dch.timer.function = (void *)dbusy_timer_handler;
-       card->dch.timer.data = (u_long)&card->dch;
-       init_timer(&card->dch.timer);
+       setup_timer(&card->dch.timer, (void *)dbusy_timer_handler,
+                   (u_long)&card->dch);
        w6692_mode(&card->bc[0], ISDN_P_NONE);
        w6692_mode(&card->bc[1], ISDN_P_NONE);
        WriteW6692(card, W_D_CTL, 0x00);
index 36817e0a0b9465df6d1f9f8c4eee7cb0e5d24157..3a4c2f9e19e9adf81c4d93eb7847ed8b549745e2 100644 (file)
@@ -789,7 +789,5 @@ void Amd7930_init(struct IsdnCardState *cs)
 void setup_Amd7930(struct IsdnCardState *cs)
 {
        INIT_WORK(&cs->tqueue, Amd7930_bh);
-       cs->dbusytimer.function = (void *) dbusy_timer_handler;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
 }
index 29ec2dfbd155521022438964f24ea91ff333926d..9826bad49e2c1f60f11108661ecb610bcaf878b2 100644 (file)
@@ -125,9 +125,7 @@ clear_arcofi(struct IsdnCardState *cs) {
 
 void
 init_arcofi(struct IsdnCardState *cs) {
-       cs->dc.isac.arcofitimer.function = (void *) arcofi_timer;
-       cs->dc.isac.arcofitimer.data = (long) cs;
-       init_timer(&cs->dc.isac.arcofitimer);
+       setup_timer(&cs->dc.isac.arcofitimer, (void *)arcofi_timer, (long)cs);
        init_waitqueue_head(&cs->dc.isac.arcofi_wait);
        test_and_set_bit(HW_ARCOFI, &cs->HW_Flags);
 }
index 4fc90de68d18a46941cfd8c629dd5f6624600173..079336e593f95a2970b1b43606ee92b4b7d7ae0f 100644 (file)
@@ -976,9 +976,8 @@ static int setup_diva_common(struct IsdnCardState *cs)
                printk(KERN_INFO "Diva: IPACX Design Id: %x\n",
                       MemReadISAC_IPACX(cs, IPACX_ID) & 0x3F);
        } else { /* DIVA 2.0 */
-               cs->hw.diva.tl.function = (void *) diva_led_handler;
-               cs->hw.diva.tl.data = (long) cs;
-               init_timer(&cs->hw.diva.tl);
+               setup_timer(&cs->hw.diva.tl, (void *)diva_led_handler,
+                           (long)cs);
                cs->readisac  = &ReadISAC;
                cs->writeisac = &WriteISAC;
                cs->readisacfifo  = &ReadISACfifo;
index d8ef64da26f1fe6e0a25c5515401079ed19ff3cb..03bc5d504e2266774ab866c695a52546d72816b4 100644 (file)
@@ -1147,9 +1147,7 @@ static int setup_elsa_common(struct IsdnCard *card)
        init_arcofi(cs);
 #endif
        setup_isac(cs);
-       cs->hw.elsa.tl.function = (void *) elsa_led_handler;
-       cs->hw.elsa.tl.data = (long) cs;
-       init_timer(&cs->hw.elsa.tl);
+       setup_timer(&cs->hw.elsa.tl, (void *)elsa_led_handler, (long)cs);
        /* Teste Timer */
        if (cs->hw.elsa.timer) {
                byteout(cs->hw.elsa.trig, 0xff);
index c7a94713e9ec98fae0c36d61ffdc40cd27884d70..d63266fa8cbdb93bf37e630af6663f511292786d 100644 (file)
@@ -98,13 +98,11 @@ void
 FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft)
 {
        ft->fi = fi;
-       ft->tl.function = (void *) FsmExpireTimer;
-       ft->tl.data = (long) ft;
 #if FSM_TIMER_DEBUG
        if (ft->fi->debug)
                ft->fi->printdebug(ft->fi, "FsmInitTimer %lx", (long) ft);
 #endif
-       init_timer(&ft->tl);
+       setup_timer(&ft->tl, (void *)FsmExpireTimer, (long)ft);
 }
 
 void
index e034ed847ff32ca1fcf0a55e66eebe2171e3d32f..90f051ce02590f3e70ee2e1227d7aa507441a24c 100644 (file)
@@ -1396,9 +1396,8 @@ setup_instance(hfc4s8s_hw *hw)
                l1p = hw->l1 + i;
                spin_lock_init(&l1p->lock);
                l1p->hw = hw;
-               l1p->l1_timer.function = (void *) hfc_l1_timer;
-               l1p->l1_timer.data = (long) (l1p);
-               init_timer(&l1p->l1_timer);
+               setup_timer(&l1p->l1_timer, (void *)hfc_l1_timer,
+                           (long)(l1p));
                l1p->st_num = i;
                skb_queue_head_init(&l1p->d_tx_queue);
                l1p->d_if.ifc.priv = hw->l1 + i;
index a756e5cb6871cbbee83810875bfa29ed8ca93529..ad8597a1a07efd75e1979054fdcda31ecab8b7a6 100644 (file)
@@ -1073,8 +1073,6 @@ set_cs_func(struct IsdnCardState *cs)
        cs->writeisacfifo = &dummyf;
        cs->BC_Read_Reg = &ReadReg;
        cs->BC_Write_Reg = &WriteReg;
-       cs->dbusytimer.function = (void *) hfc_dbusy_timer;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)hfc_dbusy_timer, (long)cs);
        INIT_WORK(&cs->tqueue, hfcd_bh);
 }
index 90449e1e91e5a27924da01df6215dfa6c326651e..f9ca35cc32b135bc2ae8d31b7f2c87c05d5cfb73 100644 (file)
@@ -1582,9 +1582,7 @@ inithfcpci(struct IsdnCardState *cs)
        cs->bcs[1].BC_SetStack = setstack_2b;
        cs->bcs[0].BC_Close = close_hfcpci;
        cs->bcs[1].BC_Close = close_hfcpci;
-       cs->dbusytimer.function = (void *) hfcpci_dbusy_timer;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)hfcpci_dbusy_timer, (long)cs);
        mode_hfcpci(cs->bcs, 0, 0);
        mode_hfcpci(cs->bcs + 1, 0, 1);
 }
@@ -1746,9 +1744,7 @@ setup_hfcpci(struct IsdnCard *card)
        cs->BC_Write_Reg = NULL;
        cs->irq_func = &hfcpci_interrupt;
        cs->irq_flags |= IRQF_SHARED;
-       cs->hw.hfcpci.timer.function = (void *) hfcpci_Timer;
-       cs->hw.hfcpci.timer.data = (long) cs;
-       init_timer(&cs->hw.hfcpci.timer);
+       setup_timer(&cs->hw.hfcpci.timer, (void *)hfcpci_Timer, (long)cs);
        cs->cardmsg = &hfcpci_card_msg;
        cs->auxcmd = &hfcpci_auxcmd;
 
index 13b2151c10f54ff9fd5bcec76e1ceef2b99aa472..3aef8e1a90e4455c6b1aa1e5eb935c95cd018210 100644 (file)
@@ -1495,9 +1495,7 @@ int setup_hfcsx(struct IsdnCard *card)
        } else
                return (0);     /* no valid card type */
 
-       cs->dbusytimer.function = (void *) hfcsx_dbusy_timer;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)hfcsx_dbusy_timer, (long)cs);
        INIT_WORK(&cs->tqueue, hfcsx_bh);
        cs->readisac = NULL;
        cs->writeisac = NULL;
@@ -1507,11 +1505,9 @@ int setup_hfcsx(struct IsdnCard *card)
        cs->BC_Write_Reg = NULL;
        cs->irq_func = &hfcsx_interrupt;
 
-       cs->hw.hfcsx.timer.function = (void *) hfcsx_Timer;
-       cs->hw.hfcsx.timer.data = (long) cs;
        cs->hw.hfcsx.b_fifo_size = 0; /* fifo size still unknown */
        cs->hw.hfcsx.cirm = ccd_sp_irqtab[cs->irq & 0xF]; /* RAM not evaluated */
-       init_timer(&cs->hw.hfcsx.timer);
+       setup_timer(&cs->hw.hfcsx.timer, (void *)hfcsx_Timer, (long)cs);
 
        reset_hfcsx(cs);
        cs->cardmsg = &hfcsx_card_msg;
index 678bd5224bc338a2767a106fb8057f544f218050..6dbd1f1da14f1e8752caadf6c9f2ca192cc63a3d 100644 (file)
@@ -1165,14 +1165,10 @@ hfc_usb_init(hfcusb_data *hfc)
        hfc->old_led_state = 0;
 
        /* init the t3 timer */
-       init_timer(&hfc->t3_timer);
-       hfc->t3_timer.data = (long) hfc;
-       hfc->t3_timer.function = (void *) l1_timer_expire_t3;
+       setup_timer(&hfc->t3_timer, (void *)l1_timer_expire_t3, (long)hfc);
 
        /* init the t4 timer */
-       init_timer(&hfc->t4_timer);
-       hfc->t4_timer.data = (long) hfc;
-       hfc->t4_timer.function = (void *) l1_timer_expire_t4;
+       setup_timer(&hfc->t4_timer, (void *)l1_timer_expire_t4, (long)hfc);
 
        /* init the background machinery for control requests */
        hfc->ctrl_read.bRequestType = 0xc0;
index 394da646e97b7316e618f054519095f83c338caa..467287096918a6661c93ab552a1759fb83fd45c8 100644 (file)
@@ -253,9 +253,7 @@ int setup_hfcs(struct IsdnCard *card)
                outb(0x57, cs->hw.hfcD.addr | 1);
        }
        set_cs_func(cs);
-       cs->hw.hfcD.timer.function = (void *) hfcs_Timer;
-       cs->hw.hfcD.timer.data = (long) cs;
-       init_timer(&cs->hw.hfcD.timer);
+       setup_timer(&cs->hw.hfcD.timer, (void *)hfcs_Timer, (long)cs);
        cs->cardmsg = &hfcs_card_msg;
        cs->irq_func = &hfcs_interrupt;
        return (1);
index 96d1df05044fb48ffceb988dd90540db9f125cdd..c7c3797a817ebecc285248985fbb4dedc50d6493 100644 (file)
@@ -676,7 +676,5 @@ clear_pending_icc_ints(struct IsdnCardState *cs)
 void setup_icc(struct IsdnCardState *cs)
 {
        INIT_WORK(&cs->tqueue, icc_bh);
-       cs->dbusytimer.function = (void *) dbusy_timer_handler;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
 }
index 9cc26b40a43771dee4d670f2416711897bc9ac1a..43effe7082ed9bba9537648eef9d950a0e172785 100644 (file)
@@ -424,9 +424,7 @@ dch_init(struct IsdnCardState *cs)
 
        cs->setstack_d      = dch_setstack;
 
-       cs->dbusytimer.function = (void *) dbusy_timer_handler;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
 
        cs->writeisac(cs, IPACX_TR_CONF0, 0x00);  // clear LDD
        cs->writeisac(cs, IPACX_TR_CONF2, 0x00);  // enable transmitter
index df7e05ca8f9c197acf94040606555fcd45a51067..4273b4548825136624a3687cae20b971fa3e0244 100644 (file)
@@ -677,7 +677,5 @@ void clear_pending_isac_ints(struct IsdnCardState *cs)
 void setup_isac(struct IsdnCardState *cs)
 {
        INIT_WORK(&cs->tqueue, isac_bh);
-       cs->dbusytimer.function = (void *) dbusy_timer_handler;
-       cs->dbusytimer.data = (long) cs;
-       init_timer(&cs->dbusytimer);
+       setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
 }
index f4956c73aa116de71a99a9ae705c81088f3fbed2..0dc60b287c4b000b94f3bb97183ad10bfc1254b9 100644 (file)
@@ -1902,10 +1902,8 @@ void initisar(struct IsdnCardState *cs)
        cs->bcs[1].BC_SetStack = setstack_isar;
        cs->bcs[0].BC_Close = close_isarstate;
        cs->bcs[1].BC_Close = close_isarstate;
-       cs->bcs[0].hw.isar.ftimer.function = (void *) ftimer_handler;
-       cs->bcs[0].hw.isar.ftimer.data = (long) &cs->bcs[0];
-       init_timer(&cs->bcs[0].hw.isar.ftimer);
-       cs->bcs[1].hw.isar.ftimer.function = (void *) ftimer_handler;
-       cs->bcs[1].hw.isar.ftimer.data = (long) &cs->bcs[1];
-       init_timer(&cs->bcs[1].hw.isar.ftimer);
+       setup_timer(&cs->bcs[0].hw.isar.ftimer, (void *)ftimer_handler,
+                   (long)&cs->bcs[0]);
+       setup_timer(&cs->bcs[1].hw.isar.ftimer, (void *)ftimer_handler,
+                   (long)&cs->bcs[1]);
 }
index c754706f83cdc190ca18e299896590430e7b9824..569ce52c567b2beb9f42097bebe88af025d6dbed 100644 (file)
@@ -169,9 +169,7 @@ void
 L3InitTimer(struct l3_process *pc, struct L3Timer *t)
 {
        t->pc = pc;
-       t->tl.function = (void *) L3ExpireTimer;
-       t->tl.data = (long) t;
-       init_timer(&t->tl);
+       setup_timer(&t->tl, (void *)L3ExpireTimer, (long)t);
 }
 
 void
index bf647545c70c45b9bc5b0e381af27f67f17a3662..950399f066ef109cc558f09e438dc729a0843643 100644 (file)
@@ -278,9 +278,7 @@ int setup_TeleInt(struct IsdnCard *card)
        cs->bcs[0].hw.hfc.send = NULL;
        cs->bcs[1].hw.hfc.send = NULL;
        cs->hw.hfc.fifosize = 7 * 1024 + 512;
-       cs->hw.hfc.timer.function = (void *) TeleInt_Timer;
-       cs->hw.hfc.timer.data = (long) cs;
-       init_timer(&cs->hw.hfc.timer);
+       setup_timer(&cs->hw.hfc.timer, (void *)TeleInt_Timer, (long)cs);
        if (!request_region(cs->hw.hfc.addr, 2, "TeleInt isdn")) {
                printk(KERN_WARNING
                       "HiSax: TeleInt config port %x-%x already in use\n",
index a85895585d906a6367db5cce95b00f031484d05d..c99f0ec58a0189885cdfdea5ae612f73e30c5925 100644 (file)
@@ -901,9 +901,8 @@ static void initW6692(struct IsdnCardState *cs, int part)
        if (part & 1) {
                cs->setstack_d = setstack_W6692;
                cs->DC_Close = DC_Close_W6692;
-               cs->dbusytimer.function = (void *) dbusy_timer_handler;
-               cs->dbusytimer.data = (long) cs;
-               init_timer(&cs->dbusytimer);
+               setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler,
+                           (long)cs);
                resetW6692(cs);
                ph_command(cs, W_L1CMD_RST);
                cs->dc.w6692.ph_state = W_L1CMD_RST;
index 9c1e8adaf4fc825c54ff84e9c85d36a68ecb5da7..d07dd5196ffca59c11532051fb88e2ecdc7326c9 100644 (file)
@@ -2370,9 +2370,8 @@ static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_s
                rs->state = CCPResetIdle;
                rs->is = is;
                rs->id = id;
-               init_timer(&rs->timer);
-               rs->timer.data = (unsigned long)rs;
-               rs->timer.function = isdn_ppp_ccp_timer_callback;
+               setup_timer(&rs->timer, isdn_ppp_ccp_timer_callback,
+                           (unsigned long)rs);
                is->reset->rs[id] = rs;
        }
        return rs;
index 1b169559a240b0c41b9fb8c34d4837ea55d96f77..ddd8207e4e54cf617e76a2f784bc56aeff76b22e 100644 (file)
@@ -1812,9 +1812,8 @@ isdn_tty_modem_init(void)
                info->isdn_channel = -1;
                info->drv_index = -1;
                info->xmit_size = ISDN_SERIAL_XMIT_SIZE;
-               init_timer(&info->nc_timer);
-               info->nc_timer.function = isdn_tty_modem_do_ncarrier;
-               info->nc_timer.data = (unsigned long) info;
+               setup_timer(&info->nc_timer, isdn_tty_modem_do_ncarrier,
+                           (unsigned long)info);
                skb_queue_head_init(&info->xmit_queue);
 #ifdef CONFIG_ISDN_AUDIO
                skb_queue_head_init(&info->dtmf_queue);
index 9b85295aa6578f5ac5c86803e0923f29404b2e13..880e9d367a399ae6efc450a4eca70623717b8f51 100644 (file)
@@ -1092,9 +1092,7 @@ dspcreate(struct channel_req *crq)
        ndsp->pcm_bank_tx = -1;
        ndsp->hfc_conf = -1; /* current conference number */
        /* set tone timer */
-       ndsp->tone.tl.function = (void *)dsp_tone_timeout;
-       ndsp->tone.tl.data = (long) ndsp;
-       init_timer(&ndsp->tone.tl);
+       setup_timer(&ndsp->tone.tl, (void *)dsp_tone_timeout, (long)ndsp);
 
        if (dtmfthreshold < 20 || dtmfthreshold > 500)
                dtmfthreshold = 200;
index 26477d48bbda99cba2697e93f075703047098bab..78fc5d5e90514353b258658da350a85c408b89db 100644 (file)
@@ -110,13 +110,11 @@ void
 mISDN_FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft)
 {
        ft->fi = fi;
-       ft->tl.function = (void *) FsmExpireTimer;
-       ft->tl.data = (long) ft;
 #if FSM_TIMER_DEBUG
        if (ft->fi->debug)
                ft->fi->printdebug(ft->fi, "mISDN_FsmInitTimer %lx", (long) ft);
 #endif
-       init_timer(&ft->tl);
+       setup_timer(&ft->tl, (void *)FsmExpireTimer, (long)ft);
 }
 EXPORT_SYMBOL(mISDN_FsmInitTimer);
 
index 6ceca7db62ad42c91c10561a2e6f3330ddbefa2a..6be2041248d34832e0ee9af75d753b96c1ca8df2 100644 (file)
@@ -1443,9 +1443,7 @@ init_card(struct l1oip *hc, int pri, int bundle)
        hc->keep_tl.expires = jiffies + 2 * HZ; /* two seconds first time */
        add_timer(&hc->keep_tl);
 
-       hc->timeout_tl.function = (void *)l1oip_timeout;
-       hc->timeout_tl.data = (ulong)hc;
-       init_timer(&hc->timeout_tl);
+       setup_timer(&hc->timeout_tl, (void *)l1oip_timeout, (ulong)hc);
        hc->timeout_on = 0; /* state that we have timer off */
 
        return 0;
index f4ffd1eb8f44c3d5c44c50277fb703545157dbcf..dfb75979e4555d806ea52a494e161d4c6f8fa86b 100644 (file)
@@ -989,26 +989,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
        struct dm_offload *o = container_of(cb, struct dm_offload, cb);
        struct bio_list list;
        struct bio *bio;
+       int i;
 
        INIT_LIST_HEAD(&o->cb.list);
 
        if (unlikely(!current->bio_list))
                return;
 
-       list = *current->bio_list;
-       bio_list_init(current->bio_list);
-
-       while ((bio = bio_list_pop(&list))) {
-               struct bio_set *bs = bio->bi_pool;
-               if (unlikely(!bs) || bs == fs_bio_set) {
-                       bio_list_add(current->bio_list, bio);
-                       continue;
+       for (i = 0; i < 2; i++) {
+               list = current->bio_list[i];
+               bio_list_init(&current->bio_list[i]);
+
+               while ((bio = bio_list_pop(&list))) {
+                       struct bio_set *bs = bio->bi_pool;
+                       if (unlikely(!bs) || bs == fs_bio_set) {
+                               bio_list_add(&current->bio_list[i], bio);
+                               continue;
+                       }
+
+                       spin_lock(&bs->rescue_lock);
+                       bio_list_add(&bs->rescue_list, bio);
+                       queue_work(bs->rescue_workqueue, &bs->rescue_work);
+                       spin_unlock(&bs->rescue_lock);
                }
-
-               spin_lock(&bs->rescue_lock);
-               bio_list_add(&bs->rescue_list, bio);
-               queue_work(bs->rescue_workqueue, &bs->rescue_work);
-               spin_unlock(&bs->rescue_lock);
        }
 }
 
index 2b13117fb918cbe27775ba61cc68c6f78e5408ff..321ecac23027804d18ded577a5c05604ec46220a 100644 (file)
@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                bm_lockres->flags |= DLM_LKF_NOQUEUE;
                ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
                if (ret == -EAGAIN) {
-                       memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
                        s = read_resync_info(mddev, bm_lockres);
                        if (s) {
                                pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
@@ -974,6 +973,7 @@ static int leave(struct mddev *mddev)
        lockres_free(cinfo->bitmap_lockres);
        unlock_all_bitmaps(mddev);
        dlm_release_lockspace(cinfo->lockspace, 2);
+       kfree(cinfo);
        return 0;
 }
 
index 548d1b8014f89e9f4b1170daff8fa677d758f39a..f6ae1d67bcd02c6b743258ef3ff6a05896828cb5 100644 (file)
@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_flush_request);
 
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
-       struct mddev *mddev = cb->data;
-       md_wakeup_thread(mddev->thread);
-       kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
 static inline struct mddev *mddev_get(struct mddev *mddev)
 {
        atomic_inc(&mddev->active);
@@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
        }
        sb = page_address(rdev->sb_page);
        sb->data_size = cpu_to_le64(num_sectors);
-       sb->super_offset = rdev->sb_start;
+       sb->super_offset = cpu_to_le64(rdev->sb_start);
        sb->sb_csum = calc_sb_1_csum(sb);
        do {
                md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
        /* Check if any mddev parameters have changed */
        if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
            (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
-           (mddev->layout != le64_to_cpu(sb->layout)) ||
+           (mddev->layout != le32_to_cpu(sb->layout)) ||
            (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
            (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
                return true;
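
Both one-liners above are field-width/endianness fixes: v1.x superblock fields are stored little-endian on disk, so the store into sb->super_offset needs cpu_to_le64(), and sb->layout is 32 bits wide, so reading it with le64_to_cpu() swapped the wrong width (garbage on big-endian hosts, silently harmless on little-endian ones). A kernel-style sketch of the rule, assuming struct mdp_superblock_1's annotated field types:

/* Conversions must match the annotated width of the on-disk field;
 * these mirror the two fixes above. */
sb->super_offset = cpu_to_le64(rdev->sb_start);    /* __le64 store */

if (mddev->layout != le32_to_cpu(sb->layout))      /* __le32 load  */
        /* superblock no longer matches the mddev */;
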
@@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
        mddev->layout        = info->layout;
        mddev->chunk_sectors = info->chunk_size >> 9;
 
-       mddev->max_disks     = MD_SB_DISKS;
-
        if (mddev->persistent) {
-               mddev->flags         = 0;
-               mddev->sb_flags         = 0;
+               mddev->max_disks = MD_SB_DISKS;
+               mddev->flags = 0;
+               mddev->sb_flags = 0;
        }
        set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 
@@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
                        return -ENOSPC;
        }
        rv = mddev->pers->resize(mddev, num_sectors);
-       if (!rv)
-               revalidate_disk(mddev->gendisk);
+       if (!rv) {
+               if (mddev->queue) {
+                       set_capacity(mddev->gendisk, mddev->array_sectors);
+                       revalidate_disk(mddev->gendisk);
+               }
+       }
        return rv;
 }
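
The mddev->queue check matters because arrays driven through dm-raid have no md-owned gendisk or request queue; with this change, update_size() becomes the single point where a capacity change propagates to the block layer, and the raid1/raid10/raid5 ->resize() hunks later in this section drop their duplicate calls. A kernel-style sketch of the consolidated path, using the names from the hunk:

/* Capacity propagation now happens in exactly one place, and only
 * when md itself owns the queue/gendisk (dm-raid does not). */
rv = mddev->pers->resize(mddev, num_sectors);
if (!rv && mddev->queue) {
        set_capacity(mddev->gendisk, mddev->array_sectors);
        revalidate_disk(mddev->gendisk);
}
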
 
index b8859cbf84b618b39ed3d92a2887e8764c403919..dde8ecb760c87113ba36d50c0d6867bc6e215f02 100644 (file)
@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                   struct mddev *mddev);
 
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
 extern void md_kick_rdev_from_array(struct md_rdev * rdev);
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
-       return !!blk_check_plugged(md_unplug, mddev,
-                                  sizeof(struct blk_plug_cb));
-}
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
 {
index fbc2d7851b497fec0cacd45832bbd9c9d258eaae..a34f58772022c9f40243e1d117a3473332bd76a2 100644 (file)
@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
 static void freeze_array(struct r1conf *conf, int extra)
 {
        /* Stop sync I/O and normal I/O and wait for everything to
-        * go quite.
+        * go quiet.
         * This is called in two situations:
         * 1) management command handlers (reshape, remove disk, quiesce).
         * 2) one normal I/O request failed.
@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
-               if (bio_data_dir(split) == READ)
+               if (bio_data_dir(split) == READ) {
                        raid1_read_request(mddev, split);
-               else
+
+                       /*
+                        * If a bio is split, the first part of the bio
+                        * passes the barrier, but the bio stays queued in
+                        * current->bio_list (see generic_make_request). If
+                        * raise_barrier() is called here, the second part
+                        * of the bio can't pass the barrier. But since the
+                        * first part hasn't been dispatched to the
+                        * underlying disks yet, the barrier is never
+                        * released, so raise_barrier() will wait forever:
+                        * a deadlock.
+                        * Note, this only happens on the read path. On the
+                        * write path, the first part of the bio is
+                        * dispatched in a schedule() call (because of the
+                        * blk plug) or offloaded to raid1d.
+                        * Returning from the function immediately changes
+                        * the order of bios queued in bio_list and avoids
+                        * the deadlock.
+                        */
+                       if (split != bio) {
+                               generic_make_request(bio);
+                               break;
+                       }
+               } else
                        raid1_write_request(mddev, split);
        } while (split != bio);
 }
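
Concretely: a READ bio B is split into B1 (served now) and a remainder; the remainder cannot pass the barrier until B1 reaches the disks, but B1 sits parked on current->bio_list for as long as this function keeps looping, so nothing progresses. Returning hands the remainder back to generic_make_request(), which reorders the queued bios and breaks the cycle. A compressed kernel-style sketch of the fixed read path, names as in the hunk:

/* Serve only the first fragment of a split READ, requeue the rest,
 * and unwind instead of looping with the barrier still held up. */
if (bio_data_dir(split) == READ) {
        raid1_read_request(mddev, split);
        if (split != bio) {
                generic_make_request(bio);  /* remainder, requeued */
                break;                      /* unwind the loop     */
        }
}
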
@@ -3246,8 +3267,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
                        return ret;
        }
        md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
        if (sectors > mddev->dev_sectors &&
            mddev->recovery_cp > mddev->dev_sectors) {
                mddev->recovery_cp = mddev->dev_sectors;
index 063c43d83b72c2f0f753edb7b08f8dd608fa15ad..e89a8d78a9ed537f417c414b2081ef5f9a97f291 100644 (file)
@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
                                    !conf->barrier ||
                                    (atomic_read(&conf->nr_pending) &&
                                     current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    (!bio_list_empty(&current->bio_list[0]) ||
+                                     !bio_list_empty(&current->bio_list[1]))),
                                    conf->resync_lock);
                conf->nr_waiting--;
                if (!conf->nr_waiting)
@@ -1477,11 +1478,24 @@ retry_write:
                        mbio->bi_bdev = (void*)rdev;
 
                        atomic_inc(&r10_bio->remaining);
+
+                       cb = blk_check_plugged(raid10_unplug, mddev,
+                                              sizeof(*plug));
+                       if (cb)
+                               plug = container_of(cb, struct raid10_plug_cb,
+                                                   cb);
+                       else
+                               plug = NULL;
                        spin_lock_irqsave(&conf->device_lock, flags);
-                       bio_list_add(&conf->pending_bio_list, mbio);
-                       conf->pending_count++;
+                       if (plug) {
+                               bio_list_add(&plug->pending, mbio);
+                               plug->pending_cnt++;
+                       } else {
+                               bio_list_add(&conf->pending_bio_list, mbio);
+                               conf->pending_count++;
+                       }
                        spin_unlock_irqrestore(&conf->device_lock, flags);
-                       if (!mddev_check_plugged(mddev))
+                       if (!plug)
                                md_wakeup_thread(mddev->thread);
                }
        }
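
This replaces the removed mddev_check_plugged() helper (see the md.h hunk earlier) with explicit per-plug batching: while the submitting task holds a blk plug, writes accumulate on a private list that the unplug callback flushes, and only unplugged submissions need to wake the raid10d thread. A kernel-style sketch of the idiom, names as in the hunk:

/* Batch into the per-plug list while a plug is active; otherwise
 * queue for raid10d and wake it. raid10_unplug() drains the batch. */
cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
plug = cb ? container_of(cb, struct raid10_plug_cb, cb) : NULL;

if (plug)
        bio_list_add(&plug->pending, mbio);
else
        md_wakeup_thread(mddev->thread);
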
@@ -1571,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
+               /*
+                * If a bio is split, the first part of the bio passes the
+                * barrier, but the bio stays queued in current->bio_list
+                * (see generic_make_request). If raise_barrier() is called
+                * here, the second part of the bio can't pass the barrier.
+                * But since the first part hasn't been dispatched to the
+                * underlying disks yet, the barrier is never released, so
+                * raise_barrier() will wait forever: a deadlock.
+                * Note, this only happens on the read path. On the write
+                * path, the first part of the bio is dispatched in a
+                * schedule() call (because of the blk plug) or offloaded
+                * to raid10d.
+                * Returning from the function immediately changes the
+                * order of bios queued in bio_list and avoids the deadlock.
+                */
                __make_request(mddev, split);
+               if (split != bio && bio_data_dir(bio) == READ) {
+                       generic_make_request(bio);
+                       break;
+               }
        } while (split != bio);
 
        /* In case raid10d snuck in to freeze_array */
@@ -3943,10 +3975,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
                        return ret;
        }
        md_set_array_sectors(mddev, size);
-       if (mddev->queue) {
-               set_capacity(mddev->gendisk, mddev->array_sectors);
-               revalidate_disk(mddev->gendisk);
-       }
        if (sectors > mddev->dev_sectors &&
            mddev->recovery_cp > oldsize) {
                mddev->recovery_cp = oldsize;
index 4fb09b3fcb410468a9b1939b93d9529e70dd592d..ed5cd705b985f13611d26b44e81aefbb0e93c306 100644 (file)
@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
                     (test_bit(R5_Wantdrain, &dev->flags) ||
                      test_bit(R5_InJournal, &dev->flags))) ||
                    (srctype == SYNDROME_SRC_WRITTEN &&
-                    dev->written)) {
+                    (dev->written ||
+                     test_bit(R5_InJournal, &dev->flags)))) {
                        if (test_bit(R5_InJournal, &dev->flags))
                                srcs[slot] = sh->dev[i].orig_page;
                        else
@@ -7605,8 +7606,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
                        return ret;
        }
        md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
        if (sectors > mddev->dev_sectors &&
            mddev->recovery_cp > mddev->dev_sectors) {
                mddev->recovery_cp = mddev->dev_sectors;
index 98ed4d96987c87fda074a219a234cbf8e48f2b9a..57fc47ad5ab3cb79943f91fdb3969e63ee0c3bd8 100644 (file)
@@ -18,7 +18,7 @@ obj-$(CONFIG_MII) += mii.o
 obj-$(CONFIG_MDIO) += mdio.o
 obj-$(CONFIG_NET) += Space.o loopback.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
-obj-$(CONFIG_PHYLIB) += phy/
+obj-y += phy/
 obj-$(CONFIG_RIONET) += rionet.o
 obj-$(CONFIG_NET_TEAM) += team/
 obj-$(CONFIG_TUN) += tun.o
index edc70ffad6607ac06d0a40b48316bef554c5f4c2..c5fd4259da331b27503644938ab22787e2eea8ae 100644 (file)
@@ -92,6 +92,7 @@ enum ad_link_speed_type {
        AD_LINK_SPEED_2500MBPS,
        AD_LINK_SPEED_10000MBPS,
        AD_LINK_SPEED_20000MBPS,
+       AD_LINK_SPEED_25000MBPS,
        AD_LINK_SPEED_40000MBPS,
        AD_LINK_SPEED_56000MBPS,
        AD_LINK_SPEED_100000MBPS,
@@ -260,6 +261,7 @@ static inline int __check_agg_selection_timer(struct port *port)
  *     %AD_LINK_SPEED_2500MBPS,
  *     %AD_LINK_SPEED_10000MBPS
  *     %AD_LINK_SPEED_20000MBPS
+ *     %AD_LINK_SPEED_25000MBPS
  *     %AD_LINK_SPEED_40000MBPS
  *     %AD_LINK_SPEED_56000MBPS
  *     %AD_LINK_SPEED_100000MBPS
@@ -302,6 +304,10 @@ static u16 __get_link_speed(struct port *port)
                        speed = AD_LINK_SPEED_20000MBPS;
                        break;
 
+               case SPEED_25000:
+                       speed = AD_LINK_SPEED_25000MBPS;
+                       break;
+
                case SPEED_40000:
                        speed = AD_LINK_SPEED_40000MBPS;
                        break;
@@ -707,6 +713,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
                case AD_LINK_SPEED_20000MBPS:
                        bandwidth = nports * 20000;
                        break;
+               case AD_LINK_SPEED_25000MBPS:
+                       bandwidth = nports * 25000;
+                       break;
                case AD_LINK_SPEED_40000MBPS:
                        bandwidth = nports * 40000;
                        break;
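
Adding a link speed to 802.3ad bonding touches three places, as the hunks above show for 25G: the ad_link_speed_type enum, the SPEED_* mapping in __get_link_speed(), and the bandwidth table in __get_agg_bandwidth(). A standalone model of the mapping (values illustrative):

#include <stdio.h>

enum ad_link_speed_type { AD_LINK_SPEED_20000MBPS = 1,
                          AD_LINK_SPEED_25000MBPS,
                          AD_LINK_SPEED_40000MBPS };

static unsigned int agg_bandwidth(enum ad_link_speed_type s, int nports)
{
        switch (s) {
        case AD_LINK_SPEED_20000MBPS: return nports * 20000;
        case AD_LINK_SPEED_25000MBPS: return nports * 25000;
        case AD_LINK_SPEED_40000MBPS: return nports * 40000;
        }
        return 0;
}

int main(void)
{
        /* Four 25G ports aggregate to 100000 Mbps. */
        printf("%u Mbps\n", agg_bandwidth(AD_LINK_SPEED_25000MBPS, 4));
        return 0;
}
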
@@ -1052,8 +1061,7 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
                port->sm_rx_state = AD_RX_INITIALIZE;
                port->sm_vars |= AD_PORT_CHURNED;
        /* check if port is not enabled */
-       } else if (!(port->sm_vars & AD_PORT_BEGIN)
-                && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED))
+       } else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled)
                port->sm_rx_state = AD_RX_PORT_DISABLED;
        /* check if new lacpdu arrived */
        else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) ||
@@ -1081,11 +1089,8 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
                        /* if no lacpdu arrived and no timer is on */
                        switch (port->sm_rx_state) {
                        case AD_RX_PORT_DISABLED:
-                               if (port->sm_vars & AD_PORT_MOVED)
-                                       port->sm_rx_state = AD_RX_INITIALIZE;
-                               else if (port->is_enabled
-                                        && (port->sm_vars
-                                            & AD_PORT_LACP_ENABLED))
+                               if (port->is_enabled &&
+                                   (port->sm_vars & AD_PORT_LACP_ENABLED))
                                        port->sm_rx_state = AD_RX_EXPIRED;
                                else if (port->is_enabled
                                         && ((port->sm_vars
@@ -1115,7 +1120,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
                        port->sm_vars &= ~AD_PORT_SELECTED;
                        __record_default(port);
                        port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
-                       port->sm_vars &= ~AD_PORT_MOVED;
                        port->sm_rx_state = AD_RX_PORT_DISABLED;
 
                        /* Fall Through */
@@ -2442,9 +2446,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave)
 
        spin_lock_bh(&slave->bond->mode_lock);
        ad_update_actor_keys(port, false);
+       spin_unlock_bh(&slave->bond->mode_lock);
        netdev_dbg(slave->bond->dev, "Port %d slave %s changed speed/duplex\n",
                   port->actor_port_number, slave->dev->name);
-       spin_unlock_bh(&slave->bond->mode_lock);
 }
 
 /**
@@ -2488,12 +2492,12 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
        agg = __get_first_agg(port);
        ad_agg_selection_logic(agg, &dummy);
 
+       spin_unlock_bh(&slave->bond->mode_lock);
+
        netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n",
                   port->actor_port_number,
                   link == BOND_LINK_UP ? "UP" : "DOWN");
 
-       spin_unlock_bh(&slave->bond->mode_lock);
-
        /* RTNL is held and mode_lock is released so it's safe
         * to update slave_array here.
         */
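
Both 3ad hunks above shrink the mode_lock critical section so that only the shared-state update runs under the spinlock and the debug print happens after the unlock, presumably to keep hold times minimal; the message reads nothing the lock protects. The general shape, as a minimal kernel-style sketch:

/* Mutate shared state under the lock, report after dropping it. */
spin_lock_bh(&slave->bond->mode_lock);
ad_update_actor_keys(port, false);      /* needs mode_lock */
spin_unlock_bh(&slave->bond->mode_lock);

netdev_dbg(slave->bond->dev, "Port %d slave %s changed speed/duplex\n",
           port->actor_port_number, slave->dev->name);
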
index 8a4ba8b88e52f9d5b1ba318e5dbfb53344f6ebca..27359dab78a1d750c520c3a02e5914ea138b093b 100644 (file)
@@ -201,12 +201,6 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0);
 
 unsigned int bond_net_id __read_mostly;
 
-static __be32 arp_target[BOND_MAX_ARP_TARGETS];
-static int arp_ip_count;
-static int bond_mode   = BOND_MODE_ROUNDROBIN;
-static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
-static int lacp_fast;
-
 /*-------------------------- Forward declarations ---------------------------*/
 
 static int bond_init(struct net_device *bond_dev);
@@ -371,9 +365,10 @@ down:
 /* Get link speed and duplex from the slave's base driver
  * using ethtool. If for some reason the call fails or the
  * values are invalid, set speed and duplex to -1,
- * and return.
+ * and return. Return 1 if speed or duplex settings are
+ * UNKNOWN; 0 otherwise.
  */
-static void bond_update_speed_duplex(struct slave *slave)
+static int bond_update_speed_duplex(struct slave *slave)
 {
        struct net_device *slave_dev = slave->dev;
        struct ethtool_link_ksettings ecmd;
@@ -383,24 +378,27 @@ static void bond_update_speed_duplex(struct slave *slave)
        slave->duplex = DUPLEX_UNKNOWN;
 
        res = __ethtool_get_link_ksettings(slave_dev, &ecmd);
-       if (res < 0)
-               return;
-
-       if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1))
-               return;
-
+       if (res < 0) {
+               slave->link = BOND_LINK_DOWN;
+               return 1;
+       }
+       if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1)) {
+               slave->link = BOND_LINK_DOWN;
+               return 1;
+       }
        switch (ecmd.base.duplex) {
        case DUPLEX_FULL:
        case DUPLEX_HALF:
                break;
        default:
-               return;
+               slave->link = BOND_LINK_DOWN;
+               return 1;
        }
 
        slave->speed = ecmd.base.speed;
        slave->duplex = ecmd.base.duplex;
 
-       return;
+       return 0;
 }
 
 const char *bond_slave_link_status(s8 link)
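
The switch from void to int lets the miimon commit path (two hunks below) veto a link-up transition when the slave's driver cannot report a usable speed/duplex, which matters in 802.3ad mode where the actor key is derived from those values. A standalone model of the probe-and-veto shape, with illustrative types:

#include <stdio.h>

struct slave_model { int speed, duplex, link; };

static int update_speed_duplex(struct slave_model *s, int speed, int duplex)
{
        s->speed = -1;
        s->duplex = -1;
        if (speed <= 0 || duplex < 0) {
                s->link = 0;            /* treat as link down */
                return 1;               /* report failure     */
        }
        s->speed = speed;
        s->duplex = duplex;
        return 0;
}

int main(void)
{
        struct slave_model s = { 0, 0, 1 };

        if (update_speed_duplex(&s, 0, 1))
                printf("skipping link-up commit\n");
        return 0;
}
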
@@ -2039,8 +2037,7 @@ static int bond_miimon_inspect(struct bonding *bond)
                        if (link_state)
                                continue;
 
-                       bond_set_slave_link_state(slave, BOND_LINK_FAIL,
-                                                 BOND_SLAVE_NOTIFY_LATER);
+                       bond_propose_link_state(slave, BOND_LINK_FAIL);
                        slave->delay = bond->params.downdelay;
                        if (slave->delay) {
                                netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
@@ -2055,8 +2052,7 @@ static int bond_miimon_inspect(struct bonding *bond)
                case BOND_LINK_FAIL:
                        if (link_state) {
                                /* recovered before downdelay expired */
-                               bond_set_slave_link_state(slave, BOND_LINK_UP,
-                                                         BOND_SLAVE_NOTIFY_LATER);
+                               bond_propose_link_state(slave, BOND_LINK_UP);
                                slave->last_link_up = jiffies;
                                netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
                                            (bond->params.downdelay - slave->delay) *
@@ -2078,8 +2074,7 @@ static int bond_miimon_inspect(struct bonding *bond)
                        if (!link_state)
                                continue;
 
-                       bond_set_slave_link_state(slave, BOND_LINK_BACK,
-                                                 BOND_SLAVE_NOTIFY_LATER);
+                       bond_propose_link_state(slave, BOND_LINK_BACK);
                        slave->delay = bond->params.updelay;
 
                        if (slave->delay) {
@@ -2092,9 +2087,7 @@ static int bond_miimon_inspect(struct bonding *bond)
                        /*FALLTHRU*/
                case BOND_LINK_BACK:
                        if (!link_state) {
-                               bond_set_slave_link_state(slave,
-                                                         BOND_LINK_DOWN,
-                                                         BOND_SLAVE_NOTIFY_LATER);
+                               bond_propose_link_state(slave, BOND_LINK_DOWN);
                                netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
                                            (bond->params.updelay - slave->delay) *
                                            bond->params.miimon,
@@ -2132,7 +2125,12 @@ static void bond_miimon_commit(struct bonding *bond)
                        continue;
 
                case BOND_LINK_UP:
-                       bond_update_speed_duplex(slave);
+                       if (bond_update_speed_duplex(slave)) {
+                               netdev_warn(bond->dev,
+                                           "failed to get link speed/duplex for %s\n",
+                                           slave->dev->name);
+                               continue;
+                       }
                        bond_set_slave_link_state(slave, BOND_LINK_UP,
                                                  BOND_SLAVE_NOTIFY_NOW);
                        slave->last_link_up = jiffies;
@@ -2231,6 +2229,8 @@ static void bond_mii_monitor(struct work_struct *work)
                                            mii_work.work);
        bool should_notify_peers = false;
        unsigned long delay;
+       struct slave *slave;
+       struct list_head *iter;
 
        delay = msecs_to_jiffies(bond->params.miimon);
 
@@ -2251,6 +2251,9 @@ static void bond_mii_monitor(struct work_struct *work)
                        goto re_arm;
                }
 
+               bond_for_each_slave(bond, slave, iter) {
+                       bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
+               }
                bond_miimon_commit(bond);
 
                rtnl_unlock();  /* might sleep, hold no other locks */
@@ -2575,10 +2578,8 @@ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
  * arp is transmitted to generate traffic. see activebackup_arp_monitor for
  * arp monitoring in active backup mode.
  */
-static void bond_loadbalance_arp_mon(struct work_struct *work)
+static void bond_loadbalance_arp_mon(struct bonding *bond)
 {
-       struct bonding *bond = container_of(work, struct bonding,
-                                           arp_work.work);
        struct slave *slave, *oldcurrent;
        struct list_head *iter;
        int do_failover = 0, slave_state_changed = 0;
@@ -2916,10 +2917,8 @@ check_state:
        return should_notify_rtnl;
 }
 
-static void bond_activebackup_arp_mon(struct work_struct *work)
+static void bond_activebackup_arp_mon(struct bonding *bond)
 {
-       struct bonding *bond = container_of(work, struct bonding,
-                                           arp_work.work);
        bool should_notify_peers = false;
        bool should_notify_rtnl = false;
        int delta_in_ticks;
@@ -2972,6 +2971,17 @@ re_arm:
        }
 }
 
+static void bond_arp_monitor(struct work_struct *work)
+{
+       struct bonding *bond = container_of(work, struct bonding,
+                                           arp_work.work);
+
+       if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
+               bond_activebackup_arp_mon(bond);
+       else
+               bond_loadbalance_arp_mon(bond);
+}
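
With this dispatcher, the ARP-monitor work item can be bound once (see the bond_work_init_all() and bond_create() hunks below) and still honor a bonding mode changed after creation; the branch happens when the work fires rather than when it is initialized. A standalone model of the shape:

#include <stdio.h>

enum mode { MODE_ACTIVEBACKUP, MODE_LOADBALANCE };

static void activebackup_mon(void) { printf("active-backup scan\n"); }
static void loadbalance_mon(void)  { printf("load-balance scan\n"); }

/* One worker bound once; the mode is consulted at run time. */
static void arp_monitor(enum mode m)
{
        if (m == MODE_ACTIVEBACKUP)
                activebackup_mon();
        else
                loadbalance_mon();
}

int main(void)
{
        arp_monitor(MODE_LOADBALANCE);
        return 0;
}
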
+
 /*-------------------------- netdev event handling --------------------------*/
 
 /* Change device name */
@@ -3228,10 +3238,7 @@ static void bond_work_init_all(struct bonding *bond)
                          bond_resend_igmp_join_requests_delayed);
        INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
        INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
-       if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
-               INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon);
-       else
-               INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
+       INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
        INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
        INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
 }
@@ -3266,8 +3273,6 @@ static int bond_open(struct net_device *bond_dev)
                }
        }
 
-       bond_work_init_all(bond);
-
        if (bond_is_lb(bond)) {
                /* bond_alb_initialize must be called before the timer
                 * is started.
@@ -3327,12 +3332,17 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
        for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) {
                u64 nv = new[i];
                u64 ov = old[i];
+               s64 delta = nv - ov;
 
                /* detects if this particular field is 32bit only */
                if (((nv | ov) >> 32) == 0)
-                       res[i] += (u32)nv - (u32)ov;
-               else
-                       res[i] += nv - ov;
+                       delta = (s64)(s32)((u32)nv - (u32)ov);
+
+               /* filter anomalies, some drivers reset their stats
+                * at down/up events.
+                */
+               if (delta > 0)
+                       res[i] += delta;
        }
 }
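
The rewritten fold computes a signed delta, does the subtraction in 32 bits whenever both samples fit in 32 bits so a counter wrap still yields a small positive delta, and discards negative deltas from drivers that zero their stats across down/up. A standalone model you can compile to check the arithmetic:

#include <stdio.h>
#include <stdint.h>

static void fold(uint64_t *res, uint64_t nv, uint64_t ov)
{
        int64_t delta = nv - ov;

        if (((nv | ov) >> 32) == 0)     /* field is 32 bit only */
                delta = (int64_t)(int32_t)((uint32_t)nv - (uint32_t)ov);

        if (delta > 0)                  /* filter reset anomalies */
                *res += delta;
}

int main(void)
{
        uint64_t res = 0;

        fold(&res, 5, 0xfffffff0u);     /* 32-bit wrap: delta = 21 */
        fold(&res, 0, 1000);            /* stats reset: ignored    */
        printf("%llu\n", (unsigned long long)res);
        return 0;
}
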
 
@@ -4252,6 +4262,12 @@ static int bond_check_params(struct bond_params *params)
        int arp_all_targets_value;
        u16 ad_actor_sys_prio = 0;
        u16 ad_user_port_key = 0;
+       __be32 arp_target[BOND_MAX_ARP_TARGETS];
+       int arp_ip_count;
+       int bond_mode   = BOND_MODE_ROUNDROBIN;
+       int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
+       int lacp_fast = 0;
+       int tlb_dynamic_lb = 0;
 
        /* Convert string parameters. */
        if (mode) {
@@ -4564,6 +4580,17 @@ static int bond_check_params(struct bond_params *params)
        }
        ad_user_port_key = valptr->value;
 
+       if (bond_mode == BOND_MODE_TLB) {
+               bond_opt_initstr(&newval, "default");
+               valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
+                                       &newval);
+               if (!valptr) {
+                       pr_err("Error: No tlb_dynamic_lb default value\n");
+                       return -EINVAL;
+               }
+               tlb_dynamic_lb = valptr->value;
+       }
+
        if (lp_interval == 0) {
                pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
                        INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
@@ -4591,7 +4618,7 @@ static int bond_check_params(struct bond_params *params)
        params->min_links = min_links;
        params->lp_interval = lp_interval;
        params->packets_per_slave = packets_per_slave;
-       params->tlb_dynamic_lb = 1; /* Default value */
+       params->tlb_dynamic_lb = tlb_dynamic_lb;
        params->ad_actor_sys_prio = ad_actor_sys_prio;
        eth_zero_addr(params->ad_actor_system);
        params->ad_user_port_key = ad_user_port_key;
@@ -4687,6 +4714,8 @@ int bond_create(struct net *net, const char *name)
 
        netif_carrier_off(bond_dev);
 
+       bond_work_init_all(bond);
+
        rtnl_unlock();
        if (res < 0)
                bond_destructor(bond_dev);
index 91c876a0a647ba1b17bdc734db168c9ee456409e..da020418a6526bed0465eeaa67a3fd61d83afb34 100644 (file)
@@ -1412,31 +1412,39 @@ e100_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        return rc;
 }
 
-static int e100_get_settings(struct net_device *dev,
-                            struct ethtool_cmd *cmd)
+static int e100_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct net_local *np = netdev_priv(dev);
+       u32 supported;
        int err;
 
        spin_lock_irq(&np->lock);
-       err = mii_ethtool_gset(&np->mii_if, cmd);
+       err = mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
        spin_unlock_irq(&np->lock);
 
        /* The PHY may support 1000baseT, but the Etrax100 does not.  */
-       cmd->supported &= ~(SUPPORTED_1000baseT_Half
-                           | SUPPORTED_1000baseT_Full);
+       ethtool_convert_link_mode_to_legacy_u32(&supported,
+                                               cmd->link_modes.supported);
+
+       supported &= ~(SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full);
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+
        return err;
 }
 
-static int e100_set_settings(struct net_device *dev,
-                            struct ethtool_cmd *ecmd)
+static int e100_set_link_ksettings(struct net_device *dev,
+                                  const struct ethtool_link_ksettings *ecmd)
 {
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       if (ecmd->base.autoneg == AUTONEG_ENABLE) {
                e100_set_duplex(dev, autoneg);
                e100_set_speed(dev, 0);
        } else {
-               e100_set_duplex(dev, ecmd->duplex == DUPLEX_HALF ? half : full);
-               e100_set_speed(dev, ecmd->speed == SPEED_10 ? 10: 100);
+               e100_set_duplex(dev, ecmd->base.duplex == DUPLEX_HALF ?
+                               half : full);
+               e100_set_speed(dev, ecmd->base.speed == SPEED_10 ? 10 : 100);
        }
 
        return 0;
@@ -1459,11 +1467,11 @@ static int e100_nway_reset(struct net_device *dev)
 }
 
 static const struct ethtool_ops e100_ethtool_ops = {
-       .get_settings   = e100_get_settings,
-       .set_settings   = e100_set_settings,
        .get_drvinfo    = e100_get_drvinfo,
        .nway_reset     = e100_nway_reset,
        .get_link       = ethtool_op_get_link,
+       .get_link_ksettings     = e100_get_link_ksettings,
+       .set_link_ksettings     = e100_set_link_ksettings,
 };
 
 static int
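
This is the standard recipe for porting a driver from get_settings/set_settings to the link_ksettings API: link modes are now bitmaps wider than 32 bits, so legacy SUPPORTED_* masking must round-trip through the conversion helpers, and speed/duplex/autoneg move into cmd->base. A kernel-style sketch of the masking round-trip, using only the helpers from the hunk:

/* Convert the wide bitmap to a legacy u32, mask the modes this MAC
 * cannot do, and convert back into the bitmap. */
u32 supported;

ethtool_convert_link_mode_to_legacy_u32(&supported,
                                        cmd->link_modes.supported);
supported &= ~(SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full);
ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
                                        supported);
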
index 065984670ff19a0a03c4818097f574de17907e87..ba2e655eec191a38171ae28fd208dd4a075ecfab 100644 (file)
@@ -11,7 +11,7 @@ config NET_DSA_MV88E6060
 
 config NET_DSA_BCM_SF2
        tristate "Broadcom Starfighter 2 Ethernet switch support"
-       depends on HAS_IOMEM && NET_DSA
+       depends on HAS_IOMEM && NET_DSA && OF_MDIO
        select NET_DSA_TAG_BRCM
        select FIXED_PHY
        select BCM7XXX_PHY
@@ -34,4 +34,12 @@ config NET_DSA_QCA8K
          This enables support for the Qualcomm Atheros QCA8K Ethernet
          switch chips.
 
+config NET_DSA_LOOP
+       tristate "DSA mock-up Ethernet switch chip support"
+       depends on NET_DSA
+       select FIXED_PHY
+       ---help---
+         This enables support for a fake mock-up switch chip which
+         exercises the DSA APIs.
+
 endmenu
index a3c94163221723c610791bd0e0a98df05c7dc221..5c8830991041b6e59314d4568fb17fcef21f5647 100644 (file)
@@ -2,6 +2,6 @@ obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_BCM_SF2)  += bcm-sf2.o
 bcm-sf2-objs                   := bcm_sf2.o bcm_sf2_cfp.o
 obj-$(CONFIG_NET_DSA_QCA8K)    += qca8k.o
-
 obj-y                          += b53/
 obj-y                          += mv88e6xxx/
+obj-$(CONFIG_NET_DSA_LOOP)     += dsa_loop.o dsa_loop_bdinfo.o
index 346dd9a1232dff12e24fef05b6e6352f106ed2be..2fb32d67065f8aa164b3ea03d5cb914120c4e0c6 100644 (file)
  */
 
 #include <linux/list.h>
-#include <net/dsa.h>
 #include <linux/ethtool.h>
 #include <linux/if_ether.h>
 #include <linux/in.h>
+#include <linux/netdevice.h>
+#include <net/dsa.h>
 #include <linux/bitmap.h>
 
 #include "bcm_sf2.h"
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
new file mode 100644 (file)
index 0000000..bc5acc1
--- /dev/null
@@ -0,0 +1,328 @@
+/*
+ * Distributed Switch Architecture loopback driver
+ *
+ * Copyright (C) 2016, Florian Fainelli <f.fainelli@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <linux/export.h>
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/if_bridge.h>
+#include <net/switchdev.h>
+#include <net/dsa.h>
+
+#include "dsa_loop.h"
+
+struct dsa_loop_vlan {
+       u16 members;
+       u16 untagged;
+};
+
+#define DSA_LOOP_VLANS 5
+
+struct dsa_loop_priv {
+       struct mii_bus  *bus;
+       unsigned int    port_base;
+       struct dsa_loop_vlan vlans[DSA_LOOP_VLANS];
+       struct net_device *netdev;
+       u16 pvid;
+};
+
+static struct phy_device *phydevs[PHY_MAX_ADDR];
+
+static enum dsa_tag_protocol dsa_loop_get_protocol(struct dsa_switch *ds)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return DSA_TAG_PROTO_NONE;
+}
+
+static int dsa_loop_setup(struct dsa_switch *ds)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return 0;
+}
+
+static int dsa_loop_set_addr(struct dsa_switch *ds, u8 *addr)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return 0;
+}
+
+static int dsa_loop_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+       struct dsa_loop_priv *ps = ds->priv;
+       struct mii_bus *bus = ps->bus;
+
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return mdiobus_read_nested(bus, ps->port_base + port, regnum);
+}
+
+static int dsa_loop_phy_write(struct dsa_switch *ds, int port,
+                             int regnum, u16 value)
+{
+       struct dsa_loop_priv *ps = ds->priv;
+       struct mii_bus *bus = ps->bus;
+
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return mdiobus_write_nested(bus, ps->port_base + port, regnum, value);
+}
+
+static int dsa_loop_port_bridge_join(struct dsa_switch *ds, int port,
+                                    struct net_device *bridge)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return 0;
+}
+
+static void dsa_loop_port_bridge_leave(struct dsa_switch *ds, int port,
+                                      struct net_device *bridge)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+}
+
+static void dsa_loop_port_stp_state_set(struct dsa_switch *ds, int port,
+                                       u8 state)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+}
+
+static int dsa_loop_port_vlan_filtering(struct dsa_switch *ds, int port,
+                                       bool vlan_filtering)
+{
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       return 0;
+}
+
+static int dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port,
+                                     const struct switchdev_obj_port_vlan *vlan,
+                                     struct switchdev_trans *trans)
+{
+       struct dsa_loop_priv *ps = ds->priv;
+       struct mii_bus *bus = ps->bus;
+
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       /* Just do a sleeping operation to make lockdep checks effective */
+       mdiobus_read(bus, ps->port_base + port, MII_BMSR);
+
+       if (vlan->vid_end >= DSA_LOOP_VLANS)
+               return -ERANGE;
+
+       return 0;
+}
+
+static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port,
+                                  const struct switchdev_obj_port_vlan *vlan,
+                                  struct switchdev_trans *trans)
+{
+       bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+       bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+       struct dsa_loop_priv *ps = ds->priv;
+       struct mii_bus *bus = ps->bus;
+       struct dsa_loop_vlan *vl;
+       u16 vid;
+
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       /* Just do a sleeping operation to make lockdep checks effective */
+       mdiobus_read(bus, ps->port_base + port, MII_BMSR);
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+               vl = &ps->vlans[vid];
+
+               vl->members |= BIT(port);
+               if (untagged)
+                       vl->untagged |= BIT(port);
+               else
+                       vl->untagged &= ~BIT(port);
+       }
+
+       if (pvid)
+               ps->pvid = vlan->vid_end; /* vid is vid_end + 1 after the loop */
+}
+
+static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port,
+                                 const struct switchdev_obj_port_vlan *vlan)
+{
+       bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+       struct dsa_loop_priv *ps = ds->priv;
+       struct mii_bus *bus = ps->bus;
+       struct dsa_loop_vlan *vl;
+       u16 vid, pvid = ps->pvid;
+
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       /* Just do a sleeping operation to make lockdep checks effective */
+       mdiobus_read(bus, ps->port_base + port, MII_BMSR);
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+               vl = &ps->vlans[vid];
+
+               vl->members &= ~BIT(port);
+               if (untagged)
+                       vl->untagged &= ~BIT(port);
+
+               if (pvid == vid)
+                       pvid = 1;
+       }
+       ps->pvid = pvid;
+
+       return 0;
+}
+
+static int dsa_loop_port_vlan_dump(struct dsa_switch *ds, int port,
+                                  struct switchdev_obj_port_vlan *vlan,
+                                  int (*cb)(struct switchdev_obj *obj))
+{
+       struct dsa_loop_priv *ps = ds->priv;
+       struct mii_bus *bus = ps->bus;
+       struct dsa_loop_vlan *vl;
+       u16 vid, vid_start = 0;
+       int err = 0;
+
+       dev_dbg(ds->dev, "%s\n", __func__);
+
+       /* Just do a sleeping operation to make lockdep checks effective */
+       mdiobus_read(bus, ps->port_base + port, MII_BMSR);
+
+       for (vid = vid_start; vid < DSA_LOOP_VLANS; vid++) {
+               vl = &ps->vlans[vid];
+
+               if (!(vl->members & BIT(port)))
+                       continue;
+
+               vlan->vid_begin = vlan->vid_end = vid;
+               vlan->flags = 0;
+
+               if (vl->untagged & BIT(port))
+                       vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+               if (ps->pvid == vid)
+                       vlan->flags |= BRIDGE_VLAN_INFO_PVID;
+
+               err = cb(&vlan->obj);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+static struct dsa_switch_ops dsa_loop_driver = {
+       .get_tag_protocol       = dsa_loop_get_protocol,
+       .setup                  = dsa_loop_setup,
+       .set_addr               = dsa_loop_set_addr,
+       .phy_read               = dsa_loop_phy_read,
+       .phy_write              = dsa_loop_phy_write,
+       .port_bridge_join       = dsa_loop_port_bridge_join,
+       .port_bridge_leave      = dsa_loop_port_bridge_leave,
+       .port_stp_state_set     = dsa_loop_port_stp_state_set,
+       .port_vlan_filtering    = dsa_loop_port_vlan_filtering,
+       .port_vlan_prepare      = dsa_loop_port_vlan_prepare,
+       .port_vlan_add          = dsa_loop_port_vlan_add,
+       .port_vlan_del          = dsa_loop_port_vlan_del,
+       .port_vlan_dump         = dsa_loop_port_vlan_dump,
+};
+
+static int dsa_loop_drv_probe(struct mdio_device *mdiodev)
+{
+       struct dsa_loop_pdata *pdata = mdiodev->dev.platform_data;
+       struct dsa_loop_priv *ps;
+       struct dsa_switch *ds;
+
+       if (!pdata)
+               return -ENODEV;
+
+       dev_info(&mdiodev->dev, "%s: 0x%0x\n",
+                pdata->name, pdata->enabled_ports);
+
+       ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS);
+       if (!ds)
+               return -ENOMEM;
+
+       ps = devm_kzalloc(&mdiodev->dev, sizeof(*ps), GFP_KERNEL);
+       if (!ps)
+               return -ENOMEM;
+
+       ps->netdev = dev_get_by_name(&init_net, pdata->netdev);
+       if (!ps->netdev)
+               return -EPROBE_DEFER;
+
+       pdata->cd.netdev[DSA_LOOP_CPU_PORT] = &ps->netdev->dev;
+
+       ds->dev = &mdiodev->dev;
+       ds->ops = &dsa_loop_driver;
+       ds->priv = ps;
+       ps->bus = mdiodev->bus;
+
+       dev_set_drvdata(&mdiodev->dev, ds);
+
+       return dsa_register_switch(ds, ds->dev);
+}
+
+static void dsa_loop_drv_remove(struct mdio_device *mdiodev)
+{
+       struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
+       struct dsa_loop_priv *ps = ds->priv;
+
+       dsa_unregister_switch(ds);
+       dev_put(ps->netdev);
+}
+
+static struct mdio_driver dsa_loop_drv = {
+       .mdiodrv.driver = {
+               .name   = "dsa-loop",
+       },
+       .probe  = dsa_loop_drv_probe,
+       .remove = dsa_loop_drv_remove,
+};
+
+#define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2)
+
+static void unregister_fixed_phys(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_FIXED_PHYS; i++)
+               if (phydevs[i])
+                       fixed_phy_unregister(phydevs[i]);
+}
+
+static int __init dsa_loop_init(void)
+{
+       struct fixed_phy_status status = {
+               .link = 1,
+               .speed = SPEED_100,
+               .duplex = DUPLEX_FULL,
+       };
+       unsigned int i;
+
+       for (i = 0; i < NUM_FIXED_PHYS; i++)
+               phydevs[i] = fixed_phy_register(PHY_POLL, &status, -1, NULL);
+
+       return mdio_driver_register(&dsa_loop_drv);
+}
+module_init(dsa_loop_init);
+
+static void __exit dsa_loop_exit(void)
+{
+       mdio_driver_unregister(&dsa_loop_drv);
+       unregister_fixed_phys();
+}
+module_exit(dsa_loop_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Fainelli");
+MODULE_DESCRIPTION("DSA loopback driver");
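
One probe detail worth calling out: the mock switch binds to its master netdev by name, so if that netdev is not registered yet the probe returns -EPROBE_DEFER and the driver core retries it later. A kernel-style sketch of the idiom, names as in the file above:

/* dev_get_by_name() takes a reference on success (dropped in .remove
 * via dev_put()); failure here means "too early", not "never". */
ps->netdev = dev_get_by_name(&init_net, pdata->netdev);
if (!ps->netdev)
        return -EPROBE_DEFER;
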
diff --git a/drivers/net/dsa/dsa_loop.h b/drivers/net/dsa/dsa_loop.h
new file mode 100644 (file)
index 0000000..dc39687
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef __DSA_LOOP_H
+#define __DSA_LOOP_H
+
+struct dsa_chip_data;
+
+struct dsa_loop_pdata {
+       /* Must be first, such that dsa_register_switch() can access this
+        * without gory pointer manipulations
+        */
+       struct dsa_chip_data cd;
+       const char *name;
+       unsigned int enabled_ports;
+       const char *netdev;
+};
+
+#define DSA_LOOP_NUM_PORTS     6
+#define DSA_LOOP_CPU_PORT      (DSA_LOOP_NUM_PORTS - 1)
+
+#endif /* __DSA_LOOP_H */
diff --git a/drivers/net/dsa/dsa_loop_bdinfo.c b/drivers/net/dsa/dsa_loop_bdinfo.c
new file mode 100644 (file)
index 0000000..fb8d5dc
--- /dev/null
@@ -0,0 +1,34 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/phy.h>
+#include <net/dsa.h>
+
+#include "dsa_loop.h"
+
+static struct dsa_loop_pdata dsa_loop_pdata = {
+       .cd = {
+               .port_names[0] = "lan1",
+               .port_names[1] = "lan2",
+               .port_names[2] = "lan3",
+               .port_names[3] = "lan4",
+               .port_names[DSA_LOOP_CPU_PORT] = "cpu",
+       },
+       .name = "DSA mockup driver",
+       .enabled_ports = 0x1f,
+       .netdev = "eth0",
+};
+
+static const struct mdio_board_info bdinfo = {
+       .bus_id = "fixed-0",
+       .modalias = "dsa-loop",
+       .mdio_addr = 31,
+       .platform_data = &dsa_loop_pdata,
+};
+
+static int __init dsa_loop_bdinfo_init(void)
+{
+       return mdiobus_register_board_info(&bdinfo, 1);
+}
+arch_initcall(dsa_loop_bdinfo_init)
+
+MODULE_LICENSE("GPL");
index c36be318de1aaf1d6d936ad97194b1524112bcbd..31d37a90cec7f853940664a313296bab6743d3a2 100644 (file)
@@ -1,5 +1,6 @@
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
 mv88e6xxx-objs := chip.o
 mv88e6xxx-objs += global1.o
+mv88e6xxx-objs += global1_atu.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
 mv88e6xxx-objs += port.o
index 03dc886ed3d6be1747d5cef7616f2eb3074a5492..44ba8cff5631ca7360106189badc9704c2372100 100644 (file)
@@ -8,6 +8,9 @@
  *
  * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
  *
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -687,11 +690,6 @@ static bool mv88e6xxx_6165_family(struct mv88e6xxx_chip *chip)
        return chip->info->family == MV88E6XXX_FAMILY_6165;
 }
 
-static bool mv88e6xxx_6320_family(struct mv88e6xxx_chip *chip)
-{
-       return chip->info->family == MV88E6XXX_FAMILY_6320;
-}
-
 static bool mv88e6xxx_6341_family(struct mv88e6xxx_chip *chip)
 {
        return chip->info->family == MV88E6XXX_FAMILY_6341;
@@ -1066,11 +1064,6 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
        mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip)
-{
-       return mv88e6xxx_g1_wait(chip, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY);
-}
-
 static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
                             struct ethtool_eee *e)
 {
@@ -1130,143 +1123,42 @@ out:
        return err;
 }
 
-static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd)
-{
-       u16 val;
-       int err;
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_ATU_FID)) {
-               err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid);
-               if (err)
-                       return err;
-       } else if (mv88e6xxx_num_databases(chip) == 256) {
-               /* ATU DBNum[7:4] are located in ATU Control 15:12 */
-               err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
-               if (err)
-                       return err;
-
-               err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL,
-                                        (val & 0xfff) | ((fid << 8) & 0xf000));
-               if (err)
-                       return err;
-
-               /* ATU DBNum[3:0] are located in ATU Operation 3:0 */
-               cmd |= fid & 0xf;
-       }
-
-       err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_OP, cmd);
-       if (err)
-               return err;
-
-       return _mv88e6xxx_atu_wait(chip);
-}
-
-static int _mv88e6xxx_atu_data_write(struct mv88e6xxx_chip *chip,
-                                    struct mv88e6xxx_atu_entry *entry)
-{
-       u16 data = entry->state & GLOBAL_ATU_DATA_STATE_MASK;
-
-       if (entry->state != GLOBAL_ATU_DATA_STATE_UNUSED) {
-               unsigned int mask, shift;
-
-               if (entry->trunk) {
-                       data |= GLOBAL_ATU_DATA_TRUNK;
-                       mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK;
-                       shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT;
-               } else {
-                       mask = GLOBAL_ATU_DATA_PORT_VECTOR_MASK;
-                       shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT;
-               }
-
-               data |= (entry->portv_trunkid << shift) & mask;
-       }
-
-       return mv88e6xxx_g1_write(chip, GLOBAL_ATU_DATA, data);
-}
-
-static int _mv88e6xxx_atu_flush_move(struct mv88e6xxx_chip *chip,
-                                    struct mv88e6xxx_atu_entry *entry,
-                                    bool static_too)
+static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
 {
-       int op;
-       int err;
-
-       err = _mv88e6xxx_atu_wait(chip);
-       if (err)
-               return err;
-
-       err = _mv88e6xxx_atu_data_write(chip, entry);
-       if (err)
-               return err;
-
-       if (entry->fid) {
-               op = static_too ? GLOBAL_ATU_OP_FLUSH_MOVE_ALL_DB :
-                       GLOBAL_ATU_OP_FLUSH_MOVE_NON_STATIC_DB;
-       } else {
-               op = static_too ? GLOBAL_ATU_OP_FLUSH_MOVE_ALL :
-                       GLOBAL_ATU_OP_FLUSH_MOVE_NON_STATIC;
-       }
-
-       return _mv88e6xxx_atu_cmd(chip, entry->fid, op);
-}
-
-static int _mv88e6xxx_atu_flush(struct mv88e6xxx_chip *chip,
-                               u16 fid, bool static_too)
-{
-       struct mv88e6xxx_atu_entry entry = {
-               .fid = fid,
-               .state = 0, /* EntryState bits must be 0 */
-       };
+       struct dsa_switch *ds = NULL;
+       struct net_device *br;
+       u16 pvlan;
+       int i;
 
-       return _mv88e6xxx_atu_flush_move(chip, &entry, static_too);
-}
+       if (dev < DSA_MAX_SWITCHES)
+               ds = chip->ds->dst->ds[dev];
 
-static int _mv88e6xxx_atu_move(struct mv88e6xxx_chip *chip, u16 fid,
-                              int from_port, int to_port, bool static_too)
-{
-       struct mv88e6xxx_atu_entry entry = {
-               .trunk = false,
-               .fid = fid,
-       };
+       /* Prevent frames from unknown switch or port */
+       if (!ds || port >= ds->num_ports)
+               return 0;
 
-       /* EntryState bits must be 0xF */
-       entry.state = GLOBAL_ATU_DATA_STATE_MASK;
+       /* Frames from DSA links and CPU ports can egress any local port */
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+               return mv88e6xxx_port_mask(chip);
 
-       /* ToPort and FromPort are respectively in PortVec bits 7:4 and 3:0 */
-       entry.portv_trunkid = (to_port & 0x0f) << 4;
-       entry.portv_trunkid |= from_port & 0x0f;
+       br = ds->ports[port].bridge_dev;
+       pvlan = 0;
 
-       return _mv88e6xxx_atu_flush_move(chip, &entry, static_too);
-}
+       /* Frames from user ports can egress any local DSA links and CPU ports,
+        * as well as any local member of their bridge group.
+        */
+       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i)
+               if (dsa_is_cpu_port(chip->ds, i) ||
+                   dsa_is_dsa_port(chip->ds, i) ||
+                   (br && chip->ds->ports[i].bridge_dev == br))
+                       pvlan |= BIT(i);
 
-static int _mv88e6xxx_atu_remove(struct mv88e6xxx_chip *chip, u16 fid,
-                                int port, bool static_too)
-{
-       /* Destination port 0xF means remove the entries */
-       return _mv88e6xxx_atu_move(chip, fid, port, 0x0f, static_too);
+       return pvlan;
 }
 
-static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
+static int mv88e6xxx_port_vlan_map(struct mv88e6xxx_chip *chip, int port)
 {
-       struct dsa_switch *ds = chip->ds;
-       struct net_device *bridge = ds->ports[port].bridge_dev;
-       u16 output_ports = 0;
-       int i;
-
-       /* allow CPU port or DSA link(s) to send frames to every port */
-       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
-               output_ports = ~0;
-       } else {
-               for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
-                       /* allow sending frames to every group member */
-                       if (bridge && ds->ports[i].bridge_dev == bridge)
-                               output_ports |= BIT(i);
-
-                       /* allow sending frames to CPU port and DSA link(s) */
-                       if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
-                               output_ports |= BIT(i);
-               }
-       }
+       u16 output_ports = mv88e6xxx_port_vlan(chip, chip->ds->index, port);
 
        /* prevent frames from going back out of the port they came in on */
        output_ports &= ~BIT(port);
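
mv88e6xxx_port_vlan() generalizes the old in-chip logic to take an explicit source (dev, port) pair, so one egress-policy function can serve both the local port-based VLAN map here and the cross-chip Port VLAN Table entries programmed in the pvt hunks that follow. A kernel-style sketch of the two consumers (other_dev and other_port are illustrative names):

/* One policy function, two consumers. */
u16 mask;

/* In-chip: frames sourced from a local port. */
mask = mv88e6xxx_port_vlan(chip, chip->ds->index, port);
mask &= ~BIT(port);     /* never reflect frames out the ingress port */

/* Cross-chip (see mv88e6xxx_pvt_map() below): frames sourced from
 * another switch in the DSA tree, keyed by its device index. */
mask = mv88e6xxx_port_vlan(chip, other_dev, other_port);
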
@@ -1306,13 +1198,68 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
                netdev_err(ds->ports[port].netdev, "failed to update state\n");
 }
 
+static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6xxx_g1_atu_flush(chip, 0, true);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_atu_set_learn2all(chip, true);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g1_atu_set_age_time(chip, 300000);
+}
+
+static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port)
+{
+       u16 pvlan = 0;
+
+       if (!mv88e6xxx_has_pvt(chip))
+               return -EOPNOTSUPP;
+
+       /* Skip the local source device, which uses in-chip port VLAN */
+       if (dev != chip->ds->index)
+               pvlan = mv88e6xxx_port_vlan(chip, dev, port);
+
+       return mv88e6xxx_g2_pvt_write(chip, dev, port, pvlan);
+}
+
+static int mv88e6xxx_pvt_setup(struct mv88e6xxx_chip *chip)
+{
+       int dev, port;
+       int err;
+
+       if (!mv88e6xxx_has_pvt(chip))
+               return 0;
+
+       /* Clear 5 Bit Port for usage with Marvell Link Street devices:
+        * use 4 bits for the Src_Port/Src_Trunk and 5 bits for the Src_Dev.
+        */
+       err = mv88e6xxx_g2_misc_4_bit_port(chip);
+       if (err)
+               return err;
+
+       for (dev = 0; dev < MV88E6XXX_MAX_PVT_SWITCHES; ++dev) {
+               for (port = 0; port < MV88E6XXX_MAX_PVT_PORTS; ++port) {
+                       err = mv88e6xxx_pvt_map(chip, dev, port);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
 static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
        mutex_lock(&chip->reg_lock);
-       err = _mv88e6xxx_atu_remove(chip, 0, port, false);
+       err = mv88e6xxx_g1_atu_remove(chip, 0, port, false);
        mutex_unlock(&chip->reg_lock);
 
        if (err)
@@ -1662,7 +1609,7 @@ loadpurge:
        return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE);
 }
 
-static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid)
+static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
 {
        DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID);
        struct mv88e6xxx_vtu_entry vlan;
@@ -1703,7 +1650,7 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid)
                return -ENOSPC;
 
        /* Clear the database */
-       return _mv88e6xxx_atu_flush(chip, *fid, true);
+       return mv88e6xxx_g1_atu_flush(chip, *fid, true);
 }
 
 static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid,
@@ -1716,7 +1663,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid,
        };
        int i, err;
 
-       err = _mv88e6xxx_fid_new(chip, &vlan.fid);
+       err = mv88e6xxx_atu_new(chip, &vlan.fid);
        if (err)
                return err;
 
@@ -1964,7 +1911,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip,
        if (err)
                return err;
 
-       return _mv88e6xxx_atu_remove(chip, vlan.fid, port, false);
+       return mv88e6xxx_g1_atu_remove(chip, vlan.fid, port, false);
 }
 
 static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
@@ -2001,96 +1948,6 @@ unlock:
        return err;
 }
 
-static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip,
-                                   const unsigned char *addr)
-{
-       int i, err;
-
-       for (i = 0; i < 3; i++) {
-               err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_MAC_01 + i,
-                                        (addr[i * 2] << 8) | addr[i * 2 + 1]);
-               if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
-static int _mv88e6xxx_atu_mac_read(struct mv88e6xxx_chip *chip,
-                                  unsigned char *addr)
-{
-       u16 val;
-       int i, err;
-
-       for (i = 0; i < 3; i++) {
-               err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_MAC_01 + i, &val);
-               if (err)
-                       return err;
-
-               addr[i * 2] = val >> 8;
-               addr[i * 2 + 1] = val & 0xff;
-       }
-
-       return 0;
-}
-
-static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip,
-                              struct mv88e6xxx_atu_entry *entry)
-{
-       int ret;
-
-       ret = _mv88e6xxx_atu_wait(chip);
-       if (ret < 0)
-               return ret;
-
-       ret = _mv88e6xxx_atu_mac_write(chip, entry->mac);
-       if (ret < 0)
-               return ret;
-
-       ret = _mv88e6xxx_atu_data_write(chip, entry);
-       if (ret < 0)
-               return ret;
-
-       return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB);
-}
-
-static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
-                                 struct mv88e6xxx_atu_entry *entry);
-
-static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid,
-                            const u8 *addr, struct mv88e6xxx_atu_entry *entry)
-{
-       struct mv88e6xxx_atu_entry next;
-       int err;
-
-       memcpy(next.mac, addr, ETH_ALEN);
-       eth_addr_dec(next.mac);
-
-       err = _mv88e6xxx_atu_mac_write(chip, next.mac);
-       if (err)
-               return err;
-
-       do {
-               err = _mv88e6xxx_atu_getnext(chip, fid, &next);
-               if (err)
-                       return err;
-
-               if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED)
-                       break;
-
-               if (ether_addr_equal(next.mac, addr)) {
-                       *entry = next;
-                       return 0;
-               }
-       } while (ether_addr_greater(addr, next.mac));
-
-       memset(entry, 0, sizeof(*entry));
-       entry->fid = fid;
-       ether_addr_copy(entry->mac, addr);
-
-       return 0;
-}
-
 static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
                                        const unsigned char *addr, u16 vid,
                                        u8 state)
@@ -2107,21 +1964,32 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
        if (err)
                return err;
 
-       err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry);
+       entry.state = GLOBAL_ATU_DATA_STATE_UNUSED;
+       ether_addr_copy(entry.mac, addr);
+       eth_addr_dec(entry.mac);
+
+       err = mv88e6xxx_g1_atu_getnext(chip, vlan.fid, &entry);
        if (err)
                return err;
 
+       /* Initialize a fresh ATU entry if it isn't found */
+       if (entry.state == GLOBAL_ATU_DATA_STATE_UNUSED ||
+           !ether_addr_equal(entry.mac, addr)) {
+               memset(&entry, 0, sizeof(entry));
+               ether_addr_copy(entry.mac, addr);
+       }
+
        /* Purge the ATU entry only if no port is using it anymore */
        if (state == GLOBAL_ATU_DATA_STATE_UNUSED) {
-               entry.portv_trunkid &= ~BIT(port);
-               if (!entry.portv_trunkid)
+               entry.portvec &= ~BIT(port);
+               if (!entry.portvec)
                        entry.state = GLOBAL_ATU_DATA_STATE_UNUSED;
        } else {
-               entry.portv_trunkid |= BIT(port);
+               entry.portvec |= BIT(port);
                entry.state = state;
        }
 
-       return _mv88e6xxx_atu_load(chip, &entry);
+       return mv88e6xxx_g1_atu_loadpurge(chip, vlan.fid, &entry);
 }
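
The rewritten load/purge path keeps a single entry per {FID, MAC} and tracks membership in a port vector: purging clears the caller's bit and only marks the entry unused once the vector is empty, while loading sets the bit along with the requested state. A minimal userspace model of that bitmap logic (constants are placeholders):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define STATE_UNUSED 0x0  /* placeholder for GLOBAL_ATU_DATA_STATE_UNUSED */

    struct atu_entry_model {
            uint16_t portvec;       /* one bit per member port */
            uint8_t state;
    };

    static void db_load_purge(struct atu_entry_model *e, int port, uint8_t state)
    {
            if (state == STATE_UNUSED) {
                    e->portvec &= ~(1u << port);    /* drop this port */
                    if (!e->portvec)                /* last user gone: purge */
                            e->state = STATE_UNUSED;
            } else {
                    e->portvec |= 1u << port;       /* add this port */
                    e->state = state;
            }
    }

    int main(void)
    {
            struct atu_entry_model e = { .portvec = 0xc, .state = 0xe };

            db_load_purge(&e, 2, STATE_UNUSED);
            printf("portvec=%#x state=%#x\n", e.portvec, e.state); /* 0x8 0xe */
            return 0;
    }
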
 
 static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
@@ -2161,75 +2029,26 @@ static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
        return err;
 }
 
-static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
-                                 struct mv88e6xxx_atu_entry *entry)
-{
-       struct mv88e6xxx_atu_entry next = { 0 };
-       u16 val;
-       int err;
-
-       next.fid = fid;
-
-       err = _mv88e6xxx_atu_wait(chip);
-       if (err)
-               return err;
-
-       err = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB);
-       if (err)
-               return err;
-
-       err = _mv88e6xxx_atu_mac_read(chip, next.mac);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_DATA, &val);
-       if (err)
-               return err;
-
-       next.state = val & GLOBAL_ATU_DATA_STATE_MASK;
-       if (next.state != GLOBAL_ATU_DATA_STATE_UNUSED) {
-               unsigned int mask, shift;
-
-               if (val & GLOBAL_ATU_DATA_TRUNK) {
-                       next.trunk = true;
-                       mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK;
-                       shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT;
-               } else {
-                       next.trunk = false;
-                       mask = GLOBAL_ATU_DATA_PORT_VECTOR_MASK;
-                       shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT;
-               }
-
-               next.portv_trunkid = (val & mask) >> shift;
-       }
-
-       *entry = next;
-       return 0;
-}
-
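The removed GetNext helper also documents how GLOBAL_ATU_DATA is decoded: the low bits carry the entry state and, for used entries, a trunk flag selects whether the payload is a trunk ID or a port vector. A standalone decoder sketch; the exact bit positions below are assumptions for illustration, the driver's GLOBAL_ATU_DATA_* macros are authoritative:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed layout for the sketch: [3:0] state, [14:4] payload,
     * [15] trunk flag.
     */
    #define ATU_DATA_STATE_MASK     0x000f
    #define ATU_DATA_TRUNK          (1u << 15)
    #define ATU_DATA_PAYLOAD_MASK   0x7ff0
    #define ATU_DATA_PAYLOAD_SHIFT  4

    struct atu_data_model {
            uint8_t state;
            bool trunk;
            uint16_t portvec_or_trunkid;
    };

    static void atu_data_decode(uint16_t val, struct atu_data_model *d)
    {
            d->state = val & ATU_DATA_STATE_MASK;
            d->trunk = val & ATU_DATA_TRUNK;
            d->portvec_or_trunkid =
                    (val & ATU_DATA_PAYLOAD_MASK) >> ATU_DATA_PAYLOAD_SHIFT;
    }

    int main(void)
    {
            struct atu_data_model d;

            atu_data_decode(0x003e, &d);    /* state 0xe, port vector 0x3 */
            printf("state=%#x trunk=%d vec=%#x\n",
                   d.state, d.trunk, d.portvec_or_trunkid);
            return 0;
    }
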
 static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
                                      u16 fid, u16 vid, int port,
                                      struct switchdev_obj *obj,
                                      int (*cb)(struct switchdev_obj *obj))
 {
-       struct mv88e6xxx_atu_entry addr = {
-               .mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
-       };
+       struct mv88e6xxx_atu_entry addr;
        int err;
 
-       err = _mv88e6xxx_atu_mac_write(chip, addr.mac);
-       if (err)
-               return err;
+       addr.state = GLOBAL_ATU_DATA_STATE_UNUSED;
+       eth_broadcast_addr(addr.mac);
 
        do {
-               err = _mv88e6xxx_atu_getnext(chip, fid, &addr);
+               err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
                if (err)
                        return err;
 
                if (addr.state == GLOBAL_ATU_DATA_STATE_UNUSED)
                        break;
 
-               if (addr.trunk || (addr.portv_trunkid & BIT(port)) == 0)
+               if (addr.trunk || (addr.portvec & BIT(port)) == 0)
                        continue;
 
                if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) {
@@ -2321,23 +2140,52 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
        return err;
 }
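
In the dump path above, seeding the walk with eth_broadcast_addr() works because GetNext wraps: from ff:ff:ff:ff:ff:ff the first GetNext returns the lowest loaded entry, and the dump stops when the hardware reports an unused entry. A toy model of that termination logic over a sorted table (the wrap behaviour is assumed from the loop structure):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BCAST 0xffffffffffffULL

    /* Toy database of loaded entries, sorted ascending. */
    static const uint64_t db[] = { 0x001122334455ULL, 0x00aabbccddeeULL };

    struct next_model { uint64_t mac; bool used; };

    /* Model of GetNext: from the broadcast address it wraps to the
     * lowest loaded entry; past the highest entry it reports "unused".
     */
    static struct next_model atu_getnext(uint64_t cur)
    {
            size_t i;

            for (i = 0; i < sizeof(db) / sizeof(db[0]); i++)
                    if (cur == BCAST || db[i] > cur)
                            return (struct next_model){ db[i], true };

            return (struct next_model){ BCAST, false };
    }

    int main(void)
    {
            struct next_model e = { BCAST, true };

            do {
                    e = atu_getnext(e.mac);
                    if (!e.used)    /* models GLOBAL_ATU_DATA_STATE_UNUSED */
                            break;
                    printf("%012llx\n", (unsigned long long)e.mac);
            } while (1);
            return 0;
    }
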
 
-static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
-                                     struct net_device *br)
+static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
+                               struct net_device *br)
 {
-       struct mv88e6xxx_chip *chip = ds->priv;
-       int i, err = 0;
-
-       mutex_lock(&chip->reg_lock);
+       struct dsa_switch *ds;
+       int port;
+       int dev;
+       int err;
 
-       /* Remap each port's VLANTable */
-       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
-               if (ds->ports[i].bridge_dev == br) {
-                       err = _mv88e6xxx_port_based_vlan_map(chip, i);
+       /* Remap the Port VLAN of each local bridge group member */
+       for (port = 0; port < mv88e6xxx_num_ports(chip); ++port) {
+               if (chip->ds->ports[port].bridge_dev == br) {
+                       err = mv88e6xxx_port_vlan_map(chip, port);
                        if (err)
-                               break;
+                               return err;
                }
        }
 
+       if (!mv88e6xxx_has_pvt(chip))
+               return 0;
+
+       /* Remap the Port VLAN of each cross-chip bridge group member */
+       for (dev = 0; dev < DSA_MAX_SWITCHES; ++dev) {
+               ds = chip->ds->dst->ds[dev];
+               if (!ds)
+                       break;
+
+               for (port = 0; port < ds->num_ports; ++port) {
+                       if (ds->ports[port].bridge_dev == br) {
+                               err = mv88e6xxx_pvt_map(chip, dev, port);
+                               if (err)
+                                       return err;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
+                                     struct net_device *br)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_bridge_map(chip, br);
        mutex_unlock(&chip->reg_lock);
 
        return err;
@@ -2347,17 +2195,41 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
                                        struct net_device *br)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       int i;
 
        mutex_lock(&chip->reg_lock);
+       if (mv88e6xxx_bridge_map(chip, br) ||
+           mv88e6xxx_port_vlan_map(chip, port))
+               dev_err(ds->dev, "failed to remap in-chip Port VLAN\n");
+       mutex_unlock(&chip->reg_lock);
+}
 
-       /* Remap each port's VLANTable */
-       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i)
-               if (i == port || ds->ports[i].bridge_dev == br)
-                       if (_mv88e6xxx_port_based_vlan_map(chip, i))
-                               netdev_warn(ds->ports[i].netdev,
-                                           "failed to remap\n");
+static int mv88e6xxx_crosschip_bridge_join(struct dsa_switch *ds, int dev,
+                                          int port, struct net_device *br)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       if (!mv88e6xxx_has_pvt(chip))
+               return 0;
 
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_pvt_map(chip, dev, port);
+       mutex_unlock(&chip->reg_lock);
+
+       return err;
+}
+
+static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds, int dev,
+                                            int port, struct net_device *br)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+
+       if (!mv88e6xxx_has_pvt(chip))
+               return;
+
+       mutex_lock(&chip->reg_lock);
+       if (mv88e6xxx_pvt_map(chip, dev, port))
+               dev_err(ds->dev, "failed to remap cross-chip Port VLAN\n");
        mutex_unlock(&chip->reg_lock);
 }
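
The cross-chip hooks hand each chip the (dev, port) coordinates of a bridge member elsewhere in the fabric so it can program its PVT accordingly. A toy model of the bookkeeping, assuming (as the name Port VLAN Table suggests) a per-(external switch, external port) bitmap of permitted local egress ports; the layout is purely illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_SWITCHES    32      /* stands in for DSA_MAX_SWITCHES */
    #define MAX_PORTS       16

    /* Toy PVT: for each external (switch, port) source, a bitmap of the
     * local ports its traffic may egress on.
     */
    static uint16_t pvt[MAX_SWITCHES][MAX_PORTS];

    static void pvt_map(int dev, int port, uint16_t local_members)
    {
            pvt[dev][port] = local_members;
    }

    int main(void)
    {
            /* Remote port 2 on switch 1 joins a bridge spanning local
             * ports 0 and 3: allow egress on exactly those.
             */
            pvt_map(1, 2, (1u << 0) | (1u << 3));
            printf("pvt[1][2]=%#x\n", pvt[1][2]);
            return 0;
    }
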
 
@@ -2433,70 +2305,85 @@ static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
        return err;
 }
 
-static int mv88e6xxx_setup_port_dsa(struct mv88e6xxx_chip *chip, int port,
-                                   int upstream_port)
+static int mv88e6xxx_set_port_mode(struct mv88e6xxx_chip *chip, int port,
+                                  enum mv88e6xxx_frame_mode frame, u16 egress,
+                                  u16 etype)
 {
        int err;
 
-       err = chip->info->ops->port_set_frame_mode(
-               chip, port, MV88E6XXX_FRAME_MODE_DSA);
+       if (!chip->info->ops->port_set_frame_mode)
+               return -EOPNOTSUPP;
+
+       err = mv88e6xxx_port_set_egress_mode(chip, port, egress);
        if (err)
                return err;
 
-       return chip->info->ops->port_set_egress_unknowns(
-               chip, port, port == upstream_port);
+       err = chip->info->ops->port_set_frame_mode(chip, port, frame);
+       if (err)
+               return err;
+
+       if (chip->info->ops->port_set_ether_type)
+               return chip->info->ops->port_set_ether_type(chip, port, etype);
+
+       return 0;
 }
 
-static int mv88e6xxx_setup_port_cpu(struct mv88e6xxx_chip *chip, int port)
+static int mv88e6xxx_set_port_mode_normal(struct mv88e6xxx_chip *chip, int port)
 {
-       int err;
+       return mv88e6xxx_set_port_mode(chip, port, MV88E6XXX_FRAME_MODE_NORMAL,
+                                      PORT_CONTROL_EGRESS_UNMODIFIED,
+                                      PORT_ETH_TYPE_DEFAULT);
+}
 
-       switch (chip->info->tag_protocol) {
-       case DSA_TAG_PROTO_EDSA:
-               err = chip->info->ops->port_set_frame_mode(
-                       chip, port, MV88E6XXX_FRAME_MODE_ETHERTYPE);
-               if (err)
-                       return err;
+static int mv88e6xxx_set_port_mode_dsa(struct mv88e6xxx_chip *chip, int port)
+{
+       return mv88e6xxx_set_port_mode(chip, port, MV88E6XXX_FRAME_MODE_DSA,
+                                      PORT_CONTROL_EGRESS_UNMODIFIED,
+                                      PORT_ETH_TYPE_DEFAULT);
+}
 
-               err = mv88e6xxx_port_set_egress_mode(
-                       chip, port, PORT_CONTROL_EGRESS_ADD_TAG);
-               if (err)
-                       return err;
+static int mv88e6xxx_set_port_mode_edsa(struct mv88e6xxx_chip *chip, int port)
+{
+       return mv88e6xxx_set_port_mode(chip, port,
+                                      MV88E6XXX_FRAME_MODE_ETHERTYPE,
+                                      PORT_CONTROL_EGRESS_ADD_TAG, ETH_P_EDSA);
+}
 
-               if (chip->info->ops->port_set_ether_type)
-                       err = chip->info->ops->port_set_ether_type(
-                               chip, port, ETH_P_EDSA);
-               break;
+static int mv88e6xxx_setup_port_mode(struct mv88e6xxx_chip *chip, int port)
+{
+       if (dsa_is_dsa_port(chip->ds, port))
+               return mv88e6xxx_set_port_mode_dsa(chip, port);
 
-       case DSA_TAG_PROTO_DSA:
-               err = chip->info->ops->port_set_frame_mode(
-                       chip, port, MV88E6XXX_FRAME_MODE_DSA);
-               if (err)
-                       return err;
+       if (dsa_is_normal_port(chip->ds, port))
+               return mv88e6xxx_set_port_mode_normal(chip, port);
 
-               err = mv88e6xxx_port_set_egress_mode(
-                       chip, port, PORT_CONTROL_EGRESS_UNMODIFIED);
-               break;
-       default:
-               err = -EINVAL;
-       }
+       /* Set up the CPU port mode depending on its supported tag format */
+       if (chip->info->tag_protocol == DSA_TAG_PROTO_DSA)
+               return mv88e6xxx_set_port_mode_dsa(chip, port);
 
-       if (err)
-               return err;
+       if (chip->info->tag_protocol == DSA_TAG_PROTO_EDSA)
+               return mv88e6xxx_set_port_mode_edsa(chip, port);
 
-       return chip->info->ops->port_set_egress_unknowns(chip, port, true);
+       return -EINVAL;
 }
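
Each tagging mode thus reduces to a (frame mode, egress mode, EtherType) triple: normal and DSA ports egress frames unmodified with the default EtherType, while an EDSA CPU port adds a tag and matches ETH_P_EDSA. A compact standalone table of those triples; the numeric encodings below stand in for the driver's frame-mode and PORT_CONTROL_EGRESS_* values, and 0x9100 as the default EtherType is an assumption (0xdada is ETH_P_EDSA):

    #include <stdio.h>

    enum frame_mode { FRAME_NORMAL, FRAME_DSA, FRAME_ETHERTYPE };
    enum egress_mode { EGRESS_UNMODIFIED, EGRESS_ADD_TAG };

    struct port_mode {
            const char *name;
            enum frame_mode frame;
            enum egress_mode egress;
            unsigned short etype;
    };

    static const struct port_mode modes[] = {
            { "normal", FRAME_NORMAL,    EGRESS_UNMODIFIED, 0x9100 },
            { "dsa",    FRAME_DSA,       EGRESS_UNMODIFIED, 0x9100 },
            { "edsa",   FRAME_ETHERTYPE, EGRESS_ADD_TAG,    0xdada },
    };

    int main(void)
    {
            size_t i;

            for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
                    printf("%-6s frame=%d egress=%d etype=0x%04x\n",
                           modes[i].name, modes[i].frame, modes[i].egress,
                           modes[i].etype);
            return 0;
    }
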
 
-static int mv88e6xxx_setup_port_normal(struct mv88e6xxx_chip *chip, int port)
+static int mv88e6xxx_setup_message_port(struct mv88e6xxx_chip *chip, int port)
 {
-       int err;
+       bool message = dsa_is_dsa_port(chip->ds, port);
 
-       err = chip->info->ops->port_set_frame_mode(
-               chip, port, MV88E6XXX_FRAME_MODE_NORMAL);
-       if (err)
-               return err;
+       return mv88e6xxx_port_set_message_port(chip, port, message);
+}
 
-       return chip->info->ops->port_set_egress_unknowns(chip, port, false);
+static int mv88e6xxx_setup_egress_floods(struct mv88e6xxx_chip *chip, int port)
+{
+       bool flood = port == dsa_upstream_port(chip->ds);
+
+       /* Upstream ports flood frames with unknown unicast or multicast DA */
+       if (chip->info->ops->port_set_egress_floods)
+               return chip->info->ops->port_set_egress_floods(chip, port,
+                                                              flood, flood);
+
+       return 0;
 }
 
 static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
@@ -2541,14 +2428,11 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
        if (err)
                return err;
 
-       if (dsa_is_cpu_port(ds, port)) {
-               err = mv88e6xxx_setup_port_cpu(chip, port);
-       } else if (dsa_is_dsa_port(ds, port)) {
-               err = mv88e6xxx_setup_port_dsa(chip, port,
-                                              dsa_upstream_port(ds));
-       } else {
-               err = mv88e6xxx_setup_port_normal(chip, port);
-       }
+       err = mv88e6xxx_setup_port_mode(chip, port);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_setup_egress_floods(chip, port);
        if (err)
                return err;
 
@@ -2623,20 +2507,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                        return err;
        }
 
-       if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
-           mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
-           mv88e6xxx_6320_family(chip) || mv88e6xxx_6341_family(chip)) {
-               /* Port ATU control: disable limiting the number of
-                * address database entries that this port is allowed
-                * to use.
-                */
-               err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL,
-                                          0x0000);
-               /* Priority Override: disable DA, SA and VTU priority
-                * override.
-                */
-               err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE,
-                                          0x0000);
+       if (chip->info->ops->port_disable_learn_limit) {
+               err = chip->info->ops->port_disable_learn_limit(chip, port);
+               if (err)
+                       return err;
+       }
+
+       if (chip->info->ops->port_disable_pri_override) {
+               err = chip->info->ops->port_disable_pri_override(chip, port);
                if (err)
                        return err;
        }
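
The family-check chain gives way to the optional-ops pattern used throughout this driver: a chip either provides the hook or the step is skipped, with no central list of families to maintain. A minimal self-contained sketch of that dispatch (hypothetical types and hook names):

    #include <stdio.h>

    struct chip;

    /* Optional per-chip operations: a NULL hook simply means the chip
     * has no such feature, so the step is skipped.
     */
    struct chip_ops {
            int (*port_disable_learn_limit)(struct chip *chip, int port);
            int (*port_disable_pri_override)(struct chip *chip, int port);
    };

    struct chip {
            const struct chip_ops *ops;
    };

    static int setup_port(struct chip *chip, int port)
    {
            int err;

            if (chip->ops->port_disable_learn_limit) {
                    err = chip->ops->port_disable_learn_limit(chip, port);
                    if (err)
                            return err;
            }

            if (chip->ops->port_disable_pri_override) {
                    err = chip->ops->port_disable_pri_override(chip, port);
                    if (err)
                            return err;
            }

            return 0;
    }

    static int noop_op(struct chip *chip, int port)
    {
            (void)chip;
            printf("disable on port %d\n", port);
            return 0;
    }

    static const struct chip_ops ops_with_hooks = {
            .port_disable_learn_limit = noop_op,
            /* .port_disable_pri_override left NULL: skipped gracefully */
    };

    int main(void)
    {
            struct chip c = { .ops = &ops_with_hooks };

            return setup_port(&c, 1);
    }
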
@@ -2653,10 +2531,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                        return err;
        }
 
-       /* Port Control 1: disable trunking, disable sending
-        * learning messages to this port.
-        */
-       err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000);
+       err = mv88e6xxx_setup_message_port(chip, port);
        if (err)
                return err;
 
@@ -2668,7 +2543,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
        if (err)
                return err;
 
-       err = _mv88e6xxx_port_based_vlan_map(chip, port);
+       err = mv88e6xxx_port_vlan_map(chip, port);
        if (err)
                return err;
 
@@ -2697,33 +2572,6 @@ static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
        return 0;
 }
 
-static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip,
-                                    unsigned int msecs)
-{
-       const unsigned int coeff = chip->info->age_time_coeff;
-       const unsigned int min = 0x01 * coeff;
-       const unsigned int max = 0xff * coeff;
-       u8 age_time;
-       u16 val;
-       int err;
-
-       if (msecs < min || msecs > max)
-               return -ERANGE;
-
-       /* Round to nearest multiple of coeff */
-       age_time = (msecs + coeff / 2) / coeff;
-
-       err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
-       if (err)
-               return err;
-
-       /* AgeTime is 11:4 bits */
-       val &= ~0xff0;
-       val |= age_time << 4;
-
-       return mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val);
-}
-
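The removed helper captures the hardware contract for address ageing: AgeTime is an 8-bit field at bits 11:4 of GLOBAL_ATU_CONTROL, in units of a per-family coefficient, and the requested value is rounded to the nearest multiple. The range check and rounding as a standalone model:

    #include <errno.h>
    #include <stdio.h>

    /* Model of the age-time encoding: field = round(msecs / coeff). */
    static int age_time_to_field(unsigned int msecs, unsigned int coeff,
                                 unsigned char *field)
    {
            const unsigned int min = 0x01 * coeff;
            const unsigned int max = 0xff * coeff;

            if (msecs < min || msecs > max)
                    return -ERANGE;

            /* Round to nearest multiple of coeff */
            *field = (msecs + coeff / 2) / coeff;
            return 0;
    }

    int main(void)
    {
            unsigned char f;

            /* 300000 ms (5 min) with coeff 15000 -> field 20 (0x14) */
            if (!age_time_to_field(300000, 15000, &f))
                    printf("field=0x%02x -> %u ms\n", f, f * 15000);
            return 0;
    }
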
 static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
                                     unsigned int ageing_time)
 {
@@ -2731,7 +2579,7 @@ static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
        int err;
 
        mutex_lock(&chip->reg_lock);
-       err = mv88e6xxx_g1_set_age_time(chip, ageing_time);
+       err = mv88e6xxx_g1_atu_set_age_time(chip, ageing_time);
        mutex_unlock(&chip->reg_lock);
 
        return err;
@@ -2774,24 +2622,6 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
        if (err < 0)
                return err;
 
-       /* Set the default address aging time to 5 minutes, and
-        * enable address learn messages to be sent to all message
-        * ports.
-        */
-       err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL,
-                                GLOBAL_ATU_CONTROL_LEARN2ALL);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_g1_set_age_time(chip, 300000);
-       if (err)
-               return err;
-
-       /* Clear all ATU entries */
-       err = _mv88e6xxx_atu_flush(chip, 0, true);
-       if (err)
-               return err;
-
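These G1 defaults do not disappear: learn2all, the 5-minute age time and the initial flush presumably move into the new mv88e6xxx_atu_setup() called from mv88e6xxx_setup() in a later hunk. A stub model naming those defaults, assuming the consolidated helper keeps them unchanged:

    #include <stdio.h>

    struct chip_model { int flushed, learn2all; unsigned int age_ms; };

    /* Stub of the assumed consolidation: flush everything, learn2all on,
     * 5-minute default age time (the values removed above).
     */
    static void atu_setup_model(struct chip_model *chip)
    {
            chip->flushed = 1;      /* clear all ATU entries, every FID */
            chip->learn2all = 1;    /* learn messages to all message ports */
            chip->age_ms = 300000;  /* default address age time: 5 min */
    }

    int main(void)
    {
            struct chip_model chip = { 0 };

            atu_setup_model(&chip);
            printf("flush=%d learn2all=%d age=%ums\n",
                   chip.flushed, chip.learn2all, chip.age_ms);
            return 0;
    }
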
        /* Configure the IP ToS mapping registers. */
        err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_0, 0x0000);
        if (err)
@@ -2872,6 +2702,14 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
                        goto unlock;
        }
 
+       err = mv88e6xxx_pvt_setup(chip);
+       if (err)
+               goto unlock;
+
+       err = mv88e6xxx_atu_setup(chip);
+       if (err)
+               goto unlock;
+
        /* Some generations have the configuration of sending reserved
         * management frames to the CPU in global2, others in
         * global1. Hence it does not fit the two setup functions
@@ -3101,10 +2939,12 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3127,7 +2967,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_speed = mv88e6185_port_set_speed,
        .port_set_frame_mode = mv88e6085_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6095_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6185_port_set_egress_floods,
        .port_set_upstream_port = mv88e6095_port_set_upstream_port,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3149,11 +2989,13 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3174,7 +3016,9 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_speed = mv88e6185_port_set_speed,
        .port_set_frame_mode = mv88e6085_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6085_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3196,7 +3040,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6095_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6185_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_set_upstream_port = mv88e6095_port_set_upstream_port,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
@@ -3215,6 +3059,37 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
        .reset = mv88e6185_g1_reset,
 };
 
+static const struct mv88e6xxx_ops mv88e6141_ops = {
+       /* MV88E6XXX_FAMILY_6341 */
+       .get_eeprom = mv88e6xxx_g2_get_eeprom8,
+       .set_eeprom = mv88e6xxx_g2_set_eeprom8,
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .port_tag_remap = mv88e6095_port_tag_remap,
+       .port_set_frame_mode = mv88e6351_port_set_frame_mode,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
+       .port_set_ether_type = mv88e6351_port_set_ether_type,
+       .port_jumbo_config = mv88e6165_port_jumbo_config,
+       .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+       .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
+       .g1_set_cpu_port = mv88e6390_g1_set_cpu_port,
+       .g1_set_egress_port = mv88e6390_g1_set_egress_port,
+       .watchdog_ops = &mv88e6390_watchdog_ops,
+       .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .reset = mv88e6352_g1_reset,
+};
+
 static const struct mv88e6xxx_ops mv88e6161_ops = {
        /* MV88E6XXX_FAMILY_6165 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
@@ -3225,11 +3100,13 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3249,6 +3126,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_speed = mv88e6185_port_set_speed,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3271,11 +3150,13 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3300,11 +3181,13 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .port_set_speed = mv88e6352_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3327,11 +3210,13 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3356,11 +3241,13 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .port_set_speed = mv88e6352_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3381,7 +3268,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_speed = mv88e6185_port_set_speed,
        .port_set_frame_mode = mv88e6085_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6095_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6185_port_set_egress_floods,
        .port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting,
        .port_set_upstream_port = mv88e6095_port_set_upstream_port,
        .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
@@ -3410,9 +3297,11 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .port_set_speed = mv88e6390_port_set_speed,
        .port_tag_remap = mv88e6390_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_pause_config = mv88e6390_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6390_g1_stats_snapshot,
        .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3438,9 +3327,11 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .port_set_speed = mv88e6390x_port_set_speed,
        .port_tag_remap = mv88e6390_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_pause_config = mv88e6390_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6390_g1_stats_snapshot,
        .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3466,9 +3357,11 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .port_set_speed = mv88e6390_port_set_speed,
        .port_tag_remap = mv88e6390_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_pause_config = mv88e6390_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6390_g1_stats_snapshot,
        .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3494,11 +3387,13 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .port_set_speed = mv88e6352_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3523,10 +3418,12 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .port_set_speed = mv88e6390_port_set_speed,
        .port_tag_remap = mv88e6390_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_pause_config = mv88e6390_port_pause_config,
        .port_set_cmode = mv88e6390x_port_set_cmode,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6390_g1_stats_snapshot,
        .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3551,11 +3448,13 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
        .stats_get_strings = mv88e6320_stats_get_strings,
@@ -3578,11 +3477,13 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
        .stats_get_strings = mv88e6320_stats_get_strings,
@@ -3592,6 +3493,37 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
        .reset = mv88e6352_g1_reset,
 };
 
+static const struct mv88e6xxx_ops mv88e6341_ops = {
+       /* MV88E6XXX_FAMILY_6341 */
+       .get_eeprom = mv88e6xxx_g2_get_eeprom8,
+       .set_eeprom = mv88e6xxx_g2_set_eeprom8,
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .port_tag_remap = mv88e6095_port_tag_remap,
+       .port_set_frame_mode = mv88e6351_port_set_frame_mode,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
+       .port_set_ether_type = mv88e6351_port_set_ether_type,
+       .port_jumbo_config = mv88e6165_port_jumbo_config,
+       .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+       .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
+       .g1_set_cpu_port = mv88e6390_g1_set_cpu_port,
+       .g1_set_egress_port = mv88e6390_g1_set_egress_port,
+       .watchdog_ops = &mv88e6390_watchdog_ops,
+       .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .reset = mv88e6352_g1_reset,
+};
+
 static const struct mv88e6xxx_ops mv88e6350_ops = {
        /* MV88E6XXX_FAMILY_6351 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
@@ -3603,11 +3535,13 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3630,11 +3564,13 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .port_set_speed = mv88e6185_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3659,11 +3595,13 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .port_set_speed = mv88e6352_port_set_speed,
        .port_tag_remap = mv88e6095_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6097_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6320_g1_stats_snapshot,
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
@@ -3675,64 +3613,6 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .reset = mv88e6352_g1_reset,
 };
 
-static const struct mv88e6xxx_ops mv88e6141_ops = {
-       /* MV88E6XXX_FAMILY_6341 */
-       .get_eeprom = mv88e6xxx_g2_get_eeprom8,
-       .set_eeprom = mv88e6xxx_g2_set_eeprom8,
-       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
-       .phy_read = mv88e6xxx_g2_smi_phy_read,
-       .phy_write = mv88e6xxx_g2_smi_phy_write,
-       .port_set_link = mv88e6xxx_port_set_link,
-       .port_set_duplex = mv88e6xxx_port_set_duplex,
-       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
-       .port_set_speed = mv88e6390_port_set_speed,
-       .port_tag_remap = mv88e6095_port_tag_remap,
-       .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
-       .port_set_ether_type = mv88e6351_port_set_ether_type,
-       .port_jumbo_config = mv88e6165_port_jumbo_config,
-       .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
-       .port_pause_config = mv88e6097_port_pause_config,
-       .stats_snapshot = mv88e6390_g1_stats_snapshot,
-       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
-       .stats_get_strings = mv88e6320_stats_get_strings,
-       .stats_get_stats = mv88e6390_stats_get_stats,
-       .g1_set_cpu_port = mv88e6390_g1_set_cpu_port,
-       .g1_set_egress_port = mv88e6390_g1_set_egress_port,
-       .watchdog_ops = &mv88e6390_watchdog_ops,
-       .mgmt_rsvd2cpu =  mv88e6390_g1_mgmt_rsvd2cpu,
-       .reset = mv88e6352_g1_reset,
-};
-
-static const struct mv88e6xxx_ops mv88e6341_ops = {
-       /* MV88E6XXX_FAMILY_6341 */
-       .get_eeprom = mv88e6xxx_g2_get_eeprom8,
-       .set_eeprom = mv88e6xxx_g2_set_eeprom8,
-       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
-       .phy_read = mv88e6xxx_g2_smi_phy_read,
-       .phy_write = mv88e6xxx_g2_smi_phy_write,
-       .port_set_link = mv88e6xxx_port_set_link,
-       .port_set_duplex = mv88e6xxx_port_set_duplex,
-       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
-       .port_set_speed = mv88e6390_port_set_speed,
-       .port_tag_remap = mv88e6095_port_tag_remap,
-       .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
-       .port_set_ether_type = mv88e6351_port_set_ether_type,
-       .port_jumbo_config = mv88e6165_port_jumbo_config,
-       .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
-       .port_pause_config = mv88e6097_port_pause_config,
-       .stats_snapshot = mv88e6390_g1_stats_snapshot,
-       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
-       .stats_get_strings = mv88e6320_stats_get_strings,
-       .stats_get_stats = mv88e6390_stats_get_stats,
-       .g1_set_cpu_port = mv88e6390_g1_set_cpu_port,
-       .g1_set_egress_port = mv88e6390_g1_set_egress_port,
-       .watchdog_ops = &mv88e6390_watchdog_ops,
-       .mgmt_rsvd2cpu =  mv88e6390_g1_mgmt_rsvd2cpu,
-       .reset = mv88e6352_g1_reset,
-};
-
 static const struct mv88e6xxx_ops mv88e6390_ops = {
        /* MV88E6XXX_FAMILY_6390 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom8,
@@ -3746,12 +3626,14 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .port_set_speed = mv88e6390_port_set_speed,
        .port_tag_remap = mv88e6390_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6390_port_pause_config,
        .port_set_cmode = mv88e6390x_port_set_cmode,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6390_g1_stats_snapshot,
        .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3777,11 +3659,13 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .port_set_speed = mv88e6390x_port_set_speed,
        .port_tag_remap = mv88e6390_port_tag_remap,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+       .port_set_egress_floods = mv88e6352_port_set_egress_floods,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
        .port_jumbo_config = mv88e6165_port_jumbo_config,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_config = mv88e6390_port_pause_config,
+       .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+       .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
        .stats_snapshot = mv88e6390_g1_stats_snapshot,
        .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
        .stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3794,50 +3678,6 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .reset = mv88e6352_g1_reset,
 };
 
-static const struct mv88e6xxx_ops mv88e6391_ops = {
-       /* MV88E6XXX_FAMILY_6390 */
-       .get_eeprom = mv88e6xxx_g2_get_eeprom8,
-       .set_eeprom = mv88e6xxx_g2_set_eeprom8,
-       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
-       .phy_read = mv88e6xxx_g2_smi_phy_read,
-       .phy_write = mv88e6xxx_g2_smi_phy_write,
-       .port_set_link = mv88e6xxx_port_set_link,
-       .port_set_duplex = mv88e6xxx_port_set_duplex,
-       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
-       .port_set_speed = mv88e6390_port_set_speed,
-       .port_tag_remap = mv88e6390_port_tag_remap,
-       .port_set_frame_mode = mv88e6351_port_set_frame_mode,
-       .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
-       .port_set_ether_type = mv88e6351_port_set_ether_type,
-       .port_pause_config = mv88e6390_port_pause_config,
-       .stats_snapshot = mv88e6390_g1_stats_snapshot,
-       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
-       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
-       .stats_get_strings = mv88e6320_stats_get_strings,
-       .stats_get_stats = mv88e6390_stats_get_stats,
-       .g1_set_cpu_port = mv88e6390_g1_set_cpu_port,
-       .g1_set_egress_port = mv88e6390_g1_set_egress_port,
-       .watchdog_ops = &mv88e6390_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
-       .reset = mv88e6352_g1_reset,
-};
-
-static int mv88e6xxx_verify_madatory_ops(struct mv88e6xxx_chip *chip,
-                                        const struct mv88e6xxx_ops *ops)
-{
-       if (!ops->port_set_frame_mode) {
-               dev_err(chip->dev, "Missing port_set_frame_mode");
-               return -EINVAL;
-       }
-
-       if (!ops->port_set_egress_unknowns) {
-               dev_err(chip->dev, "Missing port_set_egress_mode");
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
        [MV88E6085] = {
                .prod_num = PORT_SWITCH_ID_PROD_NUM_6085,
@@ -3849,6 +3689,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6097,
                .ops = &mv88e6085_ops,
@@ -3864,6 +3706,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .atu_move_port_mask = 0xf,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6095,
                .ops = &mv88e6095_ops,
@@ -3879,6 +3722,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6097,
                .ops = &mv88e6097_ops,
@@ -3894,6 +3739,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6165,
                .ops = &mv88e6123_ops,
@@ -3909,11 +3756,28 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6185,
                .ops = &mv88e6131_ops,
        },
 
+       [MV88E6141] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6141,
+               .family = MV88E6XXX_FAMILY_6341,
+               .name = "Marvell 88E6341",
+               .num_databases = 4096,
+               .num_ports = 6,
+               .port_base_addr = 0x10,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 3750,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
+               .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6341,
+               .ops = &mv88e6141_ops,
+       },
+
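Two per-chip capabilities now thread through the info table: .pvt marks chips with a cross-chip Port VLAN Table, and .atu_move_port_mask is read here as the width of the port field in ATU move/remove operations, 0xf (4 bits) on most families and 0x1f (5 bits) on the 6341/6390 families (that reading is an assumption). A small model mirroring a few entries from the table:

    #include <stdio.h>

    struct info_model {
            const char *name;
            unsigned int atu_move_port_mask;
            int pvt;
    };

    /* Values copied from the table entries in this patch. */
    static const struct info_model infos[] = {
            { "88E6097", 0x0f, 1 },
            { "88E6095", 0x0f, 0 },         /* no PVT on this family */
            { "88E6341", 0x1f, 1 },         /* 5-bit ATU move port field */
            { "88E6390", 0x1f, 1 },
    };

    int main(void)
    {
            size_t i;

            for (i = 0; i < sizeof(infos) / sizeof(infos[0]); i++)
                    printf("%s: %u-bit move port field, pvt=%d\n",
                           infos[i].name,
                           infos[i].atu_move_port_mask == 0x1f ? 5u : 4u,
                           infos[i].pvt);
            return 0;
    }
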
        [MV88E6161] = {
                .prod_num = PORT_SWITCH_ID_PROD_NUM_6161,
                .family = MV88E6XXX_FAMILY_6165,
@@ -3924,6 +3788,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6165,
                .ops = &mv88e6161_ops,
@@ -3939,6 +3805,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6165,
                .ops = &mv88e6165_ops,
@@ -3954,6 +3822,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6171_ops,
@@ -3969,6 +3839,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6172_ops,
@@ -3984,6 +3856,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6175_ops,
@@ -3999,6 +3873,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6176_ops,
@@ -4014,6 +3890,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .atu_move_port_mask = 0xf,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6185,
                .ops = &mv88e6185_ops,
@@ -4030,6 +3907,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .pvt = true,
+               .atu_move_port_mask = 0x1f,
                .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6190_ops,
        },
@@ -4044,6 +3923,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6190x_ops,
@@ -4059,9 +3940,11 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6390,
-               .ops = &mv88e6391_ops,
+               .ops = &mv88e6191_ops,
        },
 
        [MV88E6240] = {
@@ -4074,6 +3957,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6240_ops,
@@ -4089,6 +3974,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6290_ops,
@@ -4104,6 +3991,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6320,
                .ops = &mv88e6320_ops,
@@ -4119,25 +4008,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .atu_move_port_mask = 0xf,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6320,
                .ops = &mv88e6321_ops,
        },
 
-       [MV88E6141] = {
-               .prod_num = PORT_SWITCH_ID_PROD_NUM_6141,
-               .family = MV88E6XXX_FAMILY_6341,
-               .name = "Marvell 88E6341",
-               .num_databases = 4096,
-               .num_ports = 6,
-               .port_base_addr = 0x10,
-               .global1_addr = 0x1b,
-               .age_time_coeff = 3750,
-               .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6341,
-               .ops = &mv88e6141_ops,
-       },
-
        [MV88E6341] = {
                .prod_num = PORT_SWITCH_ID_PROD_NUM_6341,
                .family = MV88E6XXX_FAMILY_6341,
@@ -4147,6 +4023,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
                .age_time_coeff = 3750,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6341,
                .ops = &mv88e6341_ops,
@@ -4162,6 +4040,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6350_ops,
@@ -4177,6 +4057,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6351_ops,
@@ -4192,6 +4074,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0xf,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6352_ops,
@@ -4206,6 +4090,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6390_ops,
@@ -4220,6 +4106,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global1_addr = 0x1b,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .atu_move_port_mask = 0x1f,
+               .pvt = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6390x_ops,
@@ -4455,6 +4343,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_mdb_add           = mv88e6xxx_port_mdb_add,
        .port_mdb_del           = mv88e6xxx_port_mdb_del,
        .port_mdb_dump          = mv88e6xxx_port_mdb_dump,
+       .crosschip_bridge_join  = mv88e6xxx_crosschip_bridge_join,
+       .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
 };
 
 static struct dsa_switch_driver mv88e6xxx_switch_drv = {
@@ -4466,12 +4356,14 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
        struct device *dev = chip->dev;
        struct dsa_switch *ds;
 
-       ds = dsa_switch_alloc(dev, DSA_MAX_PORTS);
+       ds = dsa_switch_alloc(dev, mv88e6xxx_num_ports(chip));
        if (!ds)
                return -ENOMEM;
 
        ds->priv = chip;
        ds->ops = &mv88e6xxx_switch_ops;
+       ds->ageing_time_min = chip->info->age_time_coeff;
+       ds->ageing_time_max = chip->info->age_time_coeff * U8_MAX;
 
        dev_set_drvdata(dev, ds);
 
@@ -4502,10 +4394,6 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 
        chip->info = compat_info;
 
-       err = mv88e6xxx_verify_madatory_ops(chip, chip->info->ops);
-       if (err)
-               return err;
-
        err = mv88e6xxx_smi_init(chip, mdiodev->bus, mdiodev->addr);
        if (err)
                return err;
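
The registration hunk above now derives the ageing-time bounds advertised to the DSA core from the per-chip age_time_coeff instead of a hard-coded range. A minimal standalone sketch of the resulting bounds (not driver code; coefficients taken from the info table above):

#include <stdio.h>

#define U8_MAX 0xff

int main(void)
{
	unsigned int coeffs[] = { 3750, 15000 }; /* 6390 vs 6352 families */

	for (int i = 0; i < 2; i++)
		printf("coeff %5u ms: min %u ms, max %u ms\n",
		       coeffs[i], coeffs[i], coeffs[i] * U8_MAX);
	return 0;
}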
index 75af86a7fad80feb606fa6bab4cf78720d228eb0..39825837a1c9c062accd80af5a8dc3d6ccd4b771 100644 (file)
@@ -3,7 +3,8 @@
  *
  * Copyright (c) 2008 Marvell Semiconductor
  *
- * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
index 1aec7382c02dff90864995d9910ab6f94676a792..e30cbe480d5b71efe212cb332833aaf759eb0a5a 100644 (file)
@@ -3,7 +3,8 @@
  *
  * Copyright (c) 2008 Marvell Semiconductor
  *
- * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -38,4 +39,15 @@ int mv88e6095_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g1_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 
+int mv88e6xxx_g1_atu_set_learn2all(struct mv88e6xxx_chip *chip, bool learn2all);
+int mv88e6xxx_g1_atu_set_age_time(struct mv88e6xxx_chip *chip,
+                                 unsigned int msecs);
+int mv88e6xxx_g1_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
+                            struct mv88e6xxx_atu_entry *entry);
+int mv88e6xxx_g1_atu_loadpurge(struct mv88e6xxx_chip *chip, u16 fid,
+                              struct mv88e6xxx_atu_entry *entry);
+int mv88e6xxx_g1_atu_flush(struct mv88e6xxx_chip *chip, u16 fid, bool all);
+int mv88e6xxx_g1_atu_remove(struct mv88e6xxx_chip *chip, u16 fid, int port,
+                           bool all);
+
 #endif /* _MV88E6XXX_GLOBAL1_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
new file mode 100644 (file)
index 0000000..fa7e7db
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ * Marvell 88E6xxx Address Translation Unit (ATU) support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ * Copyright (c) 2017 Savoir-faire Linux, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "mv88e6xxx.h"
+#include "global1.h"
+
+/* Offset 0x01: ATU FID Register */
+
+static int mv88e6xxx_g1_atu_fid_write(struct mv88e6xxx_chip *chip, u16 fid)
+{
+       return mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid & 0xfff);
+}
+
+/* Offset 0x0A: ATU Control Register */
+
+int mv88e6xxx_g1_atu_set_learn2all(struct mv88e6xxx_chip *chip, bool learn2all)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
+       if (err)
+               return err;
+
+       if (learn2all)
+               val |= GLOBAL_ATU_CONTROL_LEARN2ALL;
+       else
+               val &= ~GLOBAL_ATU_CONTROL_LEARN2ALL;
+
+       return mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val);
+}
+
+int mv88e6xxx_g1_atu_set_age_time(struct mv88e6xxx_chip *chip,
+                                 unsigned int msecs)
+{
+       const unsigned int coeff = chip->info->age_time_coeff;
+       const unsigned int min = 0x01 * coeff;
+       const unsigned int max = 0xff * coeff;
+       u8 age_time;
+       u16 val;
+       int err;
+
+       if (msecs < min || msecs > max)
+               return -ERANGE;
+
+       /* Round to nearest multiple of coeff */
+       age_time = (msecs + coeff / 2) / coeff;
+
+       err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
+       if (err)
+               return err;
+
+       /* AgeTime is 11:4 bits */
+       val &= ~0xff0;
+       val |= age_time << 4;
+
+       err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val);
+       if (err)
+               return err;
+
+       dev_dbg(chip->dev, "AgeTime set to 0x%02x (%d ms)\n", age_time,
+               age_time * coeff);
+
+       return 0;
+}
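
A standalone sketch of the AgeTime arithmetic above, assuming a 6390-family coefficient: round the requested interval to the nearest multiple of coeff, then place the 8-bit result in bits 11:4 of ATU Control:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int coeff = 3750;   /* 88E6390 family */
	unsigned int msecs = 300000; /* bridge default of 5 minutes */
	uint8_t age_time = (msecs + coeff / 2) / coeff;
	uint16_t val = 0;            /* other ATU Control bits elided */

	val = (val & ~0xff0) | (age_time << 4);
	printf("AgeTime 0x%02x = %u ms, reg 0x%04x\n",
	       age_time, age_time * coeff, val);
	return 0;
}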
+
+/* Offset 0x0B: ATU Operation Register */
+
+static int mv88e6xxx_g1_atu_op_wait(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g1_wait(chip, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY);
+}
+
+static int mv88e6xxx_g1_atu_op(struct mv88e6xxx_chip *chip, u16 fid, u16 op)
+{
+       u16 val;
+       int err;
+
+       /* FID bits were spread across registers as chips gained more databases */
+       if (mv88e6xxx_num_databases(chip) > 256) {
+               err = mv88e6xxx_g1_atu_fid_write(chip, fid);
+               if (err)
+                       return err;
+       } else {
+               if (mv88e6xxx_num_databases(chip) > 16) {
+                       /* ATU DBNum[7:4] are located in ATU Control 15:12 */
+                       err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
+                       if (err)
+                               return err;
+
+                       val = (val & 0x0fff) | ((fid << 8) & 0xf000);
+                       err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val);
+                       if (err)
+                               return err;
+               }
+
+               /* ATU DBNum[3:0] are located in ATU Operation 3:0 */
+               op |= fid & 0xf;
+       }
+
+       err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_OP, op);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g1_atu_op_wait(chip);
+}
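
Where the FID bits land depends on how many databases the chip supports. A standalone sketch of the dispatch above (the opcode value is a placeholder):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int num_databases = 256; /* DBNum split across two regs */
	uint16_t fid = 0xa5, op = 0xb000; /* placeholder opcode bits */

	if (num_databases > 256) {
		printf("ATU FID register <- 0x%03x\n", fid & 0xfff);
	} else {
		if (num_databases > 16)
			printf("ATU Control[15:12] <- 0x%x\n",
			       (fid >> 4) & 0xf);
		op |= fid & 0xf;
	}
	printf("ATU Op <- 0x%04x\n", op);
	return 0;
}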
+
+/* Offset 0x0C: ATU Data Register */
+
+static int mv88e6xxx_g1_atu_data_read(struct mv88e6xxx_chip *chip,
+                                     struct mv88e6xxx_atu_entry *entry)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_DATA, &val);
+       if (err)
+               return err;
+
+       entry->state = val & 0xf;
+       if (entry->state != GLOBAL_ATU_DATA_STATE_UNUSED) {
+               entry->trunk = !!(val & GLOBAL_ATU_DATA_TRUNK);
+               entry->portvec = (val >> 4) & mv88e6xxx_port_mask(chip);
+       }
+
+       return 0;
+}
+
+static int mv88e6xxx_g1_atu_data_write(struct mv88e6xxx_chip *chip,
+                                      struct mv88e6xxx_atu_entry *entry)
+{
+       u16 data = entry->state & 0xf;
+
+       if (entry->state != GLOBAL_ATU_DATA_STATE_UNUSED) {
+               if (entry->trunk)
+                       data |= GLOBAL_ATU_DATA_TRUNK;
+
+               data |= (entry->portvec & mv88e6xxx_port_mask(chip)) << 4;
+       }
+
+       return mv88e6xxx_g1_write(chip, GLOBAL_ATU_DATA, data);
+}
+
+/* Offset 0x0D: ATU MAC Address Register Bytes 0 & 1
+ * Offset 0x0E: ATU MAC Address Register Bytes 2 & 3
+ * Offset 0x0F: ATU MAC Address Register Bytes 4 & 5
+ */
+
+static int mv88e6xxx_g1_atu_mac_read(struct mv88e6xxx_chip *chip,
+                                    struct mv88e6xxx_atu_entry *entry)
+{
+       u16 val;
+       int i, err;
+
+       for (i = 0; i < 3; i++) {
+               err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_MAC_01 + i, &val);
+               if (err)
+                       return err;
+
+               entry->mac[i * 2] = val >> 8;
+               entry->mac[i * 2 + 1] = val & 0xff;
+       }
+
+       return 0;
+}
+
+static int mv88e6xxx_g1_atu_mac_write(struct mv88e6xxx_chip *chip,
+                                     struct mv88e6xxx_atu_entry *entry)
+{
+       u16 val;
+       int i, err;
+
+       for (i = 0; i < 3; i++) {
+               val = (entry->mac[i * 2] << 8) | entry->mac[i * 2 + 1];
+               err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_MAC_01 + i, val);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
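
A standalone sketch of the packing shared by the two helpers above: the six MAC bytes become three big-endian 16-bit register values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	for (int i = 0; i < 3; i++) {
		uint16_t val = (mac[i * 2] << 8) | mac[i * 2 + 1];
		printf("ATU_MAC_%d%d <- 0x%04x\n", i * 2, i * 2 + 1, val);
	}
	return 0;
}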
+
+/* Address Translation Unit operations */
+
+int mv88e6xxx_g1_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
+                            struct mv88e6xxx_atu_entry *entry)
+{
+       int err;
+
+       err = mv88e6xxx_g1_atu_op_wait(chip);
+       if (err)
+               return err;
+
+       /* Write the MAC address to iterate from only once */
+       if (entry->state == GLOBAL_ATU_DATA_STATE_UNUSED) {
+               err = mv88e6xxx_g1_atu_mac_write(chip, entry);
+               if (err)
+                       return err;
+       }
+
+       err = mv88e6xxx_g1_atu_op(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_atu_data_read(chip, entry);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g1_atu_mac_read(chip, entry);
+}
+
+int mv88e6xxx_g1_atu_loadpurge(struct mv88e6xxx_chip *chip, u16 fid,
+                              struct mv88e6xxx_atu_entry *entry)
+{
+       int err;
+
+       err = mv88e6xxx_g1_atu_op_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_atu_mac_write(chip, entry);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_atu_data_write(chip, entry);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g1_atu_op(chip, fid, GLOBAL_ATU_OP_LOAD_DB);
+}
+
+static int mv88e6xxx_g1_atu_flushmove(struct mv88e6xxx_chip *chip, u16 fid,
+                                     struct mv88e6xxx_atu_entry *entry,
+                                     bool all)
+{
+       u16 op;
+       int err;
+
+       err = mv88e6xxx_g1_atu_op_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_atu_data_write(chip, entry);
+       if (err)
+               return err;
+
+       /* Flush/Move all or non-static entries from all or a given database */
+       if (all && fid)
+               op = GLOBAL_ATU_OP_FLUSH_MOVE_ALL_DB;
+       else if (fid)
+               op = GLOBAL_ATU_OP_FLUSH_MOVE_NON_STATIC_DB;
+       else if (all)
+               op = GLOBAL_ATU_OP_FLUSH_MOVE_ALL;
+       else
+               op = GLOBAL_ATU_OP_FLUSH_MOVE_NON_STATIC;
+
+       return mv88e6xxx_g1_atu_op(chip, fid, op);
+}
+
+int mv88e6xxx_g1_atu_flush(struct mv88e6xxx_chip *chip, u16 fid, bool all)
+{
+       struct mv88e6xxx_atu_entry entry = {
+               .state = 0, /* Null EntryState means Flush */
+       };
+
+       return mv88e6xxx_g1_atu_flushmove(chip, fid, &entry, all);
+}
+
+static int mv88e6xxx_g1_atu_move(struct mv88e6xxx_chip *chip, u16 fid,
+                                int from_port, int to_port, bool all)
+{
+       struct mv88e6xxx_atu_entry entry = { 0 };
+       unsigned long mask;
+       int shift;
+
+       if (!chip->info->atu_move_port_mask)
+               return -EOPNOTSUPP;
+
+       mask = chip->info->atu_move_port_mask;
+       shift = bitmap_weight(&mask, 16);
+
+       entry.state = 0xf; /* Full EntryState means Move */
+       entry.portvec = from_port & mask;
+       entry.portvec |= (to_port & mask) << shift;
+
+       return mv88e6xxx_g1_atu_flushmove(chip, fid, &entry, all);
+}
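
A standalone sketch of the Move encoding above, assuming a 4-bit atu_move_port_mask; the popcount of the mask gives the ToPort shift, matching the bitmap_weight() call:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t mask = 0xf;               /* e.g. 88E6352 family */
	int shift = __builtin_popcount(mask);
	int from_port = 2;
	int to_port = mask;                /* all-ones ToPort = remove */
	uint16_t portvec = (from_port & mask) | ((to_port & mask) << shift);

	printf("shift %d, PortVec 0x%03x\n", shift, portvec);
	return 0;
}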
+
+int mv88e6xxx_g1_atu_remove(struct mv88e6xxx_chip *chip, u16 fid, int port,
+                           bool all)
+{
+       int from_port = port;
+       int to_port = chip->info->atu_move_port_mask;
+
+       return mv88e6xxx_g1_atu_move(chip, fid, from_port, to_port, all);
+}
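
A hypothetical caller of the new ATU API, modeled on the driver's FDB dump path. It assumes a valid chip handle; seeding the entry with the broadcast address and an unused state makes getnext write the starting MAC exactly once, and iteration stops once it wraps back to broadcast:

/* sketch only: relies on mv88e6xxx.h/global1.h plus <linux/etherdevice.h> */
static int example_atu_walk(struct mv88e6xxx_chip *chip, u16 fid)
{
	struct mv88e6xxx_atu_entry entry = {
		.state = GLOBAL_ATU_DATA_STATE_UNUSED,
		.mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
	};
	int err;

	do {
		err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry);
		if (err)
			return err;
		if (entry.state == GLOBAL_ATU_DATA_STATE_UNUSED)
			break;
		/* entry.mac, entry.portvec and entry.trunk are valid here */
	} while (!is_broadcast_ether_addr(entry.mac));

	return 0;
}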
index 8f15bc7b1f5f88d2e8150f78cdf79b9be28cb07c..7c6bc33a9516a65c9bce8a626fe631692928a5f8 100644 (file)
@@ -4,7 +4,8 @@
  *
  * Copyright (c) 2008 Marvell Semiconductor
  *
- * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -12,6 +13,7 @@
  * (at your option) any later version.
  */
 
+#include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include "mv88e6xxx.h"
 #include "global2.h"
@@ -170,6 +172,50 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
        return err;
 }
 
+/* Offset 0x0B: Cross-chip Port VLAN (Addr) Register
+ * Offset 0x0C: Cross-chip Port VLAN Data Register
+ */
+
+static int mv88e6xxx_g2_pvt_op_wait(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g2_wait(chip, GLOBAL2_PVT_ADDR, GLOBAL2_PVT_ADDR_BUSY);
+}
+
+static int mv88e6xxx_g2_pvt_op(struct mv88e6xxx_chip *chip, int src_dev,
+                              int src_port, u16 op)
+{
+       int err;
+
+       /* 9-bit Cross-chip PVT pointer: with GLOBAL2_MISC_5_BIT_PORT cleared,
+        * source device is 5-bit, source port is 4-bit.
+        */
+       op |= (src_dev & 0x1f) << 4;
+       op |= (src_port & 0xf);
+
+       err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_ADDR, op);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_pvt_op_wait(chip);
+}
+
+int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
+                          int src_port, u16 data)
+{
+       int err;
+
+       err = mv88e6xxx_g2_pvt_op_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_DATA, data);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_pvt_op(chip, src_dev, src_port,
+                                  GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN);
+}
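
A standalone sketch of the 9-bit PVT pointer composed above; the opcode bits are placeholders, only the device/port packing mirrors the driver:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int src_dev = 3, src_port = 7;
	uint16_t op = 0x8000;              /* placeholder busy/op bits */

	op |= (src_dev & 0x1f) << 4;       /* 5-bit source device */
	op |= src_port & 0xf;              /* 4-bit source port */
	printf("PVT address op 0x%04x\n", op);
	return 0;
}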
+
 /* Offset 0x0D: Switch MAC/WoL/WoF register */
 
 static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
@@ -782,6 +828,31 @@ static int mv88e6xxx_g2_watchdog_setup(struct mv88e6xxx_chip *chip)
        return err;
 }
 
+/* Offset 0x1D: Misc Register */
+
+static int mv88e6xxx_g2_misc_5_bit_port(struct mv88e6xxx_chip *chip,
+                                       bool port_5_bit)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_g2_read(chip, GLOBAL2_MISC, &val);
+       if (err)
+               return err;
+
+       if (port_5_bit)
+               val |= GLOBAL2_MISC_5_BIT_PORT;
+       else
+               val &= ~GLOBAL2_MISC_5_BIT_PORT;
+
+       return mv88e6xxx_g2_write(chip, GLOBAL2_MISC, val);
+}
+
+int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g2_misc_5_bit_port(chip, false);
+}
+
 static void mv88e6xxx_g2_irq_mask(struct irq_data *d)
 {
        struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -964,14 +1035,6 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
                                return err;
        }
 
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
-               /* Initialize Cross-chip Port VLAN Table to reset defaults */
-               err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_ADDR,
-                                        GLOBAL2_PVT_ADDR_OP_INIT_ONES);
-               if (err)
-                       return err;
-       }
-
        if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
                /* Clear the priority override table. */
                err = mv88e6xxx_g2_clear_pot(chip);
index a8b2f9486a4abad1227030f57ce00b6d8cbb1fbb..96046bb12ca17333530f237fddb46438c3298dea 100644 (file)
@@ -3,7 +3,8 @@
  *
  * Copyright (c) 2008 Marvell Semiconductor
  *
- * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -41,6 +42,10 @@ int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
 int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
                              struct ethtool_eeprom *eeprom, u8 *data);
 
+int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
+                          int src_port, u16 data);
+int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
+
 int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
@@ -109,6 +114,17 @@ static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
        return -EOPNOTSUPP;
 }
 
+static inline int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip,
+                                        int src_dev, int src_port, u16 data)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 {
        return -EOPNOTSUPP;
index 6033f2f6260a464418fcc981e5430e155e836bc3..c8f54986996b73893efbebff09e7bc40273c1959 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/irq.h>
 #include <linux/gpio/consumer.h>
 #include <linux/phy.h>
+#include <net/dsa.h>
 
 #ifndef UINT64_MAX
 #define UINT64_MAX             (u64)(~((u64)0))
 #define PORT_CONTROL_TAG_IF_BOTH       BIT(6)
 #define PORT_CONTROL_USE_IP            BIT(5)
 #define PORT_CONTROL_USE_TAG           BIT(4)
-#define PORT_CONTROL_FORWARD_UNKNOWN_MC        BIT(3)
 #define PORT_CONTROL_FORWARD_UNKNOWN   BIT(2)
-#define PORT_CONTROL_NOT_EGRESS_UNKNOWN_DA             (0x0 << 2)
-#define PORT_CONTROL_NOT_EGRESS_UNKNOWN_MULTICAST_DA   (0x1 << 2)
-#define PORT_CONTROL_NOT_EGRESS_UNKNOWN_UNITCAST_DA    (0x2 << 2)
-#define PORT_CONTROL_EGRESS_ALL_UNKNOWN_DA             (0x3 << 2)
+#define PORT_CONTROL_EGRESS_FLOODS_MASK                        (0x3 << 2)
+#define PORT_CONTROL_EGRESS_FLOODS_NO_UNKNOWN_DA       (0x0 << 2)
+#define PORT_CONTROL_EGRESS_FLOODS_NO_UNKNOWN_MC_DA    (0x1 << 2)
+#define PORT_CONTROL_EGRESS_FLOODS_NO_UNKNOWN_UC_DA    (0x2 << 2)
+#define PORT_CONTROL_EGRESS_FLOODS_ALL_UNKNOWN_DA      (0x3 << 2)
 #define PORT_CONTROL_STATE_MASK                0x03
 #define PORT_CONTROL_STATE_DISABLED    0x00
 #define PORT_CONTROL_STATE_BLOCKING    0x01
 #define PORT_CONTROL_STATE_LEARNING    0x02
 #define PORT_CONTROL_STATE_FORWARDING  0x03
 #define PORT_CONTROL_1         0x05
+#define PORT_CONTROL_1_MESSAGE_PORT    BIT(15)
 #define PORT_CONTROL_1_FID_11_4_MASK   (0xff << 0)
 #define PORT_BASE_VLAN         0x06
 #define PORT_BASE_VLAN_FID_3_0_MASK    (0xf << 12)
 #define PORT_CONTROL_2_DISCARD_UNTAGGED        BIT(8)
 #define PORT_CONTROL_2_MAP_DA          BIT(7)
 #define PORT_CONTROL_2_DEFAULT_FORWARD BIT(6)
-#define PORT_CONTROL_2_FORWARD_UNKNOWN BIT(6)
 #define PORT_CONTROL_2_EGRESS_MONITOR  BIT(5)
 #define PORT_CONTROL_2_INGRESS_MONITOR BIT(4)
 #define PORT_CONTROL_2_UPSTREAM_MASK   0x0f
 #define PORT_ATU_CONTROL       0x0c
 #define PORT_PRI_OVERRIDE      0x0d
 #define PORT_ETH_TYPE          0x0f
+#define PORT_ETH_TYPE_DEFAULT  0x9100
 #define PORT_IN_DISCARD_LO     0x10
 #define PORT_IN_DISCARD_HI     0x11
 #define PORT_IN_FILTERED       0x12
 #define GLOBAL2_WDOG_FORCE_IRQ                 BIT(0)
 #define GLOBAL2_QOS_WEIGHT     0x1c
 #define GLOBAL2_MISC           0x1d
+#define GLOBAL2_MISC_5_BIT_PORT        BIT(14)
 
 #define MV88E6XXX_N_FID                4096
 
+/* PVT limits for 4-bit port and 5-bit switch */
+#define MV88E6XXX_MAX_PVT_SWITCHES     32
+#define MV88E6XXX_MAX_PVT_PORTS                16
+
 enum mv88e6xxx_frame_mode {
        MV88E6XXX_FRAME_MODE_NORMAL,
        MV88E6XXX_FRAME_MODE_DSA,
@@ -525,8 +532,6 @@ enum mv88e6xxx_cap {
        MV88E6XXX_CAP_G2_MGMT_EN_0X,    /* (0x03) MGMT Enable Register 0x */
        MV88E6XXX_CAP_G2_IRL_CMD,       /* (0x09) Ingress Rate Command */
        MV88E6XXX_CAP_G2_IRL_DATA,      /* (0x0a) Ingress Rate Data */
-       MV88E6XXX_CAP_G2_PVT_ADDR,      /* (0x0b) Cross Chip Port VLAN Addr */
-       MV88E6XXX_CAP_G2_PVT_DATA,      /* (0x0c) Cross Chip Port VLAN Data */
        MV88E6XXX_CAP_G2_POT,           /* (0x0f) Priority Override Table */
 
        /* Per VLAN Spanning Tree Unit (STU).
@@ -551,7 +556,6 @@ enum mv88e6xxx_cap {
 
 #define MV88E6XXX_FLAG_SERDES          BIT_ULL(MV88E6XXX_CAP_SERDES)
 
-#define MV88E6XXX_FLAG_G1_ATU_FID      BIT_ULL(MV88E6XXX_CAP_G1_ATU_FID)
 #define MV88E6XXX_FLAG_G1_VTU_FID      BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID)
 
 #define MV88E6XXX_FLAG_GLOBAL2         BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
@@ -560,8 +564,6 @@ enum mv88e6xxx_cap {
 #define MV88E6XXX_FLAG_G2_MGMT_EN_0X   BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
 #define MV88E6XXX_FLAG_G2_IRL_CMD      BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD)
 #define MV88E6XXX_FLAG_G2_IRL_DATA     BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA)
-#define MV88E6XXX_FLAG_G2_PVT_ADDR     BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR)
-#define MV88E6XXX_FLAG_G2_PVT_DATA     BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA)
 #define MV88E6XXX_FLAG_G2_POT          BIT_ULL(MV88E6XXX_CAP_G2_POT)
 
 #define MV88E6XXX_FLAG_STU             BIT_ULL(MV88E6XXX_CAP_STU)
@@ -577,11 +579,6 @@ enum mv88e6xxx_cap {
        (MV88E6XXX_FLAG_SMI_CMD |       \
         MV88E6XXX_FLAG_SMI_DATA)
 
-/* Cross-chip Port VLAN Table */
-#define MV88E6XXX_FLAGS_PVT            \
-       (MV88E6XXX_FLAG_G2_PVT_ADDR |   \
-        MV88E6XXX_FLAG_G2_PVT_DATA)
-
 /* Fiber/SERDES Registers at SMI address F, page 1 */
 #define MV88E6XXX_FLAGS_SERDES         \
        (MV88E6XXX_FLAG_PHY_PAGE |      \
@@ -594,8 +591,7 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6097    \
-       (MV88E6XXX_FLAG_G1_ATU_FID |    \
-        MV88E6XXX_FLAG_G1_VTU_FID |    \
+       (MV88E6XXX_FLAG_G1_VTU_FID |    \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_INT |        \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
@@ -604,12 +600,10 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6165    \
-       (MV88E6XXX_FLAG_G1_ATU_FID |    \
-        MV88E6XXX_FLAG_G1_VTU_FID |    \
+       (MV88E6XXX_FLAG_G1_VTU_FID |    \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_INT |        \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
@@ -618,8 +612,7 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6185    \
        (MV88E6XXX_FLAG_GLOBAL2 |       \
@@ -636,12 +629,10 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_G2_POT |        \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6341    \
        (MV88E6XXX_FLAG_EEE |           \
-        MV88E6XXX_FLAG_G1_ATU_FID |    \
         MV88E6XXX_FLAG_G1_VTU_FID |    \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_INT |        \
@@ -650,12 +641,10 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
         MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT |          \
         MV88E6XXX_FLAGS_SERDES)
 
 #define MV88E6XXX_FLAGS_FAMILY_6351    \
-       (MV88E6XXX_FLAG_G1_ATU_FID |    \
-        MV88E6XXX_FLAG_G1_VTU_FID |    \
+       (MV88E6XXX_FLAG_G1_VTU_FID |    \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_INT |        \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
@@ -664,12 +653,10 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6352    \
        (MV88E6XXX_FLAG_EEE |           \
-        MV88E6XXX_FLAG_G1_ATU_FID |    \
         MV88E6XXX_FLAG_G1_VTU_FID |    \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_INT |        \
@@ -680,7 +667,6 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
         MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT |          \
         MV88E6XXX_FLAGS_SERDES)
 
 #define MV88E6XXX_FLAGS_FAMILY_6390    \
@@ -690,8 +676,7 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_STU |           \
         MV88E6XXX_FLAG_VTU |           \
         MV88E6XXX_FLAGS_IRL |          \
-        MV88E6XXX_FLAGS_MULTI_CHIP |   \
-        MV88E6XXX_FLAGS_PVT)
+        MV88E6XXX_FLAGS_MULTI_CHIP)
 
 struct mv88e6xxx_ops;
 
@@ -705,16 +690,21 @@ struct mv88e6xxx_info {
        unsigned int global1_addr;
        unsigned int age_time_coeff;
        unsigned int g1_irqs;
+       bool pvt;
        enum dsa_tag_protocol tag_protocol;
        unsigned long long flags;
+
+       /* Mask for FromPort and ToPort value of PortVec used in ATU Move
+        * operation. 0 means that the ATU Move operation is not supported.
+        */
+       u8 atu_move_port_mask;
        const struct mv88e6xxx_ops *ops;
 };
 
 struct mv88e6xxx_atu_entry {
-       u16     fid;
        u8      state;
        bool    trunk;
-       u16     portv_trunkid;
+       u16     portvec;
        u8      mac[ETH_ALEN];
 };
 
@@ -864,14 +854,16 @@ struct mv88e6xxx_ops {
 
        int (*port_set_frame_mode)(struct mv88e6xxx_chip *chip, int port,
                                   enum mv88e6xxx_frame_mode mode);
-       int (*port_set_egress_unknowns)(struct mv88e6xxx_chip *chip, int port,
-                                       bool on);
+       int (*port_set_egress_floods)(struct mv88e6xxx_chip *chip, int port,
+                                     bool unicast, bool multicast);
        int (*port_set_ether_type)(struct mv88e6xxx_chip *chip, int port,
                                   u16 etype);
        int (*port_jumbo_config)(struct mv88e6xxx_chip *chip, int port);
 
        int (*port_egress_rate_limiting)(struct mv88e6xxx_chip *chip, int port);
        int (*port_pause_config)(struct mv88e6xxx_chip *chip, int port);
+       int (*port_disable_learn_limit)(struct mv88e6xxx_chip *chip, int port);
+       int (*port_disable_pri_override)(struct mv88e6xxx_chip *chip, int port);
 
        /* CMODE control what PHY mode the MAC will use, eg. SGMII, RGMII, etc.
         * Some chips allow this to be configured on specific ports.
@@ -934,6 +926,11 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
        return (chip->info->flags & flags) == flags;
 }
 
+static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip)
+{
+       return chip->info->pvt;
+}
+
 static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip)
 {
        return chip->info->num_databases;
@@ -944,6 +941,11 @@ static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip)
        return chip->info->num_ports;
 }
 
+static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip)
+{
+       return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
+}
+
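
A standalone sketch of the new port-mask helper, with a local stand-in for the kernel's GENMASK():

#include <stdio.h>
#include <stdint.h>

#define GENMASK(h, l) (((~0U) << (l)) & (~0U >> (31 - (h))))

int main(void)
{
	unsigned int num_ports = 11; /* e.g. 88E6390 */
	uint16_t mask = GENMASK(num_ports - 1, 0);

	printf("port mask 0x%04x\n", mask); /* 0x07ff */
	return 0;
}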
 int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
 int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
index 8875784c4718feee699355f91c2092622417cce1..548a956637eec56b4aac9d7e4b2890e078f51285 100644 (file)
@@ -3,7 +3,8 @@
  *
  * Copyright (c) 2008 Marvell Semiconductor
  *
- * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -497,8 +498,8 @@ int mv88e6351_port_set_frame_mode(struct mv88e6xxx_chip *chip, int port,
        return mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
 }
 
-int mv88e6085_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
-                                      bool on)
+static int mv88e6185_port_set_forward_unknown(struct mv88e6xxx_chip *chip,
+                                             int port, bool unicast)
 {
        int err;
        u16 reg;
@@ -507,7 +508,7 @@ int mv88e6085_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
        if (err)
                return err;
 
-       if (on)
+       if (unicast)
                reg |= PORT_CONTROL_FORWARD_UNKNOWN;
        else
                reg &= ~PORT_CONTROL_FORWARD_UNKNOWN;
@@ -515,8 +516,8 @@ int mv88e6085_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
        return mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
 }
 
-int mv88e6351_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
-                                      bool on)
+int mv88e6352_port_set_egress_floods(struct mv88e6xxx_chip *chip, int port,
+                                    bool unicast, bool multicast)
 {
        int err;
        u16 reg;
@@ -525,21 +526,45 @@ int mv88e6351_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
        if (err)
                return err;
 
-       if (on)
-               reg |= PORT_CONTROL_EGRESS_ALL_UNKNOWN_DA;
+       reg &= ~PORT_CONTROL_EGRESS_FLOODS_MASK;
+
+       if (unicast && multicast)
+               reg |= PORT_CONTROL_EGRESS_FLOODS_ALL_UNKNOWN_DA;
+       else if (unicast)
+               reg |= PORT_CONTROL_EGRESS_FLOODS_NO_UNKNOWN_MC_DA;
+       else if (multicast)
+               reg |= PORT_CONTROL_EGRESS_FLOODS_NO_UNKNOWN_UC_DA;
        else
-               reg &= ~PORT_CONTROL_EGRESS_ALL_UNKNOWN_DA;
+               reg |= PORT_CONTROL_EGRESS_FLOODS_NO_UNKNOWN_DA;
 
        return mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
 }
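
The (unicast, multicast) pair maps onto the 2-bit egress-floods field through the branch chain above. A standalone sketch printing the full truth table:

#include <stdio.h>

int main(void)
{
	for (int uc = 0; uc <= 1; uc++)
		for (int mc = 0; mc <= 1; mc++) {
			int bits = (uc && mc) ? 0x3 : uc ? 0x1 : mc ? 0x2 : 0x0;

			printf("unicast=%d multicast=%d -> 0x%x << 2\n",
			       uc, mc, bits);
		}
	return 0;
}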
 
 /* Offset 0x05: Port Control 1 */
 
+int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port,
+                                   bool message_port)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &val);
+       if (err)
+               return err;
+
+       if (message_port)
+               val |= PORT_CONTROL_1_MESSAGE_PORT;
+       else
+               val &= ~PORT_CONTROL_1_MESSAGE_PORT;
+
+       return mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, val);
+}
+
 /* Offset 0x06: Port Based VLAN Map */
 
 int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map)
 {
-       const u16 mask = GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
+       const u16 mask = mv88e6xxx_port_mask(chip);
        u16 reg;
        int err;
 
@@ -672,8 +697,8 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = {
        [PORT_CONTROL_2_8021Q_SECURE] = "Secure",
 };
 
-int mv88e6095_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
-                                      bool on)
+static int mv88e6185_port_set_default_forward(struct mv88e6xxx_chip *chip,
+                                             int port, bool multicast)
 {
        int err;
        u16 reg;
@@ -682,14 +707,26 @@ int mv88e6095_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
        if (err)
                return err;
 
-       if (on)
-               reg |= PORT_CONTROL_2_FORWARD_UNKNOWN;
+       if (multicast)
+               reg |= PORT_CONTROL_2_DEFAULT_FORWARD;
        else
-               reg &= ~PORT_CONTROL_2_FORWARD_UNKNOWN;
+               reg &= ~PORT_CONTROL_2_DEFAULT_FORWARD;
 
        return mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
 }
 
+int mv88e6185_port_set_egress_floods(struct mv88e6xxx_chip *chip, int port,
+                                    bool unicast, bool multicast)
+{
+       int err;
+
+       err = mv88e6185_port_set_forward_unknown(chip, port, unicast);
+       if (err)
+               return err;
+
+       return mv88e6185_port_set_default_forward(chip, port, multicast);
+}
+
 int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
                                     int upstream_port)
 {
@@ -769,6 +806,20 @@ int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port)
        return mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, 0x0001);
 }
 
+/* Offset 0x0C: Port ATU Control */
+
+int mv88e6xxx_port_disable_learn_limit(struct mv88e6xxx_chip *chip, int port)
+{
+       return mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL, 0);
+}
+
+/* Offset 0x0D: (Priority) Override Register */
+
+int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port)
+{
+       return mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE, 0);
+}
+
 /* Offset 0x0f: Port Ether type */
 
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
index c83cbb3f449182317a21c9fb894a114d99a08ebb..86f40887b6d28d998316706fe36333c471ec1c25 100644 (file)
@@ -3,7 +3,8 @@
  *
  * Copyright (c) 2008 Marvell Semiconductor
  *
- * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -56,14 +57,14 @@ int mv88e6085_port_set_frame_mode(struct mv88e6xxx_chip *chip, int port,
                                  enum mv88e6xxx_frame_mode mode);
 int mv88e6351_port_set_frame_mode(struct mv88e6xxx_chip *chip, int port,
                                  enum mv88e6xxx_frame_mode mode);
-int mv88e6085_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
-                                      bool on);
-int mv88e6095_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
-                                      bool on);
-int mv88e6351_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port,
-                                      bool on);
+int mv88e6185_port_set_egress_floods(struct mv88e6xxx_chip *chip, int port,
+                                    bool unicast, bool multicast);
+int mv88e6352_port_set_egress_floods(struct mv88e6xxx_chip *chip, int port,
+                                    bool unicast, bool multicast);
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
                                  u16 etype);
+int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port,
+                                   bool message_port);
 int mv88e6165_port_jumbo_config(struct mv88e6xxx_chip *chip, int port);
 int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
 int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
@@ -75,4 +76,8 @@ int mv88e6xxx_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port);
 int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
                                     int upstream_port);
+
+int mv88e6xxx_port_disable_learn_limit(struct mv88e6xxx_chip *chip, int port);
+int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port);
+
 #endif /* _MV88E6XXX_PORT_H */
index 2c80611b94aef3c5ce9d0f98e8d92e497542123a..149244aac20aa765551b93ca25d78018b28f17f9 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/moduleparam.h>
 #include <linux/rtnetlink.h>
+#include <linux/net_tstamp.h>
 #include <net/rtnetlink.h>
 #include <linux/u64_stats_sync.h>
 
@@ -125,6 +126,7 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
        dstats->tx_bytes += skb->len;
        u64_stats_update_end(&dstats->syncp);
 
+       skb_tx_timestamp(skb);
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
 }
@@ -304,8 +306,21 @@ static void dummy_get_drvinfo(struct net_device *dev,
        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
+static int dummy_get_ts_info(struct net_device *dev,
+                             struct ethtool_ts_info *ts_info)
+{
+       ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+                                  SOF_TIMESTAMPING_RX_SOFTWARE |
+                                  SOF_TIMESTAMPING_SOFTWARE;
+
+       ts_info->phc_index = -1;
+
+       return 0;
+}
+
 static const struct ethtool_ops dummy_ethtool_ops = {
        .get_drvinfo            = dummy_get_drvinfo,
+       .get_ts_info            = dummy_get_ts_info,
 };
 
 static void dummy_free_netdev(struct net_device *dev)
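
With get_ts_info wired up, userspace can see the software-timestamping capabilities through the standard ethtool ioctl. A minimal sketch, assuming a dummy0 interface exists:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO };
	struct ifreq ifr = { .ifr_data = (void *)&info };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	strncpy(ifr.ifr_name, "dummy0", IFNAMSIZ - 1);
	if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("SIOCETHTOOL");
		return 1;
	}
	printf("so_timestamping 0x%x, phc_index %d\n",
	       info.so_timestamping, info.phc_index);
	close(fd);
	return 0;
}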
index 8c08f9deef9268e4cacc939a2534110a42be6c3b..edae15ac0e982e7a1678627253dc5bcd696737bc 100644 (file)
@@ -180,5 +180,6 @@ source "drivers/net/ethernet/via/Kconfig"
 source "drivers/net/ethernet/wiznet/Kconfig"
 source "drivers/net/ethernet/xilinx/Kconfig"
 source "drivers/net/ethernet/xircom/Kconfig"
+source "drivers/net/ethernet/synopsys/Kconfig"
 
 endif # ETHERNET
index 26dce5bf2c18c966c5b378cf79b385aa726f9b4f..bf7f4502cabcf2b40f735d3a928a475f6d03c061 100644 (file)
@@ -91,3 +91,4 @@ obj-$(CONFIG_NET_VENDOR_VIA) += via/
 obj-$(CONFIG_NET_VENDOR_WIZNET) += wiznet/
 obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/
 obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/
+obj-$(CONFIG_NET_VENDOR_SYNOPSYS) += synopsys/
index 8c3b56198e4b731125648284086850eab1fdf0f2..4ad5b9be3f84c52b9e04f6919431c198a6d5a011 100644 (file)
@@ -68,13 +68,6 @@ struct net_dma_desc_tx {
 };
 
 struct bfin_mac_local {
-       /*
-        * these are things that the kernel wants me to keep, so users
-        * can find out semi-useless statistics of how well the card is
-        * performing
-        */
-       struct net_device_stats stats;
-
        spinlock_t lock;
 
        int wol;                /* Wake On Lan */
index 9f7422ada704e9484b79d336ece46a0547667abb..d8e133ced7b8a026682ee5f60396e380b4fa5ee6 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/crc32.h>
 #include <linux/mii.h>
 #include <linux/of_device.h>
+#include <linux/of_net.h>
 #include <linux/of_platform.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
@@ -1454,11 +1455,10 @@ static int greth_of_probe(struct platform_device *ofdev)
                        break;
        }
        if (i == 6) {
-               const unsigned char *addr;
-               int len;
-               addr = of_get_property(ofdev->dev.of_node, "local-mac-address",
-                                       &len);
-               if (addr != NULL && len == 6) {
+               const u8 *addr;
+
+               addr = of_get_mac_address(ofdev->dev.of_node);
+               if (addr) {
                        for (i = 0; i < 6; i++)
                                macaddr[i] = (unsigned int) addr[i];
                } else {
index 86f1626816ffa62212cbb703e12aae8bdf14fec4..127adbeefb105cc031f3782b534d175f29fb7143 100644 (file)
 #define XP_ECC_CNT1_DESC_DED_WIDTH             8
 #define XP_ECC_CNT1_DESC_SEC_INDEX             0
 #define XP_ECC_CNT1_DESC_SEC_WIDTH             8
-#define XP_ECC_IER_DESC_DED_INDEX              0
+#define XP_ECC_IER_DESC_DED_INDEX              5
 #define XP_ECC_IER_DESC_DED_WIDTH              1
-#define XP_ECC_IER_DESC_SEC_INDEX              1
+#define XP_ECC_IER_DESC_SEC_INDEX              4
 #define XP_ECC_IER_DESC_SEC_WIDTH              1
-#define XP_ECC_IER_RX_DED_INDEX                        2
+#define XP_ECC_IER_RX_DED_INDEX                        3
 #define XP_ECC_IER_RX_DED_WIDTH                        1
-#define XP_ECC_IER_RX_SEC_INDEX                        3
+#define XP_ECC_IER_RX_SEC_INDEX                        2
 #define XP_ECC_IER_RX_SEC_WIDTH                        1
-#define XP_ECC_IER_TX_DED_INDEX                        4
+#define XP_ECC_IER_TX_DED_INDEX                        1
 #define XP_ECC_IER_TX_DED_WIDTH                        1
-#define XP_ECC_IER_TX_SEC_INDEX                        5
+#define XP_ECC_IER_TX_SEC_INDEX                        0
 #define XP_ECC_IER_TX_SEC_WIDTH                        1
-#define XP_ECC_ISR_DESC_DED_INDEX              0
+#define XP_ECC_ISR_DESC_DED_INDEX              5
 #define XP_ECC_ISR_DESC_DED_WIDTH              1
-#define XP_ECC_ISR_DESC_SEC_INDEX              1
+#define XP_ECC_ISR_DESC_SEC_INDEX              4
 #define XP_ECC_ISR_DESC_SEC_WIDTH              1
-#define XP_ECC_ISR_RX_DED_INDEX                        2
+#define XP_ECC_ISR_RX_DED_INDEX                        3
 #define XP_ECC_ISR_RX_DED_WIDTH                        1
-#define XP_ECC_ISR_RX_SEC_INDEX                        3
+#define XP_ECC_ISR_RX_SEC_INDEX                        2
 #define XP_ECC_ISR_RX_SEC_WIDTH                        1
-#define XP_ECC_ISR_TX_DED_INDEX                        4
+#define XP_ECC_ISR_TX_DED_INDEX                        1
 #define XP_ECC_ISR_TX_DED_WIDTH                        1
-#define XP_ECC_ISR_TX_SEC_INDEX                        5
+#define XP_ECC_ISR_TX_SEC_INDEX                        0
 #define XP_ECC_ISR_TX_SEC_WIDTH                        1
 #define XP_I2C_MUTEX_BUSY_INDEX                        31
 #define XP_I2C_MUTEX_BUSY_WIDTH                        1
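
These defines follow the driver's INDEX/WIDTH convention, where a field is read as (reg >> INDEX) & ((1 << WIDTH) - 1); the hunk above renumbers the ECC interrupt bits so that TX_SEC is bit 0 and DESC_DED bit 5. A standalone sketch of the extraction:

#include <stdio.h>

#define GET_BITS(val, index, width) \
	(((val) >> (index)) & ((1U << (width)) - 1))

int main(void)
{
	unsigned int isr = 0x21; /* DESC_DED (bit 5) + TX_SEC (bit 0) set */

	printf("desc_ded=%u tx_sec=%u\n",
	       GET_BITS(isr, 5, 1), GET_BITS(isr, 0, 1));
	return 0;
}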
index a713abd9d03e63aea8aab96c4f5569d1d6b3e44d..c772420fa41caf57907610bce4528da9c70be63e 100644 (file)
 #include <linux/spinlock.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
+#include <linux/interrupt.h>
 #include <net/busy_poll.h>
 #include <linux/clk.h>
 #include <linux/if_ether.h>
@@ -1854,7 +1855,8 @@ static int xgbe_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
        if (tc_to_netdev->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       tc = tc_to_netdev->tc;
+       tc_to_netdev->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+       tc = tc_to_netdev->mqprio->num_tc;
 
        if (tc > pdata->hw_feat.tc_cnt)
                return -EINVAL;
index 0c7088a426e90ddaa32c09795506bc8805a377f6..417bdb5982a93ca252962f7fcd6141a2f53fca37 100644 (file)
  */
 
 #include <linux/module.h>
+#include <linux/interrupt.h>
 #include <linux/kmod.h>
 #include <linux/delay.h>
 #include <linux/completion.h>
index 4c5b90eea4af2e389decec1d80b31e7134cee140..b672d92495397bb3132c90e25db17872c4589fa6 100644 (file)
  *     THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/kmod.h>
 #include <linux/mdio.h>
index ec63d706d464710af057591df62cfd0c4500b14b..59efe5b145ddf562e11fda61acad07b2da82548e 100644 (file)
@@ -1 +1,2 @@
 source "drivers/net/ethernet/apm/xgene/Kconfig"
+source "drivers/net/ethernet/apm/xgene-v2/Kconfig"
index 65ce32ad1b2cc66a2c017cf8a06f8e2099bbaa9e..946b2a4c882d3cb627849ac25817e9d4e45591de 100644 (file)
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_NET_XGENE) += xgene/
+obj-$(CONFIG_NET_XGENE_V2) += xgene-v2/
diff --git a/drivers/net/ethernet/apm/xgene-v2/Kconfig b/drivers/net/ethernet/apm/xgene-v2/Kconfig
new file mode 100644 (file)
index 0000000..1205861
--- /dev/null
@@ -0,0 +1,11 @@
+config NET_XGENE_V2
+       tristate "APM X-Gene SoC Ethernet-v2 Driver"
+       depends on HAS_DMA
+       depends on ARCH_XGENE || COMPILE_TEST
+       help
+         This is the Ethernet driver for the on-chip Ethernet interface
+         on APM X-Gene SoCs, which uses a linked-list DMA descriptor
+         architecture (v2).
+
+         To compile this driver as a module, choose M here. This module will
+         be called xgene-enet-v2.
diff --git a/drivers/net/ethernet/apm/xgene-v2/Makefile b/drivers/net/ethernet/apm/xgene-v2/Makefile
new file mode 100644 (file)
index 0000000..f16a2b3
--- /dev/null
@@ -0,0 +1,6 @@
+#
+# Makefile for APM X-Gene Ethernet v2 driver
+#
+
+xgene-enet-v2-objs := main.o mac.o enet.o ring.o mdio.o ethtool.o
+obj-$(CONFIG_NET_XGENE_V2) += xgene-enet-v2.o
diff --git a/drivers/net/ethernet/apm/xgene-v2/enet.c b/drivers/net/ethernet/apm/xgene-v2/enet.c
new file mode 100644 (file)
index 0000000..5998da0
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+void xge_wr_csr(struct xge_pdata *pdata, u32 offset, u32 val)
+{
+       void __iomem *addr = pdata->resources.base_addr + offset;
+
+       iowrite32(val, addr);
+}
+
+u32 xge_rd_csr(struct xge_pdata *pdata, u32 offset)
+{
+       void __iomem *addr = pdata->resources.base_addr + offset;
+
+       return ioread32(addr);
+}
+
+int xge_port_reset(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       u32 data, wait = 10;
+
+       xge_wr_csr(pdata, ENET_CLKEN, 0x3);
+       xge_wr_csr(pdata, ENET_SRST, 0xf);
+       xge_wr_csr(pdata, ENET_SRST, 0);
+       xge_wr_csr(pdata, CFG_MEM_RAM_SHUTDOWN, 1);
+       xge_wr_csr(pdata, CFG_MEM_RAM_SHUTDOWN, 0);
+
+       do {
+               usleep_range(100, 110);
+               data = xge_rd_csr(pdata, BLOCK_MEM_RDY);
+       } while (data != MEM_RDY && wait--);
+
+       if (data != MEM_RDY) {
+               dev_err(dev, "ECC init failed: %x\n", data);
+               return -ETIMEDOUT;
+       }
+
+       xge_wr_csr(pdata, ENET_SHIM, DEVM_ARAUX_COH | DEVM_AWAUX_COH);
+
+       return 0;
+}
+
+static void xge_traffic_resume(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+
+       xge_wr_csr(pdata, CFG_FORCE_LINK_STATUS_EN, 1);
+       xge_wr_csr(pdata, FORCE_LINK_STATUS, 1);
+
+       xge_wr_csr(pdata, CFG_LINK_AGGR_RESUME, 1);
+       xge_wr_csr(pdata, RX_DV_GATE_REG, 1);
+}
+
+void xge_port_init(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+
+       pdata->phy_speed = SPEED_1000;
+       xge_mac_init(pdata);
+       xge_traffic_resume(ndev);
+}
diff --git a/drivers/net/ethernet/apm/xgene-v2/enet.h b/drivers/net/ethernet/apm/xgene-v2/enet.h
new file mode 100644 (file)
index 0000000..3fd36dc
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XGENE_ENET_V2_ENET_H__
+#define __XGENE_ENET_V2_ENET_H__
+
+#define ENET_CLKEN             0xc008
+#define ENET_SRST              0xc000
+#define ENET_SHIM              0xc010
+#define CFG_MEM_RAM_SHUTDOWN   0xd070
+#define BLOCK_MEM_RDY          0xd074
+
+#define MEM_RDY                        0xffffffff
+#define DEVM_ARAUX_COH         BIT(19)
+#define DEVM_AWAUX_COH         BIT(3)
+
+#define CFG_FORCE_LINK_STATUS_EN       0x229c
+#define FORCE_LINK_STATUS              0x22a0
+#define CFG_LINK_AGGR_RESUME           0x27c8
+#define RX_DV_GATE_REG                 0x2dfc
+
+void xge_wr_csr(struct xge_pdata *pdata, u32 offset, u32 val);
+u32 xge_rd_csr(struct xge_pdata *pdata, u32 offset);
+int xge_port_reset(struct net_device *ndev);
+void xge_port_init(struct net_device *ndev);
+
+#endif  /* __XGENE_ENET_V2_ENET_H__ */
diff --git a/drivers/net/ethernet/apm/xgene-v2/ethtool.c b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
new file mode 100644 (file)
index 0000000..0c426f5
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+struct xge_gstrings_stats {
+       char name[ETH_GSTRING_LEN];
+       int offset;
+};
+
+#define XGE_STAT(m)            { #m, offsetof(struct xge_pdata, stats.m) }
+
+static const struct xge_gstrings_stats gstrings_stats[] = {
+       XGE_STAT(rx_packets),
+       XGE_STAT(tx_packets),
+       XGE_STAT(rx_bytes),
+       XGE_STAT(tx_bytes),
+       XGE_STAT(rx_errors)
+};
+
+#define XGE_STATS_LEN          ARRAY_SIZE(gstrings_stats)
+
+static void xge_get_drvinfo(struct net_device *ndev,
+                           struct ethtool_drvinfo *info)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct platform_device *pdev = pdata->pdev;
+
+       strcpy(info->driver, "xgene-enet-v2");
+       strcpy(info->version, XGENE_ENET_V2_VERSION);
+       snprintf(info->fw_version, ETHTOOL_FWVERS_LEN, "N/A");
+       sprintf(info->bus_info, "%s", pdev->name);
+}
+
+static void xge_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+       u8 *p = data;
+       int i;
+
+       if (stringset != ETH_SS_STATS)
+               return;
+
+       for (i = 0; i < XGE_STATS_LEN; i++) {
+               memcpy(p, gstrings_stats[i].name, ETH_GSTRING_LEN);
+               p += ETH_GSTRING_LEN;
+       }
+}
+
+static int xge_get_sset_count(struct net_device *ndev, int sset)
+{
+       if (sset != ETH_SS_STATS)
+               return -EINVAL;
+
+       return XGE_STATS_LEN;
+}
+
+static void xge_get_ethtool_stats(struct net_device *ndev,
+                                 struct ethtool_stats *dummy,
+                                 u64 *data)
+{
+       void *pdata = netdev_priv(ndev);
+       int i;
+
+       for (i = 0; i < XGE_STATS_LEN; i++)
+               *data++ = *(u64 *)(pdata + gstrings_stats[i].offset);
+}
+
+static int xge_get_link_ksettings(struct net_device *ndev,
+                                 struct ethtool_link_ksettings *cmd)
+{
+       struct phy_device *phydev = ndev->phydev;
+
+       if (!phydev)
+               return -ENODEV;
+
+       return phy_ethtool_ksettings_get(phydev, cmd);
+}
+
+static int xge_set_link_ksettings(struct net_device *ndev,
+                                 const struct ethtool_link_ksettings *cmd)
+{
+       struct phy_device *phydev = ndev->phydev;
+
+       if (!phydev)
+               return -ENODEV;
+
+       return phy_ethtool_ksettings_set(phydev, cmd);
+}
+
+static const struct ethtool_ops xge_ethtool_ops = {
+       .get_drvinfo = xge_get_drvinfo,
+       .get_link = ethtool_op_get_link,
+       .get_strings = xge_get_strings,
+       .get_sset_count = xge_get_sset_count,
+       .get_ethtool_stats = xge_get_ethtool_stats,
+       .get_link_ksettings = xge_get_link_ksettings,
+       .set_link_ksettings = xge_set_link_ksettings,
+};
+
+void xge_set_ethtool_ops(struct net_device *ndev)
+{
+       ndev->ethtool_ops = &xge_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.c b/drivers/net/ethernet/apm/xgene-v2/mac.c
new file mode 100644 (file)
index 0000000..ee431e3
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+void xge_mac_reset(struct xge_pdata *pdata)
+{
+       xge_wr_csr(pdata, MAC_CONFIG_1, SOFT_RESET);
+       xge_wr_csr(pdata, MAC_CONFIG_1, 0);
+}
+
+void xge_mac_set_speed(struct xge_pdata *pdata)
+{
+       u32 icm0, icm2, ecm0, mc2;
+       u32 intf_ctrl, rgmii;
+
+       icm0 = xge_rd_csr(pdata, ICM_CONFIG0_REG_0);
+       icm2 = xge_rd_csr(pdata, ICM_CONFIG2_REG_0);
+       ecm0 = xge_rd_csr(pdata, ECM_CONFIG0_REG_0);
+       rgmii = xge_rd_csr(pdata, RGMII_REG_0);
+       mc2 = xge_rd_csr(pdata, MAC_CONFIG_2);
+       intf_ctrl = xge_rd_csr(pdata, INTERFACE_CONTROL);
+       icm2 |= CFG_WAITASYNCRD_EN;
+
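+       /* Program interface mode, ICM wait states and the CFG_SPEED_125
+        * clock select for the negotiated speed; the default arm covers
+        * 1000Mbps.
+        */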
+       switch (pdata->phy_speed) {
+       case SPEED_10:
+               SET_REG_BITS(&mc2, INTF_MODE, 1);
+               SET_REG_BITS(&intf_ctrl, HD_MODE, 0);
+               SET_REG_BITS(&icm0, CFG_MACMODE, 0);
+               SET_REG_BITS(&icm2, CFG_WAITASYNCRD, 500);
+               SET_REG_BIT(&rgmii, CFG_SPEED_125, 0);
+               break;
+       case SPEED_100:
+               SET_REG_BITS(&mc2, INTF_MODE, 1);
+               SET_REG_BITS(&intf_ctrl, HD_MODE, 1);
+               SET_REG_BITS(&icm0, CFG_MACMODE, 1);
+               SET_REG_BITS(&icm2, CFG_WAITASYNCRD, 80);
+               SET_REG_BIT(&rgmii, CFG_SPEED_125, 0);
+               break;
+       default:
+               SET_REG_BITS(&mc2, INTF_MODE, 2);
+               SET_REG_BITS(&intf_ctrl, HD_MODE, 2);
+               SET_REG_BITS(&icm0, CFG_MACMODE, 2);
+               SET_REG_BITS(&icm2, CFG_WAITASYNCRD, 16);
+               SET_REG_BIT(&rgmii, CFG_SPEED_125, 1);
+               break;
+       }
+
+       mc2 |= FULL_DUPLEX | CRC_EN | PAD_CRC;
+       SET_REG_BITS(&ecm0, CFG_WFIFOFULLTHR, 0x32);
+
+       xge_wr_csr(pdata, MAC_CONFIG_2, mc2);
+       xge_wr_csr(pdata, INTERFACE_CONTROL, intf_ctrl);
+       xge_wr_csr(pdata, RGMII_REG_0, rgmii);
+       xge_wr_csr(pdata, ICM_CONFIG0_REG_0, icm0);
+       xge_wr_csr(pdata, ICM_CONFIG2_REG_0, icm2);
+       xge_wr_csr(pdata, ECM_CONFIG0_REG_0, ecm0);
+}
+
+void xge_mac_set_station_addr(struct xge_pdata *pdata)
+{
+       u8 *dev_addr = pdata->ndev->dev_addr;
+       u32 addr0, addr1;
+
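+       /* First four octets go to ADDR0 (octet 0 in the LSB), the last
+        * two to the upper half of ADDR1.
+        */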
+       addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
+               (dev_addr[1] << 8) | dev_addr[0];
+       addr1 = (dev_addr[5] << 24) | (dev_addr[4] << 16);
+
+       xge_wr_csr(pdata, STATION_ADDR0, addr0);
+       xge_wr_csr(pdata, STATION_ADDR1, addr1);
+}
+
+void xge_mac_init(struct xge_pdata *pdata)
+{
+       xge_mac_reset(pdata);
+       xge_mac_set_speed(pdata);
+       xge_mac_set_station_addr(pdata);
+}
+
+void xge_mac_enable(struct xge_pdata *pdata)
+{
+       u32 data;
+
+       data = xge_rd_csr(pdata, MAC_CONFIG_1);
+       data |= TX_EN | RX_EN;
+       xge_wr_csr(pdata, MAC_CONFIG_1, data);
+
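+       /* dummy read back; flushes the posted enable write */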
+       data = xge_rd_csr(pdata, MAC_CONFIG_1);
+}
+
+void xge_mac_disable(struct xge_pdata *pdata)
+{
+       u32 data;
+
+       data = xge_rd_csr(pdata, MAC_CONFIG_1);
+       data &= ~(TX_EN | RX_EN);
+       xge_wr_csr(pdata, MAC_CONFIG_1, data);
+}
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.h b/drivers/net/ethernet/apm/xgene-v2/mac.h
new file mode 100644 (file)
index 0000000..18a9c9d
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XGENE_ENET_V2_MAC_H__
+#define __XGENE_ENET_V2_MAC_H__
+
+/* Register offsets */
+#define MAC_CONFIG_1           0xa000
+#define MAC_CONFIG_2           0xa004
+#define MII_MGMT_CONFIG                0xa020
+#define MII_MGMT_COMMAND       0xa024
+#define MII_MGMT_ADDRESS       0xa028
+#define MII_MGMT_CONTROL       0xa02c
+#define MII_MGMT_STATUS                0xa030
+#define MII_MGMT_INDICATORS    0xa034
+#define INTERFACE_CONTROL      0xa038
+#define STATION_ADDR0          0xa040
+#define STATION_ADDR1          0xa044
+#define RBYT                   0xa09c
+#define RPKT                   0xa0a0
+#define RFCS                   0xa0a4
+
+#define RGMII_REG_0            0x27e0
+#define ICM_CONFIG0_REG_0      0x2c00
+#define ICM_CONFIG2_REG_0      0x2c08
+#define ECM_CONFIG0_REG_0      0x2d00
+
+/* Register fields */
+#define SOFT_RESET             BIT(31)
+#define TX_EN                  BIT(0)
+#define RX_EN                  BIT(2)
+#define PAD_CRC                        BIT(2)
+#define CRC_EN                 BIT(1)
+#define FULL_DUPLEX            BIT(0)
+
+#define INTF_MODE_POS          8
+#define INTF_MODE_LEN          2
+#define HD_MODE_POS            25
+#define HD_MODE_LEN            2
+#define CFG_MACMODE_POS                18
+#define CFG_MACMODE_LEN                2
+#define CFG_WAITASYNCRD_POS    0
+#define CFG_WAITASYNCRD_LEN    16
+#define CFG_SPEED_125_POS      24
+#define CFG_WFIFOFULLTHR_POS   0
+#define CFG_WFIFOFULLTHR_LEN   7
+#define MGMT_CLOCK_SEL_POS     0
+#define MGMT_CLOCK_SEL_LEN     3
+#define PHY_ADDR_POS           8
+#define PHY_ADDR_LEN           5
+#define REG_ADDR_POS           0
+#define REG_ADDR_LEN           5
+#define MII_MGMT_BUSY          BIT(0)
+#define MII_READ_CYCLE         BIT(0)
+#define CFG_WAITASYNCRD_EN     BIT(16)
+
+static inline void xgene_set_reg_bits(u32 *var, int pos, int len, u32 val)
+{
+       /* a len-bit field at pos occupies bits [pos + len - 1 : pos] */
+       u32 mask = GENMASK(pos + len - 1, pos);
+
+       *var &= ~mask;
+       *var |= ((val << pos) & mask);
+}
+
+static inline u32 xgene_get_reg_bits(u32 var, int pos, int len)
+{
+       u32 mask = GENMASK(pos + len - 1, pos);
+
+       return (var & mask) >> pos;
+}
+
+#define SET_REG_BITS(var, field, val)                                  \
+       xgene_set_reg_bits(var, field ## _POS, field ## _LEN, val)
+
+#define SET_REG_BIT(var, field, val)                                   \
+       xgene_set_reg_bits(var, field ## _POS, 1, val)
+
+#define GET_REG_BITS(var, field)                                       \
+       xgene_get_reg_bits(var, field ## _POS, field ## _LEN)
+
+#define GET_REG_BIT(var, field)                ((var) & (field))
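+
+/*
+ * Usage sketch (mirrors the MDIO code; the values are illustrative):
+ *
+ *     u32 val = 0;
+ *
+ *     SET_REG_BITS(&val, PHY_ADDR, 3);        (bits 12:8)
+ *     SET_REG_BITS(&val, REG_ADDR, 1);        (bits 4:0)
+ *     xge_wr_csr(pdata, MII_MGMT_ADDRESS, val);
+ */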
+
+struct xge_pdata;
+
+void xge_mac_reset(struct xge_pdata *pdata);
+void xge_mac_set_speed(struct xge_pdata *pdata);
+void xge_mac_enable(struct xge_pdata *pdata);
+void xge_mac_disable(struct xge_pdata *pdata);
+void xge_mac_init(struct xge_pdata *pdata);
+void xge_mac_set_station_addr(struct xge_pdata *pdata);
+
+#endif /* __XGENE_ENET_V2_MAC_H__ */
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
new file mode 100644 (file)
index 0000000..0f2ad50
--- /dev/null
@@ -0,0 +1,759 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+static const struct acpi_device_id xge_acpi_match[];
+
+static int xge_get_resources(struct xge_pdata *pdata)
+{
+       struct platform_device *pdev;
+       struct net_device *ndev;
+       int phy_mode, ret = 0;
+       struct resource *res;
+       struct device *dev;
+
+       pdev = pdata->pdev;
+       dev = &pdev->dev;
+       ndev = pdata->ndev;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(dev, "Resource enet_csr not defined\n");
+               return -ENODEV;
+       }
+
+       pdata->resources.base_addr = devm_ioremap(dev, res->start,
+                                                 resource_size(res));
+       if (!pdata->resources.base_addr) {
+               dev_err(dev, "Unable to retrieve ENET Port CSR region\n");
+               return -ENOMEM;
+       }
+
+       if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
+               eth_hw_addr_random(ndev);
+
+       memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
+
+       phy_mode = device_get_phy_mode(dev);
+       if (phy_mode < 0) {
+               dev_err(dev, "Unable to get phy-connection-type\n");
+               return phy_mode;
+       }
+       pdata->resources.phy_mode = phy_mode;
+
+       if (pdata->resources.phy_mode != PHY_INTERFACE_MODE_RGMII) {
+               dev_err(dev, "Incorrect phy-connection-type specified\n");
+               return -ENODEV;
+       }
+
+       ret = platform_get_irq(pdev, 0);
+       if (ret < 0) {
+               dev_err(dev, "Unable to get irq\n");
+               return ret;
+       }
+       pdata->resources.irq = ret;
+
+       return 0;
+}
+
+static int xge_refill_buffers(struct net_device *ndev, u32 nbuf)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct xge_desc_ring *ring = pdata->rx_ring;
+       const u8 slots = XGENE_ENET_NUM_DESC - 1;
+       struct device *dev = &pdata->pdev->dev;
+       struct xge_raw_desc *raw_desc;
+       u64 addr_lo, addr_hi;
+       u8 tail = ring->tail;
+       struct sk_buff *skb;
+       dma_addr_t dma_addr;
+       u16 len;
+       int i;
+
+       for (i = 0; i < nbuf; i++) {
+               raw_desc = &ring->raw_desc[tail];
+
+               len = XGENE_ENET_STD_MTU;
+               skb = netdev_alloc_skb(ndev, len);
+               if (unlikely(!skb))
+                       return -ENOMEM;
+
+               dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
+               if (dma_mapping_error(dev, dma_addr)) {
+                       netdev_err(ndev, "DMA mapping error\n");
+                       dev_kfree_skb_any(skb);
+                       return -EINVAL;
+               }
+
+               ring->pkt_info[tail].skb = skb;
+               ring->pkt_info[tail].dma_addr = dma_addr;
+
+               addr_hi = GET_BITS(NEXT_DESC_ADDRH, le64_to_cpu(raw_desc->m1));
+               addr_lo = GET_BITS(NEXT_DESC_ADDRL, le64_to_cpu(raw_desc->m1));
+               raw_desc->m1 = cpu_to_le64(SET_BITS(NEXT_DESC_ADDRL, addr_lo) |
+                                          SET_BITS(NEXT_DESC_ADDRH, addr_hi) |
+                                          SET_BITS(PKT_ADDRH,
+                                                   upper_32_bits(dma_addr)));
+
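+               /* order the m1 (address high/next) write before m0 hands
+                * the slot back to hardware with E=1
+                */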
+               dma_wmb();
+               raw_desc->m0 = cpu_to_le64(SET_BITS(PKT_ADDRL, dma_addr) |
+                                          SET_BITS(E, 1));
+               tail = (tail + 1) & slots;
+       }
+
+       ring->tail = tail;
+
+       return 0;
+}
+
+static int xge_init_hw(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       int ret;
+
+       ret = xge_port_reset(ndev);
+       if (ret)
+               return ret;
+
+       xge_port_init(ndev);
+       pdata->nbufs = NUM_BUFS;
+
+       return 0;
+}
+
+static irqreturn_t xge_irq(const int irq, void *data)
+{
+       struct xge_pdata *pdata = data;
+
+       if (napi_schedule_prep(&pdata->napi)) {
+               xge_intr_disable(pdata);
+               __napi_schedule(&pdata->napi);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static int xge_request_irq(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       int ret;
+
+       snprintf(pdata->irq_name, IRQ_ID_SIZE, "%s", ndev->name);
+
+       ret = request_irq(pdata->resources.irq, xge_irq, 0, pdata->irq_name,
+                         pdata);
+       if (ret)
+               netdev_err(ndev, "Failed to request irq %s\n", pdata->irq_name);
+
+       return ret;
+}
+
+static void xge_free_irq(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+
+       free_irq(pdata->resources.irq, pdata);
+}
+
+static bool is_tx_slot_available(struct xge_raw_desc *raw_desc)
+{
+       if (GET_BITS(E, le64_to_cpu(raw_desc->m0)) &&
+           (GET_BITS(PKT_SIZE, le64_to_cpu(raw_desc->m0)) == SLOT_EMPTY))
+               return true;
+
+       return false;
+}
+
+static netdev_tx_t xge_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       struct xge_desc_ring *tx_ring;
+       struct xge_raw_desc *raw_desc;
+       dma_addr_t dma_addr;
+       u64 addr_lo, addr_hi;
+       void *pkt_buf;
+       u8 tail;
+       u16 len;
+
+       tx_ring = pdata->tx_ring;
+       tail = tx_ring->tail;
+       len = skb_headlen(skb);
+       raw_desc = &tx_ring->raw_desc[tail];
+
+       if (!is_tx_slot_available(raw_desc)) {
+               netif_stop_queue(ndev);
+               return NETDEV_TX_BUSY;
+       }
+
+       /* Packet buffers should be 64B aligned */
+       pkt_buf = dma_zalloc_coherent(dev, XGENE_ENET_STD_MTU, &dma_addr,
+                                     GFP_ATOMIC);
+       if (unlikely(!pkt_buf)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+       memcpy(pkt_buf, skb->data, len);
+
+       addr_hi = GET_BITS(NEXT_DESC_ADDRH, le64_to_cpu(raw_desc->m1));
+       addr_lo = GET_BITS(NEXT_DESC_ADDRL, le64_to_cpu(raw_desc->m1));
+       raw_desc->m1 = cpu_to_le64(SET_BITS(NEXT_DESC_ADDRL, addr_lo) |
+                                  SET_BITS(NEXT_DESC_ADDRH, addr_hi) |
+                                  SET_BITS(PKT_ADDRH,
+                                           upper_32_bits(dma_addr)));
+
+       tx_ring->pkt_info[tail].skb = skb;
+       tx_ring->pkt_info[tail].dma_addr = dma_addr;
+       tx_ring->pkt_info[tail].pkt_buf = pkt_buf;
+
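+       /* ensure descriptor and bookkeeping writes are visible before
+        * clearing E to hand the slot to hardware
+        */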
+       dma_wmb();
+
+       raw_desc->m0 = cpu_to_le64(SET_BITS(PKT_ADDRL, dma_addr) |
+                                  SET_BITS(PKT_SIZE, len) |
+                                  SET_BITS(E, 0));
+       skb_tx_timestamp(skb);
+       xge_wr_csr(pdata, DMATXCTRL, 1);
+
+       tx_ring->tail = (tail + 1) & (XGENE_ENET_NUM_DESC - 1);
+
+       return NETDEV_TX_OK;
+}
+
+static bool is_tx_hw_done(struct xge_raw_desc *raw_desc)
+{
+       if (GET_BITS(E, le64_to_cpu(raw_desc->m0)) &&
+           !GET_BITS(PKT_SIZE, le64_to_cpu(raw_desc->m0)))
+               return true;
+
+       return false;
+}
+
+static void xge_txc_poll(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       struct xge_desc_ring *tx_ring;
+       struct xge_raw_desc *raw_desc;
+       dma_addr_t dma_addr;
+       struct sk_buff *skb;
+       void *pkt_buf;
+       u32 data;
+       u8 head;
+
+       tx_ring = pdata->tx_ring;
+       head = tx_ring->head;
+
+       data = xge_rd_csr(pdata, DMATXSTATUS);
+       if (!GET_BITS(TXPKTCOUNT, data))
+               return;
+
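+       /* walk from head: hardware marks a sent slot with E=1 and
+        * PKT_SIZE cleared
+        */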
+       while (1) {
+               raw_desc = &tx_ring->raw_desc[head];
+
+               if (!is_tx_hw_done(raw_desc))
+                       break;
+
+               dma_rmb();
+
+               skb = tx_ring->pkt_info[head].skb;
+               dma_addr = tx_ring->pkt_info[head].dma_addr;
+               pkt_buf = tx_ring->pkt_info[head].pkt_buf;
+               pdata->stats.tx_packets++;
+               pdata->stats.tx_bytes += skb->len;
+               dma_free_coherent(dev, XGENE_ENET_STD_MTU, pkt_buf, dma_addr);
+               dev_kfree_skb_any(skb);
+
+               /* clear pktstart address and pktsize */
+               raw_desc->m0 = cpu_to_le64(SET_BITS(E, 1) |
+                                          SET_BITS(PKT_SIZE, SLOT_EMPTY));
+               xge_wr_csr(pdata, DMATXSTATUS, 1);
+
+               head = (head + 1) & (XGENE_ENET_NUM_DESC - 1);
+       }
+
+       if (netif_queue_stopped(ndev))
+               netif_wake_queue(ndev);
+
+       tx_ring->head = head;
+}
+
+static int xge_rx_poll(struct net_device *ndev, unsigned int budget)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       struct xge_desc_ring *rx_ring;
+       struct xge_raw_desc *raw_desc;
+       struct sk_buff *skb;
+       dma_addr_t dma_addr;
+       int processed = 0;
+       u8 head, rx_error;
+       int i, ret;
+       u32 data;
+       u16 len;
+
+       rx_ring = pdata->rx_ring;
+       head = rx_ring->head;
+
+       data = xge_rd_csr(pdata, DMARXSTATUS);
+       if (!GET_BITS(RXPKTCOUNT, data))
+               return 0;
+
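+       /* consume up to budget frames; a set E bit means the slot has
+        * not been filled by hardware yet
+        */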
+       for (i = 0; i < budget; i++) {
+               raw_desc = &rx_ring->raw_desc[head];
+
+               if (GET_BITS(E, le64_to_cpu(raw_desc->m0)))
+                       break;
+
+               dma_rmb();
+
+               skb = rx_ring->pkt_info[head].skb;
+               rx_ring->pkt_info[head].skb = NULL;
+               dma_addr = rx_ring->pkt_info[head].dma_addr;
+               len = GET_BITS(PKT_SIZE, le64_to_cpu(raw_desc->m0));
+               dma_unmap_single(dev, dma_addr, XGENE_ENET_STD_MTU,
+                                DMA_FROM_DEVICE);
+
+               rx_error = GET_BITS(D, le64_to_cpu(raw_desc->m2));
+               if (unlikely(rx_error)) {
+                       pdata->stats.rx_errors++;
+                       dev_kfree_skb_any(skb);
+                       goto out;
+               }
+
+               skb_put(skb, len);
+               skb->protocol = eth_type_trans(skb, ndev);
+
+               pdata->stats.rx_packets++;
+               pdata->stats.rx_bytes += len;
+               napi_gro_receive(&pdata->napi, skb);
+out:
+               ret = xge_refill_buffers(ndev, 1);
+               xge_wr_csr(pdata, DMARXSTATUS, 1);
+               xge_wr_csr(pdata, DMARXCTRL, 1);
+
+               if (ret)
+                       break;
+
+               head = (head + 1) & (XGENE_ENET_NUM_DESC - 1);
+               processed++;
+       }
+
+       rx_ring->head = head;
+
+       return processed;
+}
+
+static void xge_delete_desc_ring(struct net_device *ndev,
+                                struct xge_desc_ring *ring)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       u16 size;
+
+       if (!ring)
+               return;
+
+       size = XGENE_ENET_DESC_SIZE * XGENE_ENET_NUM_DESC;
+       if (ring->desc_addr)
+               dma_free_coherent(dev, size, ring->desc_addr, ring->dma_addr);
+
+       kfree(ring->pkt_info);
+       kfree(ring);
+}
+
+static void xge_free_buffers(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct xge_desc_ring *ring = pdata->rx_ring;
+       struct device *dev = &pdata->pdev->dev;
+       struct sk_buff *skb;
+       dma_addr_t dma_addr;
+       int i;
+
+       for (i = 0; i < XGENE_ENET_NUM_DESC; i++) {
+               skb = ring->pkt_info[i].skb;
+               dma_addr = ring->pkt_info[i].dma_addr;
+
+               if (!skb)
+                       continue;
+
+               dma_unmap_single(dev, dma_addr, XGENE_ENET_STD_MTU,
+                                DMA_FROM_DEVICE);
+               dev_kfree_skb_any(skb);
+       }
+}
+
+static void xge_delete_desc_rings(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+
+       xge_txc_poll(ndev);
+       xge_delete_desc_ring(ndev, pdata->tx_ring);
+
+       xge_rx_poll(ndev, 64);
+       xge_free_buffers(ndev);
+       xge_delete_desc_ring(ndev, pdata->rx_ring);
+}
+
+static struct xge_desc_ring *xge_create_desc_ring(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       struct xge_desc_ring *ring;
+       u16 size;
+
+       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+       if (!ring)
+               return NULL;
+
+       ring->ndev = ndev;
+
+       size = XGENE_ENET_DESC_SIZE * XGENE_ENET_NUM_DESC;
+       ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma_addr,
+                                             GFP_KERNEL);
+       if (!ring->desc_addr)
+               goto err;
+
+       ring->pkt_info = kcalloc(XGENE_ENET_NUM_DESC, sizeof(*ring->pkt_info),
+                                GFP_KERNEL);
+       if (!ring->pkt_info)
+               goto err;
+
+       xge_setup_desc(ring);
+
+       return ring;
+
+err:
+       xge_delete_desc_ring(ndev, ring);
+
+       return NULL;
+}
+
+static int xge_create_desc_rings(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct xge_desc_ring *ring;
+       int ret;
+
+       /* create tx ring */
+       ring = xge_create_desc_ring(ndev);
+       if (!ring)
+               goto err;
+
+       pdata->tx_ring = ring;
+       xge_update_tx_desc_addr(pdata);
+
+       /* create rx ring */
+       ring = xge_create_desc_ring(ndev);
+       if (!ring)
+               goto err;
+
+       pdata->rx_ring = ring;
+       xge_update_rx_desc_addr(pdata);
+
+       ret = xge_refill_buffers(ndev, XGENE_ENET_NUM_DESC);
+       if (ret)
+               goto err;
+
+       return 0;
+err:
+       xge_delete_desc_rings(ndev);
+
+       return -ENOMEM;
+}
+
+static int xge_open(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       int ret;
+
+       ret = xge_create_desc_rings(ndev);
+       if (ret)
+               return ret;
+
+       napi_enable(&pdata->napi);
+       ret = xge_request_irq(ndev);
+       if (ret) {
+               napi_disable(&pdata->napi);
+               xge_delete_desc_rings(ndev);
+               return ret;
+       }
+
+       xge_intr_enable(pdata);
+       xge_wr_csr(pdata, DMARXCTRL, 1);
+
+       phy_start(ndev->phydev);
+       xge_mac_enable(pdata);
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int xge_close(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+
+       netif_stop_queue(ndev);
+       xge_mac_disable(pdata);
+       phy_stop(ndev->phydev);
+
+       xge_intr_disable(pdata);
+       xge_free_irq(ndev);
+       napi_disable(&pdata->napi);
+       xge_delete_desc_rings(ndev);
+
+       return 0;
+}
+
+static int xge_napi(struct napi_struct *napi, const int budget)
+{
+       struct net_device *ndev = napi->dev;
+       struct xge_pdata *pdata;
+       int processed;
+
+       pdata = netdev_priv(ndev);
+
+       xge_txc_poll(ndev);
+       processed = xge_rx_poll(ndev, budget);
+
+       if (processed < budget) {
+               napi_complete_done(napi, processed);
+               xge_intr_enable(pdata);
+       }
+
+       return processed;
+}
+
+static int xge_set_mac_addr(struct net_device *ndev, void *addr)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       int ret;
+
+       ret = eth_mac_addr(ndev, addr);
+       if (ret)
+               return ret;
+
+       xge_mac_set_station_addr(pdata);
+
+       return 0;
+}
+
+static bool is_tx_pending(struct xge_raw_desc *raw_desc)
+{
+       if (!GET_BITS(E, le64_to_cpu(raw_desc->m0)))
+               return true;
+
+       return false;
+}
+
+static void xge_free_pending_skb(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       struct xge_desc_ring *tx_ring;
+       struct xge_raw_desc *raw_desc;
+       dma_addr_t dma_addr;
+       struct sk_buff *skb;
+       void *pkt_buf;
+       int i;
+
+       tx_ring = pdata->tx_ring;
+
+       for (i = 0; i < XGENE_ENET_NUM_DESC; i++) {
+               raw_desc = &tx_ring->raw_desc[i];
+
+               if (!is_tx_pending(raw_desc))
+                       continue;
+
+               skb = tx_ring->pkt_info[i].skb;
+               dma_addr = tx_ring->pkt_info[i].dma_addr;
+               pkt_buf = tx_ring->pkt_info[i].pkt_buf;
+               dma_free_coherent(dev, XGENE_ENET_STD_MTU, pkt_buf, dma_addr);
+               dev_kfree_skb_any(skb);
+       }
+}
+
+static void xge_timeout(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+
+       rtnl_lock();
+
+       if (!netif_running(ndev))
+               goto out;
+
+       netif_stop_queue(ndev);
+       xge_intr_disable(pdata);
+       napi_disable(&pdata->napi);
+
+       xge_wr_csr(pdata, DMATXCTRL, 0);
+       xge_txc_poll(ndev);
+       xge_free_pending_skb(ndev);
+       xge_wr_csr(pdata, DMATXSTATUS, ~0U);
+
+       xge_setup_desc(pdata->tx_ring);
+       xge_update_tx_desc_addr(pdata);
+       xge_mac_init(pdata);
+
+       napi_enable(&pdata->napi);
+       xge_intr_enable(pdata);
+       xge_mac_enable(pdata);
+       netif_start_queue(ndev);
+
+out:
+       rtnl_unlock();
+}
+
+static void xge_get_stats64(struct net_device *ndev,
+                           struct rtnl_link_stats64 *storage)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct xge_stats *stats = &pdata->stats;
+
+       storage->tx_packets += stats->tx_packets;
+       storage->tx_bytes += stats->tx_bytes;
+
+       storage->rx_packets += stats->rx_packets;
+       storage->rx_bytes += stats->rx_bytes;
+       storage->rx_errors += stats->rx_errors;
+}
+
+static const struct net_device_ops xgene_ndev_ops = {
+       .ndo_open = xge_open,
+       .ndo_stop = xge_close,
+       .ndo_start_xmit = xge_start_xmit,
+       .ndo_set_mac_address = xge_set_mac_addr,
+       .ndo_tx_timeout = xge_timeout,
+       .ndo_get_stats64 = xge_get_stats64,
+};
+
+static int xge_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct net_device *ndev;
+       struct xge_pdata *pdata;
+       int ret;
+
+       ndev = alloc_etherdev(sizeof(*pdata));
+       if (!ndev)
+               return -ENOMEM;
+
+       pdata = netdev_priv(ndev);
+
+       pdata->pdev = pdev;
+       pdata->ndev = ndev;
+       SET_NETDEV_DEV(ndev, dev);
+       platform_set_drvdata(pdev, pdata);
+       ndev->netdev_ops = &xgene_ndev_ops;
+
+       ndev->features |= NETIF_F_GSO |
+                         NETIF_F_GRO;
+
+       ret = xge_get_resources(pdata);
+       if (ret)
+               goto err;
+
+       ndev->hw_features = ndev->features;
+       xge_set_ethtool_ops(ndev);
+
+       ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
+       if (ret) {
+               netdev_err(ndev, "No usable DMA configuration\n");
+               goto err;
+       }
+
+       ret = xge_init_hw(ndev);
+       if (ret)
+               goto err;
+
+       ret = xge_mdio_config(ndev);
+       if (ret)
+               goto err;
+
+       netif_napi_add(ndev, &pdata->napi, xge_napi, NAPI_POLL_WEIGHT);
+
+       ret = register_netdev(ndev);
+       if (ret) {
+               netdev_err(ndev, "Failed to register netdev\n");
+               goto err_mdio_remove;
+       }
+
+       return 0;
+
+err_mdio_remove:
+       xge_mdio_remove(ndev);
+err:
+       free_netdev(ndev);
+
+       return ret;
+}
+
+static int xge_remove(struct platform_device *pdev)
+{
+       struct xge_pdata *pdata;
+       struct net_device *ndev;
+
+       pdata = platform_get_drvdata(pdev);
+       ndev = pdata->ndev;
+
+       rtnl_lock();
+       if (netif_running(ndev))
+               dev_close(ndev);
+       rtnl_unlock();
+
+       xge_mdio_remove(ndev);
+       unregister_netdev(ndev);
+       free_netdev(ndev);
+
+       return 0;
+}
+
+static void xge_shutdown(struct platform_device *pdev)
+{
+       struct xge_pdata *pdata;
+
+       pdata = platform_get_drvdata(pdev);
+       if (!pdata)
+               return;
+
+       if (!pdata->ndev)
+               return;
+
+       xge_remove(pdev);
+}
+
+static const struct acpi_device_id xge_acpi_match[] = {
+       { "APMC0D80" },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, xge_acpi_match);
+
+static struct platform_driver xge_driver = {
+       .driver = {
+                  .name = "xgene-enet-v2",
+                  .acpi_match_table = ACPI_PTR(xge_acpi_match),
+       },
+       .probe = xge_probe,
+       .remove = xge_remove,
+       .shutdown = xge_shutdown,
+};
+module_platform_driver(xge_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC Ethernet v2 driver");
+MODULE_AUTHOR("Iyappan Subramanian <isubramanian@apm.com>");
+MODULE_VERSION(XGENE_ENET_V2_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.h b/drivers/net/ethernet/apm/xgene-v2/main.h
new file mode 100644 (file)
index 0000000..db1178e
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XGENE_ENET_V2_MAIN_H__
+#define __XGENE_ENET_V2_MAIN_H__
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/efi.h>
+#include <linux/if_vlan.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/prefetch.h>
+#include <linux/phy.h>
+#include <net/ip.h>
+#include "mac.h"
+#include "enet.h"
+#include "ring.h"
+
+#define XGENE_ENET_V2_VERSION  "v1.0"
+#define XGENE_ENET_STD_MTU     1536
+#define XGENE_ENET_MIN_FRAME   60
+#define IRQ_ID_SIZE             16
+
+struct xge_resource {
+       void __iomem *base_addr;
+       int phy_mode;
+       u32 irq;
+};
+
+struct xge_stats {
+       u64 tx_packets;
+       u64 tx_bytes;
+       u64 rx_packets;
+       u64 rx_bytes;
+       u64 rx_errors;
+};
+
+/* ethernet private data */
+struct xge_pdata {
+       struct xge_resource resources;
+       struct xge_desc_ring *tx_ring;
+       struct xge_desc_ring *rx_ring;
+       struct platform_device *pdev;
+       char irq_name[IRQ_ID_SIZE];
+       struct mii_bus *mdio_bus;
+       struct net_device *ndev;
+       struct napi_struct napi;
+       struct xge_stats stats;
+       int phy_speed;
+       u8 nbufs;
+};
+
+int xge_mdio_config(struct net_device *ndev);
+void xge_mdio_remove(struct net_device *ndev);
+void xge_set_ethtool_ops(struct net_device *ndev);
+
+#endif /* __XGENE_ENET_V2_MAIN_H__ */
diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c b/drivers/net/ethernet/apm/xgene-v2/mdio.c
new file mode 100644 (file)
index 0000000..a583c6a
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+static int xge_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 data)
+{
+       struct xge_pdata *pdata = bus->priv;
+       u32 done, val = 0;
+       u8 wait = 10;
+
+       SET_REG_BITS(&val, PHY_ADDR, phy_id);
+       SET_REG_BITS(&val, REG_ADDR, reg);
+       xge_wr_csr(pdata, MII_MGMT_ADDRESS, val);
+
+       xge_wr_csr(pdata, MII_MGMT_CONTROL, data);
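+       /* writing CONTROL starts the cycle; poll BUSY for up to ten
+        * 5-10us intervals
+        */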
+       do {
+               usleep_range(5, 10);
+               done = xge_rd_csr(pdata, MII_MGMT_INDICATORS);
+       } while ((done & MII_MGMT_BUSY) && wait--);
+
+       if (done & MII_MGMT_BUSY) {
+               dev_err(&bus->dev, "MII_MGMT write failed\n");
+               return -ETIMEDOUT;
+       }
+
+       return 0;
+}
+
+static int xge_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+       struct xge_pdata *pdata = bus->priv;
+       u32 data, done, val = 0;
+       u8 wait = 10;
+
+       SET_REG_BITS(&val, PHY_ADDR, phy_id);
+       SET_REG_BITS(&val, REG_ADDR, reg);
+       xge_wr_csr(pdata, MII_MGMT_ADDRESS, val);
+
+       xge_wr_csr(pdata, MII_MGMT_COMMAND, MII_READ_CYCLE);
+       do {
+               usleep_range(5, 10);
+               done = xge_rd_csr(pdata, MII_MGMT_INDICATORS);
+       } while ((done & MII_MGMT_BUSY) && wait--);
+
+       if (done & MII_MGMT_BUSY) {
+               dev_err(&bus->dev, "MII_MGMT read failed\n");
+               return -ETIMEDOUT;
+       }
+
+       data = xge_rd_csr(pdata, MII_MGMT_STATUS);
+       xge_wr_csr(pdata, MII_MGMT_COMMAND, 0);
+
+       return data;
+}
+
+static void xge_adjust_link(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct phy_device *phydev = ndev->phydev;
+
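+       /* reprogram the MAC only when the negotiated speed actually changes */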
+       if (phydev->link) {
+               if (pdata->phy_speed != phydev->speed) {
+                       pdata->phy_speed = phydev->speed;
+                       xge_mac_set_speed(pdata);
+                       xge_mac_enable(pdata);
+                       phy_print_status(phydev);
+               }
+       } else {
+               if (pdata->phy_speed != SPEED_UNKNOWN) {
+                       pdata->phy_speed = SPEED_UNKNOWN;
+                       xge_mac_disable(pdata);
+                       phy_print_status(phydev);
+               }
+       }
+}
+
+void xge_mdio_remove(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct mii_bus *mdio_bus = pdata->mdio_bus;
+
+       if (ndev->phydev)
+               phy_disconnect(ndev->phydev);
+
+       if (mdio_bus->state == MDIOBUS_REGISTERED)
+               mdiobus_unregister(mdio_bus);
+
+       mdiobus_free(mdio_bus);
+}
+
+int xge_mdio_config(struct net_device *ndev)
+{
+       struct xge_pdata *pdata = netdev_priv(ndev);
+       struct device *dev = &pdata->pdev->dev;
+       struct mii_bus *mdio_bus;
+       struct phy_device *phydev;
+       int ret;
+
+       mdio_bus = mdiobus_alloc();
+       if (!mdio_bus)
+               return -ENOMEM;
+
+       mdio_bus->name = "APM X-Gene Ethernet (v2) MDIO Bus";
+       mdio_bus->read = xge_mdio_read;
+       mdio_bus->write = xge_mdio_write;
+       mdio_bus->priv = pdata;
+       mdio_bus->parent = dev;
+       snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(dev));
+       pdata->mdio_bus = mdio_bus;
+
+       mdio_bus->phy_mask = 0x1;
+       ret = mdiobus_register(mdio_bus);
+       if (ret)
+               goto err;
+
+       phydev = phy_find_first(mdio_bus);
+       if (!phydev) {
+               dev_err(dev, "no PHY found\n");
+               ret = -ENODEV;
+               goto err;
+       }
+       phydev = phy_connect(ndev, phydev_name(phydev),
+                            &xge_adjust_link,
+                            pdata->resources.phy_mode);
+       if (IS_ERR(phydev)) {
+               netdev_err(ndev, "Could not attach to PHY\n");
+               ret = PTR_ERR(phydev);
+               goto err;
+       }
+
+       phydev->supported &= ~(SUPPORTED_10baseT_Half |
+                              SUPPORTED_10baseT_Full |
+                              SUPPORTED_100baseT_Half |
+                              SUPPORTED_100baseT_Full |
+                              SUPPORTED_1000baseT_Half |
+                              SUPPORTED_AUI |
+                              SUPPORTED_MII |
+                              SUPPORTED_FIBRE |
+                              SUPPORTED_BNC);
+       phydev->advertising = phydev->supported;
+       pdata->phy_speed = SPEED_UNKNOWN;
+
+       return 0;
+err:
+       xge_mdio_remove(ndev);
+
+       return ret;
+}
diff --git a/drivers/net/ethernet/apm/xgene-v2/ring.c b/drivers/net/ethernet/apm/xgene-v2/ring.c
new file mode 100644 (file)
index 0000000..3881082
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+/* create circular linked list of descriptors */
+void xge_setup_desc(struct xge_desc_ring *ring)
+{
+       struct xge_raw_desc *raw_desc;
+       dma_addr_t dma_h, next_dma;
+       u16 offset;
+       int i;
+
+       for (i = 0; i < XGENE_ENET_NUM_DESC; i++) {
+               raw_desc = &ring->raw_desc[i];
+
+               offset = (i + 1) & (XGENE_ENET_NUM_DESC - 1);
+               next_dma = ring->dma_addr + (offset * XGENE_ENET_DESC_SIZE);
+
+               raw_desc->m0 = cpu_to_le64(SET_BITS(E, 1) |
+                                          SET_BITS(PKT_SIZE, SLOT_EMPTY));
+               dma_h = upper_32_bits(next_dma);
+               raw_desc->m1 = cpu_to_le64(SET_BITS(NEXT_DESC_ADDRL, next_dma) |
+                                          SET_BITS(NEXT_DESC_ADDRH, dma_h));
+       }
+}
+
+void xge_update_tx_desc_addr(struct xge_pdata *pdata)
+{
+       struct xge_desc_ring *ring = pdata->tx_ring;
+       dma_addr_t dma_addr = ring->dma_addr;
+
+       xge_wr_csr(pdata, DMATXDESCL, dma_addr);
+       xge_wr_csr(pdata, DMATXDESCH, upper_32_bits(dma_addr));
+
+       ring->head = 0;
+       ring->tail = 0;
+}
+
+void xge_update_rx_desc_addr(struct xge_pdata *pdata)
+{
+       struct xge_desc_ring *ring = pdata->rx_ring;
+       dma_addr_t dma_addr = ring->dma_addr;
+
+       xge_wr_csr(pdata, DMARXDESCL, dma_addr);
+       xge_wr_csr(pdata, DMARXDESCH, upper_32_bits(dma_addr));
+
+       ring->head = 0;
+       ring->tail = 0;
+}
+
+void xge_intr_enable(struct xge_pdata *pdata)
+{
+       u32 data;
+
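+       /* enable only the packet-completion sources; the bus-error bits
+        * are left disabled
+        */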
+       data = RX_PKT_RCVD | TX_PKT_SENT;
+       xge_wr_csr(pdata, DMAINTRMASK, data);
+}
+
+void xge_intr_disable(struct xge_pdata *pdata)
+{
+       xge_wr_csr(pdata, DMAINTRMASK, 0);
+}
diff --git a/drivers/net/ethernet/apm/xgene-v2/ring.h b/drivers/net/ethernet/apm/xgene-v2/ring.h
new file mode 100644 (file)
index 0000000..abc8c9a
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian <isubramanian@apm.com>
+ *           Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XGENE_ENET_V2_RING_H__
+#define __XGENE_ENET_V2_RING_H__
+
+#define XGENE_ENET_DESC_SIZE   64
+#define XGENE_ENET_NUM_DESC    256
+#define NUM_BUFS               8
+#define SLOT_EMPTY             0xfff
+
+#define DMATXCTRL              0xa180
+#define DMATXDESCL             0xa184
+#define DMATXDESCH             0xa1a0
+#define DMATXSTATUS            0xa188
+#define DMARXCTRL              0xa18c
+#define DMARXDESCL             0xa190
+#define DMARXDESCH             0xa1a4
+#define DMARXSTATUS            0xa194
+#define DMAINTRMASK            0xa198
+#define DMAINTERRUPT           0xa19c
+
+#define D_POS                  62
+#define D_LEN                  2
+#define E_POS                  63
+#define E_LEN                  1
+#define PKT_ADDRL_POS          0
+#define PKT_ADDRL_LEN          32
+#define PKT_ADDRH_POS          32
+#define PKT_ADDRH_LEN          10
+#define PKT_SIZE_POS           32
+#define PKT_SIZE_LEN           12
+#define NEXT_DESC_ADDRL_POS    0
+#define NEXT_DESC_ADDRL_LEN    32
+#define NEXT_DESC_ADDRH_POS    48
+#define NEXT_DESC_ADDRH_LEN    10
+
+#define TXPKTCOUNT_POS         16
+#define TXPKTCOUNT_LEN         8
+#define RXPKTCOUNT_POS         16
+#define RXPKTCOUNT_LEN         8
+
+#define TX_PKT_SENT            BIT(0)
+#define TX_BUS_ERROR           BIT(3)
+#define RX_PKT_RCVD            BIT(4)
+#define RX_BUS_ERROR           BIT(7)
+#define RXSTATUS_RXPKTRCVD     BIT(0)
+
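+/* 64-byte hardware descriptor. The driver interprets m0 (E bit, packet
+ * size, buffer address low), m1 (buffer address high, next-descriptor
+ * pointer) and m2 (drop status); m3-m7 are not referenced.
+ */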
+struct xge_raw_desc {
+       __le64 m0;
+       __le64 m1;
+       __le64 m2;
+       __le64 m3;
+       __le64 m4;
+       __le64 m5;
+       __le64 m6;
+       __le64 m7;
+};
+
+struct pkt_info {
+       struct sk_buff *skb;
+       dma_addr_t dma_addr;
+       void *pkt_buf;
+};
+
+/* software context of a descriptor ring */
+struct xge_desc_ring {
+       struct net_device *ndev;
+       dma_addr_t dma_addr;
+       u8 head;
+       u8 tail;
+       union {
+               void *desc_addr;
+               struct xge_raw_desc *raw_desc;
+       };
+       struct pkt_info *pkt_info;
+};
+
+static inline u64 xge_set_desc_bits(int pos, int len, u64 val)
+{
+       return (val & ((1ULL << len) - 1)) << pos;
+}
+
+static inline u64 xge_get_desc_bits(int pos, int len, u64 src)
+{
+       return (src >> pos) & ((1ULL << len) - 1);
+}
+
+#define SET_BITS(field, val) \
+               xge_set_desc_bits(field ## _POS, field ## _LEN, val)
+
+#define GET_BITS(field, src) \
+               xge_get_desc_bits(field ## _POS, field ## _LEN, src)
+
+void xge_setup_desc(struct xge_desc_ring *ring);
+void xge_update_tx_desc_addr(struct xge_pdata *pdata);
+void xge_update_rx_desc_addr(struct xge_pdata *pdata);
+void xge_intr_enable(struct xge_pdata *pdata);
+void xge_intr_disable(struct xge_pdata *pdata);
+
+#endif  /* __XGENE_ENET_V2_RING_H__ */
index 06e681697c1734872b2317550f17f28c306a63bc..2a835e07adfb58b2b6e4021e4b93a2555be8ff73 100644 (file)
@@ -494,7 +494,7 @@ static void xgene_gmac_set_speed(struct xgene_enet_pdata *pdata)
                break;
        }
 
-       mc2 |= FULL_DUPLEX2 | PAD_CRC;
+       mc2 |= FULL_DUPLEX2 | PAD_CRC | LENGTH_CHK;
        xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_2_ADDR, mc2);
        xgene_enet_wr_mcx_mac(pdata, INTERFACE_CONTROL_ADDR, intf_ctl);
        xgene_enet_wr_csr(pdata, RGMII_REG_0_ADDR, rgmii);
@@ -623,6 +623,7 @@ static void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata,
        xgene_enet_rd_csr(pdata, CLE_BYPASS_REG0_0_ADDR, &cb);
        cb |= CFG_CLE_BYPASS_EN0;
        CFG_CLE_IP_PROTOCOL0_SET(&cb, 3);
+       CFG_CLE_IP_HDR_LEN_SET(&cb, 0);
        xgene_enet_wr_csr(pdata, CLE_BYPASS_REG0_0_ADDR, cb);
 
        xgene_enet_rd_csr(pdata, CLE_BYPASS_REG1_0_ADDR, &cb);
index 5f83037bb96b5f4e78b3ad9e7afc110112fe5ff3..d250bfe94d24cb8f2a3a1f19487ac98a9659332e 100644 (file)
@@ -163,6 +163,7 @@ enum xgene_enet_rm {
 #define CFG_RXCLK_MUXSEL0_SET(dst, val)        xgene_set_bits(dst, val, 26, 3)
 
 #define CFG_CLE_IP_PROTOCOL0_SET(dst, val)     xgene_set_bits(dst, val, 16, 2)
+#define CFG_CLE_IP_HDR_LEN_SET(dst, val)       xgene_set_bits(dst, val, 8, 5)
 #define CFG_CLE_DSTQID0_SET(dst, val)          xgene_set_bits(dst, val, 0, 12)
 #define CFG_CLE_FPSEL0_SET(dst, val)           xgene_set_bits(dst, val, 16, 4)
 #define CFG_CLE_NXTFPSEL0_SET(dst, val)                xgene_set_bits(dst, val, 20, 4)
@@ -215,6 +216,7 @@ enum xgene_enet_rm {
 #define ENET_GHD_MODE                  BIT(26)
 #define FULL_DUPLEX2                   BIT(0)
 #define PAD_CRC                                BIT(2)
+#define LENGTH_CHK                     BIT(4)
 #define SCAN_AUTO_INCR                 BIT(5)
 #define TBYT_ADDR                      0x38
 #define TPKT_ADDR                      0x39
index b3568c453b1451f179a3c6ebe18ac524825840ac..5f37ed3506d571d6ddaad170f2147f430a88e51c 100644 (file)
@@ -601,14 +601,24 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
        return NETDEV_TX_OK;
 }
 
-static void xgene_enet_skip_csum(struct sk_buff *skb)
+static void xgene_enet_rx_csum(struct sk_buff *skb)
 {
+       struct net_device *ndev = skb->dev;
        struct iphdr *iph = ip_hdr(skb);
 
-       if (!ip_is_fragment(iph) ||
-           (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)) {
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
-       }
+       if (!(ndev->features & NETIF_F_RXCSUM))
+               return;
+
+       if (skb->protocol != htons(ETH_P_IP))
+               return;
+
+       if (ip_is_fragment(iph))
+               return;
+
+       if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
+               return;
+
+       skb->ip_summed = CHECKSUM_UNNECESSARY;
 }
 
 static void xgene_enet_free_pagepool(struct xgene_enet_desc_ring *buf_pool,
@@ -648,12 +658,24 @@ static void xgene_enet_free_pagepool(struct xgene_enet_desc_ring *buf_pool,
        buf_pool->head = head;
 }
 
+/* Errata 10GE_8 and ENET_11 - allow packet with length <=64B */
+static bool xgene_enet_errata_10GE_8(struct sk_buff *skb, u32 len, u8 status)
+{
+       if (status == INGRESS_PKT_LEN && len == ETHER_MIN_PACKET) {
+               if (ntohs(eth_hdr(skb)->h_proto) < 46)
+                       return true;
+       }
+
+       return false;
+}
+
 static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
                               struct xgene_enet_raw_desc *raw_desc,
                               struct xgene_enet_raw_desc *exp_desc)
 {
        struct xgene_enet_desc_ring *buf_pool, *page_pool;
        u32 datalen, frag_size, skb_index;
+       struct xgene_enet_pdata *pdata;
        struct net_device *ndev;
        dma_addr_t dma_addr;
        struct sk_buff *skb;
@@ -666,6 +688,7 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
        bool nv;
 
        ndev = rx_ring->ndev;
+       pdata = netdev_priv(ndev);
        dev = ndev_to_dev(rx_ring->ndev);
        buf_pool = rx_ring->buf_pool;
        page_pool = rx_ring->page_pool;
@@ -676,30 +699,29 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
        skb = buf_pool->rx_skb[skb_index];
        buf_pool->rx_skb[skb_index] = NULL;
 
+       datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1));
+       skb_put(skb, datalen);
+       prefetch(skb->data - NET_IP_ALIGN);
+       skb->protocol = eth_type_trans(skb, ndev);
+
        /* checking for error */
-       status = (GET_VAL(ELERR, le64_to_cpu(raw_desc->m0)) << LERR_LEN) ||
+       status = (GET_VAL(ELERR, le64_to_cpu(raw_desc->m0)) << LERR_LEN) |
                  GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
-       if (unlikely(status > 2)) {
-               dev_kfree_skb_any(skb);
-               xgene_enet_free_pagepool(page_pool, raw_desc, exp_desc);
-               xgene_enet_parse_error(rx_ring, netdev_priv(rx_ring->ndev),
-                                      status);
-               ret = -EIO;
-               goto out;
+       if (unlikely(status)) {
+               if (!xgene_enet_errata_10GE_8(skb, datalen, status)) {
+                       dev_kfree_skb_any(skb);
+                       xgene_enet_free_pagepool(page_pool, raw_desc, exp_desc);
+                       xgene_enet_parse_error(rx_ring, pdata, status);
+                       goto out;
+               }
        }
 
-       /* strip off CRC as HW isn't doing this */
-       datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1));
-
        nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0));
-       if (!nv)
+       if (!nv) {
+               /* strip off CRC as HW isn't doing this */
                datalen -= 4;
-
-       skb_put(skb, datalen);
-       prefetch(skb->data - NET_IP_ALIGN);
-
-       if (!nv)
                goto skip_jumbo;
+       }
 
        slots = page_pool->slots - 1;
        head = page_pool->head;
@@ -728,11 +750,7 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
 
 skip_jumbo:
        skb_checksum_none_assert(skb);
-       skb->protocol = eth_type_trans(skb, ndev);
-       if (likely((ndev->features & NETIF_F_IP_CSUM) &&
-                  skb->protocol == htons(ETH_P_IP))) {
-               xgene_enet_skip_csum(skb);
-       }
+       xgene_enet_rx_csum(skb);
 
        rx_ring->rx_packets++;
        rx_ring->rx_bytes += datalen;
@@ -2039,7 +2057,7 @@ static int xgene_enet_probe(struct platform_device *pdev)
        xgene_enet_setup_ops(pdata);
 
        if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
-               ndev->features |= NETIF_F_TSO;
+               ndev->features |= NETIF_F_TSO | NETIF_F_RXCSUM;
                spin_lock_init(&pdata->mss_lock);
        }
        ndev->hw_features = ndev->features;
index 52571741da9f5559145e5ce172a7ee910818bbc4..0d4be2425ebc2c24b4ecfa26fc4637f76ff76f91 100644 (file)
@@ -41,6 +41,7 @@
 #include "../../../phy/mdio-xgene.h"
 
 #define XGENE_DRV_VERSION      "v1.0"
+#define ETHER_MIN_PACKET       64
 #define XGENE_ENET_STD_MTU     1536
 #define XGENE_ENET_MAX_MTU     9600
 #define SKB_BUFFER_SIZE                (XGENE_ENET_STD_MTU - NET_IP_ALIGN)
index ece19e6d68e3bc53e0dddc8dbbb65e1bdf4af0e1..423240c97d398735d649d401dd6c1825911735cf 100644 (file)
@@ -341,8 +341,15 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata)
 
        xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data);
        data |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
+       /* Errata 10GE_1 - FIFO threshold default value incorrect */
+       RSIF_CLE_BUFF_THRESH_SET(&data, XG_RSIF_CLE_BUFF_THRESH);
        xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, data);
 
+       /* Errata 10GE_1 - FIFO threshold default value incorrect */
+       xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG1_REG_ADDR, &data);
+       RSIF_PLC_CLE_BUFF_THRESH_SET(&data, XG_RSIF_PLC_CLE_BUFF_THRESH);
+       xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG1_REG_ADDR, data);
+
        xgene_enet_rd_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, &data);
        data |= BIT(12);
        xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, data);
index 03b847ad89370ec2f79d2b731f9dbc24760a7224..e644a429ebf448dbba856b40a3bec57912cf1b8c 100644 (file)
 #define XG_DEF_PAUSE_THRES             0x390
 #define XG_DEF_PAUSE_OFF_THRES         0x2c0
 #define XG_RSIF_CONFIG_REG_ADDR                0x00a0
+#define XG_RSIF_CLE_BUFF_THRESH                0x3
+#define RSIF_CLE_BUFF_THRESH_SET(dst, val)     xgene_set_bits(dst, val, 0, 3)
+#define XG_RSIF_CONFIG1_REG_ADDR       0x00b8
+#define XG_RSIF_PLC_CLE_BUFF_THRESH    0x1
+#define RSIF_PLC_CLE_BUFF_THRESH_SET(dst, val) xgene_set_bits(dst, val, 0, 2)
 #define XCLE_BYPASS_REG0_ADDR           0x0160
 #define XCLE_BYPASS_REG1_ADDR           0x0164
 #define XG_CFG_BYPASS_ADDR             0x0204
index dad63623be6a93672974fb43ee50b518fff59ab5..d05fbfdce5e52e640042f36ba3eb831a3a42ce8a 100644 (file)
@@ -98,6 +98,7 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu)
 
        if (err < 0)
                goto err_exit;
+       ndev->mtu = new_mtu;
 
        if (netif_running(ndev)) {
                aq_ndev_close(ndev);
index 1093ea18823a32fc6cb441ab45b0b3a9a82fecc2..0592a0330cf0d601f4b9a27f0d349aeccc66f833 100644 (file)
@@ -137,6 +137,7 @@ static struct aq_hw_caps_s hw_atl_a0_hw_caps_ = {
        .tx_rings = HW_ATL_A0_TX_RINGS,
        .rx_rings = HW_ATL_A0_RX_RINGS,
        .hw_features = NETIF_F_HW_CSUM |
+                       NETIF_F_RXCSUM |
                        NETIF_F_RXHASH |
                        NETIF_F_SG |
                        NETIF_F_TSO,
index 8bdee3ddd5a0bd9044063caf5686fefcc6b5465f..f3957e9303405c3f26c9f7f7d6507009d5804534 100644 (file)
@@ -188,6 +188,7 @@ static struct aq_hw_caps_s hw_atl_b0_hw_caps_ = {
        .tx_rings = HW_ATL_B0_TX_RINGS,
        .rx_rings = HW_ATL_B0_RX_RINGS,
        .hw_features = NETIF_F_HW_CSUM |
+                       NETIF_F_RXCSUM |
                        NETIF_F_RXHASH |
                        NETIF_F_SG |
                        NETIF_F_TSO |
index 940fb24bba210ecd73f968082fefda0697628be1..96413808c72699319573e82481b73f587db612d8 100644 (file)
@@ -109,7 +109,6 @@ config TIGON3
        tristate "Broadcom Tigon3 support"
        depends on PCI
        select PHYLIB
-       select HWMON
        imply PTP_1588_CLOCK
        ---help---
          This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
@@ -117,6 +116,13 @@ config TIGON3
          To compile this driver as a module, choose M here: the module
          will be called tg3.  This is recommended.
 
+config TIGON3_HWMON
+       bool "Broadcom Tigon3 HWMON support"
+       default y
+       depends on TIGON3 && HWMON && !(TIGON3=y && HWMON=m)
+       ---help---
+         Say Y if you want to expose the thermal sensor on Tigon3 devices.
+
 config BNX2X
        tristate "Broadcom NetXtremeII 10Gb support"
        depends on PCI
index a68d4889f5db74d895f1bfb9e74c46bd2b892dbc..099b374c1b17bbd8e9cabe68cdc7cd991a258737 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
+#include <net/dsa.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 
@@ -284,6 +285,7 @@ static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = {
        STAT_MIB_SOFT("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
        STAT_MIB_SOFT("rx_dma_failed", mib.rx_dma_failed),
        STAT_MIB_SOFT("tx_dma_failed", mib.tx_dma_failed),
+       /* Per TX-queue statistics are dynamically appended */
 };
 
 #define BCM_SYSPORT_STATS_LEN  ARRAY_SIZE(bcm_sysport_gstrings_stats)
@@ -338,7 +340,8 @@ static int bcm_sysport_get_sset_count(struct net_device *dev, int string_set)
                                continue;
                        j++;
                }
-               return j;
+               /* Include per-queue statistics */
+               return j + dev->num_tx_queues * NUM_SYSPORT_TXQ_STAT;
        default:
                return -EOPNOTSUPP;
        }
@@ -349,6 +352,7 @@ static void bcm_sysport_get_strings(struct net_device *dev,
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
        const struct bcm_sysport_stats *s;
+       char buf[128];
        int i, j;
 
        switch (stringset) {
@@ -363,6 +367,18 @@ static void bcm_sysport_get_strings(struct net_device *dev,
                               ETH_GSTRING_LEN);
                        j++;
                }
+
+               for (i = 0; i < dev->num_tx_queues; i++) {
+                       snprintf(buf, sizeof(buf), "txq%d_packets", i);
+                       memcpy(data + j * ETH_GSTRING_LEN, buf,
+                              ETH_GSTRING_LEN);
+                       j++;
+
+                       snprintf(buf, sizeof(buf), "txq%d_bytes", i);
+                       memcpy(data + j * ETH_GSTRING_LEN, buf,
+                              ETH_GSTRING_LEN);
+                       j++;
+               }
                break;
        default:
                break;
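Each per-queue name above is formatted into a scratch buffer and then copied into a fixed-width, NUL-padded slot of ETH_GSTRING_LEN bytes, which is how ethtool string tables are laid out. A standalone sketch of that layout (slot size and queue count are illustrative, not the driver's values):

#include <stdio.h>
#include <string.h>

#define GSTRING_LEN 32	/* stands in for ETH_GSTRING_LEN */

int main(void)
{
	char data[4 * GSTRING_LEN] = { 0 };
	char buf[128];
	int i, j = 0;

	for (i = 0; i < 2; i++) {
		snprintf(buf, sizeof(buf), "txq%d_packets", i);
		memcpy(data + j * GSTRING_LEN, buf, GSTRING_LEN);
		j++;
		snprintf(buf, sizeof(buf), "txq%d_bytes", i);
		memcpy(data + j * GSTRING_LEN, buf, GSTRING_LEN);
		j++;
	}
	/* prints: txq0_packets txq1_bytes */
	printf("%s %s\n", data, data + 3 * GSTRING_LEN);
	return 0;
}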
@@ -418,6 +434,7 @@ static void bcm_sysport_get_stats(struct net_device *dev,
                                  struct ethtool_stats *stats, u64 *data)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
+       struct bcm_sysport_tx_ring *ring;
        int i, j;
 
        if (netif_running(dev))
@@ -436,6 +453,22 @@ static void bcm_sysport_get_stats(struct net_device *dev,
                data[j] = *(unsigned long *)p;
                j++;
        }
+
+       /* For SYSTEMPORT Lite, which has holes in its statistics, j would
+        * equal BCM_SYSPORT_STATS_LEN at the end of the loop above, but it
+        * needs to point at the total number of statistics minus the number
+        * of per-TX-queue statistics
+        */
+       j = bcm_sysport_get_sset_count(dev, ETH_SS_STATS) -
+           dev->num_tx_queues * NUM_SYSPORT_TXQ_STAT;
+
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               ring = &priv->tx_rings[i];
+               data[j] = ring->packets;
+               j++;
+               data[j] = ring->bytes;
+               j++;
+       }
 }
 
 static void bcm_sysport_get_wol(struct net_device *dev,
@@ -637,6 +670,9 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
        u16 len, status;
        struct bcm_rsb *rsb;
 
+       /* Clear status before servicing to reduce spurious interrupts */
+       intrl2_0_writel(priv, INTRL2_0_RDMA_MBDONE, INTRL2_CPU_CLEAR);
+
        /* Determine how much we should process since the last call; SYSTEMPORT
         * Lite groups the producer and consumer indexes into the same 32-bit
         * register, which we access using RDMA_CONS_INDEX
@@ -647,11 +683,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
                p_index = rdma_readl(priv, RDMA_CONS_INDEX);
        p_index &= RDMA_PROD_INDEX_MASK;
 
-       if (p_index < priv->rx_c_index)
-               to_process = (RDMA_CONS_INDEX_MASK + 1) -
-                       priv->rx_c_index + p_index;
-       else
-               to_process = p_index - priv->rx_c_index;
+       to_process = (p_index - priv->rx_c_index) & RDMA_CONS_INDEX_MASK;
 
        netif_dbg(priv, rx_status, ndev,
                  "p_index=%d rx_c_index=%d to_process=%d\n",
@@ -746,26 +778,26 @@ next:
        return processed;
 }
 
-static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_priv *priv,
+static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
                                       struct bcm_sysport_cb *cb,
                                       unsigned int *bytes_compl,
                                       unsigned int *pkts_compl)
 {
+       struct bcm_sysport_priv *priv = ring->priv;
        struct device *kdev = &priv->pdev->dev;
-       struct net_device *ndev = priv->netdev;
 
        if (cb->skb) {
-               ndev->stats.tx_bytes += cb->skb->len;
+               ring->bytes += cb->skb->len;
                *bytes_compl += cb->skb->len;
                dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
                                 dma_unmap_len(cb, dma_len),
                                 DMA_TO_DEVICE);
-               ndev->stats.tx_packets++;
+               ring->packets++;
                (*pkts_compl)++;
                bcm_sysport_free_cb(cb);
        /* SKB fragment */
        } else if (dma_unmap_addr(cb, dma_addr)) {
-               ndev->stats.tx_bytes += dma_unmap_len(cb, dma_len);
+               ring->bytes += dma_unmap_len(cb, dma_len);
                dma_unmap_page(kdev, dma_unmap_addr(cb, dma_addr),
                               dma_unmap_len(cb, dma_len), DMA_TO_DEVICE);
                dma_unmap_addr_set(cb, dma_addr, 0);
@@ -782,6 +814,13 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
        struct bcm_sysport_cb *cb;
        u32 hw_ind;
 
+       /* Clear status before servicing to reduce spurious interrupts */
+       if (!ring->priv->is_lite)
+               intrl2_1_writel(ring->priv, BIT(ring->index), INTRL2_CPU_CLEAR);
+       else
+               intrl2_0_writel(ring->priv, BIT(ring->index +
+                               INTRL2_0_TDMA_MBDONE_SHIFT), INTRL2_CPU_CLEAR);
+
        /* Compute how many descriptors have been processed since last call */
        hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index));
        c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK;
@@ -803,7 +842,7 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 
        while (last_tx_cn-- > 0) {
                cb = ring->cbs + last_c_index;
-               bcm_sysport_tx_reclaim_one(priv, cb, &bytes_compl, &pkts_compl);
+               bcm_sysport_tx_reclaim_one(ring, cb, &bytes_compl, &pkts_compl);
 
                ring->desc_count++;
                last_c_index++;
@@ -1632,6 +1671,24 @@ static int bcm_sysport_change_mac(struct net_device *dev, void *p)
        return 0;
 }
 
+static struct net_device_stats *bcm_sysport_get_nstats(struct net_device *dev)
+{
+       struct bcm_sysport_priv *priv = netdev_priv(dev);
+       unsigned long tx_bytes = 0, tx_packets = 0;
+       struct bcm_sysport_tx_ring *ring;
+       unsigned int q;
+
+       for (q = 0; q < dev->num_tx_queues; q++) {
+               ring = &priv->tx_rings[q];
+               tx_bytes += ring->bytes;
+               tx_packets += ring->packets;
+       }
+
+       dev->stats.tx_bytes = tx_bytes;
+       dev->stats.tx_packets = tx_packets;
+       return &dev->stats;
+}
+
 static void bcm_sysport_netif_start(struct net_device *dev)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
@@ -1893,6 +1950,7 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = bcm_sysport_poll_controller,
 #endif
+       .ndo_get_stats          = bcm_sysport_get_nstats,
 };
 
 #define REV_FMT        "v%2x.%02x"
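bcm_sysport_get_nstats() folds the per-ring counters into the single net_device_stats snapshot on demand. The ring counters are only written from the TX completion path, so plain unsigned long fields suffice here; drivers needing 64-bit counters on 32-bit SMP would reach for u64_stats_sync instead. A standalone sketch of the aggregation shape (all names are illustrative):

#include <stdio.h>

struct ring_stats { unsigned long packets, bytes; };

static void fold_tx_stats(const struct ring_stats *rings, int n,
			  unsigned long *packets, unsigned long *bytes)
{
	int i;

	*packets = 0;
	*bytes = 0;
	for (i = 0; i < n; i++) {
		*packets += rings[i].packets;
		*bytes += rings[i].bytes;
	}
}

int main(void)
{
	struct ring_stats rings[2] = { { 3, 300 }, { 4, 400 } };
	unsigned long packets, bytes;

	fold_tx_stats(rings, 2, &packets, &bytes);
	printf("%lu packets, %lu bytes\n", packets, bytes);	/* 7, 700 */
	return 0;
}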
index 863ddd7870b77d2ce963685098fff71211f395a8..77a51c167a694734b5983f524464e9b94725e1d7 100644 (file)
@@ -647,6 +647,9 @@ enum bcm_sysport_stat_type {
        .reg_offset = ofs, \
 }
 
+/* TX bytes and packets */
+#define NUM_SYSPORT_TXQ_STAT   2
+
 struct bcm_sysport_stats {
        char stat_string[ETH_GSTRING_LEN];
        int stat_sizeof;
@@ -690,6 +693,8 @@ struct bcm_sysport_tx_ring {
        struct bcm_sysport_cb *cbs;     /* Transmit control blocks */
        struct dma_desc *desc_cpu;      /* CPU view of the descriptor */
        struct bcm_sysport_priv *priv;  /* private context backpointer */
+       unsigned long   packets;        /* packets statistics */
+       unsigned long   bytes;          /* bytes statistics */
 };
 
 /* Driver private structure */
index d59cfcc4c4d596d48957ecd06a77cf8a475090b7..6322594ab2600ac100a63140c9420855f07c86dc 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/bcma/bcma.h>
 #include <linux/brcmphy.h>
 #include <linux/etherdevice.h>
+#include <linux/of_net.h>
 #include "bgmac.h"
 
 static inline bool bgmac_is_bcm4707_family(struct bcma_device *core)
@@ -114,7 +115,7 @@ static int bgmac_probe(struct bcma_device *core)
        struct ssb_sprom *sprom = &core->bus->sprom;
        struct mii_bus *mii_bus;
        struct bgmac *bgmac;
-       u8 *mac;
+       const u8 *mac = NULL;
        int err;
 
        bgmac = bgmac_alloc(&core->dev);
@@ -127,21 +128,27 @@ static int bgmac_probe(struct bcma_device *core)
 
        bcma_set_drvdata(core, bgmac);
 
-       switch (core->core_unit) {
-       case 0:
-               mac = sprom->et0mac;
-               break;
-       case 1:
-               mac = sprom->et1mac;
-               break;
-       case 2:
-               mac = sprom->et2mac;
-               break;
-       default:
-               dev_err(bgmac->dev, "Unsupported core_unit %d\n",
-                       core->core_unit);
-               err = -ENOTSUPP;
-               goto err;
+       if (bgmac->dev->of_node)
+               mac = of_get_mac_address(bgmac->dev->of_node);
+
+       /* If no MAC address assigned via device tree, check SPROM */
+       if (!mac) {
+               switch (core->core_unit) {
+               case 0:
+                       mac = sprom->et0mac;
+                       break;
+               case 1:
+                       mac = sprom->et1mac;
+                       break;
+               case 2:
+                       mac = sprom->et2mac;
+                       break;
+               default:
+                       dev_err(bgmac->dev, "Unsupported core_unit %d\n",
+                               core->core_unit);
+                       err = -ENOTSUPP;
+                       goto err;
+               }
        }
 
        ether_addr_copy(bgmac->net_dev->dev_addr, mac);
@@ -192,36 +199,50 @@ static int bgmac_probe(struct bcma_device *core)
                goto err1;
        }
 
-       bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
-                              BGMAC_BFL_ENETROBO);
+       bgmac->has_robosw = !!(sprom->boardflags_lo & BGMAC_BFL_ENETROBO);
        if (bgmac->has_robosw)
                dev_warn(bgmac->dev, "Support for Roboswitch not implemented\n");
 
-       if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
+       if (sprom->boardflags_lo & BGMAC_BFL_ENETADM)
                dev_warn(bgmac->dev, "Support for ADMtek ethernet switch not implemented\n");
 
        /* Feature Flags */
-       switch (core->bus->chipinfo.id) {
+       switch (ci->id) {
+       /* BCM 471X/535X family */
+       case BCMA_CHIP_ID_BCM4716:
+               bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+               /* fallthrough */
+       case BCMA_CHIP_ID_BCM47162:
+               bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL2;
+               bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+               break;
        case BCMA_CHIP_ID_BCM5357:
+       case BCMA_CHIP_ID_BCM53572:
                bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
                bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
                bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
                bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
-               if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47186) {
-                       bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+               if (ci->pkg == BCMA_PKG_ID_BCM47188 ||
+                   ci->pkg == BCMA_PKG_ID_BCM47186) {
                        bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+                       bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
                }
-               if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM5358)
+               if (ci->pkg == BCMA_PKG_ID_BCM5358)
                        bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII;
                break;
-       case BCMA_CHIP_ID_BCM53572:
-               bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+       case BCMA_CHIP_ID_BCM53573:
                bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
-               bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
-               bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
-               if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47188) {
-                       bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+               bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+               if (ci->pkg == BCMA_PKG_ID_BCM47189)
                        bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+               if (core->core_unit == 0) {
+                       bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE;
+                       if (ci->pkg == BCMA_PKG_ID_BCM47189)
+                               bgmac->feature_flags |=
+                                       BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII;
+               } else if (core->core_unit == 1) {
+                       bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6;
+                       bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII;
                }
                break;
        case BCMA_CHIP_ID_BCM4749:
@@ -229,18 +250,11 @@ static int bgmac_probe(struct bcma_device *core)
                bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
                bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
                bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
-               if (core->bus->chipinfo.pkg == 10) {
+               if (ci->pkg == 10) {
                        bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
                        bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
                }
                break;
-       case BCMA_CHIP_ID_BCM4716:
-               bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
-               /* fallthrough */
-       case BCMA_CHIP_ID_BCM47162:
-               bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL2;
-               bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
-               break;
        /* bcm4707_family */
        case BCMA_CHIP_ID_BCM4707:
        case BCMA_CHIP_ID_BCM47094:
@@ -249,21 +263,6 @@ static int bgmac_probe(struct bcma_device *core)
                bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
                bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
                break;
-       case BCMA_CHIP_ID_BCM53573:
-               bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
-               bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
-               if (ci->pkg == BCMA_PKG_ID_BCM47189)
-                       bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
-               if (core->core_unit == 0) {
-                       bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE;
-                       if (ci->pkg == BCMA_PKG_ID_BCM47189)
-                               bgmac->feature_flags |=
-                                       BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII;
-               } else if (core->core_unit == 1) {
-                       bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6;
-                       bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII;
-               }
-               break;
        default:
                bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
                bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
index da1b8b225eb9d31e001435ee33c7362f3c5565b5..73aca97a96bc70fb1e79f04ec2585589d0616cc5 100644 (file)
 #include <linux/of_net.h>
 #include "bgmac.h"
 
+#define NICPM_PADRING_CFG              0x00000004
 #define NICPM_IOMUX_CTRL               0x00000008
 
+#define NICPM_PADRING_CFG_INIT_VAL     0x74000000
+#define NICPM_IOMUX_CTRL_INIT_VAL_AX   0x21880000
+
 #define NICPM_IOMUX_CTRL_INIT_VAL      0x3196e000
 #define NICPM_IOMUX_CTRL_SPD_SHIFT     10
 #define NICPM_IOMUX_CTRL_SPD_10M       0
@@ -113,6 +117,10 @@ static void bgmac_nicpm_speed_set(struct net_device *net_dev)
        if (!bgmac->plat.nicpm_base)
                return;
 
+       /* Set RGMII IO config */
+       writel(NICPM_PADRING_CFG_INIT_VAL,
+              bgmac->plat.nicpm_base + NICPM_PADRING_CFG);
+
        val = NICPM_IOMUX_CTRL_INIT_VAL;
        switch (bgmac->net_dev->phydev->speed) {
        default:
@@ -244,6 +252,31 @@ static int bgmac_remove(struct platform_device *pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM
+static int bgmac_suspend(struct device *dev)
+{
+       struct bgmac *bgmac = dev_get_drvdata(dev);
+
+       return bgmac_enet_suspend(bgmac);
+}
+
+static int bgmac_resume(struct device *dev)
+{
+       struct bgmac *bgmac = dev_get_drvdata(dev);
+
+       return bgmac_enet_resume(bgmac);
+}
+
+static const struct dev_pm_ops bgmac_pm_ops = {
+       .suspend = bgmac_suspend,
+       .resume = bgmac_resume
+};
+
+#define BGMAC_PM_OPS (&bgmac_pm_ops)
+#else
+#define BGMAC_PM_OPS NULL
+#endif /* CONFIG_PM */
+
 static const struct of_device_id bgmac_of_enet_match[] = {
        {.compatible = "brcm,amac",},
        {.compatible = "brcm,nsp-amac",},
@@ -257,6 +290,7 @@ static struct platform_driver bgmac_enet_driver = {
        .driver = {
                .name  = "bgmac-enet",
                .of_match_table = bgmac_of_enet_match,
+               .pm = BGMAC_PM_OPS
        },
        .probe = bgmac_probe,
        .remove = bgmac_remove,
index fd66fca00e0177aa6f2ebf8e01d59420cb14353e..ba4d2e145bb9bb81c32ade2e3855743e493c1c59 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/bcma/bcma.h>
 #include <linux/etherdevice.h>
+#include <linux/interrupt.h>
 #include <linux/bcm47xx_nvram.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
@@ -1480,6 +1481,7 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 
        net_dev->irq = bgmac->irq;
        SET_NETDEV_DEV(net_dev, bgmac->dev);
+       dev_set_drvdata(bgmac->dev, bgmac);
 
        if (!is_valid_ether_addr(net_dev->dev_addr)) {
                dev_err(bgmac->dev, "Invalid MAC addr: %pM\n",
@@ -1552,5 +1554,55 @@ void bgmac_enet_remove(struct bgmac *bgmac)
 }
 EXPORT_SYMBOL_GPL(bgmac_enet_remove);
 
+int bgmac_enet_suspend(struct bgmac *bgmac)
+{
+       if (!netif_running(bgmac->net_dev))
+               return 0;
+
+       phy_stop(bgmac->net_dev->phydev);
+
+       netif_stop_queue(bgmac->net_dev);
+
+       napi_disable(&bgmac->napi);
+
+       netif_tx_lock(bgmac->net_dev);
+       netif_device_detach(bgmac->net_dev);
+       netif_tx_unlock(bgmac->net_dev);
+
+       bgmac_chip_intrs_off(bgmac);
+       bgmac_chip_reset(bgmac);
+       bgmac_dma_cleanup(bgmac);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bgmac_enet_suspend);
+
+int bgmac_enet_resume(struct bgmac *bgmac)
+{
+       int rc;
+
+       if (!netif_running(bgmac->net_dev))
+               return 0;
+
+       rc = bgmac_dma_init(bgmac);
+       if (rc)
+               return rc;
+
+       bgmac_chip_init(bgmac);
+
+       napi_enable(&bgmac->napi);
+
+       netif_tx_lock(bgmac->net_dev);
+       netif_device_attach(bgmac->net_dev);
+       netif_tx_unlock(bgmac->net_dev);
+
+       netif_start_queue(bgmac->net_dev);
+
+       phy_start(bgmac->net_dev->phydev);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bgmac_enet_resume);
+
 MODULE_AUTHOR("RafaÅ‚ MiÅ‚ecki");
 MODULE_LICENSE("GPL");
index 6d1c6ff1ed963ef6bd672b8cd792b7b529c09b27..c1818766c501f8a33b309495b02566c2c1a1cb5e 100644 (file)
 
 #define BGMAC_WEIGHT   64
 
-#define ETHER_MAX_LEN   1518
+#define ETHER_MAX_LEN  (ETH_FRAME_LEN + ETH_FCS_LEN)
 
 /* Feature Flags */
 #define BGMAC_FEAT_TX_MASK_SETUP       BIT(0)
@@ -537,6 +537,8 @@ int bgmac_enet_probe(struct bgmac *bgmac);
 void bgmac_enet_remove(struct bgmac *bgmac);
 void bgmac_adjust_link(struct net_device *net_dev);
 int bgmac_phy_connect_direct(struct bgmac *bgmac);
+int bgmac_enet_suspend(struct bgmac *bgmac);
+int bgmac_enet_resume(struct bgmac *bgmac);
 
 struct mii_bus *bcma_mdio_mii_register(struct bgmac *bgmac);
 void bcma_mdio_mii_unregister(struct mii_bus *mii_bus);
index 9e8c06130c092d3f061089448797c1da74e15043..ad3e0631877e799d2b1cd8e3c07495bbb5a1fd96 100644 (file)
@@ -4277,7 +4277,10 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
 {
        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
-       return bnx2x_setup_tc(dev, tc->tc);
+
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return bnx2x_setup_tc(dev, tc->mqprio->num_tc);
 }
 
 /* called with rtnl_lock */
index 32de4589d16a2cde27f2e5674b234e2b7185f00d..174ec8f846370869b4095e5418b12aeecb3793d4 100644 (file)
@@ -6905,7 +6905,9 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
        if (ntc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       return bnxt_setup_mq_tc(dev, ntc->tc);
+       ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return bnxt_setup_mq_tc(dev, ntc->mqprio->num_tc);
 }
 
 #ifdef CONFIG_RFS_ACCEL
index c7a5b84a5cb20ecb1112f831d868238f9cead76b..3cb07778a6906ef5ce599e685db54c7ceec5297c 100644 (file)
@@ -18,6 +18,8 @@
 #define DRV_VER_MIN    7
 #define DRV_VER_UPD    0
 
+#include <linux/interrupt.h>
+
 struct tx_bd {
        __le32 tx_bd_len_flags_type;
        #define TX_BD_TYPE                                      (0x3f << 0)
index 365895ed3c3e240584da21fad2caebc6384482d5..a205a9ff9e179ba9b5f854b9e9707a29b36a45bb 100644 (file)
@@ -621,7 +621,7 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
 
        /* GENET TDMA hardware does not support a configurable timeout, but will
         * always generate an interrupt either after MBDONE packets have been
-        * transmitted, or when the ring is emtpy.
+        * transmitted, or when the ring is empty.
         */
        if (ec->tx_coalesce_usecs || ec->tx_coalesce_usecs_high ||
            ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low)
@@ -707,6 +707,19 @@ struct bcmgenet_stats {
        .reg_offset = offset, \
 }
 
+#define STAT_GENET_Q(num) \
+       STAT_GENET_SOFT_MIB("txq" __stringify(num) "_packets", \
+                       tx_rings[num].packets), \
+       STAT_GENET_SOFT_MIB("txq" __stringify(num) "_bytes", \
+                       tx_rings[num].bytes), \
+       STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_bytes", \
+                       rx_rings[num].bytes),    \
+       STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_packets", \
+                       rx_rings[num].packets), \
+       STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_errors", \
+                       rx_rings[num].errors), \
+       STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_dropped", \
+                       rx_rings[num].dropped)
 
 /* There is a 0xC gap between the end of RX and beginning of TX stats and then
  * between the end of TX stats and the beginning of the RX RUNT
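STAT_GENET_Q() builds each stat name at compile time: __stringify() from <linux/stringify.h> turns its macro argument into a string literal, which the preprocessor then pastes with the adjacent literals. For instance (the macro name here is illustrative):

#include <linux/stringify.h>

/* TXQ_STAT_NAME(16) expands to "txq" "16" "_packets", i.e. the single
 * literal "txq16_packets" */
#define TXQ_STAT_NAME(q)	"txq" __stringify(q) "_packets"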
@@ -801,6 +814,12 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
        STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
        STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
        STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed),
+       /* Per TX queues */
+       STAT_GENET_Q(0),
+       STAT_GENET_Q(1),
+       STAT_GENET_Q(2),
+       STAT_GENET_Q(3),
+       STAT_GENET_Q(16),
 };
 
 #define BCMGENET_STATS_LEN     ARRAY_SIZE(bcmgenet_gstrings_stats)
@@ -1078,8 +1097,17 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv,
                /* Power down LED */
                if (priv->hw_params->flags & GENET_HAS_EXT) {
                        reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
-                       reg |= (EXT_PWR_DOWN_PHY |
-                               EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_BIAS);
+                       if (GENET_IS_V5(priv))
+                               reg |= EXT_PWR_DOWN_PHY_EN |
+                                      EXT_PWR_DOWN_PHY_RD |
+                                      EXT_PWR_DOWN_PHY_SD |
+                                      EXT_PWR_DOWN_PHY_RX |
+                                      EXT_PWR_DOWN_PHY_TX |
+                                      EXT_IDDQ_GLBL_PWR;
+                       else
+                               reg |= EXT_PWR_DOWN_PHY;
+
+                       reg |= (EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_BIAS);
                        bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
 
                        bcmgenet_phy_power_set(priv->dev, false);
@@ -1104,12 +1132,34 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 
        switch (mode) {
        case GENET_POWER_PASSIVE:
-               reg &= ~(EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_PHY |
-                               EXT_PWR_DOWN_BIAS);
-               /* fallthrough */
+               reg &= ~(EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_BIAS);
+               if (GENET_IS_V5(priv)) {
+                       reg &= ~(EXT_PWR_DOWN_PHY_EN |
+                                EXT_PWR_DOWN_PHY_RD |
+                                EXT_PWR_DOWN_PHY_SD |
+                                EXT_PWR_DOWN_PHY_RX |
+                                EXT_PWR_DOWN_PHY_TX |
+                                EXT_IDDQ_GLBL_PWR);
+                       reg |=   EXT_PHY_RESET;
+                       bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+                       mdelay(1);
+
+                       reg &=  ~EXT_PHY_RESET;
+               } else {
+                       reg &= ~EXT_PWR_DOWN_PHY;
+                       reg |= EXT_PWR_DN_EN_LD;
+               }
+               bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+               bcmgenet_phy_power_set(priv->dev, true);
+               bcmgenet_mii_reset(priv->dev);
+               break;
+
        case GENET_POWER_CABLE_SENSE:
                /* enable APD */
-               reg |= EXT_PWR_DN_EN_LD;
+               if (!GENET_IS_V5(priv)) {
+                       reg |= EXT_PWR_DN_EN_LD;
+                       bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+               }
                break;
        case GENET_POWER_WOL_MAGIC:
                bcmgenet_wol_power_up_cfg(priv, mode);
@@ -1117,39 +1167,20 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
        default:
                break;
        }
-
-       bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-       if (mode == GENET_POWER_PASSIVE) {
-               bcmgenet_phy_power_set(priv->dev, true);
-               bcmgenet_mii_reset(priv->dev);
-       }
 }
 
 /* ioctl handle special commands that are not present in ethtool. */
 static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
-       int val = 0;
 
        if (!netif_running(dev))
                return -EINVAL;
 
-       switch (cmd) {
-       case SIOCGMIIPHY:
-       case SIOCGMIIREG:
-       case SIOCSMIIREG:
-               if (!priv->phydev)
-                       val = -ENODEV;
-               else
-                       val = phy_mii_ioctl(priv->phydev, rq, cmd);
-               break;
-
-       default:
-               val = -EINVAL;
-               break;
-       }
+       if (!priv->phydev)
+               return -ENODEV;
 
-       return val;
+       return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv,
@@ -1240,14 +1271,18 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
        unsigned int txbds_ready;
        unsigned int txbds_processed = 0;
 
-       /* Compute how many buffers are transmitted since last xmit call */
-       c_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_CONS_INDEX);
-       c_index &= DMA_C_INDEX_MASK;
-
-       if (likely(c_index >= ring->c_index))
-               txbds_ready = c_index - ring->c_index;
+       /* Clear status before servicing to reduce spurious interrupts */
+       if (ring->index == DESC_INDEX)
+               bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_TXDMA_DONE,
+                                        INTRL2_CPU_CLEAR);
        else
-               txbds_ready = (DMA_C_INDEX_MASK + 1) - ring->c_index + c_index;
+               bcmgenet_intrl2_1_writel(priv, (1 << ring->index),
+                                        INTRL2_CPU_CLEAR);
+
+       /* Compute how many buffers are transmitted since last xmit call */
+       c_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_CONS_INDEX)
+               & DMA_C_INDEX_MASK;
+       txbds_ready = (c_index - ring->c_index) & DMA_C_INDEX_MASK;
 
        netif_dbg(priv, tx_done, dev,
                  "%s ring=%d old_c_index=%u c_index=%u txbds_ready=%u\n",
@@ -1280,15 +1315,15 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
        }
 
        ring->free_bds += txbds_processed;
-       ring->c_index = (ring->c_index + txbds_processed) & DMA_C_INDEX_MASK;
+       ring->c_index = c_index;
 
-       dev->stats.tx_packets += pkts_compl;
-       dev->stats.tx_bytes += bytes_compl;
+       ring->packets += pkts_compl;
+       ring->bytes += bytes_compl;
 
        netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue),
                                  pkts_compl, bytes_compl);
 
-       return pkts_compl;
+       return txbds_processed;
 }
 
 static unsigned int bcmgenet_tx_reclaim(struct net_device *dev,
@@ -1657,18 +1692,28 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
        unsigned long dma_flag;
        int len;
        unsigned int rxpktprocessed = 0, rxpkttoprocess;
-       unsigned int p_index;
+       unsigned int p_index, mask;
        unsigned int discards;
        unsigned int chksum_ok = 0;
 
+       /* Clear status before servicing to reduce spurious interrupts */
+       if (ring->index == DESC_INDEX) {
+               bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_DONE,
+                                        INTRL2_CPU_CLEAR);
+       } else {
+               mask = 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index);
+               bcmgenet_intrl2_1_writel(priv,
+                                        mask,
+                                        INTRL2_CPU_CLEAR);
+       }
+
        p_index = bcmgenet_rdma_ring_readl(priv, ring->index, RDMA_PROD_INDEX);
 
        discards = (p_index >> DMA_P_INDEX_DISCARD_CNT_SHIFT) &
                   DMA_P_INDEX_DISCARD_CNT_MASK;
        if (discards > ring->old_discards) {
                discards = discards - ring->old_discards;
-               dev->stats.rx_missed_errors += discards;
-               dev->stats.rx_errors += discards;
+               ring->errors += discards;
                ring->old_discards += discards;
 
                /* Clear HW register when we reach 75% of maximum 0xFFFF */
@@ -1680,12 +1725,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
        }
 
        p_index &= DMA_P_INDEX_MASK;
-
-       if (likely(p_index >= ring->c_index))
-               rxpkttoprocess = p_index - ring->c_index;
-       else
-               rxpkttoprocess = (DMA_C_INDEX_MASK + 1) - ring->c_index +
-                                p_index;
+       rxpkttoprocess = (p_index - ring->c_index) & DMA_C_INDEX_MASK;
 
        netif_dbg(priv, rx_status, dev,
                  "RDMA: rxpkttoprocess=%d\n", rxpkttoprocess);
@@ -1696,7 +1736,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
                skb = bcmgenet_rx_refill(priv, cb);
 
                if (unlikely(!skb)) {
-                       dev->stats.rx_dropped++;
+                       ring->dropped++;
                        goto next;
                }
 
@@ -1724,7 +1764,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
                if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) {
                        netif_err(priv, rx_status, dev,
                                  "dropping fragmented packet!\n");
-                       dev->stats.rx_errors++;
+                       ring->errors++;
                        dev_kfree_skb_any(skb);
                        goto next;
                }
@@ -1773,8 +1813,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
 
                /* Finish setting up the received SKB and send it to the kernel */
                skb->protocol = eth_type_trans(skb, priv->dev);
-               dev->stats.rx_packets++;
-               dev->stats.rx_bytes += len;
+               ring->packets++;
+               ring->bytes += len;
                if (dma_flag & DMA_RX_MULT)
                        dev->stats.multicast++;
 
@@ -1912,10 +1952,8 @@ static void bcmgenet_intr_disable(struct bcmgenet_priv *priv)
        /* Mask all interrupts. */
        bcmgenet_intrl2_0_writel(priv, 0xFFFFFFFF, INTRL2_CPU_MASK_SET);
        bcmgenet_intrl2_0_writel(priv, 0xFFFFFFFF, INTRL2_CPU_CLEAR);
-       bcmgenet_intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
        bcmgenet_intrl2_1_writel(priv, 0xFFFFFFFF, INTRL2_CPU_MASK_SET);
        bcmgenet_intrl2_1_writel(priv, 0xFFFFFFFF, INTRL2_CPU_CLEAR);
-       bcmgenet_intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
 }
 
 static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv)
@@ -1942,8 +1980,6 @@ static int init_umac(struct bcmgenet_priv *priv)
        int ret;
        u32 reg;
        u32 int0_enable = 0;
-       u32 int1_enable = 0;
-       int i;
 
        dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n");
 
@@ -1970,12 +2006,6 @@ static int init_umac(struct bcmgenet_priv *priv)
 
        bcmgenet_intr_disable(priv);
 
-       /* Enable Rx default queue 16 interrupts */
-       int0_enable |= UMAC_IRQ_RXDMA_DONE;
-
-       /* Enable Tx default queue 16 interrupts */
-       int0_enable |= UMAC_IRQ_TXDMA_DONE;
-
        /* Configure backpressure vectors for MoCA */
        if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
                reg = bcmgenet_bp_mc_get(priv);
@@ -1993,18 +2023,8 @@ static int init_umac(struct bcmgenet_priv *priv)
        if (priv->hw_params->flags & GENET_HAS_MDIO_INTR)
                int0_enable |= (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
 
-       /* Enable Rx priority queue interrupts */
-       for (i = 0; i < priv->hw_params->rx_queues; ++i)
-               int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i));
-
-       /* Enable Tx priority queue interrupts */
-       for (i = 0; i < priv->hw_params->tx_queues; ++i)
-               int1_enable |= (1 << i);
-
        bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
-       bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
 
-       /* Enable rx/tx engine.*/
        dev_dbg(kdev, "done init umac\n");
 
        return 0;
@@ -2136,22 +2156,33 @@ static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv)
 static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv)
 {
        unsigned int i;
+       u32 int0_enable = UMAC_IRQ_TXDMA_DONE;
+       u32 int1_enable = 0;
        struct bcmgenet_tx_ring *ring;
 
        for (i = 0; i < priv->hw_params->tx_queues; ++i) {
                ring = &priv->tx_rings[i];
                napi_enable(&ring->napi);
+               int1_enable |= (1 << i);
        }
 
        ring = &priv->tx_rings[DESC_INDEX];
        napi_enable(&ring->napi);
+
+       bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
+       bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
 }
 
 static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv)
 {
        unsigned int i;
+       u32 int0_disable = UMAC_IRQ_TXDMA_DONE;
+       u32 int1_disable = 0xffff;
        struct bcmgenet_tx_ring *ring;
 
+       bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET);
+       bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET);
+
        for (i = 0; i < priv->hw_params->tx_queues; ++i) {
                ring = &priv->tx_rings[i];
                napi_disable(&ring->napi);
@@ -2264,22 +2295,33 @@ static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv)
 static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv)
 {
        unsigned int i;
+       u32 int0_enable = UMAC_IRQ_RXDMA_DONE;
+       u32 int1_enable = 0;
        struct bcmgenet_rx_ring *ring;
 
        for (i = 0; i < priv->hw_params->rx_queues; ++i) {
                ring = &priv->rx_rings[i];
                napi_enable(&ring->napi);
+               int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i));
        }
 
        ring = &priv->rx_rings[DESC_INDEX];
        napi_enable(&ring->napi);
+
+       bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
+       bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
 }
 
 static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
 {
        unsigned int i;
+       u32 int0_disable = UMAC_IRQ_RXDMA_DONE;
+       u32 int1_disable = 0xffff << UMAC_IRQ1_RX_INTR_SHIFT;
        struct bcmgenet_rx_ring *ring;
 
+       bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET);
+       bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET);
+
        for (i = 0; i < priv->hw_params->rx_queues; ++i) {
                ring = &priv->rx_rings[i];
                napi_disable(&ring->napi);
@@ -2634,6 +2676,15 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
                }
        }
 
+       if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
+                               UMAC_IRQ_PHY_DET_F |
+                               UMAC_IRQ_LINK_EVENT |
+                               UMAC_IRQ_HFB_SM |
+                               UMAC_IRQ_HFB_MM)) {
+               /* all other interrupts of interest are handled in the bottom half */
+               schedule_work(&priv->bcmgenet_irq_work);
+       }
+
        if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
                status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
                wake_up(&priv->wq);
@@ -2921,7 +2972,7 @@ static int bcmgenet_close(struct net_device *dev)
        if (ret)
                return ret;
 
-       /* Disable MAC transmit. TX DMA disabled have to done before this */
+       /* Disable MAC transmit. TX DMA must be disabled before this */
        umac_enable_set(priv, CMD_TX_EN, false);
 
        /* tx reclaim */
@@ -3101,6 +3152,48 @@ static int bcmgenet_set_mac_addr(struct net_device *dev, void *p)
        return 0;
 }
 
+static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev)
+{
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+       unsigned long tx_bytes = 0, tx_packets = 0;
+       unsigned long rx_bytes = 0, rx_packets = 0;
+       unsigned long rx_errors = 0, rx_dropped = 0;
+       struct bcmgenet_tx_ring *tx_ring;
+       struct bcmgenet_rx_ring *rx_ring;
+       unsigned int q;
+
+       for (q = 0; q < priv->hw_params->tx_queues; q++) {
+               tx_ring = &priv->tx_rings[q];
+               tx_bytes += tx_ring->bytes;
+               tx_packets += tx_ring->packets;
+       }
+       tx_ring = &priv->tx_rings[DESC_INDEX];
+       tx_bytes += tx_ring->bytes;
+       tx_packets += tx_ring->packets;
+
+       for (q = 0; q < priv->hw_params->rx_queues; q++) {
+               rx_ring = &priv->rx_rings[q];
+
+               rx_bytes += rx_ring->bytes;
+               rx_packets += rx_ring->packets;
+               rx_errors += rx_ring->errors;
+               rx_dropped += rx_ring->dropped;
+       }
+       rx_ring = &priv->rx_rings[DESC_INDEX];
+       rx_bytes += rx_ring->bytes;
+       rx_packets += rx_ring->packets;
+       rx_errors += rx_ring->errors;
+       rx_dropped += rx_ring->dropped;
+
+       dev->stats.tx_bytes = tx_bytes;
+       dev->stats.tx_packets = tx_packets;
+       dev->stats.rx_bytes = rx_bytes;
+       dev->stats.rx_packets = rx_packets;
+       dev->stats.rx_errors = rx_errors;
+       dev->stats.rx_missed_errors = rx_errors;
+       dev->stats.rx_dropped = rx_dropped;
+       return &dev->stats;
+}
+
 static const struct net_device_ops bcmgenet_netdev_ops = {
        .ndo_open               = bcmgenet_open,
        .ndo_stop               = bcmgenet_close,
@@ -3113,6 +3206,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = bcmgenet_poll_controller,
 #endif
+       .ndo_get_stats          = bcmgenet_get_stats,
 };
 
 /* Array of GENET hardware parameters/characteristics */
@@ -3186,6 +3280,25 @@ static struct bcmgenet_hw_params bcmgenet_hw_params[] = {
                .flags = GENET_HAS_40BITS | GENET_HAS_EXT |
                         GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET,
        },
+       [GENET_V5] = {
+               .tx_queues = 4,
+               .tx_bds_per_q = 32,
+               .rx_queues = 0,
+               .rx_bds_per_q = 0,
+               .bp_in_en_shift = 17,
+               .bp_in_mask = 0x1ffff,
+               .hfb_filter_cnt = 48,
+               .hfb_filter_size = 128,
+               .qtag_mask = 0x3F,
+               .tbuf_offset = 0x0600,
+               .hfb_offset = 0x8000,
+               .hfb_reg_offset = 0xfc00,
+               .rdma_offset = 0x2000,
+               .tdma_offset = 0x4000,
+               .words_per_bd = 3,
+               .flags = GENET_HAS_40BITS | GENET_HAS_EXT |
+                        GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET,
+       },
 };
 
 /* Infer hardware parameters from the detected GENET version */
@@ -3196,26 +3309,22 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
        u8 major;
        u16 gphy_rev;
 
-       if (GENET_IS_V4(priv)) {
+       if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) {
                bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus;
                genet_dma_ring_regs = genet_dma_ring_regs_v4;
                priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS;
-               priv->version = GENET_V4;
        } else if (GENET_IS_V3(priv)) {
                bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus;
                genet_dma_ring_regs = genet_dma_ring_regs_v123;
                priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS;
-               priv->version = GENET_V3;
        } else if (GENET_IS_V2(priv)) {
                bcmgenet_dma_regs = bcmgenet_dma_regs_v2;
                genet_dma_ring_regs = genet_dma_ring_regs_v123;
                priv->dma_rx_chk_bit = DMA_RX_CHK_V12;
-               priv->version = GENET_V2;
        } else if (GENET_IS_V1(priv)) {
                bcmgenet_dma_regs = bcmgenet_dma_regs_v1;
                genet_dma_ring_regs = genet_dma_ring_regs_v123;
                priv->dma_rx_chk_bit = DMA_RX_CHK_V12;
-               priv->version = GENET_V1;
        }
 
        /* enum genet_version starts at 1 */
@@ -3225,7 +3334,9 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
        /* Read GENET HW version */
        reg = bcmgenet_sys_readl(priv, SYS_REV_CTRL);
        major = (reg >> 24 & 0x0f);
-       if (major == 5)
+       if (major == 6)
+               major = 5;
+       else if (major == 5)
                major = 4;
        else if (major == 0)
                major = 1;
@@ -3253,19 +3364,25 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
         */
        gphy_rev = reg & 0xffff;
 
+       if (GENET_IS_V5(priv)) {
+               /* The EPHY revision should come from the MDIO registers of
+                * the PHY not from GENET.
+                */
+               if (gphy_rev != 0) {
+                       pr_warn("GENET is reporting EPHY revision: 0x%04x\n",
+                               gphy_rev);
+               }
        /* This is reserved, so it requires special treatment */
-       if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+       } else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
                pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
                return;
-       }
-
        /* This is the good old scheme, just GPHY major, no minor nor patch */
-       if ((gphy_rev & 0xf0) != 0)
+       } else if ((gphy_rev & 0xf0) != 0) {
                priv->gphy_rev = gphy_rev << 8;
-
        /* This is the new scheme, GPHY major rolls over with 0x10 = rev G0 */
-       else if ((gphy_rev & 0xff00) != 0)
+       } else if ((gphy_rev & 0xff00) != 0) {
                priv->gphy_rev = gphy_rev;
+       }
 
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
        if (!(params->flags & GENET_HAS_40BITS))
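The branch restructured above distinguishes three encodings of the 16-bit gphy_rev field; a standalone sketch of the decode (the constants mirror the comments, not a driver header):

#include <stdio.h>

static unsigned int decode_gphy_rev(unsigned int gphy_rev)
{
	if (gphy_rev == 0 || gphy_rev == 0x01ff)
		return 0;			/* reserved / invalid */
	if (gphy_rev & 0x00f0)
		return gphy_rev << 8;		/* old scheme: major only */
	if (gphy_rev & 0xff00)
		return gphy_rev;		/* new scheme: major.minor */
	return 0;
}

int main(void)
{
	printf("0x%04x -> 0x%08x\n", 0x00a0, decode_gphy_rev(0x00a0));
	printf("0x%04x -> 0x%08x\n", 0x1600, decode_gphy_rev(0x1600));
	return 0;
}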
@@ -3295,6 +3412,7 @@ static const struct of_device_id bcmgenet_match[] = {
        { .compatible = "brcm,genet-v2", .data = (void *)GENET_V2 },
        { .compatible = "brcm,genet-v3", .data = (void *)GENET_V3 },
        { .compatible = "brcm,genet-v4", .data = (void *)GENET_V4 },
+       { .compatible = "brcm,genet-v5", .data = (void *)GENET_V5 },
        { },
 };
 MODULE_DEVICE_TABLE(of, bcmgenet_match);
@@ -3493,7 +3611,7 @@ static int bcmgenet_suspend(struct device *d)
        if (ret)
                return ret;
 
-       /* Disable MAC transmit. TX DMA disabled have to done before this */
+       /* Disable MAC transmit. TX DMA must be disabled before this */
        umac_enable_set(priv, CMD_TX_EN, false);
 
        /* tx reclaim */
index db7f289d65ae2abd1589446ee0cadc00ffbf0254..efd07020b89fc3a7bd3c68fce1bbd7fe406acfcf 100644 (file)
@@ -355,8 +355,14 @@ struct bcmgenet_mib_counters {
 #define  EXT_PWR_DN_EN_LD              (1 << 3)
 #define  EXT_ENERGY_DET                        (1 << 4)
 #define  EXT_IDDQ_FROM_PHY             (1 << 5)
+#define  EXT_IDDQ_GLBL_PWR             (1 << 7)
 #define  EXT_PHY_RESET                 (1 << 8)
 #define  EXT_ENERGY_DET_MASK           (1 << 12)
+#define  EXT_PWR_DOWN_PHY_TX           (1 << 16)
+#define  EXT_PWR_DOWN_PHY_RX           (1 << 17)
+#define  EXT_PWR_DOWN_PHY_SD           (1 << 18)
+#define  EXT_PWR_DOWN_PHY_RD           (1 << 19)
+#define  EXT_PWR_DOWN_PHY_EN           (1 << 20)
 
 #define EXT_RGMII_OOB_CTRL             0x0C
 #define  RGMII_LINK                    (1 << 4)
@@ -499,13 +505,15 @@ enum bcmgenet_version {
        GENET_V1 = 1,
        GENET_V2,
        GENET_V3,
-       GENET_V4
+       GENET_V4,
+       GENET_V5
 };
 
 #define GENET_IS_V1(p) ((p)->version == GENET_V1)
 #define GENET_IS_V2(p) ((p)->version == GENET_V2)
 #define GENET_IS_V3(p) ((p)->version == GENET_V3)
 #define GENET_IS_V4(p) ((p)->version == GENET_V4)
+#define GENET_IS_V5(p) ((p)->version == GENET_V5)
 
 /* Hardware flags */
 #define GENET_HAS_40BITS       (1 << 0)
@@ -544,6 +552,8 @@ struct bcmgenet_skb_cb {
 struct bcmgenet_tx_ring {
        spinlock_t      lock;           /* ring lock */
        struct napi_struct napi;        /* NAPI per tx queue */
+       unsigned long   packets;
+       unsigned long   bytes;
        unsigned int    index;          /* ring index */
        unsigned int    queue;          /* queue index */
        struct enet_cb  *cbs;           /* tx ring buffer control block*/
@@ -562,6 +572,10 @@ struct bcmgenet_tx_ring {
 
 struct bcmgenet_rx_ring {
        struct napi_struct napi;        /* Rx NAPI struct */
+       unsigned long   bytes;
+       unsigned long   packets;
+       unsigned long   errors;
+       unsigned long   dropped;
        unsigned int    index;          /* Rx ring index */
        struct enet_cb  *cbs;           /* Rx ring buffer control block */
        unsigned int    size;           /* Rx ring size */
index b97122926d3aa91210a8945d45f268d370c86ee4..2fbd027f0148f96003a08405177883e9df832769 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support
  *
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -127,7 +127,6 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
                                enum bcmgenet_power_mode mode)
 {
        struct net_device *dev = priv->dev;
-       u32 cpu_mask_clear;
        int retries = 0;
        u32 reg;
 
@@ -173,18 +172,12 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
                bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
        }
 
-       /* Enable the MPD interrupt */
-       cpu_mask_clear = UMAC_IRQ_MPD_R;
-
-       bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR);
-
        return 0;
 }
 
 void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv,
                               enum bcmgenet_power_mode mode)
 {
-       u32 cpu_mask_set;
        u32 reg;
 
        if (mode != GENET_POWER_WOL_MAGIC) {
@@ -201,10 +194,4 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv,
        reg &= ~CMD_CRC_FWD;
        bcmgenet_umac_writel(priv, reg, UMAC_CMD);
        priv->crc_fwd_en = 0;
-
-       /* Stop monitoring magic packet IRQ */
-       cpu_mask_set = UMAC_IRQ_MPD_R;
-
-       /* Stop monitoring magic packet IRQ */
-       bcmgenet_intrl2_0_writel(priv, cpu_mask_set, INTRL2_CPU_MASK_SET);
 }
index e87607621e62a076104d67046a10603305d66ecf..285676f8da6bf3b9b0bb42e8a5407be56e478796 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Broadcom GENET MDIO routines
  *
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -195,53 +195,43 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
        u32 reg = 0;
 
        /* EXT_GPHY_CTRL is only valid for GENETv4 and onward */
-       if (!GENET_IS_V4(priv))
-               return;
-
-       reg = bcmgenet_ext_readl(priv, EXT_GPHY_CTRL);
-       if (enable) {
-               reg &= ~EXT_CK25_DIS;
-               bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
-               mdelay(1);
-
-               reg &= ~(EXT_CFG_IDDQ_BIAS | EXT_CFG_PWR_DOWN);
-               reg |= EXT_GPHY_RESET;
+       if (GENET_IS_V4(priv)) {
+               reg = bcmgenet_ext_readl(priv, EXT_GPHY_CTRL);
+               if (enable) {
+                       reg &= ~EXT_CK25_DIS;
+                       bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
+                       mdelay(1);
+
+                       reg &= ~(EXT_CFG_IDDQ_BIAS | EXT_CFG_PWR_DOWN);
+                       reg |= EXT_GPHY_RESET;
+                       bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
+                       mdelay(1);
+
+                       reg &= ~EXT_GPHY_RESET;
+               } else {
+                       reg |= EXT_CFG_IDDQ_BIAS | EXT_CFG_PWR_DOWN |
+                              EXT_GPHY_RESET;
+                       bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
+                       mdelay(1);
+                       reg |= EXT_CK25_DIS;
+               }
                bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
-               mdelay(1);
-
-               reg &= ~EXT_GPHY_RESET;
+               udelay(60);
        } else {
-               reg |= EXT_CFG_IDDQ_BIAS | EXT_CFG_PWR_DOWN | EXT_GPHY_RESET;
-               bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
                mdelay(1);
-               reg |= EXT_CK25_DIS;
        }
-       bcmgenet_ext_writel(priv, reg, EXT_GPHY_CTRL);
-       udelay(60);
-}
-
-static void bcmgenet_internal_phy_setup(struct net_device *dev)
-{
-       struct bcmgenet_priv *priv = netdev_priv(dev);
-       u32 reg;
-
-       /* Power up PHY */
-       bcmgenet_phy_power_set(dev, true);
-       /* enable APD */
-       reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
-       reg |= EXT_PWR_DN_EN_LD;
-       bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-       bcmgenet_mii_reset(dev);
 }
 
 static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 {
        u32 reg;
 
-       /* Speed settings are set in bcmgenet_mii_setup() */
-       reg = bcmgenet_sys_readl(priv, SYS_PORT_CTRL);
-       reg |= LED_ACT_SOURCE_MAC;
-       bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
+       if (!GENET_IS_V5(priv)) {
+               /* Speed settings are set in bcmgenet_mii_setup() */
+               reg = bcmgenet_sys_readl(priv, SYS_PORT_CTRL);
+               reg |= LED_ACT_SOURCE_MAC;
+               bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
+       }
 
        if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
                fixed_phy_set_link_update(priv->phydev,
@@ -281,7 +271,6 @@ int bcmgenet_mii_config(struct net_device *dev)
 
                if (priv->internal_phy) {
                        phy_name = "internal PHY";
-                       bcmgenet_internal_phy_setup(dev);
                } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
                        phy_name = "MoCA";
                        bcmgenet_moca_phy_setup(priv);
index 30d1eb9ebec9afab2271db1f8c0b4f448b64da08..f395b951f5e77bca9a926ea3f1210bf1fcb13ded 100644 (file)
@@ -825,6 +825,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us)
        return timeout_us ? 0 : -EBUSY;
 }
 
+#ifdef CONFIG_TIGON3_HWMON
 static int tg3_ape_wait_for_event(struct tg3 *tp, u32 timeout_us)
 {
        u32 i, apedata;
@@ -904,6 +905,7 @@ static int tg3_ape_scratchpad_read(struct tg3 *tp, u32 *data, u32 base_off,
 
        return 0;
 }
+#endif
 
 static int tg3_ape_send_event(struct tg3 *tp, u32 event)
 {
@@ -10744,6 +10746,7 @@ static int tg3_init_hw(struct tg3 *tp, bool reset_phy)
        return tg3_reset_hw(tp, reset_phy);
 }
 
+#ifdef CONFIG_TIGON3_HWMON
 static void tg3_sd_scan_scratchpad(struct tg3 *tp, struct tg3_ocir *ocir)
 {
        int i;
@@ -10826,6 +10829,10 @@ static void tg3_hwmon_open(struct tg3 *tp)
                dev_err(&pdev->dev, "Cannot register hwmon device, aborting\n");
        }
 }
+#else
+static inline void tg3_hwmon_close(struct tg3 *tp) { }
+static inline void tg3_hwmon_open(struct tg3 *tp) { }
+#endif /* CONFIG_TIGON3_HWMON */
 
 
 #define TG3_STAT_ADD32(PSTAT, REG) \
index 05c1c1dd7751bd720fac026876c7fcf7392eca03..cebfe3bd086e36f60f717579f03037058b1d1d9e 100644 (file)
@@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf,
                return PTR_ERR(kern_buf);
 
        rc = sscanf(kern_buf, "%x:%x", &addr, &len);
-       if (rc < 2) {
+       if (rc < 2 || len > UINT_MAX >> 2) {
                netdev_warn(bnad->netdev, "failed to read user buffer\n");
                kfree(kern_buf);
                return -EINVAL;
index 2fedd91f3df88fb5ea88f288e5121a8fc8fb3cf0..dee604651ba7d309686cca04f1b41222221adfe1 100644 (file)
@@ -43,6 +43,8 @@ struct octeon_cn23xx_pf {
        struct octeon_config *conf;
 };
 
+#define CN23XX_SLI_DEF_BP                      0x40
+
 int setup_cn23xx_octeon_pf_device(struct octeon_device *oct);
 
 int validate_cn23xx_pf_config_info(struct octeon_device *oct,
index f629c2fe04a44b16794db66d2a3e4c6bf5c09876..08676df6cef056f26bf77af33eacc1c4fb7ac39a 100644 (file)
@@ -26,6 +26,9 @@
 #include "octeon_main.h"
 #include "octeon_network.h"
 
+/* OOM task polling interval */
+#define LIO_OOM_POLL_INTERVAL_MS 250
+
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
        struct lio *lio = GET_LIO(netdev);
@@ -131,11 +134,20 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 
        case OCTNET_CMD_CHANGE_MACADDR:
                mac = ((u8 *)&nctrl->udd[0]) + 2;
-               netif_info(lio, probe, lio->netdev,
-                          "MACAddr changed to %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
-                          mac[0], mac[1],
-                          mac[2], mac[3],
-                          mac[4], mac[5]);
+               if (nctrl->ncmd.s.param1) {
+                       /* vfidx is 0 based, but vf_num (param1) is 1 based */
+                       int vfidx = nctrl->ncmd.s.param1 - 1;
+                       bool mac_is_admin_assigned = nctrl->ncmd.s.param2;
+
+                       if (mac_is_admin_assigned)
+                               netif_info(lio, probe, lio->netdev,
+                                          "MAC Address %pM is configured for VF %d\n",
+                                          mac, vfidx);
+               } else {
+                       netif_info(lio, probe, lio->netdev,
+                                  " MACAddr changed to %pM\n",
+                                  mac);
+               }
                break;
 
        case OCTNET_CMD_CHANGE_MTU:
@@ -284,3 +296,56 @@ void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac)
         * the PF did that already
         */
 }
+
+static void octnet_poll_check_rxq_oom_status(struct work_struct *work)
+{
+       struct cavium_wk *wk = (struct cavium_wk *)work;
+       struct lio *lio = (struct lio *)wk->ctxptr;
+       struct octeon_device *oct = lio->oct_dev;
+       struct octeon_droq *droq;
+       int q, q_no = 0;
+
+       if (ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
+               for (q = 0; q < lio->linfo.num_rxpciq; q++) {
+                       q_no = lio->linfo.rxpciq[q].s.q_no;
+                       droq = oct->droq[q_no];
+                       if (!droq)
+                               continue;
+                       octeon_droq_check_oom(droq);
+               }
+       }
+       queue_delayed_work(lio->rxq_status_wq.wq,
+                          &lio->rxq_status_wq.wk.work,
+                          msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS));
+}
+
+int setup_rx_oom_poll_fn(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+
+       lio->rxq_status_wq.wq = alloc_workqueue("rxq-oom-status",
+                                               WQ_MEM_RECLAIM, 0);
+       if (!lio->rxq_status_wq.wq) {
+               dev_err(&oct->pci_dev->dev, "unable to create cavium rxq oom status wq\n");
+               return -ENOMEM;
+       }
+       INIT_DELAYED_WORK(&lio->rxq_status_wq.wk.work,
+                         octnet_poll_check_rxq_oom_status);
+       lio->rxq_status_wq.wk.ctxptr = lio;
+       queue_delayed_work(lio->rxq_status_wq.wq,
+                          &lio->rxq_status_wq.wk.work,
+                          msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS));
+       return 0;
+}
+
+void cleanup_rx_oom_poll_fn(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+
+       if (lio->rxq_status_wq.wq) {
+               cancel_delayed_work_sync(&lio->rxq_status_wq.wk.work);
+               flush_workqueue(lio->rxq_status_wq.wq);
+               destroy_workqueue(lio->rxq_status_wq.wq);
+       }
+}
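
The RX OOM poller added above is the kernel's standard self-rearming delayed-work pattern: the work function re-queues itself on its own workqueue, and teardown cancels synchronously before destroying the queue so no re-arm can race the free. A minimal sketch of the pattern with illustrative names (demo_poll, DEMO_POLL_MS are not from the driver):

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    #define DEMO_POLL_MS 250

    static struct workqueue_struct *demo_wq;
    static struct delayed_work demo_work;

    static void demo_poll(struct work_struct *work)
    {
            /* ... inspect queues, refill buffers, etc. ... */
            queue_delayed_work(demo_wq, &demo_work,
                               msecs_to_jiffies(DEMO_POLL_MS));
    }

    static int demo_start(void)
    {
            demo_wq = alloc_workqueue("demo-poller", WQ_MEM_RECLAIM, 0);
            if (!demo_wq)
                    return -ENOMEM;
            INIT_DELAYED_WORK(&demo_work, demo_poll);
            queue_delayed_work(demo_wq, &demo_work,
                               msecs_to_jiffies(DEMO_POLL_MS));
            return 0;
    }

    static void demo_stop(void)
    {
            cancel_delayed_work_sync(&demo_work);   /* stops the re-arm loop */
            destroy_workqueue(demo_wq);
    }
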
index 50384cede8be9b84431690074022bbff4bbc9199..fac02ed2c44942bb81f544f5274292a68a67f279 100644 (file)
 
 static int octnet_get_link_stats(struct net_device *netdev);
 
+struct oct_intrmod_context {
+       int octeon_id;
+       wait_queue_head_t wc;
+       int cond;
+       int status;
+};
+
+struct oct_intrmod_resp {
+       u64     rh;
+       struct oct_intrmod_cfg intrmod;
+       u64     status;
+};
+
 struct oct_mdio_cmd_context {
        int octeon_id;
        wait_queue_head_t wc;
@@ -213,17 +226,23 @@ static int lio_get_link_ksettings(struct net_device *netdev,
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
        struct oct_link_info *linfo;
-       u32 supported, advertising;
+       u32 supported = 0, advertising = 0;
 
        linfo = &lio->linfo;
 
        if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
            linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
+           linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
            linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
                ecmd->base.port = PORT_FIBRE;
-               supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE |
-                            SUPPORTED_Pause);
-               advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
+
+               if (linfo->link.s.speed == SPEED_10000) {
+                       supported = SUPPORTED_10000baseT_Full;
+                       advertising = ADVERTISED_10000baseT_Full;
+               }
+
+               supported |= SUPPORTED_FIBRE | SUPPORTED_Pause;
+               advertising |= ADVERTISED_Pause;
                ethtool_convert_legacy_u32_to_link_mode(
                        ecmd->link_modes.supported, supported);
                ethtool_convert_legacy_u32_to_link_mode(
@@ -1292,95 +1311,103 @@ static int lio_vf_get_sset_count(struct net_device *netdev, int sset)
        }
 }
 
-static int lio_get_intr_coalesce(struct net_device *netdev,
-                                struct ethtool_coalesce *intr_coal)
+/* Callback function for intrmod */
+static void octnet_intrmod_callback(struct octeon_device *oct_dev,
+                                   u32 status,
+                                   void *ptr)
 {
-       struct lio *lio = GET_LIO(netdev);
-       struct octeon_device *oct = lio->oct_dev;
-       struct octeon_instr_queue *iq;
-       struct oct_intrmod_cfg *intrmod_cfg;
+       struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+       struct oct_intrmod_context *ctx;
 
-       intrmod_cfg = &oct->intrmod;
+       ctx  = (struct oct_intrmod_context *)sc->ctxptr;
 
-       switch (oct->chip_id) {
-       case OCTEON_CN23XX_PF_VID:
-       case OCTEON_CN23XX_VF_VID:
-               if (!intrmod_cfg->rx_enable) {
-                       intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs;
-                       intr_coal->rx_max_coalesced_frames =
-                               intrmod_cfg->rx_frames;
-               }
-               if (!intrmod_cfg->tx_enable)
-                       intr_coal->tx_max_coalesced_frames =
-                               intrmod_cfg->tx_frames;
-               break;
-       case OCTEON_CN68XX:
-       case OCTEON_CN66XX: {
-               struct octeon_cn6xxx *cn6xxx =
-                       (struct octeon_cn6xxx *)oct->chip;
+       ctx->status = status;
 
-               if (!intrmod_cfg->rx_enable) {
-                       intr_coal->rx_coalesce_usecs =
-                               CFG_GET_OQ_INTR_TIME(cn6xxx->conf);
-                       intr_coal->rx_max_coalesced_frames =
-                               CFG_GET_OQ_INTR_PKT(cn6xxx->conf);
-               }
-               iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no];
-               intr_coal->tx_max_coalesced_frames = iq->fill_threshold;
-               break;
-       }
-       default:
-               netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+       oct_dev = lio_get_device(ctx->octeon_id);
+
+       WRITE_ONCE(ctx->cond, 1);
+
+       /* This barrier ensures the response is fully written before the
+        * waiter is woken up.
+        */
+       wmb();
+
+       wake_up_interruptible(&ctx->wc);
+}
+
+/* Get interrupt moderation parameters */
+static int octnet_get_intrmod_cfg(struct lio *lio,
+                                 struct oct_intrmod_cfg *intr_cfg)
+{
+       struct octeon_soft_command *sc;
+       struct oct_intrmod_context *ctx;
+       struct oct_intrmod_resp *resp;
+       int retval;
+       struct octeon_device *oct_dev = lio->oct_dev;
+
+       /* Alloc soft command */
+       sc = (struct octeon_soft_command *)
+               octeon_alloc_soft_command(oct_dev,
+                                         0,
+                                         sizeof(struct oct_intrmod_resp),
+                                         sizeof(struct oct_intrmod_context));
+
+       if (!sc)
+               return -ENOMEM;
+
+       resp = (struct oct_intrmod_resp *)sc->virtrptr;
+       memset(resp, 0, sizeof(struct oct_intrmod_resp));
+
+       ctx = (struct oct_intrmod_context *)sc->ctxptr;
+       memset(ctx, 0, sizeof(struct oct_intrmod_context));
+       WRITE_ONCE(ctx->cond, 0);
+       ctx->octeon_id = lio_get_device_id(oct_dev);
+       init_waitqueue_head(&ctx->wc);
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
+                                   OPCODE_NIC_INTRMOD_PARAMS, 0, 0, 0);
+
+       sc->callback = octnet_intrmod_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = 1000;
+
+       retval = octeon_send_soft_command(oct_dev, sc);
+       if (retval == IQ_SEND_FAILED) {
+               octeon_free_soft_command(oct_dev, sc);
                return -EINVAL;
        }
-       if (intrmod_cfg->rx_enable) {
-               intr_coal->use_adaptive_rx_coalesce =
-                       intrmod_cfg->rx_enable;
-               intr_coal->rate_sample_interval =
-                       intrmod_cfg->check_intrvl;
-               intr_coal->pkt_rate_high =
-                       intrmod_cfg->maxpkt_ratethr;
-               intr_coal->pkt_rate_low =
-                       intrmod_cfg->minpkt_ratethr;
-               intr_coal->rx_max_coalesced_frames_high =
-                       intrmod_cfg->rx_maxcnt_trigger;
-               intr_coal->rx_coalesce_usecs_high =
-                       intrmod_cfg->rx_maxtmr_trigger;
-               intr_coal->rx_coalesce_usecs_low =
-                       intrmod_cfg->rx_mintmr_trigger;
-               intr_coal->rx_max_coalesced_frames_low =
-                   intrmod_cfg->rx_mincnt_trigger;
+
+       /* Sleep on the wait queue until the cond flag indicates that the
+        * response has arrived or the request timed out.
+        */
+       if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+               dev_err(&oct_dev->pci_dev->dev, "Wait interrupted\n");
+               goto intrmod_info_wait_intr;
        }
-       if ((OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) &&
-           (intrmod_cfg->tx_enable)) {
-               intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable;
-               intr_coal->tx_max_coalesced_frames_high =
-                   intrmod_cfg->tx_maxcnt_trigger;
-               intr_coal->tx_max_coalesced_frames_low =
-                   intrmod_cfg->tx_mincnt_trigger;
+
+       retval = ctx->status || resp->status;
+       if (retval) {
+               dev_err(&oct_dev->pci_dev->dev,
+                       "Get interrupt moderation parameters failed\n");
+               goto intrmod_info_wait_fail;
        }
-       return 0;
-}
 
-/* Callback function for intrmod */
-static void octnet_intrmod_callback(struct octeon_device *oct_dev,
-                                   u32 status,
-                                   void *ptr)
-{
-       struct oct_intrmod_cmd *cmd = ptr;
-       struct octeon_soft_command *sc = cmd->sc;
+       octeon_swap_8B_data((u64 *)&resp->intrmod,
+                           (sizeof(struct oct_intrmod_cfg)) / 8);
+       memcpy(intr_cfg, &resp->intrmod, sizeof(struct oct_intrmod_cfg));
+       octeon_free_soft_command(oct_dev, sc);
 
-       oct_dev = cmd->oct_dev;
+       return 0;
 
-       if (status)
-               dev_err(&oct_dev->pci_dev->dev, "intrmod config failed. Status: %llx\n",
-                       CVM_CAST64(status));
-       else
-               dev_info(&oct_dev->pci_dev->dev,
-                        "Rx-Adaptive Interrupt moderation enabled:%llx\n",
-                        oct_dev->intrmod.rx_enable);
+intrmod_info_wait_fail:
 
        octeon_free_soft_command(oct_dev, sc);
+
+intrmod_info_wait_intr:
+
+       return -ENODEV;
 }
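
Both the get path above and the set path below use the same request/response handshake: the completion callback records the status, flips a condition flag, and wakes the requester, which sleeps on the context's wait queue (sleep_cond() is a driver-local helper). The handshake in generic form, using wait_event_interruptible() in place of sleep_cond():

    #include <linux/wait.h>

    struct demo_ctx {
            wait_queue_head_t wc;
            int cond;
            int status;
    };

    /* Response side: publish the result, then wake the sleeper. */
    static void demo_complete(struct demo_ctx *ctx, int status)
    {
            ctx->status = status;
            WRITE_ONCE(ctx->cond, 1);
            wmb();  /* order the writes above before the wakeup */
            wake_up_interruptible(&ctx->wc);
    }

    /* Request side: sleep until the flag flips or a signal arrives. */
    static int demo_wait(struct demo_ctx *ctx)
    {
            if (wait_event_interruptible(ctx->wc, READ_ONCE(ctx->cond)))
                    return -EINTR;
            return ctx->status;
    }

The context needs init_waitqueue_head() on wc and cond cleared before the request is issued, exactly as the driver does before octeon_send_soft_command().
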
 
 /*  Configure interrupt moderation parameters */
@@ -1388,7 +1415,7 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
                                  struct oct_intrmod_cfg *intr_cfg)
 {
        struct octeon_soft_command *sc;
-       struct oct_intrmod_cmd *cmd;
+       struct oct_intrmod_context *ctx;
        struct oct_intrmod_cfg *cfg;
        int retval;
        struct octeon_device *oct_dev = lio->oct_dev;
@@ -1398,19 +1425,21 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
                octeon_alloc_soft_command(oct_dev,
                                          sizeof(struct oct_intrmod_cfg),
                                          0,
-                                         sizeof(struct oct_intrmod_cmd));
+                                         sizeof(struct oct_intrmod_context));
 
        if (!sc)
                return -ENOMEM;
 
-       cmd = (struct oct_intrmod_cmd *)sc->ctxptr;
+       ctx = (struct oct_intrmod_context *)sc->ctxptr;
+
+       WRITE_ONCE(ctx->cond, 0);
+       ctx->octeon_id = lio_get_device_id(oct_dev);
+       init_waitqueue_head(&ctx->wc);
+
        cfg = (struct oct_intrmod_cfg *)sc->virtdptr;
 
        memcpy(cfg, intr_cfg, sizeof(struct oct_intrmod_cfg));
        octeon_swap_8B_data((u64 *)cfg, (sizeof(struct oct_intrmod_cfg)) / 8);
-       cmd->sc = sc;
-       cmd->cfg = cfg;
-       cmd->oct_dev = oct_dev;
 
        sc->iq_no = lio->linfo.txpciq[0].s.q_no;
 
@@ -1418,7 +1447,7 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
                                    OPCODE_NIC_INTRMOD_CFG, 0, 0, 0);
 
        sc->callback = octnet_intrmod_callback;
-       sc->callback_arg = cmd;
+       sc->callback_arg = sc;
        sc->wait_time = 1000;
 
        retval = octeon_send_soft_command(oct_dev, sc);
@@ -1427,7 +1456,29 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
                return -EINVAL;
        }
 
-       return 0;
+       /* Sleep on the wait queue until the cond flag indicates that the
+        * response has arrived or the request timed out.
+        */
+       if (sleep_cond(&ctx->wc, &ctx->cond) != -EINTR) {
+               retval = ctx->status;
+               if (retval)
+                       dev_err(&oct_dev->pci_dev->dev,
+                               "intrmod config failed. Status: %llx\n",
+                               CVM_CAST64(retval));
+               else
+                       dev_info(&oct_dev->pci_dev->dev,
+                                "Rx-Adaptive Interrupt moderation %s\n",
+                                (intr_cfg->rx_enable) ?
+                                "enabled" : "disabled");
+
+               octeon_free_soft_command(oct_dev, sc);
+
+               return ((retval) ? -ENODEV : 0);
+       }
+
+       dev_err(&oct_dev->pci_dev->dev, "intrmod config wait interrupted\n");
+
+       return -EINTR;
 }
 
 static void
@@ -1584,80 +1635,106 @@ static int octnet_get_link_stats(struct net_device *netdev)
        return 0;
 }
 
-/* Enable/Disable auto interrupt Moderation */
-static int oct_cfg_adaptive_intr(struct lio *lio, struct ethtool_coalesce
-                                *intr_coal)
+static int lio_get_intr_coalesce(struct net_device *netdev,
+                                struct ethtool_coalesce *intr_coal)
 {
-       int ret = 0;
+       struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
-       struct oct_intrmod_cfg *intrmod_cfg;
-
-       intrmod_cfg = &oct->intrmod;
-
-       if (oct->intrmod.rx_enable || oct->intrmod.tx_enable) {
-               if (intr_coal->rate_sample_interval)
-                       intrmod_cfg->check_intrvl =
-                               intr_coal->rate_sample_interval;
-               else
-                       intrmod_cfg->check_intrvl =
-                               LIO_INTRMOD_CHECK_INTERVAL;
+       struct octeon_instr_queue *iq;
+       struct oct_intrmod_cfg intrmod_cfg;
 
-               if (intr_coal->pkt_rate_high)
-                       intrmod_cfg->maxpkt_ratethr =
-                               intr_coal->pkt_rate_high;
-               else
-                       intrmod_cfg->maxpkt_ratethr =
-                               LIO_INTRMOD_MAXPKT_RATETHR;
+       if (octnet_get_intrmod_cfg(lio, &intrmod_cfg))
+               return -ENODEV;
 
-               if (intr_coal->pkt_rate_low)
-                       intrmod_cfg->minpkt_ratethr =
-                               intr_coal->pkt_rate_low;
-               else
-                       intrmod_cfg->minpkt_ratethr =
-                               LIO_INTRMOD_MINPKT_RATETHR;
+       switch (oct->chip_id) {
+       case OCTEON_CN23XX_PF_VID:
+       case OCTEON_CN23XX_VF_VID: {
+               if (!intrmod_cfg.rx_enable) {
+                       intr_coal->rx_coalesce_usecs = oct->rx_coalesce_usecs;
+                       intr_coal->rx_max_coalesced_frames =
+                               oct->rx_max_coalesced_frames;
+               }
+               if (!intrmod_cfg.tx_enable)
+                       intr_coal->tx_max_coalesced_frames =
+                               oct->tx_max_coalesced_frames;
+               break;
        }
-       if (oct->intrmod.rx_enable) {
-               if (intr_coal->rx_max_coalesced_frames_high)
-                       intrmod_cfg->rx_maxcnt_trigger =
-                               intr_coal->rx_max_coalesced_frames_high;
-               else
-                       intrmod_cfg->rx_maxcnt_trigger =
-                               LIO_INTRMOD_RXMAXCNT_TRIGGER;
+       case OCTEON_CN68XX:
+       case OCTEON_CN66XX: {
+               struct octeon_cn6xxx *cn6xxx =
+                       (struct octeon_cn6xxx *)oct->chip;
 
-               if (intr_coal->rx_coalesce_usecs_high)
-                       intrmod_cfg->rx_maxtmr_trigger =
-                               intr_coal->rx_coalesce_usecs_high;
-               else
-                       intrmod_cfg->rx_maxtmr_trigger =
-                               LIO_INTRMOD_RXMAXTMR_TRIGGER;
+               if (!intrmod_cfg.rx_enable) {
+                       intr_coal->rx_coalesce_usecs =
+                               CFG_GET_OQ_INTR_TIME(cn6xxx->conf);
+                       intr_coal->rx_max_coalesced_frames =
+                               CFG_GET_OQ_INTR_PKT(cn6xxx->conf);
+               }
+               iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no];
+               intr_coal->tx_max_coalesced_frames = iq->fill_threshold;
+               break;
+       }
+       default:
+               netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+               return -EINVAL;
+       }
+       if (intrmod_cfg.rx_enable) {
+               intr_coal->use_adaptive_rx_coalesce =
+                       intrmod_cfg.rx_enable;
+               intr_coal->rate_sample_interval =
+                       intrmod_cfg.check_intrvl;
+               intr_coal->pkt_rate_high =
+                       intrmod_cfg.maxpkt_ratethr;
+               intr_coal->pkt_rate_low =
+                       intrmod_cfg.minpkt_ratethr;
+               intr_coal->rx_max_coalesced_frames_high =
+                       intrmod_cfg.rx_maxcnt_trigger;
+               intr_coal->rx_coalesce_usecs_high =
+                       intrmod_cfg.rx_maxtmr_trigger;
+               intr_coal->rx_coalesce_usecs_low =
+                       intrmod_cfg.rx_mintmr_trigger;
+               intr_coal->rx_max_coalesced_frames_low =
+                       intrmod_cfg.rx_mincnt_trigger;
+       }
+       if ((OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) &&
+           (intrmod_cfg.tx_enable)) {
+               intr_coal->use_adaptive_tx_coalesce =
+                       intrmod_cfg.tx_enable;
+               intr_coal->tx_max_coalesced_frames_high =
+                       intrmod_cfg.tx_maxcnt_trigger;
+               intr_coal->tx_max_coalesced_frames_low =
+                       intrmod_cfg.tx_mincnt_trigger;
+       }
+       return 0;
+}
 
-               if (intr_coal->rx_coalesce_usecs_low)
-                       intrmod_cfg->rx_mintmr_trigger =
-                               intr_coal->rx_coalesce_usecs_low;
-               else
-                       intrmod_cfg->rx_mintmr_trigger =
-                               LIO_INTRMOD_RXMINTMR_TRIGGER;
+/* Enable/Disable auto interrupt Moderation */
+static int oct_cfg_adaptive_intr(struct lio *lio,
+                                struct oct_intrmod_cfg *intrmod_cfg,
+                                struct ethtool_coalesce *intr_coal)
+{
+       int ret = 0;
 
-               if (intr_coal->rx_max_coalesced_frames_low)
-                       intrmod_cfg->rx_mincnt_trigger =
-                               intr_coal->rx_max_coalesced_frames_low;
-               else
-                       intrmod_cfg->rx_mincnt_trigger =
-                               LIO_INTRMOD_RXMINCNT_TRIGGER;
+       if (intrmod_cfg->rx_enable || intrmod_cfg->tx_enable) {
+               intrmod_cfg->check_intrvl = intr_coal->rate_sample_interval;
+               intrmod_cfg->maxpkt_ratethr = intr_coal->pkt_rate_high;
+               intrmod_cfg->minpkt_ratethr = intr_coal->pkt_rate_low;
        }
-       if (oct->intrmod.tx_enable) {
-               if (intr_coal->tx_max_coalesced_frames_high)
-                       intrmod_cfg->tx_maxcnt_trigger =
-                               intr_coal->tx_max_coalesced_frames_high;
-               else
-                       intrmod_cfg->tx_maxcnt_trigger =
-                               LIO_INTRMOD_TXMAXCNT_TRIGGER;
-               if (intr_coal->tx_max_coalesced_frames_low)
-                       intrmod_cfg->tx_mincnt_trigger =
-                               intr_coal->tx_max_coalesced_frames_low;
-               else
-                       intrmod_cfg->tx_mincnt_trigger =
-                               LIO_INTRMOD_TXMINCNT_TRIGGER;
+       if (intrmod_cfg->rx_enable) {
+               intrmod_cfg->rx_maxcnt_trigger =
+                       intr_coal->rx_max_coalesced_frames_high;
+               intrmod_cfg->rx_maxtmr_trigger =
+                       intr_coal->rx_coalesce_usecs_high;
+               intrmod_cfg->rx_mintmr_trigger =
+                       intr_coal->rx_coalesce_usecs_low;
+               intrmod_cfg->rx_mincnt_trigger =
+                       intr_coal->rx_max_coalesced_frames_low;
+       }
+       if (intrmod_cfg->tx_enable) {
+               intrmod_cfg->tx_maxcnt_trigger =
+                       intr_coal->tx_max_coalesced_frames_high;
+               intrmod_cfg->tx_mincnt_trigger =
+                       intr_coal->tx_max_coalesced_frames_low;
        }
 
        ret = octnet_set_intrmod_cfg(lio, intrmod_cfg);
@@ -1666,7 +1743,9 @@ static int oct_cfg_adaptive_intr(struct lio *lio, struct ethtool_coalesce
 }
 
 static int
-oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
+oct_cfg_rx_intrcnt(struct lio *lio,
+                  struct oct_intrmod_cfg *intrmod,
+                  struct ethtool_coalesce *intr_coal)
 {
        struct octeon_device *oct = lio->oct_dev;
        u32 rx_max_coalesced_frames;
@@ -1692,7 +1771,7 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
                int q_no;
 
                if (!intr_coal->rx_max_coalesced_frames)
-                       rx_max_coalesced_frames = oct->intrmod.rx_frames;
+                       rx_max_coalesced_frames = intrmod->rx_frames;
                else
                        rx_max_coalesced_frames =
                            intr_coal->rx_max_coalesced_frames;
@@ -1703,17 +1782,18 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
                            (octeon_read_csr64(
                                 oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) &
                             (0x3fffff00000000UL)) |
-                               rx_max_coalesced_frames);
+                               (rx_max_coalesced_frames - 1));
                        /*consider setting resend bit*/
                }
-               oct->intrmod.rx_frames = rx_max_coalesced_frames;
+               intrmod->rx_frames = rx_max_coalesced_frames;
+               oct->rx_max_coalesced_frames = rx_max_coalesced_frames;
                break;
        }
        case OCTEON_CN23XX_VF_VID: {
                int q_no;
 
                if (!intr_coal->rx_max_coalesced_frames)
-                       rx_max_coalesced_frames = oct->intrmod.rx_frames;
+                       rx_max_coalesced_frames = intrmod->rx_frames;
                else
                        rx_max_coalesced_frames =
                            intr_coal->rx_max_coalesced_frames;
@@ -1724,9 +1804,10 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
                                 oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no)) &
                             (0x3fffff00000000UL)) |
                                rx_max_coalesced_frames);
-                       /* consider writing to resend bit here */
+                       /*consider writing to resend bit here*/
                }
-               oct->intrmod.rx_frames = rx_max_coalesced_frames;
+               intrmod->rx_frames = rx_max_coalesced_frames;
+               oct->rx_max_coalesced_frames = rx_max_coalesced_frames;
                break;
        }
        default:
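
Worth flagging in the hunks above: the PF path now programs the packet-count field as rx_max_coalesced_frames - 1, while the VF path still writes the raw count. If the CN23XX raises the interrupt once the pending-packet count exceeds the programmed field (an assumption; the compare semantics are not visible in this diff), the subtraction makes the interrupt fire after exactly N packets:

    /* PF write, unpacked. The high bits hold the time threshold. */
    u64 levels = octeon_read_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no));

    levels &= 0x3fffff00000000UL;            /* keep time-threshold bits */
    levels |= rx_max_coalesced_frames - 1;   /* fire once N pkts pending */
    octeon_write_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), levels);

Whether the PF/VF asymmetry is intentional is not clear from the patch.
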
@@ -1736,6 +1817,7 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
 }
 
 static int oct_cfg_rx_intrtime(struct lio *lio,
+                              struct oct_intrmod_cfg *intrmod,
                               struct ethtool_coalesce *intr_coal)
 {
        struct octeon_device *oct = lio->oct_dev;
@@ -1766,7 +1848,7 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
                int q_no;
 
                if (!intr_coal->rx_coalesce_usecs)
-                       rx_coalesce_usecs = oct->intrmod.rx_usecs;
+                       rx_coalesce_usecs = intrmod->rx_usecs;
                else
                        rx_coalesce_usecs = intr_coal->rx_coalesce_usecs;
                time_threshold =
@@ -1775,11 +1857,12 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
                        q_no += oct->sriov_info.pf_srn;
                        octeon_write_csr64(oct,
                                           CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
-                                          (oct->intrmod.rx_frames |
-                                           (time_threshold << 32)));
+                                          (intrmod->rx_frames |
+                                           ((u64)time_threshold << 32)));
                        /*consider writing to resend bit here*/
                }
-               oct->intrmod.rx_usecs = rx_coalesce_usecs;
+               intrmod->rx_usecs = rx_coalesce_usecs;
+               oct->rx_coalesce_usecs = rx_coalesce_usecs;
                break;
        }
        case OCTEON_CN23XX_VF_VID: {
@@ -1787,7 +1870,7 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
                int q_no;
 
                if (!intr_coal->rx_coalesce_usecs)
-                       rx_coalesce_usecs = oct->intrmod.rx_usecs;
+                       rx_coalesce_usecs = intrmod->rx_usecs;
                else
                        rx_coalesce_usecs = intr_coal->rx_coalesce_usecs;
 
@@ -1796,11 +1879,12 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
                for (q_no = 0; q_no < oct->num_oqs; q_no++) {
                        octeon_write_csr64(
                                oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
-                               (oct->intrmod.rx_frames |
-                                (time_threshold << 32)));
-                       /* consider setting resend bit */
+                               (intrmod->rx_frames |
+                                ((u64)time_threshold << 32)));
+                       /*consider setting resend bit*/
                }
-               oct->intrmod.rx_usecs = rx_coalesce_usecs;
+               intrmod->rx_usecs = rx_coalesce_usecs;
+               oct->rx_coalesce_usecs = rx_coalesce_usecs;
                break;
        }
        default:
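
The (u64) casts added to both time-threshold writes above fix a real bug: time_threshold is a 32-bit tick count here, and shifting a 32-bit operand left by 32 is undefined behaviour in C, so the threshold never reliably landed in bits 63:32 of the register (on x86 the shift count is masked, so the value typically corrupted the low word instead). In isolation:

    u32 time_threshold = 100;

    u64 broken = time_threshold << 32;       /* UB: count >= operand width */
    u64 fixed  = (u64)time_threshold << 32;  /* promote first; bits 63:32  */
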
@@ -1811,8 +1895,9 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
 }
 
 static int
-oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal
-                  __attribute__((unused)))
+oct_cfg_tx_intrcnt(struct lio *lio,
+                  struct oct_intrmod_cfg *intrmod,
+                  struct ethtool_coalesce *intr_coal)
 {
        struct octeon_device *oct = lio->oct_dev;
        u32 iq_intr_pkt;
@@ -1839,12 +1924,13 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal
                        val = readq(inst_cnt_reg);
                        /*clear wmark and count.dont want to write count back*/
                        val = (val & 0xFFFF000000000000ULL) |
-                             ((u64)iq_intr_pkt
+                             ((u64)(iq_intr_pkt - 1)
                               << CN23XX_PKT_IN_DONE_WMARK_BIT_POS);
                        writeq(val, inst_cnt_reg);
                        /*consider setting resend bit*/
                }
-               oct->intrmod.tx_frames = iq_intr_pkt;
+               intrmod->tx_frames = iq_intr_pkt;
+               oct->tx_max_coalesced_frames = iq_intr_pkt;
                break;
        }
        default:
@@ -1859,6 +1945,7 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
        struct lio *lio = GET_LIO(netdev);
        int ret;
        struct octeon_device *oct = lio->oct_dev;
+       struct oct_intrmod_cfg intrmod = {0};
        u32 j, q_no;
        int db_max, db_min;
 
@@ -1877,8 +1964,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
                } else {
                        dev_err(&oct->pci_dev->dev,
                                "LIQUIDIO: Invalid tx-frames:%d. Range is min:%d max:%d\n",
-                               intr_coal->tx_max_coalesced_frames, db_min,
-                               db_max);
+                               intr_coal->tx_max_coalesced_frames,
+                               db_min, db_max);
                        return -EINVAL;
                }
                break;
@@ -1889,24 +1976,36 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
                return -EINVAL;
        }
 
-       oct->intrmod.rx_enable = intr_coal->use_adaptive_rx_coalesce ? 1 : 0;
-       oct->intrmod.tx_enable = intr_coal->use_adaptive_tx_coalesce ? 1 : 0;
+       intrmod.rx_enable = intr_coal->use_adaptive_rx_coalesce ? 1 : 0;
+       intrmod.tx_enable = intr_coal->use_adaptive_tx_coalesce ? 1 : 0;
+       intrmod.rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
+       intrmod.rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
+       intrmod.tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
 
-       ret = oct_cfg_adaptive_intr(lio, intr_coal);
+       ret = oct_cfg_adaptive_intr(lio, &intrmod, intr_coal);
 
        if (!intr_coal->use_adaptive_rx_coalesce) {
-               ret = oct_cfg_rx_intrtime(lio, intr_coal);
+               ret = oct_cfg_rx_intrtime(lio, &intrmod, intr_coal);
                if (ret)
                        goto ret_intrmod;
 
-               ret = oct_cfg_rx_intrcnt(lio, intr_coal);
+               ret = oct_cfg_rx_intrcnt(lio, &intrmod, intr_coal);
                if (ret)
                        goto ret_intrmod;
+       } else {
+               oct->rx_coalesce_usecs =
+                       CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
+               oct->rx_max_coalesced_frames =
+                       CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
        }
+
        if (!intr_coal->use_adaptive_tx_coalesce) {
-               ret = oct_cfg_tx_intrcnt(lio, intr_coal);
+               ret = oct_cfg_tx_intrcnt(lio, &intrmod, intr_coal);
                if (ret)
                        goto ret_intrmod;
+       } else {
+               oct->tx_max_coalesced_frames =
+                       CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
        }
 
        return 0;
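
The set path now works on a stack-local oct_intrmod_cfg seeded from the firmware-provided defaults and pushes the whole snapshot to the device, rather than mutating the long-lived oct->intrmod that the removed hunks in liquidio_init_nic_module() used to populate; the matching get path (octnet_get_intrmod_cfg() earlier in this file) reads the live state back from firmware. The new oct->rx_coalesce_usecs and *_coalesced_frames fields cache only what ethtool reports while adaptive moderation is on. The snapshot idiom in miniature:

    struct oct_intrmod_cfg cfg = {0};

    cfg.rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));   /* defaults */
    cfg.rx_usecs  = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
    cfg.rx_enable = intr_coal->use_adaptive_rx_coalesce ? 1 : 0; /* override */

    ret = octnet_set_intrmod_cfg(lio, &cfg);                     /* one push */
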
index 92f46b1375c32527b29e24a4476d6b455835bd46..a8426d3d05d0ccc447ac02f70615b19dd61a8b9d 100644 (file)
@@ -16,6 +16,7 @@
  * NONINFRINGEMENT.  See the GNU General Public License for more details.
  ***********************************************************************/
 #include <linux/module.h>
+#include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/firmware.h>
 #include <net/vxlan.h>
@@ -60,12 +61,6 @@ MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\"");
 
 static int ptp_enable = 1;
 
-/* Bit mask values for lio->ifstate */
-#define   LIO_IFSTATE_DROQ_OPS             0x01
-#define   LIO_IFSTATE_REGISTERED           0x02
-#define   LIO_IFSTATE_RUNNING              0x04
-#define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
-
 /* Polling interval for determining when NIC application is alive */
 #define LIQUIDIO_STARTER_POLL_INTERVAL_MS 100
 
@@ -530,36 +525,6 @@ static void liquidio_deinit_pci(void)
        pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief check interface state
- * @param lio per-network private data
- * @param state_flag flag state to check
- */
-static inline int ifstate_check(struct lio *lio, int state_flag)
-{
-       return atomic_read(&lio->ifstate) & state_flag;
-}
-
-/**
- * \brief set interface state
- * @param lio per-network private data
- * @param state_flag flag state to set
- */
-static inline void ifstate_set(struct lio *lio, int state_flag)
-{
-       atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag));
-}
-
-/**
- * \brief clear interface state
- * @param lio per-network private data
- * @param state_flag flag state to clear
- */
-static inline void ifstate_reset(struct lio *lio, int state_flag)
-{
-       atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag)));
-}
-
 /**
  * \brief Stop Tx queues
  * @param netdev network device
@@ -748,7 +713,8 @@ static void delete_glists(struct lio *lio)
                                kfree(g);
                } while (g);
 
-               if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+               if (lio->glists_virt_base && lio->glists_virt_base[i] &&
+                   lio->glists_dma_base && lio->glists_dma_base[i]) {
                        lio_dma_free(lio->oct_dev,
                                     lio->glist_entry_size * lio->tx_qsize,
                                     lio->glists_virt_base[i],
@@ -805,7 +771,7 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
        }
 
        for (i = 0; i < num_iqs; i++) {
-               int numa_node = cpu_to_node(i % num_online_cpus());
+               int numa_node = dev_to_node(&oct->pci_dev->dev);
 
                spin_lock_init(&lio->glist_lock[i]);
 
@@ -967,14 +933,13 @@ static void update_txq_status(struct octeon_device *oct, int iq_num)
                        INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
                                                  tx_restart, 1);
                        netif_wake_subqueue(netdev, iq->q_index);
-               } else {
-                       if (!octnet_iq_is_full(oct, lio->txq)) {
-                               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev,
-                                                         lio->txq,
-                                                         tx_restart, 1);
-                               wake_q(netdev, lio->txq);
-                       }
                }
+       } else if (netif_queue_stopped(netdev) &&
+                  lio->linfo.link.s.link_up &&
+                  (!octnet_iq_is_full(oct, lio->txq))) {
+               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev,
+                                         lio->txq, tx_restart, 1);
+               netif_wake_queue(netdev);
        }
 }
 
@@ -1084,16 +1049,35 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
        int i;
        int num_ioq_vectors;
        int num_alloc_ioq_vectors;
+       char *queue_irq_names = NULL;
+       char *aux_irq_name = NULL;
 
        if (OCTEON_CN23XX_PF(oct) && oct->msix_on) {
                oct->num_msix_irqs = oct->sriov_info.num_pf_rings;
                /* one non ioq interrupt for handling sli_mac_pf_int_sum */
                oct->num_msix_irqs += 1;
 
+               /* allocate storage for the names assigned to each irq */
+               oct->irq_name_storage =
+                       kcalloc((MAX_IOQ_INTERRUPTS_PER_PF + 1), INTRNAMSIZ,
+                               GFP_KERNEL);
+               if (!oct->irq_name_storage) {
+                       dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n");
+                       return -ENOMEM;
+               }
+
+               queue_irq_names = oct->irq_name_storage;
+               aux_irq_name = &queue_irq_names
+                               [IRQ_NAME_OFF(MAX_IOQ_INTERRUPTS_PER_PF)];
+
                oct->msix_entries = kcalloc(
                    oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
-               if (!oct->msix_entries)
-                       return 1;
+               if (!oct->msix_entries) {
+                       dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n");
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
+                       return -ENOMEM;
+               }
 
                msix_entries = (struct msix_entry *)oct->msix_entries;
                /*Assumption is that pf msix vectors start from pf srn to pf to
@@ -1111,7 +1095,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
                        dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
                        kfree(oct->msix_entries);
                        oct->msix_entries = NULL;
-                       return 1;
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
+                       return num_alloc_ioq_vectors;
                }
                dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
 
@@ -1119,9 +1105,12 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
 
                /** For PF, there is one non-ioq interrupt handler */
                num_ioq_vectors -= 1;
+
+               snprintf(aux_irq_name, INTRNAMSIZ,
+                        "LiquidIO%u-pf%u-aux", oct->octeon_id, oct->pf_num);
                irqret = request_irq(msix_entries[num_ioq_vectors].vector,
-                                    liquidio_legacy_intr_handler, 0, "octeon",
-                                    oct);
+                                    liquidio_legacy_intr_handler, 0,
+                                    aux_irq_name, oct);
                if (irqret) {
                        dev_err(&oct->pci_dev->dev,
                                "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
@@ -1129,13 +1118,20 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
                        pci_disable_msix(oct->pci_dev);
                        kfree(oct->msix_entries);
                        oct->msix_entries = NULL;
-                       return 1;
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
+                       return irqret;
                }
 
                for (i = 0; i < num_ioq_vectors; i++) {
+                       snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ,
+                                "LiquidIO%u-pf%u-rxtx-%u",
+                                oct->octeon_id, oct->pf_num, i);
+
                        irqret = request_irq(msix_entries[i].vector,
                                             liquidio_msix_intr_handler, 0,
-                                            "octeon", &oct->ioq_vector[i]);
+                                            &queue_irq_names[IRQ_NAME_OFF(i)],
+                                            &oct->ioq_vector[i]);
                        if (irqret) {
                                dev_err(&oct->pci_dev->dev,
                                        "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
@@ -1155,7 +1151,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
                                pci_disable_msix(oct->pci_dev);
                                kfree(oct->msix_entries);
                                oct->msix_entries = NULL;
-                               return 1;
+                               kfree(oct->irq_name_storage);
+                               oct->irq_name_storage = NULL;
+                               return irqret;
                        }
                        oct->ioq_vector[i].vector = msix_entries[i].vector;
                        /* assign the cpu mask for this msix interrupt vector */
@@ -1173,15 +1171,29 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
                else
                        oct->flags |= LIO_FLAG_MSI_ENABLED;
 
+               /* allocate storage for the names assigned to the irq */
+               oct->irq_name_storage = kcalloc(1, INTRNAMSIZ, GFP_KERNEL);
+               if (!oct->irq_name_storage)
+                       return -ENOMEM;
+
+               queue_irq_names = oct->irq_name_storage;
+
+               snprintf(&queue_irq_names[IRQ_NAME_OFF(0)], INTRNAMSIZ,
+                        "LiquidIO%u-pf%u-rxtx-%u",
+                        oct->octeon_id, oct->pf_num, 0);
+
                irqret = request_irq(oct->pci_dev->irq,
-                                    liquidio_legacy_intr_handler, IRQF_SHARED,
-                                    "octeon", oct);
+                                    liquidio_legacy_intr_handler,
+                                    IRQF_SHARED,
+                                    &queue_irq_names[IRQ_NAME_OFF(0)], oct);
                if (irqret) {
                        if (oct->flags & LIO_FLAG_MSI_ENABLED)
                                pci_disable_msi(oct->pci_dev);
                        dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
                                irqret);
-                       return 1;
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
+                       return irqret;
                }
        }
        return 0;
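
The per-vector names replacing the shared "octeon" string make /proc/interrupts legible, but the dedicated irq_name_storage is not cosmetic: request_irq() stores the name pointer rather than copying the string, so each name must stay allocated until free_irq(). That is why every error path above now frees the storage and every teardown path gained a kfree(). The pattern in miniature, with illustrative names:

    #include <linux/interrupt.h>
    #include <linux/pci.h>
    #include <linux/slab.h>

    #define NAMESZ 32

    static int demo_request_named_irqs(struct msix_entry *msix, int nvec,
                                       irq_handler_t handler, void *ctx,
                                       char **storage_out)
    {
            char *names = kcalloc(nvec, NAMESZ, GFP_KERNEL);
            int i, err;

            if (!names)
                    return -ENOMEM;

            for (i = 0; i < nvec; i++) {
                    snprintf(&names[i * NAMESZ], NAMESZ, "demo-rxtx-%d", i);
                    /* request_irq() keeps this pointer; 'names' must
                     * outlive the IRQ, so ownership passes to the caller.
                     */
                    err = request_irq(msix[i].vector, handler, 0,
                                      &names[i * NAMESZ], ctx);
                    if (err) {
                            while (--i >= 0)
                                    free_irq(msix[i].vector, ctx);
                            kfree(names);
                            return err;
                    }
            }
            *storage_out = names;   /* kfree() only after free_irq() */
            return 0;
    }
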
@@ -1369,6 +1381,12 @@ liquidio_probe(struct pci_dev *pdev,
        return 0;
 }
 
+static bool fw_type_is_none(void)
+{
+       return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE,
+                      sizeof(LIO_FW_NAME_TYPE_NONE)) == 0;
+}
+
 /**
  *\brief Destroy resources associated with octeon device
  * @param pdev PCI device structure
@@ -1449,6 +1467,9 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                                pci_disable_msi(oct->pci_dev);
                }
 
+               kfree(oct->irq_name_storage);
+               oct->irq_name_storage = NULL;
+
        /* fallthrough */
        case OCT_DEV_MSIX_ALLOC_VECTOR_DONE:
                if (OCTEON_CN23XX_PF(oct))
@@ -1508,9 +1529,12 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 
                /* fallthrough */
        case OCT_DEV_PCI_MAP_DONE:
-               /* Soft reset the octeon device before exiting */
-               if ((!OCTEON_CN23XX_PF(oct)) || !oct->octeon_id)
-                       oct->fn_list.soft_reset(oct);
+               if (!fw_type_is_none()) {
+                       /* Soft reset the octeon device before exiting */
+                       if (!OCTEON_CN23XX_PF(oct) ||
+                           (OCTEON_CN23XX_PF(oct) && !oct->octeon_id))
+                               oct->fn_list.soft_reset(oct);
+               }
 
                octeon_unmap_pci_barx(oct, 0);
                octeon_unmap_pci_barx(oct, 1);
@@ -1643,6 +1667,15 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
        if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
                liquidio_stop(netdev);
 
+       if (fw_type_is_none()) {
+               struct octnic_ctrl_pkt nctrl;
+
+               memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+               nctrl.ncmd.s.cmd = OCTNET_CMD_RESET_PF;
+               nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+               octnet_send_nic_ctrl_pkt(oct, &nctrl);
+       }
+
        if (oct->props[lio->ifidx].napi_enabled == 1) {
                list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
                        napi_disable(napi);
@@ -1658,6 +1691,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
        cleanup_link_status_change_wq(netdev);
 
+       cleanup_rx_oom_poll_fn(netdev);
+
        delete_glists(lio);
 
        free_netdev(netdev);
@@ -2126,8 +2161,7 @@ static int load_firmware(struct octeon_device *oct)
        char fw_name[LIO_MAX_FW_FILENAME_LEN];
        char *tmp_fw_type;
 
-       if (strncmp(fw_type, LIO_FW_NAME_TYPE_NONE,
-                   sizeof(LIO_FW_NAME_TYPE_NONE)) == 0) {
+       if (fw_type_is_none()) {
                dev_info(&oct->pci_dev->dev, "Skipping firmware load\n");
                return ret;
        }
@@ -2211,8 +2245,8 @@ static void if_cfg_callback(struct octeon_device *oct,
 
        oct = lio_get_device(ctx->octeon_id);
        if (resp->status)
-               dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
-                       CVM_CAST64(resp->status));
+               dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: 0x%llx (0x%08x)\n",
+                       CVM_CAST64(resp->status), status);
        WRITE_ONCE(ctx->cond, 1);
 
        snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
@@ -2437,8 +2471,11 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
        /* Flush the instruction queue */
        iq = oct->instr_queue[iq_no];
        if (iq) {
-               /* Process iq buffers with in the budget limits */
-               tx_done = octeon_flush_iq(oct, iq, budget);
+               if (atomic_read(&iq->instr_pending))
+                       /* Process iq buffers within the budget limits */
+                       tx_done = octeon_flush_iq(oct, iq, budget);
+               else
+                       tx_done = 1;
                /* Update iq read-index rather than waiting for next interrupt.
                 * Return back if tx_done is false.
                 */
@@ -2555,6 +2592,15 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
                                __func__);
                        return 1;
                }
+
+               if (octeon_dev->ioq_vector) {
+                       struct octeon_ioq_vector *ioq_vector;
+
+                       ioq_vector = &octeon_dev->ioq_vector[q];
+                       netif_set_xps_queue(netdev,
+                                           &ioq_vector->affinity_mask,
+                                           ioq_vector->iq_index);
+               }
        }
 
        return 0;
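
The netif_set_xps_queue() call added above ties each TX queue's XPS map to the affinity mask of the MSI-X vector that services it, so packets are submitted on the same CPUs that will process their completions. Generic form (netdev, mask, qidx are illustrative):

    /* Equivalent to writing the mask into
     * /sys/class/net/<dev>/queues/tx-<qidx>/xps_cpus from user space.
     */
    int err = netif_set_xps_queue(netdev, mask, qidx);
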
@@ -3596,7 +3642,8 @@ static int __liquidio_set_vf_mac(struct net_device *netdev, int vfidx,
        nctrl.ncmd.s.param2 = (is_admin_assigned ? 1 : 0);
        nctrl.ncmd.s.more = 1;
        nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.cb_fn = 0;
+       nctrl.netpndev = (u64)netdev;
+       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
        nctrl.wait_time = LIO_CMD_WAIT_TM;
 
        nctrl.udd[0] = 0;
@@ -4122,6 +4169,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                if (setup_link_status_change_wq(netdev))
                        goto setup_nic_dev_fail;
 
+               if (setup_rx_oom_poll_fn(netdev))
+                       goto setup_nic_dev_fail;
+
                /* Register the network device with the OS */
                if (register_netdev(netdev)) {
                        dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
@@ -4271,7 +4321,6 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs)
  */
 static int liquidio_init_nic_module(struct octeon_device *oct)
 {
-       struct oct_intrmod_cfg *intrmod_cfg;
        int i, retval = 0;
        int num_nic_ports = CFG_GET_NUM_NIC_PORTS(octeon_get_conf(oct));
 
@@ -4296,22 +4345,6 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
 
        liquidio_ptp_init(oct);
 
-       /* Initialize interrupt moderation params */
-       intrmod_cfg = &((struct octeon_device *)oct)->intrmod;
-       intrmod_cfg->rx_enable = 1;
-       intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
-       intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR;
-       intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR;
-       intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER;
-       intrmod_cfg->rx_maxtmr_trigger = LIO_INTRMOD_RXMAXTMR_TRIGGER;
-       intrmod_cfg->rx_mintmr_trigger = LIO_INTRMOD_RXMINTMR_TRIGGER;
-       intrmod_cfg->rx_mincnt_trigger = LIO_INTRMOD_RXMINCNT_TRIGGER;
-       intrmod_cfg->tx_enable = 1;
-       intrmod_cfg->tx_maxcnt_trigger = LIO_INTRMOD_TXMAXCNT_TRIGGER;
-       intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER;
-       intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
-       intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
-       intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
        dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
 
        return retval;
@@ -4447,14 +4480,16 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
        if (OCTEON_CN23XX_PF(octeon_dev)) {
                if (!cn23xx_fw_loaded(octeon_dev)) {
                        fw_loaded = 0;
-                       /* Do a soft reset of the Octeon device. */
-                       if (octeon_dev->fn_list.soft_reset(octeon_dev))
-                               return 1;
-                       /* things might have changed */
-                       if (!cn23xx_fw_loaded(octeon_dev))
-                               fw_loaded = 0;
-                       else
-                               fw_loaded = 1;
+                       if (!fw_type_is_none()) {
+                               /* Do a soft reset of the Octeon device. */
+                               if (octeon_dev->fn_list.soft_reset(octeon_dev))
+                                       return 1;
+                               /* things might have changed */
+                               if (!cn23xx_fw_loaded(octeon_dev))
+                                       fw_loaded = 0;
+                               else
+                                       fw_loaded = 1;
+                       }
                } else {
                        fw_loaded = 1;
                }
index 7b83be4ce1fe0ce5cab0c7ff889edbf334a5a065..174d748b592846864ee47770fab1f83e60187453 100644 (file)
@@ -16,6 +16,7 @@
  * NONINFRINGEMENT.  See the GNU General Public License for more details.
  ***********************************************************************/
 #include <linux/module.h>
+#include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <net/vxlan.h>
 #include "liquidio_common.h"
@@ -39,12 +40,6 @@ MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-/* Bit mask values for lio->ifstate */
-#define   LIO_IFSTATE_DROQ_OPS             0x01
-#define   LIO_IFSTATE_REGISTERED           0x02
-#define   LIO_IFSTATE_RUNNING              0x04
-#define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
-
 struct liquidio_if_cfg_context {
        int octeon_id;
 
@@ -335,36 +330,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
        .err_handler    = &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * \brief check interface state
- * @param lio per-network private data
- * @param state_flag flag state to check
- */
-static int ifstate_check(struct lio *lio, int state_flag)
-{
-       return atomic_read(&lio->ifstate) & state_flag;
-}
-
-/**
- * \brief set interface state
- * @param lio per-network private data
- * @param state_flag flag state to set
- */
-static void ifstate_set(struct lio *lio, int state_flag)
-{
-       atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag));
-}
-
-/**
- * \brief clear interface state
- * @param lio per-network private data
- * @param state_flag flag state to clear
- */
-static void ifstate_reset(struct lio *lio, int state_flag)
-{
-       atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag)));
-}
-
 /**
  * \brief Stop Tx queues
  * @param netdev network device
@@ -506,7 +471,8 @@ static void delete_glists(struct lio *lio)
                                kfree(g);
                } while (g);
 
-               if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+               if (lio->glists_virt_base && lio->glists_virt_base[i] &&
+                   lio->glists_dma_base && lio->glists_dma_base[i]) {
                        lio_dma_free(lio->oct_dev,
                                     lio->glist_entry_size * lio->tx_qsize,
                                     lio->glists_virt_base[i],
@@ -722,13 +688,12 @@ static void update_txq_status(struct octeon_device *oct, int iq_num)
                        netif_wake_subqueue(netdev, iq->q_index);
                        INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
                                                  tx_restart, 1);
-               } else {
-                       if (!octnet_iq_is_full(oct, lio->txq)) {
-                               INCR_INSTRQUEUE_PKT_COUNT(
-                                   lio->oct_dev, lio->txq, tx_restart, 1);
-                               wake_q(netdev, lio->txq);
-                       }
                }
+       } else if (netif_queue_stopped(netdev) && lio->linfo.link.s.link_up &&
+                  (!octnet_iq_is_full(oct, lio->txq))) {
+               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev,
+                                         lio->txq, tx_restart, 1);
+               netif_wake_queue(netdev);
        }
 }
 
@@ -780,6 +745,7 @@ liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
 static int octeon_setup_interrupt(struct octeon_device *oct)
 {
        struct msix_entry *msix_entries;
+       char *queue_irq_names = NULL;
        int num_alloc_ioq_vectors;
        int num_ioq_vectors;
        int irqret;
@@ -788,10 +754,25 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
        if (oct->msix_on) {
                oct->num_msix_irqs = oct->sriov_info.rings_per_vf;
 
+               /* allocate storage for the names assigned to each irq */
+               oct->irq_name_storage =
+                       kcalloc(MAX_IOQ_INTERRUPTS_PER_VF, INTRNAMSIZ,
+                               GFP_KERNEL);
+               if (!oct->irq_name_storage) {
+                       dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n");
+                       return -ENOMEM;
+               }
+
+               queue_irq_names = oct->irq_name_storage;
+
                oct->msix_entries = kcalloc(
                    oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
-               if (!oct->msix_entries)
-                       return 1;
+               if (!oct->msix_entries) {
+                       dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n");
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
+                       return -ENOMEM;
+               }
 
                msix_entries = (struct msix_entry *)oct->msix_entries;
 
@@ -805,16 +786,23 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
                        dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
                        kfree(oct->msix_entries);
                        oct->msix_entries = NULL;
-                       return 1;
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
+                       return num_alloc_ioq_vectors;
                }
                dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
 
                num_ioq_vectors = oct->num_msix_irqs;
 
                for (i = 0; i < num_ioq_vectors; i++) {
+                       snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ,
+                                "LiquidIO%u-vf%u-rxtx-%u",
+                                oct->octeon_id, oct->vf_num, i);
+
                        irqret = request_irq(msix_entries[i].vector,
                                             liquidio_msix_intr_handler, 0,
-                                            "octeon", &oct->ioq_vector[i]);
+                                            &queue_irq_names[IRQ_NAME_OFF(i)],
+                                            &oct->ioq_vector[i]);
                        if (irqret) {
                                dev_err(&oct->pci_dev->dev,
                                        "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
@@ -830,7 +818,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
                                pci_disable_msix(oct->pci_dev);
                                kfree(oct->msix_entries);
                                oct->msix_entries = NULL;
-                               return 1;
+                               kfree(oct->irq_name_storage);
+                               oct->irq_name_storage = NULL;
+                               return irqret;
                        }
                        oct->ioq_vector[i].vector = msix_entries[i].vector;
                        /* assign the cpu mask for this msix interrupt vector */
@@ -975,6 +965,8 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                        pci_disable_msix(oct->pci_dev);
                        kfree(oct->msix_entries);
                        oct->msix_entries = NULL;
+                       kfree(oct->irq_name_storage);
+                       oct->irq_name_storage = NULL;
                }
                /* Soft reset the octeon device before exiting */
                if (oct->pci_dev->reset_fn)
@@ -1163,6 +1155,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
        if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
                unregister_netdev(netdev);
 
+       cleanup_rx_oom_poll_fn(netdev);
+
        cleanup_link_status_change_wq(netdev);
 
        delete_glists(lio);
@@ -1642,8 +1636,12 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
        /* Flush the instruction queue */
        iq = oct->instr_queue[iq_no];
        if (iq) {
-               /* Process iq buffers with in the budget limits */
-               tx_done = octeon_flush_iq(oct, iq, budget);
+               if (atomic_read(&iq->instr_pending))
+                       /* Process iq buffers within the budget limits */
+                       tx_done = octeon_flush_iq(oct, iq, budget);
+               else
+                       tx_done = 1;
+
                /* Update iq read-index rather than waiting for next interrupt.
                 * Return back if tx_done is false.
                 */
@@ -3003,6 +3001,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                if (setup_link_status_change_wq(netdev))
                        goto setup_nic_dev_fail;
 
+               if (setup_rx_oom_poll_fn(netdev))
+                       goto setup_nic_dev_fail;
+
                /* Register the network device with the OS */
                if (register_netdev(netdev)) {
                        dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
@@ -3057,7 +3058,6 @@ setup_nic_wait_intr:
  */
 static int liquidio_init_nic_module(struct octeon_device *oct)
 {
-       struct oct_intrmod_cfg *intrmod_cfg;
        int num_nic_ports = 1;
        int i, retval = 0;
 
@@ -3079,22 +3079,6 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
                goto octnet_init_failure;
        }
 
-       /* Initialize interrupt moderation params */
-       intrmod_cfg = &((struct octeon_device *)oct)->intrmod;
-       intrmod_cfg->rx_enable = 1;
-       intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
-       intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR;
-       intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR;
-       intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER;
-       intrmod_cfg->rx_maxtmr_trigger = LIO_INTRMOD_RXMAXTMR_TRIGGER;
-       intrmod_cfg->rx_mintmr_trigger = LIO_INTRMOD_RXMINTMR_TRIGGER;
-       intrmod_cfg->rx_mincnt_trigger = LIO_INTRMOD_RXMINCNT_TRIGGER;
-       intrmod_cfg->tx_enable = 1;
-       intrmod_cfg->tx_maxcnt_trigger = LIO_INTRMOD_TXMAXCNT_TRIGGER;
-       intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER;
-       intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
-       intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
-       intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
        dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
 
        return retval;
index 294c6f3c6b48254044c610c78625c9c3c86e9b1f..8ea2323d8d676a0b96b5653b3fba966bfd5dcd5d 100644 (file)
@@ -27,7 +27,7 @@
 
 #define LIQUIDIO_PACKAGE ""
 #define LIQUIDIO_BASE_MAJOR_VERSION 1
-#define LIQUIDIO_BASE_MINOR_VERSION 4
+#define LIQUIDIO_BASE_MINOR_VERSION 5
 #define LIQUIDIO_BASE_MICRO_VERSION 1
 #define LIQUIDIO_BASE_VERSION   __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
                                __stringify(LIQUIDIO_BASE_MINOR_VERSION)
@@ -83,6 +83,7 @@ enum octeon_tag_type {
 #define OPCODE_NIC_INTRMOD_CFG         0x08
 #define OPCODE_NIC_IF_CFG              0x09
 #define OPCODE_NIC_VF_DRV_NOTICE       0x0A
+#define OPCODE_NIC_INTRMOD_PARAMS      0x0B
 #define VF_DRV_LOADED                  1
 #define VF_DRV_REMOVED                -1
 #define VF_DRV_MACADDR_CHANGED         2
@@ -100,6 +101,11 @@ enum octeon_tag_type {
 
 #define BYTES_PER_DHLEN_UNIT        8
 #define MAX_REG_CNT                 2000000U
+#define INTRNAMSIZ                  32
+#define IRQ_NAME_OFF(i)             ((i) * INTRNAMSIZ)
+#define MAX_IOQ_INTERRUPTS_PER_PF   (64 * 2)
+#define MAX_IOQ_INTERRUPTS_PER_VF   (8 * 2)
+
 
 static inline u32 incr_index(u32 index, u32 count, u32 max)
 {
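
The INTRNAMSIZ/IRQ_NAME_OFF pair above carves one flat allocation into fixed-width name slots, one per MSI-X vector. A minimal userspace sketch of that indexing scheme follows, assuming made-up octeon_id/vf_num values and a vector count of 4 (none of these figures come from the patch itself):

#include <stdio.h>
#include <stdlib.h>

#define INTRNAMSIZ                32
#define IRQ_NAME_OFF(i)           ((i) * INTRNAMSIZ)
#define MAX_IOQ_INTERRUPTS_PER_VF (8 * 2)

int main(void)
{
	/* one INTRNAMSIZ-byte slot per possible vector, zero-initialized */
	char *queue_irq_names = calloc(MAX_IOQ_INTERRUPTS_PER_VF, INTRNAMSIZ);
	unsigned int oct_id = 0, vf_num = 1, i, num_ioq_vectors = 4;

	if (!queue_irq_names)
		return 1;
	for (i = 0; i < num_ioq_vectors; i++)
		snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ,
			 "LiquidIO%u-vf%u-rxtx-%u", oct_id, vf_num, i);
	printf("%s\n", &queue_irq_names[IRQ_NAME_OFF(2)]); /* LiquidIO0-vf1-rxtx-2 */
	free(queue_irq_names);
	return 0;
}
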
@@ -181,6 +187,7 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 #define   OCTNET_CMD_Q                0
 
 /* NIC Command types */
+#define   OCTNET_CMD_RESET_PF         0x0
 #define   OCTNET_CMD_CHANGE_MTU       0x1
 #define   OCTNET_CMD_CHANGE_MACADDR   0x2
 #define   OCTNET_CMD_CHANGE_DEVFLAGS  0x3
@@ -845,29 +852,6 @@ struct oct_mdio_cmd {
 
 #define OCT_LINK_STATS_SIZE   (sizeof(struct oct_link_stats))
 
-/* intrmod: max. packet rate threshold */
-#define LIO_INTRMOD_MAXPKT_RATETHR     196608
-/* intrmod: min. packet rate threshold */
-#define LIO_INTRMOD_MINPKT_RATETHR     9216
-/* intrmod: max. packets to trigger interrupt */
-#define LIO_INTRMOD_RXMAXCNT_TRIGGER   384
-/* intrmod: min. packets to trigger interrupt */
-#define LIO_INTRMOD_RXMINCNT_TRIGGER   0
-/* intrmod: max. time to trigger interrupt */
-#define LIO_INTRMOD_RXMAXTMR_TRIGGER   128
-/* 66xx:intrmod: min. time to trigger interrupt
- * (value of 1 is optimum for TCP_RR)
- */
-#define LIO_INTRMOD_RXMINTMR_TRIGGER   1
-
-/* intrmod: max. packets to trigger interrupt */
-#define LIO_INTRMOD_TXMAXCNT_TRIGGER   64
-/* intrmod: min. packets to trigger interrupt */
-#define LIO_INTRMOD_TXMINCNT_TRIGGER   0
-
-/* intrmod: poll interval in seconds */
-#define LIO_INTRMOD_CHECK_INTERVAL  1
-
 struct oct_intrmod_cfg {
        u64 rx_enable;
        u64 tx_enable;
index 9675ffbf25e6bd9bf34d346204f1b0fbcbcfa185..e21b477d0159f1e17570259e2db3bcb155378728 100644 (file)
@@ -793,7 +793,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
        u32 num_descs = 0;
        u32 iq_no = 0;
        union oct_txpciq txpciq;
-       int numa_node = cpu_to_node(iq_no % num_online_cpus());
+       int numa_node = dev_to_node(&oct->pci_dev->dev);
 
        if (OCTEON_CN6XXX(oct))
                num_descs =
@@ -837,7 +837,7 @@ int octeon_setup_output_queues(struct octeon_device *oct)
        u32 num_descs = 0;
        u32 desc_size = 0;
        u32 oq_no = 0;
-       int numa_node = cpu_to_node(oq_no % num_online_cpus());
+       int numa_node = dev_to_node(&oct->pci_dev->dev);
 
        if (OCTEON_CN6XXX(oct)) {
                num_descs =
index c301a3852482845ee65bf260c48dbd69853e9522..dab35bfa46123a0b7a7dbeb3ff56e58ea76e6e84 100644 (file)
@@ -453,9 +453,6 @@ struct octeon_device {
        /** List of dispatch functions */
        struct octeon_dispatch_list dispatch;
 
-       /* Interrupt Moderation */
-       struct oct_intrmod_cfg intrmod;
-
        u32 int_status;
 
        u64 droq_intr;
@@ -517,6 +514,9 @@ struct octeon_device {
 
        void *msix_entries;
 
+       /* when requesting IRQs, the names are stored here */
+       void *irq_name_storage;
+
        struct octeon_sriov_info sriov_info;
 
        struct octeon_pf_vf_hs_word pfvf_hsword;
@@ -538,6 +538,10 @@ struct octeon_device {
        u32 priv_flags;
 
        void *watchdog_task;
+
+       u32 rx_coalesce_usecs;
+       u32 rx_max_coalesced_frames;
+       u32 tx_max_coalesced_frames;
 };
 
 #define  OCT_DRV_ONLINE 1
@@ -551,12 +555,6 @@ struct octeon_device {
 #define CHIP_CONF(oct, TYPE)             \
        (((struct octeon_ ## TYPE  *)((oct)->chip))->conf)
 
-struct oct_intrmod_cmd {
-       struct octeon_device *oct_dev;
-       struct octeon_soft_command *sc;
-       struct oct_intrmod_cfg *cfg;
-};
-
 /*------------------ Function Prototypes ----------------------*/
 
 /** Initialize device list memory */
index 79f809479af6e7d865cc7c280c84232622af982e..286be5539cef707c9464f1c4488cdd6134e9b343 100644 (file)
@@ -226,8 +226,7 @@ int octeon_init_droq(struct octeon_device *oct,
        struct octeon_droq *droq;
        u32 desc_ring_size = 0, c_num_descs = 0, c_buf_size = 0;
        u32 c_pkts_per_intr = 0, c_refill_threshold = 0;
-       int orig_node = dev_to_node(&oct->pci_dev->dev);
-       int numa_node = cpu_to_node(q_no % num_online_cpus());
+       int numa_node = dev_to_node(&oct->pci_dev->dev);
 
        dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);
 
@@ -267,13 +266,8 @@ int octeon_init_droq(struct octeon_device *oct,
        droq->buffer_size = c_buf_size;
 
        desc_ring_size = droq->max_count * OCT_DROQ_DESC_SIZE;
-       set_dev_node(&oct->pci_dev->dev, numa_node);
        droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
                                        (dma_addr_t *)&droq->desc_ring_dma);
-       set_dev_node(&oct->pci_dev->dev, orig_node);
-       if (!droq->desc_ring)
-               droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
-                                       (dma_addr_t *)&droq->desc_ring_dma);
 
        if (!droq->desc_ring) {
                dev_err(&oct->pci_dev->dev,
@@ -519,6 +513,32 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
        return desc_refilled;
 }
 
+/** Check whether buffers can be allocated to get out of OOM; refill if so.
+ *  @param  droq - the DROQ being checked.
+ *  @return nothing
+ */
+void octeon_droq_check_oom(struct octeon_droq *droq)
+{
+       int desc_refilled;
+       struct octeon_device *oct = droq->oct_dev;
+
+       if (readl(droq->pkts_credit_reg) <= CN23XX_SLI_DEF_BP) {
+               spin_lock_bh(&droq->lock);
+               desc_refilled = octeon_droq_refill(oct, droq);
+               if (desc_refilled) {
+                       /* Flush the droq descriptor data to memory to be sure
+                        * that when we update the credits the data in memory
+                        * is accurate.
+                        */
+                       wmb();
+                       writel(desc_refilled, droq->pkts_credit_reg);
+                       /* make sure mmio write completes */
+                       mmiowb();
+               }
+               spin_unlock_bh(&droq->lock);
+       }
+}
+
 static inline u32
 octeon_droq_get_bufcount(u32 buf_size, u32 total_len)
 {
@@ -970,7 +990,7 @@ int octeon_create_droq(struct octeon_device *oct,
                       u32 desc_size, void *app_ctx)
 {
        struct octeon_droq *droq;
-       int numa_node = cpu_to_node(q_no % num_online_cpus());
+       int numa_node = dev_to_node(&oct->pci_dev->dev);
 
        if (oct->droq[q_no]) {
                dev_dbg(&oct->pci_dev->dev, "Droq already in use. Cannot create droq %d again\n",
index 6982c0af5eccb7129123fcbb4ba8363bb7f9710a..9781577115e76ff7d1ef27966bf3cbdba84ee7b4 100644 (file)
@@ -426,4 +426,6 @@ int octeon_droq_process_packets(struct octeon_device *oct,
 int octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no,
                                 int cmd, u32 arg);
 
+void octeon_droq_check_oom(struct octeon_droq *droq);
+
 #endif /*__OCTEON_DROQ_H__ */
index 4608a5af35a3204b54378dc03eef94c976370ac1..5063a12613e53646b9930f10090eae13c32301e3 100644 (file)
@@ -152,7 +152,7 @@ struct octeon_instr_queue {
        struct oct_iq_stats stats;
 
        /** DMA mapped base address of the input descriptor ring. */
-       u64 base_addr_dma;
+       dma_addr_t base_addr_dma;
 
        /** Application context */
        void *app_ctx;
index eef2a1e8a7e3f96b26f004ec0eec93e447a5d61f..454ec0ca56abd1182d57e0fc3f2de8ee60823294 100644 (file)
 #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE)
 #define LIO_MIN_MTU_SIZE ETH_MIN_MTU
 
+/* Bit mask values for lio->ifstate */
+#define   LIO_IFSTATE_DROQ_OPS             0x01
+#define   LIO_IFSTATE_REGISTERED           0x02
+#define   LIO_IFSTATE_RUNNING              0x04
+#define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
+
 struct oct_nic_stats_resp {
        u64     rh;
        struct oct_link_stats stats;
@@ -123,6 +129,9 @@ struct lio {
        /* work queue for  txq status */
        struct cavium_wq        txq_status_wq;
 
+       /* work queue for  rxq oom status */
+       struct cavium_wq        rxq_status_wq;
+
        /* work queue for  link status */
        struct cavium_wq        link_status_wq;
 
@@ -146,6 +155,10 @@ struct lio {
  */
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1);
 
+int setup_rx_oom_poll_fn(struct net_device *netdev);
+
+void cleanup_rx_oom_poll_fn(struct net_device *netdev);
+
 /**
  * \brief Link control command completion callback
  * @param nctrl_ptr pointer to control packet structure
@@ -438,4 +451,34 @@ static inline void octeon_fast_packet_next(struct octeon_droq *droq,
               get_rbd(droq->recv_buf_list[idx].buffer), copy_len);
 }
 
+/**
+ * \brief check interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to check
+ */
+static inline int ifstate_check(struct lio *lio, int state_flag)
+{
+       return atomic_read(&lio->ifstate) & state_flag;
+}
+
+/**
+ * \brief set interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to set
+ */
+static inline void ifstate_set(struct lio *lio, int state_flag)
+{
+       atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag));
+}
+
+/**
+ * \brief clear interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to clear
+ */
+static inline void ifstate_reset(struct lio *lio, int state_flag)
+{
+       atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag)));
+}
+
 #endif
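
The ifstate helpers above wrap simple bit-flag bookkeeping over an atomic_t. A hedged sketch of the same set/clear/check pattern, substituting a plain int for the kernel atomic_t purely for illustration:

#include <stdio.h>

#define LIO_IFSTATE_REGISTERED 0x02
#define LIO_IFSTATE_RUNNING    0x04

int main(void)
{
	int ifstate = 0;

	ifstate |= LIO_IFSTATE_REGISTERED;      /* ifstate_set() */
	ifstate |= LIO_IFSTATE_RUNNING;
	ifstate &= ~LIO_IFSTATE_RUNNING;        /* ifstate_reset() */
	printf("registered=%d running=%d\n",    /* ifstate_check() */
	       !!(ifstate & LIO_IFSTATE_REGISTERED),
	       !!(ifstate & LIO_IFSTATE_RUNNING));
	return 0;
}
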
index 707bc15adec61351c1384b8454c85a87a2c4b437..261f448f9de23d059d0e5a5f1552bef5069e4c65 100644 (file)
@@ -62,8 +62,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
        u32 iq_no = (u32)txpciq.s.q_no;
        u32 q_size;
        struct cavium_wq *db_wq;
-       int orig_node = dev_to_node(&oct->pci_dev->dev);
-       int numa_node = cpu_to_node(iq_no % num_online_cpus());
+       int numa_node = dev_to_node(&oct->pci_dev->dev);
 
        if (OCTEON_CN6XXX(oct))
                conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
@@ -91,13 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
        iq->oct_dev = oct;
 
-       set_dev_node(&oct->pci_dev->dev, numa_node);
-       iq->base_addr = lio_dma_alloc(oct, q_size,
-                                     (dma_addr_t *)&iq->base_addr_dma);
-       set_dev_node(&oct->pci_dev->dev, orig_node);
-       if (!iq->base_addr)
-               iq->base_addr = lio_dma_alloc(oct, q_size,
-                                             (dma_addr_t *)&iq->base_addr_dma);
+       iq->base_addr = lio_dma_alloc(oct, q_size, &iq->base_addr_dma);
        if (!iq->base_addr) {
                dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
                        iq_no);
@@ -211,7 +204,7 @@ int octeon_setup_iq(struct octeon_device *oct,
                    void *app_ctx)
 {
        u32 iq_no = (u32)txpciq.s.q_no;
-       int numa_node = cpu_to_node(iq_no % num_online_cpus());
+       int numa_node = dev_to_node(&oct->pci_dev->dev);
 
        if (oct->instr_queue[iq_no]) {
                dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
index 2fbaae96b505fbd0dcd45e85d366ee2585ef9e55..3d691c69f74d1cf7567b2d58730dcfd656c0eefd 100644 (file)
@@ -69,50 +69,53 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
        int resp_to_process = MAX_ORD_REQS_TO_PROCESS;
        u32 status;
        u64 status64;
-       struct octeon_instr_rdp *rdp;
-       u64 rptr;
 
        ordered_sc_list = &octeon_dev->response_list[OCTEON_ORDERED_SC_LIST];
 
        do {
                spin_lock_bh(&ordered_sc_list->lock);
 
-               if (ordered_sc_list->head.next == &ordered_sc_list->head) {
+               if (list_empty(&ordered_sc_list->head)) {
                        spin_unlock_bh(&ordered_sc_list->lock);
                        return 1;
                }
 
-               sc = (struct octeon_soft_command *)ordered_sc_list->
-                   head.next;
-               if (OCTEON_CN23XX_PF(octeon_dev) ||
-                   OCTEON_CN23XX_VF(octeon_dev)) {
-                       rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
-                       rptr = sc->cmd.cmd3.rptr;
-               } else {
-                       rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-                       rptr = sc->cmd.cmd2.rptr;
-               }
+               sc = list_first_entry(&ordered_sc_list->head,
+                                     struct octeon_soft_command, node);
 
                status = OCTEON_REQUEST_PENDING;
 
                /* check if octeon has finished DMA'ing a response
                 * to where rptr is pointing to
                 */
-               dma_sync_single_for_cpu(&octeon_dev->pci_dev->dev,
-                                       rptr, rdp->rlen,
-                                       DMA_FROM_DEVICE);
                status64 = *sc->status_word;
 
                if (status64 != COMPLETION_WORD_INIT) {
+                       /* This logic ensures that all 64b have been written.
+                        * 1. check byte 0 for non-FF
+                        * 2. if non-FF, then swap result from BE to host order
+                        * 3. check byte 7 (swapped to 0) for non-FF
+                        * 4. if non-FF, use the low 32-bit status code
+                        * 5. if either byte 0 or byte 7 is FF, don't use status
+                        */
                        if ((status64 & 0xff) != 0xff) {
                                octeon_swap_8B_data(&status64, 1);
                                if (((status64 & 0xff) != 0xff)) {
-                                       status = (u32)(status64 &
-                                                      0xffffffffULL);
+                                       /* retrieve 16-bit firmware status */
+                                       status = (u32)(status64 & 0xffffULL);
+                                       if (status) {
+                                               status =
+                                                 FIRMWARE_STATUS_CODE(status);
+                                       } else {
+                                               /* i.e. no error */
+                                               status = OCTEON_REQUEST_DONE;
+                                       }
                                }
                        }
                } else if (force_quit || (sc->timeout &&
                        time_after(jiffies, (unsigned long)sc->timeout))) {
+                       dev_err(&octeon_dev->pci_dev->dev, "%s: cmd failed, timeout (%ld, %ld)\n",
+                               __func__, (long)jiffies, (long)sc->timeout);
                        status = OCTEON_REQUEST_TIMEOUT;
                }
 
index cbb2d84e89323aea4852c31c164c2ff49c40257c..9169c2815dba36c59b7b8cea5642a7fb27bff439 100644 (file)
@@ -78,6 +78,8 @@ enum {
 
 /*------------   Error codes used by host driver   -----------------*/
 #define DRIVER_MAJOR_ERROR_CODE           0x0000
+/*------   Error codes used by firmware (bits 15..0 set by firmware) ----*/
+#define FIRMWARE_MAJOR_ERROR_CODE         0x0001
 
 /**  A value of 0x00000000 indicates no error i.e. success */
 #define DRIVER_ERROR_NONE                 0x00000000
@@ -116,6 +118,9 @@ enum {
 
 };
 
+#define FIRMWARE_STATUS_CODE(status) \
+       ((FIRMWARE_MAJOR_ERROR_CODE << 16) | (status))
+
 /** Initialize the response lists. The number of response lists to create is
  * given by count.
  * @param octeon_dev      - the octeon device structure.
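
A worked example of the FIRMWARE_STATUS_CODE() decode introduced above, with a made-up status word: a non-zero 16-bit firmware status (here 0x0012) gets tagged with FIRMWARE_MAJOR_ERROR_CODE in the upper half, while a zero status would instead be mapped to OCTEON_REQUEST_DONE by the caller.

#include <stdint.h>
#include <stdio.h>

#define FIRMWARE_MAJOR_ERROR_CODE 0x0001
#define FIRMWARE_STATUS_CODE(status) \
	((FIRMWARE_MAJOR_ERROR_CODE << 16) | (status))

int main(void)
{
	uint64_t status64 = 0x0000000000000012ULL; /* example, already swapped */
	uint32_t status = (uint32_t)(status64 & 0xffffULL);

	if (status)
		status = FIRMWARE_STATUS_CODE(status);
	printf("0x%08x\n", (unsigned int)status); /* prints 0x00010012 */
	return 0;
}
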
index 87000cd397372ab999d25ea2c935f0385fa941ee..0de8eb72325c53ae50cd3ec535915486a9fb5064 100644 (file)
@@ -6369,7 +6369,6 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
        unsigned int stat_len = cache_line_size > 64 ? 128 : 64;
        unsigned int fl_align = cache_line_size < 32 ? 32 : cache_line_size;
        unsigned int fl_align_log = fls(fl_align) - 1;
-       unsigned int ingpad;
 
        t4_write_reg(adap, SGE_HOST_PAGE_SIZE_A,
                     HOSTPAGESIZEPF0_V(sge_hps) |
@@ -6389,6 +6388,10 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
                                                  INGPADBOUNDARY_SHIFT_X) |
                                 EGRSTATUSPAGESIZE_V(stat_len != 64));
        } else {
+               unsigned int pack_align;
+               unsigned int ingpad, ingpack;
+               unsigned int pcie_cap;
+
                /* T5 introduced the separation of the Free List Padding and
                 * Packing Boundaries.  Thus, we can select a smaller Padding
                 * Boundary to avoid uselessly chewing up PCIe Link and Memory
@@ -6401,27 +6404,62 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
                 * Size (the minimum unit of transfer to/from Memory).  If we
                 * have a Padding Boundary which is smaller than the Memory
                 * Line Size, that'll involve a Read-Modify-Write cycle on the
-                * Memory Controller which is never good.  For T5 the smallest
-                * Padding Boundary which we can select is 32 bytes which is
-                * larger than any known Memory Controller Line Size so we'll
-                * use that.
-                *
-                * T5 has a different interpretation of the "0" value for the
-                * Packing Boundary.  This corresponds to 16 bytes instead of
-                * the expected 32 bytes.  We never have a Packing Boundary
-                * less than 32 bytes so we can't use that special value but
-                * on the other hand, if we wanted 32 bytes, the best we can
-                * really do is 64 bytes.
-               */
-               if (fl_align <= 32) {
+                * Memory Controller which is never good.
+                */
+
+               /* We want the Packing Boundary to be based on the Cache Line
+                * Size in order to help avoid False Sharing performance
+                * issues between CPUs, etc.  We also want the Packing
+                * Boundary to incorporate the PCI-E Maximum Payload Size.  We
+                * get best performance when the Packing Boundary is a
+                * multiple of the Maximum Payload Size.
+                */
+               pack_align = fl_align;
+               pcie_cap = pci_find_capability(adap->pdev, PCI_CAP_ID_EXP);
+               if (pcie_cap) {
+                       unsigned int mps, mps_log;
+                       u16 devctl;
+
+                       /* The PCIe Device Control Maximum Payload Size field
+                        * [bits 7:5] encodes sizes as powers of 2 starting at
+                        * 128 bytes.
+                        */
+                       pci_read_config_word(adap->pdev,
+                                            pcie_cap + PCI_EXP_DEVCTL,
+                                            &devctl);
+                       mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
+                       mps = 1 << mps_log;
+                       if (mps > pack_align)
+                               pack_align = mps;
+               }
+
+               /* N.B. T5/T6 have a crazy special interpretation of the "0"
+                * value for the Packing Boundary.  This corresponds to 16
+                * bytes instead of the expected 32 bytes.  So if we want 32
+                * bytes, the best we can really do is 64 bytes ...
+                */
+               if (pack_align <= 16) {
+                       ingpack = INGPACKBOUNDARY_16B_X;
+                       fl_align = 16;
+               } else if (pack_align == 32) {
+                       ingpack = INGPACKBOUNDARY_64B_X;
                        fl_align = 64;
-                       fl_align_log = 6;
+               } else {
+                       unsigned int pack_align_log = fls(pack_align) - 1;
+
+                       ingpack = pack_align_log - INGPACKBOUNDARY_SHIFT_X;
+                       fl_align = pack_align;
                }
 
+               /* Use the smallest Ingress Padding which isn't smaller than
+                * the Memory Controller Read/Write Size.  We'll take that as
+                * being 8 bytes since we don't know of any system with a
+                * wider Memory Controller Bus Width.
+                */
                if (is_t5(adap->params.chip))
-                       ingpad = INGPCIEBOUNDARY_32B_X;
+                       ingpad = INGPADBOUNDARY_32B_X;
                else
-                       ingpad = T6_INGPADBOUNDARY_32B_X;
+                       ingpad = T6_INGPADBOUNDARY_8B_X;
 
                t4_set_reg_field(adap, SGE_CONTROL_A,
                                 INGPADBOUNDARY_V(INGPADBOUNDARY_M) |
@@ -6430,8 +6468,7 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
                                 EGRSTATUSPAGESIZE_V(stat_len != 64));
                t4_set_reg_field(adap, SGE_CONTROL2_A,
                                 INGPACKBOUNDARY_V(INGPACKBOUNDARY_M),
-                                INGPACKBOUNDARY_V(fl_align_log -
-                                                  INGPACKBOUNDARY_SHIFT_X));
+                                INGPACKBOUNDARY_V(ingpack));
        }
        /*
         * Adjust various SGE Free List Host Buffer Sizes.
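
The hunk above picks the Packing Boundary as the larger of the cache line size and the PCIe Maximum Payload Size, with the Device Control MPS field (bits 7:5) encoding powers of 2 starting at 128 bytes. A small sketch of that decode under illustrative register values:

#include <stdio.h>

#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* MPS field mask, bits 7:5 */

int main(void)
{
	unsigned int fl_align = 64;         /* example cache line size */
	unsigned int devctl = 2 << 5;       /* MPS field value 2 */
	unsigned int mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
	unsigned int mps = 1 << mps_log;    /* 2 -> 2^9 = 512 bytes */
	unsigned int pack_align = fl_align;

	if (mps > pack_align)
		pack_align = mps;
	printf("mps=%u pack_align=%u\n", mps, pack_align); /* 512 512 */
	return 0;
}
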
index 36cf3073ca37d59c2cf3a129603637fc54619c13..f6558cbfc54ec3f7369699d137003cafb003f4c7 100644 (file)
 #define INGPADBOUNDARY_SHIFT_X         5
 
 #define T6_INGPADBOUNDARY_SHIFT_X      3
+#define T6_INGPADBOUNDARY_8B_X         0
 #define T6_INGPADBOUNDARY_32B_X                2
 
+#define INGPADBOUNDARY_32B_X           0
+
 /* CONTROL2 register */
 #define INGPACKBOUNDARY_SHIFT_X                5
 #define INGPACKBOUNDARY_16B_X          0
+#define INGPACKBOUNDARY_64B_X          1
 
 /* GTS register */
 #define SGE_TIMERREGS                  6
index 23d82748f52b9aa19ecd17226444f3326dfaaed4..e863ba74d005d7f255931b336825df2abadd2fc8 100644 (file)
@@ -1148,14 +1148,14 @@ static int ethoc_probe(struct platform_device *pdev)
 
        /* Allow the platform setup code to pass in a MAC address. */
        if (pdata) {
-               memcpy(netdev->dev_addr, pdata->hwaddr, IFHWADDRLEN);
+               ether_addr_copy(netdev->dev_addr, pdata->hwaddr);
                priv->phy_id = pdata->phy_id;
        } else {
                const void *mac;
 
                mac = of_get_mac_address(pdev->dev.of_node);
                if (mac)
-                       memcpy(netdev->dev_addr, mac, IFHWADDRLEN);
+                       ether_addr_copy(netdev->dev_addr, mac);
                priv->phy_id = -1;
        }
 
index 992ebe973d25bfbccff7b5c42dc1801ea41fc9ea..70165fcbff9c17af1ec009df1cae67a03bb2009f 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/module.h>
 #include <linux/etherdevice.h>
+#include <linux/interrupt.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_net.h>
index 928b0df2b8e033e2b784759e32a0218e0b7e16f2..333265060de14350a818f2fafb014e0d42c565ce 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
 #include <net/ip.h>
index e2ca107f9d94f162cdab87e406c72095a6746651..9a520e4f0df9a0d47b75f71f01557414ba3d4eab 100644 (file)
@@ -137,6 +137,13 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
 /* L4 Type field: TCP */
 #define FM_L4_PARSE_RESULT_TCP 0x20
 
+/* FD status field indicating whether the FM Parser has attempted to validate
+ * the L4 csum of the frame.
+ * Note that having this bit set doesn't necessarily imply that the checksum
+ * is valid. One would have to check the parse results to find that out.
+ */
+#define FM_FD_STAT_L4CV         0x00000004
+
 #define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
 #define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */
 
@@ -235,6 +242,7 @@ static int dpaa_netdev_init(struct net_device *net_dev,
         * For conformity, we'll still declare GSO explicitly.
         */
        net_dev->features |= NETIF_F_GSO;
+       net_dev->features |= NETIF_F_RXCSUM;
 
        net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        /* we do not want shared skbs on TX */
@@ -334,6 +342,45 @@ static void dpaa_get_stats64(struct net_device *net_dev,
        }
 }
 
+static int dpaa_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
+                        struct tc_to_netdev *tc)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       u8 num_tc;
+       int i;
+
+       if (tc->type != TC_SETUP_MQPRIO)
+               return -EINVAL;
+
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+       num_tc = tc->mqprio->num_tc;
+
+       if (num_tc == priv->num_tc)
+               return 0;
+
+       if (!num_tc) {
+               netdev_reset_tc(net_dev);
+               goto out;
+       }
+
+       if (num_tc > DPAA_TC_NUM) {
+               netdev_err(net_dev, "Too many traffic classes: max %d supported.\n",
+                          DPAA_TC_NUM);
+               return -EINVAL;
+       }
+
+       netdev_set_num_tc(net_dev, num_tc);
+
+       for (i = 0; i < num_tc; i++)
+               netdev_set_tc_queue(net_dev, i, DPAA_TC_TXQ_NUM,
+                                   i * DPAA_TC_TXQ_NUM);
+
+out:
+       priv->num_tc = num_tc ? : 1;
+       netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
+       return 0;
+}
+
 static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
 {
        struct platform_device *of_dev;
@@ -557,16 +604,18 @@ static void dpaa_bps_free(struct dpaa_priv *priv)
 
 /* Use multiple WQs for FQ assignment:
  *     - Tx Confirmation queues go to WQ1.
- *     - Rx Error and Tx Error queues go to WQ2 (giving them a better chance
- *       to be scheduled, in case there are many more FQs in WQ3).
- *     - Rx Default and Tx queues go to WQ3 (no differentiation between
- *       Rx and Tx traffic).
+ *     - Rx Error and Tx Error queues go to WQ5 (giving them a better chance
+ *       to be scheduled, in case there are many more FQs in WQ6).
+ *     - Rx Default goes to WQ6.
+ *     - Tx queues go to different WQs depending on their priority. Equal
+ *       chunks of NR_CPUS queues go to WQ6 (lowest priority), WQ2, WQ1 and
+ *       WQ0 (highest priority).
  * This ensures that Tx-confirmed buffers are timely released. In particular,
  * it avoids congestion on the Tx Confirm FQs, which can pile up PFDRs if they
  * are greatly outnumbered by other FQs in the system, while
  * dequeue scheduling is round-robin.
  */
-static inline void dpaa_assign_wq(struct dpaa_fq *fq)
+static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx)
 {
        switch (fq->fq_type) {
        case FQ_TYPE_TX_CONFIRM:
@@ -575,11 +624,33 @@ static inline void dpaa_assign_wq(struct dpaa_fq *fq)
                break;
        case FQ_TYPE_RX_ERROR:
        case FQ_TYPE_TX_ERROR:
-               fq->wq = 2;
+               fq->wq = 5;
                break;
        case FQ_TYPE_RX_DEFAULT:
+               fq->wq = 6;
+               break;
        case FQ_TYPE_TX:
-               fq->wq = 3;
+               switch (idx / DPAA_TC_TXQ_NUM) {
+               case 0:
+                       /* Low priority (best effort) */
+                       fq->wq = 6;
+                       break;
+               case 1:
+                       /* Medium priority */
+                       fq->wq = 2;
+                       break;
+               case 2:
+                       /* High priority */
+                       fq->wq = 1;
+                       break;
+               case 3:
+                       /* Very high priority */
+                       fq->wq = 0;
+                       break;
+               default:
+                       WARN(1, "Too many TX FQs: more than %d!\n",
+                            DPAA_ETH_TXQ_NUM);
+               }
                break;
        default:
                WARN(1, "Invalid FQ type %d for FQID %d!\n",
@@ -607,7 +678,7 @@ static struct dpaa_fq *dpaa_fq_alloc(struct device *dev,
        }
 
        for (i = 0; i < count; i++)
-               dpaa_assign_wq(dpaa_fq + i);
+               dpaa_assign_wq(dpaa_fq + i, i);
 
        return dpaa_fq;
 }
@@ -903,7 +974,7 @@ static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
                 * Tx Confirmation FQs.
                 */
                if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM)
-                       initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_HOLDACTIVE);
+                       initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_AVOIDBLOCK);
 
                /* FQ placement */
                initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_DESTWQ);
@@ -985,7 +1056,8 @@ static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
                /* Initialization common to all ingress queues */
                if (dpaa_fq->flags & QMAN_FQ_FLAG_NO_ENQUEUE) {
                        initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CONTEXTA);
-                       initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_HOLDACTIVE);
+                       initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_HOLDACTIVE |
+                                               QM_FQCTRL_CTXASTASHING);
                        initfq.fqd.context_a.stashing.exclusive =
                                QM_STASHING_EXCL_DATA | QM_STASHING_EXCL_CTX |
                                QM_STASHING_EXCL_ANNOTATION;
@@ -1055,9 +1127,9 @@ static int dpaa_fq_free(struct device *dev, struct list_head *list)
        return err;
 }
 
-static void dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
-                                 struct dpaa_fq *defq,
-                                 struct dpaa_buffer_layout *buf_layout)
+static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
+                                struct dpaa_fq *defq,
+                                struct dpaa_buffer_layout *buf_layout)
 {
        struct fman_buffer_prefix_content buf_prefix_content;
        struct fman_port_params params;
@@ -1076,23 +1148,29 @@ static void dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
        params.specific_params.non_rx_params.dflt_fqid = defq->fqid;
 
        err = fman_port_config(port, &params);
-       if (err)
+       if (err) {
                pr_err("%s: fman_port_config failed\n", __func__);
+               return err;
+       }
 
        err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
-       if (err)
+       if (err) {
                pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
                       __func__);
+               return err;
+       }
 
        err = fman_port_init(port);
        if (err)
                pr_err("%s: fm_port_init failed\n", __func__);
+
+       return err;
 }
 
-static void dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
-                                 size_t count, struct dpaa_fq *errq,
-                                 struct dpaa_fq *defq,
-                                 struct dpaa_buffer_layout *buf_layout)
+static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
+                                size_t count, struct dpaa_fq *errq,
+                                struct dpaa_fq *defq,
+                                struct dpaa_buffer_layout *buf_layout)
 {
        struct fman_buffer_prefix_content buf_prefix_content;
        struct fman_port_rx_params *rx_p;
@@ -1120,32 +1198,44 @@ static void dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
        }
 
        err = fman_port_config(port, &params);
-       if (err)
+       if (err) {
                pr_err("%s: fman_port_config failed\n", __func__);
+               return err;
+       }
 
        err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
-       if (err)
+       if (err) {
                pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
                       __func__);
+               return err;
+       }
 
        err = fman_port_init(port);
        if (err)
                pr_err("%s: fm_port_init failed\n", __func__);
+
+       return err;
 }
 
-static void dpaa_eth_init_ports(struct mac_device *mac_dev,
-                               struct dpaa_bp **bps, size_t count,
-                               struct fm_port_fqs *port_fqs,
-                               struct dpaa_buffer_layout *buf_layout,
-                               struct device *dev)
+static int dpaa_eth_init_ports(struct mac_device *mac_dev,
+                              struct dpaa_bp **bps, size_t count,
+                              struct fm_port_fqs *port_fqs,
+                              struct dpaa_buffer_layout *buf_layout,
+                              struct device *dev)
 {
        struct fman_port *rxport = mac_dev->port[RX];
        struct fman_port *txport = mac_dev->port[TX];
+       int err;
+
+       err = dpaa_eth_init_tx_port(txport, port_fqs->tx_errq,
+                                   port_fqs->tx_defq, &buf_layout[TX]);
+       if (err)
+               return err;
+
+       err = dpaa_eth_init_rx_port(rxport, bps, count, port_fqs->rx_errq,
+                                   port_fqs->rx_defq, &buf_layout[RX]);
 
-       dpaa_eth_init_tx_port(txport, port_fqs->tx_errq,
-                             port_fqs->tx_defq, &buf_layout[TX]);
-       dpaa_eth_init_rx_port(rxport, bps, count, port_fqs->rx_errq,
-                             port_fqs->rx_defq, &buf_layout[RX]);
+       return err;
 }
 
 static int dpaa_bman_release(const struct dpaa_bp *dpaa_bp,
@@ -1526,6 +1616,23 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
        return skb;
 }
 
+static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd)
+{
+       /* The parser has run and performed L4 checksum validation.
+        * We know there were no parser errors (and implicitly no
+        * L4 csum error), otherwise we wouldn't be here.
+        */
+       if ((priv->net_dev->features & NETIF_F_RXCSUM) &&
+           (be32_to_cpu(fd->status) & FM_FD_STAT_L4CV))
+               return CHECKSUM_UNNECESSARY;
+
+       /* We're here because either the parser didn't run or the L4 checksum
+        * was not verified. This may include the case of a UDP frame with
+        * checksum zero or an L4 proto other than TCP/UDP.
+        */
+       return CHECKSUM_NONE;
+}
+
 /* Build a linear skb around the received buffer.
  * We are guaranteed there is enough room at the end of the data buffer to
  * accommodate the shared info area of the skb.
@@ -1556,7 +1663,7 @@ static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv,
        skb_reserve(skb, fd_off);
        skb_put(skb, qm_fd_get_length(fd));
 
-       skb->ip_summed = CHECKSUM_NONE;
+       skb->ip_summed = rx_csum_offload(priv, fd);
 
        return skb;
 
@@ -1616,7 +1723,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
                        if (WARN_ON(unlikely(!skb)))
                                goto free_buffers;
 
-                       skb->ip_summed = CHECKSUM_NONE;
+                       skb->ip_summed = rx_csum_offload(priv, fd);
 
                        /* Make sure forwarded skbs will have enough space
                         * on Tx, if extra headers are added.
@@ -2093,7 +2200,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
        dma_addr_t addr = qm_fd_addr(fd);
        enum qm_fd_format fd_format;
        struct net_device *net_dev;
-       u32 fd_status = fd->status;
+       u32 fd_status;
        struct dpaa_bp *dpaa_bp;
        struct dpaa_priv *priv;
        unsigned int skb_len;
@@ -2350,6 +2457,7 @@ static const struct net_device_ops dpaa_ops = {
        .ndo_validate_addr = eth_validate_addr,
        .ndo_set_rx_mode = dpaa_set_rx_mode,
        .ndo_do_ioctl = dpaa_ioctl,
+       .ndo_setup_tc = dpaa_setup_tc,
 };
 
 static int dpaa_napi_add(struct net_device *net_dev)
@@ -2624,8 +2732,10 @@ static int dpaa_eth_probe(struct platform_device *pdev)
        priv->rx_headroom = dpaa_get_headroom(&priv->buf_layout[RX]);
 
        /* All real interfaces need their ports initialized */
-       dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
-                           &priv->buf_layout[0], dev);
+       err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
+                                 &priv->buf_layout[0], dev);
+       if (err)
+               goto init_ports_failed;
 
        priv->percpu_priv = devm_alloc_percpu(dev, *priv->percpu_priv);
        if (!priv->percpu_priv) {
@@ -2638,6 +2748,9 @@ static int dpaa_eth_probe(struct platform_device *pdev)
                memset(percpu_priv, 0, sizeof(*percpu_priv));
        }
 
+       priv->num_tc = 1;
+       netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
+
        /* Initialize NAPI */
        err = dpaa_napi_add(net_dev);
        if (err < 0)
@@ -2658,6 +2771,7 @@ netdev_init_failed:
 napi_add_failed:
        dpaa_napi_del(net_dev);
 alloc_percpu_failed:
+init_ports_failed:
        dpaa_fq_free(dev, &priv->dpaa_fq_list);
 fq_alloc_failed:
        qman_delete_cgr_safe(&priv->ingress_cgr);
index 1f9aebf3f3c514517816ae92e379176c6861f219..9941a7866ebea43b8153c8a4150dc17f52e3afb7 100644 (file)
 #include "mac.h"
 #include "dpaa_eth_trace.h"
 
-#define DPAA_ETH_TXQ_NUM       NR_CPUS
+/* Number of prioritised traffic classes */
+#define DPAA_TC_NUM            4
+/* Number of Tx queues per traffic class */
+#define DPAA_TC_TXQ_NUM                NR_CPUS
+/* Total number of Tx queues */
+#define DPAA_ETH_TXQ_NUM       (DPAA_TC_NUM * DPAA_TC_TXQ_NUM)
 
 #define DPAA_BPS_NUM 3 /* number of bpools per interface */
 
@@ -152,6 +157,7 @@ struct dpaa_priv {
        u16 channel;
        struct list_head dpaa_fq_list;
 
+       u8 num_tc;
        u32 msg_enable; /* net_device message level */
 
        struct {
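
With the defines above, the Tx queue budget grows from NR_CPUS to DPAA_TC_NUM * NR_CPUS, one block of NR_CPUS queues per traffic class. A hedged sketch of the resulting class-to-work-queue mapping used by dpaa_assign_wq(); NR_CPUS is pinned to 8 here only to keep the example self-contained:

#include <stdio.h>

#define NR_CPUS          8 /* illustrative only */
#define DPAA_TC_NUM      4
#define DPAA_TC_TXQ_NUM  NR_CPUS
#define DPAA_ETH_TXQ_NUM (DPAA_TC_NUM * DPAA_TC_TXQ_NUM)

int main(void)
{
	/* Tx queue index -> work queue, mirroring dpaa_assign_wq() */
	static const int tc_to_wq[DPAA_TC_NUM] = { 6, 2, 1, 0 };
	int idx;

	for (idx = 0; idx < DPAA_ETH_TXQ_NUM; idx += DPAA_TC_TXQ_NUM)
		printf("txq %d..%d -> WQ%d\n", idx, idx + DPAA_TC_TXQ_NUM - 1,
		       tc_to_wq[idx / DPAA_TC_TXQ_NUM]);
	return 0;
}
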
index f60845f0c6cad060b193fecdf00dd3a93127fb9c..4aefe24389695457ee307a9eb3403715091d283c 100644 (file)
@@ -59,6 +59,7 @@
 #define DMA_OFFSET             0x000C2000
 #define FPM_OFFSET             0x000C3000
 #define IMEM_OFFSET            0x000C4000
+#define HWP_OFFSET             0x000C7000
 #define CGP_OFFSET             0x000DB000
 
 /* Exceptions bit map */
 
 #define QMI_GS_HALT_NOT_BUSY           0x00000002
 
+/* HWP defines */
+#define HWP_RPIMAC_PEN                 0x00000001
+
 /* IRAM defines */
 #define IRAM_IADD_AIE                  0x80000000
 #define IRAM_READY                     0x80000000
@@ -475,6 +479,12 @@ struct fman_dma_regs {
        u32 res00e0[0x400 - 56];
 };
 
+struct fman_hwp_regs {
+       u32 res0000[0x844 / 4];         /* 0x000..0x843 */
+       u32 fmprrpimac; /* FM Parser Internal memory access control */
+       u32 res[(0x1000 - 0x848) / 4];  /* 0x848..0xFFF */
+};
+
 /* Structure that holds current FMan state.
  * Used for saving run time information.
  */
@@ -606,6 +616,7 @@ struct fman {
        struct fman_bmi_regs __iomem *bmi_regs;
        struct fman_qmi_regs __iomem *qmi_regs;
        struct fman_dma_regs __iomem *dma_regs;
+       struct fman_hwp_regs __iomem *hwp_regs;
        fman_exceptions_cb *exception_cb;
        fman_bus_error_cb *bus_error_cb;
        /* Spinlock for FMan use */
@@ -999,6 +1010,12 @@ static void qmi_init(struct fman_qmi_regs __iomem *qmi_rg,
        iowrite32be(tmp_reg, &qmi_rg->fmqm_ien);
 }
 
+static void hwp_init(struct fman_hwp_regs __iomem *hwp_rg)
+{
+       /* enable HW Parser */
+       iowrite32be(HWP_RPIMAC_PEN, &hwp_rg->fmprrpimac);
+}
+
 static int enable(struct fman *fman, struct fman_cfg *cfg)
 {
        u32 cfg_reg = 0;
@@ -1195,7 +1212,7 @@ static int fill_soc_specific_params(struct fman_state_struct *state)
                state->max_num_of_open_dmas     = 32;
                state->fm_port_num_of_cg        = 256;
                state->num_of_rx_ports  = 6;
-               state->total_fifo_size  = 122 * 1024;
+               state->total_fifo_size  = 136 * 1024;
                break;
 
        case 2:
@@ -1793,6 +1810,7 @@ static int fman_config(struct fman *fman)
        fman->bmi_regs = base_addr + BMI_OFFSET;
        fman->qmi_regs = base_addr + QMI_OFFSET;
        fman->dma_regs = base_addr + DMA_OFFSET;
+       fman->hwp_regs = base_addr + HWP_OFFSET;
        fman->base_addr = base_addr;
 
        spin_lock_init(&fman->spinlock);
@@ -2062,6 +2080,9 @@ static int fman_init(struct fman *fman)
        /* Init QMI Registers */
        qmi_init(fman->qmi_regs, fman->cfg);
 
+       /* Init HW Parser */
+       hwp_init(fman->hwp_regs);
+
        err = enable(fman, cfg);
        if (err != 0)
                return err;
index 57aae8d17d7710a9392fb91f7dee67904189d1ba..f53e1473dbccd667bebf8701a9b7c4cfac871663 100644 (file)
@@ -134,14 +134,14 @@ enum fman_exceptions {
 struct fman_prs_result {
        u8 lpid;                /* Logical port id */
        u8 shimr;               /* Shim header result  */
-       u16 l2r;                /* Layer 2 result */
-       u16 l3r;                /* Layer 3 result */
+       __be16 l2r;             /* Layer 2 result */
+       __be16 l3r;             /* Layer 3 result */
        u8 l4r;         /* Layer 4 result */
        u8 cplan;               /* Classification plan id */
-       u16 nxthdr;             /* Next Header  */
-       u16 cksum;              /* Running-sum */
+       __be16 nxthdr;          /* Next Header  */
+       __be16 cksum;           /* Running-sum */
        /* Flags&fragment-offset field of the last IP-header */
-       u16 flags_frag_off;
+       __be16 flags_frag_off;
        /* Routing type field of a IPV6 routing extension header */
        u8 route_type;
        /* Routing Extension Header Present; last bit is IP valid */
index 84ea130eed365b405655650e7999351135b533e5..98bba10fc38c1a5916108fc0ec4b1f6f136f0032 100644 (file)
@@ -381,6 +381,9 @@ static int init(struct dtsec_regs __iomem *regs, struct dtsec_cfg *cfg,
 
        /* check RGMII support */
        if (iface == PHY_INTERFACE_MODE_RGMII ||
+           iface == PHY_INTERFACE_MODE_RGMII_ID ||
+           iface == PHY_INTERFACE_MODE_RGMII_RXID ||
+           iface == PHY_INTERFACE_MODE_RGMII_TXID ||
            iface == PHY_INTERFACE_MODE_RMII)
                if (tmp & DTSEC_ID2_INT_REDUCED_OFF)
                        return -EINVAL;
@@ -390,7 +393,10 @@ static int init(struct dtsec_regs __iomem *regs, struct dtsec_cfg *cfg,
                if (tmp & DTSEC_ID2_INT_REDUCED_OFF)
                        return -EINVAL;
 
-       is_rgmii = iface == PHY_INTERFACE_MODE_RGMII;
+       is_rgmii = iface == PHY_INTERFACE_MODE_RGMII ||
+                  iface == PHY_INTERFACE_MODE_RGMII_ID ||
+                  iface == PHY_INTERFACE_MODE_RGMII_RXID ||
+                  iface == PHY_INTERFACE_MODE_RGMII_TXID;
        is_sgmii = iface == PHY_INTERFACE_MODE_SGMII;
        is_qsgmii = iface == PHY_INTERFACE_MODE_QSGMII;
 
index cd6a53eaf1614f7ffbeaadc7bbf6b28d8163e4c0..c0296880feba7f1afa505d4f9c08003e496de429 100644 (file)
@@ -443,7 +443,10 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg,
                break;
        default:
                tmp |= IF_MODE_GMII;
-               if (phy_if == PHY_INTERFACE_MODE_RGMII)
+               if (phy_if == PHY_INTERFACE_MODE_RGMII ||
+                   phy_if == PHY_INTERFACE_MODE_RGMII_ID ||
+                   phy_if == PHY_INTERFACE_MODE_RGMII_RXID ||
+                   phy_if == PHY_INTERFACE_MODE_RGMII_TXID)
                        tmp |= IF_MODE_RGMII | IF_MODE_RGMII_AUTO;
        }
        iowrite32be(tmp, &regs->if_mode);
index 173d8e0fd71668afe4292a506ef120d853f249f5..c4a66469a9074daab469f304751acc06df8f9839 100644 (file)
@@ -36,6 +36,7 @@
 #include "fman_mac.h"
 
 #include <linux/netdevice.h>
+#include <linux/phy_fixed.h>
 
 struct fman_mac *memac_config(struct fman_mac_params *params);
 int memac_set_promiscuous(struct fman_mac *memac, bool new_val);
index 9f3bb50a23651a4fd9edf51ff2d2fc29d321cae5..57bf44fa16a10ad54e1f56fa5cb5304d3039ed2c 100644 (file)
@@ -62,6 +62,7 @@
 
 #define BMI_PORT_REGS_OFFSET                           0
 #define QMI_PORT_REGS_OFFSET                           0x400
+#define HWP_PORT_REGS_OFFSET                           0x800
 
 /* Default values */
 #define DFLT_PORT_BUFFER_PREFIX_CONTEXT_DATA_ALIGN             \
 #define NIA_ENG_BMI                                    0x00500000
 #define NIA_ENG_QMI_ENQ                                        0x00540000
 #define NIA_ENG_QMI_DEQ                                        0x00580000
-
+#define NIA_ENG_HWP                                    0x00440000
 #define NIA_BMI_AC_ENQ_FRAME                           0x00000002
 #define NIA_BMI_AC_TX_RELEASE                          0x000002C0
 #define NIA_BMI_AC_RELEASE                             0x000000C0
@@ -317,6 +318,19 @@ struct fman_port_qmi_regs {
        u32 fmqm_pndcc;         /* PortID n Dequeue Confirm Counter */
 };
 
+#define HWP_HXS_COUNT 16
+#define HWP_HXS_PHE_REPORT 0x00000800
+#define HWP_HXS_PCAC_PSTAT 0x00000100
+#define HWP_HXS_PCAC_PSTOP 0x00000001
+struct fman_port_hwp_regs {
+       struct {
+               u32 ssa; /* Soft Sequence Attachment */
+               u32 lcv; /* Line-up Enable Confirmation Mask */
+       } pmda[HWP_HXS_COUNT]; /* Parse Memory Direct Access Registers */
+       u32 reserved080[(0x3f8 - 0x080) / 4]; /* (0x080-0x3f7) */
+       u32 fmpr_pcac; /* Configuration Access Control */
+};
+
 /* QMI dequeue prefetch modes */
 enum fman_port_deq_prefetch {
        FMAN_PORT_DEQ_NO_PREFETCH, /* No prefetch mode */
@@ -436,6 +450,7 @@ struct fman_port {
 
        union fman_port_bmi_regs __iomem *bmi_regs;
        struct fman_port_qmi_regs __iomem *qmi_regs;
+       struct fman_port_hwp_regs __iomem *hwp_regs;
 
        struct fman_sp_buffer_offsets buffer_offsets;
 
@@ -521,9 +536,12 @@ static int init_bmi_rx(struct fman_port *port)
        /* NIA */
        tmp = (u32)cfg->rx_fd_bits << BMI_NEXT_ENG_FD_BITS_SHIFT;
 
-       tmp |= NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME;
+       tmp |= NIA_ENG_HWP;
        iowrite32be(tmp, &regs->fmbm_rfne);
 
+       /* Parser Next Engine NIA */
+       iowrite32be(NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME, &regs->fmbm_rfpne);
+
        /* Enqueue NIA */
        iowrite32be(NIA_ENG_QMI_ENQ | NIA_ORDER_RESTOR, &regs->fmbm_rfene);
 
@@ -665,6 +683,50 @@ static int init_qmi(struct fman_port *port)
        return 0;
 }
 
+static void stop_port_hwp(struct fman_port *port)
+{
+       struct fman_port_hwp_regs __iomem *regs = port->hwp_regs;
+       int cnt = 100;
+
+       iowrite32be(HWP_HXS_PCAC_PSTOP, &regs->fmpr_pcac);
+
+       while (cnt-- > 0 &&
+              (ioread32be(&regs->fmpr_pcac) & HWP_HXS_PCAC_PSTAT))
+               udelay(10);
+       if (!cnt)
+               pr_err("Timeout stopping HW Parser\n");
+}
+
+static void start_port_hwp(struct fman_port *port)
+{
+       struct fman_port_hwp_regs __iomem *regs = port->hwp_regs;
+       int cnt = 100;
+
+       iowrite32be(0, &regs->fmpr_pcac);
+
+       while (cnt-- > 0 &&
+              !(ioread32be(&regs->fmpr_pcac) & HWP_HXS_PCAC_PSTAT))
+               udelay(10);
+       if (!cnt)
+               pr_err("Timeout starting HW Parser\n");
+}
+
+static void init_hwp(struct fman_port *port)
+{
+       struct fman_port_hwp_regs __iomem *regs = port->hwp_regs;
+       int i;
+
+       stop_port_hwp(port);
+
+       for (i = 0; i < HWP_HXS_COUNT; i++) {
+               /* enable HXS error reporting into FD[STATUS] PHE */
+               iowrite32be(0x00000000, &regs->pmda[i].ssa);
+               iowrite32be(0xffffffff, &regs->pmda[i].lcv);
+       }
+
+       start_port_hwp(port);
+}
+
 static int init(struct fman_port *port)
 {
        int err;
@@ -673,6 +735,8 @@ static int init(struct fman_port *port)
        switch (port->port_type) {
        case FMAN_PORT_TYPE_RX:
                err = init_bmi_rx(port);
+               if (!err)
+                       init_hwp(port);
                break;
        case FMAN_PORT_TYPE_TX:
                err = init_bmi_tx(port);
@@ -686,7 +750,8 @@ static int init(struct fman_port *port)
 
        /* Init QMI registers */
        err = init_qmi(port);
-       return err;
+       if (err)
+               return err;
 
        return 0;
 }
@@ -1247,7 +1312,7 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params)
        /* Allocate the FM driver's parameters structure */
        port->cfg = kzalloc(sizeof(*port->cfg), GFP_KERNEL);
        if (!port->cfg)
-               goto err_params;
+               return -EINVAL;
 
        /* Initialize FM port parameters which will be kept by the driver */
        port->port_type = port->dts_params.type;
@@ -1276,6 +1341,7 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params)
        /* set memory map pointers */
        port->bmi_regs = base_addr + BMI_PORT_REGS_OFFSET;
        port->qmi_regs = base_addr + QMI_PORT_REGS_OFFSET;
+       port->hwp_regs = base_addr + HWP_PORT_REGS_OFFSET;
 
        port->max_frame_length = DFLT_PORT_MAX_FRAME_LENGTH;
        /* resource distribution. */
@@ -1327,8 +1393,6 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params)
 
 err_port_cfg:
        kfree(port->cfg);
-err_params:
-       kfree(port);
        return -EINVAL;
 }
 EXPORT_SYMBOL(fman_port_config);
index db9c0bcf54cd9308cfac3533e2603f21955b99b5..1fc27c97e3b23205fe3466ddf9f5bb45f72faf09 100644 (file)
 #include <asm/irq.h>
 #include <linux/uaccess.h>
 
-#ifdef CONFIG_8xx
-#include <asm/8xx_immap.h>
-#include <asm/pgtable.h>
-#include <asm/cpm1.h>
-#endif
-
 #include "fs_enet.h"
 #include "fec.h"
 
index 96d44cf44fe09f4e3d8b5ff0270869a92315ad59..64300ac13e0253452ef885fcd786336de61929c9 100644 (file)
 #include <asm/irq.h>
 #include <linux/uaccess.h>
 
-#ifdef CONFIG_8xx
-#include <asm/8xx_immap.h>
-#include <asm/pgtable.h>
-#include <asm/cpm1.h>
-#endif
-
 #include "fs_enet.h"
 
 /*************************************************/
index b6ed818f78fffe21ee2b4c385c7c6222bc5df9f3..9d9b6e6dd9884fdb835e80043f2cdc83d5283bbf 100644 (file)
@@ -9,9 +9,9 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
-
 #include "hnae.h"
 
 #define cls_to_ae_dev(dev) container_of(dev, struct hnae_ae_dev, cls_dev)
@@ -57,11 +57,15 @@ static int hnae_alloc_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb)
 
 static void hnae_free_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb)
 {
+       if (unlikely(!cb->priv))
+               return;
+
        if (cb->type == DESC_TYPE_SKB)
                dev_kfree_skb_any((struct sk_buff *)cb->priv);
        else if (unlikely(is_rx_ring(ring)))
                put_page((struct page *)cb->priv);
-       memset(cb, 0, sizeof(*cb));
+
+       cb->priv = NULL;
 }
 
 static int hnae_map_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb)
@@ -197,6 +201,7 @@ hnae_init_ring(struct hnae_queue *q, struct hnae_ring *ring, int flags)
 
        ring->q = q;
        ring->flags = flags;
+       spin_lock_init(&ring->lock);
        assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr);
 
        /* not matter for tx or rx ring, the ntc and ntc start from 0 */
index 8016854796fb7fbe4eacd5799ccf40810b72b008..04211ac73b36a3152b6642a4c797f738076bd601 100644 (file)
@@ -67,6 +67,8 @@ do { \
 #define AE_IS_VER1(ver) ((ver) == AE_VERSION_1)
 #define AE_NAME_SIZE 16
 
+#define BD_SIZE_2048_MAX_MTU   6000
+
 /* some said the RX and TX RCB format should not be the same in the future. But
  * it is the same now...
  */
@@ -101,7 +103,6 @@ enum hnae_led_state {
 #define HNS_RX_FLAG_L4ID_TCP 0x1
 #define HNS_RX_FLAG_L4ID_SCTP 0x3
 
-
 #define HNS_TXD_ASID_S 0
 #define HNS_TXD_ASID_M (0xff << HNS_TXD_ASID_S)
 #define HNS_TXD_BUFNUM_S 8
@@ -273,6 +274,9 @@ struct hnae_ring {
        /* statistic */
        struct ring_stats stats;
 
+       /* ring lock for poll one */
+       spinlock_t lock;
+
        dma_addr_t desc_dma_addr;
        u32 buf_size;       /* size for hnae_desc->addr, preset by AE */
        u16 desc_num;       /* total number of desc */
@@ -483,11 +487,11 @@ struct hnae_ae_ops {
                              u32 auto_neg, u32 rx_en, u32 tx_en);
        void (*get_coalesce_usecs)(struct hnae_handle *handle,
                                   u32 *tx_usecs, u32 *rx_usecs);
-       void (*get_rx_max_coalesced_frames)(struct hnae_handle *handle,
-                                           u32 *tx_frames, u32 *rx_frames);
+       void (*get_max_coalesced_frames)(struct hnae_handle *handle,
+                                        u32 *tx_frames, u32 *rx_frames);
        int (*set_coalesce_usecs)(struct hnae_handle *handle, u32 timeout);
        int (*set_coalesce_frames)(struct hnae_handle *handle,
-                                  u32 coalesce_frames);
+                                  u32 tx_frames, u32 rx_frames);
        void (*get_coalesce_range)(struct hnae_handle *handle,
                                   u32 *tx_frames_low, u32 *rx_frames_low,
                                   u32 *tx_frames_high, u32 *rx_frames_high,
@@ -646,6 +650,41 @@ static inline void hnae_reuse_buffer(struct hnae_ring *ring, int i)
        ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
 
+/* when reinitializing the buffer size, reinitialize the buffer descriptors */
+static inline void hnae_reinit_all_ring_desc(struct hnae_handle *h)
+{
+       int i, j;
+       struct hnae_ring *ring;
+
+       for (i = 0; i < h->q_num; i++) {
+               ring = &h->qs[i]->rx_ring;
+               for (j = 0; j < ring->desc_num; j++)
+                       ring->desc[j].addr = cpu_to_le64(ring->desc_cb[j].dma);
+       }
+
+       wmb();  /* commit all data before submit */
+}
+
+/* when the buffer size is reinitialized, the page offsets must be reset as well */
+static inline void hnae_reinit_all_ring_page_off(struct hnae_handle *h)
+{
+       int i, j;
+       struct hnae_ring *ring;
+
+       for (i = 0; i < h->q_num; i++) {
+               ring = &h->qs[i]->rx_ring;
+               for (j = 0; j < ring->desc_num; j++) {
+                       ring->desc_cb[j].page_offset = 0;
+                       if (ring->desc[j].addr !=
+                           cpu_to_le64(ring->desc_cb[j].dma))
+                               ring->desc[j].addr =
+                                       cpu_to_le64(ring->desc_cb[j].dma);
+               }
+       }
+
+       wmb();  /* commit all data before submit */
+}
+
 #define hnae_set_field(origin, mask, shift, val) \
        do { \
                (origin) &= (~(mask)); \
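
The two helpers added above exist because, as hns_ae_set_mtu() below shows, an MTU change can resize the RX buffers: every buffer's DMA address (and page offset) has to be republished into the descriptors the hardware fetches, followed by wmb() so the writes are visible before the rings restart. A compilable sketch of the same publish-then-fence pattern, with the GCC builtin __sync_synchronize() standing in for wmb():

    #include <stdint.h>

    struct desc    { volatile uint64_t addr; };              /* fetched by the NIC */
    struct desc_cb { uint64_t dma; uint32_t page_offset; };  /* driver bookkeeping */

    /* Sketch: after a buffer resize, rewrite each descriptor's address
     * from the control block, then fence before restarting the ring. */
    static void reinit_ring(struct desc *desc, struct desc_cb *cb, int desc_num)
    {
            for (int i = 0; i < desc_num; i++) {
                    cb[i].page_offset = 0;          /* restart at the page start */
                    desc[i].addr = cb[i].dma;       /* republish the buffer address */
            }
            __sync_synchronize();                   /* stands in for wmb() */
    }
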
index 0a9cdf00b31afa9608414a4ad3de3089e4f61d04..ff864a187d5a71fa277f2907659621b9f87ffdcf 100644 (file)
@@ -267,8 +267,32 @@ static int hns_ae_clr_multicast(struct hnae_handle *handle)
 static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu)
 {
        struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+       struct hnae_queue *q;
+       u32 rx_buf_size;
+       int i, ret;
+
+       /* when buf_size is 2048, the max MTU is 6K, since an rx packet uses at most 3 BDs. */
+       if (!AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver)) {
+               if (new_mtu <= BD_SIZE_2048_MAX_MTU)
+                       rx_buf_size = 2048;
+               else
+                       rx_buf_size = 4096;
+       } else {
+               rx_buf_size = mac_cb->dsaf_dev->buf_size;
+       }
+
+       ret = hns_mac_set_mtu(mac_cb, new_mtu, rx_buf_size);
 
-       return hns_mac_set_mtu(mac_cb, new_mtu);
+       if (!ret) {
+               /* reinit ring buf_size */
+               for (i = 0; i < handle->q_num; i++) {
+                       q = handle->qs[i];
+                       q->rx_ring.buf_size = rx_buf_size;
+                       hns_rcb_set_rx_ring_bs(q, rx_buf_size);
+               }
+       }
+
+       return ret;
 }
 
 static void hns_ae_set_tso_stats(struct hnae_handle *handle, int enable)
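
The new hns_ae_set_mtu() picks the RX buffer size before programming the MAC: V2 hardware keeps 2048-byte buffers for MTUs up to BD_SIZE_2048_MAX_MTU (6000, viable because an RX packet may span up to three descriptors) and switches to 4096 above that, while V1 keeps the preconfigured size. A hypothetical standalone helper mirroring that rule:

    /* Sketch of the selection rule above; is_ver1 and preset model
     * AE_IS_VER1(...) and mac_cb->dsaf_dev->buf_size respectively. */
    static unsigned int pick_rx_buf_size(int is_ver1, unsigned int preset,
                                         int new_mtu)
    {
            if (is_ver1)
                    return preset;  /* V1: buffer size is fixed at probe time */
            /* 6000 == BD_SIZE_2048_MAX_MTU */
            return new_mtu <= 6000 ? 2048 : 4096;
    }
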
@@ -463,15 +487,21 @@ static void hns_ae_get_coalesce_usecs(struct hnae_handle *handle,
                                               ring_pair->port_id_in_comm);
 }
 
-static void hns_ae_get_rx_max_coalesced_frames(struct hnae_handle *handle,
-                                              u32 *tx_frames, u32 *rx_frames)
+static void hns_ae_get_max_coalesced_frames(struct hnae_handle *handle,
+                                           u32 *tx_frames, u32 *rx_frames)
 {
        struct ring_pair_cb *ring_pair =
                container_of(handle->qs[0], struct ring_pair_cb, q);
+       struct dsaf_device *dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
 
-       *tx_frames = hns_rcb_get_coalesced_frames(ring_pair->rcb_common,
-                                                 ring_pair->port_id_in_comm);
-       *rx_frames = hns_rcb_get_coalesced_frames(ring_pair->rcb_common,
+       if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
+           handle->port_type == HNAE_PORT_DEBUG)
+               *tx_frames = hns_rcb_get_rx_coalesced_frames(
+                       ring_pair->rcb_common, ring_pair->port_id_in_comm);
+       else
+               *tx_frames = hns_rcb_get_tx_coalesced_frames(
+                       ring_pair->rcb_common, ring_pair->port_id_in_comm);
+       *rx_frames = hns_rcb_get_rx_coalesced_frames(ring_pair->rcb_common,
                                                  ring_pair->port_id_in_comm);
 }
 
@@ -485,15 +515,34 @@ static int hns_ae_set_coalesce_usecs(struct hnae_handle *handle,
                ring_pair->rcb_common, ring_pair->port_id_in_comm, timeout);
 }
 
-static int  hns_ae_set_coalesce_frames(struct hnae_handle *handle,
-                                      u32 coalesce_frames)
+static int hns_ae_set_coalesce_frames(struct hnae_handle *handle,
+                                     u32 tx_frames, u32 rx_frames)
 {
+       int ret;
        struct ring_pair_cb *ring_pair =
                container_of(handle->qs[0], struct ring_pair_cb, q);
+       struct dsaf_device *dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
 
-       return hns_rcb_set_coalesced_frames(
-               ring_pair->rcb_common,
-               ring_pair->port_id_in_comm, coalesce_frames);
+       if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
+           handle->port_type == HNAE_PORT_DEBUG) {
+               if (tx_frames != rx_frames)
+                       return -EINVAL;
+               return hns_rcb_set_rx_coalesced_frames(
+                       ring_pair->rcb_common,
+                       ring_pair->port_id_in_comm, rx_frames);
+       } else {
+               if (tx_frames != 1)
+                       return -EINVAL;
+               ret = hns_rcb_set_tx_coalesced_frames(
+                       ring_pair->rcb_common,
+                       ring_pair->port_id_in_comm, tx_frames);
+               if (ret)
+                       return ret;
+
+               return hns_rcb_set_rx_coalesced_frames(
+                       ring_pair->rcb_common,
+                       ring_pair->port_id_in_comm, rx_frames);
+       }
 }
 
 static void hns_ae_get_coalesce_range(struct hnae_handle *handle,
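
The rewritten hns_ae_set_coalesce_frames() encodes a hardware asymmetry: V1 chips and debug ports share a single coalesced-frames counter, so the tx and rx values must match, while V2 has a separate tx counter that only supports a threshold of 1. A small standalone model of that validation:

    #include <errno.h>

    /* Sketch: shared_counter models "AE_IS_VER1(...) || port is debug". */
    static int check_coalesce_frames(int shared_counter,
                                     unsigned int tx_frames,
                                     unsigned int rx_frames)
    {
            if (shared_counter)             /* one counter serves both */
                    return tx_frames == rx_frames ? 0 : -EINVAL;
            return tx_frames == 1 ? 0 : -EINVAL;    /* V2 tx threshold is fixed */
    }
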
@@ -504,20 +553,27 @@ static void hns_ae_get_coalesce_range(struct hnae_handle *handle,
 {
        struct dsaf_device *dsaf_dev;
 
+       assert(handle);
+
        dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
 
-       *tx_frames_low  = HNS_RCB_MIN_COALESCED_FRAMES;
-       *rx_frames_low  = HNS_RCB_MIN_COALESCED_FRAMES;
-       *tx_frames_high =
-               (dsaf_dev->desc_num - 1 > HNS_RCB_MAX_COALESCED_FRAMES) ?
-               HNS_RCB_MAX_COALESCED_FRAMES : dsaf_dev->desc_num - 1;
-       *rx_frames_high =
-               (dsaf_dev->desc_num - 1 > HNS_RCB_MAX_COALESCED_FRAMES) ?
-                HNS_RCB_MAX_COALESCED_FRAMES : dsaf_dev->desc_num - 1;
-       *tx_usecs_low   = 0;
-       *rx_usecs_low   = 0;
-       *tx_usecs_high  = HNS_RCB_MAX_COALESCED_USECS;
-       *rx_usecs_high  = HNS_RCB_MAX_COALESCED_USECS;
+       *tx_frames_low  = HNS_RCB_TX_FRAMES_LOW;
+       *rx_frames_low  = HNS_RCB_RX_FRAMES_LOW;
+
+       if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
+           handle->port_type == HNAE_PORT_DEBUG)
+               *tx_frames_high =
+                       (dsaf_dev->desc_num - 1 > HNS_RCB_TX_FRAMES_HIGH) ?
+                       HNS_RCB_TX_FRAMES_HIGH : dsaf_dev->desc_num - 1;
+       else
+               *tx_frames_high = 1;
+
+       *rx_frames_high = (dsaf_dev->desc_num - 1 > HNS_RCB_RX_FRAMES_HIGH) ?
+               HNS_RCB_RX_FRAMES_HIGH : dsaf_dev->desc_num - 1;
+       *tx_usecs_low   = HNS_RCB_TX_USECS_LOW;
+       *rx_usecs_low   = HNS_RCB_RX_USECS_LOW;
+       *tx_usecs_high  = HNS_RCB_TX_USECS_HIGH;
+       *rx_usecs_high  = HNS_RCB_RX_USECS_HIGH;
 }
 
 void hns_ae_update_stats(struct hnae_handle *handle,
@@ -802,8 +858,9 @@ static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key,
                memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE);
 
        /* update the current hash->queue mappings from the shadow RSS table */
-       memcpy(indir, ppe_cb->rss_indir_table,
-              HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
+       if (indir)
+               memcpy(indir, ppe_cb->rss_indir_table,
+                      HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
 
        return 0;
 }
@@ -814,15 +871,19 @@ static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir,
        struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle);
 
        /* set the RSS Hash Key if specified by the user */
-       if (key)
-               hns_ppe_set_rss_key(ppe_cb, (u32 *)key);
+       if (key) {
+               memcpy(ppe_cb->rss_key, key, HNS_PPEV2_RSS_KEY_SIZE);
+               hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key);
+       }
 
-       /* update the shadow RSS table with user specified qids */
-       memcpy(ppe_cb->rss_indir_table, indir,
-              HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
+       if (indir) {
+               /* update the shadow RSS table with user specified qids */
+               memcpy(ppe_cb->rss_indir_table, indir,
+                      HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
 
-       /* now update the hardware */
-       hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table);
+               /* now update the hardware */
+               hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table);
+       }
 
        return 0;
 }
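
Both RSS paths above now treat the driver's copy as the source of truth: writes land in the shadow key and indirection table first, hardware is programmed from the shadow, and a NULL argument means "leave that piece unchanged". A sketch of the shadow-first pattern; the sizes and the hw_set_* calls are assumptions standing in for HNS_PPEV2_RSS_KEY_SIZE, HNS_PPEV2_RSS_IND_TBL_SIZE and the ppe register writes:

    #include <stdint.h>
    #include <string.h>

    #define RSS_KEY_SIZE 40         /* assumed stand-in value */
    #define RSS_TBL_SIZE 256        /* assumed stand-in value */

    struct rss_shadow {
            uint8_t  key[RSS_KEY_SIZE];
            uint32_t indir[RSS_TBL_SIZE];
    };

    /* Sketch: update the shadow, then program hardware from the shadow,
     * so later reads never have to touch device registers. */
    static void set_rss(struct rss_shadow *s, const uint8_t *key,
                        const uint32_t *indir)
    {
            if (key) {
                    memcpy(s->key, key, sizeof(s->key));
                    /* hw_set_key(s->key); */
            }
            if (indir) {
                    memcpy(s->indir, indir, sizeof(s->indir));
                    /* hw_set_indir(s->indir); */
            }
    }
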
@@ -846,7 +907,7 @@ static struct hnae_ae_ops hns_dsaf_ops = {
        .get_autoneg = hns_ae_get_autoneg,
        .set_pauseparam = hns_ae_set_pauseparam,
        .get_coalesce_usecs = hns_ae_get_coalesce_usecs,
-       .get_rx_max_coalesced_frames = hns_ae_get_rx_max_coalesced_frames,
+       .get_max_coalesced_frames = hns_ae_get_max_coalesced_frames,
        .set_coalesce_usecs = hns_ae_set_coalesce_usecs,
        .set_coalesce_frames = hns_ae_set_coalesce_frames,
        .get_coalesce_range = hns_ae_get_coalesce_range,
index 3382441fe7b51e84bb5e815ffc1e4fa192b09a91..74bd260ca02a887869a507f8746dfc928522d4be 100644 (file)
@@ -86,12 +86,11 @@ static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode)
                dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 0);
 }
 
-/**
-*hns_gmac_get_en - get port enable
-*@mac_drv:mac device
-*@rx:rx enable
-*@tx:tx enable
-*/
+/* hns_gmac_get_en - get port enable
+ * @mac_drv:mac device
+ * @rx:rx enable
+ * @tx:tx enable
+ */
 static void hns_gmac_get_en(void *mac_drv, u32 *rx, u32 *tx)
 {
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
@@ -148,6 +147,17 @@ static void hns_gmac_config_max_frame_length(void *mac_drv, u16 newval)
                           GMAC_MAX_FRM_SIZE_S, newval);
 }
 
+static void hns_gmac_config_pad_and_crc(void *mac_drv, u8 newval)
+{
+       u32 tx_ctrl;
+       struct mac_driver *drv = (struct mac_driver *)mac_drv;
+
+       tx_ctrl = dsaf_read_dev(drv, GMAC_TRANSMIT_CONTROL_REG);
+       dsaf_set_bit(tx_ctrl, GMAC_TX_PAD_EN_B, !!newval);
+       dsaf_set_bit(tx_ctrl, GMAC_TX_CRC_ADD_B, !!newval);
+       dsaf_write_dev(drv, GMAC_TRANSMIT_CONTROL_REG, tx_ctrl);
+}
+
 static void hns_gmac_config_an_mode(void *mac_drv, u8 newval)
 {
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
@@ -250,7 +260,6 @@ static void hns_gmac_get_pausefrm_cfg(void *mac_drv, u32 *rx_pause_en,
 static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed,
                                u32 full_duplex)
 {
-       u32 tx_ctrl;
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
        dsaf_set_dev_bit(drv, GMAC_DUPLEX_TYPE_REG,
@@ -279,14 +288,6 @@ static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed,
                return -EINVAL;
        }
 
-       tx_ctrl = dsaf_read_dev(drv, GMAC_TRANSMIT_CONTROL_REG);
-       dsaf_set_bit(tx_ctrl, GMAC_TX_PAD_EN_B, 1);
-       dsaf_set_bit(tx_ctrl, GMAC_TX_CRC_ADD_B, 1);
-       dsaf_write_dev(drv, GMAC_TRANSMIT_CONTROL_REG, tx_ctrl);
-
-       dsaf_set_dev_bit(drv, GMAC_MODE_CHANGE_EN_REG,
-                        GMAC_MODE_CHANGE_EB_B, 1);
-
        return 0;
 }
 
@@ -325,6 +326,17 @@ static void hns_gmac_init(void *mac_drv)
        hns_gmac_tx_loop_pkt_dis(mac_drv);
        if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG)
                hns_gmac_set_uc_match(mac_drv, 0);
+
+       hns_gmac_config_pad_and_crc(mac_drv, 1);
+
+       dsaf_set_dev_bit(drv, GMAC_MODE_CHANGE_EN_REG,
+                        GMAC_MODE_CHANGE_EB_B, 1);
+
+       /* reduce the gmac tx water line to avoid a gmac hang-up
+        * at 100M speed, half duplex.
+        */
+       dsaf_set_dev_field(drv, GMAC_TX_WATER_LINE_REG, GMAC_TX_WATER_LINE_MASK,
+                          GMAC_TX_WATER_LINE_SHIFT, 8);
 }
 
 void hns_gmac_update_stats(void *mac_drv)
@@ -453,24 +465,6 @@ static int hns_gmac_config_loopback(void *mac_drv, enum hnae_loop loop_mode,
        return 0;
 }
 
-static void hns_gmac_config_pad_and_crc(void *mac_drv, u8 newval)
-{
-       u32 tx_ctrl;
-       struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-       tx_ctrl = dsaf_read_dev(drv, GMAC_TRANSMIT_CONTROL_REG);
-       dsaf_set_bit(tx_ctrl, GMAC_TX_PAD_EN_B, !!newval);
-       dsaf_set_bit(tx_ctrl, GMAC_TX_CRC_ADD_B, !!newval);
-       dsaf_write_dev(drv, GMAC_TRANSMIT_CONTROL_REG, tx_ctrl);
-}
-
-static void hns_gmac_get_id(void *mac_drv, u8 *mac_id)
-{
-       struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-       *mac_id = drv->mac_id;
-}
-
 static void hns_gmac_get_info(void *mac_drv, struct mac_info *mac_info)
 {
        enum hns_gmac_duplex_mdoe duplex;
@@ -712,7 +706,6 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
        mac_drv->config_pad_and_crc = hns_gmac_config_pad_and_crc;
        mac_drv->config_half_duplex = hns_gmac_set_duplex_type;
        mac_drv->set_rx_ignore_pause_frames = hns_gmac_set_rx_auto_pause_frames;
-       mac_drv->mac_get_id = hns_gmac_get_id;
        mac_drv->get_info = hns_gmac_get_info;
        mac_drv->autoneg_stat = hns_gmac_autoneg_stat;
        mac_drv->get_pause_enable = hns_gmac_get_pausefrm_cfg;
index 3239d27143b935dc0056490b32f700093163c74a..696f2ae8b075193dd11ef60d11610189ca0fe28a 100644 (file)
@@ -332,44 +332,6 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
        return 0;
 }
 
-/**
- *hns_mac_del_mac - delete mac address into dsaf table,can't delete the same
- *                  address twice
- *@net_dev: net device
- *@vfn :   vf lan
- *@mac : mac address
- *return status
- */
-int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac)
-{
-       struct mac_entry_idx *old_mac;
-       struct dsaf_device *dsaf_dev;
-       u32 ret;
-
-       dsaf_dev = mac_cb->dsaf_dev;
-
-       if (vfn < DSAF_MAX_VM_NUM) {
-               old_mac = &mac_cb->addr_entry_idx[vfn];
-       } else {
-               dev_err(mac_cb->dev,
-                       "vf queue is too large, %s mac%d queue = %#x!\n",
-                       mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vfn);
-               return -EINVAL;
-       }
-
-       if (dsaf_dev) {
-               ret = hns_dsaf_del_mac_entry(dsaf_dev, old_mac->vlan_id,
-                                            mac_cb->mac_id, old_mac->addr);
-               if (ret)
-                       return ret;
-
-               if (memcmp(old_mac->addr, mac, sizeof(old_mac->addr)) == 0)
-                       old_mac->valid = 0;
-       }
-
-       return 0;
-}
-
 int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn)
 {
        struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
@@ -491,10 +453,9 @@ void hns_mac_reset(struct hns_mac_cb *mac_cb)
        }
 }
 
-int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu)
+int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu, u32 buf_size)
 {
        struct mac_driver *drv = hns_mac_get_drv(mac_cb);
-       u32 buf_size = mac_cb->dsaf_dev->buf_size;
        u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
        u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ?
                        MAC_MAX_MTU : MAC_MAX_MTU_V2;
index 2bb3d1e93c64a315c92f0e493573add3e5f1e023..24dfba53a0f216c5ce55850b4a5703a1a02772e5 100644 (file)
@@ -373,8 +373,6 @@ struct mac_driver {
        void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable);
        /* config rx mode for promiscuous*/
        void (*set_promiscuous)(void *mac_drv, u8 enable);
-       /* get mac id */
-       void (*mac_get_id)(void *mac_drv, u8 *mac_id);
        void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en);
 
        void (*autoneg_stat)(void *mac_drv, u32 *enable);
@@ -436,7 +434,6 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
 int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, bool enable);
 void hns_mac_start(struct hns_mac_cb *mac_cb);
 void hns_mac_stop(struct hns_mac_cb *mac_cb);
-int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac);
 void hns_mac_uninit(struct dsaf_device *dsaf_dev);
 void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex);
 void hns_mac_reset(struct hns_mac_cb *mac_cb);
@@ -444,7 +441,7 @@ void hns_mac_get_autoneg(struct hns_mac_cb *mac_cb, u32 *auto_neg);
 void hns_mac_get_pauseparam(struct hns_mac_cb *mac_cb, u32 *rx_en, u32 *tx_en);
 int hns_mac_set_autoneg(struct hns_mac_cb *mac_cb, u8 enable);
 int hns_mac_set_pauseparam(struct hns_mac_cb *mac_cb, u32 rx_en, u32 tx_en);
-int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu);
+int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu, u32 buf_size);
 int hns_mac_get_port_info(struct hns_mac_cb *mac_cb,
                          u8 *auto_neg, u16 *speed, u8 *duplex);
 int hns_mac_config_mac_loopback(struct hns_mac_cb *mac_cb,
index 90dbda7926144a41120d18c28a2c7d033f245f8c..d07b4fe45a4499c2f35c1131d1dbf428973536dd 100644 (file)
@@ -510,10 +510,10 @@ static void hns_dsafv2_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev)
                o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
                dsaf_set_field(o_sbm_bp_cfg,
                               DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M,
-                              DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 48);
+                              DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 55);
                dsaf_set_field(o_sbm_bp_cfg,
                               DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M,
-                              DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 80);
+                              DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 110);
                dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 
                /* for no enable pfc mode */
@@ -521,10 +521,10 @@ static void hns_dsafv2_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev)
                o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
                dsaf_set_field(o_sbm_bp_cfg,
                               DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M,
-                              DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 192);
+                              DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 128);
                dsaf_set_field(o_sbm_bp_cfg,
                               DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M,
-                              DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 240);
+                              DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 192);
                dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
        }
 
@@ -1647,87 +1647,6 @@ int hns_dsaf_rm_mac_addr(
                                      mac_entry->addr);
 }
 
-/**
- * hns_dsaf_set_mac_mc_entry - set mac mc-entry
- * @dsaf_dev: dsa fabric device struct pointer
- * @mac_entry: mc-mac entry
- */
-int hns_dsaf_set_mac_mc_entry(
-       struct dsaf_device *dsaf_dev,
-       struct dsaf_drv_mac_multi_dest_entry *mac_entry)
-{
-       u16 entry_index = DSAF_INVALID_ENTRY_IDX;
-       struct dsaf_drv_tbl_tcam_key mac_key;
-       struct dsaf_tbl_tcam_mcast_cfg mac_data;
-       struct dsaf_drv_priv *priv =
-           (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
-       struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
-       struct dsaf_drv_tbl_tcam_key tmp_mac_key;
-       struct dsaf_tbl_tcam_data tcam_data;
-
-       /* mac addr check */
-       if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
-               dev_err(dsaf_dev->dev, "set uc %s Mac %pM err!\n",
-                       dsaf_dev->ae_dev.name, mac_entry->addr);
-               return -EINVAL;
-       }
-
-       /*config key */
-       hns_dsaf_set_mac_key(dsaf_dev, &mac_key,
-                            mac_entry->in_vlan_id,
-                            mac_entry->in_port_num, mac_entry->addr);
-
-       /* entry ie exist? */
-       entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
-       if (entry_index == DSAF_INVALID_ENTRY_IDX) {
-               /*if hasnot, find enpty entry*/
-               entry_index = hns_dsaf_find_empty_mac_entry(dsaf_dev);
-               if (entry_index == DSAF_INVALID_ENTRY_IDX) {
-                       /*if hasnot empty, error*/
-                       dev_err(dsaf_dev->dev,
-                               "set_uc_entry failed, %s Mac key(%#x:%#x)\n",
-                               dsaf_dev->ae_dev.name,
-                               mac_key.high.val, mac_key.low.val);
-                       return -EINVAL;
-               }
-
-               /* config hardware entry */
-               memset(mac_data.tbl_mcast_port_msk,
-                      0, sizeof(mac_data.tbl_mcast_port_msk));
-       } else {
-               /* config hardware entry */
-               hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
-                                    &mac_data);
-
-               tmp_mac_key.high.val =
-                       le32_to_cpu(tcam_data.tbl_tcam_data_high);
-               tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-       }
-       mac_data.tbl_mcast_old_en = 0;
-       mac_data.tbl_mcast_item_vld = 1;
-       dsaf_set_field(mac_data.tbl_mcast_port_msk[0],
-                      0x3F, 0, mac_entry->port_mask[0]);
-
-       dev_dbg(dsaf_dev->dev,
-               "set_uc_entry, %s key(%#x:%#x) entry_index%d\n",
-               dsaf_dev->ae_dev.name, mac_key.high.val,
-               mac_key.low.val, entry_index);
-
-       tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
-       tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
-
-       hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, NULL,
-                            &mac_data);
-
-       /* config software entry */
-       soft_mac_entry += entry_index;
-       soft_mac_entry->index = entry_index;
-       soft_mac_entry->tcam_key.high.val = mac_key.high.val;
-       soft_mac_entry->tcam_key.low.val = mac_key.low.val;
-
-       return 0;
-}
-
 static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src)
 {
        u16 *a = (u16 *)dst;
@@ -2089,166 +2008,6 @@ int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id,
        return ret;
 }
 
-/**
- * hns_dsaf_get_mac_uc_entry - get mac uc entry
- * @dsaf_dev: dsa fabric device struct pointer
- * @mac_entry: mac entry
- */
-int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
-                             struct dsaf_drv_mac_single_dest_entry *mac_entry)
-{
-       u16 entry_index = DSAF_INVALID_ENTRY_IDX;
-       struct dsaf_drv_tbl_tcam_key mac_key;
-
-       struct dsaf_tbl_tcam_ucast_cfg mac_data;
-       struct dsaf_tbl_tcam_data tcam_data;
-
-       /* check macaddr */
-       if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
-           MAC_IS_BROADCAST(mac_entry->addr)) {
-               dev_err(dsaf_dev->dev, "get_entry failed,addr %pM\n",
-                       mac_entry->addr);
-               return -EINVAL;
-       }
-
-       /*config key */
-       hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id,
-                            mac_entry->in_port_num, mac_entry->addr);
-
-       /*check exist? */
-       entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
-       if (entry_index == DSAF_INVALID_ENTRY_IDX) {
-               /*find none, error */
-               dev_err(dsaf_dev->dev,
-                       "get_uc_entry failed, %s Mac key(%#x:%#x)\n",
-                       dsaf_dev->ae_dev.name,
-                       mac_key.high.val, mac_key.low.val);
-               return -EINVAL;
-       }
-       dev_dbg(dsaf_dev->dev,
-               "get_uc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
-               dsaf_dev->ae_dev.name, mac_key.high.val,
-               mac_key.low.val, entry_index);
-
-       /* read entry */
-       hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
-
-       mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
-       mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
-       mac_entry->port_num = mac_data.tbl_ucast_out_port;
-
-       return 0;
-}
-
-/**
- * hns_dsaf_get_mac_mc_entry - get mac mc entry
- * @dsaf_dev: dsa fabric device struct pointer
- * @mac_entry: mac entry
- */
-int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
-                             struct dsaf_drv_mac_multi_dest_entry *mac_entry)
-{
-       u16 entry_index = DSAF_INVALID_ENTRY_IDX;
-       struct dsaf_drv_tbl_tcam_key mac_key;
-
-       struct dsaf_tbl_tcam_mcast_cfg mac_data;
-       struct dsaf_tbl_tcam_data tcam_data;
-
-       /*check mac addr */
-       if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
-           MAC_IS_BROADCAST(mac_entry->addr)) {
-               dev_err(dsaf_dev->dev, "get_entry failed,addr %pM\n",
-                       mac_entry->addr);
-               return -EINVAL;
-       }
-
-       /*config key */
-       hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id,
-                            mac_entry->in_port_num, mac_entry->addr);
-
-       /*check exist? */
-       entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
-       if (entry_index == DSAF_INVALID_ENTRY_IDX) {
-               /* find none, error */
-               dev_err(dsaf_dev->dev,
-                       "get_mac_uc_entry failed, %s Mac key(%#x:%#x)\n",
-                       dsaf_dev->ae_dev.name, mac_key.high.val,
-                       mac_key.low.val);
-               return -EINVAL;
-       }
-       dev_dbg(dsaf_dev->dev,
-               "get_mac_uc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
-               dsaf_dev->ae_dev.name, mac_key.high.val,
-               mac_key.low.val, entry_index);
-
-       /*read entry */
-       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
-
-       mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
-       mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
-       mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
-       return 0;
-}
-
-/**
- * hns_dsaf_get_mac_entry_by_index - get mac entry by tab index
- * @dsaf_dev: dsa fabric device struct pointer
- * @entry_index: tab entry index
- * @mac_entry: mac entry
- */
-int hns_dsaf_get_mac_entry_by_index(
-       struct dsaf_device *dsaf_dev,
-       u16 entry_index, struct dsaf_drv_mac_multi_dest_entry *mac_entry)
-{
-       struct dsaf_drv_tbl_tcam_key mac_key;
-
-       struct dsaf_tbl_tcam_mcast_cfg mac_data;
-       struct dsaf_tbl_tcam_ucast_cfg mac_uc_data;
-       struct dsaf_tbl_tcam_data tcam_data;
-       char mac_addr[ETH_ALEN] = {0};
-
-       if (entry_index >= dsaf_dev->tcam_max_num) {
-               /* find none, del error */
-               dev_err(dsaf_dev->dev, "get_uc_entry failed, %s\n",
-                       dsaf_dev->ae_dev.name);
-               return -EINVAL;
-       }
-
-       /* mc entry, do read opt */
-       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
-
-       mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
-       mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
-       mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
-
-       /***get mac addr*/
-       mac_addr[0] = mac_key.high.bits.mac_0;
-       mac_addr[1] = mac_key.high.bits.mac_1;
-       mac_addr[2] = mac_key.high.bits.mac_2;
-       mac_addr[3] = mac_key.high.bits.mac_3;
-       mac_addr[4] = mac_key.low.bits.mac_4;
-       mac_addr[5] = mac_key.low.bits.mac_5;
-       /**is mc or uc*/
-       if (MAC_IS_MULTICAST((u8 *)mac_addr) ||
-           MAC_IS_L3_MULTICAST((u8 *)mac_addr)) {
-               /**mc donot do*/
-       } else {
-               /*is not mc, just uc... */
-               hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data,
-                                    &mac_uc_data);
-
-               mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
-               mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
-               mac_entry->port_mask[0] = (1 << mac_uc_data.tbl_ucast_out_port);
-       }
-
-       return 0;
-}
-
 static struct dsaf_device *hns_dsaf_alloc_dev(struct device *dev,
                                              size_t sizeof_priv)
 {
index cef6bf46ae9309bf84c9f5ff466982d59d8bed93..4507e8222683c112c05eeca4633e65990b789b8f 100644 (file)
@@ -68,7 +68,7 @@ enum dsaf_roce_qos_sl {
 };
 
 #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
-#define HNS_DSAF_IS_DEBUG(dev) (dev->dsaf_mode == DSAF_MODE_DISABLE_SP)
+#define HNS_DSAF_IS_DEBUG(dev) ((dev)->dsaf_mode == DSAF_MODE_DISABLE_SP)
 
 enum hal_dsaf_mode {
        HRD_DSAF_NO_DSAF_MODE   = 0x0,
@@ -429,23 +429,12 @@ static inline struct hnae_vf_cb *hns_ae_get_vf_cb(
 
 int hns_dsaf_set_mac_uc_entry(struct dsaf_device *dsaf_dev,
                              struct dsaf_drv_mac_single_dest_entry *mac_entry);
-int hns_dsaf_set_mac_mc_entry(struct dsaf_device *dsaf_dev,
-                             struct dsaf_drv_mac_multi_dest_entry *mac_entry);
 int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
                             struct dsaf_drv_mac_single_dest_entry *mac_entry);
 int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id,
                           u8 in_port_num, u8 *addr);
 int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
                             struct dsaf_drv_mac_single_dest_entry *mac_entry);
-int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
-                             struct dsaf_drv_mac_single_dest_entry *mac_entry);
-int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
-                             struct dsaf_drv_mac_multi_dest_entry *mac_entry);
-int hns_dsaf_get_mac_entry_by_index(
-       struct dsaf_device *dsaf_dev,
-       u16 entry_index,
-       struct dsaf_drv_mac_multi_dest_entry *mac_entry);
-
 void hns_dsaf_fix_mac_mode(struct hns_mac_cb *mac_cb);
 
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev);
@@ -475,5 +464,4 @@ int hns_dsaf_rm_mac_addr(
 int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev,
                             u8 mac_id, u8 port_num);
 
-
 #endif /* __HNS_DSAF_MAIN_H__ */
index 6ea872287307bd85b13436a42c1d8aacbad05f2d..eba406bea52fba77381f2aa4196014eef6678cd1 100644 (file)
@@ -496,17 +496,17 @@ void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data)
  */
 int hns_ppe_init(struct dsaf_device *dsaf_dev)
 {
-       int i, k;
        int ret;
+       int i;
 
        for (i = 0; i < HNS_PPE_COM_NUM; i++) {
                ret = hns_ppe_common_get_cfg(dsaf_dev, i);
                if (ret)
-                       goto get_ppe_cfg_fail;
+                       goto get_cfg_fail;
 
                ret = hns_rcb_common_get_cfg(dsaf_dev, i);
                if (ret)
-                       goto get_rcb_cfg_fail;
+                       goto get_cfg_fail;
 
                hns_ppe_get_cfg(dsaf_dev->ppe_common[i]);
 
@@ -518,13 +518,12 @@ int hns_ppe_init(struct dsaf_device *dsaf_dev)
 
        return 0;
 
-get_rcb_cfg_fail:
-       hns_ppe_common_free_cfg(dsaf_dev, i);
-get_ppe_cfg_fail:
-       for (k = i - 1; k >= 0; k--) {
-               hns_rcb_common_free_cfg(dsaf_dev, k);
-               hns_ppe_common_free_cfg(dsaf_dev, k);
+get_cfg_fail:
+       for (i = 0; i < HNS_PPE_COM_NUM; i++) {
+               hns_rcb_common_free_cfg(dsaf_dev, i);
+               hns_ppe_common_free_cfg(dsaf_dev, i);
        }
+
        return ret;
 }
 
index f0ed80d6ef9cd45a8408c987ab4315646098f438..c20a0f4f8f02b351eb44fe538ae7b1e0b34ac10d 100644 (file)
@@ -32,6 +32,9 @@
 #define RCB_RESET_WAIT_TIMES 30
 #define RCB_RESET_TRY_TIMES 10
 
+/* Because the default MTU is 1500, an rcb buffer size of 2048 is sufficient */
+#define RCB_DEFAULT_BUFFER_SIZE 2048
+
 /**
  *hns_rcb_wait_fbd_clean - clean fbd
  *@qs: ring struct pointer array
@@ -192,6 +195,30 @@ void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common)
        wmb();  /* Sync point after breakpoint */
 }
 
+/* hns_rcb_set_tx_ring_bs - init rcb ring buf size register
+ *@q: hnae_queue
+ *@buf_size: buffer size set to hw
+ */
+void hns_rcb_set_tx_ring_bs(struct hnae_queue *q, u32 buf_size)
+{
+       u32 bd_size_type = hns_rcb_buf_size2type(buf_size);
+
+       dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG,
+                      bd_size_type);
+}
+
+/* hns_rcb_set_rx_ring_bs - init rcb ring buf size register
+ *@q: hnae_queue
+ *@buf_size: buffer size set to hw
+ */
+void hns_rcb_set_rx_ring_bs(struct hnae_queue *q, u32 buf_size)
+{
+       u32 bd_size_type = hns_rcb_buf_size2type(buf_size);
+
+       dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG,
+                      bd_size_type);
+}
+
 /**
  *hns_rcb_ring_init - init rcb ring
  *@ring_pair: ring pair control block
@@ -200,8 +227,6 @@ void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common)
 static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type)
 {
        struct hnae_queue *q = &ring_pair->q;
-       struct rcb_common_cb *rcb_common = ring_pair->rcb_common;
-       u32 bd_size_type = rcb_common->dsaf_dev->buf_size_type;
        struct hnae_ring *ring =
                (ring_type == RX_RING) ? &q->rx_ring : &q->tx_ring;
        dma_addr_t dma = ring->desc_dma_addr;
@@ -212,8 +237,8 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type)
                dsaf_write_dev(q, RCB_RING_RX_RING_BASEADDR_H_REG,
                               (u32)((dma >> 31) >> 1));
 
-               dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG,
-                              bd_size_type);
+               hns_rcb_set_rx_ring_bs(q, ring->buf_size);
+
                dsaf_write_dev(q, RCB_RING_RX_RING_BD_NUM_REG,
                               ring_pair->port_id_in_comm);
                dsaf_write_dev(q, RCB_RING_RX_RING_PKTLINE_REG,
@@ -224,12 +249,12 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type)
                dsaf_write_dev(q, RCB_RING_TX_RING_BASEADDR_H_REG,
                               (u32)((dma >> 31) >> 1));
 
-               dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG,
-                              bd_size_type);
+               hns_rcb_set_tx_ring_bs(q, ring->buf_size);
+
                dsaf_write_dev(q, RCB_RING_TX_RING_BD_NUM_REG,
                               ring_pair->port_id_in_comm);
                dsaf_write_dev(q, RCB_RING_TX_RING_PKTLINE_REG,
-                              ring_pair->port_id_in_comm);
+                       ring_pair->port_id_in_comm + HNS_RCB_TX_PKTLINE_OFFSET);
        }
 }
 
@@ -259,13 +284,27 @@ static void hns_rcb_set_port_desc_cnt(struct rcb_common_cb *rcb_common,
 static void hns_rcb_set_port_timeout(
        struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout)
 {
-       if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver))
+       if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) {
                dsaf_write_dev(rcb_common, RCB_CFG_OVERTIME_REG,
                               timeout * HNS_RCB_CLK_FREQ_MHZ);
-       else
+       } else if (!HNS_DSAF_IS_DEBUG(rcb_common->dsaf_dev)) {
+               if (timeout > HNS_RCB_DEF_GAP_TIME_USECS)
+                       dsaf_write_dev(rcb_common,
+                                      RCB_PORT_INT_GAPTIME_REG + port_idx * 4,
+                                      HNS_RCB_DEF_GAP_TIME_USECS);
+               else
+                       dsaf_write_dev(rcb_common,
+                                      RCB_PORT_INT_GAPTIME_REG + port_idx * 4,
+                                      timeout);
+
+               dsaf_write_dev(rcb_common,
+                              RCB_PORT_CFG_OVERTIME_REG + port_idx * 4,
+                              timeout);
+       } else {
                dsaf_write_dev(rcb_common,
                               RCB_PORT_CFG_OVERTIME_REG + port_idx * 4,
                               timeout);
+       }
 }
 
 static int hns_rcb_common_get_port_num(struct rcb_common_cb *rcb_common)
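
On V2 non-debug ports the reworked hns_rcb_set_port_timeout() derives two register values from one timeout: the per-port interrupt gap time, clamped to HNS_RCB_DEF_GAP_TIME_USECS (20, per the header hunk further down), and the overtime register, which always takes the full value. A minimal model of that split:

    #include <stdint.h>

    #define DEF_GAP_TIME_USECS 20   /* HNS_RCB_DEF_GAP_TIME_USECS */

    struct port_timeout_regs {
            uint32_t gap_time;      /* RCB_PORT_INT_GAPTIME_REG slot */
            uint32_t overtime;      /* RCB_PORT_CFG_OVERTIME_REG slot */
    };

    /* Sketch: gap time is the timeout clamped to the default cap,
     * overtime is the timeout unchanged. */
    static void set_port_timeout(struct port_timeout_regs *r, uint32_t timeout)
    {
            r->gap_time = timeout > DEF_GAP_TIME_USECS ? DEF_GAP_TIME_USECS
                                                       : timeout;
            r->overtime = timeout;
    }
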
@@ -327,8 +366,12 @@ int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common)
 
        for (i = 0; i < port_num; i++) {
                hns_rcb_set_port_desc_cnt(rcb_common, i, rcb_common->desc_num);
-               (void)hns_rcb_set_coalesced_frames(
-                       rcb_common, i, HNS_RCB_DEF_COALESCED_FRAMES);
+               hns_rcb_set_rx_coalesced_frames(
+                       rcb_common, i, HNS_RCB_DEF_RX_COALESCED_FRAMES);
+               if (!AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver) &&
+                   !HNS_DSAF_IS_DEBUG(rcb_common->dsaf_dev))
+                       hns_rcb_set_tx_coalesced_frames(
+                               rcb_common, i, HNS_RCB_DEF_TX_COALESCED_FRAMES);
                hns_rcb_set_port_timeout(
                        rcb_common, i, HNS_RCB_DEF_COALESCED_USECS);
        }
@@ -380,7 +423,6 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type)
        struct hnae_ring *ring;
        struct rcb_common_cb *rcb_common;
        struct ring_pair_cb *ring_pair_cb;
-       u32 buf_size;
        u16 desc_num, mdnum_ppkt;
        bool irq_idx, is_ver1;
 
@@ -401,7 +443,6 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type)
        }
 
        rcb_common = ring_pair_cb->rcb_common;
-       buf_size = rcb_common->dsaf_dev->buf_size;
        desc_num = rcb_common->dsaf_dev->desc_num;
 
        ring->desc = NULL;
@@ -410,7 +451,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type)
        ring->irq = ring_pair_cb->virq[irq_idx];
        ring->desc_dma_addr = 0;
 
-       ring->buf_size = buf_size;
+       ring->buf_size = RCB_DEFAULT_BUFFER_SIZE;
        ring->desc_num = desc_num;
        ring->max_desc_num_per_pkt = mdnum_ppkt;
        ring->max_raw_data_sz_per_desc = HNS_RCB_MAX_PKT_SIZE;
@@ -430,7 +471,6 @@ static void hns_rcb_ring_pair_get_cfg(struct ring_pair_cb *ring_pair_cb)
 static int hns_rcb_get_port_in_comm(
        struct rcb_common_cb *rcb_common, int ring_idx)
 {
-
        return ring_idx / (rcb_common->max_q_per_vf * rcb_common->max_vfn);
 }
 
@@ -484,18 +524,34 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common)
 }
 
 /**
- *hns_rcb_get_coalesced_frames - get rcb port coalesced frames
+ *hns_rcb_get_rx_coalesced_frames - get rcb port rx coalesced frames
  *@rcb_common: rcb_common device
  *@port_idx:port id in comm
  *
  *Returns: coalesced_frames
  */
-u32 hns_rcb_get_coalesced_frames(
+u32 hns_rcb_get_rx_coalesced_frames(
        struct rcb_common_cb *rcb_common, u32 port_idx)
 {
        return dsaf_read_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4);
 }
 
+/**
+ *hns_rcb_get_tx_coalesced_frames - get rcb port tx coalesced frames
+ *@rcb_common: rcb_common device
+ *@port_idx:port id in comm
+ *
+ *Returns: coalesced_frames
+ */
+u32 hns_rcb_get_tx_coalesced_frames(
+       struct rcb_common_cb *rcb_common, u32 port_idx)
+{
+       u64 reg;
+
+       reg = RCB_CFG_PKTLINE_REG + (port_idx + HNS_RCB_TX_PKTLINE_OFFSET) * 4;
+       return dsaf_read_dev(rcb_common, reg);
+}
+
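
hns_rcb_get_tx_coalesced_frames() relies on the register layout implied by HNS_RCB_TX_PKTLINE_OFFSET: the tx thresholds sit in the same PKTLINE bank as the rx ones, eight 4-byte slots higher. A hypothetical helper making the address arithmetic explicit (the bank base is a parameter because its value is not shown in this hunk):

    #include <stdint.h>

    #define TX_PKTLINE_OFFSET 8     /* HNS_RCB_TX_PKTLINE_OFFSET */

    /* Sketch: the rx threshold for port n sits at base + n * 4,
     * the tx threshold eight slots later. */
    static uint64_t rx_pktline_addr(uint64_t base, uint32_t port_idx)
    {
            return base + port_idx * 4;
    }

    static uint64_t tx_pktline_addr(uint64_t base, uint32_t port_idx)
    {
            return base + (port_idx + TX_PKTLINE_OFFSET) * 4;
    }
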
 /**
  *hns_rcb_get_coalesce_usecs - get rcb port coalesced time_out
  *@rcb_common: rcb_common device
@@ -538,33 +594,47 @@ int hns_rcb_set_coalesce_usecs(
                        return -EINVAL;
                }
        }
-       if (timeout > HNS_RCB_MAX_COALESCED_USECS) {
+       if (timeout > HNS_RCB_MAX_COALESCED_USECS || timeout == 0) {
                dev_err(rcb_common->dsaf_dev->dev,
-                       "error: coalesce_usecs setting supports 0~1023us\n");
+                       "error: coalesce_usecs setting supports 1~1023us\n");
                return -EINVAL;
        }
+       hns_rcb_set_port_timeout(rcb_common, port_idx, timeout);
+       return 0;
+}
 
-       if (!AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) {
-               if (timeout == 0)
-                       /* set timeout to 0, Disable gap time */
-                       dsaf_set_reg_field(rcb_common->io_base,
-                                          RCB_INT_GAP_TIME_REG + port_idx * 4,
-                                          PPE_INT_GAPTIME_M, PPE_INT_GAPTIME_B,
-                                          0);
-               else
-                       /* set timeout non 0, restore gap time to 1 */
-                       dsaf_set_reg_field(rcb_common->io_base,
-                                          RCB_INT_GAP_TIME_REG + port_idx * 4,
-                                          PPE_INT_GAPTIME_M, PPE_INT_GAPTIME_B,
-                                          1);
+/**
+ *hns_rcb_set_tx_coalesced_frames - set rcb tx coalesced frames
+ *@rcb_common: rcb_common device
+ *@port_idx:port id in comm
+ *@coalesced_frames: tx BD num for coalesced frames
+ *
+ * Returns:
+ * Zero for success, or an error code in case of failure
+ */
+int hns_rcb_set_tx_coalesced_frames(
+       struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames)
+{
+       u32 old_waterline =
+               hns_rcb_get_tx_coalesced_frames(rcb_common, port_idx);
+       u64 reg;
+
+       if (coalesced_frames == old_waterline)
+               return 0;
+
+       if (coalesced_frames != 1) {
+               dev_err(rcb_common->dsaf_dev->dev,
+                       "error: tx coalesce_frames setting not supported!\n");
+               return -EINVAL;
        }
 
-       hns_rcb_set_port_timeout(rcb_common, port_idx, timeout);
+       reg = RCB_CFG_PKTLINE_REG + (port_idx + HNS_RCB_TX_PKTLINE_OFFSET) * 4;
+       dsaf_write_dev(rcb_common, reg, coalesced_frames);
        return 0;
 }
 
 /**
- *hns_rcb_set_coalesced_frames - set rcb coalesced frames
+ *hns_rcb_set_rx_coalesced_frames - set rcb rx coalesced frames
  *@rcb_common: rcb_common device
  *@port_idx:port id in comm
  *@coalesced_frames: rx BD num for coalesced frames
@@ -572,10 +642,11 @@ int hns_rcb_set_coalesce_usecs(
  * Returns:
  * Zero for success, or an error code in case of failure
  */
-int hns_rcb_set_coalesced_frames(
+int hns_rcb_set_rx_coalesced_frames(
        struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames)
 {
-       u32 old_waterline = hns_rcb_get_coalesced_frames(rcb_common, port_idx);
+       u32 old_waterline =
+               hns_rcb_get_rx_coalesced_frames(rcb_common, port_idx);
 
        if (coalesced_frames == old_waterline)
                return 0;
index 99b4e1ba0a9411a9889cbf8343605a8e79616bb6..a664ee88ab457ced89f759deb85ad853b3e8ab11 100644 (file)
@@ -35,12 +35,23 @@ struct rcb_common_cb;
 
 #define HNS_RCB_REG_OFFSET                     0x10000
 
+#define HNS_RCB_TX_FRAMES_LOW          1
+#define HNS_RCB_RX_FRAMES_LOW          1
+#define HNS_RCB_TX_FRAMES_HIGH         1023
+#define HNS_RCB_RX_FRAMES_HIGH         1023
+#define HNS_RCB_TX_USECS_LOW           1
+#define HNS_RCB_RX_USECS_LOW           1
+#define HNS_RCB_TX_USECS_HIGH          1023
+#define HNS_RCB_RX_USECS_HIGH          1023
 #define HNS_RCB_MAX_COALESCED_FRAMES           1023
 #define HNS_RCB_MIN_COALESCED_FRAMES           1
-#define HNS_RCB_DEF_COALESCED_FRAMES           50
+#define HNS_RCB_DEF_RX_COALESCED_FRAMES                50
+#define HNS_RCB_DEF_TX_COALESCED_FRAMES                1
 #define HNS_RCB_CLK_FREQ_MHZ                   350
 #define HNS_RCB_MAX_COALESCED_USECS            0x3ff
-#define HNS_RCB_DEF_COALESCED_USECS            50
+#define HNS_RCB_DEF_COALESCED_USECS            30
+#define HNS_RCB_DEF_GAP_TIME_USECS             20
+#define HNS_RCB_TX_PKTLINE_OFFSET              8
 
 #define HNS_RCB_COMMON_ENDIAN                  1
 
@@ -125,13 +136,17 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag);
 void hns_rcb_init_hw(struct ring_pair_cb *ring);
 void hns_rcb_reset_ring_hw(struct hnae_queue *q);
 void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag);
-u32 hns_rcb_get_coalesced_frames(
+u32 hns_rcb_get_rx_coalesced_frames(
+       struct rcb_common_cb *rcb_common, u32 port_idx);
+u32 hns_rcb_get_tx_coalesced_frames(
        struct rcb_common_cb *rcb_common, u32 port_idx);
 u32 hns_rcb_get_coalesce_usecs(
        struct rcb_common_cb *rcb_common, u32 port_idx);
 int hns_rcb_set_coalesce_usecs(
        struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout);
-int hns_rcb_set_coalesced_frames(
+int hns_rcb_set_rx_coalesced_frames(
+       struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames);
+int hns_rcb_set_tx_coalesced_frames(
        struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames);
 void hns_rcb_update_stats(struct hnae_queue *queue);
 
@@ -146,4 +161,7 @@ int hns_rcb_get_ring_regs_count(void);
 void hns_rcb_get_ring_regs(struct hnae_queue *queue, void *data);
 
 void hns_rcb_get_strings(int stringset, u8 *data, int index);
+void hns_rcb_set_rx_ring_bs(struct hnae_queue *q, u32 buf_size);
+void hns_rcb_set_tx_ring_bs(struct hnae_queue *q, u32 buf_size);
+
 #endif /* _HNS_DSAF_RCB_H */
index 8fa18fc17cd2e25f2e3458e608abe6f5a96b60d9..46a52d9bb196326e5da7481f43616dab0afd12a2 100644 (file)
 #define RCB_CFG_OVERTIME_REG                   0x9300
 #define RCB_CFG_PKTLINE_INT_NUM_REG            0x9304
 #define RCB_CFG_OVERTIME_INT_NUM_REG           0x9308
-#define RCB_INT_GAP_TIME_REG                   0x9400
+#define RCB_PORT_INT_GAPTIME_REG               0x9400
 #define RCB_PORT_CFG_OVERTIME_REG              0x9430
 
 #define RCB_RING_RX_RING_BASEADDR_L_REG                0x00000
 
 #define GMAC_DUPLEX_TYPE_REG                   0x0008UL
 #define GMAC_FD_FC_TYPE_REG                    0x000CUL
+#define GMAC_TX_WATER_LINE_REG                 0x0010UL
 #define GMAC_FC_TX_TIMER_REG                   0x001CUL
 #define GMAC_FD_FC_ADDR_LOW_REG                        0x0020UL
 #define GMAC_FD_FC_ADDR_HIGH_REG               0x0024UL
 
 #define GMAC_DUPLEX_TYPE_B 0
 
+#define GMAC_TX_WATER_LINE_MASK                ((1UL << 8) - 1)
+#define GMAC_TX_WATER_LINE_SHIFT       0
+
 #define GMAC_FC_TX_TIMER_S 0
 #define GMAC_FC_TX_TIMER_M 0xffff
 
index aae830a93050ad5f99ece2b6901dd30531852d87..37a2fc35148f7f4201f529baa85563a56165114e 100644 (file)
@@ -299,18 +299,6 @@ static void hns_xgmac_set_tx_auto_pause_frames(void *mac_drv, u16 enable)
                dsaf_write_dev(drv, XGMAC_MAC_PAUSE_TIME_REG, enable);
 }
 
-/**
- *hns_xgmac_get_id - get xgmac port id
- *@mac_drv: mac driver
- *@newval:xgmac max frame length
- */
-static void hns_xgmac_get_id(void *mac_drv, u8 *mac_id)
-{
-       struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-       *mac_id = drv->mac_id;
-}
-
 /**
  *hns_xgmac_config_max_frame_length - set xgmac max frame length
  *@mac_drv: mac driver
@@ -833,7 +821,6 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
        mac_drv->config_half_duplex = NULL;
        mac_drv->set_rx_ignore_pause_frames =
                hns_xgmac_set_rx_ignore_pause_frames;
-       mac_drv->mac_get_id = hns_xgmac_get_id;
        mac_drv->mac_free = hns_xgmac_free;
        mac_drv->adjust_link = NULL;
        mac_drv->set_tx_auto_pause_frames = hns_xgmac_set_tx_auto_pause_frames;
index fca37e2c7f017d76aa537daede5f7af14cb8e152..c6700b91a2dfd3da02dc8a4b6728fbf92beff4c1 100644 (file)
@@ -512,7 +512,8 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i,
        int last_offset;
        bool twobufs;
 
-       twobufs = ((PAGE_SIZE < 8192) && hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048);
+       twobufs = ((PAGE_SIZE < 8192) &&
+               hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048);
 
        desc = &ring->desc[ring->next_to_clean];
        size = le16_to_cpu(desc->rx.size);
@@ -859,7 +860,7 @@ out:
        return recv_pkts;
 }
 
-static void hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
+static bool hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
        struct hnae_ring *ring = ring_data->ring;
        int num = 0;
@@ -873,22 +874,23 @@ static void hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
                ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
                        ring_data->ring, 1);
 
-               napi_schedule(&ring_data->napi);
+               return false;
+       } else {
+               return true;
        }
 }
 
-static void hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
+static bool hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
 {
        struct hnae_ring *ring = ring_data->ring;
-       int num = 0;
+       int num;
 
        num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
 
-       if (num == 0)
-               ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
-                       ring, 0);
+       if (!num)
+               return true;
        else
-               napi_schedule(&ring_data->napi);
+               return false;
 }
 
 static inline void hns_nic_reclaim_one_desc(struct hnae_ring *ring,
@@ -921,12 +923,13 @@ static int is_valid_clean_head(struct hnae_ring *ring, int h)
 
 /* netif_tx_lock will turn down the performance, set only when necessary */
 #ifdef CONFIG_NET_POLL_CONTROLLER
-#define NETIF_TX_LOCK(ndev) netif_tx_lock(ndev)
-#define NETIF_TX_UNLOCK(ndev) netif_tx_unlock(ndev)
+#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
+#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
 #else
-#define NETIF_TX_LOCK(ndev)
-#define NETIF_TX_UNLOCK(ndev)
+#define NETIF_TX_LOCK(ring)
+#define NETIF_TX_UNLOCK(ring)
 #endif
+
 /* reclaim all desc in one budget
  * return error or number of desc left
  */
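
The NETIF_TX_LOCK macros are retargeted from the netdev to the ring: netif_tx_lock() serializes every TX queue of the device, while the reclaim path only touches one ring's indices, so the per-ring spinlock initialized in hnae_init_ring() above is enough and lets different rings be cleaned concurrently. A userspace sketch of the finer granularity, with pthread mutexes standing in for spinlocks:

    #include <pthread.h>

    struct ring {
            pthread_mutex_t lock;   /* stands in for the ring's spinlock_t */
            int next_to_clean;      /* first descriptor not yet reclaimed */
    };

    /* Sketch: reclaim takes only its own ring's lock, so cleaning
     * ring A never serializes against cleaning ring B. */
    static int reclaim_to(struct ring *r, int head)
    {
            int cleaned;

            pthread_mutex_lock(&r->lock);
            cleaned = head - r->next_to_clean;
            r->next_to_clean = head;
            pthread_mutex_unlock(&r->lock);
            return cleaned;
    }
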
@@ -940,13 +943,13 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
        int head;
        int bytes, pkts;
 
-       NETIF_TX_LOCK(ndev);
+       NETIF_TX_LOCK(ring);
 
        head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
        rmb(); /* make sure head is ready before touch any data */
 
        if (is_ring_empty(ring) || head == ring->next_to_clean) {
-               NETIF_TX_UNLOCK(ndev);
+               NETIF_TX_UNLOCK(ring);
                return 0; /* no data to poll */
        }
 
@@ -954,7 +957,7 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
                netdev_err(ndev, "wrong head (%d, %d-%d)\n", head,
                           ring->next_to_use, ring->next_to_clean);
                ring->stats.io_err_cnt++;
-               NETIF_TX_UNLOCK(ndev);
+               NETIF_TX_UNLOCK(ring);
                return -EIO;
        }
 
@@ -966,7 +969,7 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
                prefetch(&ring->desc_cb[ring->next_to_clean]);
        }
 
-       NETIF_TX_UNLOCK(ndev);
+       NETIF_TX_UNLOCK(ring);
 
        dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
        netdev_tx_completed_queue(dev_queue, pkts, bytes);
@@ -989,7 +992,7 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
        return 0;
 }
 
-static void hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data)
+static bool hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
        struct hnae_ring *ring = ring_data->ring;
        int head;
@@ -1002,20 +1005,21 @@ static void hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data)
                ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
                        ring_data->ring, 1);
 
-               napi_schedule(&ring_data->napi);
+               return false;
+       } else {
+               return true;
        }
 }
 
-static void hns_nic_tx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
+static bool hns_nic_tx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
 {
        struct hnae_ring *ring = ring_data->ring;
        int head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
 
        if (head == ring->next_to_clean)
-               ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
-                       ring, 0);
+               return true;
        else
-               napi_schedule(&ring_data->napi);
+               return false;
 }
 
 static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
@@ -1026,7 +1030,7 @@ static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
        int head;
        int bytes, pkts;
 
-       NETIF_TX_LOCK(ndev);
+       NETIF_TX_LOCK(ring);
 
        head = ring->next_to_use; /* ntu: ring position set by software */
        bytes = 0;
@@ -1034,7 +1038,7 @@ static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
        while (head != ring->next_to_clean)
                hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
 
-       NETIF_TX_UNLOCK(ndev);
+       NETIF_TX_UNLOCK(ring);
 
        dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
        netdev_tx_reset_queue(dev_queue);
@@ -1042,15 +1046,23 @@ static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
 
 static int hns_nic_common_poll(struct napi_struct *napi, int budget)
 {
+       int clean_complete = 0;
        struct hns_nic_ring_data *ring_data =
                container_of(napi, struct hns_nic_ring_data, napi);
-       int clean_complete = ring_data->poll_one(
-                               ring_data, budget, ring_data->ex_process);
+       struct hnae_ring *ring = ring_data->ring;
 
-       if (clean_complete >= 0 && clean_complete < budget) {
-               napi_complete(napi);
-               ring_data->fini_process(ring_data);
-               return 0;
+try_again:
+       clean_complete += ring_data->poll_one(
+                               ring_data, budget - clean_complete,
+                               ring_data->ex_process);
+
+       if (clean_complete < budget) {
+               if (ring_data->fini_process(ring_data)) {
+                       napi_complete(napi);
+                       ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
+               } else {
+                       goto try_again;
+               }
        }
 
        return clean_complete;
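
The reworked hns_nic_common_poll() no longer completes NAPI after the first short poll: it keeps polling while the fini_process() callback reports outstanding work, and only once the ring is confirmed idle does it complete NAPI and re-enable the ring interrupt, closing the race where packets arrive between the last poll and napi_complete(). A sketch of the control flow with the driver callbacks abstracted away:

    struct poll_ctx;
    typedef int (*poll_one_fn)(struct poll_ctx *c, int budget); /* returns work done */
    typedef int (*fini_fn)(struct poll_ctx *c);                 /* 1 when ring is idle */

    struct poll_ctx {
            poll_one_fn poll_one;
            fini_fn     fini_process;
    };

    static void complete_and_unmask(struct poll_ctx *c)
    {
            (void)c;        /* models napi_complete() + toggle_ring_irq(ring, 0) */
    }

    /* Sketch: returning budget (or more) keeps the poller scheduled;
     * finishing below budget first requires an idle confirmation. */
    static int common_poll(struct poll_ctx *c, int budget)
    {
            int done = 0;

            for (;;) {
                    done += c->poll_one(c, budget - done);
                    if (done >= budget)
                            return done;    /* budget spent: poll again later */
                    if (c->fini_process(c)) {
                            complete_and_unmask(c);  /* idle confirmed */
                            return done;
                    }
                    /* new work arrived after the last pass: poll again now */
            }
    }
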
@@ -1196,54 +1208,31 @@ static void hns_nic_ring_close(struct net_device *netdev, int idx)
        napi_disable(&priv->ring_data[idx].napi);
 }
 
-static void hns_set_irq_affinity(struct hns_nic_priv *priv)
+static int hns_nic_init_affinity_mask(int q_num, int ring_idx,
+                                     struct hnae_ring *ring, cpumask_t *mask)
 {
-       struct hnae_handle *h = priv->ae_handle;
-       struct hns_nic_ring_data *rd;
-       int i;
        int cpu;
-       cpumask_var_t mask;
-
-       if (!alloc_cpumask_var(&mask, GFP_KERNEL))
-               return;
 
-       /*diffrent irq banlance for 16core and 32core*/
-       if (h->q_num == num_possible_cpus()) {
-               for (i = 0; i < h->q_num * 2; i++) {
-                       rd = &priv->ring_data[i];
-                       if (cpu_online(rd->queue_index)) {
-                               cpumask_clear(mask);
-                               cpu = rd->queue_index;
-                               cpumask_set_cpu(cpu, mask);
-                               (void)irq_set_affinity_hint(rd->ring->irq,
-                                                           mask);
-                       }
-               }
+       /* Different irq balance between 16-core and 32-core systems.
+        * The cpu mask is set by ring index according to the ring flag,
+        * which indicates whether the ring is tx or rx.
+        */
+       if (q_num == num_possible_cpus()) {
+               if (is_tx_ring(ring))
+                       cpu = ring_idx;
+               else
+                       cpu = ring_idx - q_num;
        } else {
-               for (i = 0; i < h->q_num; i++) {
-                       rd = &priv->ring_data[i];
-                       if (cpu_online(rd->queue_index * 2)) {
-                               cpumask_clear(mask);
-                               cpu = rd->queue_index * 2;
-                               cpumask_set_cpu(cpu, mask);
-                               (void)irq_set_affinity_hint(rd->ring->irq,
-                                                           mask);
-                       }
-               }
-
-               for (i = h->q_num; i < h->q_num * 2; i++) {
-                       rd = &priv->ring_data[i];
-                       if (cpu_online(rd->queue_index * 2 + 1)) {
-                               cpumask_clear(mask);
-                               cpu = rd->queue_index * 2 + 1;
-                               cpumask_set_cpu(cpu, mask);
-                               (void)irq_set_affinity_hint(rd->ring->irq,
-                                                           mask);
-                       }
-               }
+               if (is_tx_ring(ring))
+                       cpu = ring_idx * 2;
+               else
+                       cpu = (ring_idx - q_num) * 2 + 1;
        }
 
-       free_cpumask_var(mask);
+       cpumask_clear(mask);
+       cpumask_set_cpu(cpu, mask);
+
+       return cpu;
 }
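
As a quick check of the mapping above, a standalone sketch (the q_num and ncpus values are made up) that reproduces the cpu chosen for each ring index:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the mapping in hns_nic_init_affinity_mask(); assumes tx rings
 * occupy indices [0, q_num) and rx rings [q_num, 2 * q_num). */
static int ring_to_cpu(int q_num, int ncpus, int ring_idx, bool is_tx)
{
        if (q_num == ncpus)
                return is_tx ? ring_idx : ring_idx - q_num;

        return is_tx ? ring_idx * 2 : (ring_idx - q_num) * 2 + 1;
}

int main(void)
{
        /* 16 queues on a 32-cpu box: tx i -> cpu 2i, rx i -> cpu 2i + 1 */
        printf("tx ring 3  -> cpu %d\n", ring_to_cpu(16, 32, 3, true));
        printf("rx ring 19 -> cpu %d\n", ring_to_cpu(16, 32, 19, false));
        return 0;
}

On a 32-cpu box with 16 queues this interleaves tx rings onto even cpus and rx rings onto odd cpus, which is the "different irq balance" the comment refers to.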
 
 static int hns_nic_init_irq(struct hns_nic_priv *priv)
@@ -1252,6 +1241,7 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv)
        struct hns_nic_ring_data *rd;
        int i;
        int ret;
+       int cpu;
 
        for (i = 0; i < h->q_num * 2; i++) {
                rd = &priv->ring_data[i];
@@ -1261,7 +1251,7 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv)
 
                snprintf(rd->ring->ring_name, RCB_RING_NAME_LEN,
                         "%s-%s%d", priv->netdev->name,
-                        (i < h->q_num ? "tx" : "rx"), rd->queue_index);
+                        (is_tx_ring(rd->ring) ? "tx" : "rx"), rd->queue_index);
 
                rd->ring->ring_name[RCB_RING_NAME_LEN - 1] = '\0';
 
@@ -1273,12 +1263,17 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv)
                        return ret;
                }
                disable_irq(rd->ring->irq);
+
+               cpu = hns_nic_init_affinity_mask(h->q_num, i,
+                                                rd->ring, &rd->mask);
+
+               if (cpu_online(cpu))
+                       irq_set_affinity_hint(rd->ring->irq,
+                                             &rd->mask);
+
                rd->ring->irq_init_flag = RCB_IRQ_INITED;
        }
 
-       /*set cpu affinity*/
-       hns_set_irq_affinity(priv);
-
        return 0;
 }
 
@@ -1487,32 +1482,259 @@ static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb,
        return (netdev_tx_t)ret;
 }
 
+static void hns_nic_drop_rx_fetch(struct hns_nic_ring_data *ring_data,
+                                 struct sk_buff *skb)
+{
+       dev_kfree_skb_any(skb);
+}
+
+#define HNS_LB_TX_RING 0
+static struct sk_buff *hns_assemble_skb(struct net_device *ndev)
+{
+       struct sk_buff *skb;
+       struct ethhdr *ethhdr;
+       int frame_len;
+
+       /* allocate test skb */
+       skb = alloc_skb(64, GFP_KERNEL);
+       if (!skb)
+               return NULL;
+
+       skb_put(skb, 64);
+       skb->dev = ndev;
+       memset(skb->data, 0xFF, skb->len);
+
+       /* must be a tcp/ip packet */
+       ethhdr = (struct ethhdr *)skb->data;
+       ethhdr->h_proto = htons(ETH_P_IP);
+
+       frame_len = skb->len & (~1ul);
+       memset(&skb->data[frame_len / 2], 0xAA,
+              frame_len / 2 - 1);
+
+       skb->queue_mapping = HNS_LB_TX_RING;
+
+       return skb;
+}
+
+static int hns_enable_serdes_lb(struct net_device *ndev)
+{
+       struct hns_nic_priv *priv = netdev_priv(ndev);
+       struct hnae_handle *h = priv->ae_handle;
+       struct hnae_ae_ops *ops = h->dev->ops;
+       int speed, duplex;
+       int ret;
+
+       ret = ops->set_loopback(h, MAC_INTERNALLOOP_SERDES, 1);
+       if (ret)
+               return ret;
+
+       ret = ops->start ? ops->start(h) : 0;
+       if (ret)
+               return ret;
+
+       /* adjust link speed and duplex */
+       if (h->phy_if != PHY_INTERFACE_MODE_XGMII)
+               speed = 1000;
+       else
+               speed = 10000;
+       duplex = 1;
+
+       ops->adjust_link(h, speed, duplex);
+
+       /* wait h/w ready */
+       mdelay(300);
+
+       return 0;
+}
+
+static void hns_disable_serdes_lb(struct net_device *ndev)
+{
+       struct hns_nic_priv *priv = netdev_priv(ndev);
+       struct hnae_handle *h = priv->ae_handle;
+       struct hnae_ae_ops *ops = h->dev->ops;
+
+       ops->stop(h);
+       ops->set_loopback(h, MAC_INTERNALLOOP_SERDES, 0);
+}
+
+/**
+ * hns_nic_clear_all_rx_fetch - clear the descriptors the chip has fetched.
+ * The function works as follows:
+ *    1. if any rx ring has a desc between head and tail whose page_offset
+ *       is not 0, the chip fetched the wrong descs for that ring, whose
+ *       buffer size is 4096.
+ *    2. enable the chip serdes loopback and point the rss indirection
+ *       table at that ring.
+ *    3. construct 64-byte ip broadcast packets, then wait for the
+ *       associated rx ring to receive them all so that the chip fetches
+ *       new descriptors.
+ *    4. restore the original state.
+ *
+ * @ndev: net device
+ */
+static int hns_nic_clear_all_rx_fetch(struct net_device *ndev)
+{
+       struct hns_nic_priv *priv = netdev_priv(ndev);
+       struct hnae_handle *h = priv->ae_handle;
+       struct hnae_ae_ops *ops = h->dev->ops;
+       struct hns_nic_ring_data *rd;
+       struct hnae_ring *ring;
+       struct sk_buff *skb;
+       u32 *org_indir;
+       u32 *cur_indir;
+       int indir_size;
+       int head, tail;
+       int fetch_num;
+       int i, j;
+       bool found;
+       int retry_times;
+       int ret = 0;
+
+       /* alloc indir memory */
+       indir_size = ops->get_rss_indir_size(h) * sizeof(*org_indir);
+       org_indir = kzalloc(indir_size, GFP_KERNEL);
+       if (!org_indir)
+               return -ENOMEM;
+
+       /* store the original indirection table */
+       ops->get_rss(h, org_indir, NULL, NULL);
+
+       cur_indir = kzalloc(indir_size, GFP_KERNEL);
+       if (!cur_indir) {
+               ret = -ENOMEM;
+               goto cur_indir_alloc_err;
+       }
+
+       /* set loopback */
+       if (hns_enable_serdes_lb(ndev)) {
+               ret = -EINVAL;
+               goto enable_serdes_lb_err;
+       }
+
+       /* walk every rx ring and clear its fetched descriptors */
+       for (i = 0; i < h->q_num; i++) {
+               ring = &h->qs[i]->rx_ring;
+               head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
+               tail = readl_relaxed(ring->io_base + RCB_REG_TAIL);
+               found = false;
+               fetch_num = ring_dist(ring, head, tail);
+
+               while (head != tail) {
+                       if (ring->desc_cb[head].page_offset != 0) {
+                               found = true;
+                               break;
+                       }
+
+                       head++;
+                       if (head == ring->desc_num)
+                               head = 0;
+               }
+
+               if (found) {
+                       for (j = 0; j < indir_size / sizeof(*org_indir); j++)
+                               cur_indir[j] = i;
+                       ops->set_rss(h, cur_indir, NULL, 0);
+
+                       for (j = 0; j < fetch_num; j++) {
+                               /* alloc one skb and init */
+                               skb = hns_assemble_skb(ndev);
+                               if (!skb) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                               rd = &tx_ring_data(priv, skb->queue_mapping);
+                               hns_nic_net_xmit_hw(ndev, skb, rd);
+
+                               retry_times = 0;
+                               while (retry_times++ < 10) {
+                                       mdelay(10);
+                                       /* clean rx */
+                                       rd = &rx_ring_data(priv, i);
+                                       if (rd->poll_one(rd, fetch_num,
+                                                        hns_nic_drop_rx_fetch))
+                                               break;
+                               }
+
+                               retry_times = 0;
+                               while (retry_times++ < 10) {
+                                       mdelay(10);
+                                       /* reclaim the packet sent on tx ring 0 */
+                                       rd = &tx_ring_data(priv,
+                                                          HNS_LB_TX_RING);
+                                       if (rd->poll_one(rd, fetch_num, NULL))
+                                               break;
+                               }
+                       }
+               }
+       }
+
+out:
+       /* restore everything */
+       ops->set_rss(h, org_indir, NULL, 0);
+       hns_disable_serdes_lb(ndev);
+enable_serdes_lb_err:
+       kfree(cur_indir);
+cur_indir_alloc_err:
+       kfree(org_indir);
+
+       return ret;
+}
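
ring_dist() is not shown in this hunk; the sketch below assumes the usual circular-buffer distance, i.e. the number of descriptors from head up to tail modulo the ring size:

#include <stdio.h>

/* Assumed semantics of ring_dist(): how many descriptors sit between
 * head and tail on a ring of desc_num entries, accounting for wrap. */
static int ring_dist(int desc_num, int head, int tail)
{
        return (tail - head + desc_num) % desc_num;
}

int main(void)
{
        printf("%d\n", ring_dist(1024, 1000, 8)); /* wraps: 32 */
        printf("%d\n", ring_dist(1024, 8, 1000)); /* no wrap: 992 */
        return 0;
}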
+
 static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu)
 {
        struct hns_nic_priv *priv = netdev_priv(ndev);
        struct hnae_handle *h = priv->ae_handle;
+       bool if_running = netif_running(ndev);
        int ret;
 
+       /* MTU < 68 is an error and causes problems on some kernels */
+       if (new_mtu < 68)
+               return -EINVAL;
+
+       /* MTU no change */
+       if (new_mtu == ndev->mtu)
+               return 0;
+
        if (!h->dev->ops->set_mtu)
                return -ENOTSUPP;
 
-       if (netif_running(ndev)) {
+       if (if_running) {
                (void)hns_nic_net_stop(ndev);
                msleep(100);
+       }
 
-               ret = h->dev->ops->set_mtu(h, new_mtu);
-               if (ret)
-                       netdev_err(ndev, "set mtu fail, return value %d\n",
-                                  ret);
+       if (priv->enet_ver != AE_VERSION_1 &&
+           ndev->mtu <= BD_SIZE_2048_MAX_MTU &&
+           new_mtu > BD_SIZE_2048_MAX_MTU) {
+               /* update desc */
+               hnae_reinit_all_ring_desc(h);
 
-               if (hns_nic_net_open(ndev))
-                       netdev_err(ndev, "hns net open fail\n");
-       } else {
-               ret = h->dev->ops->set_mtu(h, new_mtu);
+               /* clear the descriptors the chip has fetched */
+               ret = hns_nic_clear_all_rx_fetch(ndev);
+
+               /* the page offset must be consistent with the desc */
+               hnae_reinit_all_ring_page_off(h);
+
+               if (ret) {
+                       netdev_err(ndev, "clear the fetched desc fail\n");
+                       goto out;
+               }
+       }
+
+       ret = h->dev->ops->set_mtu(h, new_mtu);
+       if (ret) {
+               netdev_err(ndev, "set mtu fail, return value %d\n",
+                          ret);
+               goto out;
        }
 
-       if (!ret)
-               ndev->mtu = new_mtu;
+       /* finally, record the new mtu on the netdevice */
+       ndev->mtu = new_mtu;
+
+out:
+       if (if_running) {
+               if (hns_nic_net_open(ndev)) {
+                       netdev_err(ndev, "hns net open fail\n");
+                       ret = -EINVAL;
+               }
+       }
 
        return ret;
 }
@@ -1791,7 +2013,7 @@ static void hns_nic_reset_subtask(struct hns_nic_priv *priv)
 static void hns_nic_service_event_complete(struct hns_nic_priv *priv)
 {
        WARN_ON(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state));
-
+       /* ensure prior state updates are seen before clearing the bit */
        smp_mb__before_atomic();
        clear_bit(NIC_STATE_SERVICE_SCHED, &priv->state);
 }
index 5b412de350aa28e9099ee251e0824ed66f37b2b1..1b83232082b2a22244b77a05f4125a69cc4a5f32 100644 (file)
@@ -37,10 +37,11 @@ enum hns_nic_state {
 struct hns_nic_ring_data {
        struct hnae_ring *ring;
        struct napi_struct napi;
+       cpumask_t mask; /* affinity mask */
        int queue_index;
        int (*poll_one)(struct hns_nic_ring_data *, int, void *);
        void (*ex_process)(struct hns_nic_ring_data *, struct sk_buff *);
-       void (*fini_process)(struct hns_nic_ring_data *);
+       bool (*fini_process)(struct hns_nic_ring_data *);
 };
 
 /* compatible the difference between two versions */
index 3ac2183dbd2119e0746d35510fcfc1e390ab6d9b..b8fab149690f880394f0d973560aecca69d1171e 100644 (file)
@@ -146,7 +146,7 @@ static int hns_nic_get_link_ksettings(struct net_device *net_dev,
 
        /* When there is no phy, autoneg is off. */
        cmd->base.autoneg = false;
-       cmd->base.cmd = speed;
+       cmd->base.speed = speed;
        cmd->base.duplex = duplex;
 
        if (net_dev->phydev)
@@ -764,14 +764,14 @@ static int hns_get_coalesce(struct net_device *net_dev,
        ec->use_adaptive_tx_coalesce = 1;
 
        if ((!ops->get_coalesce_usecs) ||
-           (!ops->get_rx_max_coalesced_frames))
+           (!ops->get_max_coalesced_frames))
                return -ESRCH;
 
        ops->get_coalesce_usecs(priv->ae_handle,
                                        &ec->tx_coalesce_usecs,
                                        &ec->rx_coalesce_usecs);
 
-       ops->get_rx_max_coalesced_frames(
+       ops->get_max_coalesced_frames(
                priv->ae_handle,
                &ec->tx_max_coalesced_frames,
                &ec->rx_max_coalesced_frames);
@@ -801,30 +801,28 @@ static int hns_set_coalesce(struct net_device *net_dev,
 {
        struct hns_nic_priv *priv = netdev_priv(net_dev);
        struct hnae_ae_ops *ops;
-       int ret;
+       int rc1, rc2;
 
        ops = priv->ae_handle->dev->ops;
 
        if (ec->tx_coalesce_usecs != ec->rx_coalesce_usecs)
                return -EINVAL;
 
-       if (ec->rx_max_coalesced_frames != ec->tx_max_coalesced_frames)
-               return -EINVAL;
-
        if ((!ops->set_coalesce_usecs) ||
            (!ops->set_coalesce_frames))
                return -ESRCH;
 
-       ret = ops->set_coalesce_usecs(priv->ae_handle,
+       rc1 = ops->set_coalesce_usecs(priv->ae_handle,
                                      ec->rx_coalesce_usecs);
-       if (ret)
-               return ret;
 
-       ret = ops->set_coalesce_frames(
-               priv->ae_handle,
-               ec->rx_max_coalesced_frames);
+       rc2 = ops->set_coalesce_frames(priv->ae_handle,
+                                      ec->tx_max_coalesced_frames,
+                                      ec->rx_max_coalesced_frames);
 
-       return ret;
+       if (rc1 || rc2)
+               return -EINVAL;
+
+       return 0;
 }
 
 /**
@@ -1253,12 +1251,10 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key,
 
        ops = priv->ae_handle->dev->ops;
 
-       /* currently hfunc can only be Toeplitz hash */
-       if (key ||
-           (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) {
+               netdev_err(netdev, "Invalid hfunc!\n");
                return -EOPNOTSUPP;
-       if (!indir)
-               return 0;
+       }
 
        return ops->set_rss(priv->ae_handle, indir, key, hfunc);
 }
index 501eb2090ca62bcd118abc136e4c433bdaa38eb6..e5221d95afe195583b7496130cf1a544d1b1f97f 100644 (file)
 #include <linux/phy.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
-#include <linux/spinlock_types.h>
 
 #define MDIO_DRV_NAME "Hi-HNS_MDIO"
 #define MDIO_BUS_NAME "Hisilicon MII Bus"
-#define MDIO_DRV_VERSION "1.3.0"
-#define MDIO_COPYRIGHT "Copyright(c) 2015 Huawei Corporation."
-#define MDIO_DRV_STRING MDIO_BUS_NAME
-#define MDIO_DEFAULT_DEVICE_DESCR MDIO_BUS_NAME
-
-#define MDIO_CTL_DEV_ADDR(x)   (x & 0x1f)
-#define MDIO_CTL_PORT_ADDR(x)  ((x & 0x1f) << 5)
 
 #define MDIO_TIMEOUT                   1000000
 
@@ -64,9 +56,7 @@ struct hns_mdio_device {
 #define MDIO_CMD_DEVAD_S       0
 #define MDIO_CMD_PRTAD_M       0x1f
 #define MDIO_CMD_PRTAD_S       5
-#define MDIO_CMD_OP_M          0x3
 #define MDIO_CMD_OP_S          10
-#define MDIO_CMD_ST_M          0x3
 #define MDIO_CMD_ST_S          12
 #define MDIO_CMD_START_B       14
 
@@ -185,18 +175,20 @@ static int mdio_sc_cfg_reg_write(struct hns_mdio_device *mdio_dev,
 static int hns_mdio_wait_ready(struct mii_bus *bus)
 {
        struct hns_mdio_device *mdio_dev = bus->priv;
+       u32 cmd_reg_value;
        int i;
-       u32 cmd_reg_value = 1;
 
        /* wait for MDIO_COMMAND_REG's mdio_start == 0; */
        /* after that we can do a read or write */
-       for (i = 0; cmd_reg_value; i++) {
+       for (i = 0; i < MDIO_TIMEOUT; i++) {
                cmd_reg_value = MDIO_GET_REG_BIT(mdio_dev,
                                                 MDIO_COMMAND_REG,
                                                 MDIO_CMD_START_B);
-               if (i == MDIO_TIMEOUT)
-                       return -ETIMEDOUT;
+               if (!cmd_reg_value)
+                       break;
        }
+       if ((i == MDIO_TIMEOUT) && cmd_reg_value)
+               return -ETIMEDOUT;
 
        return 0;
 }
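
The rewrite above turns an unbounded busy-wait into a bounded poll; here is a generic userspace sketch of the same pattern (read_start_bit() is a hypothetical stand-in for the MDIO register read):

#include <errno.h>
#include <stdio.h>

#define POLL_TIMEOUT 1000000

static int busy = 3;                    /* stand-in for the START bit */

/* Hypothetical register read: reports busy a few times, then ready. */
static int read_start_bit(void)
{
        return busy > 0 ? (busy--, 1) : 0;
}

static int wait_ready(void)
{
        int val = 1;
        int i;

        /* bounded loop: break as soon as the bit clears */
        for (i = 0; i < POLL_TIMEOUT; i++) {
                val = read_start_bit();
                if (!val)
                        break;
        }

        return val ? -ETIMEDOUT : 0;    /* still busy after the budget */
}

int main(void)
{
        printf("wait_ready() = %d\n", wait_ready());
        return 0;
}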
index 7acda04d034e909269bf7c75befe125313150870..ed8780cca982cd6935eadb8a39a2e80cdc00448e 100644 (file)
@@ -146,7 +146,6 @@ struct ibmveth_adapter {
     struct vio_dev *vdev;
     struct net_device *netdev;
     struct napi_struct napi;
-    struct net_device_stats stats;
     unsigned int mcastFilterSize;
     void * buffer_list_addr;
     void * filter_list_addr;
index 5f11b4dc95d2d1b271c9a47b3e890ef4662af2e4..7ba43cfadf3a86e4694f78632d72c40df3b466f6 100644 (file)
@@ -65,7 +65,6 @@
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
-#include <linux/debugfs.h>
 #include <linux/interrupt.h>
 #include <net/net_namespace.h>
 #include <asm/hvcall.h>
@@ -89,7 +88,6 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
 static int ibmvnic_remove(struct vio_dev *);
 static void release_sub_crqs(struct ibmvnic_adapter *);
-static void release_sub_crqs_no_irqs(struct ibmvnic_adapter *);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -110,6 +108,11 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
 static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
+static void send_login(struct ibmvnic_adapter *adapter);
+static void send_cap_queries(struct ibmvnic_adapter *adapter);
+static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
+static int ibmvnic_init(struct ibmvnic_adapter *);
+static void release_crq_queue(struct ibmvnic_adapter *);
 
 struct ibmvnic_stat {
        char name[ETH_GSTRING_LEN];
@@ -159,21 +162,6 @@ static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
        return rc;
 }
 
-/* net_device_ops functions */
-
-static void init_rx_pool(struct ibmvnic_adapter *adapter,
-                        struct ibmvnic_rx_pool *rx_pool, int num, int index,
-                        int buff_size, int active)
-{
-       netdev_dbg(adapter->netdev,
-                  "Initializing rx_pool %d, %d buffs, %d bytes each\n",
-                  index, num, buff_size);
-       rx_pool->size = num;
-       rx_pool->index = index;
-       rx_pool->buff_size = buff_size;
-       rx_pool->active = active;
-}
-
 static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb, int size)
 {
@@ -202,47 +190,14 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 {
        struct device *dev = &adapter->vdev->dev;
 
+       if (!ltb->buff)
+               return;
+
        dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
        if (!adapter->failover)
                send_request_unmap(adapter, ltb->map_id);
 }
 
-static int alloc_rx_pool(struct ibmvnic_adapter *adapter,
-                        struct ibmvnic_rx_pool *pool)
-{
-       struct device *dev = &adapter->vdev->dev;
-       int i;
-
-       pool->free_map = kcalloc(pool->size, sizeof(int), GFP_KERNEL);
-       if (!pool->free_map)
-               return -ENOMEM;
-
-       pool->rx_buff = kcalloc(pool->size, sizeof(struct ibmvnic_rx_buff),
-                               GFP_KERNEL);
-
-       if (!pool->rx_buff) {
-               dev_err(dev, "Couldn't alloc rx buffers\n");
-               kfree(pool->free_map);
-               return -ENOMEM;
-       }
-
-       if (alloc_long_term_buff(adapter, &pool->long_term_buff,
-                                pool->size * pool->buff_size)) {
-               kfree(pool->free_map);
-               kfree(pool->rx_buff);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < pool->size; ++i)
-               pool->free_map[i] = i;
-
-       atomic_set(&pool->available, 0);
-       pool->next_alloc = 0;
-       pool->next_free = 0;
-
-       return 0;
-}
-
 static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
                              struct ibmvnic_rx_pool *pool)
 {
@@ -347,93 +302,195 @@ static void replenish_pools(struct ibmvnic_adapter *adapter)
        }
 }
 
-static void free_rx_pool(struct ibmvnic_adapter *adapter,
-                        struct ibmvnic_rx_pool *pool)
+static void release_stats_token(struct ibmvnic_adapter *adapter)
 {
-       int i;
+       struct device *dev = &adapter->vdev->dev;
+
+       if (!adapter->stats_token)
+               return;
+
+       dma_unmap_single(dev, adapter->stats_token,
+                        sizeof(struct ibmvnic_statistics),
+                        DMA_FROM_DEVICE);
+       adapter->stats_token = 0;
+}
 
-       kfree(pool->free_map);
-       pool->free_map = NULL;
+static int init_stats_token(struct ibmvnic_adapter *adapter)
+{
+       struct device *dev = &adapter->vdev->dev;
+       dma_addr_t stok;
+
+       stok = dma_map_single(dev, &adapter->stats,
+                             sizeof(struct ibmvnic_statistics),
+                             DMA_FROM_DEVICE);
+       if (dma_mapping_error(dev, stok)) {
+               dev_err(dev, "Couldn't map stats buffer\n");
+               return -1;
+       }
+
+       adapter->stats_token = stok;
+       return 0;
+}
+
+static void release_rx_pools(struct ibmvnic_adapter *adapter)
+{
+       struct ibmvnic_rx_pool *rx_pool;
+       int rx_scrqs;
+       int i, j;
 
-       if (!pool->rx_buff)
+       if (!adapter->rx_pool)
                return;
 
-       for (i = 0; i < pool->size; i++) {
-               if (pool->rx_buff[i].skb) {
-                       dev_kfree_skb_any(pool->rx_buff[i].skb);
-                       pool->rx_buff[i].skb = NULL;
+       rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+       for (i = 0; i < rx_scrqs; i++) {
+               rx_pool = &adapter->rx_pool[i];
+
+               kfree(rx_pool->free_map);
+               free_long_term_buff(adapter, &rx_pool->long_term_buff);
+
+               if (!rx_pool->rx_buff)
+               continue;
+
+               for (j = 0; j < rx_pool->size; j++) {
+                       if (rx_pool->rx_buff[j].skb) {
+                               dev_kfree_skb_any(rx_pool->rx_buff[i].skb);
+                               rx_pool->rx_buff[i].skb = NULL;
+                       }
                }
+
+               kfree(rx_pool->rx_buff);
        }
-       kfree(pool->rx_buff);
-       pool->rx_buff = NULL;
+
+       kfree(adapter->rx_pool);
+       adapter->rx_pool = NULL;
 }
 
-static int ibmvnic_open(struct net_device *netdev)
+static int init_rx_pools(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        struct device *dev = &adapter->vdev->dev;
-       struct ibmvnic_tx_pool *tx_pool;
-       union ibmvnic_crq crq;
+       struct ibmvnic_rx_pool *rx_pool;
        int rxadd_subcrqs;
        u64 *size_array;
-       int tx_subcrqs;
        int i, j;
 
        rxadd_subcrqs =
-           be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
-       tx_subcrqs =
-           be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+               be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
        size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
-                                 be32_to_cpu(adapter->login_rsp_buf->
-                                             off_rxadd_buff_size));
-       adapter->map_id = 1;
-       adapter->napi = kcalloc(adapter->req_rx_queues,
-                               sizeof(struct napi_struct), GFP_KERNEL);
-       if (!adapter->napi)
-               goto alloc_napi_failed;
-       for (i = 0; i < adapter->req_rx_queues; i++) {
-               netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
-                              NAPI_POLL_WEIGHT);
-               napi_enable(&adapter->napi[i]);
+               be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+
+       adapter->rx_pool = kcalloc(rxadd_subcrqs,
+                                  sizeof(struct ibmvnic_rx_pool),
+                                  GFP_KERNEL);
+       if (!adapter->rx_pool) {
+               dev_err(dev, "Failed to allocate rx pools\n");
+               return -1;
        }
-       adapter->rx_pool =
-           kcalloc(rxadd_subcrqs, sizeof(struct ibmvnic_rx_pool), GFP_KERNEL);
 
-       if (!adapter->rx_pool)
-               goto rx_pool_arr_alloc_failed;
-       send_map_query(adapter);
        for (i = 0; i < rxadd_subcrqs; i++) {
-               init_rx_pool(adapter, &adapter->rx_pool[i],
-                            adapter->req_rx_add_entries_per_subcrq, i,
-                            be64_to_cpu(size_array[i]), 1);
-               if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
-                       dev_err(dev, "Couldn't alloc rx pool\n");
-                       goto rx_pool_alloc_failed;
+               rx_pool = &adapter->rx_pool[i];
+
+               netdev_dbg(adapter->netdev,
+                          "Initializing rx_pool %d, %lld buffs, %lld bytes each\n",
+                          i, adapter->req_rx_add_entries_per_subcrq,
+                          be64_to_cpu(size_array[i]));
+
+               rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
+               rx_pool->index = i;
+               rx_pool->buff_size = be64_to_cpu(size_array[i]);
+               rx_pool->active = 1;
+
+               rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
+                                           GFP_KERNEL);
+               if (!rx_pool->free_map) {
+                       release_rx_pools(adapter);
+                       return -1;
+               }
+
+               rx_pool->rx_buff = kcalloc(rx_pool->size,
+                                          sizeof(struct ibmvnic_rx_buff),
+                                          GFP_KERNEL);
+               if (!rx_pool->rx_buff) {
+                       dev_err(dev, "Couldn't alloc rx buffers\n");
+                       release_rx_pools(adapter);
+                       return -1;
                }
+
+               if (alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
+                                        rx_pool->size * rx_pool->buff_size)) {
+                       release_rx_pools(adapter);
+                       return -1;
+               }
+
+               for (j = 0; j < rx_pool->size; ++j)
+                       rx_pool->free_map[j] = j;
+
+               atomic_set(&rx_pool->available, 0);
+               rx_pool->next_alloc = 0;
+               rx_pool->next_free = 0;
+       }
+
+       return 0;
+}
+
+static void release_tx_pools(struct ibmvnic_adapter *adapter)
+{
+       struct ibmvnic_tx_pool *tx_pool;
+       int i, tx_scrqs;
+
+       if (!adapter->tx_pool)
+               return;
+
+       tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+       for (i = 0; i < tx_scrqs; i++) {
+               tx_pool = &adapter->tx_pool[i];
+               kfree(tx_pool->tx_buff);
+               free_long_term_buff(adapter, &tx_pool->long_term_buff);
+               kfree(tx_pool->free_map);
        }
-       adapter->tx_pool =
-           kcalloc(tx_subcrqs, sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
 
+       kfree(adapter->tx_pool);
+       adapter->tx_pool = NULL;
+}
+
+static int init_tx_pools(struct net_device *netdev)
+{
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+       struct device *dev = &adapter->vdev->dev;
+       struct ibmvnic_tx_pool *tx_pool;
+       int tx_subcrqs;
+       int i, j;
+
+       tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+       adapter->tx_pool = kcalloc(tx_subcrqs,
+                                  sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
        if (!adapter->tx_pool)
-               goto tx_pool_arr_alloc_failed;
+               return -1;
+
        for (i = 0; i < tx_subcrqs; i++) {
                tx_pool = &adapter->tx_pool[i];
-               tx_pool->tx_buff =
-                   kcalloc(adapter->req_tx_entries_per_subcrq,
-                           sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
-               if (!tx_pool->tx_buff)
-                       goto tx_pool_alloc_failed;
+               tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
+                                          sizeof(struct ibmvnic_tx_buff),
+                                          GFP_KERNEL);
+               if (!tx_pool->tx_buff) {
+                       dev_err(dev, "tx pool buffer allocation failed\n");
+                       release_tx_pools(adapter);
+                       return -1;
+               }
 
                if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
                                         adapter->req_tx_entries_per_subcrq *
-                                        adapter->req_mtu))
-                       goto tx_ltb_alloc_failed;
+                                        adapter->req_mtu)) {
+                       release_tx_pools(adapter);
+                       return -1;
+               }
 
-               tx_pool->free_map =
-                   kcalloc(adapter->req_tx_entries_per_subcrq,
-                           sizeof(int), GFP_KERNEL);
-               if (!tx_pool->free_map)
-                       goto tx_fm_alloc_failed;
+               tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
+                                           sizeof(int), GFP_KERNEL);
+               if (!tx_pool->free_map) {
+                       release_tx_pools(adapter);
+                       return -1;
+               }
 
                for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
                        tx_pool->free_map[j] = j;
@@ -441,20 +498,153 @@ static int ibmvnic_open(struct net_device *netdev)
                tx_pool->consumer_index = 0;
                tx_pool->producer_index = 0;
        }
-       adapter->bounce_buffer_size =
-           (netdev->mtu + ETH_HLEN - 1) / PAGE_SIZE + 1;
-       adapter->bounce_buffer = kmalloc(adapter->bounce_buffer_size,
-                                        GFP_KERNEL);
+
+       return 0;
+}
+
+static void release_bounce_buffer(struct ibmvnic_adapter *adapter)
+{
+       struct device *dev = &adapter->vdev->dev;
+
        if (!adapter->bounce_buffer)
-               goto bounce_alloc_failed;
+               return;
 
-       adapter->bounce_buffer_dma = dma_map_single(dev, adapter->bounce_buffer,
-                                                   adapter->bounce_buffer_size,
-                                                   DMA_TO_DEVICE);
-       if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
-               dev_err(dev, "Couldn't map tx bounce buffer\n");
-               goto bounce_map_failed;
+       if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
+               dma_unmap_single(dev, adapter->bounce_buffer_dma,
+                                adapter->bounce_buffer_size,
+                                DMA_BIDIRECTIONAL);
+               adapter->bounce_buffer_dma = DMA_ERROR_CODE;
+       }
+
+       kfree(adapter->bounce_buffer);
+       adapter->bounce_buffer = NULL;
+}
+
+static int init_bounce_buffer(struct net_device *netdev)
+{
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+       struct device *dev = &adapter->vdev->dev;
+       char *buf;
+       int buf_sz;
+       dma_addr_t map_addr;
+
+       buf_sz = (netdev->mtu + ETH_HLEN - 1) / PAGE_SIZE + 1;
+       buf = kmalloc(buf_sz, GFP_KERNEL);
+       if (!buf)
+               return -1;
+
+       map_addr = dma_map_single(dev, buf, buf_sz, DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, map_addr)) {
+               dev_err(dev, "Couldn't map bounce buffer\n");
+               kfree(buf);
+               return -1;
+       }
+
+       adapter->bounce_buffer = buf;
+       adapter->bounce_buffer_size = buf_sz;
+       adapter->bounce_buffer_dma = map_addr;
+       return 0;
+}
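
init_bounce_buffer() follows an allocate/map/publish order: the adapter fields are written only once both steps succeed, which is also why the kmalloc() must use the local buf_sz rather than the not-yet-assigned adapter->bounce_buffer_size. A minimal userspace analogue of the pattern (map_buffer() is hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
        char *buf;
        size_t size;
        long handle;
};

/* Hypothetical mapping step; fails by returning a negative handle. */
static long map_buffer(char *buf, size_t sz)
{
        return buf ? (long)sz : -1;
}

static int init_buffer(struct ctx *c, size_t sz)
{
        char *buf = malloc(sz);
        long handle;

        if (!buf)
                return -1;

        handle = map_buffer(buf, sz);
        if (handle < 0) {
                free(buf);              /* roll back before publishing */
                return -1;
        }

        /* publish on the context only after every step succeeded */
        c->buf = buf;
        c->size = sz;
        c->handle = handle;
        return 0;
}

int main(void)
{
        struct ctx c = { 0 };

        printf("init_buffer() = %d\n", init_buffer(&c, 4096));
        free(c.buf);
        return 0;
}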
+
+static int ibmvnic_login(struct net_device *netdev)
+{
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+       unsigned long timeout = msecs_to_jiffies(30000);
+       struct device *dev = &adapter->vdev->dev;
+
+       do {
+               if (adapter->renegotiate) {
+                       adapter->renegotiate = false;
+                       release_sub_crqs(adapter);
+
+                       reinit_completion(&adapter->init_done);
+                       send_cap_queries(adapter);
+                       if (!wait_for_completion_timeout(&adapter->init_done,
+                                                        timeout)) {
+                               dev_err(dev, "Capabilities query timeout\n");
+                               return -1;
+                       }
+               }
+
+               reinit_completion(&adapter->init_done);
+               send_login(adapter);
+               if (!wait_for_completion_timeout(&adapter->init_done,
+                                                timeout)) {
+                       dev_err(dev, "Login timeout\n");
+                       return -1;
+               }
+       } while (adapter->renegotiate);
+
+       return 0;
+}
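
A compressed sketch of the control flow in ibmvnic_login() above, with hypothetical helpers standing in for the CRQ round-trips and their completion timeouts:

#include <stdbool.h>
#include <stdio.h>

static int tries;

/* Hypothetical stand-ins: each "done" helper returns true once the
 * response arrives before its timeout; renegotiation fires once here. */
static bool caps_query_done(void)  { return true; }
static bool login_done(void)       { return true; }
static bool need_renegotiate(void) { return tries++ < 1; }

static int do_login(void)
{
        bool renegotiate;

        do {
                renegotiate = need_renegotiate();
                if (renegotiate && !caps_query_done())
                        return -1;      /* capabilities query timed out */
                if (!login_done())
                        return -1;      /* login timed out */
        } while (renegotiate);

        return 0;
}

int main(void)
{
        printf("do_login() = %d\n", do_login());
        return 0;
}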
+
+static void release_resources(struct ibmvnic_adapter *adapter)
+{
+       release_bounce_buffer(adapter);
+       release_tx_pools(adapter);
+       release_rx_pools(adapter);
+
+       release_sub_crqs(adapter);
+       release_crq_queue(adapter);
+
+       release_stats_token(adapter);
+}
+
+static int ibmvnic_open(struct net_device *netdev)
+{
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+       struct device *dev = &adapter->vdev->dev;
+       union ibmvnic_crq crq;
+       int rc = 0;
+       int i;
+
+       if (adapter->is_closed) {
+               rc = ibmvnic_init(adapter);
+               if (rc)
+                       return rc;
+       }
+
+       rc = ibmvnic_login(netdev);
+       if (rc)
+               return rc;
+
+       rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues);
+       if (rc) {
+               dev_err(dev, "failed to set the number of tx queues\n");
+               return -1;
+       }
+
+       rc = init_sub_crq_irqs(adapter);
+       if (rc) {
+               dev_err(dev, "failed to initialize sub crq irqs\n");
+               return -1;
+       }
+
+       adapter->map_id = 1;
+       adapter->napi = kcalloc(adapter->req_rx_queues,
+                               sizeof(struct napi_struct), GFP_KERNEL);
+       if (!adapter->napi)
+               goto ibmvnic_open_fail;
+       for (i = 0; i < adapter->req_rx_queues; i++) {
+               netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
+                              NAPI_POLL_WEIGHT);
+               napi_enable(&adapter->napi[i]);
        }
+
+       send_map_query(adapter);
+
+       rc = init_rx_pools(netdev);
+       if (rc)
+               goto ibmvnic_open_fail;
+
+       rc = init_tx_pools(netdev);
+       if (rc)
+               goto ibmvnic_open_fail;
+
+       rc = init_bounce_buffer(netdev);
+       if (rc)
+               goto ibmvnic_open_fail;
+
        replenish_pools(adapter);
 
        /* We're ready to receive frames, enable the sub-crq interrupts and
@@ -473,48 +663,20 @@ static int ibmvnic_open(struct net_device *netdev)
        ibmvnic_send_crq(adapter, &crq);
 
        netif_tx_start_all_queues(netdev);
+       adapter->is_closed = false;
 
        return 0;
 
-bounce_map_failed:
-       kfree(adapter->bounce_buffer);
-bounce_alloc_failed:
-       i = tx_subcrqs - 1;
-       kfree(adapter->tx_pool[i].free_map);
-tx_fm_alloc_failed:
-       free_long_term_buff(adapter, &adapter->tx_pool[i].long_term_buff);
-tx_ltb_alloc_failed:
-       kfree(adapter->tx_pool[i].tx_buff);
-tx_pool_alloc_failed:
-       for (j = 0; j < i; j++) {
-               kfree(adapter->tx_pool[j].tx_buff);
-               free_long_term_buff(adapter,
-                                   &adapter->tx_pool[j].long_term_buff);
-               kfree(adapter->tx_pool[j].free_map);
-       }
-       kfree(adapter->tx_pool);
-       adapter->tx_pool = NULL;
-tx_pool_arr_alloc_failed:
-       i = rxadd_subcrqs;
-rx_pool_alloc_failed:
-       for (j = 0; j < i; j++) {
-               free_rx_pool(adapter, &adapter->rx_pool[j]);
-               free_long_term_buff(adapter,
-                                   &adapter->rx_pool[j].long_term_buff);
-       }
-       kfree(adapter->rx_pool);
-       adapter->rx_pool = NULL;
-rx_pool_arr_alloc_failed:
+ibmvnic_open_fail:
+       if (adapter->napi)
+               for (i = 0; i < adapter->req_rx_queues; i++)
+                       napi_disable(&adapter->napi[i]);
-alloc_napi_failed:
+       release_resources(adapter);
        return -ENOMEM;
 }
 
 static int ibmvnic_close(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       struct device *dev = &adapter->vdev->dev;
        union ibmvnic_crq crq;
        int i;
 
@@ -526,45 +688,16 @@ static int ibmvnic_close(struct net_device *netdev)
        if (!adapter->failover)
                netif_tx_stop_all_queues(netdev);
 
-       if (adapter->bounce_buffer) {
-               if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
-                       dma_unmap_single(&adapter->vdev->dev,
-                                        adapter->bounce_buffer_dma,
-                                        adapter->bounce_buffer_size,
-                                        DMA_BIDIRECTIONAL);
-                       adapter->bounce_buffer_dma = DMA_ERROR_CODE;
-               }
-               kfree(adapter->bounce_buffer);
-               adapter->bounce_buffer = NULL;
-       }
-
        memset(&crq, 0, sizeof(crq));
        crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
        crq.logical_link_state.cmd = LOGICAL_LINK_STATE;
        crq.logical_link_state.link_state = IBMVNIC_LOGICAL_LNK_DN;
        ibmvnic_send_crq(adapter, &crq);
 
-       for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
-            i++) {
-               kfree(adapter->tx_pool[i].tx_buff);
-               free_long_term_buff(adapter,
-                                   &adapter->tx_pool[i].long_term_buff);
-               kfree(adapter->tx_pool[i].free_map);
-       }
-       kfree(adapter->tx_pool);
-       adapter->tx_pool = NULL;
-
-       for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
-            i++) {
-               free_rx_pool(adapter, &adapter->rx_pool[i]);
-               free_long_term_buff(adapter,
-                                   &adapter->rx_pool[i].long_term_buff);
-       }
-       kfree(adapter->rx_pool);
-       adapter->rx_pool = NULL;
+       release_resources(adapter);
 
+       adapter->is_closed = true;
        adapter->closing = false;
-
        return 0;
 }
 
@@ -1249,47 +1382,40 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
        int i;
 
        if (adapter->tx_scrq) {
-               for (i = 0; i < adapter->req_tx_queues; i++)
-                       if (adapter->tx_scrq[i]) {
+               for (i = 0; i < adapter->req_tx_queues; i++) {
+                       if (!adapter->tx_scrq[i])
+                               continue;
+
+                       if (adapter->tx_scrq[i]->irq) {
                                free_irq(adapter->tx_scrq[i]->irq,
                                         adapter->tx_scrq[i]);
                                irq_dispose_mapping(adapter->tx_scrq[i]->irq);
-                               release_sub_crq_queue(adapter,
-                                                     adapter->tx_scrq[i]);
+                               adapter->tx_scrq[i]->irq = 0;
                        }
+
+                       release_sub_crq_queue(adapter, adapter->tx_scrq[i]);
+               }
+
+               kfree(adapter->tx_scrq);
                adapter->tx_scrq = NULL;
        }
 
        if (adapter->rx_scrq) {
-               for (i = 0; i < adapter->req_rx_queues; i++)
-                       if (adapter->rx_scrq[i]) {
+               for (i = 0; i < adapter->req_rx_queues; i++) {
+                       if (!adapter->rx_scrq[i])
+                               continue;
+
+                       if (adapter->rx_scrq[i]->irq) {
                                free_irq(adapter->rx_scrq[i]->irq,
                                         adapter->rx_scrq[i]);
                                irq_dispose_mapping(adapter->rx_scrq[i]->irq);
-                               release_sub_crq_queue(adapter,
-                                                     adapter->rx_scrq[i]);
+                               adapter->rx_scrq[i]->irq = 0;
                        }
-               adapter->rx_scrq = NULL;
-       }
-}
-
-static void release_sub_crqs_no_irqs(struct ibmvnic_adapter *adapter)
-{
-       int i;
 
-       if (adapter->tx_scrq) {
-               for (i = 0; i < adapter->req_tx_queues; i++)
-                       if (adapter->tx_scrq[i])
-                               release_sub_crq_queue(adapter,
-                                                     adapter->tx_scrq[i]);
-               adapter->tx_scrq = NULL;
-       }
+                       release_sub_crq_queue(adapter, adapter->rx_scrq[i]);
+               }
 
-       if (adapter->rx_scrq) {
-               for (i = 0; i < adapter->req_rx_queues; i++)
-                       if (adapter->rx_scrq[i])
-                               release_sub_crq_queue(adapter,
-                                                     adapter->rx_scrq[i]);
+               kfree(adapter->rx_scrq);
                adapter->rx_scrq = NULL;
        }
 }
@@ -1485,7 +1611,7 @@ req_tx_irq_failed:
                free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
                irq_dispose_mapping(adapter->rx_scrq[j]->irq);
        }
-       release_sub_crqs_no_irqs(adapter);
+       release_sub_crqs(adapter);
        return rc;
 }
 
@@ -2240,80 +2366,29 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
        kfree(error_buff);
 }
 
-static void handle_dump_size_rsp(union ibmvnic_crq *crq,
-                                struct ibmvnic_adapter *adapter)
+static void handle_error_indication(union ibmvnic_crq *crq,
+                                   struct ibmvnic_adapter *adapter)
 {
-       int len = be32_to_cpu(crq->request_dump_size_rsp.len);
+       int detail_len = be32_to_cpu(crq->error_indication.detail_error_sz);
        struct ibmvnic_inflight_cmd *inflight_cmd;
        struct device *dev = &adapter->vdev->dev;
-       union ibmvnic_crq newcrq;
+       struct ibmvnic_error_buff *error_buff;
+       union ibmvnic_crq new_crq;
        unsigned long flags;
 
-       /* allocate and map buffer */
-       adapter->dump_data = kmalloc(len, GFP_KERNEL);
-       if (!adapter->dump_data) {
-               complete(&adapter->fw_done);
-               return;
-       }
+       dev_err(dev, "Firmware reports %serror id %x, cause %d\n",
+               crq->error_indication.
+                   flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
+               be32_to_cpu(crq->error_indication.error_id),
+               be16_to_cpu(crq->error_indication.error_cause));
 
-       adapter->dump_data_token = dma_map_single(dev, adapter->dump_data, len,
-                                                 DMA_FROM_DEVICE);
+       error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC);
+       if (!error_buff)
+               return;
 
-       if (dma_mapping_error(dev, adapter->dump_data_token)) {
-               if (!firmware_has_feature(FW_FEATURE_CMO))
-                       dev_err(dev, "Couldn't map dump data\n");
-               kfree(adapter->dump_data);
-               complete(&adapter->fw_done);
-               return;
-       }
-
-       inflight_cmd = kmalloc(sizeof(*inflight_cmd), GFP_ATOMIC);
-       if (!inflight_cmd) {
-               dma_unmap_single(dev, adapter->dump_data_token, len,
-                                DMA_FROM_DEVICE);
-               kfree(adapter->dump_data);
-               complete(&adapter->fw_done);
-               return;
-       }
-
-       memset(&newcrq, 0, sizeof(newcrq));
-       newcrq.request_dump.first = IBMVNIC_CRQ_CMD;
-       newcrq.request_dump.cmd = REQUEST_DUMP;
-       newcrq.request_dump.ioba = cpu_to_be32(adapter->dump_data_token);
-       newcrq.request_dump.len = cpu_to_be32(adapter->dump_data_size);
-
-       memcpy(&inflight_cmd->crq, &newcrq, sizeof(newcrq));
-
-       spin_lock_irqsave(&adapter->inflight_lock, flags);
-       list_add_tail(&inflight_cmd->list, &adapter->inflight);
-       spin_unlock_irqrestore(&adapter->inflight_lock, flags);
-
-       ibmvnic_send_crq(adapter, &newcrq);
-}
-
-static void handle_error_indication(union ibmvnic_crq *crq,
-                                   struct ibmvnic_adapter *adapter)
-{
-       int detail_len = be32_to_cpu(crq->error_indication.detail_error_sz);
-       struct ibmvnic_inflight_cmd *inflight_cmd;
-       struct device *dev = &adapter->vdev->dev;
-       struct ibmvnic_error_buff *error_buff;
-       union ibmvnic_crq new_crq;
-       unsigned long flags;
-
-       dev_err(dev, "Firmware reports %serror id %x, cause %d\n",
-               crq->error_indication.
-                   flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
-               be32_to_cpu(crq->error_indication.error_id),
-               be16_to_cpu(crq->error_indication.error_cause));
-
-       error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC);
-       if (!error_buff)
-               return;
-
-       error_buff->buff = kmalloc(detail_len, GFP_ATOMIC);
-       if (!error_buff->buff) {
-               kfree(error_buff);
+       error_buff->buff = kmalloc(detail_len, GFP_ATOMIC);
+       if (!error_buff->buff) {
+               kfree(error_buff);
                return;
        }
 
@@ -2426,7 +2501,7 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
                         *req_value,
                         (long int)be64_to_cpu(crq->request_capability_rsp.
                                               number), name);
-               release_sub_crqs_no_irqs(adapter);
+               release_sub_crqs(adapter);
                *req_value = be64_to_cpu(crq->request_capability_rsp.number);
                init_sub_crqs(adapter, 1);
                return;
@@ -2471,7 +2546,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        struct device *dev = &adapter->vdev->dev;
        struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf;
        struct ibmvnic_login_buffer *login = adapter->login_buf;
-       union ibmvnic_crq crq;
        int i;
 
        dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
@@ -2506,11 +2580,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        }
        complete(&adapter->init_done);
 
-       memset(&crq, 0, sizeof(crq));
-       crq.request_ras_comp_num.first = IBMVNIC_CRQ_CMD;
-       crq.request_ras_comp_num.cmd = REQUEST_RAS_COMP_NUM;
-       ibmvnic_send_crq(adapter, &crq);
-
        return 0;
 }
 
@@ -2746,476 +2815,6 @@ out:
        }
 }
 
-static void handle_control_ras_rsp(union ibmvnic_crq *crq,
-                                  struct ibmvnic_adapter *adapter)
-{
-       u8 correlator = crq->control_ras_rsp.correlator;
-       struct device *dev = &adapter->vdev->dev;
-       bool found = false;
-       int i;
-
-       if (crq->control_ras_rsp.rc.code) {
-               dev_warn(dev, "Control ras failed rc=%d\n",
-                        crq->control_ras_rsp.rc.code);
-               return;
-       }
-
-       for (i = 0; i < adapter->ras_comp_num; i++) {
-               if (adapter->ras_comps[i].correlator == correlator) {
-                       found = true;
-                       break;
-               }
-       }
-
-       if (!found) {
-               dev_warn(dev, "Correlator not found on control_ras_rsp\n");
-               return;
-       }
-
-       switch (crq->control_ras_rsp.op) {
-       case IBMVNIC_TRACE_LEVEL:
-               adapter->ras_comps[i].trace_level = crq->control_ras.level;
-               break;
-       case IBMVNIC_ERROR_LEVEL:
-               adapter->ras_comps[i].error_check_level =
-                   crq->control_ras.level;
-               break;
-       case IBMVNIC_TRACE_PAUSE:
-               adapter->ras_comp_int[i].paused = 1;
-               break;
-       case IBMVNIC_TRACE_RESUME:
-               adapter->ras_comp_int[i].paused = 0;
-               break;
-       case IBMVNIC_TRACE_ON:
-               adapter->ras_comps[i].trace_on = 1;
-               break;
-       case IBMVNIC_TRACE_OFF:
-               adapter->ras_comps[i].trace_on = 0;
-               break;
-       case IBMVNIC_CHG_TRACE_BUFF_SZ:
-               /* trace_buff_sz is 3 bytes, stuff it into an int */
-               ((u8 *)(&adapter->ras_comps[i].trace_buff_size))[0] = 0;
-               ((u8 *)(&adapter->ras_comps[i].trace_buff_size))[1] =
-                   crq->control_ras_rsp.trace_buff_sz[0];
-               ((u8 *)(&adapter->ras_comps[i].trace_buff_size))[2] =
-                   crq->control_ras_rsp.trace_buff_sz[1];
-               ((u8 *)(&adapter->ras_comps[i].trace_buff_size))[3] =
-                   crq->control_ras_rsp.trace_buff_sz[2];
-               break;
-       default:
-               dev_err(dev, "invalid op %d on control_ras_rsp",
-                       crq->control_ras_rsp.op);
-       }
-}
-
-static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len,
-                         loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       struct device *dev = &adapter->vdev->dev;
-       struct ibmvnic_fw_trace_entry *trace;
-       int num = ras_comp_int->num;
-       union ibmvnic_crq crq;
-       dma_addr_t trace_tok;
-
-       if (*ppos >= be32_to_cpu(adapter->ras_comps[num].trace_buff_size))
-               return 0;
-
-       trace =
-           dma_alloc_coherent(dev,
-                              be32_to_cpu(adapter->ras_comps[num].
-                                          trace_buff_size), &trace_tok,
-                              GFP_KERNEL);
-       if (!trace) {
-               dev_err(dev, "Couldn't alloc trace buffer\n");
-               return 0;
-       }
-
-       memset(&crq, 0, sizeof(crq));
-       crq.collect_fw_trace.first = IBMVNIC_CRQ_CMD;
-       crq.collect_fw_trace.cmd = COLLECT_FW_TRACE;
-       crq.collect_fw_trace.correlator = adapter->ras_comps[num].correlator;
-       crq.collect_fw_trace.ioba = cpu_to_be32(trace_tok);
-       crq.collect_fw_trace.len = adapter->ras_comps[num].trace_buff_size;
-
-       init_completion(&adapter->fw_done);
-       ibmvnic_send_crq(adapter, &crq);
-       wait_for_completion(&adapter->fw_done);
-
-       if (*ppos + len > be32_to_cpu(adapter->ras_comps[num].trace_buff_size))
-               len =
-                   be32_to_cpu(adapter->ras_comps[num].trace_buff_size) -
-                   *ppos;
-
-       copy_to_user(user_buf, &((u8 *)trace)[*ppos], len);
-
-       dma_free_coherent(dev,
-                         be32_to_cpu(adapter->ras_comps[num].trace_buff_size),
-                         trace, trace_tok);
-       *ppos += len;
-       return len;
-}
-
-static const struct file_operations trace_ops = {
-       .owner          = THIS_MODULE,
-       .open           = simple_open,
-       .read           = trace_read,
-};
-
-static ssize_t paused_read(struct file *file, char __user *user_buf, size_t len,
-                          loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       char buff[5]; /*  1 or 0 plus \n and \0 */
-       int size;
-
-       size = sprintf(buff, "%d\n", adapter->ras_comp_int[num].paused);
-
-       if (*ppos >= size)
-               return 0;
-
-       copy_to_user(user_buf, buff, size);
-       *ppos += size;
-       return size;
-}
-
-static ssize_t paused_write(struct file *file, const char __user *user_buf,
-                           size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       union ibmvnic_crq crq;
-       unsigned long val;
-       char buff[9]; /* decimal max int plus \n and \0 */
-
-       copy_from_user(buff, user_buf, sizeof(buff));
-       val = kstrtoul(buff, 10, NULL);
-
-       adapter->ras_comp_int[num].paused = val ? 1 : 0;
-
-       memset(&crq, 0, sizeof(crq));
-       crq.control_ras.first = IBMVNIC_CRQ_CMD;
-       crq.control_ras.cmd = CONTROL_RAS;
-       crq.control_ras.correlator = adapter->ras_comps[num].correlator;
-       crq.control_ras.op = val ? IBMVNIC_TRACE_PAUSE : IBMVNIC_TRACE_RESUME;
-       ibmvnic_send_crq(adapter, &crq);
-
-       return len;
-}
-
-static const struct file_operations paused_ops = {
-       .owner          = THIS_MODULE,
-       .open           = simple_open,
-       .read           = paused_read,
-       .write          = paused_write,
-};
-
-static ssize_t tracing_read(struct file *file, char __user *user_buf,
-                           size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       char buff[5]; /*  1 or 0 plus \n and \0 */
-       int size;
-
-       size = sprintf(buff, "%d\n", adapter->ras_comps[num].trace_on);
-
-       if (*ppos >= size)
-               return 0;
-
-       copy_to_user(user_buf, buff, size);
-       *ppos += size;
-       return size;
-}
-
-static ssize_t tracing_write(struct file *file, const char __user *user_buf,
-                            size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       union ibmvnic_crq crq;
-       unsigned long val;
-       char buff[9]; /* decimal max int plus \n and \0 */
-
-       copy_from_user(buff, user_buf, sizeof(buff));
-       val = kstrtoul(buff, 10, NULL);
-
-       memset(&crq, 0, sizeof(crq));
-       crq.control_ras.first = IBMVNIC_CRQ_CMD;
-       crq.control_ras.cmd = CONTROL_RAS;
-       crq.control_ras.correlator = adapter->ras_comps[num].correlator;
-       crq.control_ras.op = val ? IBMVNIC_TRACE_ON : IBMVNIC_TRACE_OFF;
-
-       return len;
-}
-
-static const struct file_operations tracing_ops = {
-       .owner          = THIS_MODULE,
-       .open           = simple_open,
-       .read           = tracing_read,
-       .write          = tracing_write,
-};
-
-static ssize_t error_level_read(struct file *file, char __user *user_buf,
-                               size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       char buff[5]; /* decimal max char plus \n and \0 */
-       int size;
-
-       size = sprintf(buff, "%d\n", adapter->ras_comps[num].error_check_level);
-
-       if (*ppos >= size)
-               return 0;
-
-       copy_to_user(user_buf, buff, size);
-       *ppos += size;
-       return size;
-}
-
-static ssize_t error_level_write(struct file *file, const char __user *user_buf,
-                                size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       union ibmvnic_crq crq;
-       unsigned long val;
-       char buff[9]; /* decimal max int plus \n and \0 */
-
-       copy_from_user(buff, user_buf, sizeof(buff));
-       val = kstrtoul(buff, 10, NULL);
-
-       if (val > 9)
-               val = 9;
-
-       memset(&crq, 0, sizeof(crq));
-       crq.control_ras.first = IBMVNIC_CRQ_CMD;
-       crq.control_ras.cmd = CONTROL_RAS;
-       crq.control_ras.correlator = adapter->ras_comps[num].correlator;
-       crq.control_ras.op = IBMVNIC_ERROR_LEVEL;
-       crq.control_ras.level = val;
-       ibmvnic_send_crq(adapter, &crq);
-
-       return len;
-}
-
-static const struct file_operations error_level_ops = {
-       .owner          = THIS_MODULE,
-       .open           = simple_open,
-       .read           = error_level_read,
-       .write          = error_level_write,
-};
-
-static ssize_t trace_level_read(struct file *file, char __user *user_buf,
-                               size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       char buff[5]; /* decimal max char plus \n and \0 */
-       int size;
-
-       size = sprintf(buff, "%d\n", adapter->ras_comps[num].trace_level);
-       if (*ppos >= size)
-               return 0;
-
-       copy_to_user(user_buf, buff, size);
-       *ppos += size;
-       return size;
-}
-
-static ssize_t trace_level_write(struct file *file, const char __user *user_buf,
-                                size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       union ibmvnic_crq crq;
-       unsigned long val;
-       char buff[9]; /* decimal max int plus \n and \0 */
-
-       copy_from_user(buff, user_buf, sizeof(buff));
-       val = kstrtoul(buff, 10, NULL);
-       if (val > 9)
-               val = 9;
-
-       memset(&crq, 0, sizeof(crq));
-       crq.control_ras.first = IBMVNIC_CRQ_CMD;
-       crq.control_ras.cmd = CONTROL_RAS;
-       crq.control_ras.correlator =
-           adapter->ras_comps[ras_comp_int->num].correlator;
-       crq.control_ras.op = IBMVNIC_TRACE_LEVEL;
-       crq.control_ras.level = val;
-       ibmvnic_send_crq(adapter, &crq);
-
-       return len;
-}
-
-static const struct file_operations trace_level_ops = {
-       .owner          = THIS_MODULE,
-       .open           = simple_open,
-       .read           = trace_level_read,
-       .write          = trace_level_write,
-};
-
-static ssize_t trace_buff_size_read(struct file *file, char __user *user_buf,
-                                   size_t len, loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       int num = ras_comp_int->num;
-       char buff[9]; /* decimal max int plus \n and \0 */
-       int size;
-
-       size = sprintf(buff, "%d\n", adapter->ras_comps[num].trace_buff_size);
-       if (*ppos >= size)
-               return 0;
-
-       copy_to_user(user_buf, buff, size);
-       *ppos += size;
-       return size;
-}
-
-static ssize_t trace_buff_size_write(struct file *file,
-                                    const char __user *user_buf, size_t len,
-                                    loff_t *ppos)
-{
-       struct ibmvnic_fw_comp_internal *ras_comp_int = file->private_data;
-       struct ibmvnic_adapter *adapter = ras_comp_int->adapter;
-       union ibmvnic_crq crq;
-       unsigned long val;
-       char buff[9]; /* decimal max int plus \n and \0 */
-
-       copy_from_user(buff, user_buf, sizeof(buff));
-       val = kstrtoul(buff, 10, NULL);
-
-       memset(&crq, 0, sizeof(crq));
-       crq.control_ras.first = IBMVNIC_CRQ_CMD;
-       crq.control_ras.cmd = CONTROL_RAS;
-       crq.control_ras.correlator =
-           adapter->ras_comps[ras_comp_int->num].correlator;
-       crq.control_ras.op = IBMVNIC_CHG_TRACE_BUFF_SZ;
-       /* trace_buff_sz is 3 bytes, stuff an int into it */
-       crq.control_ras.trace_buff_sz[0] = ((u8 *)(&val))[5];
-       crq.control_ras.trace_buff_sz[1] = ((u8 *)(&val))[6];
-       crq.control_ras.trace_buff_sz[2] = ((u8 *)(&val))[7];
-       ibmvnic_send_crq(adapter, &crq);
-
-       return len;
-}
-
-static const struct file_operations trace_size_ops = {
-       .owner          = THIS_MODULE,
-       .open           = simple_open,
-       .read           = trace_buff_size_read,
-       .write          = trace_buff_size_write,
-};
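
(Editorial note: the trace_buff_sz stuffing above, ((u8 *)(&val))[5..7], only picks out the low three bytes on a 64-bit big-endian host; on little-endian POWER it would capture the wrong bytes. A portable sketch of the same 3-byte big-endian pack, assuming pack_be24() as a hypothetical helper:)

#include <asm/byteorder.h>

static void pack_be24(u8 dst[3], unsigned long val)
{
	__be32 be = cpu_to_be32((u32)val);	/* 4-byte big-endian image */
	const u8 *p = (const u8 *)&be;

	dst[0] = p[1];	/* drop the most significant byte, keep low 24 bits */
	dst[1] = p[2];
	dst[2] = p[3];
}
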
-
-static void handle_request_ras_comps_rsp(union ibmvnic_crq *crq,
-                                        struct ibmvnic_adapter *adapter)
-{
-       struct device *dev = &adapter->vdev->dev;
-       struct dentry *dir_ent;
-       struct dentry *ent;
-       int i;
-
-       debugfs_remove_recursive(adapter->ras_comps_ent);
-
-       adapter->ras_comps_ent = debugfs_create_dir("ras_comps",
-                                                   adapter->debugfs_dir);
-       if (!adapter->ras_comps_ent || IS_ERR(adapter->ras_comps_ent)) {
-               dev_info(dev, "debugfs create ras_comps dir failed\n");
-               return;
-       }
-
-       for (i = 0; i < adapter->ras_comp_num; i++) {
-               dir_ent = debugfs_create_dir(adapter->ras_comps[i].name,
-                                            adapter->ras_comps_ent);
-               if (!dir_ent || IS_ERR(dir_ent)) {
-                       dev_info(dev, "debugfs create %s dir failed\n",
-                                adapter->ras_comps[i].name);
-                       continue;
-               }
-
-               adapter->ras_comp_int[i].adapter = adapter;
-               adapter->ras_comp_int[i].num = i;
-               adapter->ras_comp_int[i].desc_blob.data =
-                   &adapter->ras_comps[i].description;
-               adapter->ras_comp_int[i].desc_blob.size =
-                   sizeof(adapter->ras_comps[i].description);
-
-               /* Don't need to remember the dentry's because the debugfs dir
-                * gets removed recursively
-                */
-               ent = debugfs_create_blob("description", S_IRUGO, dir_ent,
-                                         &adapter->ras_comp_int[i].desc_blob);
-               ent = debugfs_create_file("trace_buf_size", S_IRUGO | S_IWUSR,
-                                         dir_ent, &adapter->ras_comp_int[i],
-                                         &trace_size_ops);
-               ent = debugfs_create_file("trace_level",
-                                         S_IRUGO |
-                                         (adapter->ras_comps[i].trace_level !=
-                                          0xFF  ? S_IWUSR : 0),
-                                          dir_ent, &adapter->ras_comp_int[i],
-                                          &trace_level_ops);
-               ent = debugfs_create_file("error_level",
-                                         S_IRUGO |
-                                         (adapter->
-                                          ras_comps[i].error_check_level !=
-                                          0xFF ? S_IWUSR : 0),
-                                         dir_ent, &adapter->ras_comp_int[i],
-                                         &trace_level_ops);
-               ent = debugfs_create_file("tracing", S_IRUGO | S_IWUSR,
-                                         dir_ent, &adapter->ras_comp_int[i],
-                                         &tracing_ops);
-               ent = debugfs_create_file("paused", S_IRUGO | S_IWUSR,
-                                         dir_ent, &adapter->ras_comp_int[i],
-                                         &paused_ops);
-               ent = debugfs_create_file("trace", S_IRUGO, dir_ent,
-                                         &adapter->ras_comp_int[i],
-                                         &trace_ops);
-       }
-}
-
-static void handle_request_ras_comp_num_rsp(union ibmvnic_crq *crq,
-                                           struct ibmvnic_adapter *adapter)
-{
-       int len = adapter->ras_comp_num * sizeof(struct ibmvnic_fw_component);
-       struct device *dev = &adapter->vdev->dev;
-       union ibmvnic_crq newcrq;
-
-       adapter->ras_comps = dma_alloc_coherent(dev, len,
-                                               &adapter->ras_comps_tok,
-                                               GFP_KERNEL);
-       if (!adapter->ras_comps) {
-               if (!firmware_has_feature(FW_FEATURE_CMO))
-                       dev_err(dev, "Couldn't alloc fw comps buffer\n");
-               return;
-       }
-
-       adapter->ras_comp_int = kmalloc(adapter->ras_comp_num *
-                                       sizeof(struct ibmvnic_fw_comp_internal),
-                                       GFP_KERNEL);
-       if (!adapter->ras_comp_int)
-               dma_free_coherent(dev, len, adapter->ras_comps,
-                                 adapter->ras_comps_tok);
-
-       memset(&newcrq, 0, sizeof(newcrq));
-       newcrq.request_ras_comps.first = IBMVNIC_CRQ_CMD;
-       newcrq.request_ras_comps.cmd = REQUEST_RAS_COMPS;
-       newcrq.request_ras_comps.ioba = cpu_to_be32(adapter->ras_comps_tok);
-       newcrq.request_ras_comps.len = cpu_to_be32(len);
-       ibmvnic_send_crq(adapter, &newcrq);
-}
-
 static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_inflight_cmd *inflight_cmd, *tmp1;
@@ -3237,9 +2836,6 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
                        kfree(adapter->login_rsp_buf);
                        kfree(adapter->login_buf);
                        break;
-               case REQUEST_DUMP:
-                       complete(&adapter->fw_done);
-                       break;
                case REQUEST_ERROR_INFO:
                        spin_lock_irqsave(&adapter->error_list_lock, flags2);
                        list_for_each_entry_safe(error_buff, tmp2,
@@ -3399,14 +2995,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                netdev_dbg(netdev, "Got Statistics Response\n");
                complete(&adapter->stats_done);
                break;
-       case REQUEST_DUMP_SIZE_RSP:
-               netdev_dbg(netdev, "Got Request Dump Size Response\n");
-               handle_dump_size_rsp(crq, adapter);
-               break;
-       case REQUEST_DUMP_RSP:
-               netdev_dbg(netdev, "Got Request Dump Response\n");
-               complete(&adapter->fw_done);
-               break;
        case QUERY_IP_OFFLOAD_RSP:
                netdev_dbg(netdev, "Got Query IP offload Response\n");
                handle_query_ip_offload_rsp(adapter);
@@ -3419,26 +3007,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                dma_unmap_single(dev, adapter->ip_offload_ctrl_tok,
                                 sizeof(adapter->ip_offload_ctrl),
                                 DMA_TO_DEVICE);
-               /* We're done with the queries, perform the login */
-               send_login(adapter);
-               break;
-       case REQUEST_RAS_COMP_NUM_RSP:
-               netdev_dbg(netdev, "Got Request RAS Comp Num Response\n");
-               if (crq->request_ras_comp_num_rsp.rc.code == 10) {
-                       netdev_dbg(netdev, "Request RAS Comp Num not supported\n");
-                       break;
-               }
-               adapter->ras_comp_num =
-                   be32_to_cpu(crq->request_ras_comp_num_rsp.num_components);
-               handle_request_ras_comp_num_rsp(crq, adapter);
-               break;
-       case REQUEST_RAS_COMPS_RSP:
-               netdev_dbg(netdev, "Got Request RAS Comps Response\n");
-               handle_request_ras_comps_rsp(crq, adapter);
-               break;
-       case CONTROL_RAS_RSP:
-               netdev_dbg(netdev, "Got Control RAS Response\n");
-               handle_control_ras_rsp(crq, adapter);
+               complete(&adapter->init_done);
                break;
        case COLLECT_FW_TRACE_RSP:
                netdev_dbg(netdev, "Got Collect firmware trace Response\n");
@@ -3545,12 +3114,15 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter)
        return rc;
 }
 
-static void ibmvnic_release_crq_queue(struct ibmvnic_adapter *adapter)
+static void release_crq_queue(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_crq_queue *crq = &adapter->crq;
        struct vio_dev *vdev = adapter->vdev;
        long rc;
 
+       if (!crq->msgs)
+               return;
+
        netdev_dbg(adapter->netdev, "Releasing CRQ\n");
        free_irq(vdev->irq, adapter);
        tasklet_kill(&adapter->tasklet);
@@ -3561,15 +3133,19 @@ static void ibmvnic_release_crq_queue(struct ibmvnic_adapter *adapter)
        dma_unmap_single(&vdev->dev, crq->msg_token, PAGE_SIZE,
                         DMA_BIDIRECTIONAL);
        free_page((unsigned long)crq->msgs);
+       crq->msgs = NULL;
 }
 
-static int ibmvnic_init_crq_queue(struct ibmvnic_adapter *adapter)
+static int init_crq_queue(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_crq_queue *crq = &adapter->crq;
        struct device *dev = &adapter->vdev->dev;
        struct vio_dev *vdev = adapter->vdev;
        int rc, retrc = -ENOMEM;
 
+       if (crq->msgs)
+               return 0;
+
        crq->msgs = (union ibmvnic_crq *)get_zeroed_page(GFP_KERNEL);
        /* Should we allocate more than one page? */
 
@@ -3631,48 +3207,10 @@ reg_crq_failed:
        dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
 map_failed:
        free_page((unsigned long)crq->msgs);
+       crq->msgs = NULL;
        return retrc;
 }
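
(Editorial note: with the crq->msgs guards added above, init_crq_queue() and release_crq_queue() become idempotent -- msgs doubles as the "allocated" flag and is cleared on every free. A hedged sketch of what that buys a reset path; the helper name is hypothetical:)

static int sketch_reinit_crq(struct ibmvnic_adapter *adapter)
{
	release_crq_queue(adapter);	/* no-op when crq->msgs is NULL */
	return init_crq_queue(adapter);	/* returns 0 if already allocated */
}
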
 
-/* debugfs for dump */
-static int ibmvnic_dump_show(struct seq_file *seq, void *v)
-{
-       struct net_device *netdev = seq->private;
-       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       struct device *dev = &adapter->vdev->dev;
-       union ibmvnic_crq crq;
-
-       memset(&crq, 0, sizeof(crq));
-       crq.request_dump_size.first = IBMVNIC_CRQ_CMD;
-       crq.request_dump_size.cmd = REQUEST_DUMP_SIZE;
-
-       init_completion(&adapter->fw_done);
-       ibmvnic_send_crq(adapter, &crq);
-       wait_for_completion(&adapter->fw_done);
-
-       seq_write(seq, adapter->dump_data, adapter->dump_data_size);
-
-       dma_unmap_single(dev, adapter->dump_data_token, adapter->dump_data_size,
-                        DMA_BIDIRECTIONAL);
-
-       kfree(adapter->dump_data);
-
-       return 0;
-}
-
-static int ibmvnic_dump_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, ibmvnic_dump_show, inode->i_private);
-}
-
-static const struct file_operations ibmvnic_dump_ops = {
-       .owner          = THIS_MODULE,
-       .open           = ibmvnic_dump_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
 static void handle_crq_init_rsp(struct work_struct *work)
 {
        struct ibmvnic_adapter *adapter = container_of(work,
@@ -3700,26 +3238,6 @@ static void handle_crq_init_rsp(struct work_struct *work)
                goto task_failed;
        }
 
-       do {
-               if (adapter->renegotiate) {
-                       adapter->renegotiate = false;
-                       release_sub_crqs_no_irqs(adapter);
-
-                       reinit_completion(&adapter->init_done);
-                       send_cap_queries(adapter);
-                       if (!wait_for_completion_timeout(&adapter->init_done,
-                                                        timeout)) {
-                               dev_err(dev, "Passive init timeout\n");
-                               goto task_failed;
-                       }
-               }
-       } while (adapter->renegotiate);
-       rc = init_sub_crq_irqs(adapter);
-
-       if (rc)
-               goto task_failed;
-
-       netdev->real_num_tx_queues = adapter->req_tx_queues;
        netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
        if (adapter->failover) {
@@ -3751,14 +3269,40 @@ task_failed:
        dev_err(dev, "Passive initialization was not successful\n");
 }
 
-static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 {
+       struct device *dev = &adapter->vdev->dev;
        unsigned long timeout = msecs_to_jiffies(30000);
+       int rc;
+
+       rc = init_crq_queue(adapter);
+       if (rc) {
+               dev_err(dev, "Couldn't initialize crq. rc=%d\n", rc);
+               return rc;
+       }
+
+       rc = init_stats_token(adapter);
+       if (rc) {
+               release_crq_queue(adapter);
+               return rc;
+       }
+
+       init_completion(&adapter->init_done);
+       ibmvnic_send_crq_init(adapter);
+       if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+               dev_err(dev, "Initialization sequence timed out\n");
+               release_crq_queue(adapter);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+{
        struct ibmvnic_adapter *adapter;
        struct net_device *netdev;
        unsigned char *mac_addr_p;
-       struct dentry *ent;
-       char buf[17]; /* debugfs name buf */
        int rc;
 
        dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
@@ -3796,118 +3340,36 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
        spin_lock_init(&adapter->stats_lock);
 
-       rc = ibmvnic_init_crq_queue(adapter);
-       if (rc) {
-               dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", rc);
-               goto free_netdev;
-       }
-
        INIT_LIST_HEAD(&adapter->errors);
        INIT_LIST_HEAD(&adapter->inflight);
        spin_lock_init(&adapter->error_list_lock);
        spin_lock_init(&adapter->inflight_lock);
 
-       adapter->stats_token = dma_map_single(&dev->dev, &adapter->stats,
-                                             sizeof(struct ibmvnic_statistics),
-                                             DMA_FROM_DEVICE);
-       if (dma_mapping_error(&dev->dev, adapter->stats_token)) {
-               if (!firmware_has_feature(FW_FEATURE_CMO))
-                       dev_err(&dev->dev, "Couldn't map stats buffer\n");
-               rc = -ENOMEM;
-               goto free_crq;
-       }
-
-       snprintf(buf, sizeof(buf), "ibmvnic_%x", dev->unit_address);
-       ent = debugfs_create_dir(buf, NULL);
-       if (!ent || IS_ERR(ent)) {
-               dev_info(&dev->dev, "debugfs create directory failed\n");
-               adapter->debugfs_dir = NULL;
-       } else {
-               adapter->debugfs_dir = ent;
-               ent = debugfs_create_file("dump", S_IRUGO, adapter->debugfs_dir,
-                                         netdev, &ibmvnic_dump_ops);
-               if (!ent || IS_ERR(ent)) {
-                       dev_info(&dev->dev,
-                                "debugfs create dump file failed\n");
-                       adapter->debugfs_dump = NULL;
-               } else {
-                       adapter->debugfs_dump = ent;
-               }
-       }
-
-       init_completion(&adapter->init_done);
-       ibmvnic_send_crq_init(adapter);
-       if (!wait_for_completion_timeout(&adapter->init_done, timeout))
-               return 0;
-
-       do {
-               if (adapter->renegotiate) {
-                       adapter->renegotiate = false;
-                       release_sub_crqs_no_irqs(adapter);
-
-                       reinit_completion(&adapter->init_done);
-                       send_cap_queries(adapter);
-                       if (!wait_for_completion_timeout(&adapter->init_done,
-                                                        timeout))
-                               return 0;
-               }
-       } while (adapter->renegotiate);
-
-       rc = init_sub_crq_irqs(adapter);
+       rc = ibmvnic_init(adapter);
        if (rc) {
-               dev_err(&dev->dev, "failed to initialize sub crq irqs\n");
-               goto free_debugfs;
+               free_netdev(netdev);
+               return rc;
        }
 
-       netdev->real_num_tx_queues = adapter->req_tx_queues;
        netdev->mtu = adapter->req_mtu - ETH_HLEN;
+       adapter->is_closed = false;
 
        rc = register_netdev(netdev);
        if (rc) {
                dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
-               goto free_sub_crqs;
+               free_netdev(netdev);
+               return rc;
        }
        dev_info(&dev->dev, "ibmvnic registered\n");
 
        return 0;
-
-free_sub_crqs:
-       release_sub_crqs(adapter);
-free_debugfs:
-       if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir))
-               debugfs_remove_recursive(adapter->debugfs_dir);
-free_crq:
-       ibmvnic_release_crq_queue(adapter);
-free_netdev:
-       free_netdev(netdev);
-       return rc;
 }
 
 static int ibmvnic_remove(struct vio_dev *dev)
 {
        struct net_device *netdev = dev_get_drvdata(&dev->dev);
-       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
        unregister_netdev(netdev);
-
-       release_sub_crqs(adapter);
-
-       ibmvnic_release_crq_queue(adapter);
-
-       if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir))
-               debugfs_remove_recursive(adapter->debugfs_dir);
-
-       dma_unmap_single(&dev->dev, adapter->stats_token,
-                        sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE);
-
-       if (adapter->ras_comps)
-               dma_free_coherent(&dev->dev,
-                                 adapter->ras_comp_num *
-                                 sizeof(struct ibmvnic_fw_component),
-                                 adapter->ras_comps, adapter->ras_comps_tok);
-
-       kfree(adapter->ras_comp_int);
-
        free_netdev(netdev);
        dev_set_drvdata(&dev->dev, NULL);
 
index 1993b42666f73d659773b6b88bcd8e8552ac97b7..b0d0b890d033a4d7d03632b1daa97798a3db5237 100644 (file)
@@ -772,20 +772,10 @@ enum ibmvnic_commands {
        ERROR_INDICATION = 0x08,
        REQUEST_ERROR_INFO = 0x09,
        REQUEST_ERROR_RSP = 0x89,
-       REQUEST_DUMP_SIZE = 0x0A,
-       REQUEST_DUMP_SIZE_RSP = 0x8A,
-       REQUEST_DUMP = 0x0B,
-       REQUEST_DUMP_RSP = 0x8B,
        LOGICAL_LINK_STATE = 0x0C,
        LOGICAL_LINK_STATE_RSP = 0x8C,
        REQUEST_STATISTICS = 0x0D,
        REQUEST_STATISTICS_RSP = 0x8D,
-       REQUEST_RAS_COMP_NUM = 0x0E,
-       REQUEST_RAS_COMP_NUM_RSP = 0x8E,
-       REQUEST_RAS_COMPS = 0x0F,
-       REQUEST_RAS_COMPS_RSP = 0x8F,
-       CONTROL_RAS = 0x10,
-       CONTROL_RAS_RSP = 0x90,
        COLLECT_FW_TRACE = 0x11,
        COLLECT_FW_TRACE_RSP = 0x91,
        LINK_STATE_INDICATION = 0x12,
@@ -806,8 +796,6 @@ enum ibmvnic_commands {
        ACL_CHANGE_INDICATION = 0x1A,
        ACL_QUERY = 0x1B,
        ACL_QUERY_RSP = 0x9B,
-       REQUEST_DEBUG_STATS = 0x1C,
-       REQUEST_DEBUG_STATS_RSP = 0x9C,
        QUERY_MAP = 0x1D,
        QUERY_MAP_RSP = 0x9D,
        REQUEST_MAP = 0x1E,
@@ -925,13 +913,6 @@ struct ibmvnic_error_buff {
        __be32 error_id;
 };
 
-struct ibmvnic_fw_comp_internal {
-       struct ibmvnic_adapter *adapter;
-       int num;
-       struct debugfs_blob_wrapper desc_blob;
-       int paused;
-};
-
 struct ibmvnic_inflight_cmd {
        union ibmvnic_crq crq;
        struct list_head list;
@@ -953,7 +934,6 @@ struct ibmvnic_adapter {
        dma_addr_t bounce_buffer_dma;
 
        /* Statistics */
-       struct net_device_stats net_stats;
        struct ibmvnic_statistics stats;
        dma_addr_t stats_token;
        struct completion stats_done;
@@ -996,18 +976,7 @@ struct ibmvnic_adapter {
        struct list_head errors;
        spinlock_t error_list_lock;
 
-       /* debugfs */
-       struct dentry *debugfs_dir;
-       struct dentry *debugfs_dump;
        struct completion fw_done;
-       char *dump_data;
-       dma_addr_t dump_data_token;
-       int dump_data_size;
-       int ras_comp_num;
-       struct ibmvnic_fw_component *ras_comps;
-       struct ibmvnic_fw_comp_internal *ras_comp_int;
-       dma_addr_t ras_comps_tok;
-       struct dentry *ras_comps_ent;
 
        /* in-flight commands that allocate and/or map memory*/
        struct list_head inflight;
@@ -1052,4 +1021,5 @@ struct ibmvnic_adapter {
        struct work_struct ibmvnic_xport;
        struct tasklet_struct tasklet;
        bool failover;
+       bool is_closed;
 };
index 1349b45f014dd1d5ce29fa9a9f38189bca2cbe3d..1542a2158e962d66915d6330e6d94fb215735b5c 100644 (file)
@@ -235,17 +235,6 @@ config I40E_DCB
 
          If unsure, say N.
 
-config I40E_FCOE
-       bool "Fibre Channel over Ethernet (FCoE)"
-       default n
-       depends on I40E && DCB && FCOE
-       ---help---
-         Say Y here if you want to use Fibre Channel over Ethernet (FCoE)
-         in the driver. This will create new netdev for exclusive FCoE
-         use with XL710 FCoE offloads enabled.
-
-         If unsure, say N.
-
 config I40EVF
        tristate "Intel(R) XL710 X710 Virtual Function Ethernet support"
        depends on PCI_MSI
index 975eeb885ca2b52bb6a3f28e2b95d140cdf47777..ec8aa4562cc90a90dff844872278722b24daec3c 100644 (file)
@@ -103,104 +103,104 @@ static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define E1000_TEST_LEN ARRAY_SIZE(e1000_gstrings_test)
 
-static int e1000_get_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int e1000_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
+       u32 supported, advertising;
 
        if (hw->media_type == e1000_media_type_copper) {
-               ecmd->supported = (SUPPORTED_10baseT_Half |
-                                  SUPPORTED_10baseT_Full |
-                                  SUPPORTED_100baseT_Half |
-                                  SUPPORTED_100baseT_Full |
-                                  SUPPORTED_1000baseT_Full|
-                                  SUPPORTED_Autoneg |
-                                  SUPPORTED_TP);
-               ecmd->advertising = ADVERTISED_TP;
+               supported = (SUPPORTED_10baseT_Half |
+                            SUPPORTED_10baseT_Full |
+                            SUPPORTED_100baseT_Half |
+                            SUPPORTED_100baseT_Full |
+                            SUPPORTED_1000baseT_Full|
+                            SUPPORTED_Autoneg |
+                            SUPPORTED_TP);
+               advertising = ADVERTISED_TP;
 
                if (hw->autoneg == 1) {
-                       ecmd->advertising |= ADVERTISED_Autoneg;
+                       advertising |= ADVERTISED_Autoneg;
                        /* the e1000 autoneg seems to match ethtool nicely */
-                       ecmd->advertising |= hw->autoneg_advertised;
+                       advertising |= hw->autoneg_advertised;
                }
 
-               ecmd->port = PORT_TP;
-               ecmd->phy_address = hw->phy_addr;
-
-               if (hw->mac_type == e1000_82543)
-                       ecmd->transceiver = XCVR_EXTERNAL;
-               else
-                       ecmd->transceiver = XCVR_INTERNAL;
-
+               cmd->base.port = PORT_TP;
+               cmd->base.phy_address = hw->phy_addr;
        } else {
-               ecmd->supported   = (SUPPORTED_1000baseT_Full |
-                                    SUPPORTED_FIBRE |
-                                    SUPPORTED_Autoneg);
+               supported   = (SUPPORTED_1000baseT_Full |
+                              SUPPORTED_FIBRE |
+                              SUPPORTED_Autoneg);
 
-               ecmd->advertising = (ADVERTISED_1000baseT_Full |
-                                    ADVERTISED_FIBRE |
-                                    ADVERTISED_Autoneg);
+               advertising = (ADVERTISED_1000baseT_Full |
+                              ADVERTISED_FIBRE |
+                              ADVERTISED_Autoneg);
 
-               ecmd->port = PORT_FIBRE;
-
-               if (hw->mac_type >= e1000_82545)
-                       ecmd->transceiver = XCVR_INTERNAL;
-               else
-                       ecmd->transceiver = XCVR_EXTERNAL;
+               cmd->base.port = PORT_FIBRE;
        }
 
        if (er32(STATUS) & E1000_STATUS_LU) {
                e1000_get_speed_and_duplex(hw, &adapter->link_speed,
                                           &adapter->link_duplex);
-               ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+               cmd->base.speed = adapter->link_speed;
 
                /* unfortunately FULL_DUPLEX != DUPLEX_FULL
                 * and HALF_DUPLEX != DUPLEX_HALF
                 */
                if (adapter->link_duplex == FULL_DUPLEX)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) ||
+       cmd->base.autoneg = ((hw->media_type == e1000_media_type_fiber) ||
                         hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
 
        /* MDI-X => 1; MDI => 0 */
        if ((hw->media_type == e1000_media_type_copper) &&
            netif_carrier_ok(netdev))
-               ecmd->eth_tp_mdix = (!!adapter->phy_info.mdix_mode ?
+               cmd->base.eth_tp_mdix = (!!adapter->phy_info.mdix_mode ?
                                     ETH_TP_MDI_X : ETH_TP_MDI);
        else
-               ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+               cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 
        if (hw->mdix == AUTO_ALL_MODES)
-               ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
        else
-               ecmd->eth_tp_mdix_ctrl = hw->mdix;
+               cmd->base.eth_tp_mdix_ctrl = hw->mdix;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+
        return 0;
 }
 
-static int e1000_set_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int e1000_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
+       u32 advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        /* MDI setting is only allowed when autoneg enabled because
         * some hardware doesn't allow MDI setting when speed or
         * duplex is forced.
         */
-       if (ecmd->eth_tp_mdix_ctrl) {
+       if (cmd->base.eth_tp_mdix_ctrl) {
                if (hw->media_type != e1000_media_type_copper)
                        return -EOPNOTSUPP;
 
-               if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
-                   (ecmd->autoneg != AUTONEG_ENABLE)) {
+               if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+                   (cmd->base.autoneg != AUTONEG_ENABLE)) {
                        e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
                        return -EINVAL;
                }
@@ -209,32 +209,31 @@ static int e1000_set_settings(struct net_device *netdev,
        while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
                msleep(1);
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                hw->autoneg = 1;
                if (hw->media_type == e1000_media_type_fiber)
                        hw->autoneg_advertised = ADVERTISED_1000baseT_Full |
-                                    ADVERTISED_FIBRE |
-                                    ADVERTISED_Autoneg;
+                                                ADVERTISED_FIBRE |
+                                                ADVERTISED_Autoneg;
                else
-                       hw->autoneg_advertised = ecmd->advertising |
+                       hw->autoneg_advertised = advertising |
                                                 ADVERTISED_TP |
                                                 ADVERTISED_Autoneg;
-               ecmd->advertising = hw->autoneg_advertised;
        } else {
-               u32 speed = ethtool_cmd_speed(ecmd);
+               u32 speed = cmd->base.speed;
                /* calling this overrides forced MDI setting */
-               if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) {
+               if (e1000_set_spd_dplx(adapter, speed, cmd->base.duplex)) {
                        clear_bit(__E1000_RESETTING, &adapter->flags);
                        return -EINVAL;
                }
        }
 
        /* MDI-X => 2; MDI => 1; Auto => 3 */
-       if (ecmd->eth_tp_mdix_ctrl) {
-               if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+       if (cmd->base.eth_tp_mdix_ctrl) {
+               if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
                        hw->mdix = AUTO_ALL_MODES;
                else
-                       hw->mdix = ecmd->eth_tp_mdix_ctrl;
+                       hw->mdix = cmd->base.eth_tp_mdix_ctrl;
        }
 
        /* reset the link */
@@ -1875,8 +1874,6 @@ static void e1000_get_strings(struct net_device *netdev, u32 stringset,
 }
 
 static const struct ethtool_ops e1000_ethtool_ops = {
-       .get_settings           = e1000_get_settings,
-       .set_settings           = e1000_set_settings,
        .get_drvinfo            = e1000_get_drvinfo,
        .get_regs_len           = e1000_get_regs_len,
        .get_regs               = e1000_get_regs,
@@ -1901,6 +1898,8 @@ static const struct ethtool_ops e1000_ethtool_ops = {
        .get_coalesce           = e1000_get_coalesce,
        .set_coalesce           = e1000_set_coalesce,
        .get_ts_info            = ethtool_op_get_ts_info,
+       .get_link_ksettings     = e1000_get_link_ksettings,
+       .set_link_ksettings     = e1000_set_link_ksettings,
 };
 
 void e1000_set_ethtool_ops(struct net_device *netdev)
index 7aff68a4a4df527d26c50da69d0cad2dd91c2767..e70b1ebff60df2d0a20a876c04613938b89432cc 100644 (file)
@@ -117,55 +117,52 @@ static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define E1000_TEST_LEN ARRAY_SIZE(e1000_gstrings_test)
 
-static int e1000_get_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int e1000_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       u32 speed;
+       u32 speed, supported, advertising;
 
        if (hw->phy.media_type == e1000_media_type_copper) {
-               ecmd->supported = (SUPPORTED_10baseT_Half |
-                                  SUPPORTED_10baseT_Full |
-                                  SUPPORTED_100baseT_Half |
-                                  SUPPORTED_100baseT_Full |
-                                  SUPPORTED_1000baseT_Full |
-                                  SUPPORTED_Autoneg |
-                                  SUPPORTED_TP);
+               supported = (SUPPORTED_10baseT_Half |
+                            SUPPORTED_10baseT_Full |
+                            SUPPORTED_100baseT_Half |
+                            SUPPORTED_100baseT_Full |
+                            SUPPORTED_1000baseT_Full |
+                            SUPPORTED_Autoneg |
+                            SUPPORTED_TP);
                if (hw->phy.type == e1000_phy_ife)
-                       ecmd->supported &= ~SUPPORTED_1000baseT_Full;
-               ecmd->advertising = ADVERTISED_TP;
+                       supported &= ~SUPPORTED_1000baseT_Full;
+               advertising = ADVERTISED_TP;
 
                if (hw->mac.autoneg == 1) {
-                       ecmd->advertising |= ADVERTISED_Autoneg;
+                       advertising |= ADVERTISED_Autoneg;
                        /* the e1000 autoneg seems to match ethtool nicely */
-                       ecmd->advertising |= hw->phy.autoneg_advertised;
+                       advertising |= hw->phy.autoneg_advertised;
                }
 
-               ecmd->port = PORT_TP;
-               ecmd->phy_address = hw->phy.addr;
-               ecmd->transceiver = XCVR_INTERNAL;
-
+               cmd->base.port = PORT_TP;
+               cmd->base.phy_address = hw->phy.addr;
        } else {
-               ecmd->supported   = (SUPPORTED_1000baseT_Full |
-                                    SUPPORTED_FIBRE |
-                                    SUPPORTED_Autoneg);
+               supported   = (SUPPORTED_1000baseT_Full |
+                              SUPPORTED_FIBRE |
+                              SUPPORTED_Autoneg);
 
-               ecmd->advertising = (ADVERTISED_1000baseT_Full |
-                                    ADVERTISED_FIBRE |
-                                    ADVERTISED_Autoneg);
+               advertising = (ADVERTISED_1000baseT_Full |
+                              ADVERTISED_FIBRE |
+                              ADVERTISED_Autoneg);
 
-               ecmd->port = PORT_FIBRE;
-               ecmd->transceiver = XCVR_EXTERNAL;
+               cmd->base.port = PORT_FIBRE;
        }
 
        speed = SPEED_UNKNOWN;
-       ecmd->duplex = DUPLEX_UNKNOWN;
+       cmd->base.duplex = DUPLEX_UNKNOWN;
 
        if (netif_running(netdev)) {
                if (netif_carrier_ok(netdev)) {
                        speed = adapter->link_speed;
-                       ecmd->duplex = adapter->link_duplex - 1;
+                       cmd->base.duplex = adapter->link_duplex - 1;
                }
        } else if (!pm_runtime_suspended(netdev->dev.parent)) {
                u32 status = er32(STATUS);
@@ -179,30 +176,36 @@ static int e1000_get_settings(struct net_device *netdev,
                                speed = SPEED_10;
 
                        if (status & E1000_STATUS_FD)
-                               ecmd->duplex = DUPLEX_FULL;
+                               cmd->base.duplex = DUPLEX_FULL;
                        else
-                               ecmd->duplex = DUPLEX_HALF;
+                               cmd->base.duplex = DUPLEX_HALF;
                }
        }
 
-       ethtool_cmd_speed_set(ecmd, speed);
-       ecmd->autoneg = ((hw->phy.media_type == e1000_media_type_fiber) ||
+       cmd->base.speed = speed;
+       cmd->base.autoneg = ((hw->phy.media_type == e1000_media_type_fiber) ||
                         hw->mac.autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
 
        /* MDI-X => 2; MDI => 1; Invalid => 0 */
        if ((hw->phy.media_type == e1000_media_type_copper) &&
            netif_carrier_ok(netdev))
-               ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : ETH_TP_MDI;
+               cmd->base.eth_tp_mdix = hw->phy.is_mdix ?
+                       ETH_TP_MDI_X : ETH_TP_MDI;
        else
-               ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+               cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 
        if (hw->phy.mdix == AUTO_ALL_MODES)
-               ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
        else
-               ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+               cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
 
        if (hw->phy.media_type != e1000_media_type_copper)
-               ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
@@ -262,12 +265,16 @@ err_inval:
        return -EINVAL;
 }
 
-static int e1000_set_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int e1000_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        int ret_val = 0;
+       u32 advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        pm_runtime_get_sync(netdev->dev.parent);
 
@@ -285,14 +292,14 @@ static int e1000_set_settings(struct net_device *netdev,
         * some hardware doesn't allow MDI setting when speed or
         * duplex is forced.
         */
-       if (ecmd->eth_tp_mdix_ctrl) {
+       if (cmd->base.eth_tp_mdix_ctrl) {
                if (hw->phy.media_type != e1000_media_type_copper) {
                        ret_val = -EOPNOTSUPP;
                        goto out;
                }
 
-               if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
-                   (ecmd->autoneg != AUTONEG_ENABLE)) {
+               if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+                   (cmd->base.autoneg != AUTONEG_ENABLE)) {
                        e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
                        ret_val = -EINVAL;
                        goto out;
@@ -302,35 +309,35 @@ static int e1000_set_settings(struct net_device *netdev,
        while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
                usleep_range(1000, 2000);
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                hw->mac.autoneg = 1;
                if (hw->phy.media_type == e1000_media_type_fiber)
                        hw->phy.autoneg_advertised = ADVERTISED_1000baseT_Full |
                            ADVERTISED_FIBRE | ADVERTISED_Autoneg;
                else
-                       hw->phy.autoneg_advertised = ecmd->advertising |
+                       hw->phy.autoneg_advertised = advertising |
                            ADVERTISED_TP | ADVERTISED_Autoneg;
-               ecmd->advertising = hw->phy.autoneg_advertised;
+               advertising = hw->phy.autoneg_advertised;
                if (adapter->fc_autoneg)
                        hw->fc.requested_mode = e1000_fc_default;
        } else {
-               u32 speed = ethtool_cmd_speed(ecmd);
+               u32 speed = cmd->base.speed;
                /* calling this overrides forced MDI setting */
-               if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) {
+               if (e1000_set_spd_dplx(adapter, speed, cmd->base.duplex)) {
                        ret_val = -EINVAL;
                        goto out;
                }
        }
 
        /* MDI-X => 2; MDI => 1; Auto => 3 */
-       if (ecmd->eth_tp_mdix_ctrl) {
+       if (cmd->base.eth_tp_mdix_ctrl) {
                /* fix up the value for auto (3 => 0) as zero is mapped
                 * internally to auto
                 */
-               if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+               if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
                        hw->phy.mdix = AUTO_ALL_MODES;
                else
-                       hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+                       hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
        }
 
        /* reset the link */
@@ -2313,8 +2320,6 @@ static int e1000e_get_ts_info(struct net_device *netdev,
 }
 
 static const struct ethtool_ops e1000_ethtool_ops = {
-       .get_settings           = e1000_get_settings,
-       .set_settings           = e1000_set_settings,
        .get_drvinfo            = e1000_get_drvinfo,
        .get_regs_len           = e1000_get_regs_len,
        .get_regs               = e1000_get_regs,
@@ -2342,6 +2347,8 @@ static const struct ethtool_ops e1000_ethtool_ops = {
        .get_ts_info            = e1000e_get_ts_info,
        .get_eee                = e1000e_get_eee,
        .set_eee                = e1000e_set_eee,
+       .get_link_ksettings     = e1000_get_link_ksettings,
+       .set_link_ksettings     = e1000_set_link_ksettings,
 };
 
 void e1000e_set_ethtool_ops(struct net_device *netdev)
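
(Editorial note: both the e1000 and e1000e conversions above follow the same recipe -- keep computing the legacy SUPPORTED_*/ADVERTISED_* u32 masks, then translate them to the link-mode bitmaps with the ethtool helpers. A condensed, hedged sketch of the get side; the foo_* names are hypothetical:)

static int foo_get_link_ksettings(struct net_device *netdev,
				  struct ethtool_link_ksettings *cmd)
{
	u32 supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
			SUPPORTED_TP;
	u32 advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
			  ADVERTISED_TP;

	cmd->base.speed = SPEED_1000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.autoneg = AUTONEG_ENABLE;

	/* legacy u32 masks -> ETHTOOL_LINK_MODE_* bitmaps */
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						advertising);
	return 0;
}

(The set side runs the inverse, ethtool_convert_link_mode_to_legacy_u32(), before reusing the driver's existing u32-based logic, as both hunks above do.)
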
index 01db688cf5398d434e81c2d4016e5764bbd49820..72481670478c3b75b15f8f7f09a21d4ae0731e36 100644 (file)
@@ -1226,7 +1226,9 @@ static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       return fm10k_setup_tc(dev, tc->tc);
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return fm10k_setup_tc(dev, tc->mqprio->num_tc);
 }
 
 static void fm10k_assign_l2_accel(struct fm10k_intfc *interface,
index e372a582348015355e5406eae522fbda148558bd..60d9b6aaf63a5470b21e6f2f93852803a0ffdd82 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/interrupt.h>
 #include <linux/aer.h>
 
 #include "fm10k.h"
index 3b3c63e54ed638f03278506b91c0eaa854a6c575..4f454d364d0d35c33e0ab60c6c075d3764413b5b 100644 (file)
@@ -45,4 +45,3 @@ i40e-objs := i40e_main.o \
        i40e_virtchnl_pf.o
 
 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
-i40e-$(CONFIG_I40E_FCOE) += i40e_fcoe.o
index 82d8040fa418a3cf905d3f27335d1ec177f00b99..421ea57128d376f2efe4e716bd134b8d8b894138 100644 (file)
@@ -56,9 +56,6 @@
 #include <linux/ptp_clock_kernel.h>
 #include "i40e_type.h"
 #include "i40e_prototype.h"
-#ifdef I40E_FCOE
-#include "i40e_fcoe.h"
-#endif
 #include "i40e_client.h"
 #include "i40e_virtchnl.h"
 #include "i40e_virtchnl_pf.h"
                (((pf)->flags & I40E_FLAG_128_QP_RSS_CAPABLE) ? 128 : 64)
 #define I40E_FDIR_RING                 0
 #define I40E_FDIR_RING_COUNT           32
-#ifdef I40E_FCOE
-#define I40E_DEFAULT_FCOE              8 /* default number of QPs for FCoE */
-#define I40E_MINIMUM_FCOE              1 /* minimum number of QPs for FCoE */
-#endif /* I40E_FCOE */
 #define I40E_MAX_AQ_BUF_SIZE           4096
 #define I40E_AQ_LEN                    256
 #define I40E_AQ_WORK_LIMIT             66 /* max number of VFs + a little */
 #define I40E_QUEUE_WAIT_RETRY_LIMIT    10
 #define I40E_INT_NAME_STR_LEN          (IFNAMSIZ + 16)
 
-/* Ethtool Private Flags */
-#define I40E_PRIV_FLAGS_MFP_FLAG               BIT(0)
-#define I40E_PRIV_FLAGS_LINKPOLL_FLAG          BIT(1)
-#define I40E_PRIV_FLAGS_FD_ATR                 BIT(2)
-#define I40E_PRIV_FLAGS_VEB_STATS              BIT(3)
-#define I40E_PRIV_FLAGS_HW_ATR_EVICT           BIT(4)
-#define I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT   BIT(5)
-
 #define I40E_NVM_VERSION_LO_SHIFT      0
 #define I40E_NVM_VERSION_LO_MASK       (0xff << I40E_NVM_VERSION_LO_SHIFT)
 #define I40E_NVM_VERSION_HI_SHIFT      12
@@ -202,17 +187,32 @@ enum i40e_fd_stat_idx {
 #define I40E_FD_ATR_TUNNEL_STAT_IDX(pf_id) \
                        (I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_ATR_TUNNEL)
 
+/* The following structure contains the data parsed from the user-defined
+ * field of the ethtool_rx_flow_spec structure.
+ */
+struct i40e_rx_flow_userdef {
+       bool flex_filter;
+       u16 flex_word;
+       u16 flex_offset;
+};
+
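(Editorial note: a hedged sketch of how such user-def parsing might look. The bit layout below -- flex word in bits 0..15, offset in bits 16..31 -- is an assumption for illustration only, not the driver's documented encoding:)

static void sketch_parse_rx_flow_userdef(u64 value,
					 struct i40e_rx_flow_userdef *data)
{
	data->flex_word = value & 0xFFFFULL;		/* assumed bits 0..15 */
	data->flex_offset = (value >> 16) & 0xFFFFULL;	/* assumed bits 16..31 */
	data->flex_filter = true;
}
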
 struct i40e_fdir_filter {
        struct hlist_node fdir_node;
        /* filter input set */
        u8 flow_type;
        u8 ip4_proto;
        /* TX packet view of src and dst */
-       __be32 dst_ip[4];
-       __be32 src_ip[4];
+       __be32 dst_ip;
+       __be32 src_ip;
        __be16 src_port;
        __be16 dst_port;
        __be32 sctp_v_tag;
+
+       /* Flexible data to match within the packet payload */
+       __be16 flex_word;
+       u16 flex_offset;
+       bool flex_filter;
+
        /* filter control */
        u16 q_index;
        u8  flex_off;
@@ -244,10 +244,80 @@ struct i40e_tc_configuration {
 };
 
 struct i40e_udp_port_config {
-       __be16 index;
+       /* AdminQ command interface expects port number in host byte order */
+       u16 index;
        u8 type;
 };
 
+/* macros related to FLX_PIT */
+#define I40E_FLEX_SET_FSIZE(fsize) (((fsize) << \
+                                   I40E_PRTQF_FLX_PIT_FSIZE_SHIFT) & \
+                                   I40E_PRTQF_FLX_PIT_FSIZE_MASK)
+#define I40E_FLEX_SET_DST_WORD(dst) (((dst) << \
+                                    I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT) & \
+                                    I40E_PRTQF_FLX_PIT_DEST_OFF_MASK)
+#define I40E_FLEX_SET_SRC_WORD(src) (((src) << \
+                                    I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT) & \
+                                    I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK)
+#define I40E_FLEX_PREP_VAL(dst, fsize, src) (I40E_FLEX_SET_DST_WORD(dst) | \
+                                            I40E_FLEX_SET_FSIZE(fsize) | \
+                                            I40E_FLEX_SET_SRC_WORD(src))
+
+#define I40E_FLEX_PIT_GET_SRC(flex) (((flex) & \
+                                    I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK) >> \
+                                    I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT)
+#define I40E_FLEX_PIT_GET_DST(flex) (((flex) & \
+                                    I40E_PRTQF_FLX_PIT_DEST_OFF_MASK) >> \
+                                    I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT)
+#define I40E_FLEX_PIT_GET_FSIZE(flex) (((flex) & \
+                                      I40E_PRTQF_FLX_PIT_FSIZE_MASK) >> \
+                                      I40E_PRTQF_FLX_PIT_FSIZE_SHIFT)
+
+#define I40E_MAX_FLEX_SRC_OFFSET 0x1F
+
+/* macros related to GLQF_ORT */
+#define I40E_ORT_SET_IDX(idx)          (((idx) << \
+                                         I40E_GLQF_ORT_PIT_INDX_SHIFT) & \
+                                        I40E_GLQF_ORT_PIT_INDX_MASK)
+
+#define I40E_ORT_SET_COUNT(count)      (((count) << \
+                                         I40E_GLQF_ORT_FIELD_CNT_SHIFT) & \
+                                        I40E_GLQF_ORT_FIELD_CNT_MASK)
+
+#define I40E_ORT_SET_PAYLOAD(payload)  (((payload) << \
+                                         I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) & \
+                                        I40E_GLQF_ORT_FLX_PAYLOAD_MASK)
+
+#define I40E_ORT_PREP_VAL(idx, count, payload) (I40E_ORT_SET_IDX(idx) | \
+                                               I40E_ORT_SET_COUNT(count) | \
+                                               I40E_ORT_SET_PAYLOAD(payload))
+
+#define I40E_L3_GLQF_ORT_IDX           34
+#define I40E_L4_GLQF_ORT_IDX           35
+
+/* Flex PIT register index */
+#define I40E_FLEX_PIT_IDX_START_L2     0
+#define I40E_FLEX_PIT_IDX_START_L3     3
+#define I40E_FLEX_PIT_IDX_START_L4     6
+
+#define I40E_FLEX_PIT_TABLE_SIZE       3
+
+#define I40E_FLEX_DEST_UNUSED          63
+
+#define I40E_FLEX_INDEX_ENTRIES                8
+
+/* Flex MASK to disable all flexible entries */
+#define I40E_FLEX_INPUT_MASK   (I40E_FLEX_50_MASK | I40E_FLEX_51_MASK | \
+                                I40E_FLEX_52_MASK | I40E_FLEX_53_MASK | \
+                                I40E_FLEX_54_MASK | I40E_FLEX_55_MASK | \
+                                I40E_FLEX_56_MASK | I40E_FLEX_57_MASK)
+
+struct i40e_flex_pit {
+       struct list_head list;
+       u16 src_offset;
+       u8 pit_index;
+};
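+
(Editorial note: the per-PF l3_flex_pit_list/l4_flex_pit_list added in a later hunk must stay in ascending src_offset order with no duplicates. A hedged sketch of an insert that preserves both invariants; sketch_add_flex_pit() is hypothetical and the driver's own helper may differ:)

static int sketch_add_flex_pit(struct list_head *flex_pit_list,
			       u16 src_offset, u8 pit_index)
{
	struct i40e_flex_pit *entry, *new;

	list_for_each_entry(entry, flex_pit_list, list) {
		if (entry->src_offset == src_offset)
			return 0;		/* duplicate: already tracked */
		if (entry->src_offset > src_offset)
			break;			/* found the insertion point */
	}

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->src_offset = src_offset;
	new->pit_index = pit_index;
	/* inserts before 'entry', or at the tail if no bigger element */
	list_add_tail(&new->list, &entry->list);

	return 0;
}
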
+
 /* struct that defines the Ethernet device */
 struct i40e_pf {
        struct pci_dev *pdev;
@@ -262,10 +332,6 @@ struct i40e_pf {
        u16 num_vmdq_msix;         /* num queue vectors per vmdq pool */
        u16 num_req_vfs;           /* num VFs requested for this PF */
        u16 num_vf_qps;            /* num queue pairs per VF */
-#ifdef I40E_FCOE
-       u16 num_fcoe_qps;          /* num fcoe queues this PF has set up */
-       u16 num_fcoe_msix;         /* num queue vectors per fcoe pool */
-#endif /* I40E_FCOE */
        u16 num_lan_qps;           /* num lan queues this PF has set up */
        u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
        u16 num_fdsb_msix;         /* num queue vectors for sideband Fdir */
@@ -285,7 +351,23 @@ struct i40e_pf {
        u32 fd_flush_cnt;
        u32 fd_add_err;
        u32 fd_atr_cnt;
-       u32 fd_tcp_rule;
+
+       /* Book-keeping of side-band filter count per flow-type.
+        * This is used to detect and handle input set changes for
+        * the respective flow-type.
+        */
+       u16 fd_tcp4_filter_cnt;
+       u16 fd_udp4_filter_cnt;
+       u16 fd_sctp4_filter_cnt;
+       u16 fd_ip4_filter_cnt;
+
+       /* Flexible filter table values that need to be programmed into
+        * hardware, which expects L3 and L4 to be programmed separately. We
+        * need to ensure that the values are in ascending order and don't
+        * have duplicates, so we track the L3 and L4 values in separate
+        * lists.
+        */
+       struct list_head l3_flex_pit_list;
+       struct list_head l4_flex_pit_list;
 
        struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
        u16 pending_udp_bitmap;
@@ -307,12 +389,8 @@ struct i40e_pf {
 #define I40E_FLAG_MSIX_ENABLED                 BIT_ULL(3)
 #define I40E_FLAG_RSS_ENABLED                  BIT_ULL(6)
 #define I40E_FLAG_VMDQ_ENABLED                 BIT_ULL(7)
-#define I40E_FLAG_FDIR_REQUIRES_REINIT         BIT_ULL(8)
 #define I40E_FLAG_NEED_LINK_UPDATE             BIT_ULL(9)
 #define I40E_FLAG_IWARP_ENABLED                        BIT_ULL(10)
-#ifdef I40E_FCOE
-#define I40E_FLAG_FCOE_ENABLED                 BIT_ULL(11)
-#endif /* I40E_FCOE */
 #define I40E_FLAG_CLEAN_ADMINQ                 BIT_ULL(14)
 #define I40E_FLAG_FILTER_SYNC                  BIT_ULL(15)
 #define I40E_FLAG_SERVICE_CLIENT_REQUESTED     BIT_ULL(16)
@@ -348,16 +426,20 @@ struct i40e_pf {
 #define I40E_FLAG_TRUE_PROMISC_SUPPORT         BIT_ULL(51)
 #define I40E_FLAG_HAVE_CRT_RETIMER             BIT_ULL(52)
 #define I40E_FLAG_PTP_L4_CAPABLE               BIT_ULL(53)
-#define I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE                BIT_ULL(54)
+#define I40E_FLAG_CLIENT_RESET                 BIT_ULL(54)
 #define I40E_FLAG_TEMP_LINK_POLLING            BIT_ULL(55)
+#define I40E_FLAG_CLIENT_L2_CHANGE             BIT_ULL(56)
+#define I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE                BIT_ULL(57)
+#define I40E_FLAG_LEGACY_RX                    BIT_ULL(58)
+
+       /* Tracks features that are disabled due to hw limitations.
+        * If a bit is set here, it means that the corresponding
+        * bit in the 'flags' field is cleared, i.e. that feature
+        * is disabled.
+        */
+       u64 hw_disabled_flags;
 
-       /* tracks features that get auto disabled by errors */
-       u64 auto_disable_flags;
-
-#ifdef I40E_FCOE
-       struct i40e_fcoe fcoe;
-
-#endif /* I40E_FCOE */
+       struct i40e_client_instance *cinst;
        bool stat_offsets_loaded;
        struct i40e_hw_port_stats stats;
        struct i40e_hw_port_stats stats_offsets;
@@ -412,8 +494,6 @@ struct i40e_pf {
         */
        u16 dcbx_cap;
 
-       u32 fcoe_hmc_filt_num;
-       u32 fcoe_hmc_cntx_num;
        struct i40e_filter_control_settings filter_settings;
 
        struct ptp_clock *ptp_clock;
@@ -533,11 +613,6 @@ struct i40e_vsi {
        struct rtnl_link_stats64 net_stats_offsets;
        struct i40e_eth_stats eth_stats;
        struct i40e_eth_stats eth_stats_offsets;
-#ifdef I40E_FCOE
-       struct i40e_fcoe_stats fcoe_stats;
-       struct i40e_fcoe_stats fcoe_stats_offsets;
-       bool fcoe_stat_offsets_loaded;
-#endif
        u32 tx_restart;
        u32 tx_busy;
        u64 tx_linearize;
@@ -719,6 +794,43 @@ static inline int i40e_get_fd_cnt_all(struct i40e_pf *pf)
        return pf->hw.fdir_shared_filter_count + pf->fdir_pf_filter_count;
 }
 
+/**
+ * i40e_read_fd_input_set - reads value of flow director input set register
+ * @pf: pointer to the PF struct
+ * @addr: register addr
+ *
+ * This function reads the value of the flow director input set register
+ * specified by 'addr' (which is specific to the flow type).
+ **/
+static inline u64 i40e_read_fd_input_set(struct i40e_pf *pf, u16 addr)
+{
+       u64 val;
+
+       val = i40e_read_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 1));
+       val <<= 32;
+       val += i40e_read_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 0));
+
+       return val;
+}
+
+/**
+ * i40e_write_fd_input_set - writes value into flow director input set register
+ * @pf: pointer to the PF struct
+ * @addr: register addr
+ * @val: value to be written
+ *
+ * This function writes the specified value to the flow director input set
+ * register specified by 'addr' (which is specific to the flow type).
+ **/
+static inline void i40e_write_fd_input_set(struct i40e_pf *pf,
+                                          u16 addr, u64 val)
+{
+       i40e_write_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 1),
+                         (u32)(val >> 32));
+       i40e_write_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 0),
+                         (u32)(val & 0xFFFFFFFFULL));
+}
+
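Taken together, the two helpers give a read-modify-write view of the 64-bit
per-flow-type input set. A usage sketch (the PCTYPE constant is real, but the
field bit and surrounding flow are illustrative only):

	/* hypothetical: add a field to the IPv4/TCP flow director input set */
	u64 field_bit = BIT_ULL(47);	/* assumed/illustrative field bit */
	u64 inset;

	inset = i40e_read_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
	inset |= field_bit;
	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP, inset);
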
 /* needed by i40e_ethtool.c */
 int i40e_up(struct i40e_vsi *vsi);
 void i40e_down(struct i40e_vsi *vsi);
@@ -773,11 +885,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
 struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
                                u16 uplink, u32 param1);
 int i40e_vsi_release(struct i40e_vsi *vsi);
-#ifdef I40E_FCOE
-void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
-                             struct i40e_vsi_context *ctxt,
-                             u8 enabled_tc, bool is_add);
-#endif
 void i40e_service_event_schedule(struct i40e_pf *pf);
 void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
                                  u8 *msg, u16 len);
@@ -813,8 +920,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
 void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
 void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
 void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
-int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
-                          enum i40e_client_type type);
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id);
 /**
  * i40e_irq_dynamic_enable - Enable default interrupt generation settings
  * @vsi: pointer to a vsi
@@ -838,20 +944,7 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
 
 void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf);
 void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba);
-#ifdef I40E_FCOE
-void i40e_get_netdev_stats_struct(struct net_device *netdev,
-                                 struct rtnl_link_stats64 *storage);
-int i40e_set_mac(struct net_device *netdev, void *p);
-void i40e_set_rx_mode(struct net_device *netdev);
-#endif
 int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
-#ifdef I40E_FCOE
-void i40e_tx_timeout(struct net_device *netdev);
-int i40e_vlan_rx_add_vid(struct net_device *netdev,
-                        __always_unused __be16 proto, u16 vid);
-int i40e_vlan_rx_kill_vid(struct net_device *netdev,
-                         __always_unused __be16 proto, u16 vid);
-#endif
 int i40e_open(struct net_device *netdev);
 int i40e_close(struct net_device *netdev);
 int i40e_vsi_open(struct i40e_vsi *vsi);
@@ -865,25 +958,6 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
 int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
 struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
-#ifdef I40E_FCOE
-int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
-                   struct tc_to_netdev *tc);
-void i40e_netpoll(struct net_device *netdev);
-int i40e_fcoe_enable(struct net_device *netdev);
-int i40e_fcoe_disable(struct net_device *netdev);
-int i40e_fcoe_vsi_init(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt);
-u8 i40e_get_fcoe_tc_map(struct i40e_pf *pf);
-void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi);
-void i40e_fcoe_vsi_setup(struct i40e_pf *pf);
-void i40e_init_pf_fcoe(struct i40e_pf *pf);
-int i40e_fcoe_setup_ddp_resources(struct i40e_vsi *vsi);
-void i40e_fcoe_free_ddp_resources(struct i40e_vsi *vsi);
-int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring,
-                            union i40e_rx_desc *rx_desc,
-                            struct sk_buff *skb);
-void i40e_fcoe_handle_status(struct i40e_ring *rx_ring,
-                            union i40e_rx_desc *rx_desc, u8 prog_id);
-#endif /* I40E_FCOE */
 void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
 #ifdef CONFIG_I40E_DCB
 void i40e_dcbnl_flush_apps(struct i40e_pf *pf,
index 451f48b7540aa0360615599b6681e2d31f2b6554..251074c677c497ceac12632976baf15af699a525 100644 (file)
@@ -132,6 +132,10 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_list_func_capabilities     = 0x000A,
        i40e_aqc_opc_list_dev_capabilities      = 0x000B,
 
+       /* Proxy commands */
+       i40e_aqc_opc_set_proxy_config           = 0x0104,
+       i40e_aqc_opc_set_ns_proxy_table_entry   = 0x0105,
+
        /* LAA */
        i40e_aqc_opc_mac_address_read   = 0x0107,
        i40e_aqc_opc_mac_address_write  = 0x0108,
@@ -139,6 +143,10 @@ enum i40e_admin_queue_opc {
        /* PXE */
        i40e_aqc_opc_clear_pxe_mode     = 0x0110,
 
+       /* WoL commands */
+       i40e_aqc_opc_set_wol_filter     = 0x0120,
+       i40e_aqc_opc_get_wake_reason    = 0x0121,
+
        /* internal switch commands */
        i40e_aqc_opc_get_switch_config          = 0x0200,
        i40e_aqc_opc_add_statistics             = 0x0201,
@@ -177,6 +185,7 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_remove_control_packet_filter       = 0x025B,
        i40e_aqc_opc_add_cloud_filters          = 0x025C,
        i40e_aqc_opc_remove_cloud_filters       = 0x025D,
+       i40e_aqc_opc_clear_wol_switch_filters   = 0x025E,
 
        i40e_aqc_opc_add_mirror_rule    = 0x0260,
        i40e_aqc_opc_delete_mirror_rule = 0x0261,
@@ -563,6 +572,56 @@ struct i40e_aqc_clear_pxe {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe);
 
+/* Set WoL Filter (0x0120) */
+
+struct i40e_aqc_set_wol_filter {
+       __le16 filter_index;
+#define I40E_AQC_MAX_NUM_WOL_FILTERS   8
+#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT       15
+#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_MASK        (0x1 << \
+               I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT)
+
+#define I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT            0
+#define I40E_AQC_SET_WOL_FILTER_INDEX_MASK     (0x7 << \
+               I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT)
+       __le16 cmd_flags;
+#define I40E_AQC_SET_WOL_FILTER                                0x8000
+#define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL             0x4000
+#define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR           0
+#define I40E_AQC_SET_WOL_FILTER_ACTION_SET             1
+       __le16 valid_flags;
+#define I40E_AQC_SET_WOL_FILTER_ACTION_VALID           0x8000
+#define I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID    0x4000
+       u8 reserved[2];
+       __le32  address_high;
+       __le32  address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter);
+
+struct i40e_aqc_set_wol_filter_data {
+       u8 filter[128];
+       u8 mask[16];
+};
+
+I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data);
+
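A sketch of how the new descriptor might be filled in (the filter slot and
the surrounding admin-queue send are assumptions, not shown in this patch):

	/* hypothetical: program WoL filter 'idx' and mark the action valid */
	struct i40e_aqc_set_wol_filter cmd = {};
	u16 idx = 0;	/* assumed slot, < I40E_AQC_MAX_NUM_WOL_FILTERS */

	cmd.filter_index = cpu_to_le16((idx << I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT) &
				       I40E_AQC_SET_WOL_FILTER_INDEX_MASK);
	cmd.cmd_flags = cpu_to_le16(I40E_AQC_SET_WOL_FILTER |
				    I40E_AQC_SET_WOL_FILTER_ACTION_SET);
	cmd.valid_flags = cpu_to_le16(I40E_AQC_SET_WOL_FILTER_ACTION_VALID);
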
+/* Get Wake Reason (0x0121) */
+
+struct i40e_aqc_get_wake_reason_completion {
+       u8 reserved_1[2];
+       __le16 wake_reason;
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT     0
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK (0xFF << \
+               I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT)
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT  8
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_MASK   (0xFF << \
+               I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT)
+       u8 reserved_2[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion);
+
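Decoding the completion is a plain mask-and-shift on the little-endian field;
for example (a sketch; the response pointer comes from the AQ event handling,
which is not part of this hunk):

	/* hypothetical: find which WoL filter index woke the port */
	struct i40e_aqc_get_wake_reason_completion *resp;	/* from AQ event */
	u16 reason = le16_to_cpu(resp->wake_reason);
	u8 matched = (reason &
		      I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK) >>
		     I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT;
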
 /* Switch configuration commands (0x02xx) */
 
 /* Used by many indirect commands that only pass an seid and a buffer in the
@@ -645,6 +704,8 @@ struct i40e_aqc_set_port_parameters {
 #define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS 2 /* must set! */
 #define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA   4
        __le16  bad_frame_vsi;
+#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_SHIFT 0x0
+#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_MASK  0x3FF
        __le16  default_seid;        /* reserved for command */
        u8      reserved[10];
 };
@@ -696,6 +757,7 @@ I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
 /* Set Switch Configuration (direct 0x0205) */
 struct i40e_aqc_set_switch_config {
        __le16  flags;
+/* flags used for both fields below */
 #define I40E_AQ_SET_SWITCH_CFG_PROMISC         0x0001
 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER       0x0002
        __le16  valid_flags;
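
The added comment means each I40E_AQ_SET_SWITCH_CFG_* bit is meaningful in
both 'flags' and 'valid_flags': a caller sets the bit in both to change that
setting, e.g. (sketch only):

	/* hypothetical: request promiscuous mode and mark that bit as valid */
	struct i40e_aqc_set_switch_config scfg = {};

	scfg.flags = cpu_to_le16(I40E_AQ_SET_SWITCH_CFG_PROMISC);
	scfg.valid_flags = cpu_to_le16(I40E_AQ_SET_SWITCH_CFG_PROMISC);
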
@@ -1844,11 +1906,12 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_CONFIG_FEC_RS_ENA      0x02
 #define I40E_AQ_CONFIG_CRC_ENA         0x04
 #define I40E_AQ_CONFIG_PACING_MASK     0x78
-       u8      external_power_ability;
+       u8      power_desc;
 #define I40E_AQ_LINK_POWER_CLASS_1     0x00
 #define I40E_AQ_LINK_POWER_CLASS_2     0x01
 #define I40E_AQ_LINK_POWER_CLASS_3     0x02
 #define I40E_AQ_LINK_POWER_CLASS_4     0x03
+#define I40E_AQ_PWR_CLASS_MASK         0x03
        u8      reserved[4];
 };
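
With the rename to power_desc and the new mask, the power class is extracted
from the low bits rather than read as a whole byte; roughly:

	/* hypothetical: low two bits of power_desc carry the power class;
	 * 'link' is a struct i40e_aqc_get_link_status * from a completed
	 * AQ command
	 */
	u8 power_class = link->power_desc & I40E_AQ_PWR_CLASS_MASK;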
 
index d570219efd9f33b8934fdfe8ad3e256fb78a2e97..191028b1489b1fb4bceb49306ba82e2e3046ad87 100644 (file)
 #include "i40e_client.h"
 
 static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
-
+static struct i40e_client *registered_client;
 static LIST_HEAD(i40e_devices);
 static DEFINE_MUTEX(i40e_device_mutex);
 
-static LIST_HEAD(i40e_clients);
-static DEFINE_MUTEX(i40e_client_mutex);
-
-static LIST_HEAD(i40e_client_instances);
-static DEFINE_MUTEX(i40e_client_instance_mutex);
-
 static int i40e_client_virtchnl_send(struct i40e_info *ldev,
                                     struct i40e_client *client,
                                     u32 vf_id, u8 *msg, u16 len);
@@ -66,28 +60,6 @@ static struct i40e_ops i40e_lan_ops = {
        .update_vsi_ctxt = i40e_client_update_vsi_ctxt,
 };
 
-/**
- * i40e_client_type_to_vsi_type - convert client type to vsi type
- * @client_type: the i40e_client type
- *
- * returns the related vsi type value
- **/
-static
-enum i40e_vsi_type i40e_client_type_to_vsi_type(enum i40e_client_type type)
-{
-       switch (type) {
-       case I40E_CLIENT_IWARP:
-               return I40E_VSI_IWARP;
-
-       case I40E_CLIENT_VMDQ2:
-               return I40E_VSI_VMDQ2;
-
-       default:
-               pr_err("i40e: Client type unknown\n");
-               return I40E_VSI_TYPE_UNKNOWN;
-       }
-}
-
 /**
  * i40e_client_get_params - Get the params that can change at runtime
  * @vsi: the VSI with the message
@@ -134,31 +106,22 @@ int i40e_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params)
 void
 i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_client_instance *cdev = pf->cinst;
 
-       if (!vsi)
+       if (!cdev || !cdev->client)
+               return;
+       if (!cdev->client->ops || !cdev->client->ops->virtchnl_receive) {
+               dev_dbg(&pf->pdev->dev,
+                       "Cannot locate client instance virtual channel receive routine\n");
+               return;
+       }
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+               dev_dbg(&pf->pdev->dev, "Client is not open, abort virtchnl_receive\n");
                return;
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if (cdev->lan_info.pf == vsi->back) {
-                       if (!cdev->client ||
-                           !cdev->client->ops ||
-                           !cdev->client->ops->virtchnl_receive) {
-                               dev_dbg(&vsi->back->pdev->dev,
-                                       "Cannot locate client instance virtual channel receive routine\n");
-                               continue;
-                       }
-                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                                     &cdev->state)) {
-                               dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort virtchnl_receive\n");
-                               continue;
-                       }
-                       cdev->client->ops->virtchnl_receive(&cdev->lan_info,
-                                                           cdev->client,
-                                                           vf_id, msg, len);
-               }
        }
-       mutex_unlock(&i40e_client_instance_mutex);
+       cdev->client->ops->virtchnl_receive(&cdev->lan_info, cdev->client,
+                                           vf_id, msg, len);
 }
 
 /**
@@ -169,39 +132,30 @@ i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
  **/
 void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_client_instance *cdev = pf->cinst;
        struct i40e_params params;
 
-       if (!vsi)
+       if (!cdev || !cdev->client)
                return;
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if (cdev->lan_info.pf == vsi->back) {
-                       if (!cdev->client ||
-                           !cdev->client->ops ||
-                           !cdev->client->ops->l2_param_change) {
-                               dev_dbg(&vsi->back->pdev->dev,
-                                       "Cannot locate client instance l2_param_change routine\n");
-                               continue;
-                       }
+       if (!cdev->client->ops || !cdev->client->ops->l2_param_change) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Cannot locate client instance l2_param_change routine\n");
+               return;
+       }
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+               dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n");
+               return;
+       }
        memset(&params, 0, sizeof(params));
        i40e_client_get_params(vsi, &params);
-                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                                     &cdev->state)) {
-                               dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n");
-                               continue;
-                       }
-                       cdev->lan_info.params = params;
-                       cdev->client->ops->l2_param_change(&cdev->lan_info,
-                                                          cdev->client,
-                                                          &params);
-               }
-       }
-       mutex_unlock(&i40e_client_instance_mutex);
+       memcpy(&cdev->lan_info.params, &params, sizeof(struct i40e_params));
+       cdev->client->ops->l2_param_change(&cdev->lan_info, cdev->client,
+                                          &params);
 }
 
 /**
- * i40e_client_release_qvlist
+ * i40e_client_release_qvlist - release MSI-X vector mapping for client
  * @ldev: pointer to L2 context.
  *
  **/
@@ -237,26 +191,19 @@ static void i40e_client_release_qvlist(struct i40e_info *ldev)
  **/
 void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_client_instance *cdev = pf->cinst;
 
-       if (!vsi)
+       if (!cdev || !cdev->client)
+               return;
+       if (!cdev->client->ops || !cdev->client->ops->close) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Cannot locate client instance close routine\n");
                return;
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if (cdev->lan_info.netdev == vsi->netdev) {
-                       if (!cdev->client ||
-                           !cdev->client->ops || !cdev->client->ops->close) {
-                               dev_dbg(&vsi->back->pdev->dev,
-                                       "Cannot locate client instance close routine\n");
-                               continue;
-                       }
-                       cdev->client->ops->close(&cdev->lan_info, cdev->client,
-                                                reset);
-                       clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
-                       i40e_client_release_qvlist(&cdev->lan_info);
-               }
        }
-       mutex_unlock(&i40e_client_instance_mutex);
+       cdev->client->ops->close(&cdev->lan_info, cdev->client, reset);
+       clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+       i40e_client_release_qvlist(&cdev->lan_info);
 }
 
 /**
@@ -268,30 +215,20 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
  **/
 void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_client_instance *cdev = pf->cinst;
 
-       if (!pf)
+       if (!cdev || !cdev->client)
+               return;
+       if (!cdev->client->ops || !cdev->client->ops->vf_reset) {
+               dev_dbg(&pf->pdev->dev,
+                       "Cannot locate client instance VF reset routine\n");
                return;
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if (cdev->lan_info.pf == pf) {
-                       if (!cdev->client ||
-                           !cdev->client->ops ||
-                           !cdev->client->ops->vf_reset) {
-                               dev_dbg(&pf->pdev->dev,
-                                       "Cannot locate client instance VF reset routine\n");
-                               continue;
-                       }
-                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                                     &cdev->state)) {
-                               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n");
-                               continue;
-                       }
-                       cdev->client->ops->vf_reset(&cdev->lan_info,
-                                                   cdev->client, vf_id);
-               }
        }
-       mutex_unlock(&i40e_client_instance_mutex);
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n");
+               return;
+       }
+       cdev->client->ops->vf_reset(&cdev->lan_info, cdev->client, vf_id);
 }
 
 /**
@@ -303,30 +240,21 @@ void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
  **/
 void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_client_instance *cdev = pf->cinst;
 
-       if (!pf)
+       if (!cdev || !cdev->client)
+               return;
+       if (!cdev->client->ops || !cdev->client->ops->vf_enable) {
+               dev_dbg(&pf->pdev->dev,
+                       "Cannot locate client instance VF enable routine\n");
+               return;
+       }
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                     &cdev->state)) {
+               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n");
                return;
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if (cdev->lan_info.pf == pf) {
-                       if (!cdev->client ||
-                           !cdev->client->ops ||
-                           !cdev->client->ops->vf_enable) {
-                               dev_dbg(&pf->pdev->dev,
-                                       "Cannot locate client instance VF enable routine\n");
-                               continue;
-                       }
-                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                                     &cdev->state)) {
-                               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n");
-                               continue;
-                       }
-                       cdev->client->ops->vf_enable(&cdev->lan_info,
-                                                    cdev->client, num_vfs);
-               }
        }
-       mutex_unlock(&i40e_client_instance_mutex);
+       cdev->client->ops->vf_enable(&cdev->lan_info, cdev->client, num_vfs);
 }
 
 /**
@@ -337,37 +265,25 @@ void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
 * If there is a client attached to this PF, call
 * its vf_capable routine
  **/
-int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
-                          enum i40e_client_type type)
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_client_instance *cdev = pf->cinst;
        int capable = false;
 
-       if (!pf)
-               return false;
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if (cdev->lan_info.pf == pf) {
-                       if (!cdev->client ||
-                           !cdev->client->ops ||
-                           !cdev->client->ops->vf_capable ||
-                           !(cdev->client->type == type)) {
-                               dev_dbg(&pf->pdev->dev,
-                                       "Cannot locate client instance VF capability routine\n");
-                               continue;
-                       }
-                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                                     &cdev->state)) {
-                               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-capable\n");
-                               continue;
-                       }
-                       capable = cdev->client->ops->vf_capable(&cdev->lan_info,
-                                                               cdev->client,
-                                                               vf_id);
-                       break;
-               }
+       if (!cdev || !cdev->client)
+               goto out;
+       if (!cdev->client->ops || !cdev->client->ops->vf_capable) {
+               dev_info(&pf->pdev->dev,
+                        "Cannot locate client instance VF capability routine\n");
+               goto out;
        }
-       mutex_unlock(&i40e_client_instance_mutex);
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state))
+               goto out;
+
+       capable = cdev->client->ops->vf_capable(&cdev->lan_info,
+                                               cdev->client,
+                                               vf_id);
+out:
        return capable;
 }
 
@@ -377,27 +293,19 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
  * @client: pointer to a client struct in the client list.
  * @existing: if there was already an existing instance
  *
- * Returns cdev ptr on success or if already exists, NULL on failure
  **/
-static
-struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
-                                                    struct i40e_client *client,
-                                                    bool *existing)
+static void i40e_client_add_instance(struct i40e_pf *pf)
 {
-       struct i40e_client_instance *cdev;
+       struct i40e_client_instance *cdev = NULL;
        struct netdev_hw_addr *mac = NULL;
        struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry(cdev, &i40e_client_instances, list) {
-               if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
-                       *existing = true;
-                       goto out;
-               }
-       }
+       if (!registered_client || pf->cinst)
+               return;
+
        cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
        if (!cdev)
-               goto out;
+               return;
 
        cdev->lan_info.pf = (void *)pf;
        cdev->lan_info.netdev = vsi->netdev;
@@ -417,7 +325,7 @@ struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
        if (i40e_client_get_params(vsi, &cdev->lan_info.params)) {
                kfree(cdev);
                cdev = NULL;
-               goto out;
+               return;
        }
 
        cdev->lan_info.msix_count = pf->num_iwarp_msix;
@@ -430,41 +338,20 @@ struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
        else
                dev_err(&pf->pdev->dev, "MAC address list is empty!\n");
 
-       cdev->client = client;
-       INIT_LIST_HEAD(&cdev->list);
-       list_add(&cdev->list, &i40e_client_instances);
-out:
-       mutex_unlock(&i40e_client_instance_mutex);
-       return cdev;
+       cdev->client = registered_client;
+       pf->cinst = cdev;
 }
 
 /**
  * i40e_client_del_instance - removes a client instance from the list
  * @pf: pointer to the board struct
  *
- * Returns 0 on success or non-0 on error
  **/
 static
-int i40e_client_del_instance(struct i40e_pf *pf, struct i40e_client *client)
+void i40e_client_del_instance(struct i40e_pf *pf)
 {
-       struct i40e_client_instance *cdev, *tmp;
-       int ret = -ENODEV;
-
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
-               if ((cdev->lan_info.pf != pf) || (cdev->client != client))
-                       continue;
-
-               dev_info(&pf->pdev->dev, "Deleted instance of Client %s, of dev %d bus=0x%02x func=0x%02x)\n",
-                        client->name, pf->hw.pf_id,
-                        pf->hw.bus.device, pf->hw.bus.func);
-               list_del(&cdev->list);
-               kfree(cdev);
-               ret = 0;
-               break;
-       }
-       mutex_unlock(&i40e_client_instance_mutex);
-       return ret;
+       kfree(pf->cinst);
+       pf->cinst = NULL;
 }
 
 /**
@@ -473,67 +360,50 @@ int i40e_client_del_instance(struct i40e_pf *pf, struct i40e_client *client)
  **/
 void i40e_client_subtask(struct i40e_pf *pf)
 {
+       struct i40e_client *client = registered_client;
        struct i40e_client_instance *cdev;
-       struct i40e_client *client;
-       bool existing = false;
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
        int ret = 0;
 
        if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
                return;
        pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+       cdev = pf->cinst;
 
        /* If we're down or resetting, just bail */
        if (test_bit(__I40E_DOWN, &pf->state) ||
            test_bit(__I40E_CONFIG_BUSY, &pf->state))
                return;
 
-       /* Check client state and instantiate client if client registered */
-       mutex_lock(&i40e_client_mutex);
-       list_for_each_entry(client, &i40e_clients, list) {
-               /* first check client is registered */
-               if (!test_bit(__I40E_CLIENT_REGISTERED, &client->state))
-                       continue;
-
-               /* Do we also need the LAN VSI to be up, to create instance */
-               if (!(client->flags & I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE)) {
-                       /* check if L2 VSI is up, if not we are not ready */
-                       if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
-                               continue;
-               } else {
-                       dev_warn(&pf->pdev->dev, "This client %s is being instantiated at probe\n",
-                                client->name);
-               }
-
-               /* Add the client instance to the instance list */
-               cdev = i40e_client_add_instance(pf, client, &existing);
-               if (!cdev)
-                       continue;
-
-               if (!existing) {
-                       dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x dev=0x%02x func=0x%02x\n",
-                                client->name, pf->hw.pf_id,
-                                pf->hw.bus.bus_id, pf->hw.bus.device,
-                                pf->hw.bus.func);
-               }
+       if (!client || !cdev)
+               return;
 
-               mutex_lock(&i40e_client_instance_mutex);
-               if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                             &cdev->state)) {
-                       /* Send an Open request to the client */
-                       if (client->ops && client->ops->open)
-                               ret = client->ops->open(&cdev->lan_info,
-                                                       client);
-                       if (!ret) {
-                               set_bit(__I40E_CLIENT_INSTANCE_OPENED,
-                                       &cdev->state);
-                       } else {
-                               /* remove client instance */
-                               i40e_client_del_instance(pf, client);
+       /* Here we handle client opens. If the client is down, but
+        * the netdev is up, then open the client.
+        */
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+               if (!test_bit(__I40E_DOWN, &vsi->state) &&
+                   client->ops && client->ops->open) {
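+                       /* mark the instance opened before calling open();
+                        * the bit is cleared again below if open() fails
+                        */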
+                       set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+                       ret = client->ops->open(&cdev->lan_info, client);
+                       if (ret) {
+                               /* Remove failed client instance */
+                               clear_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                         &cdev->state);
+                               i40e_client_del_instance(pf);
                        }
                }
-               mutex_unlock(&i40e_client_instance_mutex);
+       } else {
+               /* Likewise for client close. If the client is up, but the
+                * netdev is down, then close the client.
+                */
+               if (test_bit(__I40E_DOWN, &vsi->state) &&
+                   client->ops && client->ops->close) {
+                       clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+                       client->ops->close(&cdev->lan_info, client, false);
+                       i40e_client_release_qvlist(&cdev->lan_info);
+               }
        }
-       mutex_unlock(&i40e_client_mutex);
 }
 
 /**
@@ -601,7 +471,6 @@ int i40e_lan_del_device(struct i40e_pf *pf)
                        break;
                }
        }
-
        mutex_unlock(&i40e_device_mutex);
        return ret;
 }
@@ -610,22 +479,24 @@ int i40e_lan_del_device(struct i40e_pf *pf)
  * i40e_client_release - release client specific resources
  * @client: pointer to the registered client
  *
- * Return 0 on success or < 0 on error
  **/
-static int i40e_client_release(struct i40e_client *client)
+static void i40e_client_release(struct i40e_client *client)
 {
-       struct i40e_client_instance *cdev, *tmp;
+       struct i40e_client_instance *cdev;
+       struct i40e_device *ldev;
        struct i40e_pf *pf;
-       int ret = 0;
 
-       LIST_HEAD(cdevs_tmp);
-
-       mutex_lock(&i40e_client_instance_mutex);
-       list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
-               if (strncmp(cdev->client->name, client->name,
-                           I40E_CLIENT_STR_LENGTH))
+       mutex_lock(&i40e_device_mutex);
+       list_for_each_entry(ldev, &i40e_devices, list) {
+               pf = ldev->pf;
+               cdev = pf->cinst;
+               if (!cdev)
                        continue;
-               pf = (struct i40e_pf *)cdev->lan_info.pf;
+
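+               /* hold __I40E_SERVICE_SCHED so the service task (and with
+                * it the client subtask) cannot be scheduled while this
+                * instance is being closed and freed; released below
+                */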
+               while (test_and_set_bit(__I40E_SERVICE_SCHED,
+                                       &pf->state))
+                       usleep_range(500, 1000);
+
                if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
                        if (client->ops && client->ops->close)
                                client->ops->close(&cdev->lan_info, client,
@@ -637,18 +508,13 @@ static int i40e_client_release(struct i40e_client *client)
                                 "Client %s instance for PF id %d closed\n",
                                 client->name, pf->hw.pf_id);
                }
-               /* delete the client instance from the list */
-               list_move(&cdev->list, &cdevs_tmp);
+               /* delete the client instance */
+               i40e_client_del_instance(pf);
                dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
                         client->name);
+               clear_bit(__I40E_SERVICE_SCHED, &pf->state);
        }
-       mutex_unlock(&i40e_client_instance_mutex);
-
-       /* free the client device and release its vsi */
-       list_for_each_entry_safe(cdev, tmp, &cdevs_tmp, list) {
-               kfree(cdev);
-       }
-       return ret;
+       mutex_unlock(&i40e_device_mutex);
 }
 
 /**
@@ -664,6 +530,7 @@ static void i40e_client_prepare(struct i40e_client *client)
        mutex_lock(&i40e_device_mutex);
        list_for_each_entry(ldev, &i40e_devices, list) {
                pf = ldev->pf;
+               i40e_client_add_instance(pf);
                /* Start the client subtask */
                pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
                i40e_service_event_schedule(pf);
@@ -792,8 +659,8 @@ static void i40e_client_request_reset(struct i40e_info *ldev,
                break;
        default:
                dev_warn(&pf->pdev->dev,
-                        "Client %s instance for PF id %d request an unsupported reset: %d.\n",
-                        client->name, pf->hw.pf_id, reset_level);
+                        "Client for PF id %d requested an unsupported reset: %d.\n",
+                        pf->hw.pf_id, reset_level);
                break;
        }
 
@@ -852,8 +719,8 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
        } else {
                update = false;
                dev_warn(&pf->pdev->dev,
-                        "Client %s instance for PF id %d request an unsupported Config: %x.\n",
-                        client->name, pf->hw.pf_id, flag);
+                        "Client for PF id %d requested an unsupported Config: %x.\n",
+                        pf->hw.pf_id, flag);
        }
 
        if (update) {
@@ -878,7 +745,6 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
 int i40e_register_client(struct i40e_client *client)
 {
        int ret = 0;
-       enum i40e_vsi_type vsi_type;
 
        if (!client) {
                ret = -EIO;
@@ -891,11 +757,9 @@ int i40e_register_client(struct i40e_client *client)
                goto out;
        }
 
-       mutex_lock(&i40e_client_mutex);
-       if (i40e_client_is_registered(client)) {
+       if (registered_client) {
                pr_info("i40e: Client %s has already been registered!\n",
                        client->name);
-               mutex_unlock(&i40e_client_mutex);
                ret = -EEXIST;
                goto out;
        }
@@ -908,22 +772,11 @@ int i40e_register_client(struct i40e_client *client)
                        client->version.major, client->version.minor,
                        client->version.build,
                        i40e_client_interface_version_str);
-               mutex_unlock(&i40e_client_mutex);
                ret = -EIO;
                goto out;
        }
 
-       vsi_type = i40e_client_type_to_vsi_type(client->type);
-       if (vsi_type == I40E_VSI_TYPE_UNKNOWN) {
-               pr_info("i40e: Failed to register client %s due to unknown client type %d\n",
-                       client->name, client->type);
-               mutex_unlock(&i40e_client_mutex);
-               ret = -EIO;
-               goto out;
-       }
-       list_add(&client->list, &i40e_clients);
-       set_bit(__I40E_CLIENT_REGISTERED, &client->state);
-       mutex_unlock(&i40e_client_mutex);
+       registered_client = client;
 
        i40e_client_prepare(client);
 
@@ -943,29 +796,21 @@ int i40e_unregister_client(struct i40e_client *client)
 {
        int ret = 0;
 
-       /* When a unregister request comes through we would have to send
-        * a close for each of the client instances that were opened.
-        * client_release function is called to handle this.
-        */
-       mutex_lock(&i40e_client_mutex);
-       if (!client || i40e_client_release(client)) {
-               ret = -EIO;
-               goto out;
-       }
-
-       /* TODO: check if device is in reset, or if that matters? */
-       if (!i40e_client_is_registered(client)) {
+       if (registered_client != client) {
                pr_info("i40e: Client %s has not been registered\n",
                        client->name);
                ret = -ENODEV;
                goto out;
        }
-       clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
-       list_del(&client->list);
-       pr_info("i40e: Unregistered client %s with return code %d\n",
-               client->name, ret);
+       registered_client = NULL;
+       /* When an unregister request comes through, we have to send
+        * a close for each of the client instances that were opened.
+        * The client_release function is called to handle this.
+        */
+        */
+       i40e_client_release(client);
+
+       pr_info("i40e: Unregistered client %s\n", client->name);
 out:
-       mutex_unlock(&i40e_client_mutex);
        return ret;
 }
 EXPORT_SYMBOL(i40e_unregister_client);
index 528bd79b05fecc68d981ea08b144d9898c6aaaa0..15b21a5315b597a0ee4cd933fc10bfa5cec0a510 100644 (file)
@@ -57,11 +57,6 @@ enum i40e_client_instance_state {
        __I40E_CLIENT_INSTANCE_OPENED,
 };
 
-enum i40e_client_type {
-       I40E_CLIENT_IWARP,
-       I40E_CLIENT_VMDQ2
-};
-
 struct i40e_ops;
 struct i40e_client;
 
@@ -214,7 +209,8 @@ struct i40e_client {
        u32 flags;
 #define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE      BIT(0)
 #define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS      BIT(2)
-       enum i40e_client_type type;
+       u8 type;
+#define I40E_CLIENT_IWARP 0
        const struct i40e_client_ops *ops; /* client ops provided by the client */
 };
 
index ece57d6a6e232f93ca28f493f91e27428b7cb41e..f9db95aa3a2036940a2fcb8d43b6a5bba201b863 100644 (file)
@@ -1088,33 +1088,6 @@ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable)
 
        wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), reg_val);
 }
-#ifdef I40E_FCOE
-
-/**
- * i40e_get_san_mac_addr - get SAN MAC address
- * @hw: pointer to the HW structure
- * @mac_addr: pointer to SAN MAC address
- *
- * Reads the adapter's SAN MAC address from NVM
- **/
-i40e_status i40e_get_san_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
-{
-       struct i40e_aqc_mac_address_read_data addrs;
-       i40e_status status;
-       u16 flags = 0;
-
-       status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
-       if (status)
-               return status;
-
-       if (flags & I40E_AQC_SAN_ADDR_VALID)
-               ether_addr_copy(mac_addr, addrs.pf_san_mac);
-       else
-               status = I40E_ERR_INVALID_MAC_ADDR;
-
-       return status;
-}
-#endif
 
 /**
  *  i40e_read_pba_string - Reads part number string from EEPROM
@@ -4990,7 +4963,9 @@ u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
        int retry = 5;
        u32 val = 0;
 
-       use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5);
+       use_register = (((hw->aq.api_maj_ver == 1) &&
+                       (hw->aq.api_min_ver < 5)) ||
+                       (hw->mac.type == I40E_MAC_X722));
        if (!use_register) {
 do_retry:
                status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL);
@@ -5049,7 +5024,9 @@ void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
        bool use_register;
        int retry = 5;
 
-       use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5);
+       use_register = (((hw->aq.api_maj_ver == 1) &&
+                       (hw->aq.api_min_ver < 5)) ||
+                       (hw->mac.type == I40E_MAC_X722));
        if (!use_register) {
 do_retry:
                status = i40e_aq_rx_ctl_write_register(hw, reg_addr,
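
The widened use_register test forces direct register access on X722 parts as
well as on firmware with a pre-1.5 admin-queue API. Were the shared predicate
factored out (a hypothetical refactor, not part of this patch), it would read:

	/* hypothetical helper: true when the AQ rx-ctl path must be bypassed */
	static bool i40e_rx_ctl_use_register(struct i40e_hw *hw)
	{
		return (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 5) ||
		       hw->mac.type == I40E_MAC_X722;
	}
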
index 267ad2588255deeb196340da7788b10bc89d1f3e..c5f68cc1edcdf7919c9f82e02e85362bdc9794f6 100644 (file)
@@ -484,25 +484,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                         vsi->bw_ets_limit_credits[i],
                         vsi->bw_ets_max_quanta[i]);
        }
-#ifdef I40E_FCOE
-       if (vsi->type == I40E_VSI_FCOE) {
-               dev_info(&pf->pdev->dev,
-                        "    fcoe_stats: rx_packets = %llu, rx_dwords = %llu, rx_dropped = %llu\n",
-                        vsi->fcoe_stats.rx_fcoe_packets,
-                        vsi->fcoe_stats.rx_fcoe_dwords,
-                        vsi->fcoe_stats.rx_fcoe_dropped);
-               dev_info(&pf->pdev->dev,
-                        "    fcoe_stats: tx_packets = %llu, tx_dwords = %llu\n",
-                        vsi->fcoe_stats.tx_fcoe_packets,
-                        vsi->fcoe_stats.tx_fcoe_dwords);
-               dev_info(&pf->pdev->dev,
-                        "    fcoe_stats: bad_crc = %llu, last_error = %llu\n",
-                        vsi->fcoe_stats.fcoe_bad_fccrc,
-                        vsi->fcoe_stats.fcoe_last_error);
-               dev_info(&pf->pdev->dev, "    fcoe_stats: ddp_count = %llu\n",
-                        vsi->fcoe_stats.fcoe_ddp_count);
-       }
-#endif
 }
 
 /**
index a22e26200bccb1a7e79716a7429e0eb7fe700ecc..c0c1a0cdaa5bce68d32cf313c3e907062e3a4060 100644 (file)
@@ -162,19 +162,6 @@ static const struct i40e_stats i40e_gstrings_stats[] = {
        I40E_PF_STAT("rx_lpi_count", stats.rx_lpi_count),
 };
 
-#ifdef I40E_FCOE
-static const struct i40e_stats i40e_gstrings_fcoe_stats[] = {
-       I40E_VSI_STAT("fcoe_bad_fccrc", fcoe_stats.fcoe_bad_fccrc),
-       I40E_VSI_STAT("rx_fcoe_dropped", fcoe_stats.rx_fcoe_dropped),
-       I40E_VSI_STAT("rx_fcoe_packets", fcoe_stats.rx_fcoe_packets),
-       I40E_VSI_STAT("rx_fcoe_dwords", fcoe_stats.rx_fcoe_dwords),
-       I40E_VSI_STAT("fcoe_ddp_count", fcoe_stats.fcoe_ddp_count),
-       I40E_VSI_STAT("fcoe_last_error", fcoe_stats.fcoe_last_error),
-       I40E_VSI_STAT("tx_fcoe_packets", fcoe_stats.tx_fcoe_packets),
-       I40E_VSI_STAT("tx_fcoe_dwords", fcoe_stats.tx_fcoe_dwords),
-};
-
-#endif /* I40E_FCOE */
 #define I40E_QUEUE_STATS_LEN(n) \
        (((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs \
            * 2 /* Tx and Rx together */                                     \
@@ -182,17 +169,9 @@ static const struct i40e_stats i40e_gstrings_fcoe_stats[] = {
 #define I40E_GLOBAL_STATS_LEN  ARRAY_SIZE(i40e_gstrings_stats)
 #define I40E_NETDEV_STATS_LEN   ARRAY_SIZE(i40e_gstrings_net_stats)
 #define I40E_MISC_STATS_LEN    ARRAY_SIZE(i40e_gstrings_misc_stats)
-#ifdef I40E_FCOE
-#define I40E_FCOE_STATS_LEN    ARRAY_SIZE(i40e_gstrings_fcoe_stats)
-#define I40E_VSI_STATS_LEN(n)  (I40E_NETDEV_STATS_LEN + \
-                                I40E_FCOE_STATS_LEN + \
-                                I40E_MISC_STATS_LEN + \
-                                I40E_QUEUE_STATS_LEN((n)))
-#else
 #define I40E_VSI_STATS_LEN(n)   (I40E_NETDEV_STATS_LEN + \
                                 I40E_MISC_STATS_LEN + \
                                 I40E_QUEUE_STATS_LEN((n)))
-#endif /* I40E_FCOE */
 #define I40E_PFC_STATS_LEN ( \
                (FIELD_SIZEOF(struct i40e_pf, stats.priority_xoff_rx) + \
                 FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_rx) + \
@@ -228,22 +207,37 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define I40E_TEST_LEN (sizeof(i40e_gstrings_test) / ETH_GSTRING_LEN)
 
-static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
-       "MFP",
-       "LinkPolling",
-       "flow-director-atr",
-       "veb-stats",
-       "hw-atr-eviction",
+struct i40e_priv_flags {
+       char flag_string[ETH_GSTRING_LEN];
+       u64 flag;
+       bool read_only;
+};
+
+#define I40E_PRIV_FLAG(_name, _flag, _read_only) { \
+       .flag_string = _name, \
+       .flag = _flag, \
+       .read_only = _read_only, \
+}
+
+static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
+       /* NOTE: MFP setting cannot be changed */
+       I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENABLED, 1),
+       I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
+       I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
+       I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
+       I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_CAPABLE, 0),
+       I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
 };
 
-#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings)
+#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
 
 /* Private flags with a global effect, restricted to PF 0 */
-static const char i40e_gl_priv_flags_strings[][ETH_GSTRING_LEN] = {
-       "vf-true-promisc-support",
+static const struct i40e_priv_flags i40e_gl_gstrings_priv_flags[] = {
+       I40E_PRIV_FLAG("vf-true-promisc-support",
+                      I40E_FLAG_TRUE_PROMISC_SUPPORT, 0),
 };
 
-#define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_priv_flags_strings)
+#define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_gstrings_priv_flags)
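
Replacing the bare string arrays with i40e_priv_flags tables lets the ethtool
handlers derive the reported bitmap, the matching flag, and the read-only
policy from one place. A sketch of how a get_priv_flags handler might walk
the table (local names are illustrative):

	/* hypothetical: build the ethtool priv-flags bitmap from the table */
	u32 ret_flags = 0;
	u32 i;

	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
		const struct i40e_priv_flags *priv = &i40e_gstrings_priv_flags[i];

		if (pf->flags & priv->flag)
			ret_flags |= BIT(i);
	}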
 
 /**
  * i40e_partition_setting_complaint - generic complaint for MFP restriction
@@ -387,7 +381,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
  *
  **/
 static void i40e_get_settings_link_up(struct i40e_hw *hw,
-                                     struct ethtool_cmd *ecmd,
+                                     struct ethtool_link_ksettings *cmd,
                                      struct net_device *netdev,
                                      struct i40e_pf *pf)
 {
@@ -395,90 +389,96 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
        u32 link_speed = hw_link_info->link_speed;
        u32 e_advertising = 0x0;
        u32 e_supported = 0x0;
+       u32 supported, advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&supported,
+                                               cmd->link_modes.supported);
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        /* Initialize supported and advertised settings based on phy settings */
        switch (hw_link_info->phy_type) {
        case I40E_PHY_TYPE_40GBASE_CR4:
        case I40E_PHY_TYPE_40GBASE_CR4_CU:
-               ecmd->supported = SUPPORTED_Autoneg |
-                                 SUPPORTED_40000baseCR4_Full;
-               ecmd->advertising = ADVERTISED_Autoneg |
-                                   ADVERTISED_40000baseCR4_Full;
+               supported = SUPPORTED_Autoneg |
+                           SUPPORTED_40000baseCR4_Full;
+               advertising = ADVERTISED_Autoneg |
+                             ADVERTISED_40000baseCR4_Full;
                break;
        case I40E_PHY_TYPE_XLAUI:
        case I40E_PHY_TYPE_XLPPI:
        case I40E_PHY_TYPE_40GBASE_AOC:
-               ecmd->supported = SUPPORTED_40000baseCR4_Full;
+               supported = SUPPORTED_40000baseCR4_Full;
                break;
        case I40E_PHY_TYPE_40GBASE_SR4:
-               ecmd->supported = SUPPORTED_40000baseSR4_Full;
+               supported = SUPPORTED_40000baseSR4_Full;
                break;
        case I40E_PHY_TYPE_40GBASE_LR4:
-               ecmd->supported = SUPPORTED_40000baseLR4_Full;
+               supported = SUPPORTED_40000baseLR4_Full;
                break;
        case I40E_PHY_TYPE_10GBASE_SR:
        case I40E_PHY_TYPE_10GBASE_LR:
        case I40E_PHY_TYPE_1000BASE_SX:
        case I40E_PHY_TYPE_1000BASE_LX:
-               ecmd->supported = SUPPORTED_10000baseT_Full;
+               supported = SUPPORTED_10000baseT_Full;
                if (hw_link_info->module_type[2] &
                    I40E_MODULE_TYPE_1000BASE_SX ||
                    hw_link_info->module_type[2] &
                    I40E_MODULE_TYPE_1000BASE_LX) {
-                       ecmd->supported |= SUPPORTED_1000baseT_Full;
+                       supported |= SUPPORTED_1000baseT_Full;
                        if (hw_link_info->requested_speeds &
                            I40E_LINK_SPEED_1GB)
-                               ecmd->advertising |= ADVERTISED_1000baseT_Full;
+                               advertising |= ADVERTISED_1000baseT_Full;
                }
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       ecmd->advertising |= ADVERTISED_10000baseT_Full;
+                       advertising |= ADVERTISED_10000baseT_Full;
                break;
        case I40E_PHY_TYPE_10GBASE_T:
        case I40E_PHY_TYPE_1000BASE_T:
        case I40E_PHY_TYPE_100BASE_TX:
-               ecmd->supported = SUPPORTED_Autoneg |
-                                 SUPPORTED_10000baseT_Full |
-                                 SUPPORTED_1000baseT_Full |
-                                 SUPPORTED_100baseT_Full;
-               ecmd->advertising = ADVERTISED_Autoneg;
+               supported = SUPPORTED_Autoneg |
+                           SUPPORTED_10000baseT_Full |
+                           SUPPORTED_1000baseT_Full |
+                           SUPPORTED_100baseT_Full;
+               advertising = ADVERTISED_Autoneg;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       ecmd->advertising |= ADVERTISED_10000baseT_Full;
+                       advertising |= ADVERTISED_10000baseT_Full;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       ecmd->advertising |= ADVERTISED_1000baseT_Full;
+                       advertising |= ADVERTISED_1000baseT_Full;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-                       ecmd->advertising |= ADVERTISED_100baseT_Full;
+                       advertising |= ADVERTISED_100baseT_Full;
                break;
        case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
-               ecmd->supported = SUPPORTED_Autoneg |
-                                 SUPPORTED_1000baseT_Full;
-               ecmd->advertising = ADVERTISED_Autoneg |
-                                   ADVERTISED_1000baseT_Full;
+               supported = SUPPORTED_Autoneg |
+                           SUPPORTED_1000baseT_Full;
+               advertising = ADVERTISED_Autoneg |
+                             ADVERTISED_1000baseT_Full;
                break;
        case I40E_PHY_TYPE_10GBASE_CR1_CU:
        case I40E_PHY_TYPE_10GBASE_CR1:
-               ecmd->supported = SUPPORTED_Autoneg |
-                                 SUPPORTED_10000baseT_Full;
-               ecmd->advertising = ADVERTISED_Autoneg |
-                                   ADVERTISED_10000baseT_Full;
+               supported = SUPPORTED_Autoneg |
+                           SUPPORTED_10000baseT_Full;
+               advertising = ADVERTISED_Autoneg |
+                             ADVERTISED_10000baseT_Full;
                break;
        case I40E_PHY_TYPE_XAUI:
        case I40E_PHY_TYPE_XFI:
        case I40E_PHY_TYPE_SFI:
        case I40E_PHY_TYPE_10GBASE_SFPP_CU:
        case I40E_PHY_TYPE_10GBASE_AOC:
-               ecmd->supported = SUPPORTED_10000baseT_Full;
-               ecmd->advertising = SUPPORTED_10000baseT_Full;
+               supported = SUPPORTED_10000baseT_Full;
+               advertising = SUPPORTED_10000baseT_Full;
                break;
        case I40E_PHY_TYPE_SGMII:
-               ecmd->supported = SUPPORTED_Autoneg |
-                                 SUPPORTED_1000baseT_Full;
+               supported = SUPPORTED_Autoneg |
+                           SUPPORTED_1000baseT_Full;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       ecmd->advertising |= ADVERTISED_1000baseT_Full;
+                       advertising |= ADVERTISED_1000baseT_Full;
                if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) {
-                       ecmd->supported |= SUPPORTED_100baseT_Full;
+                       supported |= SUPPORTED_100baseT_Full;
                        if (hw_link_info->requested_speeds &
                            I40E_LINK_SPEED_100MB)
-                               ecmd->advertising |= ADVERTISED_100baseT_Full;
+                               advertising |= ADVERTISED_100baseT_Full;
                }
                break;
        case I40E_PHY_TYPE_40GBASE_KR4:
@@ -486,25 +486,25 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
        case I40E_PHY_TYPE_10GBASE_KR:
        case I40E_PHY_TYPE_10GBASE_KX4:
        case I40E_PHY_TYPE_1000BASE_KX:
-               ecmd->supported |= SUPPORTED_40000baseKR4_Full |
-                                  SUPPORTED_20000baseKR2_Full |
-                                  SUPPORTED_10000baseKR_Full |
-                                  SUPPORTED_10000baseKX4_Full |
-                                  SUPPORTED_1000baseKX_Full |
-                                  SUPPORTED_Autoneg;
-               ecmd->advertising |= ADVERTISED_40000baseKR4_Full |
-                                    ADVERTISED_20000baseKR2_Full |
-                                    ADVERTISED_10000baseKR_Full |
-                                    ADVERTISED_10000baseKX4_Full |
-                                    ADVERTISED_1000baseKX_Full |
-                                    ADVERTISED_Autoneg;
+               supported |= SUPPORTED_40000baseKR4_Full |
+                            SUPPORTED_20000baseKR2_Full |
+                            SUPPORTED_10000baseKR_Full |
+                            SUPPORTED_10000baseKX4_Full |
+                            SUPPORTED_1000baseKX_Full |
+                            SUPPORTED_Autoneg;
+               advertising |= ADVERTISED_40000baseKR4_Full |
+                              ADVERTISED_20000baseKR2_Full |
+                              ADVERTISED_10000baseKR_Full |
+                              ADVERTISED_10000baseKX4_Full |
+                              ADVERTISED_1000baseKX_Full |
+                              ADVERTISED_Autoneg;
                break;
        case I40E_PHY_TYPE_25GBASE_KR:
        case I40E_PHY_TYPE_25GBASE_CR:
        case I40E_PHY_TYPE_25GBASE_SR:
        case I40E_PHY_TYPE_25GBASE_LR:
-               ecmd->supported = SUPPORTED_Autoneg;
-               ecmd->advertising = ADVERTISED_Autoneg;
+               supported = SUPPORTED_Autoneg;
+               advertising = ADVERTISED_Autoneg;
                /* TODO: add speeds when ethtool is ready to support them */
                break;
        default:
@@ -520,38 +520,43 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
        i40e_phy_type_to_ethtool(pf, &e_supported,
                                 &e_advertising);
 
-       ecmd->supported = ecmd->supported & e_supported;
-       ecmd->advertising = ecmd->advertising & e_advertising;
+       supported = supported & e_supported;
+       advertising = advertising & e_advertising;
 
        /* Set speed and duplex */
        switch (link_speed) {
        case I40E_LINK_SPEED_40GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_40000);
+               cmd->base.speed = SPEED_40000;
                break;
        case I40E_LINK_SPEED_25GB:
 #ifdef SPEED_25000
-               ethtool_cmd_speed_set(ecmd, SPEED_25000);
+               cmd->base.speed = SPEED_25000;
 #else
                netdev_info(netdev,
                            "Speed is 25G, display not supported by this version of ethtool.\n");
 #endif
                break;
        case I40E_LINK_SPEED_20GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_20000);
+               cmd->base.speed = SPEED_20000;
                break;
        case I40E_LINK_SPEED_10GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_10000);
+               cmd->base.speed = SPEED_10000;
                break;
        case I40E_LINK_SPEED_1GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_1000);
+               cmd->base.speed = SPEED_1000;
                break;
        case I40E_LINK_SPEED_100MB:
-               ethtool_cmd_speed_set(ecmd, SPEED_100);
+               cmd->base.speed = SPEED_100;
                break;
        default:
                break;
        }
-       ecmd->duplex = DUPLEX_FULL;
+       cmd->base.duplex = DUPLEX_FULL;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 }
 
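The conversion helpers used above come from the ethtool core. A minimal
standalone sketch of the round trip, assuming only the helpers declared in
include/linux/ethtool.h (this sketch is not part of the patch):

	__ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
	u32 legacy = ADVERTISED_10000baseKR_Full | ADVERTISED_Autoneg;
	u32 back;

	/* Widen the legacy u32 into the arbitrary-length bitmap... */
	ethtool_convert_legacy_u32_to_link_mode(modes, legacy);

	/* ...and narrow it again; returns false if bits above 31 would be
	 * lost, which is why newer link modes must be set on the bitmap
	 * directly rather than through the legacy word.
	 */
	if (!ethtool_convert_link_mode_to_legacy_u32(&back, modes))
		pr_warn("link modes not representable as a legacy u32\n");
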
 /**
@@ -562,18 +567,24 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
  * Reports link settings that can be determined when link is down
  **/
 static void i40e_get_settings_link_down(struct i40e_hw *hw,
-                                       struct ethtool_cmd *ecmd,
+                                       struct ethtool_link_ksettings *cmd,
                                        struct i40e_pf *pf)
 {
+       u32 supported, advertising;
+
        /* link is down and the driver needs to fall back on
         * supported phy types to figure out what info to display
         */
-       i40e_phy_type_to_ethtool(pf, &ecmd->supported,
-                                &ecmd->advertising);
+       i40e_phy_type_to_ethtool(pf, &supported, &advertising);
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        /* With no link, speed and duplex are unknown */
-       ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-       ecmd->duplex = DUPLEX_UNKNOWN;
+       cmd->base.speed = SPEED_UNKNOWN;
+       cmd->base.duplex = DUPLEX_UNKNOWN;
 }
 
 /**
@@ -583,74 +594,85 @@ static void i40e_get_settings_link_down(struct i40e_hw *hw,
  *
  * Reports speed/duplex settings based on media_type
  **/
-static int i40e_get_settings(struct net_device *netdev,
-                            struct ethtool_cmd *ecmd)
+static int i40e_get_link_ksettings(struct net_device *netdev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_pf *pf = np->vsi->back;
        struct i40e_hw *hw = &pf->hw;
        struct i40e_link_status *hw_link_info = &hw->phy.link_info;
        bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
+       u32 advertising;
 
        if (link_up)
-               i40e_get_settings_link_up(hw, ecmd, netdev, pf);
+               i40e_get_settings_link_up(hw, cmd, netdev, pf);
        else
-               i40e_get_settings_link_down(hw, ecmd, pf);
+               i40e_get_settings_link_down(hw, cmd, pf);
 
        /* Now set the settings that don't rely on link being up/down */
        /* Set autoneg settings */
-       ecmd->autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+       cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
                          AUTONEG_ENABLE : AUTONEG_DISABLE);
 
        switch (hw->phy.media_type) {
        case I40E_MEDIA_TYPE_BACKPLANE:
-               ecmd->supported |= SUPPORTED_Autoneg |
-                                  SUPPORTED_Backplane;
-               ecmd->advertising |= ADVERTISED_Autoneg |
-                                    ADVERTISED_Backplane;
-               ecmd->port = PORT_NONE;
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    Autoneg);
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    Backplane);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Autoneg);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Backplane);
+               cmd->base.port = PORT_NONE;
                break;
        case I40E_MEDIA_TYPE_BASET:
-               ecmd->supported |= SUPPORTED_TP;
-               ecmd->advertising |= ADVERTISED_TP;
-               ecmd->port = PORT_TP;
+               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+               cmd->base.port = PORT_TP;
                break;
        case I40E_MEDIA_TYPE_DA:
        case I40E_MEDIA_TYPE_CX4:
-               ecmd->supported |= SUPPORTED_FIBRE;
-               ecmd->advertising |= ADVERTISED_FIBRE;
-               ecmd->port = PORT_DA;
+               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+               cmd->base.port = PORT_DA;
                break;
        case I40E_MEDIA_TYPE_FIBER:
-               ecmd->supported |= SUPPORTED_FIBRE;
-               ecmd->port = PORT_FIBRE;
+               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+               cmd->base.port = PORT_FIBRE;
                break;
        case I40E_MEDIA_TYPE_UNKNOWN:
        default:
-               ecmd->port = PORT_OTHER;
+               cmd->base.port = PORT_OTHER;
                break;
        }
 
-       /* Set transceiver */
-       ecmd->transceiver = XCVR_EXTERNAL;
-
        /* Set flow control settings */
-       ecmd->supported |= SUPPORTED_Pause;
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
 
        switch (hw->fc.requested_mode) {
        case I40E_FC_FULL:
-               ecmd->advertising |= ADVERTISED_Pause;
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Pause);
                break;
        case I40E_FC_TX_PAUSE:
-               ecmd->advertising |= ADVERTISED_Asym_Pause;
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Asym_Pause);
                break;
        case I40E_FC_RX_PAUSE:
-               ecmd->advertising |= (ADVERTISED_Pause |
-                                     ADVERTISED_Asym_Pause);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Pause);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Asym_Pause);
                break;
        default:
-               ecmd->advertising &= ~(ADVERTISED_Pause |
-                                      ADVERTISED_Asym_Pause);
+               ethtool_convert_link_mode_to_legacy_u32(
+                       &advertising, cmd->link_modes.advertising);
+
+               advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.advertising, advertising);
                break;
        }
 
@@ -664,8 +686,8 @@ static int i40e_get_settings(struct net_device *netdev,
  *
  * Set speed/duplex per media_types advertised/forced
  **/
-static int i40e_set_settings(struct net_device *netdev,
-                            struct ethtool_cmd *ecmd)
+static int i40e_set_link_ksettings(struct net_device *netdev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_aq_get_phy_abilities_resp abilities;
@@ -673,12 +695,14 @@ static int i40e_set_settings(struct net_device *netdev,
        struct i40e_pf *pf = np->vsi->back;
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_hw *hw = &pf->hw;
-       struct ethtool_cmd safe_ecmd;
+       struct ethtool_link_ksettings safe_cmd;
+       struct ethtool_link_ksettings copy_cmd;
        i40e_status status = 0;
        bool change = false;
        int err = 0;
-       u8 autoneg;
+       u32 autoneg;
        u32 advertise;
+       u32 tmp;
 
        /* Changing port settings is not supported if this isn't the
         * port's controlling PF
@@ -706,23 +730,31 @@ static int i40e_set_settings(struct net_device *netdev,
                return -EOPNOTSUPP;
        }
 
+       /* copy the cmd to copy_cmd to avoid modifying the original */
+       memcpy(&copy_cmd, cmd, sizeof(struct ethtool_link_ksettings));
+
        /* get our own copy of the bits to check against */
-       memset(&safe_ecmd, 0, sizeof(struct ethtool_cmd));
-       i40e_get_settings(netdev, &safe_ecmd);
+       memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings));
+       i40e_get_link_ksettings(netdev, &safe_cmd);
 
-       /* save autoneg and speed out of ecmd */
-       autoneg = ecmd->autoneg;
-       advertise = ecmd->advertising;
+       /* save autoneg and speed out of cmd */
+       autoneg = cmd->base.autoneg;
+       ethtool_convert_link_mode_to_legacy_u32(&advertise,
+                                               cmd->link_modes.advertising);
 
        /* set autoneg and speed back to what they currently are */
-       ecmd->autoneg = safe_ecmd.autoneg;
-       ecmd->advertising = safe_ecmd.advertising;
+       copy_cmd.base.autoneg = safe_cmd.base.autoneg;
+       ethtool_convert_link_mode_to_legacy_u32(
+               &tmp, safe_cmd.link_modes.advertising);
+       ethtool_convert_legacy_u32_to_link_mode(
+               copy_cmd.link_modes.advertising, tmp);
+
+       copy_cmd.base.cmd = safe_cmd.base.cmd;
 
-       ecmd->cmd = safe_ecmd.cmd;
-       /* If ecmd and safe_ecmd are not the same now, then they are
+       /* If copy_cmd and safe_cmd are not the same now, then they are
         * trying to set something that we do not support
         */
-       if (memcmp(ecmd, &safe_ecmd, sizeof(struct ethtool_cmd)))
+       if (memcmp(&copy_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings)))
                return -EOPNOTSUPP;
 
        while (test_bit(__I40E_CONFIG_BUSY, &vsi->state))
@@ -745,7 +777,8 @@ static int i40e_set_settings(struct net_device *netdev,
                /* If autoneg was not already enabled */
                if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
                        /* If autoneg is not supported, return error */
-                       if (!(safe_ecmd.supported & SUPPORTED_Autoneg)) {
+                       if (!ethtool_link_ksettings_test_link_mode(
+                                   &safe_cmd, supported, Autoneg)) {
                                netdev_info(netdev, "Autoneg not supported on this phy\n");
                                return -EINVAL;
                        }
@@ -760,7 +793,8 @@ static int i40e_set_settings(struct net_device *netdev,
                        /* If autoneg is supported 10GBASE_T is the only PHY
                         * that can disable it, so otherwise return error
                         */
-                       if (safe_ecmd.supported & SUPPORTED_Autoneg &&
+                       if (ethtool_link_ksettings_test_link_mode(
+                                   &safe_cmd, supported, Autoneg) &&
                            hw->phy.link_info.phy_type !=
                            I40E_PHY_TYPE_10GBASE_T) {
                                netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
@@ -773,7 +807,9 @@ static int i40e_set_settings(struct net_device *netdev,
                }
        }
 
-       if (advertise & ~safe_ecmd.supported)
+       ethtool_convert_link_mode_to_legacy_u32(&tmp,
+                                               safe_cmd.link_modes.supported);
+       if (advertise & ~tmp)
                return -EINVAL;
 
        if (advertise & ADVERTISED_100baseT_Full)
@@ -1165,6 +1201,11 @@ static int i40e_get_eeprom_len(struct net_device *netdev)
        struct i40e_hw *hw = &np->vsi->back->hw;
        u32 val;
 
+#define X722_EEPROM_SCOPE_LIMIT 0x5B9FFF
+       if (hw->mac.type == I40E_MAC_X722) {
+               val = X722_EEPROM_SCOPE_LIMIT + 1;
+               return val;
+       }
        val = (rd32(hw, I40E_GLPCI_LBARCTRL)
                & I40E_GLPCI_LBARCTRL_FL_SIZE_MASK)
                >> I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT;
@@ -1483,13 +1524,6 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
                data[i++] = (i40e_gstrings_misc_stats[j].sizeof_stat ==
                            sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
        }
-#ifdef I40E_FCOE
-       for (j = 0; j < I40E_FCOE_STATS_LEN; j++) {
-               p = (char *)vsi + i40e_gstrings_fcoe_stats[j].stat_offset;
-               data[i++] = (i40e_gstrings_fcoe_stats[j].sizeof_stat ==
-                       sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
-       }
-#endif
        rcu_read_lock();
        for (j = 0; j < vsi->num_queue_pairs; j++) {
                tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
@@ -1577,13 +1611,6 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
                                 i40e_gstrings_misc_stats[i].stat_string);
                        p += ETH_GSTRING_LEN;
                }
-#ifdef I40E_FCOE
-               for (i = 0; i < I40E_FCOE_STATS_LEN; i++) {
-                       snprintf(p, ETH_GSTRING_LEN, "%s",
-                                i40e_gstrings_fcoe_stats[i].stat_string);
-                       p += ETH_GSTRING_LEN;
-               }
-#endif
                for (i = 0; i < vsi->num_queue_pairs; i++) {
                        snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i);
                        p += ETH_GSTRING_LEN;
@@ -1648,12 +1675,18 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
                /* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
                break;
        case ETH_SS_PRIV_FLAGS:
-               memcpy(data, i40e_priv_flags_strings,
-                      I40E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
-               data += I40E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN;
-               if (pf->hw.pf_id == 0)
-                       memcpy(data, i40e_gl_priv_flags_strings,
-                              I40E_GL_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+               for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
+                       snprintf(p, ETH_GSTRING_LEN, "%s",
+                                i40e_gstrings_priv_flags[i].flag_string);
+                       p += ETH_GSTRING_LEN;
+               }
+               if (pf->hw.pf_id != 0)
+                       break;
+               for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) {
+                       snprintf(p, ETH_GSTRING_LEN, "%s",
+                                i40e_gl_gstrings_priv_flags[i].flag_string);
+                       p += ETH_GSTRING_LEN;
+               }
                break;
        default:
                break;
@@ -2284,6 +2317,102 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
        return 0;
 }
 
+/**
+ * i40e_check_mask - Check whether a mask field is set
+ * @mask: the full mask value
+ * @field: mask of the field to check
+ *
+ * If the given mask is fully set, return a positive value. If the mask for the
+ * field is fully unset, return zero. Otherwise return a negative error code.
+ **/
+static int i40e_check_mask(u64 mask, u64 field)
+{
+       u64 value = mask & field;
+
+       if (value == field)
+               return 1;
+       else if (!value)
+               return 0;
+       else
+               return -1;
+}
+
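A short sketch of how the tri-state return is meant to be consumed, with a
hypothetical caller (use_the_field() is illustrative only; the
I40E_USERDEF_FLEX_FILTER field is defined just below):

	int valid = i40e_check_mask(mask, I40E_USERDEF_FLEX_FILTER);

	if (valid < 0)		/* partially set: reject the filter */
		return -EINVAL;
	else if (valid)		/* fully set: the field is being matched */
		use_the_field(value);
	/* else: fully unset, the field is simply not in use */
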
+/**
+ * i40e_parse_rx_flow_user_data - Deconstruct user-defined data
+ * @fsp: pointer to rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Read the user-defined data and deconstruct the value into a structure. No
+ * other code should read the user-defined data directly, so that the value is
+ * interpreted consistently in exactly one place.
+ *
+ * The user-defined field is a 64-bit big-endian value, which we
+ * deconstruct by reading bits or bit fields from it. Single bit flags shall
+ * be defined starting from the highest bits, while small bit field values
+ * shall be defined starting from the lowest bits.
+ *
+ * Returns 0 if the data is valid, and non-zero if the userdef data is invalid
+ * and the filter should be rejected. The data structure will always be
+ * modified even if FLOW_EXT is not set.
+ *
+ **/
+static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+                                       struct i40e_rx_flow_userdef *data)
+{
+       u64 value, mask;
+       int valid;
+
+       /* Zero memory first so it's always consistent. */
+       memset(data, 0, sizeof(*data));
+
+       if (!(fsp->flow_type & FLOW_EXT))
+               return 0;
+
+       value = be64_to_cpu(*((__be64 *)fsp->h_ext.data));
+       mask = be64_to_cpu(*((__be64 *)fsp->m_ext.data));
+
+#define I40E_USERDEF_FLEX_WORD         GENMASK_ULL(15, 0)
+#define I40E_USERDEF_FLEX_OFFSET       GENMASK_ULL(31, 16)
+#define I40E_USERDEF_FLEX_FILTER       GENMASK_ULL(31, 0)
+
+       valid = i40e_check_mask(mask, I40E_USERDEF_FLEX_FILTER);
+       if (valid < 0) {
+               return -EINVAL;
+       } else if (valid) {
+               data->flex_word = value & I40E_USERDEF_FLEX_WORD;
+               data->flex_offset =
+                       (value & I40E_USERDEF_FLEX_OFFSET) >> 16;
+               data->flex_filter = true;
+       }
+
+       return 0;
+}
+
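A worked example of the layout above: a user-def value of 0x00120004, read as
a 64-bit big-endian quantity with a fully set mask over bits 31:0, decodes to
flex_word 0x0004 and flex_offset 0x0012:

	u64 value = 0x0000000000120004ULL;
	u16 word   = value & I40E_USERDEF_FLEX_WORD;		/* 0x0004 */
	u16 offset = (value & I40E_USERDEF_FLEX_OFFSET) >> 16;	/* 0x0012 */
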
+/**
+ * i40e_fill_rx_flow_user_data - Fill in user-defined data field
+ * @fsp: pointer to rx_flow specification
+ *
+ * Reads the userdef data structure and properly fills in the user defined
+ * fields of the rx_flow_spec.
+ **/
+static void i40e_fill_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+                                       struct i40e_rx_flow_userdef *data)
+{
+       u64 value = 0, mask = 0;
+
+       if (data->flex_filter) {
+               value |= data->flex_word;
+               value |= (u64)data->flex_offset << 16;
+               mask |= I40E_USERDEF_FLEX_FILTER;
+       }
+
+       if (value || mask)
+               fsp->flow_type |= FLOW_EXT;
+
+       *((__be64 *)fsp->h_ext.data) = cpu_to_be64(value);
+       *((__be64 *)fsp->m_ext.data) = cpu_to_be64(mask);
+}
+
 /**
  * i40e_get_ethtool_fdir_all - Populates the rule count of a command
  * @pf: Pointer to the physical function struct
@@ -2335,8 +2464,11 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
 {
        struct ethtool_rx_flow_spec *fsp =
                        (struct ethtool_rx_flow_spec *)&cmd->fs;
+       struct i40e_rx_flow_userdef userdef = {0};
        struct i40e_fdir_filter *rule = NULL;
        struct hlist_node *node2;
+       u64 input_set;
+       u16 index;
 
        hlist_for_each_entry_safe(rule, node2,
                                  &pf->fdir_filter_list, fdir_node) {
@@ -2359,8 +2491,48 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
         */
        fsp->h_u.tcp_ip4_spec.psrc = rule->dst_port;
        fsp->h_u.tcp_ip4_spec.pdst = rule->src_port;
-       fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip[0];
-       fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip[0];
+       fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip;
+       fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip;
+
+       switch (rule->flow_type) {
+       case SCTP_V4_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+               break;
+       case TCP_V4_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+               break;
+       case UDP_V4_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+               break;
+       case IP_USER_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+               break;
+       default:
+               /* If we have stored a filter with a flow type not listed here
+                * it is almost certainly a driver bug. WARN(), and then
+                * assign the input_set as if all fields are enabled to avoid
+                * reading unassigned memory.
+                */
+               WARN(1, "Missing input set index for flow_type %d\n",
+                    rule->flow_type);
+               input_set = 0xFFFFFFFFFFFFFFFFULL;
+               goto no_input_set;
+       }
+
+       input_set = i40e_read_fd_input_set(pf, index);
+
+no_input_set:
+       if (input_set & I40E_L3_SRC_MASK)
+               fsp->m_u.tcp_ip4_spec.ip4src = htonl(0xFFFFFFFF);
+
+       if (input_set & I40E_L3_DST_MASK)
+               fsp->m_u.tcp_ip4_spec.ip4dst = htonl(0xFFFFFFFF);
+
+       if (input_set & I40E_L4_SRC_MASK)
+               fsp->m_u.tcp_ip4_spec.psrc = htons(0xFFFF);
+
+       if (input_set & I40E_L4_DST_MASK)
+               fsp->m_u.tcp_ip4_spec.pdst = htons(0xFFFF);
 
        if (rule->dest_ctl == I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET)
                fsp->ring_cookie = RX_CLS_FLOW_DISC;
@@ -2372,11 +2544,24 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
 
                vsi = i40e_find_vsi_from_id(pf, rule->dest_vsi);
                if (vsi && vsi->type == I40E_VSI_SRIOV) {
-                       fsp->h_ext.data[1] = htonl(vsi->vf_id);
-                       fsp->m_ext.data[1] = htonl(0x1);
+                       /* VFs are zero-indexed by the driver, but ethtool
+                        * expects them to be one-indexed, so add one here
+                        */
+                       u64 ring_vf = vsi->vf_id + 1;
+
+                       ring_vf <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+                       fsp->ring_cookie |= ring_vf;
                }
        }
 
+       if (rule->flex_filter) {
+               userdef.flex_filter = true;
+               userdef.flex_word = be16_to_cpu(rule->flex_word);
+               userdef.flex_offset = rule->flex_offset;
+       }
+
+       i40e_fill_rx_flow_user_data(fsp, &userdef);
+
        return 0;
 }
 
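The ring_cookie handling above uses the standard ethtool encoding: the low 32
bits carry the queue index and the next byte carries a one-based VF id,
extracted with the accessors from include/uapi/linux/ethtool.h. A minimal
sketch:

	u8 vf_id = 0;			/* driver-internal, zero-based */
	u32 queue = 3;
	u64 cookie = ((u64)(vf_id + 1) << ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF) |
		     queue;

	/* ...and unpacked again by the accessors */
	queue = ethtool_get_flow_spec_ring(cookie);	/* 3 */
	vf_id = ethtool_get_flow_spec_ring_vf(cookie);	/* 1, i.e. VF 0 */
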
@@ -2573,24 +2758,6 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
        return 0;
 }
 
-/**
- * i40e_match_fdir_input_set - Match a new filter against an existing one
- * @rule: The filter already added
- * @input: The new filter to comapre against
- *
- * Returns true if the two input set match
- **/
-static bool i40e_match_fdir_input_set(struct i40e_fdir_filter *rule,
-                                     struct i40e_fdir_filter *input)
-{
-       if ((rule->dst_ip[0] != input->dst_ip[0]) ||
-           (rule->src_ip[0] != input->src_ip[0]) ||
-           (rule->dst_port != input->dst_port) ||
-           (rule->src_port != input->src_port))
-               return false;
-       return true;
-}
-
 /**
  * i40e_update_ethtool_fdir_entry - Updates the fdir filter entry
  * @vsi: Pointer to the targeted VSI
@@ -2626,22 +2793,22 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 
        /* if there is an old rule occupying our place remove it */
        if (rule && (rule->fd_id == sw_idx)) {
-               if (input && !i40e_match_fdir_input_set(rule, input))
-                       err = i40e_add_del_fdir(vsi, rule, false);
-               else if (!input)
-                       err = i40e_add_del_fdir(vsi, rule, false);
+               /* Remove this rule, since we're either deleting it, or
+                * replacing it.
+                */
+               err = i40e_add_del_fdir(vsi, rule, false);
                hlist_del(&rule->fdir_node);
                kfree(rule);
                pf->fdir_pf_active_filters--;
        }
 
-       /* If no input this was a delete, err should be 0 if a rule was
-        * successfully found and removed from the list else -EINVAL
+       /* If we weren't given an input, this is a delete, so just return the
+        * error code indicating if there was an entry at the requested slot
         */
        if (!input)
                return err;
 
-       /* initialize node and set software index */
+       /* Otherwise, install the new rule as requested */
        INIT_HLIST_NODE(&input->fdir_node);
 
        /* add filter to the list */
@@ -2657,6 +2824,69 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
        return 0;
 }
 
+/**
+ * i40e_prune_flex_pit_list - Cleanup unused entries in FLX_PIT table
+ * @pf: pointer to PF structure
+ *
+ * This function searches the list of filters and determines which FLX_PIT
+ * entries are still required. It will prune any entries which are no longer
+ * in use after the deletion.
+ **/
+static void i40e_prune_flex_pit_list(struct i40e_pf *pf)
+{
+       struct i40e_flex_pit *entry, *tmp;
+       struct i40e_fdir_filter *rule;
+
+       /* First, we'll check the l3 table */
+       list_for_each_entry_safe(entry, tmp, &pf->l3_flex_pit_list, list) {
+               bool found = false;
+
+               hlist_for_each_entry(rule, &pf->fdir_filter_list, fdir_node) {
+                       if (rule->flow_type != IP_USER_FLOW)
+                               continue;
+                       if (rule->flex_filter &&
+                           rule->flex_offset == entry->src_offset) {
+                               found = true;
+                               break;
+                       }
+               }
+
+               /* If we didn't find the filter, then we can prune this entry
+                * from the list.
+                */
+               if (!found) {
+                       list_del(&entry->list);
+                       kfree(entry);
+               }
+       }
+
+       /* Followed by the L4 table */
+       list_for_each_entry_safe(entry, tmp, &pf->l4_flex_pit_list, list) {
+               bool found = false;
+
+               hlist_for_each_entry(rule, &pf->fdir_filter_list, fdir_node) {
+                       /* Skip this filter if it's L3, since we already
+                        * checked those in the above loop
+                        */
+                       if (rule->flow_type == IP_USER_FLOW)
+                               continue;
+                       if (rule->flex_filter &&
+                           rule->flex_offset == entry->src_offset) {
+                               found = true;
+                               break;
+                       }
+               }
+
+               /* If we didn't find the filter, then we can prune this entry
+                * from the list.
+                */
+               if (!found) {
+                       list_del(&entry->list);
+                       kfree(entry);
+               }
+       }
+}
+
 /**
  * i40e_del_fdir_entry - Deletes a Flow Director filter entry
  * @vsi: Pointer to the targeted VSI
@@ -2684,10 +2914,690 @@ static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
 
        ret = i40e_update_ethtool_fdir_entry(vsi, NULL, fsp->location, cmd);
 
+       i40e_prune_flex_pit_list(pf);
+
        i40e_fdir_check_and_reenable(pf);
        return ret;
 }
 
+/**
+ * i40e_unused_pit_index - Find an unused PIT index for given list
+ * @pf: the PF data structure
+ *
+ * Find the first unused flexible PIT index entry. We search both the L3 and
+ * L4 flexible PIT lists so that the returned index is unique and unused by
+ * either currently programmed L3 or L4 filters. We use a bit field as storage
+ * to track which indexes are already used.
+ **/
+static u8 i40e_unused_pit_index(struct i40e_pf *pf)
+{
+       unsigned long available_index = 0xFF;
+       struct i40e_flex_pit *entry;
+
+       /* We need to make sure that the new index isn't in use by either L3
+        * or L4 filters so that IP_USER_FLOW filters can program both L3 and
+        * L4 to use the same index.
+        */
+
+       list_for_each_entry(entry, &pf->l4_flex_pit_list, list)
+               clear_bit(entry->pit_index, &available_index);
+
+       list_for_each_entry(entry, &pf->l3_flex_pit_list, list)
+               clear_bit(entry->pit_index, &available_index);
+
+       return find_first_bit(&available_index, 8);
+}
+
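The bookkeeping here is just a one-word free map scanned with the generic
bitmap helpers; the same technique in isolation:

	unsigned long available = 0xFF;	/* all 8 PIT indexes start free */

	clear_bit(0, &available);	/* index 0 claimed by one offset */
	clear_bit(3, &available);	/* index 3 claimed by another */

	/* The lowest bit still set is the first free index, 1 here. If
	 * every index were taken, find_first_bit() returns the size, 8.
	 */
	u8 free_index = find_first_bit(&available, 8);
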
+/**
+ * i40e_find_flex_offset - Find an existing flex src_offset
+ * @flex_pit_list: L3 or L4 flex PIT list
+ * @src_offset: new src_offset to find
+ *
+ * Searches the flex_pit_list for an existing entry with this offset. If the
+ * offset is not yet programmed, returns NULL when there is still room to add
+ * it, or an ERR_PTR when the table is full.
+ **/
+static
+struct i40e_flex_pit *i40e_find_flex_offset(struct list_head *flex_pit_list,
+                                           u16 src_offset)
+{
+       struct i40e_flex_pit *entry;
+       int size = 0;
+
+       /* Search for the src_offset first. If we find a matching entry
+        * already programmed, we can simply re-use it.
+        */
+       list_for_each_entry(entry, flex_pit_list, list) {
+               size++;
+               if (entry->src_offset == src_offset)
+                       return entry;
+       }
+
+       /* If we haven't found an entry yet, then the provided src offset has
+        * not yet been programmed. We will program the src offset later on,
+        * but we need to indicate whether there is enough space to do so
+        * here. We'll make use of ERR_PTR for this purpose.
+        */
+       if (size >= I40E_FLEX_PIT_TABLE_SIZE)
+               return ERR_PTR(-ENOSPC);
+
+       return NULL;
+}
+
+/**
+ * i40e_add_flex_offset - Add src_offset to flex PIT table list
+ * @flex_pit_list: L3 or L4 flex PIT list
+ * @src_offset: new src_offset to add
+ * @pit_index: the PIT index to program
+ *
+ * This function programs the new src_offset to the list. It is expected that
+ * i40e_find_flex_offset has already been tried and returned NULL, indicating
+ * that this offset is not programmed, and that the list has enough space to
+ * store another offset.
+ *
+ * Returns 0 on success, and negative value on error.
+ **/
+static int i40e_add_flex_offset(struct list_head *flex_pit_list,
+                               u16 src_offset,
+                               u8 pit_index)
+{
+       struct i40e_flex_pit *new_pit, *entry;
+
+       new_pit = kzalloc(sizeof(*new_pit), GFP_KERNEL);
+       if (!new_pit)
+               return -ENOMEM;
+
+       new_pit->src_offset = src_offset;
+       new_pit->pit_index = pit_index;
+
+       /* We need to insert this item such that the list is sorted by
+        * src_offset in ascending order.
+        */
+       list_for_each_entry(entry, flex_pit_list, list) {
+               if (new_pit->src_offset < entry->src_offset) {
+                       list_add_tail(&new_pit->list, &entry->list);
+                       return 0;
+               }
+
+               /* If we found an entry with our offset already programmed we
+                * can simply return here, after freeing the memory. However,
+                * if the pit_index does not match we need to report an error.
+                */
+               if (new_pit->src_offset == entry->src_offset) {
+                       int err = 0;
+
+                       /* If the PIT index is not the same we can't re-use
+                        * the entry, so we must report an error.
+                        */
+                       if (new_pit->pit_index != entry->pit_index)
+                               err = -EINVAL;
+
+                       kfree(new_pit);
+                       return err;
+               }
+       }
+
+       /* If we reached here, then we haven't yet added the item. This means
+        * that we should add the item at the end of the list.
+        */
+       list_add_tail(&new_pit->list, flex_pit_list);
+       return 0;
+}
+
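One idiom above reads backwards at first glance: list_add_tail(&new_pit->list,
&entry->list) links the new node immediately before entry, since
list_add_tail() inserts on the tail side of whatever node it is handed.
Applied to the first entry with a larger src_offset, that is exactly the
insertion point that keeps the list in ascending order.
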
+/**
+ * __i40e_reprogram_flex_pit - Re-program specific FLX_PIT table
+ * @pf: Pointer to the PF structure
+ * @flex_pit_list: list of flexible src offsets in use
+ * @flex_pit_start: index of the first entry for this section of the table
+ *
+ * In order to handle flexible data, the hardware uses a table of values
+ * called the FLX_PIT table. This table is used to indicate which sections of
+ * the input correspond to what PIT index values. Unfortunately, hardware is
+ * very restrictive about programming this table. Entries must be ordered by
+ * src_offset in ascending order, without duplicates. Additionally, unused
+ * entries must be set to the unused index value, and must have valid size and
+ * length according to the src_offset ordering.
+ *
+ * This function will reprogram the FLX_PIT register from a book-keeping
+ * structure that we guarantee is already ordered correctly, and has no more
+ * than 3 entries.
+ *
+ * To make things easier, we only support flexible values of one word length,
+ * rather than allowing variable length flexible values.
+ **/
+static void __i40e_reprogram_flex_pit(struct i40e_pf *pf,
+                                     struct list_head *flex_pit_list,
+                                     int flex_pit_start)
+{
+       struct i40e_flex_pit *entry = NULL;
+       u16 last_offset = 0;
+       int i = 0, j = 0;
+
+       /* First, loop over the list of flex PIT entries, and reprogram the
+        * registers.
+        */
+       list_for_each_entry(entry, flex_pit_list, list) {
+               /* We have to be careful when programming values for the
+                * largest SRC_OFFSET value. It is possible that adding
+                * additional empty values at the end would overflow the space
+                * for the SRC_OFFSET in the FLX_PIT register. To avoid this,
+                * we check here and add the empty values prior to adding the
+                * largest value.
+                *
+                * To determine this, we will use a loop from i+1 to 3, which
+                * will determine whether the unused entries would have valid
+                * SRC_OFFSET. Note that there cannot be extra entries past
+                * this value, because the only valid values would have been
+                * larger than I40E_MAX_FLEX_SRC_OFFSET, and thus would not
+                * have been added to the list in the first place.
+                */
+               for (j = i + 1; j < 3; j++) {
+                       u16 offset = entry->src_offset + j;
+                       int index = flex_pit_start + i;
+                       u32 value = I40E_FLEX_PREP_VAL(I40E_FLEX_DEST_UNUSED,
+                                                      1,
+                                                      offset - 3);
+
+                       if (offset > I40E_MAX_FLEX_SRC_OFFSET) {
+                               i40e_write_rx_ctl(&pf->hw,
+                                                 I40E_PRTQF_FLX_PIT(index),
+                                                 value);
+                               i++;
+                       }
+               }
+
+               /* Now, we can program the actual value into the table */
+               i40e_write_rx_ctl(&pf->hw,
+                                 I40E_PRTQF_FLX_PIT(flex_pit_start + i),
+                                 I40E_FLEX_PREP_VAL(entry->pit_index + 50,
+                                                    1,
+                                                    entry->src_offset));
+               i++;
+       }
+
+       /* In order to program the last entries in the table, we need to
+        * determine the valid offset. If the list is empty, we'll just start
+        * with 0. Otherwise, we'll start with the last item offset and add 1.
+        * This ensures that all entries have valid sizes. If we don't do this
+        * correctly, the hardware will disable flexible field parsing.
+        */
+       if (!list_empty(flex_pit_list))
+               last_offset = list_prev_entry(entry, list)->src_offset + 1;
+
+       for (; i < 3; i++, last_offset++) {
+               i40e_write_rx_ctl(&pf->hw,
+                                 I40E_PRTQF_FLX_PIT(flex_pit_start + i),
+                                 I40E_FLEX_PREP_VAL(I40E_FLEX_DEST_UNUSED,
+                                                    1,
+                                                    last_offset));
+       }
+}
+
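To make the ordering rules concrete, consider a single L4 list entry with
src_offset 4 and pit_index 0. Offsets 5 and 6 are still valid, so no padding
is written before the entry; it lands in FLX_PIT(start) with destination 50
(pit_index + 50), size 1, source offset 4. The tail loop then fills
FLX_PIT(start + 1) and FLX_PIT(start + 2) with I40E_FLEX_DEST_UNUSED at source
offsets 5 and 6, so every entry keeps a strictly ascending offset and hardware
does not disable flexible field parsing.
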
+/**
+ * i40e_reprogram_flex_pit - Reprogram all FLX_PIT tables after input set change
+ * @pf: pointer to the PF structure
+ *
+ * This function reprograms both the L3 and L4 FLX_PIT tables. See the
+ * internal helper function for implementation details.
+ **/
+static void i40e_reprogram_flex_pit(struct i40e_pf *pf)
+{
+       __i40e_reprogram_flex_pit(pf, &pf->l3_flex_pit_list,
+                                 I40E_FLEX_PIT_IDX_START_L3);
+
+       __i40e_reprogram_flex_pit(pf, &pf->l4_flex_pit_list,
+                                 I40E_FLEX_PIT_IDX_START_L4);
+
+       /* We also need to program the L3 and L4 GLQF ORT register */
+       i40e_write_rx_ctl(&pf->hw,
+                         I40E_GLQF_ORT(I40E_L3_GLQF_ORT_IDX),
+                         I40E_ORT_PREP_VAL(I40E_FLEX_PIT_IDX_START_L3,
+                                           3, 1));
+
+       i40e_write_rx_ctl(&pf->hw,
+                         I40E_GLQF_ORT(I40E_L4_GLQF_ORT_IDX),
+                         I40E_ORT_PREP_VAL(I40E_FLEX_PIT_IDX_START_L4,
+                                           3, 1));
+}
+
+/**
+ * i40e_flow_str - Converts a flow_type into a human readable string
+ * @fsp: the flow specification to convert
+ *
+ * Only the flow types we currently support are included here, and the string
+ * value matches what ethtool uses to configure each flow type.
+ **/
+static const char *i40e_flow_str(struct ethtool_rx_flow_spec *fsp)
+{
+       switch (fsp->flow_type & ~FLOW_EXT) {
+       case TCP_V4_FLOW:
+               return "tcp4";
+       case UDP_V4_FLOW:
+               return "udp4";
+       case SCTP_V4_FLOW:
+               return "sctp4";
+       case IP_USER_FLOW:
+               return "ip4";
+       default:
+               return "unknown";
+       }
+}
+
+/**
+ * i40e_pit_index_to_mask - Return the FLEX mask for a given PIT index
+ * @pit_index: PIT index to convert
+ *
+ * Returns the mask for a given PIT index. Will return 0 if the pit_index is
+ * out of range.
+ **/
+static u64 i40e_pit_index_to_mask(int pit_index)
+{
+       switch (pit_index) {
+       case 0:
+               return I40E_FLEX_50_MASK;
+       case 1:
+               return I40E_FLEX_51_MASK;
+       case 2:
+               return I40E_FLEX_52_MASK;
+       case 3:
+               return I40E_FLEX_53_MASK;
+       case 4:
+               return I40E_FLEX_54_MASK;
+       case 5:
+               return I40E_FLEX_55_MASK;
+       case 6:
+               return I40E_FLEX_56_MASK;
+       case 7:
+               return I40E_FLEX_57_MASK;
+       default:
+               return 0;
+       }
+}
+
+/**
+ * i40e_print_input_set - Show changes between two input sets
+ * @vsi: the vsi being configured
+ * @old: the old input set
+ * @new: the new input set
+ *
+ * Print the difference between old and new input sets by showing which series
+ * of words are toggled on or off. Only displays the bits we actually support
+ * changing.
+ **/
+static void i40e_print_input_set(struct i40e_vsi *vsi, u64 old, u64 new)
+{
+       struct i40e_pf *pf = vsi->back;
+       bool old_value, new_value;
+       int i;
+
+       old_value = !!(old & I40E_L3_SRC_MASK);
+       new_value = !!(new & I40E_L3_SRC_MASK);
+       if (old_value != new_value)
+               netif_info(pf, drv, vsi->netdev, "L3 source address: %s -> %s\n",
+                          old_value ? "ON" : "OFF",
+                          new_value ? "ON" : "OFF");
+
+       old_value = !!(old & I40E_L3_DST_MASK);
+       new_value = !!(new & I40E_L3_DST_MASK);
+       if (old_value != new_value)
+               netif_info(pf, drv, vsi->netdev, "L3 destination address: %s -> %s\n",
+                          old_value ? "ON" : "OFF",
+                          new_value ? "ON" : "OFF");
+
+       old_value = !!(old & I40E_L4_SRC_MASK);
+       new_value = !!(new & I40E_L4_SRC_MASK);
+       if (old_value != new_value)
+               netif_info(pf, drv, vsi->netdev, "L4 source port: %s -> %s\n",
+                          old_value ? "ON" : "OFF",
+                          new_value ? "ON" : "OFF");
+
+       old_value = !!(old & I40E_L4_DST_MASK);
+       new_value = !!(new & I40E_L4_DST_MASK);
+       if (old_value != new_value)
+               netif_info(pf, drv, vsi->netdev, "L4 destination port: %s -> %s\n",
+                          old_value ? "ON" : "OFF",
+                          new_value ? "ON" : "OFF");
+
+       old_value = !!(old & I40E_VERIFY_TAG_MASK);
+       new_value = !!(new & I40E_VERIFY_TAG_MASK);
+       if (old_value != new_value)
+               netif_info(pf, drv, vsi->netdev, "SCTP verification tag: %s -> %s\n",
+                          old_value ? "ON" : "OFF",
+                          new_value ? "ON" : "OFF");
+
+       /* Show change of flexible filter entries */
+       for (i = 0; i < I40E_FLEX_INDEX_ENTRIES; i++) {
+               u64 flex_mask = i40e_pit_index_to_mask(i);
+
+               old_value = !!(old & flex_mask);
+               new_value = !!(new & flex_mask);
+               if (old_value != new_value)
+                       netif_info(pf, drv, vsi->netdev, "FLEX index %d: %s -> %s\n",
+                                  i,
+                                  old_value ? "ON" : "OFF",
+                                  new_value ? "ON" : "OFF");
+       }
+
+       netif_info(pf, drv, vsi->netdev, "  Current input set: %0llx\n",
+                  old);
+       netif_info(pf, drv, vsi->netdev, "Requested input set: %0llx\n",
+                  new);
+}
+
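Illustratively, a request that enables L4 destination port matching while
dropping L3 source matching would log something along these lines (derived
from the format strings above; the message prefix depends on the device):

	L3 source address: ON -> OFF
	L4 destination port: OFF -> ON
	  Current input set: <old mask>
	Requested input set: <new mask>
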
+/**
+ * i40e_check_fdir_input_set - Check that a given rx_flow_spec mask is valid
+ * @vsi: pointer to the targeted VSI
+ * @fsp: pointer to Rx flow specification
+ * @userdef: userdefined data from flow specification
+ *
+ * Ensures that a given ethtool_rx_flow_spec has a valid mask. Some support
+ * for partial matches exists with a few limitations. First, hardware only
+ * supports masking by word boundary (2 bytes) and not per individual bit.
+ * Second, hardware is limited to using one mask for a flow type and cannot
+ * use a separate mask for each filter.
+ *
+ * To support these limitations, if we already have a configured filter for
+ * the specified type, this function enforces that new filters of the type
+ * match the configured input set. Otherwise, if we do not have a filter of
+ * the specified type, we allow the input set to be updated to match the
+ * desired filter.
+ *
+ * To help administrators understand why a filter was not accepted, we print a
+ * diagnostic message showing how the input set would change, and warn that any
+ * preexisting filters of this type must be deleted first.
+ *
+ * Returns 0 on successful input set match, and a negative return code on
+ * failure.
+ **/
+static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
+                                    struct ethtool_rx_flow_spec *fsp,
+                                    struct i40e_rx_flow_userdef *userdef)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct ethtool_tcpip4_spec *tcp_ip4_spec;
+       struct ethtool_usrip4_spec *usr_ip4_spec;
+       u64 current_mask, new_mask;
+       bool new_flex_offset = false;
+       bool flex_l3 = false;
+       u16 *fdir_filter_count;
+       u16 index, src_offset = 0;
+       u8 pit_index = 0;
+       int err;
+
+       switch (fsp->flow_type & ~FLOW_EXT) {
+       case SCTP_V4_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+               fdir_filter_count = &pf->fd_sctp4_filter_cnt;
+               break;
+       case TCP_V4_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+               fdir_filter_count = &pf->fd_tcp4_filter_cnt;
+               break;
+       case UDP_V4_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+               fdir_filter_count = &pf->fd_udp4_filter_cnt;
+               break;
+       case IP_USER_FLOW:
+               index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+               fdir_filter_count = &pf->fd_ip4_filter_cnt;
+               flex_l3 = true;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       /* Read the current input set from register memory. */
+       current_mask = i40e_read_fd_input_set(pf, index);
+       new_mask = current_mask;
+
+       /* Determine, if any, the required changes to the input set in order
+        * to support the provided mask.
+        *
+        * Hardware only supports masking at word (2 byte) granularity and does
+        * not support full bitwise masking. This implementation simplifies
+        * even further and only supports fully enabled or fully disabled
+        * masks for each field, even though we could split the ip4src and
+        * ip4dst fields.
+        */
+       switch (fsp->flow_type & ~FLOW_EXT) {
+       case SCTP_V4_FLOW:
+               new_mask &= ~I40E_VERIFY_TAG_MASK;
+               /* Fall through */
+       case TCP_V4_FLOW:
+       case UDP_V4_FLOW:
+               tcp_ip4_spec = &fsp->m_u.tcp_ip4_spec;
+
+               /* IPv4 source address */
+               if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+                       new_mask |= I40E_L3_SRC_MASK;
+               else if (!tcp_ip4_spec->ip4src)
+                       new_mask &= ~I40E_L3_SRC_MASK;
+               else
+                       return -EOPNOTSUPP;
+
+               /* IPv4 destination address */
+               if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+                       new_mask |= I40E_L3_DST_MASK;
+               else if (!tcp_ip4_spec->ip4dst)
+                       new_mask &= ~I40E_L3_DST_MASK;
+               else
+                       return -EOPNOTSUPP;
+
+               /* L4 source port */
+               if (tcp_ip4_spec->psrc == htons(0xFFFF))
+                       new_mask |= I40E_L4_SRC_MASK;
+               else if (!tcp_ip4_spec->psrc)
+                       new_mask &= ~I40E_L4_SRC_MASK;
+               else
+                       return -EOPNOTSUPP;
+
+               /* L4 destination port */
+               if (tcp_ip4_spec->pdst == htons(0xFFFF))
+                       new_mask |= I40E_L4_DST_MASK;
+               else if (!tcp_ip4_spec->pdst)
+                       new_mask &= ~I40E_L4_DST_MASK;
+               else
+                       return -EOPNOTSUPP;
+
+               /* Filtering on Type of Service is not supported. */
+               if (tcp_ip4_spec->tos)
+                       return -EOPNOTSUPP;
+
+               break;
+       case IP_USER_FLOW:
+               usr_ip4_spec = &fsp->m_u.usr_ip4_spec;
+
+               /* IPv4 source address */
+               if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+                       new_mask |= I40E_L3_SRC_MASK;
+               else if (!usr_ip4_spec->ip4src)
+                       new_mask &= ~I40E_L3_SRC_MASK;
+               else
+                       return -EOPNOTSUPP;
+
+               /* IPv4 destination address */
+               if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+                       new_mask |= I40E_L3_DST_MASK;
+               else if (!usr_ip4_spec->ip4dst)
+                       new_mask &= ~I40E_L3_DST_MASK;
+               else
+                       return -EOPNOTSUPP;
+
+               /* First 4 bytes of L4 header */
+               if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF))
+                       new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
+               else if (!usr_ip4_spec->l4_4_bytes)
+                       new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+               else
+                       return -EOPNOTSUPP;
+
+               /* Filtering on Type of Service is not supported. */
+               if (usr_ip4_spec->tos)
+                       return -EOPNOTSUPP;
+
+               /* Filtering on IP version is not supported */
+               if (usr_ip4_spec->ip_ver)
+                       return -EINVAL;
+
+               /* Filtering on L4 protocol is not supported */
+               if (usr_ip4_spec->proto)
+                       return -EINVAL;
+
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       /* First, clear all flexible filter entries */
+       new_mask &= ~I40E_FLEX_INPUT_MASK;
+
+       /* If we have a flexible filter, try to add this offset to the correct
+        * flexible filter PIT list. Once finished, we can update the mask.
+        * If the src_offset changed, we will get a new mask value which will
+        * trigger an input set change.
+        */
+       if (userdef->flex_filter) {
+               struct i40e_flex_pit *l3_flex_pit = NULL, *flex_pit = NULL;
+
+               /* Flexible offset must be even, since the flexible payload
+                * must be aligned on 2-byte boundary.
+                */
+               if (userdef->flex_offset & 0x1) {
+                       dev_warn(&pf->pdev->dev,
+                                "Flexible data offset must be 2-byte aligned\n");
+                       return -EINVAL;
+               }
+
+               src_offset = userdef->flex_offset >> 1;
+
+               /* FLX_PIT source offset value is only so large */
+               if (src_offset > I40E_MAX_FLEX_SRC_OFFSET) {
+                       dev_warn(&pf->pdev->dev,
+                                "Flexible data must reside within first 64 bytes of the packet payload\n");
+                       return -EINVAL;
+               }
+
+               /* See if this offset has already been programmed. If we get
+                * an ERR_PTR, then the filter is not safe to add. Otherwise,
+                * if we get a NULL pointer, this means we will need to add
+                * the offset.
+                */
+               flex_pit = i40e_find_flex_offset(&pf->l4_flex_pit_list,
+                                                src_offset);
+               if (IS_ERR(flex_pit))
+                       return PTR_ERR(flex_pit);
+
+               /* IP_USER_FLOW filters match both L4 (ICMP) and L3 (unknown)
+                * packet types, and thus we need to program both L3 and L4
+                * flexible values. These must have identical flexible index,
+                * as otherwise we can't correctly program the input set. So
+                * we'll find both an L3 and L4 index and make sure they are
+                * the same.
+                */
+               if (flex_l3) {
+                       l3_flex_pit =
+                               i40e_find_flex_offset(&pf->l3_flex_pit_list,
+                                                     src_offset);
+                       if (IS_ERR(l3_flex_pit))
+                               return PTR_ERR(l3_flex_pit);
+
+                       if (flex_pit) {
+                               /* If we already had a matching L4 entry, we
+                                * need to make sure that the L3 entry we
+                                * obtained uses the same index.
+                                */
+                               if (l3_flex_pit) {
+                                       if (l3_flex_pit->pit_index !=
+                                           flex_pit->pit_index) {
+                                               return -EINVAL;
+                                       }
+                               } else {
+                                       new_flex_offset = true;
+                               }
+                       } else {
+                               flex_pit = l3_flex_pit;
+                       }
+               }
+
+               /* If we didn't find an existing flex offset, we need to
+                * program a new one. However, we don't immediately program it
+                * here because we will wait to program until after we check
+                * that it is safe to change the input set.
+                */
+               if (!flex_pit) {
+                       new_flex_offset = true;
+                       pit_index = i40e_unused_pit_index(pf);
+               } else {
+                       pit_index = flex_pit->pit_index;
+               }
+
+               /* Update the mask with the new offset */
+               new_mask |= i40e_pit_index_to_mask(pit_index);
+       }
+
+       /* If the mask and flexible filter offsets for this filter match the
+        * currently programmed values we don't need any input set change, so
+        * this filter is safe to install.
+        */
+       if (new_mask == current_mask && !new_flex_offset)
+               return 0;
+
+       netif_info(pf, drv, vsi->netdev, "Input set change requested for %s flows:\n",
+                  i40e_flow_str(fsp));
+       i40e_print_input_set(vsi, current_mask, new_mask);
+       if (new_flex_offset) {
+               netif_info(pf, drv, vsi->netdev, "FLEX index %d: Offset -> %d\n",
+                          pit_index, src_offset);
+       }
+
+       /* Hardware input sets are global across multiple ports, so even the
+        * main port cannot change them when in MFP mode as this would impact
+        * any filters on the other ports.
+        */
+       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+               netif_err(pf, drv, vsi->netdev, "Cannot change Flow Director input sets while MFP is enabled\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* This filter requires us to update the input set. However, hardware
+        * only supports one input set per flow type, and does not support
+        * separate masks for each filter. This means that we can only support
+        * a single mask for all filters of a specific type.
+        *
+        * If we have preexisting filters, they obviously depend on the
+        * current programmed input set. Display a diagnostic message in this
+        * case explaining why the filter could not be accepted.
+        */
+       if (*fdir_filter_count) {
+               netif_err(pf, drv, vsi->netdev, "Cannot change input set for %s flows until %d preexisting filters are removed\n",
+                         i40e_flow_str(fsp),
+                         *fdir_filter_count);
+               return -EOPNOTSUPP;
+       }
+
+       i40e_write_fd_input_set(pf, index, new_mask);
+
+       /* Add the new offset and update table, if necessary */
+       if (new_flex_offset) {
+               err = i40e_add_flex_offset(&pf->l4_flex_pit_list, src_offset,
+                                          pit_index);
+               if (err)
+                       return err;
+
+               if (flex_l3) {
+                       err = i40e_add_flex_offset(&pf->l3_flex_pit_list,
+                                                  src_offset,
+                                                  pit_index);
+                       if (err)
+                               return err;
+               }
+
+               i40e_reprogram_flex_pit(pf);
+       }
+
+       return 0;
+}
+
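From userspace all of this is driven through ethtool's ntuple interface. A
hypothetical sequence that exercises the checks above (interface name,
addresses, and action queues are placeholders):

	# the first tcp4 filter establishes the tcp4 input set
	ethtool -N eth0 flow-type tcp4 dst-ip 192.168.10.2 dst-port 80 action 4

	# a later tcp4 filter implying a different field mask is refused,
	# with the diagnostics above, until the first filter is deleted
	ethtool -N eth0 flow-type tcp4 src-port 5001 action 6
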
 /**
  * i40e_add_fdir_ethtool - Add/Remove Flow Director filters
  * @vsi: pointer to the targeted VSI
@@ -2699,11 +3609,13 @@ static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
 static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
                                 struct ethtool_rxnfc *cmd)
 {
+       struct i40e_rx_flow_userdef userdef;
        struct ethtool_rx_flow_spec *fsp;
        struct i40e_fdir_filter *input;
+       u16 dest_vsi = 0, q_index = 0;
        struct i40e_pf *pf;
        int ret = -EINVAL;
-       u16 vf_id;
+       u8 dest_ctl;
 
        if (!vsi)
                return -EINVAL;
@@ -2712,7 +3624,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
        if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
                return -EOPNOTSUPP;
 
-       if (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)
+       if (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)
                return -ENOSPC;
 
        if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) ||
@@ -2724,14 +3636,49 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
 
        fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
+       /* Parse the user-defined field */
+       if (i40e_parse_rx_flow_user_data(fsp, &userdef))
+               return -EINVAL;
+
+       /* Extended MAC field is not supported */
+       if (fsp->flow_type & FLOW_MAC_EXT)
+               return -EINVAL;
+
+       ret = i40e_check_fdir_input_set(vsi, fsp, &userdef);
+       if (ret)
+               return ret;
+
        if (fsp->location >= (pf->hw.func_caps.fd_filters_best_effort +
                              pf->hw.func_caps.fd_filters_guaranteed)) {
                return -EINVAL;
        }
 
-       if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
-           (fsp->ring_cookie >= vsi->num_queue_pairs))
-               return -EINVAL;
+       /* ring_cookie is either the drop index, or is a mask of the queue
+        * index and VF id we wish to target.
+        */
+       if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+               dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
+       } else {
+               u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+               u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+               if (!vf) {
+                       if (ring >= vsi->num_queue_pairs)
+                               return -EINVAL;
+                       dest_vsi = vsi->id;
+               } else {
+                       /* VFs are zero-indexed, so we subtract one here */
+                       vf--;
+
+                       if (vf >= pf->num_alloc_vfs)
+                               return -EINVAL;
+                       if (ring >= pf->vf[vf].num_queue_pairs)
+                               return -EINVAL;
+                       dest_vsi = pf->vf[vf].lan_vsi_id;
+               }
+               dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
+               q_index = ring;
+       }
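
The two helpers used above unpack the 64-bit ring_cookie that ethtool hands the driver: the queue index sits in the low 32 bits and the VF id in the byte above them, with 0 meaning "the PF itself" (hence the vf-- adjustment). A short sketch; treat the exact field widths as an assumption read off the helper names:

	u64 cookie = ((u64)2 << 32) | 5;  /* VF field 2 (i.e. VF 1), queue 5 */
	u32 ring = ethtool_get_flow_spec_ring(cookie);     /* -> 5 */
	u8  vf   = ethtool_get_flow_spec_ring_vf(cookie);  /* -> 2 */
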
 
        input = kzalloc(sizeof(*input), GFP_KERNEL);
 
@@ -2739,20 +3686,14 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
                return -ENOMEM;
 
        input->fd_id = fsp->location;
-
-       if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
-               input->dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
-       else
-               input->dest_ctl =
-                            I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
-
-       input->q_index = fsp->ring_cookie;
-       input->flex_off = 0;
-       input->pctype = 0;
-       input->dest_vsi = vsi->id;
+       input->q_index = q_index;
+       input->dest_vsi = dest_vsi;
+       input->dest_ctl = dest_ctl;
        input->fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
        input->cnt_index  = I40E_FD_SB_STAT_IDX(pf->hw.pf_id);
-       input->flow_type = fsp->flow_type;
+       input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+       input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+       input->flow_type = fsp->flow_type & ~FLOW_EXT;
        input->ip4_proto = fsp->h_u.usr_ip4_spec.proto;
 
        /* Reverse the src and dest notion, since the HW expects them to be from
@@ -2760,33 +3701,29 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
         */
        input->dst_port = fsp->h_u.tcp_ip4_spec.psrc;
        input->src_port = fsp->h_u.tcp_ip4_spec.pdst;
-       input->dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
-       input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
-
-       if (ntohl(fsp->m_ext.data[1])) {
-               vf_id = ntohl(fsp->h_ext.data[1]);
-               if (vf_id >= pf->num_alloc_vfs) {
-                       netif_info(pf, drv, vsi->netdev,
-                                  "Invalid VF id %d\n", vf_id);
-                       goto free_input;
-               }
-               /* Find vsi id from vf id and override dest vsi */
-               input->dest_vsi = pf->vf[vf_id].lan_vsi_id;
-               if (input->q_index >= pf->vf[vf_id].num_queue_pairs) {
-                       netif_info(pf, drv, vsi->netdev,
-                                  "Invalid queue id %d for VF %d\n",
-                                  input->q_index, vf_id);
-                       goto free_input;
-               }
+       input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+       input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+
+       if (userdef.flex_filter) {
+               input->flex_filter = true;
+               input->flex_word = cpu_to_be16(userdef.flex_word);
+               input->flex_offset = userdef.flex_offset;
        }
 
        ret = i40e_add_del_fdir(vsi, input, true);
-free_input:
        if (ret)
-               kfree(input);
-       else
-               i40e_update_ethtool_fdir_entry(vsi, input, fsp->location, NULL);
+               goto free_input;
+
+       /* Add the input filter to the fdir_filter_list, possibly replacing
+        * a previous filter. Do not free the input structure after adding it
+        * to the list as this would cause a use-after-free bug.
+        */
+       i40e_update_ethtool_fdir_entry(vsi, input, fsp->location, NULL);
 
+       return 0;
+
+free_input:
+       kfree(input);
        return ret;
 }
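
The reworked exit flow encodes a simple ownership rule, sketched here for clarity:

	/* i40e_add_del_fdir() failed -> 'input' was never published, so the
	 * error path must kfree() it. Once i40e_update_ethtool_fdir_entry()
	 * has linked it into the filter list, the list owns it until it is
	 * replaced or torn down - freeing it after that point is exactly
	 * the use-after-free the old code risked.
	 */
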
 
@@ -3036,7 +3973,7 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
  * @dev: network interface device structure
  *
  * The get string set count and the string set should be matched for each
- * flag returned.  Add new strings for each flag to the i40e_priv_flags_strings
+ * flag returned.  Add new strings for each flag to the i40e_gstrings_priv_flags
  * array.
  *
  * Returns a u32 bitmap of flags.
@@ -3046,19 +3983,27 @@ static u32 i40e_get_priv_flags(struct net_device *dev)
        struct i40e_netdev_priv *np = netdev_priv(dev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
-       u32 ret_flags = 0;
+       u32 i, j, ret_flags = 0;
+
+       for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
+               const struct i40e_priv_flags *priv_flags;
+
+               priv_flags = &i40e_gstrings_priv_flags[i];
+
+               if (priv_flags->flag & pf->flags)
+                       ret_flags |= BIT(i);
+       }
+
+       if (pf->hw.pf_id != 0)
+               return ret_flags;
+
+       for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) {
+               const struct i40e_priv_flags *priv_flags;
 
-       ret_flags |= pf->flags & I40E_FLAG_LINK_POLLING_ENABLED ?
-               I40E_PRIV_FLAGS_LINKPOLL_FLAG : 0;
-       ret_flags |= pf->flags & I40E_FLAG_FD_ATR_ENABLED ?
-               I40E_PRIV_FLAGS_FD_ATR : 0;
-       ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ?
-               I40E_PRIV_FLAGS_VEB_STATS : 0;
-       ret_flags |= pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE ?
-               0 : I40E_PRIV_FLAGS_HW_ATR_EVICT;
-       if (pf->hw.pf_id == 0) {
-               ret_flags |= pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT ?
-                       I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT : 0;
+               priv_flags = &i40e_gl_gstrings_priv_flags[j];
+
+               if (priv_flags->flag & pf->flags)
+                       ret_flags |= BIT(i + j);
        }
 
        return ret_flags;
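
The loops above assume a table-driven flag definition. A sketch of the shape such a table plausibly takes (field names are inferred from their use here; the string member and the example entries are illustrative):

	struct i40e_priv_flags {
		char flag_string[ETH_GSTRING_LEN];
		u64 flag;
		bool read_only;
	};

	static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
		{ "LinkPolling",       I40E_FLAG_LINK_POLLING_ENABLED, false },
		{ "flow-director-atr", I40E_FLAG_FD_ATR_ENABLED,       false },
		{ "veb-stats",         I40E_FLAG_VEB_STATS_ENABLED,    false },
	};

Bit i of the returned bitmap mirrors entry i of the table, which keeps get_priv_flags, set_priv_flags and the string set trivially in sync.
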
@@ -3074,54 +4019,66 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
        struct i40e_netdev_priv *np = netdev_priv(dev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
-       u16 sw_flags = 0, valid_flags = 0;
-       bool reset_required = false;
-       bool promisc_change = false;
-       int ret;
+       u64 changed_flags;
+       u32 i, j;
 
-       /* NOTE: MFP is not settable */
+       changed_flags = pf->flags;
 
-       if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG)
-               pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED;
-       else
-               pf->flags &= ~I40E_FLAG_LINK_POLLING_ENABLED;
+       for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
+               const struct i40e_priv_flags *priv_flags;
 
-       /* allow the user to control the state of the Flow
-        * Director ATR (Application Targeted Routing) feature
-        * of the driver
+               priv_flags = &i40e_gstrings_priv_flags[i];
+
+               if (priv_flags->read_only)
+                       continue;
+
+               if (flags & BIT(i))
+                       pf->flags |= priv_flags->flag;
+               else
+                       pf->flags &= ~(priv_flags->flag);
+       }
+
+       if (pf->hw.pf_id != 0)
+               goto flags_complete;
+
+       for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) {
+               const struct i40e_priv_flags *priv_flags;
+
+               priv_flags = &i40e_gl_gstrings_priv_flags[j];
+
+               if (priv_flags->read_only)
+                       continue;
+
+               if (flags & BIT(i + j))
+                       pf->flags |= priv_flags->flag;
+               else
+                       pf->flags &= ~(priv_flags->flag);
+       }
+
+flags_complete:
+       /* check for flags that changed */
+       changed_flags ^= pf->flags;
+
+       /* Process any additional changes needed as a result of flag changes.
+        * The changed_flags value reflects the list of bits that were
+        * changed in the code above.
         */
-       if (flags & I40E_PRIV_FLAGS_FD_ATR) {
-               pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
-       } else {
-               pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
-               pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 
-               /* flush current ATR settings */
+       /* Flush current ATR settings if ATR was disabled */
+       if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) &&
+           !(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) {
+               pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED;
                set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
        }
 
-       if ((flags & I40E_PRIV_FLAGS_VEB_STATS) &&
-           !(pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
-               pf->flags |= I40E_FLAG_VEB_STATS_ENABLED;
-               reset_required = true;
-       } else if (!(flags & I40E_PRIV_FLAGS_VEB_STATS) &&
-                  (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
-               pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
-               reset_required = true;
-       }
-
-       if (pf->hw.pf_id == 0) {
-               if ((flags & I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT) &&
-                   !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
-                       pf->flags |= I40E_FLAG_TRUE_PROMISC_SUPPORT;
-                       promisc_change = true;
-               } else if (!(flags & I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT) &&
-                          (pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
-                       pf->flags &= ~I40E_FLAG_TRUE_PROMISC_SUPPORT;
-                       promisc_change = true;
-               }
-       }
-       if (promisc_change) {
+       /* Only allow ATR evict on hardware that is capable of handling it */
+       if (pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+               pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+
+       if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
+               u16 sw_flags = 0, valid_flags = 0;
+               int ret;
+
                if (!(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
                        sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
                valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
@@ -3137,22 +4094,17 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
                }
        }
 
-       if ((flags & I40E_PRIV_FLAGS_HW_ATR_EVICT) &&
-           (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))
-               pf->auto_disable_flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE;
-       else
-               pf->auto_disable_flags |= I40E_FLAG_HW_ATR_EVICT_CAPABLE;
-
-       /* if needed, issue reset to cause things to take effect */
-       if (reset_required)
+       /* Issue a reset to make the changes take effect. As additional bits
+        * are added, we will need to create a mask of the bits requiring reset.
+        */
+       if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
+           ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
                i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
 
        return 0;
 }
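
The snapshot/XOR idiom that replaces the old per-flag bookkeeping is worth a short illustration (sketch):

	u64 old = pf->flags;            /* snapshot before applying user bits */
	/* ... set or clear each requested bit in pf->flags ... */
	u64 changed = old ^ pf->flags;  /* set bits == flags that flipped */
	if (changed & I40E_FLAG_VEB_STATS_ENABLED)
		;                       /* react only to an actual transition */
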
 
 static const struct ethtool_ops i40e_ethtool_ops = {
-       .get_settings           = i40e_get_settings,
-       .set_settings           = i40e_set_settings,
        .get_drvinfo            = i40e_get_drvinfo,
        .get_regs_len           = i40e_get_regs_len,
        .get_regs               = i40e_get_regs,
@@ -3189,6 +4141,8 @@ static const struct ethtool_ops i40e_ethtool_ops = {
        .set_priv_flags         = i40e_set_priv_flags,
        .get_per_queue_coalesce = i40e_get_per_queue_coalesce,
        .set_per_queue_coalesce = i40e_set_per_queue_coalesce,
+       .get_link_ksettings     = i40e_get_link_ksettings,
+       .set_link_ksettings     = i40e_set_link_ksettings,
 };
 
 void i40e_set_ethtool_ops(struct net_device *netdev)
index e8a8351c8ea998a141bd8fb5f27d619b5b477b67..a0506e28d167201fa1b66c290eb9a1eccc50a8d3 100644 (file)
@@ -39,9 +39,9 @@ static const char i40e_driver_string[] =
 
 #define DRV_KERN "-k"
 
-#define DRV_VERSION_MAJOR 1
-#define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 27
+#define DRV_VERSION_MAJOR 2
+#define DRV_VERSION_MINOR 1
+#define DRV_VERSION_BUILD 7
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD)    DRV_KERN
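
For readers unfamiliar with the trick: __stringify() (linux/stringify.h) expands its macro argument before quoting it, and adjacent C string literals concatenate, so the definition above produces a single version string:

	#define __stringify_1(x...)	#x
	#define __stringify(x...)	__stringify_1(x)
	/* "2" "." "1" "." "7" "-k"  ==>  "2.1.7-k" */
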
@@ -299,11 +299,7 @@ void i40e_service_event_schedule(struct i40e_pf *pf)
  * device is munged, not just the one netdev port, so go for the full
  * reset.
  **/
-#ifdef I40E_FCOE
-void i40e_tx_timeout(struct net_device *netdev)
-#else
 static void i40e_tx_timeout(struct net_device *netdev)
-#endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
@@ -408,10 +404,7 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
  * Returns the address of the device statistics structure.
  * The statistics are actually updated from the service task.
  **/
-#ifndef I40E_FCOE
-static
-#endif
-void i40e_get_netdev_stats_struct(struct net_device *netdev,
+static void i40e_get_netdev_stats_struct(struct net_device *netdev,
                                  struct rtnl_link_stats64 *stats)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -723,55 +716,6 @@ static void i40e_update_veb_stats(struct i40e_veb *veb)
        veb->stat_offsets_loaded = true;
 }
 
-#ifdef I40E_FCOE
-/**
- * i40e_update_fcoe_stats - Update FCoE-specific ethernet statistics counters.
- * @vsi: the VSI that is capable of doing FCoE
- **/
-static void i40e_update_fcoe_stats(struct i40e_vsi *vsi)
-{
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_hw *hw = &pf->hw;
-       struct i40e_fcoe_stats *ofs;
-       struct i40e_fcoe_stats *fs;     /* device's eth stats */
-       int idx;
-
-       if (vsi->type != I40E_VSI_FCOE)
-               return;
-
-       idx = hw->pf_id + I40E_FCOE_PF_STAT_OFFSET;
-       fs = &vsi->fcoe_stats;
-       ofs = &vsi->fcoe_stats_offsets;
-
-       i40e_stat_update32(hw, I40E_GL_FCOEPRC(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->rx_fcoe_packets, &fs->rx_fcoe_packets);
-       i40e_stat_update48(hw, I40E_GL_FCOEDWRCH(idx), I40E_GL_FCOEDWRCL(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->rx_fcoe_dwords, &fs->rx_fcoe_dwords);
-       i40e_stat_update32(hw, I40E_GL_FCOERPDC(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->rx_fcoe_dropped, &fs->rx_fcoe_dropped);
-       i40e_stat_update32(hw, I40E_GL_FCOEPTC(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->tx_fcoe_packets, &fs->tx_fcoe_packets);
-       i40e_stat_update48(hw, I40E_GL_FCOEDWTCH(idx), I40E_GL_FCOEDWTCL(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->tx_fcoe_dwords, &fs->tx_fcoe_dwords);
-       i40e_stat_update32(hw, I40E_GL_FCOECRC(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->fcoe_bad_fccrc, &fs->fcoe_bad_fccrc);
-       i40e_stat_update32(hw, I40E_GL_FCOELAST(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->fcoe_last_error, &fs->fcoe_last_error);
-       i40e_stat_update32(hw, I40E_GL_FCOEDDPC(idx),
-                          vsi->fcoe_stat_offsets_loaded,
-                          &ofs->fcoe_ddp_count, &fs->fcoe_ddp_count);
-
-       vsi->fcoe_stat_offsets_loaded = true;
-}
-
-#endif
 /**
  * i40e_update_vsi_stats - Update the vsi statistics counters.
  * @vsi: the VSI to be updated
@@ -1101,13 +1045,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
                           &osd->rx_lpi_count, &nsd->rx_lpi_count);
 
        if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
-           !(pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED))
+           !(pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED))
                nsd->fd_sb_status = true;
        else
                nsd->fd_sb_status = false;
 
        if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
-           !(pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+           !(pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED))
                nsd->fd_atr_status = true;
        else
                nsd->fd_atr_status = false;
@@ -1129,9 +1073,6 @@ void i40e_update_stats(struct i40e_vsi *vsi)
                i40e_update_pf_stats(pf);
 
        i40e_update_vsi_stats(vsi);
-#ifdef I40E_FCOE
-       i40e_update_fcoe_stats(vsi);
-#endif
 }
 
 /**
@@ -1562,11 +1503,7 @@ int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
  *
  * Returns 0 on success, negative on failure
  **/
-#ifdef I40E_FCOE
-int i40e_set_mac(struct net_device *netdev, void *p)
-#else
 static int i40e_set_mac(struct net_device *netdev, void *p)
-#endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
@@ -1626,17 +1563,10 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
  *
  * Setup VSI queue mapping for enabled traffic classes.
  **/
-#ifdef I40E_FCOE
-void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
-                             struct i40e_vsi_context *ctxt,
-                             u8 enabled_tc,
-                             bool is_add)
-#else
 static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
                                     struct i40e_vsi_context *ctxt,
                                     u8 enabled_tc,
                                     bool is_add)
-#endif
 {
        struct i40e_pf *pf = vsi->back;
        u16 sections = 0;
@@ -1686,11 +1616,6 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
                                qcount = min_t(int, pf->alloc_rss_size,
                                               num_tc_qps);
                                break;
-#ifdef I40E_FCOE
-                       case I40E_VSI_FCOE:
-                               qcount = num_tc_qps;
-                               break;
-#endif
                        case I40E_VSI_FDIR:
                        case I40E_VSI_SRIOV:
                        case I40E_VSI_VMDQ2:
@@ -1800,11 +1725,7 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
  * i40e_set_rx_mode - NDO callback to set the netdev filters
  * @netdev: network interface device structure
  **/
-#ifdef I40E_FCOE
-void i40e_set_rx_mode(struct net_device *netdev)
-#else
 static void i40e_set_rx_mode(struct net_device *netdev)
-#endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
@@ -1883,19 +1804,12 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
 static
 struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
 {
-       while (next) {
-               next = hlist_entry(next->hlist.next,
-                                  typeof(struct i40e_new_mac_filter),
-                                  hlist);
-
-               /* keep going if we found a broadcast filter */
-               if (next && is_broadcast_ether_addr(next->f->macaddr))
-                       continue;
-
-               break;
+       hlist_for_each_entry_continue(next, hlist) {
+               if (!is_broadcast_ether_addr(next->f->macaddr))
+                       return next;
        }
 
-       return next;
+       return NULL;
 }
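
hlist_for_each_entry_continue() resumes the walk at the node after 'next', so the rewrite above is roughly equivalent to this open-coded loop (a sketch using the standard hlist_entry_safe() helper):

	for (next = hlist_entry_safe(next->hlist.next,
				     struct i40e_new_mac_filter, hlist);
	     next;
	     next = hlist_entry_safe(next->hlist.next,
				     struct i40e_new_mac_filter, hlist)) {
		if (!is_broadcast_ether_addr(next->f->macaddr))
			return next;
	}
	return NULL;
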
 
 /**
@@ -2487,13 +2401,15 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
 
        netdev_info(netdev, "changing MTU from %d to %d\n",
                    netdev->mtu, new_mtu);
        netdev->mtu = new_mtu;
        if (netif_running(netdev))
                i40e_vsi_reinit_locked(vsi);
-       i40e_notify_client_of_l2_param_changes(vsi);
+       pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
+                     I40E_FLAG_CLIENT_L2_CHANGE);
        return 0;
 }
 
@@ -2707,13 +2623,8 @@ void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
  *
  * net_device_ops implementation for adding vlan ids
  **/
-#ifdef I40E_FCOE
-int i40e_vlan_rx_add_vid(struct net_device *netdev,
-                        __always_unused __be16 proto, u16 vid)
-#else
 static int i40e_vlan_rx_add_vid(struct net_device *netdev,
                                __always_unused __be16 proto, u16 vid)
-#endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
@@ -2744,13 +2655,8 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
  *
  * net_device_ops implementation for removing vlan ids
  **/
-#ifdef I40E_FCOE
-int i40e_vlan_rx_kill_vid(struct net_device *netdev,
-                         __always_unused __be16 proto, u16 vid)
-#else
 static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
                                 __always_unused __be16 proto, u16 vid)
-#endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
@@ -2920,9 +2826,6 @@ static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
 
        for (i = 0; i < vsi->num_queue_pairs && !err; i++)
                err = i40e_setup_rx_descriptors(vsi->rx_rings[i]);
-#ifdef I40E_FCOE
-       i40e_fcoe_setup_ddp_resources(vsi);
-#endif
        return err;
 }
 
@@ -2942,9 +2845,6 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
        for (i = 0; i < vsi->num_queue_pairs; i++)
                if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
                        i40e_free_rx_resources(vsi->rx_rings[i]);
-#ifdef I40E_FCOE
-       i40e_fcoe_free_ddp_resources(vsi);
-#endif
 }
 
 /**
@@ -3015,9 +2915,6 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
        tx_ctx.qlen = ring->count;
        tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
                                               I40E_FLAG_FD_ATR_ENABLED));
-#ifdef I40E_FCOE
-       tx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE);
-#endif
        tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
        /* FDIR VSI tx ring can still use RS bit and writebacks */
        if (vsi->type != I40E_VSI_FDIR)
@@ -3098,7 +2995,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 
        ring->rx_buf_len = vsi->rx_buf_len;
 
-       rx_ctx.dbuff = ring->rx_buf_len >> I40E_RXQ_CTX_DBUFF_SHIFT;
+       rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
+                                   BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
 
        rx_ctx.base = (ring->dma / 128);
        rx_ctx.qlen = ring->count;
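
The DIV_ROUND_UP() change matters once buffer lengths stop being multiples of 128: dbuff is expressed in 128-byte units (I40E_RXQ_CTX_DBUFF_SHIFT being 7), and a plain right shift truncates odd sizes downward. A worked example, assuming rx_buf_len = 1534 (1536 minus a 2-byte NET_IP_ALIGN):

	1534 >> 7               == 11   /* 1408 bytes: understates the buffer */
	DIV_ROUND_UP(1534, 128) == 12   /* 1536 bytes: correctly rounded up  */
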
@@ -3120,9 +3018,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
        rx_ctx.l2tsel = 1;
        /* this controls whether VLAN is stripped from inner headers */
        rx_ctx.showiv = 0;
-#ifdef I40E_FCOE
-       rx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE);
-#endif
        /* set the prefena field to 1 because the manual says to */
        rx_ctx.prefena = 1;
 
@@ -3181,27 +3076,19 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
        int err = 0;
        u16 i;
 
-       if (vsi->netdev && (vsi->netdev->mtu > ETH_DATA_LEN))
-               vsi->max_frame = vsi->netdev->mtu + ETH_HLEN
-                              + ETH_FCS_LEN + VLAN_HLEN;
-       else
-               vsi->max_frame = I40E_RXBUFFER_2048;
-
-       vsi->rx_buf_len = I40E_RXBUFFER_2048;
-
-#ifdef I40E_FCOE
-       /* setup rx buffer for FCoE */
-       if ((vsi->type == I40E_VSI_FCOE) &&
-           (vsi->back->flags & I40E_FLAG_FCOE_ENABLED)) {
-               vsi->rx_buf_len = I40E_RXBUFFER_3072;
-               vsi->max_frame = I40E_RXBUFFER_3072;
+       if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+               vsi->max_frame = I40E_MAX_RXBUFFER;
+               vsi->rx_buf_len = I40E_RXBUFFER_2048;
+#if (PAGE_SIZE < 8192)
+       } else if (vsi->netdev->mtu <= ETH_DATA_LEN) {
+               vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+               vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+#endif
+       } else {
+               vsi->max_frame = I40E_MAX_RXBUFFER;
+               vsi->rx_buf_len = I40E_RXBUFFER_2048;
        }
 
-#endif /* I40E_FCOE */
-       /* round up for the chip's needs */
-       vsi->rx_buf_len = ALIGN(vsi->rx_buf_len,
-                               BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
-
        /* set up individual rings */
        for (i = 0; i < vsi->num_queue_pairs && !err; i++)
                err = i40e_configure_rx_ring(vsi->rx_rings[i]);
@@ -3281,6 +3168,12 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
        if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
                return;
 
+       /* Reset FDir counters as we're replaying all existing filters */
+       pf->fd_tcp4_filter_cnt = 0;
+       pf->fd_udp4_filter_cnt = 0;
+       pf->fd_sctp4_filter_cnt = 0;
+       pf->fd_ip4_filter_cnt = 0;
+
        hlist_for_each_entry_safe(filter, node,
                                  &pf->fdir_filter_list, fdir_node) {
                i40e_add_del_fdir(vsi, filter, true);
@@ -3993,11 +3886,7 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
  * This is used by netconsole to send skbs without having to re-enable
  * interrupts.  It's not called while the normal interrupt routine is executing.
  **/
-#ifdef I40E_FCOE
-void i40e_netpoll(struct net_device *netdev)
-#else
 static void i40e_netpoll(struct net_device *netdev)
-#endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
@@ -4100,8 +3989,6 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
                }
        }
 
-       if (hw->revision_id == 0)
-               mdelay(50);
        return ret;
 }
 
@@ -4180,6 +4067,12 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
                }
        }
 
+       /* Due to a HW erratum, on Rx disable only, the register can indicate
+        * done before it really is. Wait 50 ms to be sure.
+        */
+       if (!enable)
+               mdelay(50);
+
        return ret;
 }
 
@@ -4463,17 +4356,16 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
  **/
 static void i40e_vsi_close(struct i40e_vsi *vsi)
 {
-       bool reset = false;
-
+       struct i40e_pf *pf = vsi->back;
+
        if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
                i40e_down(vsi);
        i40e_vsi_free_irq(vsi);
        i40e_vsi_free_tx_resources(vsi);
        i40e_vsi_free_rx_resources(vsi);
        vsi->current_netdev_flags = 0;
-       if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
-               reset = true;
-       i40e_notify_client_of_netdev_close(vsi, reset);
+       pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+       if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state))
+               pf->flags |= I40E_FLAG_CLIENT_RESET;
 }
 
 /**
@@ -4485,14 +4377,6 @@ static void i40e_quiesce_vsi(struct i40e_vsi *vsi)
        if (test_bit(__I40E_DOWN, &vsi->state))
                return;
 
-       /* No need to disable FCoE VSI when Tx suspended */
-       if ((test_bit(__I40E_PORT_TX_SUSPENDED, &vsi->back->state)) &&
-           vsi->type == I40E_VSI_FCOE) {
-               dev_dbg(&vsi->back->pdev->dev,
-                        "VSI seid %d skipping FCoE VSI disable\n", vsi->seid);
-               return;
-       }
-
        set_bit(__I40E_NEEDS_RESTART, &vsi->state);
        if (vsi->netdev && netif_running(vsi->netdev))
                vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
@@ -4595,8 +4479,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
        int v, ret = 0;
 
        for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
-               /* No need to wait for FCoE VSI queues */
-               if (pf->vsi[v] && pf->vsi[v]->type != I40E_VSI_FCOE) {
+               if (pf->vsi[v]) {
                        ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
                        if (ret)
                                break;
@@ -5220,20 +5103,12 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
                        continue;
 
                /* - Enable all TCs for the LAN VSI
-#ifdef I40E_FCOE
-                * - For FCoE VSI only enable the TC configured
-                *   as per the APP TLV
-#endif
                 * - For all others keep them at TC0 for now
                 */
                if (v == pf->lan_vsi)
                        tc_map = i40e_pf_get_tc_map(pf);
                else
                        tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
-#ifdef I40E_FCOE
-               if (pf->vsi[v]->type == I40E_VSI_FCOE)
-                       tc_map = i40e_get_fcoe_tc_map(pf);
-#endif /* #ifdef I40E_FCOE */
 
                ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
                if (ret) {
@@ -5300,10 +5175,6 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
                    (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
                        dev_info(&pf->pdev->dev,
                                 "DCBX offload is not supported or is disabled for this PF.\n");
-
-                       if (pf->flags & I40E_FLAG_MFP_ENABLED)
-                               goto out;
-
                } else {
                        /* When status is not DISABLED then DCBX in FW */
                        pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
@@ -5464,13 +5335,8 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
        /* replay FDIR SB filters */
        if (vsi->type == I40E_VSI_FDIR) {
                /* reset fd counters */
-               pf->fd_add_err = pf->fd_atr_cnt = 0;
-               if (pf->fd_tcp_rule > 0) {
-                       pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
-                       if (I40E_DEBUG_FD & pf->hw.debug_mask)
-                               dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n");
-                       pf->fd_tcp_rule = 0;
-               }
+               pf->fd_add_err = 0;
+               pf->fd_atr_cnt = 0;
                i40e_fdir_filter_restore(vsi);
        }
 
@@ -5542,8 +5408,6 @@ void i40e_down(struct i40e_vsi *vsi)
                i40e_clean_rx_ring(vsi->rx_rings[i]);
        }
 
-       i40e_notify_client_of_netdev_close(vsi, false);
-
 }
 
 /**
@@ -5604,17 +5468,15 @@ exit:
        return ret;
 }
 
-#ifdef I40E_FCOE
-int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
-                   struct tc_to_netdev *tc)
-#else
 static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
                           struct tc_to_netdev *tc)
-#endif
 {
-       if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+       if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
-       return i40e_setup_tc(netdev, tc->tc);
+
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return i40e_setup_tc(netdev, tc->mqprio->num_tc);
 }
 
 /**
@@ -5745,6 +5607,7 @@ err_setup_tx:
 static void i40e_fdir_filter_exit(struct i40e_pf *pf)
 {
        struct i40e_fdir_filter *filter;
+       struct i40e_flex_pit *pit_entry, *tmp;
        struct hlist_node *node2;
 
        hlist_for_each_entry_safe(filter, node2,
@@ -5752,7 +5615,43 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf)
                hlist_del(&filter->fdir_node);
                kfree(filter);
        }
+
+       list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) {
+               list_del(&pit_entry->list);
+               kfree(pit_entry);
+       }
+       INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+
+       list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) {
+               list_del(&pit_entry->list);
+               kfree(pit_entry);
+       }
+       INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
        pf->fdir_pf_active_filters = 0;
+       pf->fd_tcp4_filter_cnt = 0;
+       pf->fd_udp4_filter_cnt = 0;
+       pf->fd_sctp4_filter_cnt = 0;
+       pf->fd_ip4_filter_cnt = 0;
+
+       /* Reprogram the default input set for TCP/IPv4 */
+       i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+                               I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+                               I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+       /* Reprogram the default input set for UDP/IPv4 */
+       i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
+                               I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+                               I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+       /* Reprogram the default input set for SCTP/IPv4 */
+       i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
+                               I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+                               I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+       /* Reprogram the default input set for Other/IPv4 */
+       i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
+                               I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
 }
 
 /**
@@ -6021,8 +5920,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
                i40e_service_event_schedule(pf);
        } else {
                i40e_pf_unquiesce_all_vsi(pf);
-               /* Notify the client for the DCB changes */
-               i40e_notify_client_of_l2_param_changes(pf->vsi[pf->lan_vsi]);
+               pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
+                             I40E_FLAG_CLIENT_L2_CHANGE);
        }
 
 exit:
@@ -6144,8 +6043,8 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
            (pf->fd_add_err == 0) ||
            (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
                if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-                   (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
-                       pf->auto_disable_flags &= ~I40E_FLAG_FD_SB_ENABLED;
+                   (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)) {
+                       pf->hw_disabled_flags &= ~I40E_FLAG_FD_SB_ENABLED;
                        if (I40E_DEBUG_FD & pf->hw.debug_mask)
                                dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
                }
@@ -6156,9 +6055,9 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
         */
        if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) {
                if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                   (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                   (pf->fd_tcp_rule == 0)) {
-                       pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+                   (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED) &&
+                   (pf->fd_tcp4_filter_cnt == 0)) {
+                       pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                        if (I40E_DEBUG_FD & pf->hw.debug_mask)
                                dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
                }
@@ -6210,7 +6109,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
        }
 
        pf->fd_flush_timestamp = jiffies;
-       pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+       pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED;
        /* flush all filters */
        wr32(&pf->hw, I40E_PFQF_CTL_1,
             I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
@@ -6229,8 +6128,8 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
        } else {
                /* replay sideband filters */
                i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
-               if (!disable_atr)
-                       pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+               if (!disable_atr && !pf->fd_tcp4_filter_cnt)
+                       pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
                if (I40E_DEBUG_FD & pf->hw.debug_mask)
                        dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
@@ -6283,9 +6182,6 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
 
        switch (vsi->type) {
        case I40E_VSI_MAIN:
-#ifdef I40E_FCOE
-       case I40E_VSI_FCOE:
-#endif
                if (!vsi->netdev || !vsi->netdev_registered)
                        break;
 
@@ -6627,9 +6523,11 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
                                 opcode);
                        break;
                }
-       } while (pending && (i++ < pf->adminq_work_limit));
+       } while (i++ < pf->adminq_work_limit);
+
+       if (i < pf->adminq_work_limit)
+               clear_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
 
-       clear_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
        /* re-enable Admin queue interrupt cause */
        val = rd32(hw, I40E_PFINT_ICR0_ENA);
        val |=  I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
@@ -7067,8 +6965,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
                goto end_core_reset;
 
        ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
-                               hw->func_caps.num_rx_qp,
-                               pf->fcoe_hmc_cntx_num, pf->fcoe_hmc_filt_num);
+                               hw->func_caps.num_rx_qp, 0, 0);
        if (ret) {
                dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret);
                goto end_core_reset;
@@ -7087,10 +6984,6 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
                /* Continue without DCB enabled */
        }
 #endif /* CONFIG_I40E_DCB */
-#ifdef I40E_FCOE
-       i40e_init_pf_fcoe(pf);
-
-#endif
        /* do basic switch setup */
        ret = i40e_setup_pf_switch(pf, reinit);
        if (ret)
@@ -7351,7 +7244,7 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
 {
        struct i40e_hw *hw = &pf->hw;
        i40e_status ret;
-       __be16 port;
+       u16 port;
        int i;
 
        if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC))
@@ -7375,7 +7268,7 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
                                        "%s %s port %d, index %d failed, err %s aq_err %s\n",
                                        pf->udp_ports[i].type ? "vxlan" : "geneve",
                                        port ? "add" : "delete",
-                                       ntohs(port), i,
+                                       port, i,
                                        i40e_stat_str(&pf->hw, ret),
                                        i40e_aq_str(&pf->hw,
                                                    pf->hw.aq.asq_last_status));
@@ -7411,7 +7304,18 @@ static void i40e_service_task(struct work_struct *work)
        i40e_vc_process_vflr_event(pf);
        i40e_watchdog_subtask(pf);
        i40e_fdir_reinit_subtask(pf);
-       i40e_client_subtask(pf);
+       if (pf->flags & I40E_FLAG_CLIENT_RESET) {
+               /* Client subtask will reopen next time through. */
+               i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true);
+               pf->flags &= ~I40E_FLAG_CLIENT_RESET;
+       } else {
+               i40e_client_subtask(pf);
+               if (pf->flags & I40E_FLAG_CLIENT_L2_CHANGE) {
+                       i40e_notify_client_of_l2_param_changes(
+                                                       pf->vsi[pf->lan_vsi]);
+                       pf->flags &= ~I40E_FLAG_CLIENT_L2_CHANGE;
+               }
+       }
        i40e_sync_filters_subtask(pf);
        i40e_sync_udp_filters_subtask(pf);
        i40e_clean_adminq_subtask(pf);
@@ -7484,15 +7388,6 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
                                      I40E_REQ_DESCRIPTOR_MULTIPLE);
                break;
 
-#ifdef I40E_FCOE
-       case I40E_VSI_FCOE:
-               vsi->alloc_queue_pairs = pf->num_fcoe_qps;
-               vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
-                                     I40E_REQ_DESCRIPTOR_MULTIPLE);
-               vsi->num_q_vectors = pf->num_fcoe_msix;
-               break;
-
-#endif /* I40E_FCOE */
        default:
                WARN_ON(1);
                return -ENODATA;
@@ -7809,6 +7704,7 @@ static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors)
 static int i40e_init_msix(struct i40e_pf *pf)
 {
        struct i40e_hw *hw = &pf->hw;
+       int cpus, extra_vectors;
        int vectors_left;
        int v_budget, i;
        int v_actual;
@@ -7827,9 +7723,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
         *      - assumes symmetric Tx/Rx pairing
         *   - The number of VMDq pairs
         *   - The CPU count within the NUMA node if iWARP is enabled
-#ifdef I40E_FCOE
-        *   - The number of FCOE qps.
-#endif
         * Once we count this up, try the request.
         *
         * If we can't get what we want, we'll simplify to nearly nothing
@@ -7844,10 +7737,16 @@ static int i40e_init_msix(struct i40e_pf *pf)
                vectors_left--;
        }
 
-       /* reserve vectors for the main PF traffic queues */
-       pf->num_lan_msix = min_t(int, num_online_cpus(), vectors_left);
+       /* reserve some vectors for the main PF traffic queues. Initially we
+        * only reserve at most 50% of the available vectors, in the case that
+        * the number of online CPUs is large. This ensures that we can enable
+        * extra features as well. Once we've enabled the other features, we
+        * will use any remaining vectors to reach as close as we can to the
+        * number of online CPUs.
+        */
+       cpus = num_online_cpus();
+       pf->num_lan_msix = min_t(int, cpus, vectors_left / 2);
        vectors_left -= pf->num_lan_msix;
-       v_budget += pf->num_lan_msix;
 
        /* reserve one vector for sideband flow director */
        if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
@@ -7860,20 +7759,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
                }
        }
 
-#ifdef I40E_FCOE
-       /* can we reserve enough for FCoE? */
-       if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
-               if (!vectors_left)
-                       pf->num_fcoe_msix = 0;
-               else if (vectors_left >= pf->num_fcoe_qps)
-                       pf->num_fcoe_msix = pf->num_fcoe_qps;
-               else
-                       pf->num_fcoe_msix = 1;
-               v_budget += pf->num_fcoe_msix;
-               vectors_left -= pf->num_fcoe_msix;
-       }
-
-#endif
        /* can we reserve enough for iWARP? */
        if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
                iwarp_requested = pf->num_iwarp_msix;
@@ -7910,6 +7795,23 @@ static int i40e_init_msix(struct i40e_pf *pf)
                }
        }
 
+       /* On systems with a large number of SMP cores, we previously limited
+        * the number of vectors for num_lan_msix to be at most 50% of the
+        * available vectors, to allow for other features. Now, we add back
+        * the remaining vectors. However, we ensure that the total
+        * num_lan_msix will not exceed num_online_cpus(). To do this, we
+        * calculate the number of vectors we can add without going over the
+        * cap of CPUs. For systems with a small number of CPUs this will be
+        * zero.
+        */
+       extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left);
+       pf->num_lan_msix += extra_vectors;
+       vectors_left -= extra_vectors;
+
+       WARN(vectors_left < 0,
+            "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n");
+
+       v_budget += pf->num_lan_msix;
        pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
                                   GFP_KERNEL);
        if (!pf->msix_entries)
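
A worked example of the two-phase LAN budget above, assuming 16 online CPUs and 12 vectors left after the miscellaneous and flow-director reservations (all numbers illustrative):

	phase 1: num_lan_msix = min(16, 12 / 2) = 6;   vectors_left = 6
	         iWARP/VMDq then take, say, 2     ->   vectors_left = 4
	phase 2: extra = min(16 - 6, 4) = 4;           num_lan_msix = 10

LAN never exceeds the CPU count, yet reclaims whatever the optional features leave unused.
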
@@ -7950,10 +7852,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
                pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
                pf->num_vmdq_vsis = 1;
                pf->num_vmdq_qps = 1;
-#ifdef I40E_FCOE
-               pf->num_fcoe_qps = 0;
-               pf->num_fcoe_msix = 0;
-#endif
 
                /* partition out the remaining vectors */
                switch (vec) {
@@ -7967,13 +7865,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
                        } else {
                                pf->num_lan_msix = 2;
                        }
-#ifdef I40E_FCOE
-                       /* give one vector to FCoE */
-                       if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
-                               pf->num_lan_msix = 1;
-                               pf->num_fcoe_msix = 1;
-                       }
-#endif
                        break;
                default:
                        if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
@@ -7993,13 +7884,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
                               (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
                                                              pf->num_lan_msix);
                        pf->num_lan_qps = pf->num_lan_msix;
-#ifdef I40E_FCOE
-                       /* give one vector to FCoE */
-                       if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
-                               pf->num_fcoe_msix = 1;
-                               vec--;
-                       }
-#endif
                        break;
                }
        }
@@ -8020,13 +7904,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
                dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
                pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
        }
-#ifdef I40E_FCOE
-
-       if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) {
-               dev_info(&pf->pdev->dev, "FCOE disabled, not enough MSI-X vectors\n");
-               pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
-       }
-#endif
        i40e_debug(&pf->hw, I40E_DEBUG_INIT,
                   "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
                   pf->num_lan_msix,
@@ -8125,9 +8002,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
                if (vectors < 0) {
                        pf->flags &= ~(I40E_FLAG_MSIX_ENABLED   |
                                       I40E_FLAG_IWARP_ENABLED  |
-#ifdef I40E_FCOE
-                                      I40E_FLAG_FCOE_ENABLED   |
-#endif
                                       I40E_FLAG_RSS_ENABLED    |
                                       I40E_FLAG_DCB_CAPABLE    |
                                       I40E_FLAG_DCB_ENABLED    |
@@ -8360,13 +8234,10 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
 
                if (vsi->type == I40E_VSI_MAIN) {
                        for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
-                               i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i),
-                                                 seed_dw[i]);
+                               wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
                } else if (vsi->type == I40E_VSI_SRIOV) {
                        for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
-                               i40e_write_rx_ctl(hw,
-                                                 I40E_VFQF_HKEY1(i, vf_id),
-                                                 seed_dw[i]);
+                               wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]);
                } else {
                        dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n");
                }
@@ -8384,9 +8255,7 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
                        if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
                                return -EINVAL;
                        for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
-                               i40e_write_rx_ctl(hw,
-                                                 I40E_VFQF_HLUT1(i, vf_id),
-                                                 lut_dw[i]);
+                               wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]);
                } else {
                        dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
                }
@@ -8514,9 +8383,12 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
        i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
 
        /* Determine the RSS size of the VSI */
-       if (!vsi->rss_size)
-               vsi->rss_size = min_t(int, pf->alloc_rss_size,
-                                     vsi->num_queue_pairs);
+       if (!vsi->rss_size) {
+               u16 qcount;
+
+               qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+               vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
+       }
        if (!vsi->rss_size)
                return -EINVAL;
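
The per-TC clamp added here keeps the RSS indirection table from spreading hashes past a traffic class's slice of the queues. With illustrative numbers:

	/* 16 queue pairs over 4 TCs -> each TC owns 4 queues */
	u16 qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;  /* 16/4 == 4 */
	vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);    /* at most 4 */
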
 
@@ -8562,6 +8434,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
        new_rss_size = min_t(int, queue_count, pf->rss_size_max);
 
        if (queue_count != vsi->num_queue_pairs) {
+               u16 qcount;
+
                vsi->req_queue_pairs = queue_count;
                i40e_prep_for_reset(pf);
 
@@ -8579,8 +8453,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
                }
 
                /* Reset vsi->rss_size, as number of enabled queues changed */
-               vsi->rss_size = min_t(int, pf->alloc_rss_size,
-                                     vsi->num_queue_pairs);
+               qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+               vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
 
                i40e_pf_config_rss(pf);
        }
@@ -8813,10 +8687,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
                pf->num_iwarp_msix = (int)num_online_cpus() + 1;
        }
 
-#ifdef I40E_FCOE
-       i40e_init_pf_fcoe(pf);
-
-#endif /* I40E_FCOE */
 #ifdef CONFIG_PCI_IOV
        if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
                pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
@@ -8843,9 +8713,9 @@ static int i40e_sw_init(struct i40e_pf *pf)
                    (pf->hw.aq.api_min_ver > 4))) {
                /* Supported in FW API version higher than 1.4 */
                pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
-               pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+               pf->hw_disabled_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
        } else {
-               pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+               pf->hw_disabled_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
        }
 
        pf->eeprom_version = 0xDEAD;
@@ -8906,14 +8776,14 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
                        i40e_fdir_filter_exit(pf);
                }
                pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
-               pf->auto_disable_flags &= ~I40E_FLAG_FD_SB_ENABLED;
+               pf->hw_disabled_flags &= ~I40E_FLAG_FD_SB_ENABLED;
                /* reset fd counters */
-               pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0;
-               pf->fdir_pf_active_filters = 0;
+               pf->fd_add_err = 0;
+               pf->fd_atr_cnt = 0;
                /* if ATR was auto disabled it can be re-enabled. */
                if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                   (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) {
-                       pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+                   (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) {
+                       pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                        if (I40E_DEBUG_FD & pf->hw.debug_mask)
                                dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
                }
@@ -8982,7 +8852,7 @@ static int i40e_set_features(struct net_device *netdev,
  *
  * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found
  **/
-static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
+static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port)
 {
        u8 i;
 
@@ -9005,7 +8875,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
-       __be16 port = ti->port;
+       u16 port = ntohs(ti->port);
        u8 next_idx;
        u8 idx;
 
@@ -9013,8 +8883,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
 
        /* Check if port already exists */
        if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
-               netdev_info(netdev, "port %d already offloaded\n",
-                           ntohs(port));
+               netdev_info(netdev, "port %d already offloaded\n", port);
                return;
        }
 
@@ -9023,7 +8892,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
 
        if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
                netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n",
-                           ntohs(port));
+                           port);
                return;
        }
 
@@ -9057,7 +8926,7 @@ static void i40e_udp_tunnel_del(struct net_device *netdev,
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
-       __be16 port = ti->port;
+       u16 port = ntohs(ti->port);
        u8 idx;
 
        idx = i40e_get_udp_port_idx(pf, port);
@@ -9089,7 +8958,7 @@ static void i40e_udp_tunnel_del(struct net_device *netdev,
        return;
 not_found:
        netdev_warn(netdev, "UDP port %d was not found, not deleting\n",
-                   ntohs(port));
+                   port);
 }
 
 static int i40e_get_phys_port_id(struct net_device *netdev,
@@ -9344,10 +9213,6 @@ static const struct net_device_ops i40e_netdev_ops = {
        .ndo_poll_controller    = i40e_netpoll,
 #endif
        .ndo_setup_tc           = __i40e_setup_tc,
-#ifdef I40E_FCOE
-       .ndo_fcoe_enable        = i40e_fcoe_enable,
-       .ndo_fcoe_disable       = i40e_fcoe_disable,
-#endif
        .ndo_set_features       = i40e_set_features,
        .ndo_set_vf_mac         = i40e_ndo_set_vf_mac,
        .ndo_set_vf_vlan        = i40e_ndo_set_vf_port_vlan,
@@ -9432,10 +9297,10 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
        if (vsi->type == I40E_VSI_MAIN) {
                SET_NETDEV_DEV(netdev, &pf->pdev->dev);
                ether_addr_copy(mac_addr, hw->mac.perm_addr);
-               /* The following steps are necessary to prevent reception
-                * of tagged packets - some older NVM configurations load a
-                * default a MAC-VLAN filter that accepts any tagged packet
-                * which must be replaced by a normal filter.
+               /* The following steps are necessary to properly keep track of
+                * MAC-VLAN filters loaded into firmware - first we remove the
+                * filter that is automatically generated by firmware, and then
+                * add a new filter to both the driver hash table and firmware.
+                */
                 */
                i40e_rm_default_mac_filter(vsi, mac_addr);
                spin_lock_bh(&vsi->mac_filter_hash_lock);
@@ -9481,9 +9346,6 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
        netdev->netdev_ops = &i40e_netdev_ops;
        netdev->watchdog_timeo = 5 * HZ;
        i40e_set_ethtool_ops(netdev);
-#ifdef I40E_FCOE
-       i40e_fcoe_config_netdev(netdev, vsi);
-#endif
 
        /* MTU range: 68 - 9706 */
        netdev->min_mtu = ETH_MIN_MTU;
@@ -9707,16 +9569,6 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
                i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
                break;
 
-#ifdef I40E_FCOE
-       case I40E_VSI_FCOE:
-               ret = i40e_fcoe_vsi_init(vsi, &ctxt);
-               if (ret) {
-                       dev_info(&pf->pdev->dev, "failed to initialize FCoE VSI\n");
-                       return ret;
-               }
-               break;
-
-#endif /* I40E_FCOE */
        case I40E_VSI_IWARP:
                /* send down message to iWARP */
                break;
@@ -10133,7 +9985,6 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
                        }
                }
        case I40E_VSI_VMDQ2:
-       case I40E_VSI_FCOE:
                ret = i40e_config_netdev(vsi);
                if (ret)
                        goto err_netdev;
@@ -10793,9 +10644,6 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
        int queues_left;
 
        pf->num_lan_qps = 0;
-#ifdef I40E_FCOE
-       pf->num_fcoe_qps = 0;
-#endif
 
        /* Find the max queues to be put into basic use.  We'll always be
         * using TC0, whether or not DCB is running, and TC0 will get the
@@ -10812,9 +10660,6 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
                /* make sure all the fancies are disabled */
                pf->flags &= ~(I40E_FLAG_RSS_ENABLED    |
                               I40E_FLAG_IWARP_ENABLED  |
-#ifdef I40E_FCOE
-                              I40E_FLAG_FCOE_ENABLED   |
-#endif
                               I40E_FLAG_FD_SB_ENABLED  |
                               I40E_FLAG_FD_ATR_ENABLED |
                               I40E_FLAG_DCB_CAPABLE    |
@@ -10831,9 +10676,6 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 
                pf->flags &= ~(I40E_FLAG_RSS_ENABLED    |
                               I40E_FLAG_IWARP_ENABLED  |
-#ifdef I40E_FCOE
-                              I40E_FLAG_FCOE_ENABLED   |
-#endif
                               I40E_FLAG_FD_SB_ENABLED  |
                               I40E_FLAG_FD_ATR_ENABLED |
                               I40E_FLAG_DCB_ENABLED    |
@@ -10854,22 +10696,6 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
                queues_left -= pf->num_lan_qps;
        }
 
-#ifdef I40E_FCOE
-       if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
-               if (I40E_DEFAULT_FCOE <= queues_left) {
-                       pf->num_fcoe_qps = I40E_DEFAULT_FCOE;
-               } else if (I40E_MINIMUM_FCOE <= queues_left) {
-                       pf->num_fcoe_qps = I40E_MINIMUM_FCOE;
-               } else {
-                       pf->num_fcoe_qps = 0;
-                       pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
-                       dev_info(&pf->pdev->dev, "not enough queues for FCoE. FCoE feature will be disabled\n");
-               }
-
-               queues_left -= pf->num_fcoe_qps;
-       }
-
-#endif
        if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
                if (queues_left > 1) {
                        queues_left -= 1; /* save 1 queue for FD */
@@ -10901,9 +10727,6 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
                pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
                pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
                queues_left);
-#ifdef I40E_FCOE
-       dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
-#endif
 }
 
 /**
@@ -10970,10 +10793,6 @@ static void i40e_print_features(struct i40e_pf *pf)
        i += snprintf(&buf[i], REMAIN(i), " Geneve");
        if (pf->flags & I40E_FLAG_PTP)
                i += snprintf(&buf[i], REMAIN(i), " PTP");
-#ifdef I40E_FCOE
-       if (pf->flags & I40E_FLAG_FCOE_ENABLED)
-               i += snprintf(&buf[i], REMAIN(i), " FCOE");
-#endif
        if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
                i += snprintf(&buf[i], REMAIN(i), " VEB");
        else
@@ -11090,6 +10909,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        hw->bus.bus_id = pdev->bus->number;
        pf->instance = pfs_found;
 
+       INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+       INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
        /* set up the locks for the AQ, do this only once in probe
         * and destroy them only once in remove
         */
@@ -11188,8 +11010,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
 
        err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
-                               hw->func_caps.num_rx_qp,
-                               pf->fcoe_hmc_cntx_num, pf->fcoe_hmc_filt_num);
+                               hw->func_caps.num_rx_qp, 0, 0);
        if (err) {
                dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err);
                goto err_init_lan_hmc;
@@ -11224,18 +11045,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        i40e_get_port_mac_addr(hw, hw->mac.port_addr);
        if (is_valid_ether_addr(hw->mac.port_addr))
                pf->flags |= I40E_FLAG_PORT_ID_VALID;
-#ifdef I40E_FCOE
-       err = i40e_get_san_mac_addr(hw, hw->mac.san_addr);
-       if (err)
-               dev_info(&pdev->dev,
-                        "(non-fatal) SAN MAC retrieval failed: %d\n", err);
-       if (!is_valid_ether_addr(hw->mac.san_addr)) {
-               dev_warn(&pdev->dev, "invalid SAN MAC address %pM, falling back to LAN MAC\n",
-                        hw->mac.san_addr);
-               ether_addr_copy(hw->mac.san_addr, hw->mac.addr);
-       }
-       dev_info(&pf->pdev->dev, "SAN MAC: %pM\n", hw->mac.san_addr);
-#endif /* I40E_FCOE */
 
        pci_set_drvdata(pdev, pf);
        pci_save_state(pdev);
@@ -11431,11 +11240,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
                         err);
 
-#ifdef I40E_FCOE
-       /* create FCoE interface */
-       i40e_fcoe_vsi_setup(pf);
-
-#endif
 #define PCI_SPEED_SIZE 8
 #define PCI_WIDTH_SIZE 8
        /* Devices on the IOSF bus do not have this information
index 38ee18f1112444df1ad753f845d1dd1b1fd6ddcc..800bd55d0159c083c3d2267c5eda0289b1d6435c 100644 (file)
@@ -292,14 +292,14 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 {
        enum i40e_status_code ret_code = 0;
 
-       if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
-               ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
-               if (!ret_code) {
+       ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (!ret_code) {
+               if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
                        ret_code = i40e_read_nvm_word_aq(hw, offset, data);
-                       i40e_release_nvm(hw);
+               } else {
+                       ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
                }
-       } else {
-               ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
+               i40e_release_nvm(hw);
        }
        return ret_code;
 }
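The effect of this hunk is to make the NVM ownership semaphore unconditional: it is taken before either access method is chosen and released on a shared exit path, so the SRCTL path is now covered by the same lock as the AdminQ path. The resulting shape of the function, as a condensed sketch (error handling trimmed):

    /* Condensed shape of the reworked read path */
    i40e_status read_nvm_word_sketch(struct i40e_hw *hw, u16 offset, u16 *data)
    {
        i40e_status ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);

        if (ret)
            return ret;     /* never touch the NVM without ownership */

        if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
            ret = i40e_read_nvm_word_aq(hw, offset, data);
        else
            ret = i40e_read_nvm_word_srctl(hw, offset, data);

        i40e_release_nvm(hw);   /* single release point for both paths */
        return ret;
    }
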
index fea81ed065db8a57a26d89e31b87758b5c069d20..80e66da6b145e07ac4faaf761b9788d67118fc40 100644 (file)
@@ -78,7 +78,4 @@ do {                                                          \
 } while (0)
 
 typedef enum i40e_status_code i40e_status;
-#ifdef CONFIG_I40E_FCOE
-#define I40E_FCOE
-#endif
 #endif /* _I40E_OSDEP_H_ */
index 2551fc8274441f81196c64d34c6eef7d9dbecad1..dfc5e5901be5c54a1ff8e6a01b5d67ab7bc4fc49 100644 (file)
@@ -304,9 +304,6 @@ i40e_status i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
                                 u32 pba_num_size);
 i40e_status i40e_validate_mac_addr(u8 *mac_addr);
 void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable);
-#ifdef I40E_FCOE
-i40e_status i40e_get_san_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
-#endif
 /* prototype for functions used for NVM access */
 i40e_status i40e_init_nvm(struct i40e_hw *hw);
 i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
index 97d46058d71d3c118dfa8f5408b888a63fd1a933..ebffca0cefac9e037efcc819e8a0d066beb621b6 100644 (file)
@@ -71,6 +71,9 @@ static void i40e_fdir(struct i40e_ring *tx_ring,
        flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
                      (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
 
+       flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
+                     (fdata->flex_offset << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
+
        /* Use LAN VSI Id if not programmed by user */
        flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
                      ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
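Note that each field OR'd into flex_ptype follows the same shift-then-mask idiom: the value is shifted into position and then masked so it cannot spill into a neighboring field of the descriptor word. In isolation, with a placeholder field standing in for the I40E_TXD_FLTR_QW0_* shift/mask pairs:

    #include <linux/types.h>

    /* Placeholder 3-bit field at bits 17..19, in the style of the
     * I40E_TXD_FLTR_QW0_* definitions.
     */
    #define FIELD_SHIFT 17
    #define FIELD_MASK  (0x7UL << FIELD_SHIFT)

    static inline u32 pack_field(u32 word, u32 value)
    {
        /* Masking after the shift keeps an out-of-range value from
         * corrupting adjacent descriptor fields.
         */
        return word | (FIELD_MASK & (value << FIELD_SHIFT));
    }
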
@@ -203,7 +206,6 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
        struct i40e_pf *pf = vsi->back;
        struct udphdr *udp;
        struct iphdr *ip;
-       bool err = false;
        u8 *raw_packet;
        int ret;
        static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
@@ -219,18 +221,28 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
        udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
              + sizeof(struct iphdr));
 
-       ip->daddr = fd_data->dst_ip[0];
+       ip->daddr = fd_data->dst_ip;
        udp->dest = fd_data->dst_port;
-       ip->saddr = fd_data->src_ip[0];
+       ip->saddr = fd_data->src_ip;
        udp->source = fd_data->src_port;
 
+       if (fd_data->flex_filter) {
+               u8 *payload = raw_packet + I40E_UDPIP_DUMMY_PACKET_LEN;
+               __be16 pattern = fd_data->flex_word;
+               u16 off = fd_data->flex_offset;
+
+               *((__force __be16 *)(payload + off)) = pattern;
+       }
+
        fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
        ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
        if (ret) {
                dev_info(&pf->pdev->dev,
                         "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
                         fd_data->pctype, fd_data->fd_id, ret);
-               err = true;
+               /* Free the packet buffer since it wasn't added to the ring */
+               kfree(raw_packet);
+               return -EOPNOTSUPP;
        } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
                if (add)
                        dev_info(&pf->pdev->dev,
@@ -241,10 +253,13 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
                                 "Filter deleted for PCTYPE %d loc = %d\n",
                                 fd_data->pctype, fd_data->fd_id);
        }
-       if (err)
-               kfree(raw_packet);
 
-       return err ? -EOPNOTSUPP : 0;
+       if (add)
+               pf->fd_udp4_filter_cnt++;
+       else
+               pf->fd_udp4_filter_cnt--;
+
+       return 0;
 }
 
 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
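The flex-filter blocks added in this and the following hunks all share one mechanism: a two-byte pattern (flex_word) is written at a user-chosen offset into the dummy packet's payload, which teaches the hardware to match that word at that offset in real traffic. The mechanism in isolation, as a hedged sketch:

    #include <linux/types.h>

    /* Illustrative helper: patch a big-endian 16-bit pattern into the
     * payload region of a dummy packet, as the hunks above and below do
     * inline. The pattern stays big-endian because the hardware compares
     * it against packet bytes as they appear on the wire.
     */
    static void patch_flex_word(u8 *raw_packet, unsigned int headers_len,
                                __be16 pattern, u16 flex_offset)
    {
        u8 *payload = raw_packet + headers_len;

        *((__force __be16 *)(payload + flex_offset)) = pattern;
    }
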
@@ -263,7 +278,6 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
        struct i40e_pf *pf = vsi->back;
        struct tcphdr *tcp;
        struct iphdr *ip;
-       bool err = false;
        u8 *raw_packet;
        int ret;
        /* Dummy packet */
@@ -281,39 +295,116 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
        tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
              + sizeof(struct iphdr));
 
-       ip->daddr = fd_data->dst_ip[0];
+       ip->daddr = fd_data->dst_ip;
        tcp->dest = fd_data->dst_port;
-       ip->saddr = fd_data->src_ip[0];
+       ip->saddr = fd_data->src_ip;
        tcp->source = fd_data->src_port;
 
+       if (fd_data->flex_filter) {
+               u8 *payload = raw_packet + I40E_TCPIP_DUMMY_PACKET_LEN;
+               __be16 pattern = fd_data->flex_word;
+               u16 off = fd_data->flex_offset;
+
+               *((__force __be16 *)(payload + off)) = pattern;
+       }
+
+       fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+       ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
+                        fd_data->pctype, fd_data->fd_id, ret);
+               /* Free the packet buffer since it wasn't added to the ring */
+               kfree(raw_packet);
+               return -EOPNOTSUPP;
+       } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
+               if (add)
+                       dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
+                                fd_data->pctype, fd_data->fd_id);
+               else
+                       dev_info(&pf->pdev->dev,
+                                "Filter deleted for PCTYPE %d loc = %d\n",
+                                fd_data->pctype, fd_data->fd_id);
+       }
+
        if (add) {
-               pf->fd_tcp_rule++;
+               pf->fd_tcp4_filter_cnt++;
                if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
                    I40E_DEBUG_FD & pf->hw.debug_mask)
                        dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
-               pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+               pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED;
        } else {
-               pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
-                                 (pf->fd_tcp_rule - 1) : 0;
-               if (pf->fd_tcp_rule == 0) {
+               pf->fd_tcp4_filter_cnt--;
+               if (pf->fd_tcp4_filter_cnt == 0) {
                        if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
                            I40E_DEBUG_FD & pf->hw.debug_mask)
                                dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
-                       pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+                       pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                }
        }
 
-       fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
-       ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+       return 0;
+}
 
+#define I40E_SCTPIP_DUMMY_PACKET_LEN 46
+/**
+ * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
+                                   struct i40e_fdir_filter *fd_data,
+                                   bool add)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct sctphdr *sctp;
+       struct iphdr *ip;
+       u8 *raw_packet;
+       int ret;
+       /* Dummy packet */
+       static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+               0x45, 0, 0, 0x20, 0, 0, 0x40, 0, 0x40, 0x84, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+       raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+       if (!raw_packet)
+               return -ENOMEM;
+       memcpy(raw_packet, packet, I40E_SCTPIP_DUMMY_PACKET_LEN);
+
+       ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+       sctp = (struct sctphdr *)(raw_packet + IP_HEADER_OFFSET
+             + sizeof(struct iphdr));
+
+       ip->daddr = fd_data->dst_ip;
+       sctp->dest = fd_data->dst_port;
+       ip->saddr = fd_data->src_ip;
+       sctp->source = fd_data->src_port;
+
+       if (fd_data->flex_filter) {
+               u8 *payload = raw_packet + I40E_SCTPIP_DUMMY_PACKET_LEN;
+               __be16 pattern = fd_data->flex_word;
+               u16 off = fd_data->flex_offset;
+
+               *((__force __be16 *)(payload + off)) = pattern;
+       }
+
+       fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+       ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
        if (ret) {
                dev_info(&pf->pdev->dev,
                         "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
                         fd_data->pctype, fd_data->fd_id, ret);
-               err = true;
+               /* Free the packet buffer since it wasn't added to the ring */
+               kfree(raw_packet);
+               return -EOPNOTSUPP;
        } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
                if (add)
-                       dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
+                       dev_info(&pf->pdev->dev,
+                                "Filter OK for PCTYPE %d loc = %d\n",
                                 fd_data->pctype, fd_data->fd_id);
                else
                        dev_info(&pf->pdev->dev,
@@ -321,10 +412,12 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
                                 fd_data->pctype, fd_data->fd_id);
        }
 
-       if (err)
-               kfree(raw_packet);
+       if (add)
+               pf->fd_sctp4_filter_cnt++;
+       else
+               pf->fd_sctp4_filter_cnt--;
 
-       return err ? -EOPNOTSUPP : 0;
+       return 0;
 }
 
 #define I40E_IP_DUMMY_PACKET_LEN 34
@@ -343,7 +436,6 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
 {
        struct i40e_pf *pf = vsi->back;
        struct iphdr *ip;
-       bool err = false;
        u8 *raw_packet;
        int ret;
        int i;
@@ -359,18 +451,29 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
                memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
                ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
 
-               ip->saddr = fd_data->src_ip[0];
-               ip->daddr = fd_data->dst_ip[0];
+               ip->saddr = fd_data->src_ip;
+               ip->daddr = fd_data->dst_ip;
                ip->protocol = 0;
 
+               if (fd_data->flex_filter) {
+                       u8 *payload = raw_packet + I40E_IP_DUMMY_PACKET_LEN;
+                       __be16 pattern = fd_data->flex_word;
+                       u16 off = fd_data->flex_offset;
+
+                       *((__force __be16 *)(payload + off)) = pattern;
+               }
+
                fd_data->pctype = i;
                ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
-
                if (ret) {
                        dev_info(&pf->pdev->dev,
                                 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
                                 fd_data->pctype, fd_data->fd_id, ret);
-                       err = true;
+                       /* The packet buffer wasn't added to the ring so we
+                        * need to free it now.
+                        */
+                       kfree(raw_packet);
+                       return -EOPNOTSUPP;
                } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
                        if (add)
                                dev_info(&pf->pdev->dev,
@@ -383,10 +486,12 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
                }
        }
 
-       if (err)
-               kfree(raw_packet);
+       if (add)
+               pf->fd_ip4_filter_cnt++;
+       else
+               pf->fd_ip4_filter_cnt--;
 
-       return err ? -EOPNOTSUPP : 0;
+       return 0;
 }
 
 /**
@@ -409,6 +514,9 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
        case UDP_V4_FLOW:
                ret = i40e_add_del_fdir_udpv4(vsi, input, add);
                break;
+       case SCTP_V4_FLOW:
+               ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
+               break;
        case IP_USER_FLOW:
                switch (input->ip4_proto) {
                case IPPROTO_TCP:
@@ -417,6 +525,9 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
                case IPPROTO_UDP:
                        ret = i40e_add_del_fdir_udpv4(vsi, input, add);
                        break;
+               case IPPROTO_SCTP:
+                       ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
+                       break;
                case IPPROTO_IP:
                        ret = i40e_add_del_fdir_ipv4(vsi, input, add);
                        break;
@@ -484,8 +595,8 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
                pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 
                if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
-                   (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
-                       pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+                   (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)) {
+                       pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED;
                        set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
                }
 
@@ -498,11 +609,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
                 */
                if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
                        if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-                           !(pf->auto_disable_flags &
+                           !(pf->hw_disabled_flags &
                                     I40E_FLAG_FD_SB_ENABLED)) {
                                if (I40E_DEBUG_FD & pf->hw.debug_mask)
                                        dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
-                               pf->auto_disable_flags |=
+                               pf->hw_disabled_flags |=
                                                        I40E_FLAG_FD_SB_ENABLED;
                        }
                }
@@ -951,11 +1062,6 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
 
        if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
                i40e_fd_handle_status(rx_ring, rx_desc, id);
-#ifdef I40E_FCOE
-       else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
-                (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
-               i40e_fcoe_handle_status(rx_ring, rx_desc, id);
-#endif
 }
 
 /**
@@ -1010,7 +1116,6 @@ err:
  **/
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 {
-       struct device *dev = rx_ring->dev;
        unsigned long bi_size;
        u16 i;
 
@@ -1030,8 +1135,21 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
                if (!rx_bi->page)
                        continue;
 
-               dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
-               __free_pages(rx_bi->page, 0);
+               /* Invalidate cache lines that may have been written to by
+                * device so that we avoid corrupting memory.
+                */
+               dma_sync_single_range_for_cpu(rx_ring->dev,
+                                             rx_bi->dma,
+                                             rx_bi->page_offset,
+                                             I40E_RXBUFFER_2048,
+                                             DMA_FROM_DEVICE);
+
+               /* free resources associated with mapping */
+               dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
+                                    PAGE_SIZE,
+                                    DMA_FROM_DEVICE,
+                                    I40E_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
 
                rx_bi->page = NULL;
                rx_bi->page_offset = 0;
@@ -1159,7 +1277,10 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
        }
 
        /* map page for use */
-       dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                                PAGE_SIZE,
+                                DMA_FROM_DEVICE,
+                                I40E_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
@@ -1174,6 +1295,9 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
        bi->page = page;
        bi->page_offset = 0;
 
+       /* initialize pagecnt_bias to 1 representing we fully own page */
+       bi->pagecnt_bias = 1;
+
        return true;
 }
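Mapping with I40E_RX_DMA_ATTR (which includes DMA_ATTR_SKIP_CPU_SYNC, see the header change further down) means the DMA API no longer syncs the whole page at map and unmap time; the driver takes over syncing exactly the region the hardware uses, as the next hunk does before each buffer is handed to the device. The division of labor as a standalone sketch, with illustrative names:

    #include <linux/dma-mapping.h>
    #include <linux/mm.h>

    #define RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
    #define RX_BUF_SIZE 2048    /* illustrative; matches I40E_RXBUFFER_2048 */

    /* Map the whole page once, with no implicit cache sync... */
    static dma_addr_t rx_map(struct device *dev, struct page *page)
    {
        return dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                  DMA_FROM_DEVICE, RX_DMA_ATTR);
    }

    /* ...then sync only the region in flight, once before device use
     * and (symmetrically, via dma_sync_single_range_for_cpu) once
     * before CPU use.
     */
    static void rx_sync_for_device(struct device *dev, dma_addr_t dma, u32 off)
    {
        dma_sync_single_range_for_device(dev, dma, off, RX_BUF_SIZE,
                                         DMA_FROM_DEVICE);
    }
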
 
@@ -1219,6 +1343,12 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
                if (!i40e_alloc_mapped_page(rx_ring, bi))
                        goto no_buffers;
 
+               /* sync the buffer for use by the device */
+               dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+                                                bi->page_offset,
+                                                I40E_RXBUFFER_2048,
+                                                DMA_FROM_DEVICE);
+
                /* Refresh the desc even if buffer_addrs didn't change
                 * because each write-back erases this info.
                 */
@@ -1259,8 +1389,6 @@ no_buffers:
  * @vsi: the VSI we care about
  * @skb: skb currently being received and modified
  * @rx_desc: the receive descriptor
- *
- * skb->protocol must be set before this function is called
  **/
 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
                                    struct sk_buff *skb,
@@ -1422,12 +1550,12 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 
        i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 
-       /* modifies the skb - consumes the enet header */
-       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
        i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
 
        skb_record_rx_queue(skb, rx_ring->queue_index);
+
+       /* modifies the skb - consumes the enet header */
+       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
 /**
@@ -1472,7 +1600,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
        /* transfer page from old buffer to new buffer */
-       *new_buff = *old_buff;
+       new_buff->dma           = old_buff->dma;
+       new_buff->page          = old_buff->page;
+       new_buff->page_offset   = old_buff->page_offset;
+       new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
 }
 
 /**
@@ -1493,8 +1624,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
  * the adapter for another receive
  *
  * @rx_buffer: buffer containing the page
- * @page: page address from rx_buffer
- * @truesize: actual size of the buffer in this page
  *
  * If page is reusable, rx_buffer->page_offset is adjusted to point to
  * an unused region in the page.
@@ -1517,13 +1646,13 @@ static inline bool i40e_page_is_reusable(struct page *page)
  *
  * In either case, if the page is reusable its refcount is increased.
  **/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
-                                  struct page *page,
-                                  const unsigned int truesize)
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
 {
 #if (PAGE_SIZE >= 8192)
        unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
 #endif
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+       struct page *page = rx_buffer->page;
 
        /* Is any reuse possible? */
        if (unlikely(!i40e_page_is_reusable(page)))
@@ -1531,21 +1660,21 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_count(page) != 1))
+       if (unlikely((page_count(page) - pagecnt_bias) > 1))
                return false;
-
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= truesize;
 #else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += truesize;
-
        if (rx_buffer->page_offset > last_offset)
                return false;
 #endif
 
-       /* Inc ref count on page before passing it up to the stack */
-       get_page(page);
+       /* If we have drained the page fragment pool we need to update
+        * the pagecnt_bias and page count so that we fully restock the
+        * number of references the driver holds.
+        */
+       if (unlikely(!pagecnt_bias)) {
+               page_ref_add(page, USHRT_MAX);
+               rx_buffer->pagecnt_bias = USHRT_MAX;
+       }
 
        return true;
 }
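The pagecnt_bias scheme this hunk completes replaces a get_page()/put_page() pair per received frame with plain arithmetic: the driver front-loads a large page reference count once, tracks its own share in pagecnt_bias, and treats the page as reusable only when page_count() minus that bias leaves no other owner. A condensed model of the accounting (PAGE_SIZE < 8192 case only):

    #include <linux/kernel.h>
    #include <linux/mm.h>

    struct rx_buf_model {
        struct page *page;
        u16 pagecnt_bias;   /* references the driver still "owns" */
    };

    static bool can_reuse_page(struct rx_buf_model *b)
    {
        /* Any reference beyond the driver's own share, e.g. an skb
         * still in flight, makes the page unsafe to recycle.
         */
        if (page_count(b->page) - b->pagecnt_bias > 1)
            return false;

        /* Restock references in bulk so the hot path only performs
         * local increments and decrements on pagecnt_bias.
         */
        if (unlikely(!b->pagecnt_bias)) {
            page_ref_add(b->page, USHRT_MAX);
            b->pagecnt_bias = USHRT_MAX;
        }
        return true;
    }
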
@@ -1554,145 +1683,155 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
  * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @size: packet length from rx_desc
  * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
+ * It will just attach the page as a frag to the skb.
  *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
+ * The function will then update the page offset.
  **/
-static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
+static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
                             struct i40e_rx_buffer *rx_buffer,
-                            unsigned int size,
-                            struct sk_buff *skb)
+                            struct sk_buff *skb,
+                            unsigned int size)
 {
-       struct page *page = rx_buffer->page;
-       unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
        unsigned int truesize = I40E_RXBUFFER_2048;
 #else
-       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+       unsigned int truesize = SKB_DATA_ALIGN(size);
 #endif
-       unsigned int pull_len;
-
-       if (unlikely(skb_is_nonlinear(skb)))
-               goto add_tail_frag;
 
-       /* will the data fit in the skb we allocated? if so, just
-        * copy it as it is pretty small anyway
-        */
-       if (size <= I40E_RX_HDR_SIZE) {
-               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+                       rx_buffer->page_offset, size, truesize);
 
-               /* page is reusable, we can reuse buffer as-is */
-               if (likely(i40e_page_is_reusable(page)))
-                       return true;
-
-               /* this page cannot be reused so discard it */
-               __free_pages(page, 0);
-               return false;
-       }
+       /* page is being used so we must update the page offset */
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
+}
 
-       /* we need the header to contain the greater of either
-        * ETH_HLEN or 60 bytes if the skb->len is less than
-        * 60 for skb_pad.
-        */
-       pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+/**
+ * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
+                                                const unsigned int size)
+{
+       struct i40e_rx_buffer *rx_buffer;
 
-       /* align pull length to size of long to optimize
-        * memcpy performance
-        */
-       memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+       rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
 
-       /* update all of the pointers */
-       va += pull_len;
-       size -= pull_len;
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     size,
+                                     DMA_FROM_DEVICE);
 
-add_tail_frag:
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       (unsigned long)va & ~PAGE_MASK, size, truesize);
+       /* We have pulled a buffer for use, so decrement pagecnt_bias */
+       rx_buffer->pagecnt_bias--;
 
-       return i40e_can_reuse_rx_page(rx_buffer, page, truesize);
+       return rx_buffer;
 }
 
 /**
- * i40e_fetch_rx_buffer - Allocate skb and populate it
+ * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
- * @rx_desc: descriptor containing info written by hardware
+ * @rx_buffer: rx buffer to pull data from
+ * @size: size of buffer to add to skb
  *
- * This function allocates an skb on the fly, and populates it with the page
- * data from the current receive descriptor, taking care to set up the skb
- * correctly, as well as handling calling the page recycle function if
- * necessary.
+ * This function allocates an skb.  It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
  */
-static inline
-struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
-                                    union i40e_rx_desc *rx_desc,
-                                    struct sk_buff *skb)
+static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
+                                         struct i40e_rx_buffer *rx_buffer,
+                                         unsigned int size)
 {
-       u64 local_status_error_len =
-               le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-       unsigned int size =
-               (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-               I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-       struct i40e_rx_buffer *rx_buffer;
-       struct page *page;
-
-       rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
-       page = rx_buffer->page;
-       prefetchw(page);
-
-       if (likely(!skb)) {
-               void *page_addr = page_address(page) + rx_buffer->page_offset;
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = I40E_RXBUFFER_2048;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+       unsigned int headlen;
+       struct sk_buff *skb;
 
-               /* prefetch first cache line of first page */
-               prefetch(page_addr);
+       /* prefetch first cache line of first page */
+       prefetch(va);
 #if L1_CACHE_BYTES < 128
-               prefetch(page_addr + L1_CACHE_BYTES);
+       prefetch(va + L1_CACHE_BYTES);
 #endif
 
-               /* allocate a skb to store the frags */
-               skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-                                      I40E_RX_HDR_SIZE,
-                                      GFP_ATOMIC | __GFP_NOWARN);
-               if (unlikely(!skb)) {
-                       rx_ring->rx_stats.alloc_buff_failed++;
-                       return NULL;
-               }
+       /* allocate a skb to store the frags */
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+                              I40E_RX_HDR_SIZE,
+                              GFP_ATOMIC | __GFP_NOWARN);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* we will be copying header into skb->data in
-                * pskb_may_pull so it is in our interest to prefetch
-                * it now to avoid a possible cache miss
-                */
-               prefetchw(skb->data);
+       /* Determine available headroom for copy */
+       headlen = size;
+       if (headlen > I40E_RX_HDR_SIZE)
+               headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+
+       /* align pull length to size of long to optimize memcpy performance */
+       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+       /* update all of the pointers */
+       size -= headlen;
+       if (size) {
+               skb_add_rx_frag(skb, 0, rx_buffer->page,
+                               rx_buffer->page_offset + headlen,
+                               size, truesize);
+
+               /* buffer is used by skb, update page_offset */
+#if (PAGE_SIZE < 8192)
+               rx_buffer->page_offset ^= truesize;
+#else
+               rx_buffer->page_offset += truesize;
+#endif
+       } else {
+               /* buffer is unused, reset bias back to rx_buffer */
+               rx_buffer->pagecnt_bias++;
        }
 
-       /* we are reusing so sync this buffer for CPU use */
-       dma_sync_single_range_for_cpu(rx_ring->dev,
-                                     rx_buffer->dma,
-                                     rx_buffer->page_offset,
-                                     size,
-                                     DMA_FROM_DEVICE);
+       return skb;
+}
 
-       /* pull page into skb */
-       if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) {
+/**
+ * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buffer.  It will
+ * either recycle the buffer or unmap it and free the associated resources.
+ */
+static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
+                              struct i40e_rx_buffer *rx_buffer)
+{
+       if (i40e_can_reuse_rx_page(rx_buffer)) {
                /* hand second half of page back to the ring */
                i40e_reuse_rx_page(rx_ring, rx_buffer);
                rx_ring->rx_stats.page_reuse_count++;
        } else {
                /* we are not reusing the buffer so unmap it */
-               dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
-                              DMA_FROM_DEVICE);
+               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
+                                    DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
        }
 
        /* clear contents of buffer_info */
        rx_buffer->page = NULL;
-
-       return skb;
 }
 
 /**
@@ -1753,7 +1892,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        bool failure = false;
 
        while (likely(total_rx_packets < budget)) {
+               struct i40e_rx_buffer *rx_buffer;
                union i40e_rx_desc *rx_desc;
+               unsigned int size;
                u16 vlan_tag;
                u8 rx_ptype;
                u64 qword;
@@ -1770,22 +1911,36 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                /* status_error_len will always be zero for unused descriptors
                 * because it's cleared in cleanup, and overlaps with hdr_addr
                 * which is always zero because packet split isn't used; if the
-                * hardware wrote DD then it will be non-zero
+                * hardware wrote DD then the length will be non-zero
                 */
-               if (!i40e_test_staterr(rx_desc,
-                                      BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
+               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+               size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+                      I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+               if (!size)
                        break;
 
                /* This memory barrier is needed to keep us from reading
-                * any other fields out of the rx_desc until we know the
-                * DD bit is set.
+                * any other fields out of the rx_desc until we have
+                * verified the descriptor has been written back.
                 */
                dma_rmb();
 
-               skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb);
-               if (!skb)
+               rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+
+               /* retrieve a buffer from the ring */
+               if (skb)
+                       i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
+               else
+                       skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+
+               /* exit if we failed to retrieve a buffer */
+               if (!skb) {
+                       rx_ring->rx_stats.alloc_buff_failed++;
+                       rx_buffer->pagecnt_bias++;
                        break;
+               }
 
+               i40e_put_rx_buffer(rx_ring, rx_buffer);
                cleaned_count++;
 
                if (i40e_is_non_eop(rx_ring, rx_desc, skb))
@@ -1798,6 +1953,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                 */
                if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
                        dev_kfree_skb_any(skb);
+                       skb = NULL;
                        continue;
                }
 
@@ -1816,15 +1972,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                /* populate checksum, VLAN, and protocol */
                i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
-#ifdef I40E_FCOE
-               if (unlikely(
-                   i40e_rx_is_fcoe(rx_ptype) &&
-                   !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
-                       dev_kfree_skb_any(skb);
-                       continue;
-               }
-#endif
-
                vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
                           le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
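Taken together, the receive-loop changes split the old i40e_fetch_rx_buffer() into three steps with clear ownership: fetch and sync the buffer, attach it to a new or in-progress skb, then recycle or free it. Reduced to a skeleton (the i40e_* helpers are the real ones introduced above; rx_desc_size() is a hypothetical stand-in for the qword length extraction):

    static int rx_loop_model(struct i40e_ring *rx_ring, int budget)
    {
        struct sk_buff *skb = NULL;
        int packets = 0;

        while (packets < budget) {
            union i40e_rx_desc *rx_desc =
                I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
            unsigned int size = rx_desc_size(rx_desc); /* hypothetical */
            struct i40e_rx_buffer *rx_buffer;

            if (!size)      /* the length doubles as the "done" test */
                break;
            dma_rmb();      /* no field reads before the writeback */

            rx_buffer = i40e_get_rx_buffer(rx_ring, size);
            if (skb)        /* frame continues across buffers */
                i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
            else
                skb = i40e_construct_skb(rx_ring, rx_buffer, size);
            if (!skb) {     /* alloc failed: hand the reference back */
                rx_buffer->pagecnt_bias++;
                break;
            }
            i40e_put_rx_buffer(rx_ring, rx_buffer);
            /* ... EOP and error checks, then up the stack ... */
            packets++;
        }
        return packets;
    }
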
 
@@ -2079,7 +2226,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
        if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
                return;
 
-       if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+       if ((pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED))
                return;
 
        /* if sampling is disabled do nothing */
@@ -2113,10 +2260,10 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
        th = (struct tcphdr *)(hdr.network + hlen);
 
        /* Due to lack of space, no more new filters can be programmed */
-       if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+       if (th->syn && (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED))
                return;
        if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
-           (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
+           (!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
                /* HW ATR eviction will take care of removing filters on FIN
                 * and RST packets.
                 */
@@ -2179,7 +2326,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
                        I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 
        if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
-           (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
+           (!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
                dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
 
        fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
@@ -2200,15 +2347,9 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * Returns error code indicate the frame should be dropped upon error and the
  * otherwise  returns 0 to indicate the flags has been set properly.
  **/
-#ifdef I40E_FCOE
-inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
-                                     struct i40e_ring *tx_ring,
-                                     u32 *flags)
-#else
 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
                                             struct i40e_ring *tx_ring,
                                             u32 *flags)
-#endif
 {
        __be16 protocol = skb->protocol;
        u32  tx_flags = 0;
@@ -2716,15 +2857,9 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
  * @td_cmd:   the command field in the descriptor
  * @td_offset: offset for checksum or crc
  **/
-#ifdef I40E_FCOE
-inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
-                       struct i40e_tx_buffer *first, u32 tx_flags,
-                       const u8 hdr_len, u32 td_cmd, u32 td_offset)
-#else
 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                               struct i40e_tx_buffer *first, u32 tx_flags,
                               const u8 hdr_len, u32 td_cmd, u32 td_offset)
-#endif
 {
        unsigned int data_len = skb->data_len;
        unsigned int size = skb_headlen(skb);
index f80979025c0131a07e7b956826f17877b8b7081a..d6609deace5796f74876389b3ff824b66be298e6 100644 (file)
@@ -117,10 +117,8 @@ enum i40e_dyn_idx_t {
 
 /* Supported Rx Buffer Sizes (a multiple of 128) */
 #define I40E_RXBUFFER_256   256
+#define I40E_RXBUFFER_1536  1536  /* 128B aligned standard Ethernet frame */
 #define I40E_RXBUFFER_2048  2048
-#define I40E_RXBUFFER_3072  3072   /* For FCoE MTU of 2158 */
-#define I40E_RXBUFFER_4096  4096
-#define I40E_RXBUFFER_8192  8192
 #define I40E_MAX_RXBUFFER   9728  /* largest size for single descriptor */
 
 /* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
@@ -133,6 +131,9 @@ enum i40e_dyn_idx_t {
 #define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
 #define i40e_rx_desc i40e_32byte_rx_desc
 
+#define I40E_RX_DMA_ATTR \
+       (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
 /**
  * i40e_test_staterr - tests bits in Rx descriptor status and error fields
  * @rx_desc: pointer to receive descriptor (in le64 format)
@@ -255,7 +256,12 @@ struct i40e_tx_buffer {
 struct i40e_rx_buffer {
        dma_addr_t dma;
        struct page *page;
-       unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+       __u32 page_offset;
+#else
+       __u16 page_offset;
+#endif
+       __u16 pagecnt_bias;
 };
 
 struct i40e_queue_stats {
@@ -393,13 +399,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring);
 void i40e_free_tx_resources(struct i40e_ring *tx_ring);
 void i40e_free_rx_resources(struct i40e_ring *rx_ring);
 int i40e_napi_poll(struct napi_struct *napi, int budget);
-#ifdef I40E_FCOE
-void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
-                struct i40e_tx_buffer *first, u32 tx_flags,
-                const u8 hdr_len, u32 td_cmd, u32 td_offset);
-int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
-                              struct i40e_ring *tx_ring, u32 *flags);
-#endif
 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
 u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
@@ -482,16 +481,6 @@ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
        return count != I40E_MAX_BUFFER_TXD;
 }
 
-/**
- * i40e_rx_is_fcoe - returns true if the Rx packet type is FCoE
- * @ptype: the packet type field from Rx descriptor write-back
- **/
-static inline bool i40e_rx_is_fcoe(u16 ptype)
-{
-       return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
-              (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
-}
-
 /**
  * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
  * @ring: Tx ring to find the netdev equivalent of
index 939f9fdc8f8573fa35554d6917e1722348d2da29..9200f2d9c752b18d3525b3b8174dac67778e3819 100644 (file)
@@ -1213,25 +1213,6 @@ struct i40e_veb_tc_stats {
        u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS];
 };
 
-#ifdef I40E_FCOE
-/* Statistics collected per function for FCoE */
-struct i40e_fcoe_stats {
-       u64 rx_fcoe_packets;            /* fcoeprc */
-       u64 rx_fcoe_dwords;             /* focedwrc */
-       u64 rx_fcoe_dropped;            /* fcoerpdc */
-       u64 tx_fcoe_packets;            /* fcoeptc */
-       u64 tx_fcoe_dwords;             /* focedwtc */
-       u64 fcoe_bad_fccrc;             /* fcoecrc */
-       u64 fcoe_last_error;            /* fcoelast */
-       u64 fcoe_ddp_count;             /* fcoeddpc */
-};
-
-/* offset to per function FCoE statistics block */
-#define I40E_FCOE_VF_STAT_OFFSET       0
-#define I40E_FCOE_PF_STAT_OFFSET       128
-#define I40E_FCOE_STAT_MAX             (I40E_FCOE_PF_STAT_OFFSET + I40E_MAX_PF)
-
-#endif
 /* Statistics collected by the MAC */
 struct i40e_hw_port_stats {
        /* eth stats collected by the port */
@@ -1319,125 +1300,6 @@ struct i40e_hw_port_stats {
 
 #define I40E_SRRD_SRCTL_ATTEMPTS       100000
 
-#ifdef I40E_FCOE
-/* FCoE Tx context descriptor - Use the i40e_tx_context_desc struct */
-
-enum i40E_fcoe_tx_ctx_desc_cmd_bits {
-       I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND        = 0x00, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2      = 0x01, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3      = 0x05, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS2     = 0x02, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS3     = 0x06, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS2      = 0x03, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS3      = 0x07, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL       = 0x08, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_CTX_INVL       = 0x09, /* 4 BITS */
-       I40E_FCOE_TX_CTX_DESC_RELOFF                    = 0x10,
-       I40E_FCOE_TX_CTX_DESC_CLRSEQ                    = 0x20,
-       I40E_FCOE_TX_CTX_DESC_DIFENA                    = 0x40,
-       I40E_FCOE_TX_CTX_DESC_IL2TAG2                   = 0x80
-};
-
-/* FCoE DDP Context descriptor */
-struct i40e_fcoe_ddp_context_desc {
-       __le64 rsvd;
-       __le64 type_cmd_foff_lsize;
-};
-
-#define I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT      0
-#define I40E_FCOE_DDP_CTX_QW1_DTYPE_MASK       (0xFULL << \
-                                       I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT)
-
-#define I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT        4
-#define I40E_FCOE_DDP_CTX_QW1_CMD_MASK (0xFULL << \
-                                        I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT)
-
-enum i40e_fcoe_ddp_ctx_desc_cmd_bits {
-       I40E_FCOE_DDP_CTX_DESC_BSIZE_512B       = 0x00, /* 2 BITS */
-       I40E_FCOE_DDP_CTX_DESC_BSIZE_4K         = 0x01, /* 2 BITS */
-       I40E_FCOE_DDP_CTX_DESC_BSIZE_8K         = 0x02, /* 2 BITS */
-       I40E_FCOE_DDP_CTX_DESC_BSIZE_16K        = 0x03, /* 2 BITS */
-       I40E_FCOE_DDP_CTX_DESC_DIFENA           = 0x04, /* 1 BIT  */
-       I40E_FCOE_DDP_CTX_DESC_LASTSEQH         = 0x08, /* 1 BIT  */
-};
-
-#define I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT       16
-#define I40E_FCOE_DDP_CTX_QW1_FOFF_MASK        (0x3FFFULL << \
-                                        I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT)
-
-#define I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT      32
-#define I40E_FCOE_DDP_CTX_QW1_LSIZE_MASK       (0x3FFFULL << \
-                                       I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT)
-
-/* FCoE DDP/DWO Queue Context descriptor */
-struct i40e_fcoe_queue_context_desc {
-       __le64 dmaindx_fbase;           /* 0:11 DMAINDX, 12:63 FBASE */
-       __le64 flen_tph;                /* 0:12 FLEN, 13:15 TPH */
-};
-
-#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT  0
-#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_MASK   (0xFFFULL << \
-                                       I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT)
-
-#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT    12
-#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_MASK     (0xFFFFFFFFFFFFFULL << \
-                                       I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT)
-
-#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT     0
-#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_MASK      (0x1FFFULL << \
-                                       I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT)
-
-#define I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT      13
-#define I40E_FCOE_QUEUE_CTX_QW1_TPH_MASK       (0x7ULL << \
-                                       I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT)
-
-enum i40e_fcoe_queue_ctx_desc_tph_bits {
-       I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC       = 0x1,
-       I40E_FCOE_QUEUE_CTX_DESC_TPHDATA        = 0x2
-};
-
-#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT   30
-#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_MASK    (0x3ULL << \
-                                       I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT)
-
-/* FCoE DDP/DWO Filter Context descriptor */
-struct i40e_fcoe_filter_context_desc {
-       __le32 param;
-       __le16 seqn;
-
-       /* 48:51(0:3) RSVD, 52:63(4:15) DMAINDX */
-       __le16 rsvd_dmaindx;
-
-       /* 0:7 FLAGS, 8:52 RSVD, 53:63 LANQ */
-       __le64 flags_rsvd_lanq;
-};
-
-#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT 4
-#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_MASK  (0xFFF << \
-                                       I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT)
-
-enum i40e_fcoe_filter_ctx_desc_flags_bits {
-       I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP      = 0x00,
-       I40E_FCOE_FILTER_CTX_DESC_CTYP_DWO      = 0x01,
-       I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT    = 0x00,
-       I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP     = 0x02,
-       I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2     = 0x00,
-       I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3     = 0x04
-};
-
-#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT   0
-#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_MASK    (0xFFULL << \
-                                       I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT)
-
-#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT     8
-#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_MASK      (0x3FULL << \
-                       I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT)
-
-#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT     53
-#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_MASK      (0x7FFULL << \
-                       I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT)
-
-#endif /* I40E_FCOE */
 enum i40e_switch_element_types {
        I40E_SWITCH_ELEMENT_TYPE_MAC    = 1,
        I40E_SWITCH_ELEMENT_TYPE_PF     = 2,
index 78460c52b7c445112cd777385f0e81058f26e918..d526940ff951c8d036c5e8021e3adee95e2921b0 100644 (file)
@@ -702,10 +702,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
                        dev_info(&pf->pdev->dev,
                                 "Could not allocate VF broadcast filter\n");
                spin_unlock_bh(&vsi->mac_filter_hash_lock);
-               i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id),
-                                 (u32)hena);
-               i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id),
-                                 (u32)(hena >> 32));
+               wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
+               wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
        }
 
        /* program mac filter */
@@ -811,6 +809,11 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
        u32 reg_idx, reg;
        int i, msix_vf;
 
+       /* Start by disabling VF's configuration API to prevent the OS from
+        * accessing the VF's VSI after it's freed / invalidated.
+        */
+       clear_bit(I40E_VF_STAT_INIT, &vf->vf_states);
+
        /* free vsi & disconnect it from the parent uplink */
        if (vf->lan_vsi_idx) {
                i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
@@ -850,7 +853,6 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
        /* reset some of the state variables keeping track of the resources */
        vf->num_queue_pairs = 0;
        vf->vf_states = 0;
-       clear_bit(I40E_VF_STAT_INIT, &vf->vf_states);
 }
 
 /**
@@ -941,6 +943,14 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
        /* warn the VF */
        clear_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
 
+       /* Disable VF's configuration API during reset. The flag is re-enabled
+        * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI.
+        * It's normally disabled in i40e_free_vf_res(), but it's safer to do
+        * it earlier so that any VF configuration functions still running at
+        * this point have time to finish.
+        */
+       clear_bit(I40E_VF_STAT_INIT, &vf->vf_states);
+
        /* In the case of a VFLR, the HW has already reset the VF and we
         * just need to clean up, so don't hit the VFRTRIG register.
         */
@@ -984,11 +994,6 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
        if (!rsd)
                dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
                        vf->vf_id);
-       wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_COMPLETED);
-       /* clear the reset bit in the VPGEN_VFRTRIG reg */
-       reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
-       reg &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK;
-       wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
 
        /* On initial reset, we won't have any queues */
        if (vf->lan_vsi_idx == 0)
@@ -996,8 +1001,24 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
 
        i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 complete_reset:
-       /* reallocate VF resources to reset the VSI state */
+       /* free VF resources to begin resetting the VSI state */
        i40e_free_vf_res(vf);
+
+       /* Enable hardware by clearing the reset bit in the VPGEN_VFRTRIG reg.
+        * By doing this we allow HW to access VF memory at any point. If we
+        * did it any sooner, HW could access memory while it was being freed
+        * in i40e_free_vf_res(), causing an IOMMU fault.
+        *
+        * On the other hand, this needs to be done ASAP, because the VF driver
+        * is waiting for this to happen and may report a timeout. The timeout
+        * is harmless, but it gets logged in the guest OS kernel log, so it is
+        * best avoided.
+        */
+       reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
+       reg &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+       wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
+
+       /* reallocate VF resources to finish resetting the VSI state */
        if (!i40e_alloc_vf_res(vf)) {
                int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
                i40e_enable_vf_mappings(vf);
@@ -1008,7 +1029,11 @@ complete_reset:
                        i40e_notify_client_of_vf_reset(pf, abs_vf_id);
                vf->num_vlan = 0;
        }
-       /* tell the VF the reset is done */
+
+       /* Tell the VF driver the reset is done. This needs to be done only
+        * after the VF has been fully initialized, because the VF driver may
+        * request resources immediately after setting this flag.
+        */
        wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
 
        i40e_flush(hw);
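
Taken together, the hunks above pin down the reset ordering: free the VF's resources, only then release hardware by clearing VFRTRIG, reallocate, and report VFACTIVE last. A hedged C sketch of that sequence; the helpers are empty stand-ins, not the driver functions:

    static void free_vf_resources(void)  {}  /* i40e_free_vf_res()   */
    static void clear_vfrtrig(void)      {}  /* let HW access VF mem */
    static void alloc_vf_resources(void) {}  /* i40e_alloc_vf_res()  */
    static void mark_vf_active(void)     {}  /* RSTAT1 = VFACTIVE    */

    static void vf_reset_tail(void)
    {
            free_vf_resources();  /* VF memory is invalid past this point */
            clear_vfrtrig();      /* HW may touch VF memory only now, so
                                   * it cannot fault on freed pages */
            alloc_vf_resources(); /* rebuild the VSI and mappings */
            mark_vf_active();     /* last: the VF driver may request
                                   * resources as soon as it sees this */
    }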
@@ -1359,7 +1384,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
        if (!vsi->info.pvid)
                vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
 
-       if (i40e_vf_client_capable(pf, vf->vf_id, I40E_CLIENT_IWARP) &&
+       if (i40e_vf_client_capable(pf, vf->vf_id) &&
            (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) {
                vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP;
                set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states);
@@ -1853,7 +1878,7 @@ error_param:
 }
 
 /* If the VF is not trusted restrict the number of MAC/VLAN it can program */
-#define I40E_VC_MAX_MAC_ADDR_PER_VF 8
+#define I40E_VC_MAX_MAC_ADDR_PER_VF 12
 #define I40E_VC_MAX_VLAN_PER_VF 8
 
 /**
index 4012d069939ab3211cd9f668a3de89dbe4561a07..37af437daa5daa499b16f3f5c469b023fb3d18de 100644 (file)
@@ -87,7 +87,6 @@ struct i40e_vf {
        u16 stag;
 
        struct i40e_virtchnl_ether_addr default_lan_addr;
-       struct i40e_virtchnl_ether_addr default_fcoe_addr;
        u16 port_vlan_id;
        bool pf_set_mac;        /* The VMM admin set the VF MAC address */
        bool trusted;
index 3a423836a565294aa82dadbeb93e4d45619ccd86..827c7a6ed0bafc7dc2d8300ed46701053d78eb34 100644 (file)
@@ -32,5 +32,5 @@
 obj-$(CONFIG_I40EVF) += i40evf.o
 
 i40evf-objs := i40evf_main.o i40evf_ethtool.o i40evf_virtchnl.o \
-               i40e_txrx.o i40e_common.o i40e_adminq.o
+               i40e_txrx.o i40e_common.o i40e_adminq.o i40evf_client.o
 
index eeb9864bc5b152a90508af879cd5f32b43c3bead..c28cb8f27243f36c8bd240f88bd24897288eea2c 100644 (file)
@@ -132,6 +132,10 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_list_func_capabilities     = 0x000A,
        i40e_aqc_opc_list_dev_capabilities      = 0x000B,
 
+       /* Proxy commands */
+       i40e_aqc_opc_set_proxy_config           = 0x0104,
+       i40e_aqc_opc_set_ns_proxy_table_entry   = 0x0105,
+
        /* LAA */
        i40e_aqc_opc_mac_address_read   = 0x0107,
        i40e_aqc_opc_mac_address_write  = 0x0108,
@@ -139,6 +143,10 @@ enum i40e_admin_queue_opc {
        /* PXE */
        i40e_aqc_opc_clear_pxe_mode     = 0x0110,
 
+       /* WoL commands */
+       i40e_aqc_opc_set_wol_filter     = 0x0120,
+       i40e_aqc_opc_get_wake_reason    = 0x0121,
+
        /* internal switch commands */
        i40e_aqc_opc_get_switch_config          = 0x0200,
        i40e_aqc_opc_add_statistics             = 0x0201,
@@ -177,6 +185,7 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_remove_control_packet_filter       = 0x025B,
        i40e_aqc_opc_add_cloud_filters          = 0x025C,
        i40e_aqc_opc_remove_cloud_filters       = 0x025D,
+       i40e_aqc_opc_clear_wol_switch_filters   = 0x025E,
 
        i40e_aqc_opc_add_mirror_rule    = 0x0260,
        i40e_aqc_opc_delete_mirror_rule = 0x0261,
@@ -558,6 +567,56 @@ struct i40e_aqc_clear_pxe {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe);
 
+/* Set WoL Filter (0x0120) */
+
+struct i40e_aqc_set_wol_filter {
+       __le16 filter_index;
+#define I40E_AQC_MAX_NUM_WOL_FILTERS   8
+#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT       15
+#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_MASK        (0x1 << \
+               I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT)
+
+#define I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT            0
+#define I40E_AQC_SET_WOL_FILTER_INDEX_MASK     (0x7 << \
+               I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT)
+       __le16 cmd_flags;
+#define I40E_AQC_SET_WOL_FILTER                                0x8000
+#define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL             0x4000
+#define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR           0
+#define I40E_AQC_SET_WOL_FILTER_ACTION_SET             1
+       __le16 valid_flags;
+#define I40E_AQC_SET_WOL_FILTER_ACTION_VALID           0x8000
+#define I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID    0x4000
+       u8 reserved[2];
+       __le32  address_high;
+       __le32  address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter);
+
+struct i40e_aqc_set_wol_filter_data {
+       u8 filter[128];
+       u8 mask[16];
+};
+
+I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data);
+
+/* Get Wake Reason (0x0121) */
+
+struct i40e_aqc_get_wake_reason_completion {
+       u8 reserved_1[2];
+       __le16 wake_reason;
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT     0
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK (0xFF << \
+               I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT)
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT  8
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_MASK   (0xFF << \
+               I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT)
+       u8 reserved_2[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion);
+
 /* Switch configuration commands (0x02xx) */
 
 /* Used by many indirect commands that only pass an seid and a buffer in the
@@ -640,6 +699,8 @@ struct i40e_aqc_set_port_parameters {
 #define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS 2 /* must set! */
 #define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA   4
        __le16  bad_frame_vsi;
+#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_SHIFT 0x0
+#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_MASK  0x3FF
        __le16  default_seid;        /* reserved for command */
        u8      reserved[10];
 };
@@ -691,6 +752,7 @@ I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
 /* Set Switch Configuration (direct 0x0205) */
 struct i40e_aqc_set_switch_config {
        __le16  flags;
+/* flags used for both fields below */
 #define I40E_AQ_SET_SWITCH_CFG_PROMISC         0x0001
 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER       0x0002
        __le16  valid_flags;
@@ -1839,11 +1901,12 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_CONFIG_FEC_RS_ENA      0x02
 #define I40E_AQ_CONFIG_CRC_ENA         0x04
 #define I40E_AQ_CONFIG_PACING_MASK     0x78
-       u8      external_power_ability;
+       u8      power_desc;
 #define I40E_AQ_LINK_POWER_CLASS_1     0x00
 #define I40E_AQ_LINK_POWER_CLASS_2     0x01
 #define I40E_AQ_LINK_POWER_CLASS_3     0x02
 #define I40E_AQ_LINK_POWER_CLASS_4     0x03
+#define I40E_AQ_PWR_CLASS_MASK         0x03
        u8      reserved[4];
 };
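
The renamed power_desc byte carries the 2-bit power class in its low bits, which the new I40E_AQ_PWR_CLASS_MASK makes explicit. A small sketch of the intended decode; the names are local to this example:

    #include <stdint.h>

    #define PWR_CLASS_MASK 0x03 /* mirrors I40E_AQ_PWR_CLASS_MASK */

    /* returns 0..3, i.e. I40E_AQ_LINK_POWER_CLASS_1..4 */
    static uint8_t decode_power_class(uint8_t power_desc)
    {
            return power_desc & PWR_CLASS_MASK;
    }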
 
index 89dfdbca13db794afe5ac0cbecee9c5ee4720626..626fbf1ead4d3acca9935427073819e916d2ff2e 100644 (file)
@@ -958,7 +958,9 @@ u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
        int retry = 5;
        u32 val = 0;
 
-       use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5);
+       use_register = (((hw->aq.api_maj_ver == 1) &&
+                       (hw->aq.api_min_ver < 5)) ||
+                       (hw->mac.type == I40E_MAC_X722));
        if (!use_register) {
 do_retry:
                status = i40evf_aq_rx_ctl_read_register(hw, reg_addr,
@@ -1019,7 +1021,9 @@ void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
        bool use_register;
        int retry = 5;
 
-       use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5);
+       use_register = (((hw->aq.api_maj_ver == 1) &&
+                       (hw->aq.api_min_ver < 5)) ||
+                       (hw->mac.type == I40E_MAC_X722));
        if (!use_register) {
 do_retry:
                status = i40evf_aq_rx_ctl_write_register(hw, reg_addr,
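
Both hunks add the same guard: use direct register access instead of the AQ read/write path when the firmware AQ API predates 1.5, or when the MAC is an X722 (which the patch also routes to direct access). A standalone sketch of the predicate; the struct and the X722 constant are illustrative:

    #include <stdbool.h>
    #include <stdint.h>

    enum mac_type { MAC_XL710, MAC_X722 }; /* illustrative */

    struct aq_info {
            uint16_t api_maj_ver;
            uint16_t api_min_ver;
            enum mac_type mac;
    };

    static bool must_use_register(const struct aq_info *hw)
    {
            bool old_fw = hw->api_maj_ver == 1 && hw->api_min_ver < 5;

            return old_fw || hw->mac == MAC_X722;
    }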
index c91fcf43ccbc5eb7bfe95a9a3eec9c559519fcd0..95e383af41c4e565272d19feae0b686ce830da44 100644 (file)
@@ -137,10 +137,7 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 {
        u32 head, tail;
 
-       if (!in_sw)
-               head = i40e_get_head(ring);
-       else
-               head = ring->next_to_clean;
+       head = ring->next_to_clean;
        tail = readl(ring->tail);
 
        if (head != tail)
@@ -165,7 +162,6 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 {
        u16 i = tx_ring->next_to_clean;
        struct i40e_tx_buffer *tx_buf;
-       struct i40e_tx_desc *tx_head;
        struct i40e_tx_desc *tx_desc;
        unsigned int total_bytes = 0, total_packets = 0;
        unsigned int budget = vsi->work_limit;
@@ -174,8 +170,6 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
        tx_desc = I40E_TX_DESC(tx_ring, i);
        i -= tx_ring->count;
 
-       tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
-
        do {
                struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -186,8 +180,9 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                /* prevent any other reads prior to eop_desc */
                read_barrier_depends();
 
-               /* we have caught up to head, no work left to do */
-               if (tx_head == tx_desc)
+               /* if the descriptor isn't done, no work yet to do */
+               if (!(eop_desc->cmd_type_offset_bsz &
+                     cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
                        break;
 
                /* clear next_to_watch to prevent false hangs */
@@ -464,10 +459,6 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
        /* round up to nearest 4K */
        tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
-       /* add u32 for head writeback, align after this takes care of
-        * guaranteeing this is at least one cache line in size
-        */
-       tx_ring->size += sizeof(u32);
        tx_ring->size = ALIGN(tx_ring->size, 4096);
        tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
                                           &tx_ring->dma, GFP_KERNEL);
@@ -493,7 +484,6 @@ err:
  **/
 void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
 {
-       struct device *dev = rx_ring->dev;
        unsigned long bi_size;
        u16 i;
 
@@ -513,8 +503,21 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
                if (!rx_bi->page)
                        continue;
 
-               dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
-               __free_pages(rx_bi->page, 0);
+               /* Invalidate cache lines that may have been written to by
+                * device so that we avoid corrupting memory.
+                */
+               dma_sync_single_range_for_cpu(rx_ring->dev,
+                                             rx_bi->dma,
+                                             rx_bi->page_offset,
+                                             I40E_RXBUFFER_2048,
+                                             DMA_FROM_DEVICE);
+
+               /* free resources associated with mapping */
+               dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
+                                    PAGE_SIZE,
+                                    DMA_FROM_DEVICE,
+                                    I40E_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
 
                rx_bi->page = NULL;
                rx_bi->page_offset = 0;
@@ -642,7 +645,10 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
        }
 
        /* map page for use */
-       dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                                PAGE_SIZE,
+                                DMA_FROM_DEVICE,
+                                I40E_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
@@ -657,6 +663,9 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
        bi->page = page;
        bi->page_offset = 0;
 
+       /* initialize pagecnt_bias to 1 representing we fully own page */
+       bi->pagecnt_bias = 1;
+
        return true;
 }
 
@@ -702,6 +711,12 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
                if (!i40e_alloc_mapped_page(rx_ring, bi))
                        goto no_buffers;
 
+               /* sync the buffer for use by the device */
+               dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+                                                bi->page_offset,
+                                                I40E_RXBUFFER_2048,
+                                                DMA_FROM_DEVICE);
+
                /* Refresh the desc even if buffer_addrs didn't change
                 * because each write-back erases this info.
                 */
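
With DMA_ATTR_SKIP_CPU_SYNC set, map and unmap no longer perform cache maintenance, so the driver must sync exactly the region the NIC uses, as the hunks above do. A kernel-context sketch of the pairing (not buildable standalone; sizes and attributes as in the patch):

    #include <linux/dma-mapping.h>

    static bool map_rx_page(struct device *dev, struct page *page,
                            unsigned int offset, dma_addr_t *dma_out)
    {
            dma_addr_t dma = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                                DMA_FROM_DEVICE,
                                                DMA_ATTR_SKIP_CPU_SYNC |
                                                DMA_ATTR_WEAK_ORDERING);
            if (dma_mapping_error(dev, dma))
                    return false;

            /* the device is about to DMA into this 2 KB slice */
            dma_sync_single_range_for_device(dev, dma, offset, 2048,
                                             DMA_FROM_DEVICE);
            *dma_out = dma;
            return true;
    }

    /* ...and dma_sync_single_range_for_cpu() with the same offset/size
     * before the CPU reads what the device wrote back. */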
@@ -742,8 +757,6 @@ no_buffers:
  * @vsi: the VSI we care about
  * @skb: skb currently being received and modified
  * @rx_desc: the receive descriptor
- *
- * skb->protocol must be set before this function is called
  **/
 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
                                    struct sk_buff *skb,
@@ -895,12 +908,12 @@ void i40evf_process_skb_fields(struct i40e_ring *rx_ring,
 {
        i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 
-       /* modifies the skb - consumes the enet header */
-       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
        i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
 
        skb_record_rx_queue(skb, rx_ring->queue_index);
+
+       /* modifies the skb - consumes the enet header */
+       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
 /**
@@ -945,7 +958,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
        /* transfer page from old buffer to new buffer */
-       *new_buff = *old_buff;
+       new_buff->dma           = old_buff->dma;
+       new_buff->page          = old_buff->page;
+       new_buff->page_offset   = old_buff->page_offset;
+       new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
 }
 
 /**
@@ -966,8 +982,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
  * the adapter for another receive
  *
  * @rx_buffer: buffer containing the page
- * @page: page address from rx_buffer
- * @truesize: actual size of the buffer in this page
  *
  * If page is reusable, rx_buffer->page_offset is adjusted to point to
  * an unused region in the page.
@@ -990,13 +1004,13 @@ static inline bool i40e_page_is_reusable(struct page *page)
  *
  * In either case, if the page is reusable its refcount is increased.
  **/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
-                                  struct page *page,
-                                  const unsigned int truesize)
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
 {
 #if (PAGE_SIZE >= 8192)
        unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
 #endif
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+       struct page *page = rx_buffer->page;
 
        /* Is any reuse possible? */
        if (unlikely(!i40e_page_is_reusable(page)))
@@ -1004,21 +1018,21 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_count(page) != 1))
+       if (unlikely((page_count(page) - pagecnt_bias) > 1))
                return false;
-
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= truesize;
 #else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += truesize;
-
        if (rx_buffer->page_offset > last_offset)
                return false;
 #endif
 
-       /* Inc ref count on page before passing it up to the stack */
-       get_page(page);
+       /* If we have drained the page fragment pool we need to update
+        * the pagecnt_bias and page count so that we fully restock the
+        * number of references the driver holds.
+        */
+       if (unlikely(!pagecnt_bias)) {
+               page_ref_add(page, USHRT_MAX);
+               rx_buffer->pagecnt_bias = USHRT_MAX;
+       }
 
        return true;
 }
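
The bias turns per-frame get_page()/put_page() pairs into plain arithmetic: the driver gives a reference away by decrementing pagecnt_bias and only touches the real page refcount when the bias drains. A kernel-context sketch of the reuse test above; the helper name is local to the example:

    #include <linux/mm.h>

    static bool can_reuse_sketch(struct page *page, unsigned short *bias)
    {
            /* (page_count - bias) is how many references have left the
             * driver; reuse is safe only while the stack holds at most one.
             */
            if (page_count(page) - *bias > 1)
                    return false;

            /* bias drained: restock refcount and bias in one bulk update */
            if (unlikely(!*bias)) {
                    page_ref_add(page, USHRT_MAX);
                    *bias = USHRT_MAX;
            }
            return true;
    }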
@@ -1027,145 +1041,155 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
  * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @size: packet length from rx_desc
  * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
+ * It will just attach the page as a frag to the skb.
  *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
+ * The function will then update the page offset.
  **/
-static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
+static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
                             struct i40e_rx_buffer *rx_buffer,
-                            unsigned int size,
-                            struct sk_buff *skb)
+                            struct sk_buff *skb,
+                            unsigned int size)
 {
-       struct page *page = rx_buffer->page;
-       unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
        unsigned int truesize = I40E_RXBUFFER_2048;
 #else
-       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+       unsigned int truesize = SKB_DATA_ALIGN(size);
 #endif
-       unsigned int pull_len;
-
-       if (unlikely(skb_is_nonlinear(skb)))
-               goto add_tail_frag;
 
-       /* will the data fit in the skb we allocated? if so, just
-        * copy it as it is pretty small anyway
-        */
-       if (size <= I40E_RX_HDR_SIZE) {
-               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+                       rx_buffer->page_offset, size, truesize);
 
-               /* page is reusable, we can reuse buffer as-is */
-               if (likely(i40e_page_is_reusable(page)))
-                       return true;
-
-               /* this page cannot be reused so discard it */
-               __free_pages(page, 0);
-               return false;
-       }
+       /* page is being used so we must update the page offset */
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
+}
 
-       /* we need the header to contain the greater of either
-        * ETH_HLEN or 60 bytes if the skb->len is less than
-        * 60 for skb_pad.
-        */
-       pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+/**
+ * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
+                                                const unsigned int size)
+{
+       struct i40e_rx_buffer *rx_buffer;
 
-       /* align pull length to size of long to optimize
-        * memcpy performance
-        */
-       memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+       rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
 
-       /* update all of the pointers */
-       va += pull_len;
-       size -= pull_len;
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     size,
+                                     DMA_FROM_DEVICE);
 
-add_tail_frag:
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       (unsigned long)va & ~PAGE_MASK, size, truesize);
+       /* We have pulled a buffer for use, so decrement pagecnt_bias */
+       rx_buffer->pagecnt_bias--;
 
-       return i40e_can_reuse_rx_page(rx_buffer, page, truesize);
+       return rx_buffer;
 }
 
 /**
- * i40evf_fetch_rx_buffer - Allocate skb and populate it
+ * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
- * @rx_desc: descriptor containing info written by hardware
+ * @rx_buffer: rx buffer to pull data from
+ * @size: size of buffer to add to skb
  *
- * This function allocates an skb on the fly, and populates it with the page
- * data from the current receive descriptor, taking care to set up the skb
- * correctly, as well as handling calling the page recycle function if
- * necessary.
+ * This function allocates an skb.  It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
  */
-static inline
-struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring,
-                                      union i40e_rx_desc *rx_desc,
-                                      struct sk_buff *skb)
+static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
+                                         struct i40e_rx_buffer *rx_buffer,
+                                         unsigned int size)
 {
-       u64 local_status_error_len =
-               le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-       unsigned int size =
-               (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-               I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-       struct i40e_rx_buffer *rx_buffer;
-       struct page *page;
-
-       rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
-       page = rx_buffer->page;
-       prefetchw(page);
-
-       if (likely(!skb)) {
-               void *page_addr = page_address(page) + rx_buffer->page_offset;
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = I40E_RXBUFFER_2048;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+       unsigned int headlen;
+       struct sk_buff *skb;
 
-               /* prefetch first cache line of first page */
-               prefetch(page_addr);
+       /* prefetch first cache line of first page */
+       prefetch(va);
 #if L1_CACHE_BYTES < 128
-               prefetch(page_addr + L1_CACHE_BYTES);
+       prefetch(va + L1_CACHE_BYTES);
 #endif
 
-               /* allocate a skb to store the frags */
-               skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-                                      I40E_RX_HDR_SIZE,
-                                      GFP_ATOMIC | __GFP_NOWARN);
-               if (unlikely(!skb)) {
-                       rx_ring->rx_stats.alloc_buff_failed++;
-                       return NULL;
-               }
+       /* allocate a skb to store the frags */
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+                              I40E_RX_HDR_SIZE,
+                              GFP_ATOMIC | __GFP_NOWARN);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* we will be copying header into skb->data in
-                * pskb_may_pull so it is in our interest to prefetch
-                * it now to avoid a possible cache miss
-                */
-               prefetchw(skb->data);
+       /* Determine available headroom for copy */
+       headlen = size;
+       if (headlen > I40E_RX_HDR_SIZE)
+               headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+
+       /* align pull length to size of long to optimize memcpy performance */
+       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+       /* update all of the pointers */
+       size -= headlen;
+       if (size) {
+               skb_add_rx_frag(skb, 0, rx_buffer->page,
+                               rx_buffer->page_offset + headlen,
+                               size, truesize);
+
+               /* buffer is used by skb, update page_offset */
+#if (PAGE_SIZE < 8192)
+               rx_buffer->page_offset ^= truesize;
+#else
+               rx_buffer->page_offset += truesize;
+#endif
+       } else {
+               /* buffer is unused, reset bias back to rx_buffer */
+               rx_buffer->pagecnt_bias++;
        }
 
-       /* we are reusing so sync this buffer for CPU use */
-       dma_sync_single_range_for_cpu(rx_ring->dev,
-                                     rx_buffer->dma,
-                                     rx_buffer->page_offset,
-                                     size,
-                                     DMA_FROM_DEVICE);
+       return skb;
+}
 
-       /* pull page into skb */
-       if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) {
+/**
+ * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buffer.  It will
+ * either recycle the buffer or unmap it and free the associated resources.
+ */
+static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
+                              struct i40e_rx_buffer *rx_buffer)
+{
+       if (i40e_can_reuse_rx_page(rx_buffer)) {
                /* hand second half of page back to the ring */
                i40e_reuse_rx_page(rx_ring, rx_buffer);
                rx_ring->rx_stats.page_reuse_count++;
        } else {
                /* we are not reusing the buffer so unmap it */
-               dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
-                              DMA_FROM_DEVICE);
+               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
+                                    DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
        }
 
        /* clear contents of buffer_info */
        rx_buffer->page = NULL;
-
-       return skb;
 }
 
 /**
@@ -1221,7 +1245,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        bool failure = false;
 
        while (likely(total_rx_packets < budget)) {
+               struct i40e_rx_buffer *rx_buffer;
                union i40e_rx_desc *rx_desc;
+               unsigned int size;
                u16 vlan_tag;
                u8 rx_ptype;
                u64 qword;
@@ -1238,22 +1264,36 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                /* status_error_len will always be zero for unused descriptors
                 * because it's cleared in cleanup, and overlaps with hdr_addr
                 * which is always zero because packet split isn't used, if the
-                * hardware wrote DD then it will be non-zero
+                * hardware wrote DD then the length will be non-zero
                 */
-               if (!i40e_test_staterr(rx_desc,
-                                      BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
+               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+               size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+                      I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+               if (!size)
                        break;
 
                /* This memory barrier is needed to keep us from reading
-                * any other fields out of the rx_desc until we know the
-                * DD bit is set.
+                * any other fields out of the rx_desc until we have
+                * verified the descriptor has been written back.
                 */
                dma_rmb();
 
-               skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc, skb);
-               if (!skb)
+               rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+
+               /* retrieve a buffer from the ring */
+               if (skb)
+                       i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
+               else
+                       skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+
+               /* exit if we failed to retrieve a buffer */
+               if (!skb) {
+                       rx_ring->rx_stats.alloc_buff_failed++;
+                       rx_buffer->pagecnt_bias++;
                        break;
+               }
 
+               i40e_put_rx_buffer(rx_ring, rx_buffer);
                cleaned_count++;
 
                if (i40e_is_non_eop(rx_ring, rx_desc, skb))
@@ -1266,6 +1306,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                 */
                if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
                        dev_kfree_skb_any(skb);
+                       skb = NULL;
                        continue;
                }
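
The added skb = NULL matters because the skb pointer is carried across loop iterations so multi-descriptor packets can accumulate fragments; after dev_kfree_skb_any() a stale pointer would be appended to, and freed, a second time. A compressed kernel-context sketch of the loop-carried pattern, with illustrative helper names:

    struct sk_buff *skb = NULL;

    while (work_left()) {
            if (skb)
                    add_frag_to(skb);      /* continue current packet  */
            else
                    skb = start_new_skb(); /* first buffer of a packet */

            if (descriptor_has_error()) {
                    dev_kfree_skb_any(skb);
                    skb = NULL; /* forget it, or the next iteration
                                 * appends to freed memory */
                    continue;
            }

            if (packet_complete()) {
                    deliver(skb);
                    skb = NULL;
            }
    }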
 
@@ -1980,7 +2021,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
        u16 i = tx_ring->next_to_use;
        u32 td_tag = 0;
        dma_addr_t dma;
-       u16 desc_count = 1;
 
        if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
                td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2016,7 +2056,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
                        tx_desc++;
                        i++;
-                       desc_count++;
 
                        if (i == tx_ring->count) {
                                tx_desc = I40E_TX_DESC(tx_ring, 0);
@@ -2038,7 +2077,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
                tx_desc++;
                i++;
-               desc_count++;
 
                if (i == tx_ring->count) {
                        tx_desc = I40E_TX_DESC(tx_ring, 0);
@@ -2064,46 +2102,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
        i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
-       /* write last descriptor with EOP bit */
-       td_cmd |= I40E_TX_DESC_CMD_EOP;
-
-       /* We can OR these values together as they both are checked against
-        * 4 below and at this point desc_count will be used as a boolean value
-        * after this if/else block.
-        */
-       desc_count |= ++tx_ring->packet_stride;
-
-       /* Algorithm to optimize tail and RS bit setting:
-        * if queue is stopped
-        *      mark RS bit
-        *      reset packet counter
-        * else if xmit_more is supported and is true
-        *      advance packet counter to 4
-        *      reset desc_count to 0
-        *
-        * if desc_count >= 4
-        *      mark RS bit
-        *      reset packet counter
-        * if desc_count > 0
-        *      update tail
-        *
-        * Note: If there are less than 4 descriptors
-        * pending and interrupts were disabled the service task will
-        * trigger a force WB.
-        */
-       if (netif_xmit_stopped(txring_txq(tx_ring))) {
-               goto do_rs;
-       } else if (skb->xmit_more) {
-               /* set stride to arm on next packet and reset desc_count */
-               tx_ring->packet_stride = WB_STRIDE;
-               desc_count = 0;
-       } else if (desc_count >= WB_STRIDE) {
-do_rs:
-               /* write last descriptor with RS bit set */
-               td_cmd |= I40E_TX_DESC_CMD_RS;
-               tx_ring->packet_stride = 0;
-       }
-
+       /* write last descriptor with RS and EOP bits */
+       td_cmd |= I40E_TXD_CMD;
        tx_desc->cmd_type_offset_bsz =
                        build_ctob(td_cmd, td_offset, size, td_tag);
 
@@ -2119,7 +2119,7 @@ do_rs:
        first->next_to_watch = tx_desc;
 
        /* notify HW of packet */
-       if (desc_count) {
+       if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
                writel(i, tx_ring->tail);
 
                /* we need this if more than one processor can write to our tail
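
The simplification above always arms RS|EOP and defers only the doorbell: the tail MMIO is skipped while the stack promises more packets (xmit_more) and the queue isn't stopped, so a burst costs a single tail write. A kernel-context sketch of the policy, with illustrative names:

    static void maybe_ring_doorbell(struct tx_q *q, bool stopped,
                                    bool xmit_more)
    {
            if (stopped || !xmit_more) {
                    /* descriptors must be visible before the tail bump */
                    wmb();
                    writel(q->next_to_use, q->tail);
            }
            /* else: defer; a later packet (or a queue stop) rings it */
    }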
index 8274ba68bd32a6583538f7af6fd80417dc25e6ec..3bb4d732e46706189785450c8b0ca0ba33287656 100644 (file)
@@ -104,10 +104,8 @@ enum i40e_dyn_idx_t {
 
 /* Supported Rx Buffer Sizes (a multiple of 128) */
 #define I40E_RXBUFFER_256   256
+#define I40E_RXBUFFER_1536  1536  /* 128B aligned standard Ethernet frame */
 #define I40E_RXBUFFER_2048  2048
-#define I40E_RXBUFFER_3072  3072   /* For FCoE MTU of 2158 */
-#define I40E_RXBUFFER_4096  4096
-#define I40E_RXBUFFER_8192  8192
 #define I40E_MAX_RXBUFFER   9728  /* largest size for single descriptor */
 
 /* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
@@ -120,6 +118,9 @@ enum i40e_dyn_idx_t {
 #define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
 #define i40e_rx_desc i40e_32byte_rx_desc
 
+#define I40E_RX_DMA_ATTR \
+       (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
 /**
  * i40e_test_staterr - tests bits in Rx descriptor status and error fields
  * @rx_desc: pointer to receive descriptor (in le64 format)
@@ -241,7 +242,12 @@ struct i40e_tx_buffer {
 struct i40e_rx_buffer {
        dma_addr_t dma;
        struct page *page;
-       unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+       __u32 page_offset;
+#else
+       __u16 page_offset;
+#endif
+       __u16 pagecnt_bias;
 };
 
 struct i40e_queue_stats {
@@ -384,20 +390,6 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40evf_chk_linearize(struct sk_buff *skb);
 
-/**
- * i40e_get_head - Retrieve head from head writeback
- * @tx_ring: Tx ring to fetch head of
- *
- * Returns value of Tx ring head based on value stored
- * in head write-back location
- **/
-static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
-{
-       void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
-
-       return le32_to_cpu(*(volatile __le32 *)head);
-}
-
 /**
  * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed
  * @skb:     send buffer
@@ -460,19 +452,8 @@ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
        /* we can support up to 8 data buffers for a single send */
        return count != I40E_MAX_BUFFER_TXD;
 }
-
-/**
- * i40e_rx_is_fcoe - returns true if the Rx packet type is FCoE
- * @ptype: the packet type field from Rx descriptor write-back
- **/
-static inline bool i40e_rx_is_fcoe(u16 ptype)
-{
-       return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
-              (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
-}
-
 /**
  * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
  * @ring: Tx ring to find the netdev equivalent of
  **/
 static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
index d38a2b2aea2b20d2b0bbf9700c90578ba9fc0197..f431fbc4a3e789af4d121575fb1faa8f63e817ab 100644 (file)
@@ -81,7 +81,9 @@ enum i40e_virtchnl_ops {
        I40E_VIRTCHNL_OP_GET_STATS = 15,
        I40E_VIRTCHNL_OP_FCOE = 16,
        I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
+       I40E_VIRTCHNL_OP_IWARP = 20,
        I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
+       I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
        I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
        I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
        I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
@@ -393,6 +395,37 @@ struct i40e_virtchnl_pf_event {
        int severity;
 };
 
+/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
+ * The VF uses this message to request that the PF map IWARP vectors to
+ * IWARP queues. The request originates from the VF IWARP driver through
+ * a client interface between the VF LAN and VF IWARP drivers.
+ * A vector may have both an AEQ and a CEQ attached to it, although
+ * there is a single AEQ per VF IWARP instance, so most vectors will
+ * have an INVALID_IDX for the aeq and a valid idx for the ceq.
+ * There is never a case where multiple CEQs are attached to a single
+ * vector.
+ * The PF configures the interrupt mapping and returns the status.
+ */
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * To keep the interface simple, SW defines a unique type value for
+ * the AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX  0xFFFF
+
+struct i40e_virtchnl_iwarp_qv_info {
+       u32 v_idx; /* msix_vector */
+       u16 ceq_idx;
+       u16 aeq_idx;
+       u8 itr_idx;
+};
+
+struct i40e_virtchnl_iwarp_qvlist_info {
+       u32 num_vectors;
+       struct i40e_virtchnl_iwarp_qv_info qv_info[1];
+};
+
 /* VF reset states - these are written into the RSTAT register:
  * I40E_VFGEN_RSTAT1 on the PF
  * I40E_VFGEN_RSTAT on the VF
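
Because i40e_virtchnl_iwarp_qvlist_info ends in a one-element array, a message carrying n entries is sized as the base struct plus (n - 1) extra entries; this is the arithmetic the VF client code uses when sending I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP. A one-function sketch:

    static u32 qvlist_msg_size(u32 num_vectors)
    {
            return sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
                   (num_vectors - 1) *
                   sizeof(struct i40e_virtchnl_iwarp_qv_info);
    }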
index 00c42d80327668ef3cc8c614570c44ae58738791..d61ecf655091ac507cf15463b97b4f1353aea98b 100644 (file)
@@ -60,6 +60,7 @@ struct i40e_vsi {
        int base_vector;
        u16 work_limit;
        u16 qs_handle;
+       void *priv;     /* client driver data reference. */
 };
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
@@ -71,10 +72,6 @@ struct i40e_vsi {
 #define I40EVF_MAX_RXD         4096
 #define I40EVF_MIN_RXD         64
 #define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32
-
-/* Supported Rx Buffer Sizes */
-#define I40EVF_RXBUFFER_2048   2048
-#define I40EVF_MAX_RXBUFFER    16384  /* largest size for single descriptor */
 #define I40EVF_MAX_AQ_BUF_SIZE 4096
 #define I40EVF_AQ_LEN          32
 #define I40EVF_AQ_MAX_ERR      20 /* times to try before resetting AQ */
@@ -169,6 +166,7 @@ enum i40evf_state_t {
 
 enum i40evf_critical_section_t {
        __I40EVF_IN_CRITICAL_TASK,      /* cannot be interrupted */
+       __I40EVF_IN_CLIENT_TASK,
 };
 /* make common code happy */
 #define __I40E_DOWN __I40EVF_DOWN
@@ -178,6 +176,7 @@ struct i40evf_adapter {
        struct timer_list watchdog_timer;
        struct work_struct reset_task;
        struct work_struct adminq_task;
+       struct delayed_work client_task;
        struct delayed_work init_task;
        struct i40e_q_vector *q_vectors;
        struct list_head vlan_filter_list;
@@ -195,7 +194,10 @@ struct i40evf_adapter {
        u64 hw_csum_rx_error;
        u32 rx_desc_count;
        int num_msix_vectors;
+       int num_iwarp_msix;
+       int iwarp_base_vector;
        u32 client_pending;
+       struct i40e_client_instance *cinst;
        struct msix_entry *msix_entries;
 
        u32 flags;
@@ -211,8 +213,12 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE     BIT(12)
 #define I40EVF_FLAG_ADDR_SET_BY_PF             BIT(13)
 #define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED   BIT(14)
-#define I40EVF_FLAG_PROMISC_ON                 BIT(15)
-#define I40EVF_FLAG_ALLMULTI_ON                        BIT(16)
+#define I40EVF_FLAG_CLIENT_NEEDS_OPEN          BIT(15)
+#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE         BIT(16)
+#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS     BIT(17)
+#define I40EVF_FLAG_PROMISC_ON                 BIT(18)
+#define I40EVF_FLAG_ALLMULTI_ON                        BIT(19)
+#define I40EVF_FLAG_LEGACY_RX                  BIT(20)
 /* duplicates for common code */
 #define I40E_FLAG_FDIR_ATR_ENABLED             0
 #define I40E_FLAG_DCB_ENABLED                  0
@@ -220,6 +226,7 @@ struct i40evf_adapter {
 #define I40E_FLAG_RX_CSUM_ENABLED              I40EVF_FLAG_RX_CSUM_ENABLED
 #define I40E_FLAG_WB_ON_ITR_CAPABLE            I40EVF_FLAG_WB_ON_ITR_CAPABLE
 #define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE       I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE
+#define I40E_FLAG_LEGACY_RX                    I40EVF_FLAG_LEGACY_RX
        /* flags for admin queue service task */
        u32 aq_required;
 #define I40EVF_FLAG_AQ_ENABLE_QUEUES           BIT(0)
@@ -258,10 +265,11 @@ struct i40evf_adapter {
        bool link_up;
        enum i40e_aq_link_speed link_speed;
        enum i40e_virtchnl_ops current_op;
-#define CLIENT_ENABLED(_a) ((_a)->vf_res ? \
+#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
                            (_a)->vf_res->vf_offload_flags & \
                                I40E_VIRTCHNL_VF_OFFLOAD_IWARP : \
                            0)
+#define CLIENT_ENABLED(_a) ((_a)->cinst)
 /* RSS by the PF should be preferred over RSS via other methods. */
 #define RSS_PF(_a) ((_a)->vf_res->vf_offload_flags & \
                    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
@@ -292,6 +300,12 @@ struct i40evf_adapter {
 
 /* Ethtool Private Flags */
 
+/* lan device */
+struct i40e_device {
+       struct list_head list;
+       struct i40evf_adapter *vf;
+};
+
 /* needed by i40evf_ethtool.c */
 extern char i40evf_driver_name[];
 extern const char i40evf_driver_version[];
@@ -337,4 +351,11 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                                enum i40e_virtchnl_ops v_opcode,
                                i40e_status v_retval, u8 *msg, u16 msglen);
 int i40evf_config_rss(struct i40evf_adapter *adapter);
+int i40evf_lan_add_device(struct i40evf_adapter *adapter);
+int i40evf_lan_del_device(struct i40evf_adapter *adapter);
+void i40evf_client_subtask(struct i40evf_adapter *adapter);
+void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
+void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
+void i40evf_notify_client_open(struct i40e_vsi *vsi);
+void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
 #endif /* _I40EVF_H_ */
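
For orientation, a heavily abbreviated sketch of how a VF client driver might sit on these exports. The i40e_client and i40e_client_ops layouts come from i40evf_client.h and are assumed here; the callback body is a placeholder, not the real i40iw implementation:

    static int my_open(struct i40e_info *ldev, struct i40e_client *client)
    {
            /* bring up iWARP state using ldev->msix_entries, ldev->netdev */
            return 0;
    }

    static struct i40e_client_ops my_ops = {
            .open = my_open,
            /* .close, .virtchnl_receive, .l2_param_change, ... */
    };

    static struct i40e_client my_client = {
            .name = "my_iwarp", /* must be non-empty, see
                                 * i40evf_register_client() below */
            .ops  = &my_ops,
    };

    /* module init/exit would pair i40evf_register_client(&my_client)
     * with the matching unregister call. */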
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.c b/drivers/net/ethernet/intel/i40evf/i40evf_client.c
new file mode 100644 (file)
index 0000000..ee73768
--- /dev/null
@@ -0,0 +1,562 @@
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "i40evf.h"
+#include "i40e_prototype.h"
+#include "i40evf_client.h"
+
+static
+const char i40evf_client_interface_version_str[] = I40EVF_CLIENT_VERSION_STR;
+static struct i40e_client *vf_registered_client;
+static LIST_HEAD(i40evf_devices);
+static DEFINE_MUTEX(i40evf_device_mutex);
+
+static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
+                                      struct i40e_client *client,
+                                      u8 *msg, u16 len);
+
+static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
+                                     struct i40e_client *client,
+                                     struct i40e_qvlist_info *qvlist_info);
+
+static struct i40e_ops i40evf_lan_ops = {
+       .virtchnl_send = i40evf_client_virtchnl_send,
+       .setup_qvlist = i40evf_client_setup_qvlist,
+};
+
+/**
+ * i40evf_notify_client_message - call the client message receive callback
+ * @vsi: the VSI associated with this client
+ * @msg: message buffer
+ * @len: length of message
+ *
+ * If there is a client for this VSI, call its virtchnl_receive callback.
+ **/
+void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len)
+{
+       struct i40e_client_instance *cinst;
+
+       if (!vsi)
+               return;
+
+       cinst = vsi->back->cinst;
+       if (!cinst || !cinst->client || !cinst->client->ops ||
+           !cinst->client->ops->virtchnl_receive) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Cannot locate client instance virtchnl_receive function\n");
+               return;
+       }
+       cinst->client->ops->virtchnl_receive(&cinst->lan_info, cinst->client,
+                                            msg, len);
+}
+
+/**
+ * i40evf_notify_client_l2_params - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client for this VSI, call its l2_param_change callback.
+ **/
+void i40evf_notify_client_l2_params(struct i40e_vsi *vsi)
+{
+       struct i40e_client_instance *cinst;
+       struct i40e_params params;
+
+       if (!vsi)
+               return;
+
+       cinst = vsi->back->cinst;
+       memset(&params, 0, sizeof(params));
+       params.mtu = vsi->netdev->mtu;
+       params.link_up = vsi->back->link_up;
+       params.qos.prio_qos[0].qs_handle = vsi->qs_handle;
+
+       if (!cinst || !cinst->client || !cinst->client->ops ||
+           !cinst->client->ops->l2_param_change) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Cannot locate client instance l2_param_change function\n");
+               return;
+       }
+       cinst->client->ops->l2_param_change(&cinst->lan_info, cinst->client,
+                                           &params);
+}
+
+/**
+ * i40evf_notify_client_open - call the client open callback
+ * @vsi: the VSI with netdev opened
+ *
+ * If there is a client for this netdev, call its open callback.
+ **/
+void i40evf_notify_client_open(struct i40e_vsi *vsi)
+{
+       struct i40evf_adapter *adapter = vsi->back;
+       struct i40e_client_instance *cinst = adapter->cinst;
+       int ret;
+
+       if (!cinst || !cinst->client || !cinst->client->ops ||
+           !cinst->client->ops->open) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Cannot locate client instance open function\n");
+               return;
+       }
+       if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state))) {
+               ret = cinst->client->ops->open(&cinst->lan_info, cinst->client);
+               if (!ret)
+                       set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state);
+       }
+}
+
+/**
+ * i40evf_client_release_qvlist - send a message to the PF to release iwarp qv map
+ * @ldev: pointer to L2 context.
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40evf_client_release_qvlist(struct i40e_info *ldev)
+{
+       struct i40evf_adapter *adapter = ldev->vf;
+       i40e_status err;
+
+       if (adapter->aq_required)
+               return -EAGAIN;
+
+       err = i40e_aq_send_msg_to_pf(&adapter->hw,
+                       I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
+                       I40E_SUCCESS, NULL, 0, NULL);
+
+       if (err)
+               dev_err(&adapter->pdev->dev,
+                       "Unable to send iWarp vector release message to PF, error %d, aq status %d\n",
+                       err, adapter->hw.aq.asq_last_status);
+
+       return err;
+}
+
+/**
+ * i40evf_notify_client_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to reset pending
+ *
+ * If there is a client for this netdev, call its close callback.
+ **/
+void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset)
+{
+       struct i40evf_adapter *adapter = vsi->back;
+       struct i40e_client_instance *cinst = adapter->cinst;
+
+       if (!cinst || !cinst->client || !cinst->client->ops ||
+           !cinst->client->ops->close) {
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Cannot locate client instance close function\n");
+               return;
+       }
+       cinst->client->ops->close(&cinst->lan_info, cinst->client, reset);
+       i40evf_client_release_qvlist(&cinst->lan_info);
+       clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state);
+}
+
+/**
+ * i40evf_client_add_instance - add a client instance to the instance list
+ * @adapter: pointer to the board struct
+ * @client: pointer to a client struct in the client list.
+ *
+ * Returns cinst ptr on success, NULL on failure
+ **/
+static struct i40e_client_instance *
+i40evf_client_add_instance(struct i40evf_adapter *adapter)
+{
+       struct i40e_client_instance *cinst = NULL;
+       struct netdev_hw_addr *mac = NULL;
+       struct i40e_vsi *vsi = &adapter->vsi;
+       int i;
+
+       if (!vf_registered_client)
+               goto out;
+
+       if (adapter->cinst) {
+               cinst = adapter->cinst;
+               goto out;
+       }
+
+       cinst = kzalloc(sizeof(*cinst), GFP_KERNEL);
+       if (!cinst)
+               goto out;
+
+       cinst->lan_info.vf = (void *)adapter;
+       cinst->lan_info.netdev = vsi->netdev;
+       cinst->lan_info.pcidev = adapter->pdev;
+       cinst->lan_info.fid = 0;
+       cinst->lan_info.ftype = I40E_CLIENT_FTYPE_VF;
+       cinst->lan_info.hw_addr = adapter->hw.hw_addr;
+       cinst->lan_info.ops = &i40evf_lan_ops;
+       cinst->lan_info.version.major = I40EVF_CLIENT_VERSION_MAJOR;
+       cinst->lan_info.version.minor = I40EVF_CLIENT_VERSION_MINOR;
+       cinst->lan_info.version.build = I40EVF_CLIENT_VERSION_BUILD;
+       set_bit(__I40E_CLIENT_INSTANCE_NONE, &cinst->state);
+
+       cinst->lan_info.msix_count = adapter->num_iwarp_msix;
+       cinst->lan_info.msix_entries =
+                       &adapter->msix_entries[adapter->iwarp_base_vector];
+
+       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+               cinst->lan_info.params.qos.prio_qos[i].tc = 0;
+               cinst->lan_info.params.qos.prio_qos[i].qs_handle =
+                                                               vsi->qs_handle;
+       }
+
+       mac = list_first_entry_or_null(&cinst->lan_info.netdev->dev_addrs.list,
+                                      struct netdev_hw_addr, list);
+       if (mac)
+               ether_addr_copy(cinst->lan_info.lanmac, mac->addr);
+       else
+               dev_err(&adapter->pdev->dev, "MAC address list is empty!\n");
+
+       cinst->client = vf_registered_client;
+       adapter->cinst = cinst;
+out:
+       return cinst;
+}
+
+/**
+ * i40evf_client_del_instance - removes a client instance from the list
+ * @adapter: pointer to the board struct
+ *
+ **/
+static
+void i40evf_client_del_instance(struct i40evf_adapter *adapter)
+{
+       kfree(adapter->cinst);
+       adapter->cinst = NULL;
+}
+
+/**
+ * i40evf_client_subtask - client maintenance work
+ * @adapter: board private structure
+ **/
+void i40evf_client_subtask(struct i40evf_adapter *adapter)
+{
+       struct i40e_client *client = vf_registered_client;
+       struct i40e_client_instance *cinst;
+       int ret = 0;
+
+       if (adapter->state < __I40EVF_DOWN)
+               return;
+
+       /* first check client is registered */
+       if (!client)
+               return;
+
+       /* Add the client instance to the instance list */
+       cinst = i40evf_client_add_instance(adapter);
+       if (!cinst)
+               return;
+
+       dev_info(&adapter->pdev->dev, "Added instance of Client %s\n",
+                client->name);
+
+       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+               /* Send an Open request to the client */
+
+               if (client->ops && client->ops->open)
+                       ret = client->ops->open(&cinst->lan_info, client);
+               if (!ret)
+                       set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                               &cinst->state);
+               else
+                       /* remove client instance */
+                       i40evf_client_del_instance(adapter);
+       }
+}
+
+/**
+ * i40evf_lan_add_device - add a lan device struct to the list of lan devices
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or non-zero on error
+ **/
+int i40evf_lan_add_device(struct i40evf_adapter *adapter)
+{
+       struct i40e_device *ldev;
+       int ret = 0;
+
+       mutex_lock(&i40evf_device_mutex);
+       list_for_each_entry(ldev, &i40evf_devices, list) {
+               if (ldev->vf == adapter) {
+                       ret = -EEXIST;
+                       goto out;
+               }
+       }
+       ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+       if (!ldev) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       ldev->vf = adapter;
+       INIT_LIST_HEAD(&ldev->list);
+       list_add(&ldev->list, &i40evf_devices);
+       dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+                adapter->hw.bus.bus_id, adapter->hw.bus.device,
+                adapter->hw.bus.func);
+
+       /* Since the client may have registered before this device was
+        * added, schedule the service task to initialize the client.
+        */
+       adapter->flags |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
+
+out:
+       mutex_unlock(&i40evf_device_mutex);
+       return ret;
+}
+
+/**
+ * i40evf_lan_del_device - removes a lan device from the device list
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or non-zero on error
+ **/
+int i40evf_lan_del_device(struct i40evf_adapter *adapter)
+{
+       struct i40e_device *ldev, *tmp;
+       int ret = -ENODEV;
+
+       mutex_lock(&i40evf_device_mutex);
+       list_for_each_entry_safe(ldev, tmp, &i40evf_devices, list) {
+               if (ldev->vf == adapter) {
+                       dev_info(&adapter->pdev->dev,
+                                "Deleted LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+                                adapter->hw.bus.bus_id, adapter->hw.bus.device,
+                                adapter->hw.bus.func);
+                       list_del(&ldev->list);
+                       kfree(ldev);
+                       ret = 0;
+                       break;
+               }
+       }
+
+       mutex_unlock(&i40evf_device_mutex);
+       return ret;
+}
+
+/**
+ * i40evf_client_release - release client specific resources
+ * @client: pointer to the registered client
+ *
+ **/
+static void i40evf_client_release(struct i40e_client *client)
+{
+       struct i40e_client_instance *cinst;
+       struct i40e_device *ldev;
+       struct i40evf_adapter *adapter;
+
+       mutex_lock(&i40evf_device_mutex);
+       list_for_each_entry(ldev, &i40evf_devices, list) {
+               adapter = ldev->vf;
+               cinst = adapter->cinst;
+               if (!cinst)
+                       continue;
+               if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+                       if (client->ops && client->ops->close)
+                               client->ops->close(&cinst->lan_info, client,
+                                                  false);
+                       i40evf_client_release_qvlist(&cinst->lan_info);
+                       clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state);
+
+                       dev_warn(&adapter->pdev->dev,
+                                "Client %s instance closed\n", client->name);
+               }
+               /* delete the client instance */
+               i40evf_client_del_instance(adapter);
+               dev_info(&adapter->pdev->dev, "Deleted client instance of Client %s\n",
+                        client->name);
+       }
+       mutex_unlock(&i40evf_device_mutex);
+}
+
+/**
+ * i40evf_client_prepare - prepare client specific resources
+ * @client: pointer to the registered client
+ *
+ **/
+static void i40evf_client_prepare(struct i40e_client *client)
+{
+       struct i40e_device *ldev;
+       struct i40evf_adapter *adapter;
+
+       mutex_lock(&i40evf_device_mutex);
+       list_for_each_entry(ldev, &i40evf_devices, list) {
+               adapter = ldev->vf;
+               /* Signal the watchdog to service the client */
+               adapter->flags |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
+       }
+       mutex_unlock(&i40evf_device_mutex);
+}
+
+/**
+ * i40evf_client_virtchnl_send - send a message to the PF instance
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @msg: pointer to message buffer
+ * @len: message length
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
+                                      struct i40e_client *client,
+                                      u8 *msg, u16 len)
+{
+       struct i40evf_adapter *adapter = ldev->vf;
+       i40e_status err;
+
+       if (adapter->aq_required)
+               return -EAGAIN;
+
+       err = i40e_aq_send_msg_to_pf(&adapter->hw, I40E_VIRTCHNL_OP_IWARP,
+                                    I40E_SUCCESS, msg, len, NULL);
+       if (err)
+               dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n",
+                       err, adapter->hw.aq.asq_last_status);
+
+       return err;
+}
+
+/**
+ * i40evf_client_setup_qvlist - send a message to the PF to setup iwarp qv map
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qvlist_info: queue and vector list
+ *
+ * Returns 0 on success or < 0 on error
+ **/
+static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
+                                     struct i40e_client *client,
+                                     struct i40e_qvlist_info *qvlist_info)
+{
+       struct i40e_virtchnl_iwarp_qvlist_info *v_qvlist_info;
+       struct i40evf_adapter *adapter = ldev->vf;
+       struct i40e_qv_info *qv_info;
+       i40e_status err;
+       u32 v_idx, i;
+       u32 msg_size;
+
+       if (adapter->aq_required)
+               return -EAGAIN;
+
+       /* A quick check on whether the vectors belong to the client */
+       for (i = 0; i < qvlist_info->num_vectors; i++) {
+               qv_info = &qvlist_info->qv_info[i];
+               if (!qv_info)
+                       continue;
+               v_idx = qv_info->v_idx;
+               if ((v_idx >=
+                   (adapter->iwarp_base_vector + adapter->num_iwarp_msix)) ||
+                   (v_idx < adapter->iwarp_base_vector))
+                       return -EINVAL;
+       }
+
+       v_qvlist_info = (struct i40e_virtchnl_iwarp_qvlist_info *)qvlist_info;
+       msg_size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
+                       (sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+                       (v_qvlist_info->num_vectors - 1));
+
+       adapter->client_pending |= BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP);
+       err = i40e_aq_send_msg_to_pf(&adapter->hw,
+                       I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+                       I40E_SUCCESS, (u8 *)v_qvlist_info, msg_size, NULL);
+
+       if (err) {
+               dev_err(&adapter->pdev->dev,
+                       "Unable to send iWarp vector config message to PF, error %d, aq status %d\n",
+                       err, adapter->hw.aq.asq_last_status);
+               goto out;
+       }
+
+       err = -EBUSY;
+       for (i = 0; i < 5; i++) {
+               msleep(100);
+               if (!(adapter->client_pending &
+                     BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) {
+                       err = 0;
+                       break;
+               }
+       }
+out:
+       return err;
+}
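
The msg_size computation above is the standard sizing idiom for a message struct that ends in a one-element array (struct i40e_qvlist_info and its virtchnl counterpart, sized the same way in the formula above, both declare qv_info[1]): the first entry is already inside sizeof(), so only num_vectors - 1 extra entries are added. A self-contained sketch with stand-in layouts, not the real virtchnl definitions:

    #include <assert.h>
    #include <stdio.h>
    #include <stdint.h>

    /* Stand-ins for the virtchnl structures; field sizes are illustrative. */
    struct qv_entry {
            uint32_t v_idx;
            uint16_t ceq_idx, aeq_idx;
    };

    struct qvlist_msg {
            uint32_t num_vectors;
            struct qv_entry qv_info[1];  /* one entry folded into sizeof() */
    };

    static size_t qvlist_msg_size(uint32_t num_vectors)
    {
            /* sizeof(struct qvlist_msg) already covers qv_info[0] */
            return sizeof(struct qvlist_msg) +
                   sizeof(struct qv_entry) * (num_vectors - 1);
    }

    int main(void)
    {
            /* one vector needs no extra room; each further one adds an entry */
            assert(qvlist_msg_size(1) == sizeof(struct qvlist_msg));
            assert(qvlist_msg_size(4) ==
                   sizeof(struct qvlist_msg) + 3 * sizeof(struct qv_entry));
            printf("4 vectors -> %zu bytes\n", qvlist_msg_size(4));
            return 0;
    }
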
+
+/**
+ * i40evf_register_client - Register an i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40evf_register_client(struct i40e_client *client)
+{
+       int ret = 0;
+
+       if (!client) {
+               ret = -EIO;
+               goto out;
+       }
+
+       if (strlen(client->name) == 0) {
+               pr_info("i40evf: Failed to register client with no name\n");
+               ret = -EIO;
+               goto out;
+       }
+
+       if (vf_registered_client) {
+               pr_info("i40evf: Client %s has already been registered!\n",
+                       client->name);
+               ret = -EEXIST;
+               goto out;
+       }
+
+       if ((client->version.major != I40EVF_CLIENT_VERSION_MAJOR) ||
+           (client->version.minor != I40EVF_CLIENT_VERSION_MINOR)) {
+               pr_info("i40evf: Failed to register client %s due to mismatched client interface version\n",
+                       client->name);
+               pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+                       client->version.major, client->version.minor,
+                       client->version.build,
+                       i40evf_client_interface_version_str);
+               ret = -EIO;
+               goto out;
+       }
+
+       vf_registered_client = client;
+
+       i40evf_client_prepare(client);
+
+       pr_info("i40evf: Registered client %s with return code %d\n",
+               client->name, ret);
+out:
+       return ret;
+}
+EXPORT_SYMBOL(i40evf_register_client);
+
+/**
+ * i40evf_unregister_client - Unregister an i40e client driver from the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40evf_unregister_client(struct i40e_client *client)
+{
+       int ret = 0;
+
+       /* When an unregister request comes through, we must send a close
+        * message for each client instance that was opened.
+        * i40evf_client_release() handles this.
+        */
+       i40evf_client_release(client);
+
+       if (vf_registered_client != client) {
+               pr_info("i40evf: Client %s has not been registered\n",
+                       client->name);
+               ret = -ENODEV;
+               goto out;
+       }
+       vf_registered_client = NULL;
+       pr_info("i40evf: Unregistered client %s\n", client->name);
+out:
+       return ret;
+}
+EXPORT_SYMBOL(i40evf_unregister_client);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.h b/drivers/net/ethernet/intel/i40evf/i40evf_client.h
new file mode 100644 (file)
index 0000000..7d283c7
--- /dev/null
@@ -0,0 +1,166 @@
+#ifndef _I40E_CLIENT_H_
+#define _I40E_CLIENT_H_
+
+#define I40EVF_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define I40EVF_CLIENT_VERSION_MAJOR 0
+#define I40EVF_CLIENT_VERSION_MINOR 01
+#define I40EVF_CLIENT_VERSION_BUILD 00
+#define I40EVF_CLIENT_VERSION_STR     \
+       __stringify(I40EVF_CLIENT_VERSION_MAJOR) "." \
+       __stringify(I40EVF_CLIENT_VERSION_MINOR) "." \
+       __stringify(I40EVF_CLIENT_VERSION_BUILD)
+
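
__stringify() is the kernel's two-level stringification helper from <linux/stringify.h>. The indirection makes the preprocessor expand each VERSION macro to its value before pasting, and the token text survives verbatim, so a MINOR of 01 becomes the string "01". A user-space rendition, assuming the usual two-macro definition:

    #include <stdio.h>

    /* same shape as the kernel's <linux/stringify.h> */
    #define __stringify_1(x...)  #x
    #define __stringify(x...)    __stringify_1(x)

    #define VER_MAJOR 0
    #define VER_MINOR 01
    #define VER_BUILD 00
    #define VER_STR \
            __stringify(VER_MAJOR) "." \
            __stringify(VER_MINOR) "." \
            __stringify(VER_BUILD)

    int main(void)
    {
            /* prints "0.01.00": tokens survive verbatim, zeros included */
            printf("%s\n", VER_STR);
            return 0;
    }
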
+struct i40e_client_version {
+       u8 major;
+       u8 minor;
+       u8 build;
+       u8 rsvd;
+};
+
+enum i40e_client_state {
+       __I40E_CLIENT_NULL,
+       __I40E_CLIENT_REGISTERED
+};
+
+enum i40e_client_instance_state {
+       __I40E_CLIENT_INSTANCE_NONE,
+       __I40E_CLIENT_INSTANCE_OPENED,
+};
+
+struct i40e_ops;
+struct i40e_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX 0xFFFF
+
+struct i40e_qv_info {
+       u32 v_idx; /* msix_vector */
+       u16 ceq_idx;
+       u16 aeq_idx;
+       u8 itr_idx;
+};
+
+struct i40e_qvlist_info {
+       u32 num_vectors;
+       struct i40e_qv_info qv_info[1];
+};
+
+#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per-priority info */
+struct i40e_prio_qos_params {
+       u16 qs_handle; /* qs handle for prio */
+       u8 tc; /* TC mapped to prio */
+       u8 reserved;
+};
+
+#define I40E_CLIENT_MAX_USER_PRIORITY        8
+/* Struct to hold Client QoS */
+struct i40e_qos_params {
+       struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct i40e_params {
+       struct i40e_qos_params qos;
+       u16 mtu;
+       u16 link_up; /* boolean */
+};
+
+/* Structure to hold LAN device info for a client device */
+struct i40e_info {
+       struct i40e_client_version version;
+       u8 lanmac[6];
+       struct net_device *netdev;
+       struct pci_dev *pcidev;
+       u8 __iomem *hw_addr;
+       u8 fid; /* function id, PF id or VF id */
+#define I40E_CLIENT_FTYPE_PF 0
+#define I40E_CLIENT_FTYPE_VF 1
+       u8 ftype; /* function type, PF or VF */
+       void *vf; /* cast to i40evf_adapter */
+
+       /* All L2 params that could change during the life span of the device
+        * and need to be communicated to the client when they change
+        */
+       struct i40e_params params;
+       struct i40e_ops *ops;
+
+       u16 msix_count;  /* number of MSI-X vectors */
+       /* The array below is dynamically allocated based on msix_count */
+       struct msix_entry *msix_entries;
+       u16 itr_index; /* Which ITR index the PE driver is supposed to use */
+};
+
+struct i40e_ops {
+       /* setup_qvlist enables queues with a particular vector */
+       int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client,
+                           struct i40e_qvlist_info *qv_info);
+
+       u32 (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client,
+                            u8 *msg, u16 len);
+
+       /* If the PE Engine is unresponsive, the RDMA driver can request a reset. */
+       void (*request_reset)(struct i40e_info *ldev,
+                             struct i40e_client *client);
+};
+
+struct i40e_client_ops {
+       /* Called from register_client() or whenever the LAN driver is
+        * ready to create a specific client instance.
+        */
+       int (*open)(struct i40e_info *ldev, struct i40e_client *client);
+
+       /* Called when the netdev is unavailable or when an unregister
+        * call comes in. If the close happens due to a reset, the reset
+        * parameter is set to true.
+        */
+       void (*close)(struct i40e_info *ldev, struct i40e_client *client,
+                     bool reset);
+
+       /* called when an L2-managed parameter changes, e.g. MSS */
+       void (*l2_param_change)(struct i40e_info *ldev,
+                               struct i40e_client *client,
+                               struct i40e_params *params);
+
+       /* called when a message is received from the PF */
+       int (*virtchnl_receive)(struct i40e_info *ldev,
+                               struct i40e_client *client,
+                               u8 *msg, u16 len);
+};
+
+/* Client device */
+struct i40e_client_instance {
+       struct list_head list;
+       struct i40e_info lan_info;
+       struct i40e_client *client;
+       unsigned long  state;
+};
+
+struct i40e_client {
+       struct list_head list;          /* list of registered clients */
+       char name[I40EVF_CLIENT_STR_LENGTH];
+       struct i40e_client_version version;
+       unsigned long state;            /* client state */
+       atomic_t ref_cnt;  /* Count of all the client devices of this kind */
+       u32 flags;
+#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE      BIT(0)
+#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS      BIT(2)
+       u8 type;
+#define I40E_CLIENT_IWARP 0
+       struct i40e_client_ops *ops;    /* client ops provided by the client */
+};
+
+/* used by clients */
+int i40evf_register_client(struct i40e_client *client);
+int i40evf_unregister_client(struct i40e_client *client);
+#endif /* _I40E_CLIENT_H_ */
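
For orientation, a client of this header would look roughly like the sketch below. It is a hypothetical module, not the real i40iw client: the ops bodies are stubs, but the struct fields, version constants, and register/unregister entry points are exactly the ones declared above, and i40evf_register_client() refuses the registration unless major/minor match the LAN driver's.

    #include <linux/module.h>
    #include "i40evf_client.h"

    static int my_client_open(struct i40e_info *ldev, struct i40e_client *client)
    {
            /* grab ldev->netdev, ldev->msix_entries, etc., then return 0 */
            return 0;
    }

    static void my_client_close(struct i40e_info *ldev,
                                struct i40e_client *client, bool reset)
    {
            /* quiesce; 'reset' says whether a VF reset forced the close */
    }

    static struct i40e_client_ops my_client_ops = {
            .open  = my_client_open,
            .close = my_client_close,
    };

    static struct i40e_client my_client = {
            .name = "my_rdma",            /* fits I40EVF_CLIENT_STR_LENGTH */
            .version = {
                    /* major/minor must match or registration fails */
                    .major = I40EVF_CLIENT_VERSION_MAJOR,
                    .minor = I40EVF_CLIENT_VERSION_MINOR,
                    .build = I40EVF_CLIENT_VERSION_BUILD,
            },
            .type = I40E_CLIENT_IWARP,
            .ops  = &my_client_ops,
    };

    static int __init my_client_init(void)
    {
            return i40evf_register_client(&my_client);
    }

    static void __exit my_client_exit(void)
    {
            i40evf_unregister_client(&my_client);
    }

    module_init(my_client_init);
    module_exit(my_client_exit);
    MODULE_LICENSE("GPL");
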
index 272d600c1ed06bf7b891bd88b4576c2964512ca4..9bb2cc7dd4e4afa5d98ac4f76b3794ffa359e7aa 100644 (file)
@@ -63,52 +63,74 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = {
 #define I40EVF_STATS_LEN(_dev) \
        (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev))
 
+/* For now we have one and only one private flag and it is only defined
+ * when we have support for the SKIP_CPU_SYNC DMA attribute.  Instead
+ * of leaving all this code sitting around empty we will strip it unless
+ * our one private flag is actually available.
+ */
+struct i40evf_priv_flags {
+       char flag_string[ETH_GSTRING_LEN];
+       u32 flag;
+       bool read_only;
+};
+
+#define I40EVF_PRIV_FLAG(_name, _flag, _read_only) { \
+       .flag_string = _name, \
+       .flag = _flag, \
+       .read_only = _read_only, \
+}
+
+static const struct i40evf_priv_flags i40evf_gstrings_priv_flags[] = {
+       I40EVF_PRIV_FLAG("legacy-rx", I40EVF_FLAG_LEGACY_RX, 0),
+};
+
+#define I40EVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40evf_gstrings_priv_flags)
+
 /**
- * i40evf_get_settings - Get Link Speed and Duplex settings
+ * i40evf_get_link_ksettings - Get Link Speed and Duplex settings
  * @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @cmd: ethtool command
  *
  * Reports speed/duplex settings. Because this is a VF, we don't know what
  * kind of link we really have, so we fake it.
  **/
-static int i40evf_get_settings(struct net_device *netdev,
-                              struct ethtool_cmd *ecmd)
+static int i40evf_get_link_ksettings(struct net_device *netdev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct i40evf_adapter *adapter = netdev_priv(netdev);
 
-       ecmd->supported = 0;
-       ecmd->autoneg = AUTONEG_DISABLE;
-       ecmd->transceiver = XCVR_DUMMY1;
-       ecmd->port = PORT_NONE;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       cmd->base.autoneg = AUTONEG_DISABLE;
+       cmd->base.port = PORT_NONE;
        /* Set speed and duplex */
        switch (adapter->link_speed) {
        case I40E_LINK_SPEED_40GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_40000);
+               cmd->base.speed = SPEED_40000;
                break;
        case I40E_LINK_SPEED_25GB:
 #ifdef SPEED_25000
-               ethtool_cmd_speed_set(ecmd, SPEED_25000);
+               cmd->base.speed = SPEED_25000;
 #else
                netdev_info(netdev,
                            "Speed is 25G, display not supported by this version of ethtool.\n");
 #endif
                break;
        case I40E_LINK_SPEED_20GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_20000);
+               cmd->base.speed = SPEED_20000;
                break;
        case I40E_LINK_SPEED_10GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_10000);
+               cmd->base.speed = SPEED_10000;
                break;
        case I40E_LINK_SPEED_1GB:
-               ethtool_cmd_speed_set(ecmd, SPEED_1000);
+               cmd->base.speed = SPEED_1000;
                break;
        case I40E_LINK_SPEED_100MB:
-               ethtool_cmd_speed_set(ecmd, SPEED_100);
+               cmd->base.speed = SPEED_100;
                break;
        default:
                break;
        }
-       ecmd->duplex = DUPLEX_FULL;
+       cmd->base.duplex = DUPLEX_FULL;
 
        return 0;
 }
@@ -125,6 +147,8 @@ static int i40evf_get_sset_count(struct net_device *netdev, int sset)
 {
        if (sset == ETH_SS_STATS)
                return I40EVF_STATS_LEN(netdev);
+       else if (sset == ETH_SS_PRIV_FLAGS)
+               return I40EVF_PRIV_FLAGS_STR_LEN;
        else
                return -EINVAL;
 }
@@ -190,7 +214,83 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
                        snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i);
                        p += ETH_GSTRING_LEN;
                }
+       } else if (sset == ETH_SS_PRIV_FLAGS) {
+               for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
+                       snprintf(p, ETH_GSTRING_LEN, "%s",
+                                i40evf_gstrings_priv_flags[i].flag_string);
+                       p += ETH_GSTRING_LEN;
+               }
+       }
+}
+
+/**
+ * i40evf_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The string count reported for ETH_SS_PRIV_FLAGS and the strings themselves
+ * must stay in sync for each flag returned.  Add new strings for each flag to
+ * the i40evf_gstrings_priv_flags array.
+ *
+ * Returns a u32 bitmap of flags.
+ **/
+static u32 i40evf_get_priv_flags(struct net_device *netdev)
+{
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+       u32 i, ret_flags = 0;
+
+       for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
+               const struct i40evf_priv_flags *priv_flags;
+
+               priv_flags = &i40evf_gstrings_priv_flags[i];
+
+               if (priv_flags->flag & adapter->flags)
+                       ret_flags |= BIT(i);
+       }
+
+       return ret_flags;
+}
+
+/**
+ * i40evf_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ **/
+static int i40evf_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+       u64 changed_flags;
+       u32 i;
+
+       changed_flags = adapter->flags;
+
+       for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
+               const struct i40evf_priv_flags *priv_flags;
+
+               priv_flags = &i40evf_gstrings_priv_flags[i];
+
+               if (priv_flags->read_only)
+                       continue;
+
+               if (flags & BIT(i))
+                       adapter->flags |= priv_flags->flag;
+               else
+                       adapter->flags &= ~(priv_flags->flag);
+       }
+
+       /* check for flags that changed */
+       changed_flags ^= adapter->flags;
+
+       /* Process any additional changes needed as a result of flag changes. */
+
+       /* issue a reset to force legacy-rx change to take effect */
+       if (changed_flags & I40EVF_FLAG_LEGACY_RX) {
+               if (netif_running(netdev)) {
+                       adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+                       schedule_work(&adapter->reset_task);
+               }
        }
+
+       return 0;
 }
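
The changed_flags dance above is a compact way to find exactly which bits flipped: snapshot the old value, apply the requested bits, then XOR old against new. A tiny standalone illustration (the flag values are made up):

    #include <assert.h>
    #include <stdint.h>

    #define FLAG_LEGACY_RX (1u << 0)
    #define FLAG_OTHER     (1u << 3)

    int main(void)
    {
            uint32_t flags = FLAG_OTHER;       /* current state */
            uint32_t changed = flags;          /* snapshot before edits */

            flags |= FLAG_LEGACY_RX;           /* user turned legacy-rx on */
            changed ^= flags;                  /* XOR keeps only flipped bits */

            assert(changed == FLAG_LEGACY_RX); /* FLAG_OTHER didn't change */
            return 0;
    }
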
 
 /**
@@ -239,6 +339,7 @@ static void i40evf_get_drvinfo(struct net_device *netdev,
        strlcpy(drvinfo->version, i40evf_driver_version, 32);
        strlcpy(drvinfo->fw_version, "N/A", 4);
        strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
+       drvinfo->n_priv_flags = I40EVF_PRIV_FLAGS_STR_LEN;
 }
 
 /**
@@ -643,7 +744,6 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
 }
 
 static const struct ethtool_ops i40evf_ethtool_ops = {
-       .get_settings           = i40evf_get_settings,
        .get_drvinfo            = i40evf_get_drvinfo,
        .get_link               = ethtool_op_get_link,
        .get_ringparam          = i40evf_get_ringparam,
@@ -651,6 +751,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = {
        .get_strings            = i40evf_get_strings,
        .get_ethtool_stats      = i40evf_get_ethtool_stats,
        .get_sset_count         = i40evf_get_sset_count,
+       .get_priv_flags         = i40evf_get_priv_flags,
+       .set_priv_flags         = i40evf_set_priv_flags,
        .get_msglevel           = i40evf_get_msglevel,
        .set_msglevel           = i40evf_set_msglevel,
        .get_coalesce           = i40evf_get_coalesce,
@@ -663,6 +765,7 @@ static const struct ethtool_ops i40evf_ethtool_ops = {
        .set_rxfh               = i40evf_set_rxfh,
        .get_channels           = i40evf_get_channels,
        .get_rxfh_key_size      = i40evf_get_rxfh_key_size,
+       .get_link_ksettings     = i40evf_get_link_ksettings,
 };
 
 /**
index f35dcaac5bb7bd9bf86412c1bb40f8e971d086c7..fb2811c23024dbdd94848575a77737c8f16d4c0d 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "i40evf.h"
 #include "i40e_prototype.h"
+#include "i40evf_client.h"
 static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter);
 static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter);
 static int i40evf_close(struct net_device *netdev);
@@ -36,9 +37,9 @@ static const char i40evf_driver_string[] =
 
 #define DRV_KERN "-k"
 
-#define DRV_VERSION_MAJOR 1
-#define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 27
+#define DRV_VERSION_MAJOR 2
+#define DRV_VERSION_MINOR 1
+#define DRV_VERSION_BUILD 7
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD) \
@@ -685,12 +686,26 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter)
  **/
 static void i40evf_configure_rx(struct i40evf_adapter *adapter)
 {
+       unsigned int rx_buf_len = I40E_RXBUFFER_2048;
+       struct net_device *netdev = adapter->netdev;
        struct i40e_hw *hw = &adapter->hw;
        int i;
 
+       /* Legacy Rx will always default to a 2048 buffer size. */
+#if (PAGE_SIZE < 8192)
+       if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX)) {
+               /* We use a 1536 buffer size for configurations with
+                * standard Ethernet mtu.  On x86 this gives us enough room
+                * for shared info and 192 bytes of padding.
+                */
+               if (netdev->mtu <= ETH_DATA_LEN)
+                       rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+       }
+#endif
+
        for (i = 0; i < adapter->num_active_queues; i++) {
                adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
-               adapter->rx_rings[i].rx_buf_len = I40EVF_RXBUFFER_2048;
+               adapter->rx_rings[i].rx_buf_len = rx_buf_len;
        }
 }
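
The 1536-byte choice in the comment above is easy to verify. With 2048-byte receive regions and a 4.11-era x86_64 layout (sizeof(struct skb_shared_info) of roughly 320 bytes, NET_IP_ALIGN of 0), exactly the 192 bytes of padding the comment mentions are left over; the constants below are those assumptions, not values exported by the driver:

    #include <assert.h>

    /* assumed x86_64 values, not taken from the driver headers */
    #define TRUESIZE    2048  /* per-buffer region */
    #define RXBUF_1536  1536  /* packet data */
    #define SHINFO_SIZE  320  /* approx sizeof(struct skb_shared_info) */

    int main(void)
    {
            /* 1536 data + 320 shared info + 192 padding == 2048 */
            assert(TRUESIZE - RXBUF_1536 - SHINFO_SIZE == 192);
            return 0;
    }
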
 
@@ -1058,6 +1073,8 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter)
        i40evf_napi_enable_all(adapter);
 
        adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES;
+       if (CLIENT_ENABLED(adapter))
+               adapter->flags |= I40EVF_FLAG_CLIENT_NEEDS_OPEN;
        mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
 }
 
@@ -1685,6 +1702,7 @@ static void i40evf_watchdog_task(struct work_struct *work)
                i40evf_set_promiscuous(adapter, 0);
                goto watchdog_done;
        }
+       schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
 
        if (adapter->state == __I40EVF_RUNNING)
                i40evf_request_stats(adapter);
@@ -1773,10 +1791,17 @@ static void i40evf_reset_task(struct work_struct *work)
        u32 reg_val;
        int i = 0, err;
 
-       while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+       while (test_and_set_bit(__I40EVF_IN_CLIENT_TASK,
                                &adapter->crit_section))
                usleep_range(500, 1000);
-
+       if (CLIENT_ENABLED(adapter)) {
+               adapter->flags &= ~(I40EVF_FLAG_CLIENT_NEEDS_OPEN |
+                                   I40EVF_FLAG_CLIENT_NEEDS_CLOSE |
+                                   I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS |
+                                   I40EVF_FLAG_SERVICE_CLIENT_REQUESTED);
+               cancel_delayed_work_sync(&adapter->client_task);
+               i40evf_notify_client_close(&adapter->vsi, true);
+       }
        i40evf_misc_irq_disable(adapter);
        if (adapter->flags & I40EVF_FLAG_RESET_NEEDED) {
                adapter->flags &= ~I40EVF_FLAG_RESET_NEEDED;
@@ -1819,6 +1844,7 @@ static void i40evf_reset_task(struct work_struct *work)
                dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
                        reg_val);
                i40evf_disable_vf(adapter);
+               clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
                return; /* Do not attempt to reinit. It's dead, Jim. */
        }
 
@@ -1861,9 +1887,8 @@ continue_reset:
        }
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
-       /* Open RDMA Client again */
-       adapter->aq_required |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
        clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
        i40evf_misc_irq_enable(adapter);
 
        mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -1979,6 +2004,48 @@ out:
        i40evf_misc_irq_enable(adapter);
 }
 
+/**
+ * i40evf_client_task - worker thread to perform client work
+ * @work: pointer to work_struct containing our data
+ *
+ * This task handles client interactions. Because client calls can be
+ * reentrant, we can't handle them in the watchdog.
+ **/
+static void i40evf_client_task(struct work_struct *work)
+{
+       struct i40evf_adapter *adapter =
+               container_of(work, struct i40evf_adapter, client_task.work);
+
+       /* If we can't get the client bit, just give up. We'll be rescheduled
+        * later.
+        */
+
+       if (test_and_set_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section))
+               return;
+
+       if (adapter->flags & I40EVF_FLAG_SERVICE_CLIENT_REQUESTED) {
+               i40evf_client_subtask(adapter);
+               adapter->flags &= ~I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
+               goto out;
+       }
+       if (adapter->flags & I40EVF_FLAG_CLIENT_NEEDS_CLOSE) {
+               i40evf_notify_client_close(&adapter->vsi, false);
+               adapter->flags &= ~I40EVF_FLAG_CLIENT_NEEDS_CLOSE;
+               goto out;
+       }
+       if (adapter->flags & I40EVF_FLAG_CLIENT_NEEDS_OPEN) {
+               i40evf_notify_client_open(&adapter->vsi);
+               adapter->flags &= ~I40EVF_FLAG_CLIENT_NEEDS_OPEN;
+               goto out;
+       }
+       if (adapter->flags & I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS) {
+               i40evf_notify_client_l2_params(&adapter->vsi);
+               adapter->flags &= ~I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS;
+       }
+out:
+       clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
+}
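
Note the two different locking styles on the same bit: the client task above bails out immediately when test_and_set_bit() reports the bit was already set (it will be rescheduled), while the reset task earlier spins with usleep_range() until it wins. A user-space analogue of the try-lock side, using C11 atomics in place of the kernel's bitops (names are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint crit_section;
    #define IN_CLIENT_TASK (1u << 0)

    /* returns nonzero if the bit was already set, like test_and_set_bit() */
    static int test_and_set(atomic_uint *word, unsigned int bit)
    {
            return atomic_fetch_or(word, bit) & bit;
    }

    static void client_task(void)
    {
            if (test_and_set(&crit_section, IN_CLIENT_TASK)) {
                    puts("busy, giving up; will be rescheduled");
                    return;
            }
            puts("doing client work");
            atomic_fetch_and(&crit_section, ~IN_CLIENT_TASK); /* clear_bit() */
    }

    int main(void)
    {
            client_task();          /* runs */
            atomic_fetch_or(&crit_section, IN_CLIENT_TASK);
            client_task();          /* sees the bit held, backs off */
            return 0;
    }
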
+
 /**
  * i40evf_free_all_tx_resources - Free Tx Resources for All Queues
  * @adapter: board private structure
@@ -2148,6 +2215,8 @@ static int i40evf_close(struct net_device *netdev)
 
 
        set_bit(__I40E_DOWN, &adapter->vsi.state);
+       if (CLIENT_ENABLED(adapter))
+               adapter->flags |= I40EVF_FLAG_CLIENT_NEEDS_CLOSE;
 
        i40evf_down(adapter);
        adapter->state = __I40EVF_DOWN_PENDING;
@@ -2188,6 +2257,10 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
        struct i40evf_adapter *adapter = netdev_priv(netdev);
 
        netdev->mtu = new_mtu;
+       if (CLIENT_ENABLED(adapter)) {
+               i40evf_notify_client_l2_params(&adapter->vsi);
+               adapter->flags |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
+       }
        adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
        schedule_work(&adapter->reset_task);
 
@@ -2581,6 +2654,12 @@ static void i40evf_init_task(struct work_struct *work)
        adapter->netdev_registered = true;
 
        netif_tx_stop_all_queues(netdev);
+       if (CLIENT_ALLOWED(adapter)) {
+               err = i40evf_lan_add_device(adapter);
+               if (err)
+                       dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
+                                err);
+       }
 
        dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
        if (netdev->features & NETIF_F_GRO)
@@ -2745,6 +2824,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        INIT_WORK(&adapter->reset_task, i40evf_reset_task);
        INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
        INIT_WORK(&adapter->watchdog_task, i40evf_watchdog_task);
+       INIT_DELAYED_WORK(&adapter->client_task, i40evf_client_task);
        INIT_DELAYED_WORK(&adapter->init_task, i40evf_init_task);
        schedule_delayed_work(&adapter->init_task,
                              msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
@@ -2857,14 +2937,21 @@ static void i40evf_remove(struct pci_dev *pdev)
        struct i40evf_adapter *adapter = netdev_priv(netdev);
        struct i40evf_mac_filter *f, *ftmp;
        struct i40e_hw *hw = &adapter->hw;
+       int err;
 
        cancel_delayed_work_sync(&adapter->init_task);
        cancel_work_sync(&adapter->reset_task);
-
+       cancel_delayed_work_sync(&adapter->client_task);
        if (adapter->netdev_registered) {
                unregister_netdev(netdev);
                adapter->netdev_registered = false;
        }
+       if (CLIENT_ALLOWED(adapter)) {
+               err = i40evf_lan_del_device(adapter);
+               if (err)
+                       dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+                                err);
+       }
 
        /* Shut down all the garbage mashers on the detention level */
        adapter->state = __I40EVF_REMOVE;
index bee58af390e1c633006ddc38efddf256a307161a..032be8d3928a7469cbf101f7081148b0790a3ab8 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "i40evf.h"
 #include "i40e_prototype.h"
+#include "i40evf_client.h"
 
 /* busy wait delay in msec */
 #define I40EVF_BUSY_WAIT_DELAY 10
@@ -233,7 +234,7 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
        struct i40e_virtchnl_vsi_queue_config_info *vqci;
        struct i40e_virtchnl_queue_pair_info *vqpi;
        int pairs = adapter->num_active_queues;
-       int i, len;
+       int i, len, max_frame = I40E_MAX_RXBUFFER;
 
        if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
                /* bail because we already have a command pending */
@@ -248,6 +249,11 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
        if (!vqci)
                return;
 
+       /* Limit maximum frame size when jumbo frames are not enabled */
+       if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX) &&
+           (adapter->netdev->mtu <= ETH_DATA_LEN))
+               max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+
        vqci->vsi_id = adapter->vsi_res->vsi_id;
        vqci->num_queue_pairs = pairs;
        vqpi = vqci->qpair;
@@ -259,17 +265,14 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
                vqpi->txq.queue_id = i;
                vqpi->txq.ring_len = adapter->tx_rings[i].count;
                vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma;
-               vqpi->txq.headwb_enabled = 1;
-               vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr +
-                   (vqpi->txq.ring_len * sizeof(struct i40e_tx_desc));
-
                vqpi->rxq.vsi_id = vqci->vsi_id;
                vqpi->rxq.queue_id = i;
                vqpi->rxq.ring_len = adapter->rx_rings[i].count;
                vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma;
-               vqpi->rxq.max_pkt_size = adapter->netdev->mtu
-                                       + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
-               vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len;
+               vqpi->rxq.max_pkt_size = max_frame;
+               vqpi->rxq.databuffer_size =
+                       ALIGN(adapter->rx_rings[i].rx_buf_len,
+                             BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
                vqpi++;
        }
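
ALIGN() above rounds the buffer length up to the granularity the Rx queue context expects; BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT) is the hardware's data-buffer unit (128 bytes if the shift is 7, as the PF driver defines it, though that constant lives outside this patch). The rounding itself is the usual power-of-two trick, sketched standalone below with the 128-byte unit as an assumption:

    #include <assert.h>
    #include <stdint.h>

    /* round x up to a power-of-two boundary a, as the kernel's ALIGN() does */
    #define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

    int main(void)
    {
            uint64_t unit = 1ull << 7;              /* assumed DBUFF unit: 128 */

            assert(ALIGN_UP(1536, unit) == 1536);   /* already aligned */
            assert(ALIGN_UP(1522, unit) == 1536);   /* rounded up */
            assert(ALIGN_UP(2048, unit) == 2048);
            return 0;
    }
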
 
@@ -999,6 +1002,16 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                if (v_opcode != adapter->current_op)
                        return;
                break;
+       case I40E_VIRTCHNL_OP_IWARP:
+               /* Gobble zero-length replies from the PF. They indicate that
+                * a previous message was received OK, and the client doesn't
+                * care about that.
+                */
+               if (msglen && CLIENT_ENABLED(adapter))
+                       i40evf_notify_client_message(&adapter->vsi,
+                                                    msg, msglen);
+               break;
+
        case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
                adapter->client_pending &=
                                ~(BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP));
@@ -1014,7 +1027,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                }
                break;
        default:
-               if (v_opcode != adapter->current_op)
+               if (adapter->current_op && (v_opcode != adapter->current_op))
                        dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
                                 adapter->current_op, v_opcode);
                break;
index acbc3abe2dddfc7bdf87a3724a95b136237a519e..dc6e2980718f5d09c34a28660ebf47c4d5ff360d 100644 (file)
@@ -142,12 +142,24 @@ struct vf_data_storage {
 /* Supported Rx Buffer Sizes */
 #define IGB_RXBUFFER_256       256
 #define IGB_RXBUFFER_2048      2048
+#define IGB_RXBUFFER_3072      3072
 #define IGB_RX_HDR_LEN         IGB_RXBUFFER_256
-#define IGB_RX_BUFSZ           IGB_RXBUFFER_2048
+#define IGB_TS_HDR_LEN         16
+
+#define IGB_SKB_PAD            (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IGB_MAX_FRAME_BUILD_SKB \
+       (SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048) - IGB_SKB_PAD - IGB_TS_HDR_LEN)
+#else
+#define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_2048 - IGB_TS_HDR_LEN)
+#endif
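
The small-page branch above caps the frame at what one 2048-byte half-page can carry once build_skb() overhead is taken out. Plugging in typical x86_64 values (NET_SKB_PAD of 64 given 64-byte cache lines, NET_IP_ALIGN of 0, skb_shared_info of about 320 bytes, all assumptions rather than driver constants) gives roughly 1648 usable bytes:

    #include <stdio.h>

    /* assumed x86_64 values for a 4.11-era kernel */
    #define BUFSZ       2048
    #define SHINFO       320  /* approx sizeof(struct skb_shared_info) */
    #define SKB_PAD       64  /* NET_SKB_PAD + NET_IP_ALIGN on x86_64 */
    #define TS_HDR_LEN    16  /* IGB_TS_HDR_LEN */

    int main(void)
    {
            /* mirrors SKB_WITH_OVERHEAD(2048) - IGB_SKB_PAD - IGB_TS_HDR_LEN */
            int max_frame = (BUFSZ - SHINFO) - SKB_PAD - TS_HDR_LEN;

            printf("max build_skb frame: %d bytes\n", max_frame); /* ~1648 */
            return 0;
    }
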
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define IGB_RX_BUFFER_WRITE    16 /* Must be power of 2 */
 
+#define IGB_RX_DMA_ATTR \
+       (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
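
DMA_ATTR_SKIP_CPU_SYNC is what makes page reuse cheap: the page is mapped once without implicit cache synchronization, and the driver then syncs only the region a received frame actually touched. A hedged driver-side sketch of that split using the generic DMA API (the surrounding setup is simplified, not igb's actual Rx path):

    #include <linux/dma-mapping.h>

    /* map once, telling the core we'll handle CPU syncs ourselves */
    static dma_addr_t rx_map_page(struct device *dev, struct page *page)
    {
            return dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                      DMA_FROM_DEVICE,
                                      DMA_ATTR_SKIP_CPU_SYNC |
                                      DMA_ATTR_WEAK_ORDERING);
    }

    /* per packet: sync only the bytes the NIC wrote, not the whole page */
    static void rx_sync_for_cpu(struct device *dev, dma_addr_t dma,
                                unsigned int offset, unsigned int len)
    {
            dma_sync_single_range_for_cpu(dev, dma, offset, len,
                                          DMA_FROM_DEVICE);
    }
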
+
 #define AUTO_ALL_MODES         0
 #define IGB_EEPROM_APME                0x0400
 
@@ -301,12 +313,51 @@ struct igb_q_vector {
 };
 
 enum e1000_ring_flags_t {
+       IGB_RING_FLAG_RX_3K_BUFFER,
+       IGB_RING_FLAG_RX_BUILD_SKB_ENABLED,
        IGB_RING_FLAG_RX_SCTP_CSUM,
        IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
        IGB_RING_FLAG_TX_CTX_IDX,
        IGB_RING_FLAG_TX_DETECT_HANG
 };
 
+#define ring_uses_large_buffer(ring) \
+       test_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define set_ring_uses_large_buffer(ring) \
+       set_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define clear_ring_uses_large_buffer(ring) \
+       clear_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+
+#define ring_uses_build_skb(ring) \
+       test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define set_ring_build_skb_enabled(ring) \
+       set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define clear_ring_build_skb_enabled(ring) \
+       clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
+static inline unsigned int igb_rx_bufsz(struct igb_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return IGB_RXBUFFER_3072;
+
+       if (ring_uses_build_skb(ring))
+               return IGB_MAX_FRAME_BUILD_SKB + IGB_TS_HDR_LEN;
+#endif
+       return IGB_RXBUFFER_2048;
+}
+
+static inline unsigned int igb_rx_pg_order(struct igb_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return 1;
+#endif
+       return 0;
+}
+
+#define igb_rx_pg_size(_ring) (PAGE_SIZE << igb_rx_pg_order(_ring))
+
 #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
 
 #define IGB_RX_DESC(R, i)      \
@@ -545,6 +596,7 @@ struct igb_adapter {
 #define IGB_FLAG_HAS_MSIX              BIT(13)
 #define IGB_FLAG_EEE                   BIT(14)
 #define IGB_FLAG_VLAN_PROMISC          BIT(15)
+#define IGB_FLAG_RX_LEGACY             BIT(16)
 
 /* Media Auto Sense */
 #define IGB_MAS_ENABLE_0               0X0001
@@ -558,7 +610,6 @@ struct igb_adapter {
 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000  /* Disable DMA Coal Flush */
 
 #define IGB_82576_TSYNC_SHIFT  19
-#define IGB_TS_HDR_LEN         16
 enum e1000_state_t {
        __IGB_TESTING,
        __IGB_RESETTING,
@@ -591,7 +642,6 @@ void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
 void igb_setup_tctl(struct igb_adapter *);
 void igb_setup_rctl(struct igb_adapter *);
 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
-void igb_unmap_and_free_tx_resource(struct igb_ring *, struct igb_tx_buffer *);
 void igb_alloc_rx_buffers(struct igb_ring *, u16);
 void igb_update_stats(struct igb_adapter *, struct rtnl_link_stats64 *);
 bool igb_has_link(struct igb_adapter *adapter);
@@ -604,7 +654,7 @@ void igb_ptp_reset(struct igb_adapter *adapter);
 void igb_ptp_suspend(struct igb_adapter *adapter);
 void igb_ptp_rx_hang(struct igb_adapter *adapter);
 void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va,
+void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
                         struct sk_buff *skb);
 int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
index 737b664d004cbb372222f6f63acc5c9f38ad4c95..0efb62db6efdd0fa2bfa7d531343790a3156f2be 100644 (file)
@@ -144,7 +144,15 @@ static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
 };
 #define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
 
-static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static const char igb_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IGB_PRIV_FLAGS_LEGACY_RX       BIT(0)
+       "legacy-rx",
+};
+
+#define IGB_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igb_priv_flags_strings)
+
+static int igb_get_link_ksettings(struct net_device *netdev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
@@ -152,76 +160,73 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
        struct e1000_sfp_flags *eth_flags = &dev_spec->eth_flags;
        u32 status;
        u32 speed;
+       u32 supported, advertising;
 
        status = rd32(E1000_STATUS);
        if (hw->phy.media_type == e1000_media_type_copper) {
 
-               ecmd->supported = (SUPPORTED_10baseT_Half |
-                                  SUPPORTED_10baseT_Full |
-                                  SUPPORTED_100baseT_Half |
-                                  SUPPORTED_100baseT_Full |
-                                  SUPPORTED_1000baseT_Full|
-                                  SUPPORTED_Autoneg |
-                                  SUPPORTED_TP |
-                                  SUPPORTED_Pause);
-               ecmd->advertising = ADVERTISED_TP;
+               supported = (SUPPORTED_10baseT_Half |
+                            SUPPORTED_10baseT_Full |
+                            SUPPORTED_100baseT_Half |
+                            SUPPORTED_100baseT_Full |
+                            SUPPORTED_1000baseT_Full|
+                            SUPPORTED_Autoneg |
+                            SUPPORTED_TP |
+                            SUPPORTED_Pause);
+               advertising = ADVERTISED_TP;
 
                if (hw->mac.autoneg == 1) {
-                       ecmd->advertising |= ADVERTISED_Autoneg;
+                       advertising |= ADVERTISED_Autoneg;
                        /* the e1000 autoneg seems to match ethtool nicely */
-                       ecmd->advertising |= hw->phy.autoneg_advertised;
+                       advertising |= hw->phy.autoneg_advertised;
                }
 
-               ecmd->port = PORT_TP;
-               ecmd->phy_address = hw->phy.addr;
-               ecmd->transceiver = XCVR_INTERNAL;
+               cmd->base.port = PORT_TP;
+               cmd->base.phy_address = hw->phy.addr;
        } else {
-               ecmd->supported = (SUPPORTED_FIBRE |
-                                  SUPPORTED_1000baseKX_Full |
-                                  SUPPORTED_Autoneg |
-                                  SUPPORTED_Pause);
-               ecmd->advertising = (ADVERTISED_FIBRE |
-                                    ADVERTISED_1000baseKX_Full);
+               supported = (SUPPORTED_FIBRE |
+                            SUPPORTED_1000baseKX_Full |
+                            SUPPORTED_Autoneg |
+                            SUPPORTED_Pause);
+               advertising = (ADVERTISED_FIBRE |
+                              ADVERTISED_1000baseKX_Full);
                if (hw->mac.type == e1000_i354) {
                        if ((hw->device_id ==
                             E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) &&
                            !(status & E1000_STATUS_2P5_SKU_OVER)) {
-                               ecmd->supported |= SUPPORTED_2500baseX_Full;
-                               ecmd->supported &=
-                                       ~SUPPORTED_1000baseKX_Full;
-                               ecmd->advertising |= ADVERTISED_2500baseX_Full;
-                               ecmd->advertising &=
-                                       ~ADVERTISED_1000baseKX_Full;
+                               supported |= SUPPORTED_2500baseX_Full;
+                               supported &= ~SUPPORTED_1000baseKX_Full;
+                               advertising |= ADVERTISED_2500baseX_Full;
+                               advertising &= ~ADVERTISED_1000baseKX_Full;
                        }
                }
                if (eth_flags->e100_base_fx) {
-                       ecmd->supported |= SUPPORTED_100baseT_Full;
-                       ecmd->advertising |= ADVERTISED_100baseT_Full;
+                       supported |= SUPPORTED_100baseT_Full;
+                       advertising |= ADVERTISED_100baseT_Full;
                }
                if (hw->mac.autoneg == 1)
-                       ecmd->advertising |= ADVERTISED_Autoneg;
+                       advertising |= ADVERTISED_Autoneg;
 
-               ecmd->port = PORT_FIBRE;
-               ecmd->transceiver = XCVR_EXTERNAL;
+               cmd->base.port = PORT_FIBRE;
        }
        if (hw->mac.autoneg != 1)
-               ecmd->advertising &= ~(ADVERTISED_Pause |
-                                      ADVERTISED_Asym_Pause);
+               advertising &= ~(ADVERTISED_Pause |
+                                ADVERTISED_Asym_Pause);
 
        switch (hw->fc.requested_mode) {
        case e1000_fc_full:
-               ecmd->advertising |= ADVERTISED_Pause;
+               advertising |= ADVERTISED_Pause;
                break;
        case e1000_fc_rx_pause:
-               ecmd->advertising |= (ADVERTISED_Pause |
-                                     ADVERTISED_Asym_Pause);
+               advertising |= (ADVERTISED_Pause |
+                               ADVERTISED_Asym_Pause);
                break;
        case e1000_fc_tx_pause:
-               ecmd->advertising |=  ADVERTISED_Asym_Pause;
+               advertising |=  ADVERTISED_Asym_Pause;
                break;
        default:
-               ecmd->advertising &= ~(ADVERTISED_Pause |
-                                      ADVERTISED_Asym_Pause);
+               advertising &= ~(ADVERTISED_Pause |
+                                ADVERTISED_Asym_Pause);
        }
        if (status & E1000_STATUS_LU) {
                if ((status & E1000_STATUS_2P5_SKU) &&
@@ -236,39 +241,46 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
                }
                if ((status & E1000_STATUS_FD) ||
                    hw->phy.media_type != e1000_media_type_copper)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
                speed = SPEED_UNKNOWN;
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
-       ethtool_cmd_speed_set(ecmd, speed);
+       cmd->base.speed = speed;
        if ((hw->phy.media_type == e1000_media_type_fiber) ||
            hw->mac.autoneg)
-               ecmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        else
-               ecmd->autoneg = AUTONEG_DISABLE;
+               cmd->base.autoneg = AUTONEG_DISABLE;
 
        /* MDI-X => 2; MDI =>1; Invalid =>0 */
        if (hw->phy.media_type == e1000_media_type_copper)
-               ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+               cmd->base.eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
                                                      ETH_TP_MDI;
        else
-               ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+               cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 
        if (hw->phy.mdix == AUTO_ALL_MODES)
-               ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
        else
-               ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+               cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
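
ethtool_convert_legacy_u32_to_link_mode() and its inverse bridge the old 32-bit SUPPORTED_*/ADVERTISED_* masks and the new arbitrarily wide link-mode bitmaps, which is why this function can keep computing supported/advertising as plain u32s. The conversion is essentially a bitmap copy of the low 32 bits; a standalone sketch of that idea (not the kernel implementation itself):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define LINK_MODE_BITS  96  /* new-style bitmaps can outgrow 32 bits */
    #define WORDS           ((LINK_MODE_BITS + 63) / 64)

    static void legacy_u32_to_link_mode(uint64_t *bitmap, uint32_t legacy)
    {
            memset(bitmap, 0, WORDS * sizeof(*bitmap));
            bitmap[0] = legacy;        /* legacy bits occupy the low word */
    }

    static int link_mode_to_legacy_u32(uint32_t *legacy, const uint64_t *bitmap)
    {
            *legacy = (uint32_t)bitmap[0];
            /* report whether every set mode fit into the old u32 */
            return (bitmap[0] >> 32) == 0 && (WORDS < 2 || bitmap[1] == 0);
    }

    int main(void)
    {
            uint64_t modes[WORDS];
            uint32_t out;

            legacy_u32_to_link_mode(modes, 0x1234u);
            assert(link_mode_to_legacy_u32(&out, modes) && out == 0x1234u);
            return 0;
    }
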
 
-static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int igb_set_link_ksettings(struct net_device *netdev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
+       u32 advertising;
 
        /* When SoL/IDER sessions are active, autoneg/speed/duplex
         * cannot be changed
@@ -283,12 +295,12 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
         * some hardware doesn't allow MDI setting when speed or
         * duplex is forced.
         */
-       if (ecmd->eth_tp_mdix_ctrl) {
+       if (cmd->base.eth_tp_mdix_ctrl) {
                if (hw->phy.media_type != e1000_media_type_copper)
                        return -EOPNOTSUPP;
 
-               if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
-                   (ecmd->autoneg != AUTONEG_ENABLE)) {
+               if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+                   (cmd->base.autoneg != AUTONEG_ENABLE)) {
                        dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
                        return -EINVAL;
                }
@@ -297,10 +309,13 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                usleep_range(1000, 2000);
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
+
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                hw->mac.autoneg = 1;
                if (hw->phy.media_type == e1000_media_type_fiber) {
-                       hw->phy.autoneg_advertised = ecmd->advertising |
+                       hw->phy.autoneg_advertised = advertising |
                                                     ADVERTISED_FIBRE |
                                                     ADVERTISED_Autoneg;
                        switch (adapter->link_speed) {
@@ -320,31 +335,31 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
                                break;
                        }
                } else {
-                       hw->phy.autoneg_advertised = ecmd->advertising |
+                       hw->phy.autoneg_advertised = advertising |
                                                     ADVERTISED_TP |
                                                     ADVERTISED_Autoneg;
                }
-               ecmd->advertising = hw->phy.autoneg_advertised;
+               advertising = hw->phy.autoneg_advertised;
                if (adapter->fc_autoneg)
                        hw->fc.requested_mode = e1000_fc_default;
        } else {
-               u32 speed = ethtool_cmd_speed(ecmd);
+               u32 speed = cmd->base.speed;
                /* calling this overrides forced MDI setting */
-               if (igb_set_spd_dplx(adapter, speed, ecmd->duplex)) {
+               if (igb_set_spd_dplx(adapter, speed, cmd->base.duplex)) {
                        clear_bit(__IGB_RESETTING, &adapter->state);
                        return -EINVAL;
                }
        }
 
        /* MDI-X => 2; MDI => 1; Auto => 3 */
-       if (ecmd->eth_tp_mdix_ctrl) {
+       if (cmd->base.eth_tp_mdix_ctrl) {
                /* fix up the value for auto (3 => 0) as zero is mapped
                 * internally to auto
                 */
-               if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+               if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
                        hw->phy.mdix = AUTO_ALL_MODES;
                else
-                       hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+                       hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
        }
 
        /* reset the link */
@@ -852,6 +867,8 @@ static void igb_get_drvinfo(struct net_device *netdev,
                sizeof(drvinfo->fw_version));
        strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
                sizeof(drvinfo->bus_info));
+
+       drvinfo->n_priv_flags = IGB_PRIV_FLAGS_STR_LEN;
 }
 
 static void igb_get_ringparam(struct net_device *netdev,
@@ -1811,14 +1828,14 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
        tx_ntc = tx_ring->next_to_clean;
        rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
 
-       while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
+       while (rx_desc->wb.upper.length) {
                /* check Rx buffer */
                rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
 
                /* sync Rx buffer for CPU read */
                dma_sync_single_for_cpu(rx_ring->dev,
                                        rx_buffer_info->dma,
-                                       IGB_RX_BUFSZ,
+                                       size,
                                        DMA_FROM_DEVICE);
 
                /* verify contents of skb */
@@ -1828,12 +1845,21 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
                /* sync Rx buffer for device write */
                dma_sync_single_for_device(rx_ring->dev,
                                           rx_buffer_info->dma,
-                                          IGB_RX_BUFSZ,
+                                          size,
                                           DMA_FROM_DEVICE);
 
                /* unmap buffer on Tx side */
                tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
-               igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+
+               /* Free all the Tx ring sk_buffs */
+               dev_kfree_skb_any(tx_buffer_info->skb);
+
+               /* unmap skb header data */
+               dma_unmap_single(tx_ring->dev,
+                                dma_unmap_addr(tx_buffer_info, dma),
+                                dma_unmap_len(tx_buffer_info, len),
+                                DMA_TO_DEVICE);
+               dma_unmap_len_set(tx_buffer_info, len, 0);
 
                /* increment Rx/Tx next to clean counters */
                rx_ntc++;
@@ -2271,6 +2297,8 @@ static int igb_get_sset_count(struct net_device *netdev, int sset)
                return IGB_STATS_LEN;
        case ETH_SS_TEST:
                return IGB_TEST_LEN;
+       case ETH_SS_PRIV_FLAGS:
+               return IGB_PRIV_FLAGS_STR_LEN;
        default:
                return -ENOTSUPP;
        }
@@ -2376,6 +2404,10 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
                }
                /* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
                break;
+       case ETH_SS_PRIV_FLAGS:
+               memcpy(data, igb_priv_flags_strings,
+                      IGB_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+               break;
        }
 }
 
@@ -3388,9 +3420,38 @@ static int igb_set_channels(struct net_device *netdev,
        return 0;
 }
 
+static u32 igb_get_priv_flags(struct net_device *netdev)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       u32 priv_flags = 0;
+
+       if (adapter->flags & IGB_FLAG_RX_LEGACY)
+               priv_flags |= IGB_PRIV_FLAGS_LEGACY_RX;
+
+       return priv_flags;
+}
+
+static int igb_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       unsigned int flags = adapter->flags;
+
+       flags &= ~IGB_FLAG_RX_LEGACY;
+       if (priv_flags & IGB_PRIV_FLAGS_LEGACY_RX)
+               flags |= IGB_FLAG_RX_LEGACY;
+
+       if (flags != adapter->flags) {
+               adapter->flags = flags;
+
+               /* reset interface to repopulate queues */
+               if (netif_running(netdev))
+                       igb_reinit_locked(adapter);
+       }
+
+       return 0;
+}
+
 static const struct ethtool_ops igb_ethtool_ops = {
-       .get_settings           = igb_get_settings,
-       .set_settings           = igb_set_settings,
        .get_drvinfo            = igb_get_drvinfo,
        .get_regs_len           = igb_get_regs_len,
        .get_regs               = igb_get_regs,
@@ -3426,8 +3487,12 @@ static const struct ethtool_ops igb_ethtool_ops = {
        .set_rxfh               = igb_set_rxfh,
        .get_channels           = igb_get_channels,
        .set_channels           = igb_set_channels,
+       .get_priv_flags         = igb_get_priv_flags,
+       .set_priv_flags         = igb_set_priv_flags,
        .begin                  = igb_ethtool_begin,
        .complete               = igb_ethtool_complete,
+       .get_link_ksettings     = igb_get_link_ksettings,
+       .set_link_ksettings     = igb_set_link_ksettings,
 };
 
 void igb_set_ethtool_ops(struct net_device *netdev)
index be456bae816906e24338006a8b3597b539f86959..26a821fcd22012884843fbe3d81357cc4bcff985 100644 (file)
@@ -554,7 +554,7 @@ rx_ring_summary:
                                          16, 1,
                                          page_address(buffer_info->page) +
                                                      buffer_info->page_offset,
-                                         IGB_RX_BUFSZ, true);
+                                         igb_rx_bufsz(rx_ring), true);
                                }
                        }
                }
@@ -3293,7 +3293,7 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
 
        size = sizeof(struct igb_tx_buffer) * tx_ring->count;
 
-       tx_ring->tx_buffer_info = vzalloc(size);
+       tx_ring->tx_buffer_info = vmalloc(size);
        if (!tx_ring->tx_buffer_info)
                goto err;
 
@@ -3404,6 +3404,10 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
        txdctl |= IGB_TX_HTHRESH << 8;
        txdctl |= IGB_TX_WTHRESH << 16;
 
+       /* reinitialize tx_buffer_info */
+       memset(ring->tx_buffer_info, 0,
+              sizeof(struct igb_tx_buffer) * ring->count);
+
        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
        wr32(E1000_TXDCTL(reg_idx), txdctl);
 }
@@ -3435,7 +3439,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
 
        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
 
-       rx_ring->rx_buffer_info = vzalloc(size);
+       rx_ring->rx_buffer_info = vmalloc(size);
        if (!rx_ring->rx_buffer_info)
                goto err;
 
@@ -3720,6 +3724,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
 {
        struct e1000_hw *hw = &adapter->hw;
+       union e1000_adv_rx_desc *rx_desc;
        u64 rdba = ring->dma;
        int reg_idx = ring->reg_idx;
        u32 srrctl = 0, rxdctl = 0;
@@ -3741,7 +3746,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
 
        /* set descriptor configuration */
        srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
-       srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+       if (ring_uses_large_buffer(ring))
+               srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+       else
+               srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
        srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
        if (hw->mac.type >= e1000_82580)
                srrctl |= E1000_SRRCTL_TIMESTAMP;
@@ -3758,11 +3766,39 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
        rxdctl |= IGB_RX_HTHRESH << 8;
        rxdctl |= IGB_RX_WTHRESH << 16;
 
+       /* initialize rx_buffer_info */
+       memset(ring->rx_buffer_info, 0,
+              sizeof(struct igb_rx_buffer) * ring->count);
+
+       /* initialize Rx descriptor 0 */
+       rx_desc = IGB_RX_DESC(ring, 0);
+       rx_desc->wb.upper.length = 0;
+
        /* enable receive descriptor fetching */
        rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
        wr32(E1000_RXDCTL(reg_idx), rxdctl);
 }
 
+static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
+                                 struct igb_ring *rx_ring)
+{
+       /* set build_skb and buffer size flags */
+       clear_ring_build_skb_enabled(rx_ring);
+       clear_ring_uses_large_buffer(rx_ring);
+
+       if (adapter->flags & IGB_FLAG_RX_LEGACY)
+               return;
+
+       set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+       if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+               return;
+
+       set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
 /**
  *  igb_configure_rx - Configure receive Unit after Reset
  *  @adapter: board private structure
@@ -3780,8 +3816,12 @@ static void igb_configure_rx(struct igb_adapter *adapter)
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring
         */
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct igb_ring *rx_ring = adapter->rx_ring[i];
+
+               igb_set_rx_buffer_len(adapter, rx_ring);
+               igb_configure_rx_ring(adapter, rx_ring);
+       }
 }
 
 /**
@@ -3822,55 +3862,63 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
                        igb_free_tx_resources(adapter->tx_ring[i]);
 }
 
-void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
-                                   struct igb_tx_buffer *tx_buffer)
-{
-       if (tx_buffer->skb) {
-               dev_kfree_skb_any(tx_buffer->skb);
-               if (dma_unmap_len(tx_buffer, len))
-                       dma_unmap_single(ring->dev,
-                                        dma_unmap_addr(tx_buffer, dma),
-                                        dma_unmap_len(tx_buffer, len),
-                                        DMA_TO_DEVICE);
-       } else if (dma_unmap_len(tx_buffer, len)) {
-               dma_unmap_page(ring->dev,
-                              dma_unmap_addr(tx_buffer, dma),
-                              dma_unmap_len(tx_buffer, len),
-                              DMA_TO_DEVICE);
-       }
-       tx_buffer->next_to_watch = NULL;
-       tx_buffer->skb = NULL;
-       dma_unmap_len_set(tx_buffer, len, 0);
-       /* buffer_info must be completely set up in the transmit path */
-}
-
 /**
  *  igb_clean_tx_ring - Free Tx Buffers
  *  @tx_ring: ring to be cleaned
  **/
 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
 {
-       struct igb_tx_buffer *buffer_info;
-       unsigned long size;
-       u16 i;
+       u16 i = tx_ring->next_to_clean;
+       struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
 
-       if (!tx_ring->tx_buffer_info)
-               return;
-       /* Free all the Tx ring sk_buffs */
+       while (i != tx_ring->next_to_use) {
+               union e1000_adv_tx_desc *eop_desc, *tx_desc;
 
-       for (i = 0; i < tx_ring->count; i++) {
-               buffer_info = &tx_ring->tx_buffer_info[i];
-               igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
-       }
+               /* Free all the Tx ring sk_buffs */
+               dev_kfree_skb_any(tx_buffer->skb);
 
-       netdev_tx_reset_queue(txring_txq(tx_ring));
+               /* unmap skb header data */
+               dma_unmap_single(tx_ring->dev,
+                                dma_unmap_addr(tx_buffer, dma),
+                                dma_unmap_len(tx_buffer, len),
+                                DMA_TO_DEVICE);
 
-       size = sizeof(struct igb_tx_buffer) * tx_ring->count;
-       memset(tx_ring->tx_buffer_info, 0, size);
+               /* check for eop_desc to determine the end of the packet */
+               eop_desc = tx_buffer->next_to_watch;
+               tx_desc = IGB_TX_DESC(tx_ring, i);
+
+               /* unmap remaining buffers */
+               while (tx_desc != eop_desc) {
+                       tx_buffer++;
+                       tx_desc++;
+                       i++;
+                       if (unlikely(i == tx_ring->count)) {
+                               i = 0;
+                               tx_buffer = tx_ring->tx_buffer_info;
+                               tx_desc = IGB_TX_DESC(tx_ring, 0);
+                       }
+
+                       /* unmap any remaining paged data */
+                       if (dma_unmap_len(tx_buffer, len))
+                               dma_unmap_page(tx_ring->dev,
+                                              dma_unmap_addr(tx_buffer, dma),
+                                              dma_unmap_len(tx_buffer, len),
+                                              DMA_TO_DEVICE);
+               }
 
-       /* Zero out the descriptor ring */
-       memset(tx_ring->desc, 0, tx_ring->size);
+               /* move us one more past the eop_desc for start of next pkt */
+               tx_buffer++;
+               i++;
+               if (unlikely(i == tx_ring->count)) {
+                       i = 0;
+                       tx_buffer = tx_ring->tx_buffer_info;
+               }
+       }
 
+       /* reset BQL for queue */
+       netdev_tx_reset_queue(txring_txq(tx_ring));
+
+       /* reset next_to_use and next_to_clean */
        tx_ring->next_to_use = 0;
        tx_ring->next_to_clean = 0;
 }
@@ -3932,50 +3980,39 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter)
  **/
 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
 {
-       unsigned long size;
-       u16 i;
+       u16 i = rx_ring->next_to_clean;
 
        if (rx_ring->skb)
                dev_kfree_skb(rx_ring->skb);
        rx_ring->skb = NULL;
 
-       if (!rx_ring->rx_buffer_info)
-               return;
-
        /* Free all the Rx ring sk_buffs */
-       for (i = 0; i < rx_ring->count; i++) {
+       while (i != rx_ring->next_to_alloc) {
                struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
 
-               if (!buffer_info->page)
-                       continue;
-
                /* Invalidate cache lines that may have been written to by
                 * device so that we avoid corrupting memory.
                 */
                dma_sync_single_range_for_cpu(rx_ring->dev,
                                              buffer_info->dma,
                                              buffer_info->page_offset,
-                                             IGB_RX_BUFSZ,
+                                             igb_rx_bufsz(rx_ring),
                                              DMA_FROM_DEVICE);
 
                /* free resources associated with mapping */
                dma_unmap_page_attrs(rx_ring->dev,
                                     buffer_info->dma,
-                                    PAGE_SIZE,
+                                    igb_rx_pg_size(rx_ring),
                                     DMA_FROM_DEVICE,
-                                    DMA_ATTR_SKIP_CPU_SYNC);
+                                    IGB_RX_DMA_ATTR);
                __page_frag_cache_drain(buffer_info->page,
                                        buffer_info->pagecnt_bias);
 
-               buffer_info->page = NULL;
+               i++;
+               if (i == rx_ring->count)
+                       i = 0;
        }
 
-       size = sizeof(struct igb_rx_buffer) * rx_ring->count;
-       memset(rx_ring->rx_buffer_info, 0, size);
-
-       /* Zero out the descriptor ring */
-       memset(rx_ring->desc, 0, rx_ring->size);
-
        rx_ring->next_to_alloc = 0;
        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
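
Both cleanup paths above stop sweeping the entire ring (and stop memset()ing buffer_info and the descriptor ring) and instead walk only the occupied slice between next_to_clean and next_to_use/next_to_alloc; slots outside that window hold no skbs, pages or DMA mappings once the configure paths zero the arrays. The wraparound walk in miniature (release_buffer() is a hypothetical helper, not driver code):

    u16 i = ring->next_to_clean;

    while (i != ring->next_to_alloc) {
            release_buffer(ring, i);        /* unmap DMA, drop page refs */
            if (++i == ring->count)
                    i = 0;                  /* wrap at the end of the ring */
    }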
@@ -4240,7 +4277,7 @@ static void igb_set_rx_mode(struct net_device *netdev)
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        unsigned int vfn = adapter->vfs_allocated_count;
-       u32 rctl = 0, vmolr = 0;
+       u32 rctl = 0, vmolr = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
        int count;
 
        /* Check for Promiscuous and All Multicast modes */
@@ -4298,6 +4335,14 @@ static void igb_set_rx_mode(struct net_device *netdev)
                                     E1000_RCTL_VFE);
        wr32(E1000_RCTL, rctl);
 
+#if (PAGE_SIZE < 8192)
+       if (!adapter->vfs_allocated_count) {
+               if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+                       rlpml = IGB_MAX_FRAME_BUILD_SKB;
+       }
+#endif
+       wr32(E1000_RLPML, rlpml);
+
        /* In order to support SR-IOV and eventually VMDq it is necessary to set
         * the VMOLR to enable the appropriate modes.  Without this workaround
         * we will have issues with VLAN tag stripping not being done for frames
@@ -4312,12 +4357,17 @@ static void igb_set_rx_mode(struct net_device *netdev)
        vmolr |= rd32(E1000_VMOLR(vfn)) &
                 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
 
-       /* enable Rx jumbo frames, no need for restriction */
+       /* enable Rx jumbo frames, restrict as needed to support build_skb */
        vmolr &= ~E1000_VMOLR_RLPML_MASK;
-       vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE;
+#if (PAGE_SIZE < 8192)
+       if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+               vmolr |= IGB_MAX_FRAME_BUILD_SKB;
+       else
+#endif
+               vmolr |= MAX_JUMBO_FRAME_SIZE;
+       vmolr |= E1000_VMOLR_LPE;
 
        wr32(E1000_VMOLR(vfn), vmolr);
-       wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE);
 
        igb_restore_vf_multicasts(adapter);
 }
@@ -5256,18 +5306,32 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 
 dma_error:
        dev_err(tx_ring->dev, "TX DMA map failed\n");
+       tx_buffer = &tx_ring->tx_buffer_info[i];
 
        /* clear dma mappings for failed tx_buffer_info map */
-       for (;;) {
+       while (tx_buffer != first) {
+               if (dma_unmap_len(tx_buffer, len))
+                       dma_unmap_page(tx_ring->dev,
+                                      dma_unmap_addr(tx_buffer, dma),
+                                      dma_unmap_len(tx_buffer, len),
+                                      DMA_TO_DEVICE);
+               dma_unmap_len_set(tx_buffer, len, 0);
+
+               if (i-- == 0)
+                       i += tx_ring->count;
                tx_buffer = &tx_ring->tx_buffer_info[i];
-               igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
-               if (tx_buffer == first)
-                       break;
-               if (i == 0)
-                       i = tx_ring->count;
-               i--;
        }
 
+       if (dma_unmap_len(tx_buffer, len))
+               dma_unmap_single(tx_ring->dev,
+                                dma_unmap_addr(tx_buffer, dma),
+                                dma_unmap_len(tx_buffer, len),
+                                DMA_TO_DEVICE);
+       dma_unmap_len_set(tx_buffer, len, 0);
+
+       dev_kfree_skb_any(tx_buffer->skb);
+       tx_buffer->skb = NULL;
+
        tx_ring->next_to_use = i;
 }
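
The backwards unwind above leans on unsigned wraparound: "if (i-- == 0)" tests the old value of the index, and when it was zero the u16 decrement wraps so that adding tx_ring->count lands on the last ring slot. The idiom in isolation:

    /* Sketch: step a u16 ring index backwards with wraparound. */
    if (i-- == 0)
            i += ring_count;        /* index 0 steps back to ring_count - 1 */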
 
@@ -5339,7 +5403,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
        return NETDEV_TX_OK;
 
 out_drop:
-       igb_unmap_and_free_tx_resource(tx_ring, first);
+       dev_kfree_skb_any(first->skb);
+       first->skb = NULL;
 
        return NETDEV_TX_OK;
 }
@@ -6686,7 +6751,6 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
                                 DMA_TO_DEVICE);
 
                /* clear tx_buffer data */
-               tx_buffer->skb = NULL;
                dma_unmap_len_set(tx_buffer, len, 0);
 
                /* clear last DMA location and unmap remaining buffers */
@@ -6822,8 +6886,14 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring,
        nta++;
        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
-       /* transfer page from old buffer to new buffer */
-       *new_buff = *old_buff;
+       /* Transfer page from old buffer to new buffer.
+        * Move each member individually to avoid possible store
+        * forwarding stalls.
+        */
+       new_buff->dma           = old_buff->dma;
+       new_buff->page          = old_buff->page;
+       new_buff->page_offset   = old_buff->page_offset;
+       new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
 }
 
 static inline bool igb_page_is_reserved(struct page *page)
@@ -6831,11 +6901,10 @@ static inline bool igb_page_is_reserved(struct page *page)
        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
-                                 struct page *page,
-                                 unsigned int truesize)
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer)
 {
-       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+       struct page *page = rx_buffer->page;
 
        /* avoid re-using remote pages */
        if (unlikely(igb_page_is_reserved(page)))
@@ -6843,16 +6912,13 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_ref_count(page) != pagecnt_bias))
+       if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
                return false;
-
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= IGB_RX_BUFSZ;
 #else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += truesize;
+#define IGB_LAST_OFFSET \
+       (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGB_RXBUFFER_2048)
 
-       if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
+       if (rx_buffer->page_offset > IGB_LAST_OFFSET)
                return false;
 #endif
 
@@ -6860,7 +6926,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
         * the pagecnt_bias and page count so that we fully restock the
         * number of references the driver holds.
         */
-       if (unlikely(pagecnt_bias == 1)) {
+       if (unlikely(!pagecnt_bias)) {
                page_ref_add(page, USHRT_MAX);
                rx_buffer->pagecnt_bias = USHRT_MAX;
        }
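
Two things change in the reuse test: the bias decrement moves out to igb_get_rx_buffer() further down, and the "only owner" check tolerates a difference of one between the page refcount and pagecnt_bias rather than requiring exact equality. Refilling the bias with page_ref_add(page, USHRT_MAX) only when it reaches zero means the shared atomic refcount is touched roughly once per 64K buffer hand-offs instead of once per packet. A rough model of the accounting (struct and helper names here are illustrative, not the driver's):

    #include <linux/page_ref.h>

    struct rx_page {
            struct page *page;
            unsigned short pagecnt_bias;    /* refs the driver still "owns" */
    };

    /* The page can be recycled while everyone else holds at most one
     * reference: page_ref_count - pagecnt_bias counts refs handed out.
     */
    static bool rx_page_still_ours(struct rx_page *rp)
    {
            return page_ref_count(rp->page) - rp->pagecnt_bias <= 1;
    }

    static void rx_page_refill_bias(struct rx_page *rp)
    {
            if (!rp->pagecnt_bias) {                   /* bias exhausted */
                    page_ref_add(rp->page, USHRT_MAX); /* one atomic op covers */
                    rp->pagecnt_bias = USHRT_MAX;      /* the next 64K hand-offs */
            }
    }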
@@ -6872,34 +6938,56 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
  *  igb_add_rx_frag - Add contents of Rx buffer to sk_buff
  *  @rx_ring: rx descriptor ring to transact packets on
  *  @rx_buffer: buffer containing page to add
- *  @rx_desc: descriptor containing length of buffer written by hardware
  *  @skb: sk_buff to place the data into
+ *  @size: size of buffer to be added
  *
  *  This function will add the data contained in rx_buffer->page to the skb.
- *  This is done either through a direct copy if the data in the buffer is
- *  less than the skb header size, otherwise it will just attach the page as
- *  a frag to the skb.
- *
- *  The function will then update the page offset if necessary and return
- *  true if the buffer can be reused by the adapter.
  **/
-static bool igb_add_rx_frag(struct igb_ring *rx_ring,
+static void igb_add_rx_frag(struct igb_ring *rx_ring,
                            struct igb_rx_buffer *rx_buffer,
-                           unsigned int size,
-                           union e1000_adv_rx_desc *rx_desc,
-                           struct sk_buff *skb)
+                           struct sk_buff *skb,
+                           unsigned int size)
 {
-       struct page *page = rx_buffer->page;
-       unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
-       unsigned int truesize = IGB_RX_BUFSZ;
+       unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+                               SKB_DATA_ALIGN(IGB_SKB_PAD + size) :
+                               SKB_DATA_ALIGN(size);
+#endif
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+                       rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
+}
+
+static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
+                                        struct igb_rx_buffer *rx_buffer,
+                                        union e1000_adv_rx_desc *rx_desc,
+                                        unsigned int size)
+{
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
 #else
        unsigned int truesize = SKB_DATA_ALIGN(size);
 #endif
-       unsigned int pull_len;
+       unsigned int headlen;
+       struct sk_buff *skb;
 
-       if (unlikely(skb_is_nonlinear(skb)))
-               goto add_tail_frag;
+       /* prefetch first cache line of first page */
+       prefetch(va);
+#if L1_CACHE_BYTES < 128
+       prefetch(va + L1_CACHE_BYTES);
+#endif
+
+       /* allocate a skb to store the frags */
+       skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
+       if (unlikely(!skb))
+               return NULL;
 
        if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
                igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
@@ -6907,95 +6995,73 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring,
                size -= IGB_TS_HDR_LEN;
        }
 
-       if (likely(size <= IGB_RX_HDR_LEN)) {
-               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-               /* page is not reserved, we can reuse buffer as-is */
-               if (likely(!igb_page_is_reserved(page)))
-                       return true;
-
-               /* this page cannot be reused so discard it */
-               return false;
-       }
-
-       /* we need the header to contain the greater of either ETH_HLEN or
-        * 60 bytes if the skb->len is less than 60 for skb_pad.
-        */
-       pull_len = eth_get_headlen(va, IGB_RX_HDR_LEN);
+       /* Determine available headroom for copy */
+       headlen = size;
+       if (headlen > IGB_RX_HDR_LEN)
+               headlen = eth_get_headlen(va, IGB_RX_HDR_LEN);
 
        /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
 
        /* update all of the pointers */
-       va += pull_len;
-       size -= pull_len;
-
-add_tail_frag:
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       (unsigned long)va & ~PAGE_MASK, size, truesize);
+       size -= headlen;
+       if (size) {
+               skb_add_rx_frag(skb, 0, rx_buffer->page,
+                               (va + headlen) - page_address(rx_buffer->page),
+                               size, truesize);
+#if (PAGE_SIZE < 8192)
+               rx_buffer->page_offset ^= truesize;
+#else
+               rx_buffer->page_offset += truesize;
+#endif
+       } else {
+               rx_buffer->pagecnt_bias++;
+       }
 
-       return igb_can_reuse_rx_page(rx_buffer, page, truesize);
+       return skb;
 }
 
-static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
-                                          union e1000_adv_rx_desc *rx_desc,
-                                          struct sk_buff *skb)
+static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
+                                    struct igb_rx_buffer *rx_buffer,
+                                    union e1000_adv_rx_desc *rx_desc,
+                                    unsigned int size)
 {
-       unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
-       struct igb_rx_buffer *rx_buffer;
-       struct page *page;
-
-       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-       page = rx_buffer->page;
-       prefetchw(page);
-
-       /* we are reusing so sync this buffer for CPU use */
-       dma_sync_single_range_for_cpu(rx_ring->dev,
-                                     rx_buffer->dma,
-                                     rx_buffer->page_offset,
-                                     size,
-                                     DMA_FROM_DEVICE);
-
-       if (likely(!skb)) {
-               void *page_addr = page_address(page) +
-                                 rx_buffer->page_offset;
+       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+                               SKB_DATA_ALIGN(IGB_SKB_PAD + size);
+#endif
+       struct sk_buff *skb;
 
-               /* prefetch first cache line of first page */
-               prefetch(page_addr);
+       /* prefetch first cache line of first page */
+       prefetch(va);
 #if L1_CACHE_BYTES < 128
-               prefetch(page_addr + L1_CACHE_BYTES);
+       prefetch(va + L1_CACHE_BYTES);
 #endif
 
-               /* allocate a skb to store the frags */
-               skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
-               if (unlikely(!skb)) {
-                       rx_ring->rx_stats.alloc_failed++;
-                       return NULL;
-               }
+       /* build an skb around the page buffer */
+       skb = build_skb(va - IGB_SKB_PAD, truesize);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* we will be copying header into skb->data in
-                * pskb_may_pull so it is in our interest to prefetch
-                * it now to avoid a possible cache miss
-                */
-               prefetchw(skb->data);
-       }
+       /* update pointers within the skb to store the data */
+       skb_reserve(skb, IGB_SKB_PAD);
+       __skb_put(skb, size);
 
-       /* pull page into skb */
-       if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
-               /* hand second half of page back to the ring */
-               igb_reuse_rx_page(rx_ring, rx_buffer);
-       } else {
-               /* We are not reusing the buffer so unmap it and free
-                * any references we are holding to it
-                */
-               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-                                    PAGE_SIZE, DMA_FROM_DEVICE,
-                                    DMA_ATTR_SKIP_CPU_SYNC);
-               __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+       /* pull timestamp out of packet data */
+       if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+               igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
+               __skb_pull(skb, IGB_TS_HDR_LEN);
        }
 
-       /* clear contents of rx_buffer */
-       rx_buffer->page = NULL;
+       /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
 
        return skb;
 }
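
igb_build_skb() is the zero-copy counterpart of igb_construct_skb(): rather than allocating an skb and copying headers into it, it wraps skb metadata around the DMA buffer itself, which is why buffers now carry IGB_SKB_PAD of headroom (see igb_rx_offset() further down) and why truesize must also cover the skb_shared_info footer. The core shape, shown for the PAGE_SIZE >= 8192 truesize layout (wrap_rx_buffer() is an illustrative name):

    static struct sk_buff *wrap_rx_buffer(void *va, unsigned int size,
                                          unsigned int pad)
    {
            unsigned int truesize =
                    SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
                    SKB_DATA_ALIGN(pad + size);
            struct sk_buff *skb;

            skb = build_skb(va - pad, truesize);    /* no data copy */
            if (unlikely(!skb))
                    return NULL;

            skb_reserve(skb, pad);  /* headroom for later header pushes */
            __skb_put(skb, size);   /* bytes the NIC wrote become skb data */
            return skb;
    }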
@@ -7154,6 +7220,47 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring,
        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
+static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
+                                              const unsigned int size)
+{
+       struct igb_rx_buffer *rx_buffer;
+
+       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     size,
+                                     DMA_FROM_DEVICE);
+
+       rx_buffer->pagecnt_bias--;
+
+       return rx_buffer;
+}
+
+static void igb_put_rx_buffer(struct igb_ring *rx_ring,
+                             struct igb_rx_buffer *rx_buffer)
+{
+       if (igb_can_reuse_rx_page(rx_buffer)) {
+               /* hand second half of page back to the ring */
+               igb_reuse_rx_page(rx_ring, rx_buffer);
+       } else {
+               /* We are not reusing the buffer so unmap it and free
+                * any references we are holding to it
+                */
+               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+                                    igb_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+                                    IGB_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
+       }
+
+       /* clear contents of rx_buffer */
+       rx_buffer->page = NULL;
+}
+
 static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 {
        struct igb_ring *rx_ring = q_vector->rx.ring;
@@ -7163,6 +7270,8 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 
        while (likely(total_packets < budget)) {
                union e1000_adv_rx_desc *rx_desc;
+               struct igb_rx_buffer *rx_buffer;
+               unsigned int size;
 
                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
@@ -7171,8 +7280,8 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                }
 
                rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
-               if (!rx_desc->wb.upper.status_error)
+               size = le16_to_cpu(rx_desc->wb.upper.length);
+               if (!size)
                        break;
 
                /* This memory barrier is needed to keep us from reading
@@ -7181,13 +7290,25 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                 */
                dma_rmb();
 
+               rx_buffer = igb_get_rx_buffer(rx_ring, size);
+
                /* retrieve a buffer from the ring */
-               skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
+               if (skb)
+                       igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
+               else if (ring_uses_build_skb(rx_ring))
+                       skb = igb_build_skb(rx_ring, rx_buffer, rx_desc, size);
+               else
+                       skb = igb_construct_skb(rx_ring, rx_buffer,
+                                               rx_desc, size);
 
                /* exit if we failed to retrieve a buffer */
-               if (!skb)
+               if (!skb) {
+                       rx_ring->rx_stats.alloc_failed++;
+                       rx_buffer->pagecnt_bias++;
                        break;
+               }
 
+               igb_put_rx_buffer(rx_ring, rx_buffer);
                cleaned_count++;
 
                /* fetch next buffer in frame if non-eop */
@@ -7231,6 +7352,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
        return total_packets;
 }
 
+static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring)
+{
+       return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
+}
+
 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
                                  struct igb_rx_buffer *bi)
 {
@@ -7242,21 +7368,23 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
                return true;
 
        /* alloc new page for storage */
-       page = dev_alloc_page();
+       page = dev_alloc_pages(igb_rx_pg_order(rx_ring));
        if (unlikely(!page)) {
                rx_ring->rx_stats.alloc_failed++;
                return false;
        }
 
        /* map page for use */
-       dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
-                                DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                                igb_rx_pg_size(rx_ring),
+                                DMA_FROM_DEVICE,
+                                IGB_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
         */
        if (dma_mapping_error(rx_ring->dev, dma)) {
-               __free_page(page);
+               __free_pages(page, igb_rx_pg_order(rx_ring));
 
                rx_ring->rx_stats.alloc_failed++;
                return false;
@@ -7264,7 +7392,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
 
        bi->dma = dma;
        bi->page = page;
-       bi->page_offset = 0;
+       bi->page_offset = igb_rx_offset(rx_ring);
        bi->pagecnt_bias = 1;
 
        return true;
@@ -7279,6 +7407,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
        union e1000_adv_rx_desc *rx_desc;
        struct igb_rx_buffer *bi;
        u16 i = rx_ring->next_to_use;
+       u16 bufsz;
 
        /* nothing to do */
        if (!cleaned_count)
@@ -7288,14 +7417,15 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
        bi = &rx_ring->rx_buffer_info[i];
        i -= rx_ring->count;
 
+       bufsz = igb_rx_bufsz(rx_ring);
+
        do {
                if (!igb_alloc_mapped_page(rx_ring, bi))
                        break;
 
                /* sync the buffer for use by the device */
                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
-                                                bi->page_offset,
-                                                IGB_RX_BUFSZ,
+                                                bi->page_offset, bufsz,
                                                 DMA_FROM_DEVICE);
 
                /* Refresh the desc even if buffer_addrs didn't change
@@ -7312,8 +7442,8 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
                        i -= rx_ring->count;
                }
 
-               /* clear the status bits for the next_to_use descriptor */
-               rx_desc->wb.upper.status_error = 0;
+               /* clear the length for the next_to_use descriptor */
+               rx_desc->wb.upper.length = 0;
 
                cleaned_count--;
        } while (cleaned_count);
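
Clearing wb.upper.length here, instead of status_error, is what lets igb_clean_rx_irq() above use a single non-zero length read as its "descriptor done" test. Condensed, the producer/consumer handshake is:

    /* Producer (alloc path): hand the slot to hardware with length zeroed. */
    rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
    rx_desc->wb.upper.length = 0;   /* hw writes back a non-zero length */

    /* Consumer (clean path): a non-zero length doubles as the done check. */
    size = le16_to_cpu(rx_desc->wb.upper.length);
    if (!size)
            break;                  /* descriptor not written back yet */
    dma_rmb();                      /* order the length read before the rest */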
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index c4477552ce9ef2e153eb1678c783ff9e9fb08eb8..7a3fd4d745928c809961c589e05ce25abc0077a9 100644
@@ -764,8 +764,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
  * incoming frame.  The value is stored in little endian format starting on
  * byte 8.
  **/
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
-                        unsigned char *va,
+void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
                         struct sk_buff *skb)
 {
        __le64 *regval = (__le64 *)va;
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index 8dea1b1367ef65603592d9949fe55af0521cbcf8..34faa113a8a018e1e42a5ff1eebc867be65ad10d 100644
@@ -71,45 +71,45 @@ static const char igbvf_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define IGBVF_TEST_LEN ARRAY_SIZE(igbvf_gstrings_test)
 
-static int igbvf_get_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int igbvf_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct igbvf_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        u32 status;
 
-       ecmd->supported   = SUPPORTED_1000baseT_Full;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
 
-       ecmd->advertising = ADVERTISED_1000baseT_Full;
-
-       ecmd->port = -1;
-       ecmd->transceiver = XCVR_DUMMY1;
+       cmd->base.port = -1;
 
        status = er32(STATUS);
        if (status & E1000_STATUS_LU) {
                if (status & E1000_STATUS_SPEED_1000)
-                       ethtool_cmd_speed_set(ecmd, SPEED_1000);
+                       cmd->base.speed = SPEED_1000;
                else if (status & E1000_STATUS_SPEED_100)
-                       ethtool_cmd_speed_set(ecmd, SPEED_100);
+                       cmd->base.speed = SPEED_100;
                else
-                       ethtool_cmd_speed_set(ecmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
 
                if (status & E1000_STATUS_FD)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       ecmd->autoneg = AUTONEG_DISABLE;
+       cmd->base.autoneg = AUTONEG_DISABLE;
 
        return 0;
 }
 
-static int igbvf_set_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int igbvf_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        return -EOPNOTSUPP;
 }
@@ -443,8 +443,6 @@ static void igbvf_get_strings(struct net_device *netdev, u32 stringset,
 }
 
 static const struct ethtool_ops igbvf_ethtool_ops = {
-       .get_settings           = igbvf_get_settings,
-       .set_settings           = igbvf_set_settings,
        .get_drvinfo            = igbvf_get_drvinfo,
        .get_regs_len           = igbvf_get_regs_len,
        .get_regs               = igbvf_get_regs,
@@ -467,6 +465,8 @@ static const struct ethtool_ops igbvf_ethtool_ops = {
        .get_ethtool_stats      = igbvf_get_ethtool_stats,
        .get_coalesce           = igbvf_get_coalesce,
        .set_coalesce           = igbvf_set_coalesce,
+       .get_link_ksettings     = igbvf_get_link_ksettings,
+       .set_link_ksettings     = igbvf_set_link_ksettings,
 };
 
 void igbvf_set_ethtool_ops(struct net_device *netdev)
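
The igbvf conversion above, and the ixgb and ixgbe ones that follow, apply the same recipe: drop .get_settings/.set_settings, describe link modes through the ethtool_link_ksettings bitmap helpers, and move speed/duplex/autoneg into cmd->base. For a fixed-link device the new getter reduces to roughly this (foo_* names are placeholders, not one of the drivers above):

    static int foo_get_link_ksettings(struct net_device *netdev,
                                      struct ethtool_link_ksettings *cmd)
    {
            bool link_up = netif_carrier_ok(netdev);

            ethtool_link_ksettings_zero_link_mode(cmd, supported);
            ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
            ethtool_link_ksettings_zero_link_mode(cmd, advertising);
            ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);

            cmd->base.speed = link_up ? SPEED_1000 : SPEED_UNKNOWN;
            cmd->base.duplex = link_up ? DUPLEX_FULL : DUPLEX_UNKNOWN;
            cmd->base.autoneg = AUTONEG_DISABLE;
            return 0;
    }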
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index e5d72559cca9b060002525e5086ccfc008cc99d6..d10a0d242dda5db4f8c474a0814bc7a1b9eae491 100644
@@ -94,24 +94,30 @@ static struct ixgb_stats ixgb_gstrings_stats[] = {
 #define IXGB_STATS_LEN ARRAY_SIZE(ixgb_gstrings_stats)
 
 static int
-ixgb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+ixgb_get_link_ksettings(struct net_device *netdev,
+                       struct ethtool_link_ksettings *cmd)
 {
        struct ixgb_adapter *adapter = netdev_priv(netdev);
 
-       ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE);
-       ecmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE);
-       ecmd->port = PORT_FIBRE;
-       ecmd->transceiver = XCVR_EXTERNAL;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, 10000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+
+       cmd->base.port = PORT_FIBRE;
 
        if (netif_carrier_ok(adapter->netdev)) {
-               ethtool_cmd_speed_set(ecmd, SPEED_10000);
-               ecmd->duplex = DUPLEX_FULL;
+               cmd->base.speed = SPEED_10000;
+               cmd->base.duplex = DUPLEX_FULL;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       ecmd->autoneg = AUTONEG_DISABLE;
+       cmd->base.autoneg = AUTONEG_DISABLE;
        return 0;
 }
 
@@ -126,13 +132,14 @@ void ixgb_set_speed_duplex(struct net_device *netdev)
 }
 
 static int
-ixgb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+ixgb_set_link_ksettings(struct net_device *netdev,
+                       const struct ethtool_link_ksettings *cmd)
 {
        struct ixgb_adapter *adapter = netdev_priv(netdev);
-       u32 speed = ethtool_cmd_speed(ecmd);
+       u32 speed = cmd->base.speed;
 
-       if (ecmd->autoneg == AUTONEG_ENABLE ||
-           (speed + ecmd->duplex != SPEED_10000 + DUPLEX_FULL))
+       if (cmd->base.autoneg == AUTONEG_ENABLE ||
+           (speed + cmd->base.duplex != SPEED_10000 + DUPLEX_FULL))
                return -EINVAL;
 
        if (netif_running(adapter->netdev)) {
@@ -630,8 +637,6 @@ ixgb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 }
 
 static const struct ethtool_ops ixgb_ethtool_ops = {
-       .get_settings = ixgb_get_settings,
-       .set_settings = ixgb_set_settings,
        .get_drvinfo = ixgb_get_drvinfo,
        .get_regs_len = ixgb_get_regs_len,
        .get_regs = ixgb_get_regs,
@@ -649,6 +654,8 @@ static const struct ethtool_ops ixgb_ethtool_ops = {
        .set_phys_id = ixgb_set_phys_id,
        .get_sset_count = ixgb_get_sset_count,
        .get_ethtool_stats = ixgb_get_ethtool_stats,
+       .get_link_ksettings = ixgb_get_link_ksettings,
+       .set_link_ksettings = ixgb_set_link_ksettings,
 };
 
 void ixgb_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 90fa5bf23d1b5f6d636478626b7d7f45d6a8871c..0da0752fedef1db2988ee1b1d9d63831760b73de 100644
@@ -186,60 +186,62 @@ static u32 ixgbe_get_supported_10gtypes(struct ixgbe_hw *hw)
        }
 }
 
-static int ixgbe_get_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int ixgbe_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
        struct ixgbe_hw *hw = &adapter->hw;
        ixgbe_link_speed supported_link;
        bool autoneg = false;
+       u32 supported, advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&supported,
+                                               cmd->link_modes.supported);
 
        hw->mac.ops.get_link_capabilities(hw, &supported_link, &autoneg);
 
        /* set the supported link speeds */
        if (supported_link & IXGBE_LINK_SPEED_10GB_FULL)
-               ecmd->supported |= ixgbe_get_supported_10gtypes(hw);
+               supported |= ixgbe_get_supported_10gtypes(hw);
        if (supported_link & IXGBE_LINK_SPEED_1GB_FULL)
-               ecmd->supported |= (ixgbe_isbackplane(hw->phy.media_type)) ?
+               supported |= (ixgbe_isbackplane(hw->phy.media_type)) ?
                                   SUPPORTED_1000baseKX_Full :
                                   SUPPORTED_1000baseT_Full;
        if (supported_link & IXGBE_LINK_SPEED_100_FULL)
-               ecmd->supported |= SUPPORTED_100baseT_Full;
+               supported |= SUPPORTED_100baseT_Full;
        if (supported_link & IXGBE_LINK_SPEED_10_FULL)
-               ecmd->supported |= SUPPORTED_10baseT_Full;
+               supported |= SUPPORTED_10baseT_Full;
 
        /* default advertised speed if phy.autoneg_advertised isn't set */
-       ecmd->advertising = ecmd->supported;
+       advertising = supported;
        /* set the advertised speeds */
        if (hw->phy.autoneg_advertised) {
-               ecmd->advertising = 0;
+               advertising = 0;
                if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10_FULL)
-                       ecmd->advertising |= ADVERTISED_10baseT_Full;
+                       advertising |= ADVERTISED_10baseT_Full;
                if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
-                       ecmd->advertising |= ADVERTISED_100baseT_Full;
+                       advertising |= ADVERTISED_100baseT_Full;
                if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
-                       ecmd->advertising |= ecmd->supported & ADVRTSD_MSK_10G;
+                       advertising |= supported & ADVRTSD_MSK_10G;
                if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) {
-                       if (ecmd->supported & SUPPORTED_1000baseKX_Full)
-                               ecmd->advertising |= ADVERTISED_1000baseKX_Full;
+                       if (supported & SUPPORTED_1000baseKX_Full)
+                               advertising |= ADVERTISED_1000baseKX_Full;
                        else
-                               ecmd->advertising |= ADVERTISED_1000baseT_Full;
+                               advertising |= ADVERTISED_1000baseT_Full;
                }
        } else {
                if (hw->phy.multispeed_fiber && !autoneg) {
                        if (supported_link & IXGBE_LINK_SPEED_10GB_FULL)
-                               ecmd->advertising = ADVERTISED_10000baseT_Full;
+                               advertising = ADVERTISED_10000baseT_Full;
                }
        }
 
        if (autoneg) {
-               ecmd->supported |= SUPPORTED_Autoneg;
-               ecmd->advertising |= ADVERTISED_Autoneg;
-               ecmd->autoneg = AUTONEG_ENABLE;
+               supported |= SUPPORTED_Autoneg;
+               advertising |= ADVERTISED_Autoneg;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        } else
-               ecmd->autoneg = AUTONEG_DISABLE;
-
-       ecmd->transceiver = XCVR_EXTERNAL;
+               cmd->base.autoneg = AUTONEG_DISABLE;
 
        /* Determine the remaining settings based on the PHY type. */
        switch (adapter->hw.phy.type) {
@@ -248,14 +250,14 @@ static int ixgbe_get_settings(struct net_device *netdev,
        case ixgbe_phy_x550em_ext_t:
        case ixgbe_phy_fw:
        case ixgbe_phy_cu_unknown:
-               ecmd->supported |= SUPPORTED_TP;
-               ecmd->advertising |= ADVERTISED_TP;
-               ecmd->port = PORT_TP;
+               supported |= SUPPORTED_TP;
+               advertising |= ADVERTISED_TP;
+               cmd->base.port = PORT_TP;
                break;
        case ixgbe_phy_qt:
-               ecmd->supported |= SUPPORTED_FIBRE;
-               ecmd->advertising |= ADVERTISED_FIBRE;
-               ecmd->port = PORT_FIBRE;
+               supported |= SUPPORTED_FIBRE;
+               advertising |= ADVERTISED_FIBRE;
+               cmd->base.port = PORT_FIBRE;
                break;
        case ixgbe_phy_nl:
        case ixgbe_phy_sfp_passive_tyco:
@@ -273,9 +275,9 @@ static int ixgbe_get_settings(struct net_device *netdev,
                case ixgbe_sfp_type_da_cu:
                case ixgbe_sfp_type_da_cu_core0:
                case ixgbe_sfp_type_da_cu_core1:
-                       ecmd->supported |= SUPPORTED_FIBRE;
-                       ecmd->advertising |= ADVERTISED_FIBRE;
-                       ecmd->port = PORT_DA;
+                       supported |= SUPPORTED_FIBRE;
+                       advertising |= ADVERTISED_FIBRE;
+                       cmd->base.port = PORT_DA;
                        break;
                case ixgbe_sfp_type_sr:
                case ixgbe_sfp_type_lr:
@@ -285,102 +287,113 @@ static int ixgbe_get_settings(struct net_device *netdev,
                case ixgbe_sfp_type_1g_sx_core1:
                case ixgbe_sfp_type_1g_lx_core0:
                case ixgbe_sfp_type_1g_lx_core1:
-                       ecmd->supported |= SUPPORTED_FIBRE;
-                       ecmd->advertising |= ADVERTISED_FIBRE;
-                       ecmd->port = PORT_FIBRE;
+                       supported |= SUPPORTED_FIBRE;
+                       advertising |= ADVERTISED_FIBRE;
+                       cmd->base.port = PORT_FIBRE;
                        break;
                case ixgbe_sfp_type_not_present:
-                       ecmd->supported |= SUPPORTED_FIBRE;
-                       ecmd->advertising |= ADVERTISED_FIBRE;
-                       ecmd->port = PORT_NONE;
+                       supported |= SUPPORTED_FIBRE;
+                       advertising |= ADVERTISED_FIBRE;
+                       cmd->base.port = PORT_NONE;
                        break;
                case ixgbe_sfp_type_1g_cu_core0:
                case ixgbe_sfp_type_1g_cu_core1:
-                       ecmd->supported |= SUPPORTED_TP;
-                       ecmd->advertising |= ADVERTISED_TP;
-                       ecmd->port = PORT_TP;
+                       supported |= SUPPORTED_TP;
+                       advertising |= ADVERTISED_TP;
+                       cmd->base.port = PORT_TP;
                        break;
                case ixgbe_sfp_type_unknown:
                default:
-                       ecmd->supported |= SUPPORTED_FIBRE;
-                       ecmd->advertising |= ADVERTISED_FIBRE;
-                       ecmd->port = PORT_OTHER;
+                       supported |= SUPPORTED_FIBRE;
+                       advertising |= ADVERTISED_FIBRE;
+                       cmd->base.port = PORT_OTHER;
                        break;
                }
                break;
        case ixgbe_phy_xaui:
-               ecmd->supported |= SUPPORTED_FIBRE;
-               ecmd->advertising |= ADVERTISED_FIBRE;
-               ecmd->port = PORT_NONE;
+               supported |= SUPPORTED_FIBRE;
+               advertising |= ADVERTISED_FIBRE;
+               cmd->base.port = PORT_NONE;
                break;
        case ixgbe_phy_unknown:
        case ixgbe_phy_generic:
        case ixgbe_phy_sfp_unsupported:
        default:
-               ecmd->supported |= SUPPORTED_FIBRE;
-               ecmd->advertising |= ADVERTISED_FIBRE;
-               ecmd->port = PORT_OTHER;
+               supported |= SUPPORTED_FIBRE;
+               advertising |= ADVERTISED_FIBRE;
+               cmd->base.port = PORT_OTHER;
                break;
        }
 
        /* Indicate pause support */
-       ecmd->supported |= SUPPORTED_Pause;
+       supported |= SUPPORTED_Pause;
 
        switch (hw->fc.requested_mode) {
        case ixgbe_fc_full:
-               ecmd->advertising |= ADVERTISED_Pause;
+               advertising |= ADVERTISED_Pause;
                break;
        case ixgbe_fc_rx_pause:
-               ecmd->advertising |= ADVERTISED_Pause |
+               advertising |= ADVERTISED_Pause |
                                     ADVERTISED_Asym_Pause;
                break;
        case ixgbe_fc_tx_pause:
-               ecmd->advertising |= ADVERTISED_Asym_Pause;
+               advertising |= ADVERTISED_Asym_Pause;
                break;
        default:
-               ecmd->advertising &= ~(ADVERTISED_Pause |
+               advertising &= ~(ADVERTISED_Pause |
                                       ADVERTISED_Asym_Pause);
        }
 
        if (netif_carrier_ok(netdev)) {
                switch (adapter->link_speed) {
                case IXGBE_LINK_SPEED_10GB_FULL:
-                       ethtool_cmd_speed_set(ecmd, SPEED_10000);
+                       cmd->base.speed = SPEED_10000;
                        break;
                case IXGBE_LINK_SPEED_5GB_FULL:
-                       ethtool_cmd_speed_set(ecmd, SPEED_5000);
+                       cmd->base.speed = SPEED_5000;
                        break;
                case IXGBE_LINK_SPEED_2_5GB_FULL:
-                       ethtool_cmd_speed_set(ecmd, SPEED_2500);
+                       cmd->base.speed = SPEED_2500;
                        break;
                case IXGBE_LINK_SPEED_1GB_FULL:
-                       ethtool_cmd_speed_set(ecmd, SPEED_1000);
+                       cmd->base.speed = SPEED_1000;
                        break;
                case IXGBE_LINK_SPEED_100_FULL:
-                       ethtool_cmd_speed_set(ecmd, SPEED_100);
+                       cmd->base.speed = SPEED_100;
                        break;
                case IXGBE_LINK_SPEED_10_FULL:
-                       ethtool_cmd_speed_set(ecmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
                        break;
                default:
                        break;
                }
-               ecmd->duplex = DUPLEX_FULL;
+               cmd->base.duplex = DUPLEX_FULL;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+
        return 0;
 }
 
-static int ixgbe_set_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int ixgbe_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
        struct ixgbe_hw *hw = &adapter->hw;
        u32 advertised, old;
        s32 err = 0;
+       u32 supported, advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&supported,
+                                               cmd->link_modes.supported);
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        if ((hw->phy.media_type == ixgbe_media_type_copper) ||
            (hw->phy.multispeed_fiber)) {
@@ -388,12 +401,12 @@ static int ixgbe_set_settings(struct net_device *netdev,
                 * this function does not support duplex forcing, but can
                 * limit the advertising of the adapter to the specified speed
                 */
-               if (ecmd->advertising & ~ecmd->supported)
+               if (advertising & ~supported)
                        return -EINVAL;
 
                /* only allow one speed at a time if no autoneg */
-               if (!ecmd->autoneg && hw->phy.multispeed_fiber) {
-                       if (ecmd->advertising ==
+               if (!cmd->base.autoneg && hw->phy.multispeed_fiber) {
+                       if (advertising ==
                            (ADVERTISED_10000baseT_Full |
                             ADVERTISED_1000baseT_Full))
                                return -EINVAL;
@@ -401,16 +414,16 @@ static int ixgbe_set_settings(struct net_device *netdev,
 
                old = hw->phy.autoneg_advertised;
                advertised = 0;
-               if (ecmd->advertising & ADVERTISED_10000baseT_Full)
+               if (advertising & ADVERTISED_10000baseT_Full)
                        advertised |= IXGBE_LINK_SPEED_10GB_FULL;
 
-               if (ecmd->advertising & ADVERTISED_1000baseT_Full)
+               if (advertising & ADVERTISED_1000baseT_Full)
                        advertised |= IXGBE_LINK_SPEED_1GB_FULL;
 
-               if (ecmd->advertising & ADVERTISED_100baseT_Full)
+               if (advertising & ADVERTISED_100baseT_Full)
                        advertised |= IXGBE_LINK_SPEED_100_FULL;
 
-               if (ecmd->advertising & ADVERTISED_10baseT_Full)
+               if (advertising & ADVERTISED_10baseT_Full)
                        advertised |= IXGBE_LINK_SPEED_10_FULL;
 
                if (old == advertised)
@@ -428,10 +441,11 @@ static int ixgbe_set_settings(struct net_device *netdev,
                clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
        } else {
                /* in this case we currently only support 10Gb/FULL */
-               u32 speed = ethtool_cmd_speed(ecmd);
-               if ((ecmd->autoneg == AUTONEG_ENABLE) ||
-                   (ecmd->advertising != ADVERTISED_10000baseT_Full) ||
-                   (speed + ecmd->duplex != SPEED_10000 + DUPLEX_FULL))
+               u32 speed = cmd->base.speed;
+
+               if ((cmd->base.autoneg == AUTONEG_ENABLE) ||
+                   (advertising != ADVERTISED_10000baseT_Full) ||
+                   (speed + cmd->base.duplex != SPEED_10000 + DUPLEX_FULL))
                        return -EINVAL;
        }
 
@@ -3402,8 +3416,6 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags)
 }
 
 static const struct ethtool_ops ixgbe_ethtool_ops = {
-       .get_settings           = ixgbe_get_settings,
-       .set_settings           = ixgbe_set_settings,
        .get_drvinfo            = ixgbe_get_drvinfo,
        .get_regs_len           = ixgbe_get_regs_len,
        .get_regs               = ixgbe_get_regs,
@@ -3442,6 +3454,8 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
        .get_ts_info            = ixgbe_get_ts_info,
        .get_module_info        = ixgbe_get_module_info,
        .get_module_eeprom      = ixgbe_get_module_eeprom,
+       .get_link_ksettings     = ixgbe_get_link_ksettings,
+       .set_link_ksettings     = ixgbe_set_link_ksettings,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
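
Unlike the smaller drivers, ixgbe keeps its existing SUPPORTED_*/ADVERTISED_* bitmask logic and converts only at the API boundary, which keeps the change mechanical at the cost of being limited to link modes expressible in the legacy u32. The boundary step in isolation:

    u32 supported = 0, advertising = 0;

    /* ... populate supported/advertising with legacy SUPPORTED_ and
     * ADVERTISED_ bits, exactly as the old get_settings code did ...
     */

    ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
                                            supported);
    ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
                                            advertising);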
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a7a430a7be2cd9201cc36022249219e94bfb41ca..852a2e7e25ed185917732df098174820c56e1295 100644
@@ -2122,7 +2122,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
        prefetch(va + L1_CACHE_BYTES);
 #endif
 
-       /* build an skb to around the page buffer */
+       /* build an skb around the page buffer */
        skb = build_skb(va - IXGBE_SKB_PAD, truesize);
        if (unlikely(!skb))
                return NULL;
@@ -8948,7 +8948,9 @@ static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       return ixgbe_setup_tc(dev, tc->tc);
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return ixgbe_setup_tc(dev, tc->mqprio->num_tc);
 }
 
 #ifdef CONFIG_PCI_IOV
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index d2555e8b947ee2a93053f4ac54191193c4a50529..da6fb825afeafd28a56909aa3baa4e97c3704d58 100644
@@ -82,13 +82,13 @@ config MVNETA_BM
          that all dependencies are met.
 
 config MVPP2
-       tristate "Marvell Armada 375 network interface support"
+       tristate "Marvell Armada 375/7K/8K network interface support"
        depends on ARCH_MVEBU || COMPILE_TEST
        depends on HAS_DMA
        select MVMDIO
        ---help---
          This driver supports the network interface units in the
-         Marvell ARMADA 375 SoC.
+         Marvell ARMADA 375, 7K and 8K SoCs.
 
 config PXA168_ETH
        tristate "Marvell pxa168 ethernet support"
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 61dd4462411c03511d6121d75ca65dc36c7f688f..34a3686d2ce669091510f5dc85773a84d7880c92 100644
@@ -431,6 +431,7 @@ struct mvneta_port {
        /* Flags for special SoC configurations */
        bool neta_armada3700;
        u16 rx_offset_correction;
+       const struct mbus_dram_target_info *dram_target_info;
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
@@ -4098,6 +4099,8 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
                break;
        case PHY_INTERFACE_MODE_RGMII:
        case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
                ctrl |= MVNETA_GMAC2_PORT_RGMII;
                break;
        default:
@@ -4118,7 +4121,6 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
 /* Device initialization routine */
 static int mvneta_probe(struct platform_device *pdev)
 {
-       const struct mbus_dram_target_info *dram_target_info;
        struct resource *res;
        struct device_node *dn = pdev->dev.of_node;
        struct device_node *phy_node;
@@ -4267,13 +4269,13 @@ static int mvneta_probe(struct platform_device *pdev)
 
        pp->tx_csum_limit = tx_csum_limit;
 
-       dram_target_info = mv_mbus_dram_info();
+       pp->dram_target_info = mv_mbus_dram_info();
        /* Armada3700 requires setting default configuration of Mbus
         * windows, however without using filled mbus_dram_target_info
         * structure.
         */
-       if (dram_target_info || pp->neta_armada3700)
-               mvneta_conf_mbus_windows(pp, dram_target_info);
+       if (pp->dram_target_info || pp->neta_armada3700)
+               mvneta_conf_mbus_windows(pp, pp->dram_target_info);
 
        pp->tx_ring_size = MVNETA_MAX_TXD;
        pp->rx_ring_size = MVNETA_MAX_RXD;
@@ -4405,6 +4407,61 @@ static int mvneta_remove(struct platform_device *pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int mvneta_suspend(struct device *device)
+{
+       struct net_device *dev = dev_get_drvdata(device);
+       struct mvneta_port *pp = netdev_priv(dev);
+
+       if (netif_running(dev))
+               mvneta_stop(dev);
+       netif_device_detach(dev);
+       clk_disable_unprepare(pp->clk_bus);
+       clk_disable_unprepare(pp->clk);
+       return 0;
+}
+
+static int mvneta_resume(struct device *device)
+{
+       struct platform_device *pdev = to_platform_device(device);
+       struct net_device *dev = dev_get_drvdata(device);
+       struct mvneta_port *pp = netdev_priv(dev);
+       int err;
+
+       clk_prepare_enable(pp->clk);
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+       if (pp->dram_target_info || pp->neta_armada3700)
+               mvneta_conf_mbus_windows(pp, pp->dram_target_info);
+       if (pp->bm_priv) {
+               err = mvneta_bm_port_init(pdev, pp);
+               if (err < 0) {
+                       dev_info(&pdev->dev, "use SW buffer management\n");
+                       pp->bm_priv = NULL;
+               }
+       }
+       mvneta_defaults_set(pp);
+       err = mvneta_port_power_up(pp, pp->phy_interface);
+       if (err < 0) {
+               dev_err(device, "can't power up port\n");
+               return err;
+       }
+
+       if (pp->use_inband_status)
+               mvneta_fixed_link_update(pp, dev->phydev);
+
+       netif_device_attach(dev);
+       if (netif_running(dev)) {
+               mvneta_open(dev);
+               mvneta_set_rx_mode(dev);
+       }
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(mvneta_pm_ops, mvneta_suspend, mvneta_resume);
+
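For reference, SIMPLE_DEV_PM_OPS() is shorthand for a dev_pm_ops whose system-sleep hooks all share one suspend/resume pair; roughly, the line above expands to the following (a sketch, modulo the exact CONFIG_PM_SLEEP guards in linux/pm.h):

/* Approximate expansion: every system-sleep transition reuses the
 * same two callbacks; the hooks are empty without CONFIG_PM_SLEEP.
 */
static const struct dev_pm_ops mvneta_pm_ops = {
	.suspend  = mvneta_suspend,  .resume  = mvneta_resume,
	.freeze   = mvneta_suspend,  .thaw    = mvneta_resume,
	.poweroff = mvneta_suspend,  .restore = mvneta_resume,
};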
 static const struct of_device_id mvneta_match[] = {
        { .compatible = "marvell,armada-370-neta" },
        { .compatible = "marvell,armada-xp-neta" },
@@ -4419,6 +4476,7 @@ static struct platform_driver mvneta_driver = {
        .driver = {
                .name = MVNETA_DRIVER_NAME,
                .of_match_table = mvneta_match,
+               .pm = &mvneta_pm_ops,
        },
 };
 
index d00421b9ffea7c0569417e4ee3814469532802c8..af5bfa13d976d20ffa75129f27ce69a87ddc7bc1 100644
@@ -25,6 +25,7 @@
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/phy.h>
 #include <linux/clk.h>
 #include <linux/hrtimer.h>
 #define     MVPP2_SNOOP_PKT_SIZE_MASK          0x1ff
 #define     MVPP2_SNOOP_BUF_HDR_MASK           BIT(9)
 #define     MVPP2_RXQ_POOL_SHORT_OFFS          20
-#define     MVPP2_RXQ_POOL_SHORT_MASK          0x700000
+#define     MVPP21_RXQ_POOL_SHORT_MASK         0x700000
+#define     MVPP22_RXQ_POOL_SHORT_MASK         0xf00000
 #define     MVPP2_RXQ_POOL_LONG_OFFS           24
-#define     MVPP2_RXQ_POOL_LONG_MASK           0x7000000
+#define     MVPP21_RXQ_POOL_LONG_MASK          0x7000000
+#define     MVPP22_RXQ_POOL_LONG_MASK          0xf000000
 #define     MVPP2_RXQ_PACKET_OFFSET_OFFS       28
 #define     MVPP2_RXQ_PACKET_OFFSET_MASK       0x70000000
 #define     MVPP2_RXQ_DISABLE_MASK             BIT(31)
 /* Descriptor Manager Top Registers */
 #define MVPP2_RXQ_NUM_REG                      0x2040
 #define MVPP2_RXQ_DESC_ADDR_REG                        0x2044
+#define     MVPP22_DESC_ADDR_OFFS              8
 #define MVPP2_RXQ_DESC_SIZE_REG                        0x2048
 #define     MVPP2_RXQ_DESC_SIZE_MASK           0x3ff0
 #define MVPP2_RXQ_STATUS_UPDATE_REG(rxq)       (0x3000 + 4 * (rxq))
 #define MVPP2_TXQ_DESC_SIZE_REG                        0x2088
 #define     MVPP2_TXQ_DESC_SIZE_MASK           0x3ff0
 #define MVPP2_AGGR_TXQ_UPDATE_REG              0x2090
-#define MVPP2_TXQ_THRESH_REG                   0x2094
-#define     MVPP2_TRANSMITTED_THRESH_OFFSET    16
-#define     MVPP2_TRANSMITTED_THRESH_MASK      0x3fff0000
 #define MVPP2_TXQ_INDEX_REG                    0x2098
 #define MVPP2_TXQ_PREF_BUF_REG                 0x209c
 #define     MVPP2_PREF_BUF_PTR(desc)           ((desc) & 0xfff)
 #define MVPP2_TXQ_RSVD_CLR_REG                 0x20b8
 #define     MVPP2_TXQ_RSVD_CLR_OFFSET          16
 #define MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu)      (0x2100 + 4 * (cpu))
+#define     MVPP22_AGGR_TXQ_DESC_ADDR_OFFS     8
 #define MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu)      (0x2140 + 4 * (cpu))
 #define     MVPP2_AGGR_TXQ_DESC_SIZE_MASK      0x3ff0
 #define MVPP2_AGGR_TXQ_STATUS_REG(cpu)         (0x2180 + 4 * (cpu))
 #define MVPP2_WIN_REMAP(w)                     (0x4040 + ((w) << 2))
 #define MVPP2_BASE_ADDR_ENABLE                 0x4060
 
+/* AXI Bridge Registers */
+#define MVPP22_AXI_BM_WR_ATTR_REG              0x4100
+#define MVPP22_AXI_BM_RD_ATTR_REG              0x4104
+#define MVPP22_AXI_AGGRQ_DESCR_RD_ATTR_REG     0x4110
+#define MVPP22_AXI_TXQ_DESCR_WR_ATTR_REG       0x4114
+#define MVPP22_AXI_TXQ_DESCR_RD_ATTR_REG       0x4118
+#define MVPP22_AXI_RXQ_DESCR_WR_ATTR_REG       0x411c
+#define MVPP22_AXI_RX_DATA_WR_ATTR_REG         0x4120
+#define MVPP22_AXI_TX_DATA_RD_ATTR_REG         0x4130
+#define MVPP22_AXI_RD_NORMAL_CODE_REG          0x4150
+#define MVPP22_AXI_RD_SNOOP_CODE_REG           0x4154
+#define MVPP22_AXI_WR_NORMAL_CODE_REG          0x4160
+#define MVPP22_AXI_WR_SNOOP_CODE_REG           0x4164
+
+/* Values for AXI Bridge registers */
+#define MVPP22_AXI_ATTR_CACHE_OFFS             0
+#define MVPP22_AXI_ATTR_DOMAIN_OFFS            12
+
+#define MVPP22_AXI_CODE_CACHE_OFFS             0
+#define MVPP22_AXI_CODE_DOMAIN_OFFS            4
+
+#define MVPP22_AXI_CODE_CACHE_NON_CACHE                0x3
+#define MVPP22_AXI_CODE_CACHE_WR_CACHE         0x7
+#define MVPP22_AXI_CODE_CACHE_RD_CACHE         0xb
+
+#define MVPP22_AXI_CODE_DOMAIN_OUTER_DOM       2
+#define MVPP22_AXI_CODE_DOMAIN_SYSTEM          3
+
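The _OFFS and _CODE values above are meant to be combined into single register writes; a minimal sketch of how a PPv2.2 init path could program one attribute register (mvpp22_axi_attr() is a hypothetical helper, and a struct mvpp2 *priv is assumed in scope):

/* Hypothetical helper: build an AXI attribute value from the cache
 * attribute and shareability-domain fields defined above.
 */
static u32 mvpp22_axi_attr(u32 cache, u32 domain)
{
	return (cache << MVPP22_AXI_ATTR_CACHE_OFFS) |
	       (domain << MVPP22_AXI_ATTR_DOMAIN_OFFS);
}

/* e.g. cacheable writes in the outer-shareable domain for BM writes: */
mvpp2_write(priv, MVPP22_AXI_BM_WR_ATTR_REG,
	    mvpp22_axi_attr(MVPP22_AXI_CODE_CACHE_WR_CACHE,
			    MVPP22_AXI_CODE_DOMAIN_OUTER_DOM));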
 /* Interrupt Cause and Mask registers */
 #define MVPP2_ISR_RX_THRESHOLD_REG(rxq)                (0x5200 + 4 * (rxq))
 #define     MVPP2_MAX_ISR_RX_THRESHOLD         0xfffff0
-#define MVPP2_ISR_RXQ_GROUP_REG(rxq)           (0x5400 + 4 * (rxq))
+#define MVPP21_ISR_RXQ_GROUP_REG(rxq)          (0x5400 + 4 * (rxq))
+
+#define MVPP22_ISR_RXQ_GROUP_INDEX_REG          0x5400
+#define MVPP22_ISR_RXQ_GROUP_INDEX_SUBGROUP_MASK 0xf
+#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK   0x380
+#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET 7
+
+#define MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG     0x5404
+#define MVPP22_ISR_RXQ_SUB_GROUP_STARTQ_MASK    0x1f
+#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_MASK      0xf00
+#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET    8
+
 #define MVPP2_ISR_ENABLE_REG(port)             (0x5420 + 4 * (port))
 #define     MVPP2_ISR_ENABLE_INTERRUPT(mask)   ((mask) & 0xffff)
 #define     MVPP2_ISR_DISABLE_INTERRUPT(mask)  (((mask) << 16) & 0xffff0000)
 #define MVPP2_BM_PHY_ALLOC_REG(pool)           (0x6400 + ((pool) * 4))
 #define     MVPP2_BM_PHY_ALLOC_GRNTD_MASK      BIT(0)
 #define MVPP2_BM_VIRT_ALLOC_REG                        0x6440
+#define MVPP22_BM_ADDR_HIGH_ALLOC              0x6444
+#define     MVPP22_BM_ADDR_HIGH_PHYS_MASK      0xff
+#define     MVPP22_BM_ADDR_HIGH_VIRT_MASK      0xff00
+#define     MVPP22_BM_ADDR_HIGH_VIRT_SHIFT     8
 #define MVPP2_BM_PHY_RLS_REG(pool)             (0x6480 + ((pool) * 4))
 #define     MVPP2_BM_PHY_RLS_MC_BUFF_MASK      BIT(0)
 #define     MVPP2_BM_PHY_RLS_PRIO_EN_MASK      BIT(1)
 #define     MVPP2_BM_PHY_RLS_GRNTD_MASK                BIT(2)
 #define MVPP2_BM_VIRT_RLS_REG                  0x64c0
-#define MVPP2_BM_MC_RLS_REG                    0x64c4
-#define     MVPP2_BM_MC_ID_MASK                        0xfff
-#define     MVPP2_BM_FORCE_RELEASE_MASK                BIT(12)
+#define MVPP22_BM_ADDR_HIGH_RLS_REG            0x64c4
+#define     MVPP22_BM_ADDR_HIGH_PHYS_RLS_MASK  0xff
+#define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK  0xff00
+#define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT 8
 
 /* TX Scheduler registers */
 #define MVPP2_TXP_SCHED_PORT_INDEX_REG         0x8000
 #define      MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK        0x1fc0
 #define      MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \
                                        MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK)
+#define MVPP22_GMAC_CTRL_4_REG                 0x90
+#define      MVPP22_CTRL4_EXT_PIN_GMII_SEL     BIT(0)
+#define      MVPP22_CTRL4_DP_CLK_SEL           BIT(5)
+#define      MVPP22_CTRL4_SYNC_BYPASS          BIT(6)
+#define      MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7)
+
+/* Per-port XGMAC registers. PPv2.2 only, and only for GOP port 0;
+ * relative to port->base.
+ */
+#define MVPP22_XLG_CTRL3_REG                   0x11c
+#define      MVPP22_XLG_CTRL3_MACMODESELECT_MASK       (7 << 13)
+#define      MVPP22_XLG_CTRL3_MACMODESELECT_GMAC       (0 << 13)
+
+/* SMI registers. PPv2.2 only, relative to priv->iface_base. */
+#define MVPP22_SMI_MISC_CFG_REG                        0x1204
+#define      MVPP22_SMI_POLLING_EN             BIT(10)
+
+#define MVPP22_GMAC_BASE(port)         (0x7000 + (port) * 0x1000 + 0xe00)
 
 #define MVPP2_CAUSE_TXQ_SENT_DESC_ALL_MASK     0xff
 
 /* Maximum number of TXQs used by single port */
 #define MVPP2_MAX_TXQ                  8
 
-/* Maximum number of RXQs used by single port */
-#define MVPP2_MAX_RXQ                  8
-
 /* Default number of RXQs in use */
 #define MVPP2_DEFAULT_RXQ              4
 
-/* Total number of RXQs available to all ports */
-#define MVPP2_RXQ_TOTAL_NUM            (MVPP2_MAX_PORTS * MVPP2_MAX_RXQ)
-
 /* Max number of Rx descriptors */
 #define MVPP2_MAX_RXD                  128
 
@@ -615,6 +676,11 @@ enum mvpp2_prs_l3_cast {
  */
 #define MVPP2_BM_SHORT_PKT_SIZE                MVPP2_RX_MAX_PKT_SIZE(512)
 
+#define MVPP21_ADDR_SPACE_SZ           0
+#define MVPP22_ADDR_SPACE_SZ           SZ_64K
+
+#define MVPP2_MAX_CPUS                 4
+
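These constants give the per-CPU register window stride: PPv2.1 has a single shared window (stride 0, so every entry aliases the same registers), while PPv2.2 spaces the windows 64 KB apart. A sketch of the assumed probe-time mapping into the cpu_base[] array introduced below (base and i are stand-ins):

size_t stride = priv->hw_version == MVPP21 ?
		MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ;
for (i = 0; i < MVPP2_MAX_CPUS; i++)
	priv->cpu_base[i] = base + i * stride;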
 enum mvpp2_bm_type {
        MVPP2_BM_FREE,
        MVPP2_BM_SWF_LONG,
@@ -626,12 +692,19 @@ enum mvpp2_bm_type {
 /* Shared Packet Processor resources */
 struct mvpp2 {
        /* Shared registers' base addresses */
-       void __iomem *base;
        void __iomem *lms_base;
+       void __iomem *iface_base;
+
+       /* On PPv2.2, each CPU can access the shared registers through
+        * its own address space, each 64 KB apart from the others.
+        */
+       void __iomem *cpu_base[MVPP2_MAX_CPUS];
 
        /* Common clocks */
        struct clk *pp_clk;
        struct clk *gop_clk;
+       struct clk *mg_clk;
 
        /* List of pointers to port structures */
        struct mvpp2_port **port_list;
@@ -649,6 +722,12 @@ struct mvpp2 {
 
        /* Tclk value */
        u32 tclk;
+
+       /* HW version */
+       enum { MVPP21, MVPP22 } hw_version;
+
+       /* Maximum number of RXQs per port */
+       unsigned int max_port_rxqs;
 };
 
 struct mvpp2_pcpu_stats {
@@ -670,6 +749,11 @@ struct mvpp2_port_pcpu {
 struct mvpp2_port {
        u8 id;
 
+       /* Index of the port from the "group of ports" complex point
+        * of view
+        */
+       int gop_id;
+
        int irq;
 
        struct mvpp2 *priv;
@@ -741,22 +825,24 @@ struct mvpp2_port {
 #define MVPP2_RXD_L3_IP6               BIT(30)
 #define MVPP2_RXD_BUF_HDR              BIT(31)
 
-struct mvpp2_tx_desc {
+/* HW TX descriptor for PPv2.1 */
+struct mvpp21_tx_desc {
        u32 command;            /* Options used by HW for packet transmitting.*/
        u8  packet_offset;      /* the offset from the buffer beginning */
        u8  phys_txq;           /* destination queue ID                 */
        u16 data_size;          /* data size of transmitted packet in bytes */
-       u32 buf_phys_addr;      /* physical addr of transmitted buffer  */
+       u32 buf_dma_addr;       /* physical addr of transmitted buffer  */
        u32 buf_cookie;         /* cookie for access to TX buffer in tx path */
        u32 reserved1[3];       /* hw_cmd (for future use, BM, PON, PNC) */
        u32 reserved2;          /* reserved (for future use)            */
 };
 
-struct mvpp2_rx_desc {
+/* HW RX descriptor for PPv2.1 */
+struct mvpp21_rx_desc {
        u32 status;             /* info about received packet           */
        u16 reserved1;          /* parser_info (for future use, PnC)    */
        u16 data_size;          /* size of received packet in bytes     */
-       u32 buf_phys_addr;      /* physical address of the buffer       */
+       u32 buf_dma_addr;       /* physical address of the buffer       */
        u32 buf_cookie;         /* cookie for access to RX buffer in rx path */
        u16 reserved2;          /* gem_port_id (for future use, PON)    */
        u16 reserved3;          /* csum_l4 (for future use, PnC)        */
@@ -767,12 +853,51 @@ struct mvpp2_rx_desc {
        u32 reserved8;
 };
 
+/* HW TX descriptor for PPv2.2 */
+struct mvpp22_tx_desc {
+       u32 command;
+       u8  packet_offset;
+       u8  phys_txq;
+       u16 data_size;
+       u64 reserved1;
+       u64 buf_dma_addr_ptp;
+       u64 buf_cookie_misc;
+};
+
+/* HW RX descriptor for PPv2.2 */
+struct mvpp22_rx_desc {
+       u32 status;
+       u16 reserved1;
+       u16 data_size;
+       u32 reserved2;
+       u32 reserved3;
+       u64 buf_dma_addr_key_hash;
+       u64 buf_cookie_misc;
+};
+
+/* Opaque type used by the driver to manipulate the HW TX and RX
+ * descriptors
+ */
+struct mvpp2_tx_desc {
+       union {
+               struct mvpp21_tx_desc pp21;
+               struct mvpp22_tx_desc pp22;
+       };
+};
+
+struct mvpp2_rx_desc {
+       union {
+               struct mvpp21_rx_desc pp21;
+               struct mvpp22_rx_desc pp22;
+       };
+};
+
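On PPv2.2 the 64-bit descriptor fields multiplex a DMA address with other data (PTP information on TX, key/hash on RX), which is why the accessors added later in this patch mask with GENMASK_ULL(40, 0). For illustration, extracting the address portion of a TX descriptor, exactly as mvpp2_txdesc_dma_addr_get() below does:

u64 raw = tx_desc->pp22.buf_dma_addr_ptp;
dma_addr_t addr = raw & GENMASK_ULL(40, 0);	/* low bits: address */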
 struct mvpp2_txq_pcpu_buf {
        /* Transmitted SKB */
        struct sk_buff *skb;
 
        /* Physical address of transmitted buffer */
-       dma_addr_t phys;
+       dma_addr_t dma;
 
        /* Size transmitted */
        size_t size;
@@ -825,7 +950,7 @@ struct mvpp2_tx_queue {
        struct mvpp2_tx_desc *descs;
 
        /* DMA address of the Tx DMA descriptors array */
-       dma_addr_t descs_phys;
+       dma_addr_t descs_dma;
 
        /* Index of the last Tx DMA descriptor */
        int last_desc;
@@ -848,7 +973,7 @@ struct mvpp2_rx_queue {
        struct mvpp2_rx_desc *descs;
 
        /* DMA address of the RX DMA descriptors array */
-       dma_addr_t descs_phys;
+       dma_addr_t descs_dma;
 
        /* Index of the last RX DMA descriptor */
        int last_desc;
@@ -912,6 +1037,8 @@ struct mvpp2_bm_pool {
 
        /* Buffer Pointers Pool External (BPPE) size */
        int size;
+       /* BPPE size in bytes */
+       int size_bytes;
        /* Number of buffers for this pool */
        int buf_num;
        /* Pool buffer size */
@@ -922,29 +1049,13 @@ struct mvpp2_bm_pool {
 
        /* BPPE virtual base address */
        u32 *virt_addr;
-       /* BPPE physical base address */
-       dma_addr_t phys_addr;
+       /* BPPE DMA base address */
+       dma_addr_t dma_addr;
 
        /* Ports using BM pool */
        u32 port_map;
 };
 
-struct mvpp2_buff_hdr {
-       u32 next_buff_phys_addr;
-       u32 next_buff_virt_addr;
-       u16 byte_count;
-       u16 info;
-       u8  reserved1;          /* bm_qset (for future use, BM)         */
-};
-
-/* Buffer header info bits */
-#define MVPP2_B_HDR_INFO_MC_ID_MASK    0xfff
-#define MVPP2_B_HDR_INFO_MC_ID(info)   ((info) & MVPP2_B_HDR_INFO_MC_ID_MASK)
-#define MVPP2_B_HDR_INFO_LAST_OFFS     12
-#define MVPP2_B_HDR_INFO_LAST_MASK     BIT(12)
-#define MVPP2_B_HDR_INFO_IS_LAST(info) \
-          ((info & MVPP2_B_HDR_INFO_LAST_MASK) >> MVPP2_B_HDR_INFO_LAST_OFFS)
-
 /* Static declarations */
 
 /* Number of RXQs used by single port */
@@ -959,12 +1070,177 @@ static int txq_number = MVPP2_MAX_TXQ;
 
 static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
 {
-       writel(data, priv->base + offset);
+       writel(data, priv->cpu_base[0] + offset);
 }
 
 static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 {
-       return readl(priv->base + offset);
+       return readl(priv->cpu_base[0] + offset);
+}
+
+/* These accessors should be used to access:
+ *
+ * - per-CPU registers, where each CPU has its own copy of the
+ *   register.
+ *
+ *   MVPP2_BM_VIRT_ALLOC_REG
+ *   MVPP22_BM_ADDR_HIGH_ALLOC
+ *   MVPP22_BM_ADDR_HIGH_RLS_REG
+ *   MVPP2_BM_VIRT_RLS_REG
+ *   MVPP2_ISR_RX_TX_CAUSE_REG
+ *   MVPP2_ISR_RX_TX_MASK_REG
+ *   MVPP2_TXQ_NUM_REG
+ *   MVPP2_AGGR_TXQ_UPDATE_REG
+ *   MVPP2_TXQ_RSVD_REQ_REG
+ *   MVPP2_TXQ_RSVD_RSLT_REG
+ *   MVPP2_TXQ_SENT_REG
+ *   MVPP2_RXQ_NUM_REG
+ *
+ * - global registers that must be accessed through a specific CPU
+ *   window, because they are related to an access to a per-CPU
+ *   register
+ *
+ *   MVPP2_BM_PHY_ALLOC_REG    (related to MVPP2_BM_VIRT_ALLOC_REG)
+ *   MVPP2_BM_PHY_RLS_REG      (related to MVPP2_BM_VIRT_RLS_REG)
+ *   MVPP2_RXQ_THRESH_REG      (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_RXQ_DESC_ADDR_REG   (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_RXQ_DESC_SIZE_REG   (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_RXQ_INDEX_REG       (related to MVPP2_RXQ_NUM_REG)
+ *   MVPP2_TXQ_PENDING_REG     (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_DESC_ADDR_REG   (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_DESC_SIZE_REG   (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_INDEX_REG       (related to MVPP2_TXQ_NUM_REG)
+ *   MVPP2_TXQ_PREF_BUF_REG    (related to MVPP2_TXQ_NUM_REG)
+ */
+static void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
+                              u32 offset, u32 data)
+{
+       writel(data, priv->cpu_base[cpu] + offset);
+}
+
+static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
+                            u32 offset)
+{
+       return readl(priv->cpu_base[cpu] + offset);
+}
+
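A sketch of the indirect-access pairing the comment above describes: the queue-select write and the dependent accesses must go through the same CPU's window (the sketch pins the CPU id with get_cpu()/put_cpu(); the driver itself calls smp_processor_id() at these sites):

int cpu = get_cpu();
mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
pending = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_PENDING_REG) &
	  MVPP2_TXQ_PENDING_MASK;
put_cpu();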
+static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
+                                           struct mvpp2_tx_desc *tx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return tx_desc->pp21.buf_dma_addr;
+       else
+               return tx_desc->pp22.buf_dma_addr_ptp & GENMASK_ULL(40, 0);
+}
+
+static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port,
+                                     struct mvpp2_tx_desc *tx_desc,
+                                     dma_addr_t dma_addr)
+{
+       if (port->priv->hw_version == MVPP21) {
+               tx_desc->pp21.buf_dma_addr = dma_addr;
+       } else {
+               u64 val = (u64)dma_addr;
+
+               tx_desc->pp22.buf_dma_addr_ptp &= ~GENMASK_ULL(40, 0);
+               tx_desc->pp22.buf_dma_addr_ptp |= val;
+       }
+}
+
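Callers are expected to go through these helpers rather than touch the pp21/pp22 layouts directly; a typical TX-path usage sketch (the accessors are from this patch, the surrounding variables are assumed):

dma_addr_t buf_dma = dma_map_single(port->dev->dev.parent, data, len,
				    DMA_TO_DEVICE);
mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma);
mvpp2_txdesc_size_set(port, tx_desc, len);
mvpp2_txdesc_txq_set(port, tx_desc, txq->id);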
+static size_t mvpp2_txdesc_size_get(struct mvpp2_port *port,
+                                   struct mvpp2_tx_desc *tx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return tx_desc->pp21.data_size;
+       else
+               return tx_desc->pp22.data_size;
+}
+
+static void mvpp2_txdesc_size_set(struct mvpp2_port *port,
+                                 struct mvpp2_tx_desc *tx_desc,
+                                 size_t size)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.data_size = size;
+       else
+               tx_desc->pp22.data_size = size;
+}
+
+static void mvpp2_txdesc_txq_set(struct mvpp2_port *port,
+                                struct mvpp2_tx_desc *tx_desc,
+                                unsigned int txq)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.phys_txq = txq;
+       else
+               tx_desc->pp22.phys_txq = txq;
+}
+
+static void mvpp2_txdesc_cmd_set(struct mvpp2_port *port,
+                                struct mvpp2_tx_desc *tx_desc,
+                                unsigned int command)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.command = command;
+       else
+               tx_desc->pp22.command = command;
+}
+
+static void mvpp2_txdesc_offset_set(struct mvpp2_port *port,
+                                   struct mvpp2_tx_desc *tx_desc,
+                                   unsigned int offset)
+{
+       if (port->priv->hw_version == MVPP21)
+               tx_desc->pp21.packet_offset = offset;
+       else
+               tx_desc->pp22.packet_offset = offset;
+}
+
+static unsigned int mvpp2_txdesc_offset_get(struct mvpp2_port *port,
+                                           struct mvpp2_tx_desc *tx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return tx_desc->pp21.packet_offset;
+       else
+               return tx_desc->pp22.packet_offset;
+}
+
+static dma_addr_t mvpp2_rxdesc_dma_addr_get(struct mvpp2_port *port,
+                                           struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.buf_dma_addr;
+       else
+               return rx_desc->pp22.buf_dma_addr_key_hash & GENMASK_ULL(40, 0);
+}
+
+static unsigned long mvpp2_rxdesc_cookie_get(struct mvpp2_port *port,
+                                            struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.buf_cookie;
+       else
+               return rx_desc->pp22.buf_cookie_misc & GENMASK_ULL(40, 0);
+}
+
+static size_t mvpp2_rxdesc_size_get(struct mvpp2_port *port,
+                                   struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.data_size;
+       else
+               return rx_desc->pp22.data_size;
+}
+
+static u32 mvpp2_rxdesc_status_get(struct mvpp2_port *port,
+                                  struct mvpp2_rx_desc *rx_desc)
+{
+       if (port->priv->hw_version == MVPP21)
+               return rx_desc->pp21.status;
+       else
+               return rx_desc->pp22.status;
 }
 
 static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu)
@@ -974,15 +1250,17 @@ static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu)
                txq_pcpu->txq_get_index = 0;
 }
 
-static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu,
+static void mvpp2_txq_inc_put(struct mvpp2_port *port,
+                             struct mvpp2_txq_pcpu *txq_pcpu,
                              struct sk_buff *skb,
                              struct mvpp2_tx_desc *tx_desc)
 {
        struct mvpp2_txq_pcpu_buf *tx_buf =
                txq_pcpu->buffs + txq_pcpu->txq_put_index;
        tx_buf->skb = skb;
-       tx_buf->size = tx_desc->data_size;
-       tx_buf->phys = tx_desc->buf_phys_addr + tx_desc->packet_offset;
+       tx_buf->size = mvpp2_txdesc_size_get(port, tx_desc);
+       tx_buf->dma = mvpp2_txdesc_dma_addr_get(port, tx_desc) +
+               mvpp2_txdesc_offset_get(port, tx_desc);
        txq_pcpu->txq_put_index++;
        if (txq_pcpu->txq_put_index == txq_pcpu->size)
                txq_pcpu->txq_put_index = 0;
@@ -3378,27 +3656,39 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
                                struct mvpp2 *priv,
                                struct mvpp2_bm_pool *bm_pool, int size)
 {
-       int size_bytes;
        u32 val;
 
-       size_bytes = sizeof(u32) * size;
-       bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, size_bytes,
-                                               &bm_pool->phys_addr,
+       /* Number of buffer pointers must be a multiple of 16, as per
+        * hardware constraints
+        */
+       if (!IS_ALIGNED(size, 16))
+               return -EINVAL;
+
+       /* PPv2.1 needs 8 bytes per buffer pointer, PPv2.2 needs 16
+        * bytes per buffer pointer
+        */
+       if (priv->hw_version == MVPP21)
+               bm_pool->size_bytes = 2 * sizeof(u32) * size;
+       else
+               bm_pool->size_bytes = 2 * sizeof(u64) * size;
+
+       bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes,
+                                               &bm_pool->dma_addr,
                                                GFP_KERNEL);
        if (!bm_pool->virt_addr)
                return -ENOMEM;
 
        if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr,
                        MVPP2_BM_POOL_PTR_ALIGN)) {
-               dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr,
-                                 bm_pool->phys_addr);
+               dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+                                 bm_pool->virt_addr, bm_pool->dma_addr);
                dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
                        bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
                return -ENOMEM;
        }
 
        mvpp2_write(priv, MVPP2_BM_POOL_BASE_REG(bm_pool->id),
-                   bm_pool->phys_addr);
+                   lower_32_bits(bm_pool->dma_addr));
        mvpp2_write(priv, MVPP2_BM_POOL_SIZE_REG(bm_pool->id), size);
 
        val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id));
@@ -3426,6 +3716,34 @@ static void mvpp2_bm_pool_bufsize_set(struct mvpp2 *priv,
        mvpp2_write(priv, MVPP2_POOL_BUF_SIZE_REG(bm_pool->id), val);
 }
 
+static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
+                                   struct mvpp2_bm_pool *bm_pool,
+                                   dma_addr_t *dma_addr,
+                                   phys_addr_t *phys_addr)
+{
+       int cpu = smp_processor_id();
+
+       *dma_addr = mvpp2_percpu_read(priv, cpu,
+                                     MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
+       *phys_addr = mvpp2_percpu_read(priv, cpu, MVPP2_BM_VIRT_ALLOC_REG);
+
+       if (priv->hw_version == MVPP22) {
+               u32 val;
+               u32 dma_addr_highbits, phys_addr_highbits;
+
+               val = mvpp2_percpu_read(priv, cpu, MVPP22_BM_ADDR_HIGH_ALLOC);
+               dma_addr_highbits = (val & MVPP22_BM_ADDR_HIGH_PHYS_MASK);
+               phys_addr_highbits = (val & MVPP22_BM_ADDR_HIGH_VIRT_MASK) >>
+                       MVPP22_BM_ADDR_HIGH_VIRT_SHIFT;
+
+               if (sizeof(dma_addr_t) == 8)
+                       *dma_addr |= (u64)dma_addr_highbits << 32;
+
+               if (sizeof(phys_addr_t) == 8)
+                       *phys_addr |= (u64)phys_addr_highbits << 32;
+       }
+}
+
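A worked example of the reconstruction above, with an invented register value: if MVPP22_BM_ADDR_HIGH_ALLOC reads back val = 0x0201, bits [7:0] = 0x01 are the DMA-address high bits and bits [15:8] = 0x02 the physical-address high bits, so on 64-bit configurations:

*dma_addr  |= (u64)0x01 << 32;	/* from MVPP22_BM_ADDR_HIGH_PHYS_MASK */
*phys_addr |= (u64)0x02 << 32;	/* from MVPP22_BM_ADDR_HIGH_VIRT_MASK */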
 /* Free all buffers from the pool */
 static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
                               struct mvpp2_bm_pool *bm_pool)
@@ -3433,21 +3751,21 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
        int i;
 
        for (i = 0; i < bm_pool->buf_num; i++) {
-               dma_addr_t buf_phys_addr;
-               unsigned long vaddr;
+               dma_addr_t buf_dma_addr;
+               phys_addr_t buf_phys_addr;
+               void *data;
 
-               /* Get buffer virtual address (indirect access) */
-               buf_phys_addr = mvpp2_read(priv,
-                                          MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
-               vaddr = mvpp2_read(priv, MVPP2_BM_VIRT_ALLOC_REG);
+               mvpp2_bm_bufs_get_addrs(dev, priv, bm_pool,
+                                       &buf_dma_addr, &buf_phys_addr);
 
-               dma_unmap_single(dev, buf_phys_addr,
+               dma_unmap_single(dev, buf_dma_addr,
                                 bm_pool->buf_size, DMA_FROM_DEVICE);
 
-               if (!vaddr)
+               data = (void *)phys_to_virt(buf_phys_addr);
+               if (!data)
                        break;
 
-               mvpp2_frag_free(bm_pool, (void *)vaddr);
+               mvpp2_frag_free(bm_pool, data);
        }
 
        /* Update BM driver with number of buffers removed from pool */
@@ -3471,9 +3789,9 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
        val |= MVPP2_BM_STOP_MASK;
        mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
 
-       dma_free_coherent(&pdev->dev, sizeof(u32) * bm_pool->size,
+       dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
                          bm_pool->virt_addr,
-                         bm_pool->phys_addr);
+                         bm_pool->dma_addr);
        return 0;
 }
 
@@ -3529,17 +3847,20 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
 static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
                                    int lrxq, int long_pool)
 {
-       u32 val;
+       u32 val, mask;
        int prxq;
 
        /* Get queue physical ID */
        prxq = port->rxqs[lrxq]->id;
 
-       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
-       val &= ~MVPP2_RXQ_POOL_LONG_MASK;
-       val |= ((long_pool << MVPP2_RXQ_POOL_LONG_OFFS) &
-                   MVPP2_RXQ_POOL_LONG_MASK);
+       if (port->priv->hw_version == MVPP21)
+               mask = MVPP21_RXQ_POOL_LONG_MASK;
+       else
+               mask = MVPP22_RXQ_POOL_LONG_MASK;
 
+       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+       val &= ~mask;
+       val |= (long_pool << MVPP2_RXQ_POOL_LONG_OFFS) & mask;
        mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
@@ -3547,40 +3868,45 @@ static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
 static void mvpp2_rxq_short_pool_set(struct mvpp2_port *port,
                                     int lrxq, int short_pool)
 {
-       u32 val;
+       u32 val, mask;
        int prxq;
 
        /* Get queue physical ID */
        prxq = port->rxqs[lrxq]->id;
 
-       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
-       val &= ~MVPP2_RXQ_POOL_SHORT_MASK;
-       val |= ((short_pool << MVPP2_RXQ_POOL_SHORT_OFFS) &
-                   MVPP2_RXQ_POOL_SHORT_MASK);
+       if (port->priv->hw_version == MVPP21)
+               mask = MVPP21_RXQ_POOL_SHORT_MASK;
+       else
+               mask = MVPP22_RXQ_POOL_SHORT_MASK;
 
+       val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(prxq));
+       val &= ~mask;
+       val |= (short_pool << MVPP2_RXQ_POOL_SHORT_OFFS) & mask;
        mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
 static void *mvpp2_buf_alloc(struct mvpp2_port *port,
                             struct mvpp2_bm_pool *bm_pool,
-                            dma_addr_t *buf_phys_addr,
+                            dma_addr_t *buf_dma_addr,
+                            phys_addr_t *buf_phys_addr,
                             gfp_t gfp_mask)
 {
-       dma_addr_t phys_addr;
+       dma_addr_t dma_addr;
        void *data;
 
        data = mvpp2_frag_alloc(bm_pool);
        if (!data)
                return NULL;
 
-       phys_addr = dma_map_single(port->dev->dev.parent, data,
-                                  MVPP2_RX_BUF_SIZE(bm_pool->pkt_size),
-                                   DMA_FROM_DEVICE);
-       if (unlikely(dma_mapping_error(port->dev->dev.parent, phys_addr))) {
+       dma_addr = dma_map_single(port->dev->dev.parent, data,
+                                 MVPP2_RX_BUF_SIZE(bm_pool->pkt_size),
+                                 DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(port->dev->dev.parent, dma_addr))) {
                mvpp2_frag_free(bm_pool, data);
                return NULL;
        }
-       *buf_phys_addr = phys_addr;
+       *buf_dma_addr = dma_addr;
+       *buf_phys_addr = virt_to_phys(data);
 
        return data;
 }
@@ -3604,37 +3930,46 @@ static inline int mvpp2_bm_cookie_pool_get(unsigned long cookie)
 
 /* Release buffer to BM */
 static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
-                                    dma_addr_t buf_phys_addr,
-                                    unsigned long buf_virt_addr)
+                                    dma_addr_t buf_dma_addr,
+                                    phys_addr_t buf_phys_addr)
 {
-       mvpp2_write(port->priv, MVPP2_BM_VIRT_RLS_REG, buf_virt_addr);
-       mvpp2_write(port->priv, MVPP2_BM_PHY_RLS_REG(pool), buf_phys_addr);
-}
+       int cpu = smp_processor_id();
 
-/* Release multicast buffer */
-static void mvpp2_bm_pool_mc_put(struct mvpp2_port *port, int pool,
-                                dma_addr_t buf_phys_addr,
-                                unsigned long buf_virt_addr,
-                                int mc_id)
-{
-       u32 val = 0;
+       if (port->priv->hw_version == MVPP22) {
+               u32 val = 0;
+
+               if (sizeof(dma_addr_t) == 8)
+                       val |= upper_32_bits(buf_dma_addr) &
+                               MVPP22_BM_ADDR_HIGH_PHYS_RLS_MASK;
+
+               if (sizeof(phys_addr_t) == 8)
+                       val |= (upper_32_bits(buf_phys_addr)
+                               << MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
+                               MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;
 
-       val |= (mc_id & MVPP2_BM_MC_ID_MASK);
-       mvpp2_write(port->priv, MVPP2_BM_MC_RLS_REG, val);
+               mvpp2_percpu_write(port->priv, cpu,
+                                  MVPP22_BM_ADDR_HIGH_RLS_REG, val);
+       }
 
-       mvpp2_bm_pool_put(port, pool,
-                         buf_phys_addr | MVPP2_BM_PHY_RLS_MC_BUFF_MASK,
-                         buf_virt_addr);
+       /* MVPP2_BM_VIRT_RLS_REG is not interpreted by the HW, and is
+        * simply returned in the "cookie" field of the RX descriptor.
+        * Instead of storing the virtual address, we store the
+        * physical address.
+        */
+       mvpp2_percpu_write(port->priv, cpu,
+                          MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
+       mvpp2_percpu_write(port->priv, cpu,
+                          MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
 }
 
 /* Refill BM pool */
 static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
-                             dma_addr_t phys_addr,
-                             unsigned long cookie)
+                             dma_addr_t dma_addr,
+                             phys_addr_t phys_addr)
 {
        int pool = mvpp2_bm_cookie_pool_get(bm);
 
-       mvpp2_bm_pool_put(port, pool, phys_addr, cookie);
+       mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 }
 
 /* Allocate buffers for the pool */
@@ -3642,7 +3977,8 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
                             struct mvpp2_bm_pool *bm_pool, int buf_num)
 {
        int i, buf_size, total_size;
-       dma_addr_t phys_addr;
+       dma_addr_t dma_addr;
+       phys_addr_t phys_addr;
        void *buf;
 
        buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
@@ -3657,12 +3993,13 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
        }
 
        for (i = 0; i < buf_num; i++) {
-               buf = mvpp2_buf_alloc(port, bm_pool, &phys_addr, GFP_KERNEL);
+               buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr,
+                                     &phys_addr, GFP_KERNEL);
                if (!buf)
                        break;
 
-               mvpp2_bm_pool_put(port, bm_pool->id, phys_addr,
-                                 (unsigned long)buf);
+               mvpp2_bm_pool_put(port, bm_pool->id, dma_addr,
+                                 phys_addr);
        }
 
        /* Update BM driver with number of buffers added to pool */
@@ -3830,7 +4167,8 @@ static void mvpp2_interrupts_mask(void *arg)
 {
        struct mvpp2_port *port = arg;
 
-       mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id), 0);
+       mvpp2_percpu_write(port->priv, smp_processor_id(),
+                          MVPP2_ISR_RX_TX_MASK_REG(port->id), 0);
 }
 
 /* Unmask the current CPU's Rx/Tx interrupts */
@@ -3838,17 +4176,46 @@ static void mvpp2_interrupts_unmask(void *arg)
 {
        struct mvpp2_port *port = arg;
 
-       mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id),
-                   (MVPP2_CAUSE_MISC_SUM_MASK |
-                    MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
+       mvpp2_percpu_write(port->priv, smp_processor_id(),
+                          MVPP2_ISR_RX_TX_MASK_REG(port->id),
+                          (MVPP2_CAUSE_MISC_SUM_MASK |
+                           MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
 }
 
 /* Port configuration routines */
 
+static void mvpp22_port_mii_set(struct mvpp2_port *port)
+{
+       u32 val;
+
+       /* Only GOP port 0 has an XLG MAC */
+       if (port->gop_id == 0) {
+               val = readl(port->base + MVPP22_XLG_CTRL3_REG);
+               val &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+               val |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
+               writel(val, port->base + MVPP22_XLG_CTRL3_REG);
+       }
+
+       val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
+       if (port->phy_interface == PHY_INTERFACE_MODE_RGMII)
+               val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+       else
+               val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+       val &= ~MVPP22_CTRL4_DP_CLK_SEL;
+       val |= MVPP22_CTRL4_SYNC_BYPASS;
+       val |= MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+       writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
+}
+
 static void mvpp2_port_mii_set(struct mvpp2_port *port)
 {
        u32 val;
 
+       if (port->priv->hw_version == MVPP22)
+               mvpp22_port_mii_set(port);
+
        val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 
        switch (port->phy_interface) {
@@ -3952,16 +4319,18 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
 {
        int tx_port_num, val, queue, ptxq, lrxq;
 
-       /* Configure port to loopback if needed */
-       if (port->flags & MVPP2_F_LOOPBACK)
-               mvpp2_port_loopback_set(port);
+       if (port->priv->hw_version == MVPP21) {
+               /* Configure port to loopback if needed */
+               if (port->flags & MVPP2_F_LOOPBACK)
+                       mvpp2_port_loopback_set(port);
 
-       /* Update TX FIFO MIN Threshold */
-       val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
-       val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
-       /* Min. TX threshold must be less than minimal packet length */
-       val |= MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(64 - 4 - 2);
-       writel(val, port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+               /* Update TX FIFO MIN Threshold */
+               val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+               val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
+               /* Min. TX threshold must be less than minimal packet length */
+               val |= MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(64 - 4 - 2);
+               writel(val, port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
+       }
 
        /* Disable Legacy WRR, Disable EJP, Release from reset */
        tx_port_num = mvpp2_egress_port(port);
@@ -4149,11 +4518,15 @@ static void mvpp2_rxq_offset_set(struct mvpp2_port *port,
 }
 
 /* Obtain BM cookie information from descriptor */
-static u32 mvpp2_bm_cookie_build(struct mvpp2_rx_desc *rx_desc)
+static u32 mvpp2_bm_cookie_build(struct mvpp2_port *port,
+                                struct mvpp2_rx_desc *rx_desc)
 {
-       int pool = (rx_desc->status & MVPP2_RXD_BM_POOL_ID_MASK) >>
-                  MVPP2_RXD_BM_POOL_ID_OFFS;
        int cpu = smp_processor_id();
+       int pool;
+
+       pool = (mvpp2_rxdesc_status_get(port, rx_desc) &
+               MVPP2_RXD_BM_POOL_ID_MASK) >>
+               MVPP2_RXD_BM_POOL_ID_OFFS;
 
        return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
               ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
@@ -4161,18 +4534,6 @@ static u32 mvpp2_bm_cookie_build(struct mvpp2_rx_desc *rx_desc)
 
 /* Tx descriptors helper methods */
 
-/* Get number of Tx descriptors waiting to be transmitted by HW */
-static int mvpp2_txq_pend_desc_num_get(struct mvpp2_port *port,
-                                      struct mvpp2_tx_queue *txq)
-{
-       u32 val;
-
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       val = mvpp2_read(port->priv, MVPP2_TXQ_PENDING_REG);
-
-       return val & MVPP2_TXQ_PENDING_MASK;
-}
-
 /* Get pointer to next Tx descriptor to be processed (send) by HW */
 static struct mvpp2_tx_desc *
 mvpp2_txq_next_desc_get(struct mvpp2_tx_queue *txq)
@@ -4187,7 +4548,8 @@ mvpp2_txq_next_desc_get(struct mvpp2_tx_queue *txq)
 static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending)
 {
        /* aggregated access - relevant TXQ number is written in TX desc */
-       mvpp2_write(port->priv, MVPP2_AGGR_TXQ_UPDATE_REG, pending);
+       mvpp2_percpu_write(port->priv, smp_processor_id(),
+                          MVPP2_AGGR_TXQ_UPDATE_REG, pending);
 }
 
 
@@ -4216,11 +4578,12 @@ static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
                                         struct mvpp2_tx_queue *txq, int num)
 {
        u32 val;
+       int cpu = smp_processor_id();
 
        val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
-       mvpp2_write(priv, MVPP2_TXQ_RSVD_REQ_REG, val);
+       mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
 
-       val = mvpp2_read(priv, MVPP2_TXQ_RSVD_RSLT_REG);
+       val = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
 
        return val & MVPP2_TXQ_RSVD_RSLT_MASK;
 }
@@ -4321,7 +4684,8 @@ static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
        u32 val;
 
        /* Reading status reg resets transmitted descriptor counter */
-       val = mvpp2_read(port->priv, MVPP2_TXQ_SENT_REG(txq->id));
+       val = mvpp2_percpu_read(port->priv, smp_processor_id(),
+                               MVPP2_TXQ_SENT_REG(txq->id));
 
        return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
                MVPP2_TRANSMITTED_COUNT_OFFSET;
@@ -4335,7 +4699,8 @@ static void mvpp2_txq_sent_counter_clear(void *arg)
        for (queue = 0; queue < txq_number; queue++) {
                int id = port->txqs[queue]->id;
 
-               mvpp2_read(port->priv, MVPP2_TXQ_SENT_REG(id));
+               mvpp2_percpu_read(port->priv, smp_processor_id(),
+                                 MVPP2_TXQ_SENT_REG(id));
        }
 }
 
@@ -4394,12 +4759,14 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
 static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
                                   struct mvpp2_rx_queue *rxq)
 {
+       int cpu = smp_processor_id();
+
        if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
                rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
 
-       mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id);
-       mvpp2_write(port->priv, MVPP2_RXQ_THRESH_REG,
-                   rxq->pkts_coal);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
+                          rxq->pkts_coal);
 }
 
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
@@ -4449,7 +4816,7 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
                struct mvpp2_txq_pcpu_buf *tx_buf =
                        txq_pcpu->buffs + txq_pcpu->txq_get_index;
 
-               dma_unmap_single(port->dev->dev.parent, tx_buf->phys,
+               dma_unmap_single(port->dev->dev.parent, tx_buf->dma,
                                 tx_buf->size, DMA_TO_DEVICE);
                if (tx_buf->skb)
                        dev_kfree_skb_any(tx_buf->skb);
@@ -4527,10 +4894,12 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
                               int desc_num, int cpu,
                               struct mvpp2 *priv)
 {
+       u32 txq_dma;
+
        /* Allocate memory for TX descriptors */
        aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
                                desc_num * MVPP2_DESC_ALIGNED_SIZE,
-                               &aggr_txq->descs_phys, GFP_KERNEL);
+                               &aggr_txq->descs_dma, GFP_KERNEL);
        if (!aggr_txq->descs)
                return -ENOMEM;
 
@@ -4540,10 +4909,16 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
        aggr_txq->next_desc_to_proc = mvpp2_read(priv,
                                                 MVPP2_AGGR_TXQ_INDEX_REG(cpu));
 
-       /* Set Tx descriptors queue starting address */
-       /* indirect access */
-       mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu),
-                   aggr_txq->descs_phys);
+       /* Set Tx descriptors queue starting address - indirect
+        * access
+        */
+       if (priv->hw_version == MVPP21)
+               txq_dma = aggr_txq->descs_dma;
+       else
+               txq_dma = aggr_txq->descs_dma >>
+                       MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
+
+       mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
        mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num);
 
        return 0;
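On PPv2.2 the address register holds the ring base shifted down by MVPP22_AGGR_TXQ_DESC_ADDR_OFFS, which is also what lets a 40-bit address fit a 32-bit register write. A quick numeric check (example address invented; the implied 256-byte ring alignment is an assumption read off the shift):

dma_addr_t descs_dma = 0x0012345600ULL;	/* example 40-bit ring base */
u32 txq_dma = descs_dma >> MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
/* txq_dma == 0x00123456; 40 - 8 = 32 bits, so it fits in the u32 */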
@@ -4554,12 +4929,15 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
                          struct mvpp2_rx_queue *rxq)
 
 {
+       u32 rxq_dma;
+       int cpu;
+
        rxq->size = port->rx_ring_size;
 
        /* Allocate memory for RX descriptors */
        rxq->descs = dma_alloc_coherent(port->dev->dev.parent,
                                        rxq->size * MVPP2_DESC_ALIGNED_SIZE,
-                                       &rxq->descs_phys, GFP_KERNEL);
+                                       &rxq->descs_dma, GFP_KERNEL);
        if (!rxq->descs)
                return -ENOMEM;
 
@@ -4569,10 +4947,15 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
        mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
 
        /* Set Rx descriptors queue starting address - indirect access */
-       mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_ADDR_REG, rxq->descs_phys);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
-       mvpp2_write(port->priv, MVPP2_RXQ_INDEX_REG, 0);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+       if (port->priv->hw_version == MVPP21)
+               rxq_dma = rxq->descs_dma;
+       else
+               rxq_dma = rxq->descs_dma >> MVPP22_DESC_ADDR_OFFS;
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
 
        /* Set Offset */
        mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD);
@@ -4599,10 +4982,11 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 
        for (i = 0; i < rx_received; i++) {
                struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
-               u32 bm = mvpp2_bm_cookie_build(rx_desc);
+               u32 bm = mvpp2_bm_cookie_build(port, rx_desc);
 
-               mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr,
-                                 rx_desc->buf_cookie);
+               mvpp2_pool_refill(port, bm,
+                                 mvpp2_rxdesc_dma_addr_get(port, rx_desc),
+                                 mvpp2_rxdesc_cookie_get(port, rx_desc));
        }
        mvpp2_rxq_status_update(port, rxq->id, rx_received, rx_received);
 }
@@ -4611,26 +4995,29 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 static void mvpp2_rxq_deinit(struct mvpp2_port *port,
                             struct mvpp2_rx_queue *rxq)
 {
+       int cpu;
+
        mvpp2_rxq_drop_pkts(port, rxq);
 
        if (rxq->descs)
                dma_free_coherent(port->dev->dev.parent,
                                  rxq->size * MVPP2_DESC_ALIGNED_SIZE,
                                  rxq->descs,
-                                 rxq->descs_phys);
+                                 rxq->descs_dma);
 
        rxq->descs             = NULL;
        rxq->last_desc         = 0;
        rxq->next_desc_to_proc = 0;
-       rxq->descs_phys        = 0;
+       rxq->descs_dma         = 0;
 
        /* Clear Rx descriptors queue starting address and size;
         * free descriptor number
         */
        mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
-       mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_ADDR_REG, 0);
-       mvpp2_write(port->priv, MVPP2_RXQ_DESC_SIZE_REG, 0);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
 }
 
 /* Create and initialize a Tx queue */
@@ -4646,23 +5033,25 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        /* Allocate memory for Tx descriptors */
        txq->descs = dma_alloc_coherent(port->dev->dev.parent,
                                txq->size * MVPP2_DESC_ALIGNED_SIZE,
-                               &txq->descs_phys, GFP_KERNEL);
+                               &txq->descs_dma, GFP_KERNEL);
        if (!txq->descs)
                return -ENOMEM;
 
        txq->last_desc = txq->size - 1;
 
        /* Set Tx descriptors queue starting address - indirect access */
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_ADDR_REG, txq->descs_phys);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_SIZE_REG, txq->size &
-                                            MVPP2_TXQ_DESC_SIZE_MASK);
-       mvpp2_write(port->priv, MVPP2_TXQ_INDEX_REG, 0);
-       mvpp2_write(port->priv, MVPP2_TXQ_RSVD_CLR_REG,
-                   txq->id << MVPP2_TXQ_RSVD_CLR_OFFSET);
-       val = mvpp2_read(port->priv, MVPP2_TXQ_PENDING_REG);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
+                          txq->descs_dma);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG,
+                          txq->size & MVPP2_TXQ_DESC_SIZE_MASK);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_INDEX_REG, 0);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_RSVD_CLR_REG,
+                          txq->id << MVPP2_TXQ_RSVD_CLR_OFFSET);
+       val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PENDING_REG);
        val &= ~MVPP2_TXQ_PENDING_MASK;
-       mvpp2_write(port->priv, MVPP2_TXQ_PENDING_REG, val);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PENDING_REG, val);
 
        /* Calculate base address in prefetch buffer. We reserve 16 descriptors
         * for each existing TXQ.
@@ -4673,9 +5062,9 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        desc = (port->id * MVPP2_MAX_TXQ * desc_per_txq) +
               (txq->log_id * desc_per_txq);
 
-       mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG,
-                   MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
-                   MVPP2_PREF_BUF_THRESH(desc_per_txq/2));
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
+                          MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
+                          MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
 
        /* WRR / EJP configuration - indirect access */
        tx_port_num = mvpp2_egress_port(port);
@@ -4716,7 +5105,7 @@ error:
 
        dma_free_coherent(port->dev->dev.parent,
                          txq->size * MVPP2_DESC_ALIGNED_SIZE,
-                         txq->descs, txq->descs_phys);
+                         txq->descs, txq->descs_dma);
 
        return -ENOMEM;
 }
@@ -4736,20 +5125,21 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
        if (txq->descs)
                dma_free_coherent(port->dev->dev.parent,
                                  txq->size * MVPP2_DESC_ALIGNED_SIZE,
-                                 txq->descs, txq->descs_phys);
+                                 txq->descs, txq->descs_dma);
 
        txq->descs             = NULL;
        txq->last_desc         = 0;
        txq->next_desc_to_proc = 0;
-       txq->descs_phys        = 0;
+       txq->descs_dma         = 0;
 
        /* Set minimum bandwidth for disabled TXQs */
        mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
 
        /* Set Tx descriptors queue starting address and size */
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_ADDR_REG, 0);
-       mvpp2_write(port->priv, MVPP2_TXQ_DESC_SIZE_REG, 0);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
 }
 
 /* Cleanup Tx ports */
@@ -4759,10 +5149,11 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
        int delay, pending, cpu;
        u32 val;
 
-       mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id);
-       val = mvpp2_read(port->priv, MVPP2_TXQ_PREF_BUF_REG);
+       cpu = smp_processor_id();
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+       val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
        val |= MVPP2_TXQ_DRAIN_EN_MASK;
-       mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG, val);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
 
        /* The napi queue has been stopped so wait for all packets
         * to be transmitted.
@@ -4778,11 +5169,13 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
                mdelay(1);
                delay++;
 
-               pending = mvpp2_txq_pend_desc_num_get(port, txq);
+               pending = mvpp2_percpu_read(port->priv, cpu,
+                                           MVPP2_TXQ_PENDING_REG);
+               pending &= MVPP2_TXQ_PENDING_MASK;
        } while (pending);
 
        val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
-       mvpp2_write(port->priv, MVPP2_TXQ_PREF_BUF_REG, val);
+       mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
 
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
@@ -4991,20 +5384,21 @@ static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer)
 static void mvpp2_rx_error(struct mvpp2_port *port,
                           struct mvpp2_rx_desc *rx_desc)
 {
-       u32 status = rx_desc->status;
+       u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
+       size_t sz = mvpp2_rxdesc_size_get(port, rx_desc);
 
        switch (status & MVPP2_RXD_ERR_CODE_MASK) {
        case MVPP2_RXD_ERR_CRC:
-               netdev_err(port->dev, "bad rx status %08x (crc error), size=%d\n",
-                          status, rx_desc->data_size);
+               netdev_err(port->dev, "bad rx status %08x (crc error), size=%zu\n",
+                          status, sz);
                break;
        case MVPP2_RXD_ERR_OVERRUN:
-               netdev_err(port->dev, "bad rx status %08x (overrun error), size=%d\n",
-                          status, rx_desc->data_size);
+               netdev_err(port->dev, "bad rx status %08x (overrun error), size=%zu\n",
+                          status, sz);
                break;
        case MVPP2_RXD_ERR_RESOURCE:
-               netdev_err(port->dev, "bad rx status %08x (resource error), size=%d\n",
-                          status, rx_desc->data_size);
+               netdev_err(port->dev, "bad rx status %08x (resource error), size=%zu\n",
+                          status, sz);
                break;
        }
 }
@@ -5031,15 +5425,17 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
 static int mvpp2_rx_refill(struct mvpp2_port *port,
                           struct mvpp2_bm_pool *bm_pool, u32 bm)
 {
-       dma_addr_t phys_addr;
+       dma_addr_t dma_addr;
+       phys_addr_t phys_addr;
        void *buf;
 
        /* No recycle or too many buffers are in use, so allocate a new skb */
-       buf = mvpp2_buf_alloc(port, bm_pool, &phys_addr, GFP_ATOMIC);
+       buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr, &phys_addr,
+                             GFP_ATOMIC);
        if (!buf)
                return -ENOMEM;
 
-       mvpp2_pool_refill(port, bm, phys_addr, (unsigned long)buf);
+       mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
 
        return 0;
 }
@@ -5075,43 +5471,6 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
        return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;
 }
 
-static void mvpp2_buff_hdr_rx(struct mvpp2_port *port,
-                             struct mvpp2_rx_desc *rx_desc)
-{
-       struct mvpp2_buff_hdr *buff_hdr;
-       struct sk_buff *skb;
-       u32 rx_status = rx_desc->status;
-       dma_addr_t buff_phys_addr;
-       unsigned long buff_virt_addr;
-       dma_addr_t buff_phys_addr_next;
-       unsigned long buff_virt_addr_next;
-       int mc_id;
-       int pool_id;
-
-       pool_id = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
-                  MVPP2_RXD_BM_POOL_ID_OFFS;
-       buff_phys_addr = rx_desc->buf_phys_addr;
-       buff_virt_addr = rx_desc->buf_cookie;
-
-       do {
-               skb = (struct sk_buff *)buff_virt_addr;
-               buff_hdr = (struct mvpp2_buff_hdr *)skb->head;
-
-               mc_id = MVPP2_B_HDR_INFO_MC_ID(buff_hdr->info);
-
-               buff_phys_addr_next = buff_hdr->next_buff_phys_addr;
-               buff_virt_addr_next = buff_hdr->next_buff_virt_addr;
-
-               /* Release buffer */
-               mvpp2_bm_pool_mc_put(port, pool_id, buff_phys_addr,
-                                    buff_virt_addr, mc_id);
-
-               buff_phys_addr = buff_phys_addr_next;
-               buff_virt_addr = buff_virt_addr_next;
-
-       } while (!MVPP2_B_HDR_INFO_IS_LAST(buff_hdr->info));
-}
-
 /* Main rx processing */
 static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                    struct mvpp2_rx_queue *rxq)
@@ -5132,25 +5491,23 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                struct mvpp2_bm_pool *bm_pool;
                struct sk_buff *skb;
                unsigned int frag_size;
-               dma_addr_t phys_addr;
+               dma_addr_t dma_addr;
+               phys_addr_t phys_addr;
                u32 bm, rx_status;
                int pool, rx_bytes, err;
                void *data;
 
                rx_done++;
-               rx_status = rx_desc->status;
-               rx_bytes = rx_desc->data_size - MVPP2_MH_SIZE;
-               phys_addr = rx_desc->buf_phys_addr;
-               data = (void *)(uintptr_t)rx_desc->buf_cookie;
-
-               bm = mvpp2_bm_cookie_build(rx_desc);
+               rx_status = mvpp2_rxdesc_status_get(port, rx_desc);
+               rx_bytes = mvpp2_rxdesc_size_get(port, rx_desc);
+               rx_bytes -= MVPP2_MH_SIZE;
+               dma_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc);
+               phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
+               data = (void *)phys_to_virt(phys_addr);
+
+               bm = mvpp2_bm_cookie_build(port, rx_desc);
                pool = mvpp2_bm_cookie_pool_get(bm);
                bm_pool = &port->priv->bm_pools[pool];
-               /* Check if buffer header is used */
-               if (rx_status & MVPP2_RXD_BUF_HDR) {
-                       mvpp2_buff_hdr_rx(port, rx_desc);
-                       continue;
-               }
 
                /* In case of an error, release the requested buffer pointer
                 * to the Buffer Manager. This request process is controlled
@@ -5162,9 +5519,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                        dev->stats.rx_errors++;
                        mvpp2_rx_error(port, rx_desc);
                        /* Return the buffer to the pool */
-
-                       mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr,
-                                         rx_desc->buf_cookie);
+                       mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
                        continue;
                }
 
@@ -5185,7 +5540,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                        goto err_drop_frame;
                }
 
-               dma_unmap_single(dev->dev.parent, phys_addr,
+               dma_unmap_single(dev->dev.parent, dma_addr,
                                 bm_pool->buf_size, DMA_FROM_DEVICE);
 
                rcvd_pkts++;
@@ -5216,11 +5571,15 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 }
 
 static inline void
-tx_desc_unmap_put(struct device *dev, struct mvpp2_tx_queue *txq,
+tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
                  struct mvpp2_tx_desc *desc)
 {
-       dma_unmap_single(dev, desc->buf_phys_addr,
-                        desc->data_size, DMA_TO_DEVICE);
+       dma_addr_t buf_dma_addr =
+               mvpp2_txdesc_dma_addr_get(port, desc);
+       size_t buf_sz =
+               mvpp2_txdesc_size_get(port, desc);
+       dma_unmap_single(port->dev->dev.parent, buf_dma_addr,
+                        buf_sz, DMA_TO_DEVICE);
        mvpp2_txq_desc_put(txq);
 }
 
@@ -5232,35 +5591,38 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
        struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
        struct mvpp2_tx_desc *tx_desc;
        int i;
-       dma_addr_t buf_phys_addr;
+       dma_addr_t buf_dma_addr;
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
                void *addr = page_address(frag->page.p) + frag->page_offset;
 
                tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
-               tx_desc->phys_txq = txq->id;
-               tx_desc->data_size = frag->size;
+               mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+               mvpp2_txdesc_size_set(port, tx_desc, frag->size);
 
-               buf_phys_addr = dma_map_single(port->dev->dev.parent, addr,
-                                              tx_desc->data_size,
+               buf_dma_addr = dma_map_single(port->dev->dev.parent, addr,
+                                              frag->size,
                                               DMA_TO_DEVICE);
-               if (dma_mapping_error(port->dev->dev.parent, buf_phys_addr)) {
+               if (dma_mapping_error(port->dev->dev.parent, buf_dma_addr)) {
                        mvpp2_txq_desc_put(txq);
                        goto error;
                }
 
-               tx_desc->packet_offset = buf_phys_addr & MVPP2_TX_DESC_ALIGN;
-               tx_desc->buf_phys_addr = buf_phys_addr & (~MVPP2_TX_DESC_ALIGN);
+               mvpp2_txdesc_offset_set(port, tx_desc,
+                                       buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+               mvpp2_txdesc_dma_addr_set(port, tx_desc,
+                                         buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
 
                if (i == (skb_shinfo(skb)->nr_frags - 1)) {
                        /* Last descriptor */
-                       tx_desc->command = MVPP2_TXD_L_DESC;
-                       mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc);
+                       mvpp2_txdesc_cmd_set(port, tx_desc,
+                                            MVPP2_TXD_L_DESC);
+                       mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
                } else {
                        /* Descriptor in the middle: Not First, Not Last */
-                       tx_desc->command = 0;
-                       mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc);
+                       mvpp2_txdesc_cmd_set(port, tx_desc, 0);
+                       mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
                }
        }
 
@@ -5272,7 +5634,7 @@ error:
         */
        for (i = i - 1; i >= 0; i--) {
                tx_desc = txq->descs + i;
-               tx_desc_unmap_put(port->dev->dev.parent, txq, tx_desc);
+               tx_desc_unmap_put(port, txq, tx_desc);
        }
 
        return -ENOMEM;
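
Throughout the TX path the mapped address is split across two descriptor fields: the low bits go into the packet offset, the masked remainder into the buffer address, so data buffers need not be descriptor-aligned. A toy userspace check of the split and its reassembly, assuming MVPP2_TX_DESC_ALIGN is a low-bits mask such as 0x3f:

#include <assert.h>
#include <stdint.h>

#define MVPP2_TX_DESC_ALIGN	0x3f	/* assumption: low-bits mask */

int main(void)
{
	uint64_t buf_dma_addr = 0x12345678abcdULL;
	uint64_t offset = buf_dma_addr & MVPP2_TX_DESC_ALIGN;
	uint64_t base = buf_dma_addr & ~(uint64_t)MVPP2_TX_DESC_ALIGN;

	/* The hardware adds the offset back onto the aligned base. */
	assert(base + offset == buf_dma_addr);
	return 0;
}
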
@@ -5285,7 +5647,7 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
        struct mvpp2_tx_queue *txq, *aggr_txq;
        struct mvpp2_txq_pcpu *txq_pcpu;
        struct mvpp2_tx_desc *tx_desc;
-       dma_addr_t buf_phys_addr;
+       dma_addr_t buf_dma_addr;
        int frags = 0;
        u16 txq_id;
        u32 tx_cmd;
@@ -5307,35 +5669,38 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
 
        /* Get a descriptor for the first part of the packet */
        tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
-       tx_desc->phys_txq = txq->id;
-       tx_desc->data_size = skb_headlen(skb);
+       mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+       mvpp2_txdesc_size_set(port, tx_desc, skb_headlen(skb));
 
-       buf_phys_addr = dma_map_single(dev->dev.parent, skb->data,
-                                      tx_desc->data_size, DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(dev->dev.parent, buf_phys_addr))) {
+       buf_dma_addr = dma_map_single(dev->dev.parent, skb->data,
+                                     skb_headlen(skb), DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
                mvpp2_txq_desc_put(txq);
                frags = 0;
                goto out;
        }
-       tx_desc->packet_offset = buf_phys_addr & MVPP2_TX_DESC_ALIGN;
-       tx_desc->buf_phys_addr = buf_phys_addr & ~MVPP2_TX_DESC_ALIGN;
+
+       mvpp2_txdesc_offset_set(port, tx_desc,
+                               buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+       mvpp2_txdesc_dma_addr_set(port, tx_desc,
+                                 buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
 
        tx_cmd = mvpp2_skb_tx_csum(port, skb);
 
        if (frags == 1) {
                /* First and Last descriptor */
                tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_L_DESC;
-               tx_desc->command = tx_cmd;
-               mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc);
+               mvpp2_txdesc_cmd_set(port, tx_desc, tx_cmd);
+               mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
        } else {
                /* First but not Last */
                tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_PADDING_DISABLE;
-               tx_desc->command = tx_cmd;
-               mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc);
+               mvpp2_txdesc_cmd_set(port, tx_desc, tx_cmd);
+               mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
 
                /* Continue with other skb fragments */
                if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) {
-                       tx_desc_unmap_put(port->dev->dev.parent, txq, tx_desc);
+                       tx_desc_unmap_put(port, txq, tx_desc);
                        frags = 0;
                        goto out;
                }
@@ -5396,6 +5761,7 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
        u32 cause_rx_tx, cause_rx, cause_misc;
        int rx_done = 0;
        struct mvpp2_port *port = netdev_priv(napi->dev);
+       int cpu = smp_processor_id();
 
        /* Rx/Tx cause register
         *
@@ -5407,8 +5773,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
         *
         * Each CPU has its own Rx/Tx cause register
         */
-       cause_rx_tx = mvpp2_read(port->priv,
-                                MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
+       cause_rx_tx = mvpp2_percpu_read(port->priv, cpu,
+                                       MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
        cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
        cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
 
@@ -5417,8 +5783,9 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 
                /* Clear the cause register */
                mvpp2_write(port->priv, MVPP2_ISR_MISC_CAUSE_REG, 0);
-               mvpp2_write(port->priv, MVPP2_ISR_RX_TX_CAUSE_REG(port->id),
-                           cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
+               mvpp2_percpu_write(port->priv, cpu,
+                                  MVPP2_ISR_RX_TX_CAUSE_REG(port->id),
+                                  cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
        }
 
        cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
@@ -5530,7 +5897,7 @@ static int mvpp2_check_ringparam_valid(struct net_device *dev,
        return 0;
 }
 
-static void mvpp2_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
+static void mvpp21_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
 {
        u32 mac_addr_l, mac_addr_m, mac_addr_h;
 
@@ -5975,16 +6342,6 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
        .set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
-/* Driver initialization */
-
-static void mvpp2_port_power_up(struct mvpp2_port *port)
-{
-       mvpp2_port_mii_set(port);
-       mvpp2_port_periodic_xon_disable(port);
-       mvpp2_port_fc_adv_enable(port);
-       mvpp2_port_reset(port);
-}
-
 /* Initialize port HW */
 static int mvpp2_port_init(struct mvpp2_port *port)
 {
@@ -5993,7 +6350,8 @@ static int mvpp2_port_init(struct mvpp2_port *port)
        struct mvpp2_txq_pcpu *txq_pcpu;
        int queue, cpu, err;
 
-       if (port->first_rxq + rxq_number > MVPP2_RXQ_TOTAL_NUM)
+       if (port->first_rxq + rxq_number >
+           MVPP2_MAX_PORTS * priv->max_port_rxqs)
                return -EINVAL;
 
        /* Disable port */
@@ -6061,7 +6419,18 @@ static int mvpp2_port_init(struct mvpp2_port *port)
        }
 
        /* Configure Rx queue group interrupt for this port */
-       mvpp2_write(priv, MVPP2_ISR_RXQ_GROUP_REG(port->id), rxq_number);
+       if (priv->hw_version == MVPP21) {
+               mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
+                           rxq_number);
+       } else {
+               u32 val;
+
+               val = (port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
+               mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
+
+               val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
+               mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
+       }
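
The same two-step sequence, select a group through the index register and then program its size, reappears in mvpp2_init() below. A hypothetical helper (not part of this patch) that captures the indirect-access pattern:

/* Hypothetical factoring of the PPv2.2 indirect RXQ-group access. */
static void mvpp22_rxq_group_config(struct mvpp2 *priv, int group, int nrxqs)
{
	u32 val;

	/* Select which group the following write targets... */
	val = group << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET;
	mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);

	/* ...then program that group's size. */
	val = nrxqs << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET;
	mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
}
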
 
        /* Create Rx descriptor rings */
        for (queue = 0; queue < rxq_number; queue++) {
@@ -6103,8 +6472,7 @@ err_free_percpu:
 /* Ports initialization */
 static int mvpp2_port_probe(struct platform_device *pdev,
                            struct device_node *port_node,
-                           struct mvpp2 *priv,
-                           int *next_first_rxq)
+                           struct mvpp2 *priv)
 {
        struct device_node *phy_node;
        struct mvpp2_port *port;
@@ -6117,7 +6485,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        u32 id;
        int features;
        int phy_mode;
-       int priv_common_regs_num = 2;
        int err, i, cpu;
 
        dev = alloc_etherdev_mqs(sizeof(struct mvpp2_port), txq_number,
@@ -6163,16 +6530,30 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
        port->priv = priv;
        port->id = id;
-       port->first_rxq = *next_first_rxq;
+       if (priv->hw_version == MVPP21)
+               port->first_rxq = port->id * rxq_number;
+       else
+               port->first_rxq = port->id * priv->max_port_rxqs;
+
        port->phy_node = phy_node;
        port->phy_interface = phy_mode;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM,
-                                   priv_common_regs_num + id);
-       port->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(port->base)) {
-               err = PTR_ERR(port->base);
-               goto err_free_irq;
+       if (priv->hw_version == MVPP21) {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
+               port->base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(port->base)) {
+                       err = PTR_ERR(port->base);
+                       goto err_free_irq;
+               }
+       } else {
+               if (of_property_read_u32(port_node, "gop-port-id",
+                                        &port->gop_id)) {
+                       err = -EINVAL;
+                       dev_err(&pdev->dev, "missing gop-port-id value\n");
+                       goto err_free_irq;
+               }
+
+               port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
        }
 
        /* Alloc per-cpu stats */
@@ -6187,7 +6568,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                mac_from = "device tree";
                ether_addr_copy(dev->dev_addr, dt_mac_addr);
        } else {
-               mvpp2_get_mac_address(port, hw_mac_addr);
+               if (priv->hw_version == MVPP21)
+                       mvpp21_get_mac_address(port, hw_mac_addr);
                if (is_valid_ether_addr(hw_mac_addr)) {
                        mac_from = "hardware";
                        ether_addr_copy(dev->dev_addr, hw_mac_addr);
@@ -6207,7 +6589,14 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                dev_err(&pdev->dev, "failed to init port %d\n", id);
                goto err_free_stats;
        }
-       mvpp2_port_power_up(port);
+
+       mvpp2_port_mii_set(port);
+       mvpp2_port_periodic_xon_disable(port);
+
+       if (priv->hw_version == MVPP21)
+               mvpp2_port_fc_adv_enable(port);
+
+       mvpp2_port_reset(port);
 
        port->pcpu = alloc_percpu(struct mvpp2_port_pcpu);
        if (!port->pcpu) {
@@ -6245,8 +6634,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        }
        netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr);
 
-       /* Increment the first Rx queue number to be used by the next port */
-       *next_first_rxq += rxq_number;
        priv->port_list[id] = port;
        return 0;
 
@@ -6330,6 +6717,60 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv)
        mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
 }
 
+static void mvpp2_axi_init(struct mvpp2 *priv)
+{
+       u32 val, rdval, wrval;
+
+       mvpp2_write(priv, MVPP22_BM_ADDR_HIGH_RLS_REG, 0x0);
+
+       /* AXI Bridge Configuration */
+
+       rdval = MVPP22_AXI_CODE_CACHE_RD_CACHE
+               << MVPP22_AXI_ATTR_CACHE_OFFS;
+       rdval |= MVPP22_AXI_CODE_DOMAIN_OUTER_DOM
+               << MVPP22_AXI_ATTR_DOMAIN_OFFS;
+
+       wrval = MVPP22_AXI_CODE_CACHE_WR_CACHE
+               << MVPP22_AXI_ATTR_CACHE_OFFS;
+       wrval |= MVPP22_AXI_CODE_DOMAIN_OUTER_DOM
+               << MVPP22_AXI_ATTR_DOMAIN_OFFS;
+
+       /* BM */
+       mvpp2_write(priv, MVPP22_AXI_BM_WR_ATTR_REG, wrval);
+       mvpp2_write(priv, MVPP22_AXI_BM_RD_ATTR_REG, rdval);
+
+       /* Descriptors */
+       mvpp2_write(priv, MVPP22_AXI_AGGRQ_DESCR_RD_ATTR_REG, rdval);
+       mvpp2_write(priv, MVPP22_AXI_TXQ_DESCR_WR_ATTR_REG, wrval);
+       mvpp2_write(priv, MVPP22_AXI_TXQ_DESCR_RD_ATTR_REG, rdval);
+       mvpp2_write(priv, MVPP22_AXI_RXQ_DESCR_WR_ATTR_REG, wrval);
+
+       /* Buffer Data */
+       mvpp2_write(priv, MVPP22_AXI_TX_DATA_RD_ATTR_REG, rdval);
+       mvpp2_write(priv, MVPP22_AXI_RX_DATA_WR_ATTR_REG, wrval);
+
+       val = MVPP22_AXI_CODE_CACHE_NON_CACHE
+               << MVPP22_AXI_CODE_CACHE_OFFS;
+       val |= MVPP22_AXI_CODE_DOMAIN_SYSTEM
+               << MVPP22_AXI_CODE_DOMAIN_OFFS;
+       mvpp2_write(priv, MVPP22_AXI_RD_NORMAL_CODE_REG, val);
+       mvpp2_write(priv, MVPP22_AXI_WR_NORMAL_CODE_REG, val);
+
+       val = MVPP22_AXI_CODE_CACHE_RD_CACHE
+               << MVPP22_AXI_CODE_CACHE_OFFS;
+       val |= MVPP22_AXI_CODE_DOMAIN_OUTER_DOM
+               << MVPP22_AXI_CODE_DOMAIN_OFFS;
+
+       mvpp2_write(priv, MVPP22_AXI_RD_SNOOP_CODE_REG, val);
+
+       val = MVPP22_AXI_CODE_CACHE_WR_CACHE
+               << MVPP22_AXI_CODE_CACHE_OFFS;
+       val |= MVPP22_AXI_CODE_DOMAIN_OUTER_DOM
+               << MVPP22_AXI_CODE_DOMAIN_OFFS;
+
+       mvpp2_write(priv, MVPP22_AXI_WR_SNOOP_CODE_REG, val);
+}
+
 /* Initialize network controller common part HW */
 static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
 {
@@ -6338,7 +6779,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
        u32 val;
 
        /* Checks for hardware constraints */
-       if (rxq_number % 4 || (rxq_number > MVPP2_MAX_RXQ) ||
+       if (rxq_number % 4 || (rxq_number > priv->max_port_rxqs) ||
            (txq_number > MVPP2_MAX_TXQ)) {
                dev_err(&pdev->dev, "invalid queue size parameter\n");
                return -EINVAL;
@@ -6349,10 +6790,19 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
        if (dram_target_info)
                mvpp2_conf_mbus_windows(dram_target_info, priv);
 
+       if (priv->hw_version == MVPP22)
+               mvpp2_axi_init(priv);
+
        /* Disable HW PHY polling */
-       val = readl(priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
-       val |= MVPP2_PHY_AN_STOP_SMI0_MASK;
-       writel(val, priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
+       if (priv->hw_version == MVPP21) {
+               val = readl(priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
+               val |= MVPP2_PHY_AN_STOP_SMI0_MASK;
+               writel(val, priv->lms_base + MVPP2_PHY_AN_CFG0_REG);
+       } else {
+               val = readl(priv->iface_base + MVPP22_SMI_MISC_CFG_REG);
+               val &= ~MVPP22_SMI_POLLING_EN;
+               writel(val, priv->iface_base + MVPP22_SMI_MISC_CFG_REG);
+       }
 
        /* Allocate and initialize aggregated TXQs */
        priv->aggr_txqs = devm_kcalloc(&pdev->dev, num_present_cpus(),
@@ -6374,11 +6824,25 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
        mvpp2_rx_fifo_init(priv);
 
        /* Reset Rx queue group interrupt configuration */
-       for (i = 0; i < MVPP2_MAX_PORTS; i++)
-               mvpp2_write(priv, MVPP2_ISR_RXQ_GROUP_REG(i), rxq_number);
+       for (i = 0; i < MVPP2_MAX_PORTS; i++) {
+               if (priv->hw_version == MVPP21) {
+                       mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(i),
+                                   rxq_number);
+                       continue;
+               } else {
+                       u32 val;
+
+                       val = (i << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
+                       mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
 
-       writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
-              priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
+                       val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
+                       mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
+               }
+       }
+
+       if (priv->hw_version == MVPP21)
+               writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
+                      priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
 
        /* Allow cache snoop when transmitting packets */
        mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1);
@@ -6405,22 +6869,46 @@ static int mvpp2_probe(struct platform_device *pdev)
        struct device_node *port_node;
        struct mvpp2 *priv;
        struct resource *res;
-       int port_count, first_rxq;
+       void __iomem *base;
+       int port_count, cpu;
        int err;
 
        priv = devm_kzalloc(&pdev->dev, sizeof(struct mvpp2), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
 
+       priv->hw_version =
+               (unsigned long)of_device_get_match_data(&pdev->dev);
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(priv->base))
-               return PTR_ERR(priv->base);
+       base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       if (priv->hw_version == MVPP21) {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+               priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(priv->lms_base))
+                       return PTR_ERR(priv->lms_base);
+       } else {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+               priv->iface_base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(priv->iface_base))
+                       return PTR_ERR(priv->iface_base);
+       }
+
+       for_each_present_cpu(cpu) {
+               u32 addr_space_sz;
+
+               addr_space_sz = (priv->hw_version == MVPP21 ?
+                                MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ);
+               priv->cpu_base[cpu] = base + cpu * addr_space_sz;
+       }
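
Each present CPU thus gets its own window into the register space. The mvpp2_percpu_read()/mvpp2_percpu_write() helpers used throughout this patch are defined outside these hunks; presumably they are thin wrappers of roughly this shape:

/* Sketch, assuming priv->cpu_base[cpu] points at that CPU's window. */
static void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
			       u32 offset, u32 data)
{
	writel(data, priv->cpu_base[cpu] + offset);
}

static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu, u32 offset)
{
	return readl(priv->cpu_base[cpu] + offset);
}
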
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(priv->lms_base))
-               return PTR_ERR(priv->lms_base);
+       if (priv->hw_version == MVPP21)
+               priv->max_port_rxqs = 8;
+       else
+               priv->max_port_rxqs = 32;
 
        priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk");
        if (IS_ERR(priv->pp_clk))
@@ -6438,21 +6926,47 @@ static int mvpp2_probe(struct platform_device *pdev)
        if (err < 0)
                goto err_pp_clk;
 
+       if (priv->hw_version == MVPP22) {
+               priv->mg_clk = devm_clk_get(&pdev->dev, "mg_clk");
+               if (IS_ERR(priv->mg_clk)) {
+                       err = PTR_ERR(priv->mg_clk);
+                       goto err_gop_clk;
+               }
+
+               err = clk_prepare_enable(priv->mg_clk);
+               if (err < 0)
+                       goto err_gop_clk;
+       }
+
        /* Get system's tclk rate */
        priv->tclk = clk_get_rate(priv->pp_clk);
 
+       if (priv->hw_version == MVPP22) {
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(40));
+               if (err)
+                       goto err_mg_clk;
+               /* Sadly, the BM pools all share the same register to
+                * store the high 32 bits of their address. So they
+                * must all have the same high 32 bits, which forces
+                * us to restrict coherent memory to DMA_BIT_MASK(32).
+                */
+               err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+               if (err)
+                       goto err_mg_clk;
+       }
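
With the coherent mask capped at 32 bits, the shared high-bits register can simply stay at the zero that mvpp2_axi_init() writes. Were it ever programmed, a helper would look roughly like this; the function is hypothetical, only the register name comes from the patch:

/* Hypothetical: one register holds the high bits for *all* BM pools,
 * so it can only carry a value common to every pool's buffers.
 */
static void mvpp22_bm_addr_high_set(struct mvpp2 *priv, dma_addr_t addr)
{
	mvpp2_write(priv, MVPP22_BM_ADDR_HIGH_RLS_REG, upper_32_bits(addr));
}
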
+
        /* Initialize network controller */
        err = mvpp2_init(pdev, priv);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to initialize controller\n");
-               goto err_gop_clk;
+               goto err_mg_clk;
        }
 
        port_count = of_get_available_child_count(dn);
        if (port_count == 0) {
                dev_err(&pdev->dev, "no ports enabled\n");
                err = -ENODEV;
-               goto err_gop_clk;
+               goto err_mg_clk;
        }
 
        priv->port_list = devm_kcalloc(&pdev->dev, port_count,
@@ -6460,20 +6974,22 @@ static int mvpp2_probe(struct platform_device *pdev)
                                      GFP_KERNEL);
        if (!priv->port_list) {
                err = -ENOMEM;
-               goto err_gop_clk;
+               goto err_mg_clk;
        }
 
        /* Initialize ports */
-       first_rxq = 0;
        for_each_available_child_of_node(dn, port_node) {
-               err = mvpp2_port_probe(pdev, port_node, priv, &first_rxq);
+               err = mvpp2_port_probe(pdev, port_node, priv);
                if (err < 0)
-                       goto err_gop_clk;
+                       goto err_mg_clk;
        }
 
        platform_set_drvdata(pdev, priv);
        return 0;
 
+err_mg_clk:
+       if (priv->hw_version == MVPP22)
+               clk_disable_unprepare(priv->mg_clk);
 err_gop_clk:
        clk_disable_unprepare(priv->gop_clk);
 err_pp_clk:
@@ -6506,9 +7022,10 @@ static int mvpp2_remove(struct platform_device *pdev)
                dma_free_coherent(&pdev->dev,
                                  MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
                                  aggr_txq->descs,
-                                 aggr_txq->descs_phys);
+                                 aggr_txq->descs_dma);
        }
 
+       clk_disable_unprepare(priv->mg_clk);
        clk_disable_unprepare(priv->pp_clk);
        clk_disable_unprepare(priv->gop_clk);
 
@@ -6516,7 +7033,14 @@ static int mvpp2_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id mvpp2_match[] = {
-       { .compatible = "marvell,armada-375-pp2" },
+       {
+               .compatible = "marvell,armada-375-pp2",
+               .data = (void *)MVPP21,
+       },
+       {
+               .compatible = "marvell,armada-7k-pp22",
+               .data = (void *)MVPP22,
+       },
        { }
 };
 MODULE_DEVICE_TABLE(of, mvpp2_match);
index 9e757684816d48b903f62cdac2d6a1123e6c3305..bf6317eca2f6bde8c739a50ae9722353549749df 100644
@@ -1908,10 +1908,9 @@ static int __init mtk_init(struct net_device *dev)
 
        /* If the mac address is invalid, use random mac address  */
        if (!is_valid_ether_addr(dev->dev_addr)) {
-               random_ether_addr(dev->dev_addr);
+               eth_hw_addr_random(dev);
                dev_err(eth->dev, "generated random MAC address %pM\n",
                        dev->dev_addr);
-               dev->addr_assign_type = NET_ADDR_RANDOM;
        }
 
        return mtk_phy_connect(dev);
index e8c105164931f31ff0cf5ed12acef455d0010eda..0e0fa70305659521ed50d1cf1bc40fd38aa3ad04 100644
@@ -2305,6 +2305,17 @@ static int sync_toggles(struct mlx4_dev *dev)
                rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
                if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
                        /* PCI might be offline */
+
+                       /* If device removal has been requested,
+                        * do not continue retrying.
+                        */
+                       if (dev->persist->interface_state &
+                           MLX4_INTERFACE_STATE_NOWAIT) {
+                               mlx4_warn(dev,
+                                         "communication channel is offline\n");
+                               return -EIO;
+                       }
+
                        msleep(100);
                        wr_toggle = swab32(readl(&priv->mfunc.comm->
                                           slave_write));
index c4d714fcc7dae759998a49a1f90f9ab1ee9bdda3..ffbcb27c05e55f43630a812249bab21609886dd9 100644
@@ -117,7 +117,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
        /* port statistics */
        "tso_packets",
        "xmit_more",
-       "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed",
+       "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_pages",
        "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload",
 
        /* pf statistics */
index 61420473fe5fb57032fa50de9a0d2abfa71831d6..94fab20ef146bd5874a21ec2ecbe3dea16180aec 100644
@@ -92,7 +92,9 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       return mlx4_en_setup_tc(dev, tc->tc);
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return mlx4_en_setup_tc(dev, tc->mqprio->num_tc);
 }
 
 #ifdef CONFIG_RFS_ACCEL
index 9166d90e732858610b1407fe85cbf6cbe27f5e0b..e0eb695318e64ebcaf58d6edb5f9a57be6f9ddf6 100644
@@ -213,6 +213,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        priv->port_stats.rx_chksum_good = 0;
        priv->port_stats.rx_chksum_none = 0;
        priv->port_stats.rx_chksum_complete = 0;
+       priv->port_stats.rx_alloc_pages = 0;
        priv->xdp_stats.rx_xdp_drop    = 0;
        priv->xdp_stats.rx_xdp_tx      = 0;
        priv->xdp_stats.rx_xdp_tx_full = 0;
@@ -223,6 +224,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
                priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok);
                priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none);
                priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete);
+               priv->port_stats.rx_alloc_pages += READ_ONCE(ring->rx_alloc_pages);
                priv->xdp_stats.rx_xdp_drop     += READ_ONCE(ring->xdp_drop);
                priv->xdp_stats.rx_xdp_tx       += READ_ONCE(ring->xdp_tx);
                priv->xdp_stats.rx_xdp_tx_full  += READ_ONCE(ring->xdp_tx_full);
index 867292880c07a15124a0cf099d1fcda09926548e..aa074e57ce06fb2842fa1faabd156c3cd2fe10f5 100644
 
 #include "mlx4_en.h"
 
-static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
-                           struct mlx4_en_rx_alloc *page_alloc,
-                           const struct mlx4_en_frag_info *frag_info,
-                           gfp_t _gfp)
+static int mlx4_alloc_page(struct mlx4_en_priv *priv,
+                          struct mlx4_en_rx_alloc *frag,
+                          gfp_t gfp)
 {
-       int order;
        struct page *page;
        dma_addr_t dma;
 
-       for (order = frag_info->order; ;) {
-               gfp_t gfp = _gfp;
-
-               if (order)
-                       gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
-               page = alloc_pages(gfp, order);
-               if (likely(page))
-                       break;
-               if (--order < 0 ||
-                   ((PAGE_SIZE << order) < frag_info->frag_size))
-                       return -ENOMEM;
-       }
-       dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
-                          frag_info->dma_dir);
+       page = alloc_page(gfp);
+       if (unlikely(!page))
+               return -ENOMEM;
+       dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
        if (unlikely(dma_mapping_error(priv->ddev, dma))) {
-               put_page(page);
+               __free_page(page);
                return -ENOMEM;
        }
-       page_alloc->page_size = PAGE_SIZE << order;
-       page_alloc->page = page;
-       page_alloc->dma = dma;
-       page_alloc->page_offset = 0;
-       /* Not doing get_page() for each frag is a big win
-        * on asymmetric workloads. Note we cannot use atomic_set().
-        */
-       page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
+       frag->page = page;
+       frag->dma = dma;
+       frag->page_offset = priv->rx_headroom;
        return 0;
 }
 
 static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
+                              struct mlx4_en_rx_ring *ring,
                               struct mlx4_en_rx_desc *rx_desc,
                               struct mlx4_en_rx_alloc *frags,
-                              struct mlx4_en_rx_alloc *ring_alloc,
                               gfp_t gfp)
 {
-       struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
-       const struct mlx4_en_frag_info *frag_info;
-       struct page *page;
        int i;
 
-       for (i = 0; i < priv->num_frags; i++) {
-               frag_info = &priv->frag_info[i];
-               page_alloc[i] = ring_alloc[i];
-               page_alloc[i].page_offset += frag_info->frag_stride;
-
-               if (page_alloc[i].page_offset + frag_info->frag_stride <=
-                   ring_alloc[i].page_size)
-                       continue;
-
-               if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
-                                             frag_info, gfp)))
-                       goto out;
-       }
-
-       for (i = 0; i < priv->num_frags; i++) {
-               frags[i] = ring_alloc[i];
-               frags[i].page_offset += priv->frag_info[i].rx_headroom;
-               rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
-                                                   frags[i].page_offset);
-               ring_alloc[i] = page_alloc[i];
-       }
-
-       return 0;
-
-out:
-       while (i--) {
-               if (page_alloc[i].page != ring_alloc[i].page) {
-                       dma_unmap_page(priv->ddev, page_alloc[i].dma,
-                               page_alloc[i].page_size,
-                               priv->frag_info[i].dma_dir);
-                       page = page_alloc[i].page;
-                       /* Revert changes done by mlx4_alloc_pages */
-                       page_ref_sub(page, page_alloc[i].page_size /
-                                          priv->frag_info[i].frag_stride - 1);
-                       put_page(page);
+       for (i = 0; i < priv->num_frags; i++, frags++) {
+               if (!frags->page) {
+                       if (mlx4_alloc_page(priv, frags, gfp))
+                               return -ENOMEM;
+                       ring->rx_alloc_pages++;
                }
-       }
-       return -ENOMEM;
-}
-
-static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
-                             struct mlx4_en_rx_alloc *frags,
-                             int i)
-{
-       const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
-       u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
-
-
-       if (next_frag_end > frags[i].page_size)
-               dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
-                              frag_info->dma_dir);
-
-       if (frags[i].page)
-               put_page(frags[i].page);
-}
-
-static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
-                                 struct mlx4_en_rx_ring *ring)
-{
-       int i;
-       struct mlx4_en_rx_alloc *page_alloc;
-
-       for (i = 0; i < priv->num_frags; i++) {
-               const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
-
-               if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
-                                    frag_info, GFP_KERNEL | __GFP_COLD))
-                       goto out;
-
-               en_dbg(DRV, priv, "  frag %d allocator: - size:%d frags:%d\n",
-                      i, ring->page_alloc[i].page_size,
-                      page_ref_count(ring->page_alloc[i].page));
+               rx_desc->data[i].addr = cpu_to_be64(frags->dma +
+                                                   frags->page_offset);
        }
        return 0;
-
-out:
-       while (i--) {
-               struct page *page;
-
-               page_alloc = &ring->page_alloc[i];
-               dma_unmap_page(priv->ddev, page_alloc->dma,
-                              page_alloc->page_size,
-                              priv->frag_info[i].dma_dir);
-               page = page_alloc->page;
-               /* Revert changes done by mlx4_alloc_pages */
-               page_ref_sub(page, page_alloc->page_size /
-                                  priv->frag_info[i].frag_stride - 1);
-               put_page(page);
-               page_alloc->page = NULL;
-       }
-       return -ENOMEM;
 }
 
-static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
-                                     struct mlx4_en_rx_ring *ring)
+static void mlx4_en_free_frag(const struct mlx4_en_priv *priv,
+                             struct mlx4_en_rx_alloc *frag)
 {
-       struct mlx4_en_rx_alloc *page_alloc;
-       int i;
-
-       for (i = 0; i < priv->num_frags; i++) {
-               const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
-
-               page_alloc = &ring->page_alloc[i];
-               en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
-                      i, page_count(page_alloc->page));
-
-               dma_unmap_page(priv->ddev, page_alloc->dma,
-                               page_alloc->page_size, frag_info->dma_dir);
-               while (page_alloc->page_offset + frag_info->frag_stride <
-                      page_alloc->page_size) {
-                       put_page(page_alloc->page);
-                       page_alloc->page_offset += frag_info->frag_stride;
-               }
-               page_alloc->page = NULL;
+       if (frag->page) {
+               dma_unmap_page(priv->ddev, frag->dma,
+                              PAGE_SIZE, priv->dma_dir);
+               __free_page(frag->page);
        }
+       /* We need to clear all fields, otherwise a change of priv->log_rx_info
+        * could lead to seeing garbage later in frag->page.
+        */
+       memset(frag, 0, sizeof(*frag));
 }
 
-static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
+static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv,
                                 struct mlx4_en_rx_ring *ring, int index)
 {
        struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
@@ -248,18 +137,23 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
        struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
        struct mlx4_en_rx_alloc *frags = ring->rx_info +
                                        (index << priv->log_rx_info);
-
        if (ring->page_cache.index > 0) {
-               frags[0] = ring->page_cache.buf[--ring->page_cache.index];
-               rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
-                                                   frags[0].page_offset);
+               /* XDP uses a single page per frame */
+               if (!frags->page) {
+                       ring->page_cache.index--;
+                       frags->page = ring->page_cache.buf[ring->page_cache.index].page;
+                       frags->dma  = ring->page_cache.buf[ring->page_cache.index].dma;
+               }
+               frags->page_offset = XDP_PACKET_HEADROOM;
+               rx_desc->data[0].addr = cpu_to_be64(frags->dma +
+                                                   XDP_PACKET_HEADROOM);
                return 0;
        }
 
-       return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
+       return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp);
 }
 
-static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring)
+static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring)
 {
        return ring->prod == ring->cons;
 }
@@ -269,7 +163,8 @@ static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
        *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
 }
 
-static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
+/* slow path */
+static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
                                 struct mlx4_en_rx_ring *ring,
                                 int index)
 {
@@ -279,7 +174,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
        frags = ring->rx_info + (index << priv->log_rx_info);
        for (nr = 0; nr < priv->num_frags; nr++) {
                en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
-               mlx4_en_free_frag(priv, frags, nr);
+               mlx4_en_free_frag(priv, frags + nr);
        }
 }
 
@@ -335,12 +230,12 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
               ring->cons, ring->prod);
 
        /* Unmap and free Rx buffers */
-       while (!mlx4_en_is_ring_empty(ring)) {
-               index = ring->cons & ring->size_mask;
+       for (index = 0; index < ring->size; index++) {
                en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
                mlx4_en_free_rx_desc(priv, ring, index);
-               ++ring->cons;
        }
+       ring->cons = 0;
+       ring->prod = 0;
 }
 
 void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
@@ -392,9 +287,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
        tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
                                        sizeof(struct mlx4_en_rx_alloc));
-       ring->rx_info = vmalloc_node(tmp, node);
+       ring->rx_info = vzalloc_node(tmp, node);
        if (!ring->rx_info) {
-               ring->rx_info = vmalloc(tmp);
+               ring->rx_info = vzalloc(tmp);
                if (!ring->rx_info) {
                        err = -ENOMEM;
                        goto err_ring;
@@ -464,16 +359,6 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
                /* Initialize all descriptors */
                for (i = 0; i < ring->size; i++)
                        mlx4_en_init_rx_desc(priv, ring, i);
-
-               /* Initialize page allocators */
-               err = mlx4_en_init_allocator(priv, ring);
-               if (err) {
-                       en_err(priv, "Failed initializing ring allocator\n");
-                       if (ring->stride <= TXBB_SIZE)
-                               ring->buf -= TXBB_SIZE;
-                       ring_ind--;
-                       goto err_allocator;
-               }
        }
        err = mlx4_en_fill_rx_buffers(priv);
        if (err)
@@ -493,11 +378,9 @@ err_buffers:
                mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);
 
        ring_ind = priv->rx_ring_num - 1;
-err_allocator:
        while (ring_ind >= 0) {
                if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE)
                        priv->rx_ring[ring_ind]->buf -= TXBB_SIZE;
-               mlx4_en_destroy_allocator(priv, priv->rx_ring[ring_ind]);
                ring_ind--;
        }
        return err;
@@ -537,7 +420,9 @@ bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
        if (cache->index >= MLX4_EN_CACHE_SIZE)
                return false;
 
-       cache->buf[cache->index++] = *frame;
+       cache->buf[cache->index].page = frame->page;
+       cache->buf[cache->index].dma = frame->dma;
+       cache->index++;
        return true;
 }
 
@@ -567,136 +452,91 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
        int i;
 
        for (i = 0; i < ring->page_cache.index; i++) {
-               struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i];
-
-               dma_unmap_page(priv->ddev, frame->dma, frame->page_size,
-                              priv->frag_info[0].dma_dir);
-               put_page(frame->page);
+               dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma,
+                              PAGE_SIZE, priv->dma_dir);
+               put_page(ring->page_cache.buf[i].page);
        }
        ring->page_cache.index = 0;
        mlx4_en_free_rx_buf(priv, ring);
        if (ring->stride <= TXBB_SIZE)
                ring->buf -= TXBB_SIZE;
-       mlx4_en_destroy_allocator(priv, ring);
 }
 
 
 static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
-                                   struct mlx4_en_rx_desc *rx_desc,
                                    struct mlx4_en_rx_alloc *frags,
                                    struct sk_buff *skb,
                                    int length)
 {
-       struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
-       struct mlx4_en_frag_info *frag_info;
-       int nr;
+       const struct mlx4_en_frag_info *frag_info = priv->frag_info;
+       unsigned int truesize = 0;
+       int nr, frag_size;
+       struct page *page;
        dma_addr_t dma;
+       bool release;
 
        /* Collect used fragments while replacing them in the HW descriptors */
-       for (nr = 0; nr < priv->num_frags; nr++) {
-               frag_info = &priv->frag_info[nr];
-               if (length <= frag_info->frag_prefix_size)
-                       break;
-               if (unlikely(!frags[nr].page))
+       for (nr = 0;; frags++) {
+               frag_size = min_t(int, length, frag_info->frag_size);
+
+               page = frags->page;
+               if (unlikely(!page))
                        goto fail;
 
-               dma = be64_to_cpu(rx_desc->data[nr].addr);
-               dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
-                                       DMA_FROM_DEVICE);
+               dma = frags->dma;
+               dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset,
+                                             frag_size, priv->dma_dir);
+
+               __skb_fill_page_desc(skb, nr, page, frags->page_offset,
+                                    frag_size);
 
-               __skb_fill_page_desc(skb, nr, frags[nr].page,
-                                    frags[nr].page_offset,
-                                    frag_info->frag_size);
+               truesize += frag_info->frag_stride;
+               if (frag_info->frag_stride == PAGE_SIZE / 2) {
+                       frags->page_offset ^= PAGE_SIZE / 2;
+                       release = page_count(page) != 1 ||
+                                 page_is_pfmemalloc(page) ||
+                                 page_to_nid(page) != numa_mem_id();
+               } else {
+                       u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES);
 
-               skb->truesize += frag_info->frag_stride;
-               frags[nr].page = NULL;
+                       frags->page_offset += sz_align;
+                       release = frags->page_offset + frag_info->frag_size > PAGE_SIZE;
+               }
+               if (release) {
+                       dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir);
+                       frags->page = NULL;
+               } else {
+                       page_ref_inc(page);
+               }
+
+               nr++;
+               length -= frag_size;
+               if (!length)
+                       break;
+               frag_info++;
        }
-       /* Adjust size of last fragment to match actual length */
-       if (nr > 0)
-               skb_frag_size_set(&skb_frags_rx[nr - 1],
-                       length - priv->frag_info[nr - 1].frag_prefix_size);
+       skb->truesize += truesize;
        return nr;
 
 fail:
        while (nr > 0) {
                nr--;
-               __skb_frag_unref(&skb_frags_rx[nr]);
+               __skb_frag_unref(skb_shinfo(skb)->frags + nr);
        }
        return 0;
 }
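
The release test above is the core of the new page-reuse scheme: a half-page is flipped and handed back to the ring only when the driver holds the sole reference, the page is not from the pfmemalloc emergency reserve, and it sits on the local NUMA node. Condensed into a kernel-style sketch (illustrative only, helper name is invented):

#include <linux/mm.h>
#include <linux/topology.h>
#include "mlx4_en.h"

/* Flip the offset to the other half and keep the page only if it can
 * be reused safely; otherwise the caller unmaps and drops it.
 */
static bool mlx4_en_try_flip_half_page(struct mlx4_en_rx_alloc *frag)
{
	struct page *page = frag->page;

	frag->page_offset ^= PAGE_SIZE / 2;

	if (page_count(page) != 1 ||		/* someone else holds a ref */
	    page_is_pfmemalloc(page) ||		/* emergency reserve page */
	    page_to_nid(page) != numa_mem_id())	/* remote node */
		return false;

	page_ref_inc(page);	/* one reference rides with the skb frag */
	return true;
}
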
 
-
-static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
-                                     struct mlx4_en_rx_desc *rx_desc,
-                                     struct mlx4_en_rx_alloc *frags,
-                                     unsigned int length)
-{
-       struct sk_buff *skb;
-       void *va;
-       int used_frags;
-       dma_addr_t dma;
-
-       skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
-       if (unlikely(!skb)) {
-               en_dbg(RX_ERR, priv, "Failed allocating skb\n");
-               return NULL;
-       }
-       skb_reserve(skb, NET_IP_ALIGN);
-       skb->len = length;
-
-       /* Get pointer to first fragment so we could copy the headers into the
-        * (linear part of the) skb */
-       va = page_address(frags[0].page) + frags[0].page_offset;
-
-       if (length <= SMALL_PACKET_SIZE) {
-               /* We are copying all relevant data to the skb - temporarily
-                * sync buffers for the copy */
-               dma = be64_to_cpu(rx_desc->data[0].addr);
-               dma_sync_single_for_cpu(priv->ddev, dma, length,
-                                       DMA_FROM_DEVICE);
-               skb_copy_to_linear_data(skb, va, length);
-               skb->tail += length;
-       } else {
-               unsigned int pull_len;
-
-               /* Move relevant fragments to skb */
-               used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags,
-                                                       skb, length);
-               if (unlikely(!used_frags)) {
-                       kfree_skb(skb);
-                       return NULL;
-               }
-               skb_shinfo(skb)->nr_frags = used_frags;
-
-               pull_len = eth_get_headlen(va, SMALL_PACKET_SIZE);
-               /* Copy headers into the skb linear buffer */
-               memcpy(skb->data, va, pull_len);
-               skb->tail += pull_len;
-
-               /* Skip headers in first fragment */
-               skb_shinfo(skb)->frags[0].page_offset += pull_len;
-
-               /* Adjust size of first fragment */
-               skb_frag_size_sub(&skb_shinfo(skb)->frags[0], pull_len);
-               skb->data_len = length - pull_len;
-       }
-       return skb;
-}
-
-static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
+static void validate_loopback(struct mlx4_en_priv *priv, void *va)
 {
+       const unsigned char *data = va + ETH_HLEN;
        int i;
-       int offset = ETH_HLEN;
 
-       for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
-               if (*(skb->data + offset) != (unsigned char) (i & 0xff))
-                       goto out_loopback;
+       for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) {
+               if (data[i] != (unsigned char)i)
+                       return;
        }
        /* Loopback found */
        priv->loopback_ok = 1;
-
-out_loopback:
-       dev_kfree_skb_any(skb);
 }
 
 static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
@@ -801,7 +641,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        struct mlx4_cqe *cqe;
        struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
        struct mlx4_en_rx_alloc *frags;
-       struct mlx4_en_rx_desc *rx_desc;
        struct bpf_prog *xdp_prog;
        int doorbell_pending;
        struct sk_buff *skb;
@@ -834,10 +673,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        /* Process all completed CQEs */
        while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
                    cq->mcq.cons_index & cq->size)) {
+               void *va;
 
                frags = ring->rx_info + (index << priv->log_rx_info);
-               rx_desc = ring->buf + (index << ring->log_stride);
-
+               va = page_address(frags[0].page) + frags[0].page_offset;
                /*
                 * make sure we read the CQE after we read the ownership bit
                 */
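
The loop condition above depends on the CQE owner bit toggling on every lap around the ring, so software compares it against the parity of its consumer index. Assuming the usual mlx4_en.h helper:

/* mlx4_en.h defines, in substance: */
#define XNOR(x, y)	(!(x) == !(y))

/* A CQE is software-owned when its owner bit matches the parity of
 * cons_index & cq->size: hardware flips the bit each time it wraps the
 * ring, so a not-yet-written entry fails the test and polling stops.
 */
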
@@ -860,16 +699,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                 * and not performing the selftest or flb disabled
                 */
                if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) {
-                       struct ethhdr *ethh;
+                       const struct ethhdr *ethh = va;
                        dma_addr_t dma;
                        /* Get pointer to first fragment since we haven't
                         * skb yet and cast it to ethhdr struct
                         */
-                       dma = be64_to_cpu(rx_desc->data[0].addr);
+                       dma = frags[0].dma + frags[0].page_offset;
                        dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
                                                DMA_FROM_DEVICE);
-                       ethh = (struct ethhdr *)(page_address(frags[0].page) +
-                                                frags[0].page_offset);
 
                        if (is_multicast_ether_addr(ethh->h_dest)) {
                                struct mlx4_mac_entry *entry;
@@ -887,13 +724,16 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                        }
                }
 
+               if (unlikely(priv->validate_loopback)) {
+                       validate_loopback(priv, va);
+                       goto next;
+               }
+
                /*
                 * Packet is OK - process it.
                 */
                length = be32_to_cpu(cqe->byte_cnt);
                length -= ring->fcs_del;
-               l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
-                       (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
                /* A bpf program gets first chance to drop the packet. It may
                 * read bytes but not past the end of the frag.
@@ -904,13 +744,13 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                        void *orig_data;
                        u32 act;
 
-                       dma = be64_to_cpu(rx_desc->data[0].addr);
+                       dma = frags[0].dma + frags[0].page_offset;
                        dma_sync_single_for_cpu(priv->ddev, dma,
                                                priv->frag_info[0].frag_size,
                                                DMA_FROM_DEVICE);
 
-                       xdp.data_hard_start = page_address(frags[0].page);
-                       xdp.data = xdp.data_hard_start + frags[0].page_offset;
+                       xdp.data_hard_start = va - frags[0].page_offset;
+                       xdp.data = va;
                        xdp.data_end = xdp.data + length;
                        orig_data = xdp.data;
 
@@ -920,6 +760,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                                length = xdp.data_end - xdp.data;
                                frags[0].page_offset = xdp.data -
                                        xdp.data_hard_start;
+                               va = xdp.data;
                        }
 
                        switch (act) {
@@ -928,8 +769,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                        case XDP_TX:
                                if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
                                                        length, cq->ring,
-                                                       &doorbell_pending)))
-                                       goto consumed;
+                                                       &doorbell_pending))) {
+                                       frags[0].page = NULL;
+                                       goto next;
+                               }
                                trace_xdp_exception(dev, xdp_prog, act);
                                goto xdp_drop_no_cnt; /* Drop on xmit failure */
                        default:
@@ -939,8 +782,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                        case XDP_DROP:
                                ring->xdp_drop++;
 xdp_drop_no_cnt:
-                               if (likely(mlx4_en_rx_recycle(ring, frags)))
-                                       goto consumed;
                                goto next;
                        }
                }
@@ -948,129 +789,51 @@ xdp_drop_no_cnt:
                ring->bytes += length;
                ring->packets++;
 
+               skb = napi_get_frags(&cq->napi);
+               if (!skb)
+                       goto next;
+
+               if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
+                       timestamp = mlx4_en_get_cqe_ts(cqe);
+                       mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
+                                              timestamp);
+               }
+               skb_record_rx_queue(skb, cq->ring);
+
                if (likely(dev->features & NETIF_F_RXCSUM)) {
                        if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
                                                      MLX4_CQE_STATUS_UDP)) {
                                if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
                                    cqe->checksum == cpu_to_be16(0xffff)) {
                                        ip_summed = CHECKSUM_UNNECESSARY;
+                                       l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
+                                               (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
+                                       if (l2_tunnel)
+                                               skb->csum_level = 1;
                                        ring->csum_ok++;
                                } else {
-                                       ip_summed = CHECKSUM_NONE;
-                                       ring->csum_none++;
+                                       goto csum_none;
                                }
                        } else {
                                if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
                                    (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
                                                               MLX4_CQE_STATUS_IPV6))) {
-                                       ip_summed = CHECKSUM_COMPLETE;
-                                       ring->csum_complete++;
+                                       if (check_csum(cqe, skb, va, dev->features)) {
+                                               goto csum_none;
+                                       } else {
+                                               ip_summed = CHECKSUM_COMPLETE;
+                                               ring->csum_complete++;
+                                       }
                                } else {
-                                       ip_summed = CHECKSUM_NONE;
-                                       ring->csum_none++;
+                                       goto csum_none;
                                }
                        }
                } else {
+csum_none:
                        ip_summed = CHECKSUM_NONE;
                        ring->csum_none++;
                }
-
-               /* This packet is eligible for GRO if it is:
-                * - DIX Ethernet (type interpretation)
-                * - TCP/IP (v4)
-                * - without IP options
-                * - not an IP fragment
-                */
-               if (dev->features & NETIF_F_GRO) {
-                       struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
-                       if (!gro_skb)
-                               goto next;
-
-                       nr = mlx4_en_complete_rx_desc(priv,
-                               rx_desc, frags, gro_skb,
-                               length);
-                       if (!nr)
-                               goto next;
-
-                       if (ip_summed == CHECKSUM_COMPLETE) {
-                               void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
-                               if (check_csum(cqe, gro_skb, va,
-                                              dev->features)) {
-                                       ip_summed = CHECKSUM_NONE;
-                                       ring->csum_none++;
-                                       ring->csum_complete--;
-                               }
-                       }
-
-                       skb_shinfo(gro_skb)->nr_frags = nr;
-                       gro_skb->len = length;
-                       gro_skb->data_len = length;
-                       gro_skb->ip_summed = ip_summed;
-
-                       if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
-                               gro_skb->csum_level = 1;
-
-                       if ((cqe->vlan_my_qpn &
-                           cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) &&
-                           (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
-                               u16 vid = be16_to_cpu(cqe->sl_vid);
-
-                               __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid);
-                       } else if ((be32_to_cpu(cqe->vlan_my_qpn) &
-                                 MLX4_CQE_SVLAN_PRESENT_MASK) &&
-                                (dev->features & NETIF_F_HW_VLAN_STAG_RX)) {
-                               __vlan_hwaccel_put_tag(gro_skb,
-                                                      htons(ETH_P_8021AD),
-                                                      be16_to_cpu(cqe->sl_vid));
-                       }
-
-                       if (dev->features & NETIF_F_RXHASH)
-                               skb_set_hash(gro_skb,
-                                            be32_to_cpu(cqe->immed_rss_invalid),
-                                            (ip_summed == CHECKSUM_UNNECESSARY) ?
-                                               PKT_HASH_TYPE_L4 :
-                                               PKT_HASH_TYPE_L3);
-
-                       skb_record_rx_queue(gro_skb, cq->ring);
-
-                       if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
-                               timestamp = mlx4_en_get_cqe_ts(cqe);
-                               mlx4_en_fill_hwtstamps(mdev,
-                                                      skb_hwtstamps(gro_skb),
-                                                      timestamp);
-                       }
-
-                       napi_gro_frags(&cq->napi);
-                       goto next;
-               }
-
-               /* GRO not possible, complete processing here */
-               skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
-               if (unlikely(!skb)) {
-                       ring->dropped++;
-                       goto next;
-               }
-
-               if (unlikely(priv->validate_loopback)) {
-                       validate_loopback(priv, skb);
-                       goto next;
-               }
-
-               if (ip_summed == CHECKSUM_COMPLETE) {
-                       if (check_csum(cqe, skb, skb->data, dev->features)) {
-                               ip_summed = CHECKSUM_NONE;
-                               ring->csum_complete--;
-                               ring->csum_none++;
-                       }
-               }
-
                skb->ip_summed = ip_summed;
-               skb->protocol = eth_type_trans(skb, dev);
-               skb_record_rx_queue(skb, cq->ring);
-
-               if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
-                       skb->csum_level = 1;
-
                if (dev->features & NETIF_F_RXHASH)
                        skb_set_hash(skb,
                                     be32_to_cpu(cqe->immed_rss_invalid),
@@ -1078,36 +841,36 @@ xdp_drop_no_cnt:
                                        PKT_HASH_TYPE_L4 :
                                        PKT_HASH_TYPE_L3);
 
-               if ((be32_to_cpu(cqe->vlan_my_qpn) &
-                   MLX4_CQE_CVLAN_PRESENT_MASK) &&
+
+               if ((cqe->vlan_my_qpn &
+                    cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) &&
                    (dev->features & NETIF_F_HW_VLAN_CTAG_RX))
-                       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid));
-               else if ((be32_to_cpu(cqe->vlan_my_qpn) &
-                         MLX4_CQE_SVLAN_PRESENT_MASK) &&
+                       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                              be16_to_cpu(cqe->sl_vid));
+               else if ((cqe->vlan_my_qpn &
+                         cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) &&
                         (dev->features & NETIF_F_HW_VLAN_STAG_RX))
                        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD),
                                               be16_to_cpu(cqe->sl_vid));
 
-               if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
-                       timestamp = mlx4_en_get_cqe_ts(cqe);
-                       mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
-                                              timestamp);
+               nr = mlx4_en_complete_rx_desc(priv, frags, skb, length);
+               if (likely(nr)) {
+                       skb_shinfo(skb)->nr_frags = nr;
+                       skb->len = length;
+                       skb->data_len = length;
+                       napi_gro_frags(&cq->napi);
+               } else {
+                       skb->vlan_tci = 0;
+                       skb_clear_hash(skb);
                }
-
-               napi_gro_receive(&cq->napi, skb);
 next:
-               for (nr = 0; nr < priv->num_frags; nr++)
-                       mlx4_en_free_frag(priv, frags, nr);
-
-consumed:
                ++cq->mcq.cons_index;
                index = (cq->mcq.cons_index) & ring->size_mask;
                cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
                if (++polled == budget)
-                       goto out;
+                       break;
        }
 
-out:
        rcu_read_unlock();
 
        if (polled) {
@@ -1178,13 +941,6 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
        return done;
 }
 
-static const int frag_sizes[] = {
-       FRAG_SZ0,
-       FRAG_SZ1,
-       FRAG_SZ2,
-       FRAG_SZ3
-};
-
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -1195,33 +951,43 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
         * This only works when num_frags == 1.
         */
        if (priv->tx_ring_num[TX_XDP]) {
-               priv->frag_info[0].order = 0;
                priv->frag_info[0].frag_size = eff_mtu;
-               priv->frag_info[0].frag_prefix_size = 0;
                /* This will gain efficient xdp frame recycling at the
                 * expense of more costly truesize accounting
                 */
                priv->frag_info[0].frag_stride = PAGE_SIZE;
-               priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
-               priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
+               priv->dma_dir = PCI_DMA_BIDIRECTIONAL;
+               priv->rx_headroom = XDP_PACKET_HEADROOM;
                i = 1;
        } else {
-               int buf_size = 0;
+               int frag_size_max = 2048, buf_size = 0;
+
+               /* Should not happen; guard against an oversized eff_mtu anyway */
+               if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048)
+                       frag_size_max = PAGE_SIZE;
 
                while (buf_size < eff_mtu) {
-                       priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
-                       priv->frag_info[i].frag_size =
-                               (eff_mtu > buf_size + frag_sizes[i]) ?
-                                       frag_sizes[i] : eff_mtu - buf_size;
-                       priv->frag_info[i].frag_prefix_size = buf_size;
-                       priv->frag_info[i].frag_stride =
-                               ALIGN(priv->frag_info[i].frag_size,
-                                     SMP_CACHE_BYTES);
-                       priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
-                       priv->frag_info[i].rx_headroom = 0;
-                       buf_size += priv->frag_info[i].frag_size;
+                       int frag_stride, frag_size = eff_mtu - buf_size;
+                       int pad, nb;
+
+                       if (i < MLX4_EN_MAX_RX_FRAGS - 1)
+                               frag_size = min(frag_size, frag_size_max);
+
+                       priv->frag_info[i].frag_size = frag_size;
+                       frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
+                       /* We can only pack two 1536-byte frames into one 4K page,
+                        * so each frame consumes more bytes (truesize).
+                        */
+                       nb = PAGE_SIZE / frag_stride;
+                       pad = (PAGE_SIZE - nb * frag_stride) / nb;
+                       pad &= ~(SMP_CACHE_BYTES - 1);
+                       priv->frag_info[i].frag_stride = frag_stride + pad;
+
+                       buf_size += frag_size;
                        i++;
                }
+               priv->dma_dir = PCI_DMA_FROMDEVICE;
+               priv->rx_headroom = 0;
        }
 
        priv->num_frags = i;
@@ -1232,10 +998,9 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
               eff_mtu, priv->num_frags);
        for (i = 0; i < priv->num_frags; i++) {
                en_err(priv,
-                      "  frag:%d - size:%d prefix:%d stride:%d\n",
+                      "  frag:%d - size:%d stride:%d\n",
                       i,
                       priv->frag_info[i].frag_size,
-                      priv->frag_info[i].frag_prefix_size,
                       priv->frag_info[i].frag_stride);
        }
 }
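
The rewritten mlx4_en_calc_rx_buf() above packs cache-line-aligned frags into order-0 pages and spreads the leftover page space across frags as extra stride padding. The following user-space sketch reproduces only that arithmetic so the resulting layout can be inspected; PAGE_SIZE, SMP_CACHE_BYTES, the 2048-byte cap and the 9614-byte eff_mtu are assumed example values, not read from a running system.

#include <stdio.h>

#define PAGE_SIZE            4096  /* assumed */
#define SMP_CACHE_BYTES      64    /* assumed */
#define MLX4_EN_MAX_RX_FRAGS 4
#define ALIGN(x, a)          (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int eff_mtu = 9614;  /* hypothetical jumbo-frame budget */
	int frag_size_max = 2048, buf_size = 0, i = 0;

	while (buf_size < eff_mtu) {
		int frag_size = eff_mtu - buf_size;
		int frag_stride, pad, nb;

		if (i < MLX4_EN_MAX_RX_FRAGS - 1 && frag_size > frag_size_max)
			frag_size = frag_size_max;

		frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
		nb = PAGE_SIZE / frag_stride;              /* frags per page */
		pad = (PAGE_SIZE - nb * frag_stride) / nb; /* spread leftover */
		pad &= ~(SMP_CACHE_BYTES - 1);             /* keep alignment */

		printf("frag:%d size:%d stride:%d (%d per page)\n",
		       i, frag_size, frag_stride + pad, nb);
		buf_size += frag_size;
		i++;
	}
	return 0;
}

With these inputs the loop yields three 2048-byte strides (two frags per page) followed by one padded 4096-byte stride, i.e. four frags for a jumbo MTU, matching MLX4_EN_MAX_RX_FRAGS.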
index 95290e1fc9fe7600b2e3bcca334f3fad7d733c09..17112faafbccc5f7a75ee82a287be7952859ae9e 100644
@@ -81,14 +81,11 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv)
 {
        u32 loopback_ok = 0;
        int i;
-       bool gro_enabled;
 
         priv->loopback_ok = 0;
        priv->validate_loopback = 1;
-       gro_enabled = priv->dev->features & NETIF_F_GRO;
 
        mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
-       priv->dev->features &= ~NETIF_F_GRO;
 
        /* xmit */
        if (mlx4_en_test_loopback_xmit(priv)) {
@@ -111,9 +108,6 @@ mlx4_en_test_loopback_exit:
 
        priv->validate_loopback = 0;
 
-       if (gro_enabled)
-               priv->dev->features |= NETIF_F_GRO;
-
        mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
        return !loopback_ok;
 }
index 3ed42199d3f1275f77560e92a430c0dde181e95a..e0c5ffb3e3a6607456e1f191b0b8c8becfc71219 100644
@@ -354,13 +354,11 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
        struct mlx4_en_rx_alloc frame = {
                .page = tx_info->page,
                .dma = tx_info->map0_dma,
-               .page_offset = XDP_PACKET_HEADROOM,
-               .page_size = PAGE_SIZE,
        };
 
        if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
                dma_unmap_page(priv->ddev, tx_info->map0_dma,
-                              PAGE_SIZE, priv->frag_info[0].dma_dir);
+                              PAGE_SIZE, priv->dma_dir);
                put_page(tx_info->page);
        }
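
A minimal sketch of the bounded LIFO cache that the recycle path above feeds (mlx4_en_page_cache now stores bare {page, dma} pairs, see the header change further down). The push/pop logic here is an assumption for illustration, not the driver's code verbatim: a push declines when the cache is full, at which point the caller unmaps and frees the page exactly as mlx4_en_recycle_tx_desc() does above.

#include <stdbool.h>
#include <stdio.h>

#define CACHE_SIZE 128                  /* stands in for MLX4_EN_CACHE_SIZE */

typedef unsigned long long dma_addr_t;  /* stand-in for the kernel type */
struct page;                            /* opaque here */

struct page_cache {
	unsigned int index;
	struct {
		struct page *page;
		dma_addr_t dma;
	} buf[CACHE_SIZE];
};

/* Recycle: keep the page while there is room; otherwise the caller
 * must dma_unmap and put_page it.
 */
static bool cache_put(struct page_cache *c, struct page *page, dma_addr_t dma)
{
	if (c->index >= CACHE_SIZE)
		return false;
	c->buf[c->index].page = page;
	c->buf[c->index].dma = dma;
	c->index++;
	return true;
}

/* Refill: reuse a cached page before allocating a fresh one. */
static bool cache_get(struct page_cache *c, struct page **page, dma_addr_t *dma)
{
	if (!c->index)
		return false;
	c->index--;
	*page = c->buf[c->index].page;
	*dma = c->buf[c->index].dma;
	return true;
}

int main(void)
{
	struct page_cache cache = { 0 };
	struct page *page = (struct page *)0x1000; /* fake pointer for demo */
	dma_addr_t dma = 0xabcd;

	cache_put(&cache, page, dma);
	printf("hit=%d\n", cache_get(&cache, &page, &dma));
	return 0;
}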
 
index 21377c315083b686d8db25033583dd020d7e50a6..703205475524d689cd2762f2d2ce3abfd2b6ebcb 100644
@@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev)
                               (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
                if (!offline_bit)
                        return 0;
+
+               /* If device removal has been requested,
+                * do not continue retrying.
+                */
+               if (dev->persist->interface_state &
+                   MLX4_INTERFACE_STATE_NOWAIT)
+                       break;
+
                /* There are cases as part of AER/Reset flow that PF needs
                 * around 100 msec to load. We therefore sleep for 100 msec
                 * to allow other tasks to make use of that CPU during this
@@ -3955,6 +3963,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
        struct devlink *devlink = priv_to_devlink(priv);
        int active_vfs = 0;
 
+       if (mlx4_is_slave(dev))
+               persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
        mutex_lock(&persist->interface_state_mutex);
        persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
        mutex_unlock(&persist->interface_state_mutex);
index 3629ce11a68b9dec5c1659539bdc6f2c11114e35..39f401aa30474e61c0b0029463b23a829ec35fa3 100644
 /* Use the maximum between 16384 and a single page */
 #define MLX4_EN_ALLOC_SIZE     PAGE_ALIGN(16384)
 
-#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768),                \
-                                        PAGE_ALLOC_COSTLY_ORDER)
-
-/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
- * and 4K allocations) */
-enum {
-       FRAG_SZ0 = 1536 - NET_IP_ALIGN,
-       FRAG_SZ1 = 4096,
-       FRAG_SZ2 = 4096,
-       FRAG_SZ3 = MLX4_EN_ALLOC_SIZE
-};
 #define MLX4_EN_MAX_RX_FRAGS   4
 
 /* Maximum ring sizes */
@@ -264,13 +253,16 @@ struct mlx4_en_rx_alloc {
        struct page     *page;
        dma_addr_t      dma;
        u32             page_offset;
-       u32             page_size;
 };
 
 #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
+
 struct mlx4_en_page_cache {
        u32 index;
-       struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE];
+       struct {
+               struct page     *page;
+               dma_addr_t      dma;
+       } buf[MLX4_EN_CACHE_SIZE];
 };
 
 struct mlx4_en_priv;
@@ -335,7 +327,6 @@ struct mlx4_en_rx_desc {
 
 struct mlx4_en_rx_ring {
        struct mlx4_hwq_resources wqres;
-       struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
        u32 size;       /* number of Rx descs */
        u32 actual_size;
        u32 size_mask;
@@ -355,6 +346,7 @@ struct mlx4_en_rx_ring {
        unsigned long csum_ok;
        unsigned long csum_none;
        unsigned long csum_complete;
+       unsigned long rx_alloc_pages;
        unsigned long xdp_drop;
        unsigned long xdp_tx;
        unsigned long xdp_tx_full;
@@ -472,11 +464,7 @@ struct mlx4_en_mc_list {
 
 struct mlx4_en_frag_info {
        u16 frag_size;
-       u16 frag_prefix_size;
        u32 frag_stride;
-       enum dma_data_direction dma_dir;
-       u16 order;
-       u16 rx_headroom;
 };
 
 #ifdef CONFIG_MLX4_EN_DCB
@@ -584,8 +572,10 @@ struct mlx4_en_priv {
        u32 rx_ring_num;
        u32 rx_skb_size;
        struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
-       u16 num_frags;
-       u16 log_rx_info;
+       u8 num_frags;
+       u8 log_rx_info;
+       u8 dma_dir;
+       u16 rx_headroom;
 
        struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
        struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
index 48641cb0367f251a07537b82d0a16bf50d8479ef..926f3c3f3665c5d28fe5d35c41afaa0e5917c007 100644
@@ -37,7 +37,7 @@ struct mlx4_en_port_stats {
        unsigned long queue_stopped;
        unsigned long wake_queue;
        unsigned long tx_timeout;
-       unsigned long rx_alloc_failed;
+       unsigned long rx_alloc_pages;
        unsigned long rx_chksum_good;
        unsigned long rx_chksum_none;
        unsigned long rx_chksum_complete;
index caa837e5e2b991fc3666776d2050fe20b1c6c7f6..5bdaf3d545b2fc656a318d5b562f940e14ecd9d9 100644
@@ -279,6 +279,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DESTROY_XRC_SRQ:
        case MLX5_CMD_OP_DESTROY_DCT:
        case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
+       case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
        case MLX5_CMD_OP_DEALLOC_PD:
        case MLX5_CMD_OP_DEALLOC_UAR:
        case MLX5_CMD_OP_DETACH_FROM_MCG:
@@ -305,8 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
        case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
-       case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
-       case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
+       case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -361,6 +362,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
+       case MLX5_CMD_OP_SET_RATE_LIMIT:
+       case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+       case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
        case MLX5_CMD_OP_ALLOC_PD:
        case MLX5_CMD_OP_ALLOC_UAR:
        case MLX5_CMD_OP_CONFIG_INT_MODERATION:
@@ -412,10 +419,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
        case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
-       case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
-       case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
-       case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
-       case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
+       case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
                *status = MLX5_DRIVER_STATUS_ABORTED;
                *synd = MLX5_DRIVER_SYND;
                return -EIO;
@@ -497,6 +501,14 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+       MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+       MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
        MLX5_COMMAND_STR_CASE(ALLOC_PD);
        MLX5_COMMAND_STR_CASE(DEALLOC_PD);
        MLX5_COMMAND_STR_CASE(ALLOC_UAR);
@@ -572,12 +584,8 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
        MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
        MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
-       MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
-       MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
-       MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
-       MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
-       MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
-       MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
+       MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
+       MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
        default: return "unknown command opcode";
        }
 }
index f6a6ded204f61cda53c6233d80b3db7cde678c6e..150fb52a073713c458944749a4370bb7007ccfea 100644
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
 #define MLX5E_TX_CQ_POLL_BUDGET        128
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
-#define MLX5E_SQ_BF_BUDGET             16
 
 #define MLX5E_ICOSQ_MAX_WQEBBS \
        (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
 
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_IHS_DS_COUNT \
-       DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT \
        ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
-#define MLX5E_XDP_TX_WQEBBS \
-       DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS)
 
 #define MLX5E_NUM_MAIN_GROUPS 9
 
@@ -187,15 +182,15 @@ enum mlx5e_priv_flag {
        MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1),
 };
 
-#define MLX5E_SET_PFLAG(priv, pflag, enable)                   \
+#define MLX5E_SET_PFLAG(params, pflag, enable)                 \
        do {                                                    \
                if (enable)                                     \
-                       (priv)->params.pflags |= (pflag);       \
+                       (params)->pflags |= (pflag);            \
                else                                            \
-                       (priv)->params.pflags &= ~(pflag);      \
+                       (params)->pflags &= ~(pflag);           \
        } while (0)
 
-#define MLX5E_GET_PFLAG(priv, pflag) (!!((priv)->params.pflags & (pflag)))
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
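
The pflag accessors above were repointed from priv to a struct mlx5e_params. A hedged standalone rendition of the same two idioms (do { } while (0) makes the setter behave as a single statement even after an unbraced if; !! normalizes the mask test to 0/1), using stand-in names rather than the driver's:

#include <assert.h>
#include <stdint.h>

#define PFLAG_RX_CQE_COMPRESS (1 << 1)

#define SET_PFLAG(params, pflag, enable)		\
	do {						\
		if (enable)				\
			(params)->pflags |= (pflag);	\
		else					\
			(params)->pflags &= ~(pflag);	\
	} while (0)

#define GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))

struct params { uint32_t pflags; };

int main(void)
{
	struct params p = { 0 };

	SET_PFLAG(&p, PFLAG_RX_CQE_COMPRESS, 1);
	assert(GET_PFLAG(&p, PFLAG_RX_CQE_COMPRESS) == 1);
	SET_PFLAG(&p, PFLAG_RX_CQE_COMPRESS, 0);
	assert(GET_PFLAG(&p, PFLAG_RX_CQE_COMPRESS) == 0);
	return 0;
}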
@@ -218,7 +213,6 @@ struct mlx5e_params {
        bool rx_cqe_compress_def;
        struct mlx5e_cq_moder rx_cq_moderation;
        struct mlx5e_cq_moder tx_cq_moderation;
-       u16 min_rx_wqes;
        bool lro_en;
        u32 lro_wqe_sz;
        u16 tx_max_inline;
@@ -230,6 +224,7 @@ struct mlx5e_params {
        bool rx_am_enabled;
        u32 lro_timeout;
        u32 pflags;
+       struct bpf_prog *xdp_prog;
 };
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -285,7 +280,6 @@ struct mlx5e_cq {
        struct napi_struct        *napi;
        struct mlx5_core_cq        mcq;
        struct mlx5e_channel      *channel;
-       struct mlx5e_priv         *priv;
 
        /* cqe decompression */
        struct mlx5_cqe64          title;
@@ -295,22 +289,163 @@ struct mlx5e_cq {
        u16                        decmprs_wqe_counter;
 
        /* control */
+       struct mlx5_core_dev      *mdev;
        struct mlx5_frag_wq_ctrl   wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
-struct mlx5e_rq;
-typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
-                                      struct mlx5_cqe64 *cqe);
-typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
-                                 u16 ix);
+struct mlx5e_tx_wqe_info {
+       u32 num_bytes;
+       u8  num_wqebbs;
+       u8  num_dma;
+};
 
-typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
+enum mlx5e_dma_map_type {
+       MLX5E_DMA_MAP_SINGLE,
+       MLX5E_DMA_MAP_PAGE
+};
+
+struct mlx5e_sq_dma {
+       dma_addr_t              addr;
+       u32                     size;
+       enum mlx5e_dma_map_type type;
+};
+
+enum {
+       MLX5E_SQ_STATE_ENABLED,
+};
+
+struct mlx5e_sq_wqe_info {
+       u8  opcode;
+       u8  num_wqebbs;
+};
+
+struct mlx5e_txqsq {
+       /* data path */
+
+       /* dirtied @completion */
+       u16                        cc;
+       u32                        dma_fifo_cc;
+
+       /* dirtied @xmit */
+       u16                        pc ____cacheline_aligned_in_smp;
+       u32                        dma_fifo_pc;
+       struct mlx5e_sq_stats      stats;
+
+       struct mlx5e_cq            cq;
+
+       /* write@xmit, read@completion */
+       struct {
+               struct sk_buff           **skb;
+               struct mlx5e_sq_dma       *dma_fifo;
+               struct mlx5e_tx_wqe_info  *wqe_info;
+       } db;
+
+       /* read only */
+       struct mlx5_wq_cyc         wq;
+       u32                        dma_fifo_mask;
+       void __iomem              *uar_map;
+       struct netdev_queue       *txq;
+       u32                        sqn;
+       u16                        max_inline;
+       u8                         min_inline_mode;
+       u16                        edge;
+       struct device             *pdev;
+       struct mlx5e_tstamp       *tstamp;
+       __be32                     mkey_be;
+       unsigned long              state;
+
+       /* control path */
+       struct mlx5_wq_ctrl        wq_ctrl;
+       struct mlx5e_channel      *channel;
+       int                        txq_ix;
+       u32                        rate_limit;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_xdpsq {
+       /* data path */
+
+       /* dirtied @rx completion */
+       u16                        cc;
+       u16                        pc;
+
+       struct mlx5e_cq            cq;
+
+       /* write@xmit, read@completion */
+       struct {
+               struct mlx5e_dma_info     *di;
+               bool                       doorbell;
+       } db;
+
+       /* read only */
+       struct mlx5_wq_cyc         wq;
+       void __iomem              *uar_map;
+       u32                        sqn;
+       struct device             *pdev;
+       __be32                     mkey_be;
+       u8                         min_inline_mode;
+       unsigned long              state;
+
+       /* control path */
+       struct mlx5_wq_ctrl        wq_ctrl;
+       struct mlx5e_channel      *channel;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_icosq {
+       /* data path */
+
+       /* dirtied @completion */
+       u16                        cc;
+
+       /* dirtied @xmit */
+       u16                        pc ____cacheline_aligned_in_smp;
+       u32                        dma_fifo_pc;
+       u16                        prev_cc;
+
+       struct mlx5e_cq            cq;
+
+       /* write@xmit, read@completion */
+       struct {
+               struct mlx5e_sq_wqe_info *ico_wqe;
+       } db;
+
+       /* read only */
+       struct mlx5_wq_cyc         wq;
+       void __iomem              *uar_map;
+       u32                        sqn;
+       u16                        edge;
+       struct device             *pdev;
+       __be32                     mkey_be;
+       unsigned long              state;
+
+       /* control path */
+       struct mlx5_wq_ctrl        wq_ctrl;
+       struct mlx5e_channel      *channel;
+} ____cacheline_aligned_in_smp;
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+       return (((wq->sz_m1 & (cc - pc)) >= n) || (cc == pc));
+}
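
mlx5e_wqc_has_room_for() above relies on cc (consumer) and pc (producer) being free-running u16 counters over a power-of-two ring: masking their difference with sz_m1 yields the number of free slots, and cc == pc is special-cased because an empty ring and a full ring both mask to zero. A small user-space check of that arithmetic; the ring size and counter values are made-up examples:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool has_room_for(uint16_t sz_m1, uint16_t cc, uint16_t pc, uint16_t n)
{
	return ((uint16_t)(sz_m1 & (uint16_t)(cc - pc)) >= n) || (cc == pc);
}

int main(void)
{
	uint16_t sz_m1 = 7; /* ring of 8 slots */

	assert(has_room_for(sz_m1, 0, 0, 8));   /* empty ring */
	assert(has_room_for(sz_m1, 0, 5, 3));   /* 5 in flight: 3 free */
	assert(!has_room_for(sz_m1, 0, 5, 4));  /* ...but not 4 */
	assert(!has_room_for(sz_m1, 0, 8, 1));  /* full ring */

	/* counters wrap: pc advanced past 65535, cc trails by 3 */
	assert(has_room_for(sz_m1, 65535, 2, 5));
	puts("room checks OK");
	return 0;
}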
 
 struct mlx5e_dma_info {
        struct page     *page;
        dma_addr_t      addr;
 };
 
+struct mlx5e_umr_dma_info {
+       __be64                *mtt;
+       dma_addr_t             mtt_addr;
+       struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
+       struct mlx5e_umr_wqe   wqe;
+};
+
+struct mlx5e_mpw_info {
+       struct mlx5e_umr_dma_info umr;
+       u16 consumed_strides;
+       u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
+};
+
 struct mlx5e_rx_am_stats {
        int ppms; /* packets per msec */
        int epms; /* events per msec */
@@ -347,6 +482,11 @@ struct mlx5e_page_cache {
        struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
 };
 
+struct mlx5e_rq;
+typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16);
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+
 struct mlx5e_rq {
        /* data path */
        struct mlx5_wq_ll      wq;
@@ -381,7 +521,10 @@ struct mlx5e_rq {
        u16                    rx_headroom;
 
        struct mlx5e_rx_am     am; /* Adaptive Moderation */
+
+       /* XDP */
        struct bpf_prog       *xdp_prog;
+       struct mlx5e_xdpsq     xdpsq;
 
        /* control */
        struct mlx5_wq_ctrl    wq_ctrl;
@@ -390,118 +533,10 @@ struct mlx5e_rq {
        u32                    mpwqe_num_strides;
        u32                    rqn;
        struct mlx5e_channel  *channel;
-       struct mlx5e_priv     *priv;
+       struct mlx5_core_dev  *mdev;
        struct mlx5_core_mkey  umr_mkey;
 } ____cacheline_aligned_in_smp;
 
-struct mlx5e_umr_dma_info {
-       __be64                *mtt;
-       dma_addr_t             mtt_addr;
-       struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
-       struct mlx5e_umr_wqe   wqe;
-};
-
-struct mlx5e_mpw_info {
-       struct mlx5e_umr_dma_info umr;
-       u16 consumed_strides;
-       u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
-};
-
-struct mlx5e_tx_wqe_info {
-       u32 num_bytes;
-       u8  num_wqebbs;
-       u8  num_dma;
-};
-
-enum mlx5e_dma_map_type {
-       MLX5E_DMA_MAP_SINGLE,
-       MLX5E_DMA_MAP_PAGE
-};
-
-struct mlx5e_sq_dma {
-       dma_addr_t              addr;
-       u32                     size;
-       enum mlx5e_dma_map_type type;
-};
-
-enum {
-       MLX5E_SQ_STATE_ENABLED,
-       MLX5E_SQ_STATE_BF_ENABLE,
-};
-
-struct mlx5e_sq_wqe_info {
-       u8  opcode;
-       u8  num_wqebbs;
-};
-
-enum mlx5e_sq_type {
-       MLX5E_SQ_TXQ,
-       MLX5E_SQ_ICO,
-       MLX5E_SQ_XDP
-};
-
-struct mlx5e_sq {
-       /* data path */
-
-       /* dirtied @completion */
-       u16                        cc;
-       u32                        dma_fifo_cc;
-
-       /* dirtied @xmit */
-       u16                        pc ____cacheline_aligned_in_smp;
-       u32                        dma_fifo_pc;
-       u16                        bf_offset;
-       u16                        prev_cc;
-       u8                         bf_budget;
-       struct mlx5e_sq_stats      stats;
-
-       struct mlx5e_cq            cq;
-
-       /* pointers to per tx element info: write@xmit, read@completion */
-       union {
-               struct {
-                       struct sk_buff           **skb;
-                       struct mlx5e_sq_dma       *dma_fifo;
-                       struct mlx5e_tx_wqe_info  *wqe_info;
-               } txq;
-               struct mlx5e_sq_wqe_info *ico_wqe;
-               struct {
-                       struct mlx5e_sq_wqe_info  *wqe_info;
-                       struct mlx5e_dma_info     *di;
-                       bool                       doorbell;
-               } xdp;
-       } db;
-
-       /* read only */
-       struct mlx5_wq_cyc         wq;
-       u32                        dma_fifo_mask;
-       void __iomem              *uar_map;
-       struct netdev_queue       *txq;
-       u32                        sqn;
-       u16                        bf_buf_size;
-       u16                        max_inline;
-       u8                         min_inline_mode;
-       u16                        edge;
-       struct device             *pdev;
-       struct mlx5e_tstamp       *tstamp;
-       __be32                     mkey_be;
-       unsigned long              state;
-
-       /* control path */
-       struct mlx5_wq_ctrl        wq_ctrl;
-       struct mlx5_sq_bfreg       bfreg;
-       struct mlx5e_channel      *channel;
-       int                        tc;
-       u32                        rate_limit;
-       u8                         type;
-} ____cacheline_aligned_in_smp;
-
-static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
-{
-       return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) ||
-               (sq->cc  == sq->pc));
-}
-
 enum channel_flags {
        MLX5E_CHANNEL_NAPI_SCHED = 1,
 };
@@ -509,9 +544,8 @@ enum channel_flags {
 struct mlx5e_channel {
        /* data path */
        struct mlx5e_rq            rq;
-       struct mlx5e_sq            xdp_sq;
-       struct mlx5e_sq            sq[MLX5E_MAX_NUM_TC];
-       struct mlx5e_sq            icosq;   /* internal control operations */
+       struct mlx5e_txqsq         sq[MLX5E_MAX_NUM_TC];
+       struct mlx5e_icosq         icosq;   /* internal control operations */
        bool                       xdp;
        struct napi_struct         napi;
        struct device             *pdev;
@@ -522,10 +556,18 @@ struct mlx5e_channel {
 
        /* control */
        struct mlx5e_priv         *priv;
+       struct mlx5_core_dev      *mdev;
+       struct mlx5e_tstamp       *tstamp;
        int                        ix;
        int                        cpu;
 };
 
+struct mlx5e_channels {
+       struct mlx5e_channel **c;
+       unsigned int           num;
+       struct mlx5e_params    params;
+};
+
 enum mlx5e_traffic_types {
        MLX5E_TT_IPV4_TCP,
        MLX5E_TT_IPV6_TCP,
@@ -675,34 +717,17 @@ enum {
        MLX5E_NIC_PRIO
 };
 
-struct mlx5e_profile {
-       void    (*init)(struct mlx5_core_dev *mdev,
-                       struct net_device *netdev,
-                       const struct mlx5e_profile *profile, void *ppriv);
-       void    (*cleanup)(struct mlx5e_priv *priv);
-       int     (*init_rx)(struct mlx5e_priv *priv);
-       void    (*cleanup_rx)(struct mlx5e_priv *priv);
-       int     (*init_tx)(struct mlx5e_priv *priv);
-       void    (*cleanup_tx)(struct mlx5e_priv *priv);
-       void    (*enable)(struct mlx5e_priv *priv);
-       void    (*disable)(struct mlx5e_priv *priv);
-       void    (*update_stats)(struct mlx5e_priv *priv);
-       int     (*max_nch)(struct mlx5_core_dev *mdev);
-       int     max_tc;
-};
-
 struct mlx5e_priv {
        /* priv data path fields - start */
-       struct mlx5e_sq            **txq_to_sq_map;
-       int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
-       struct bpf_prog *xdp_prog;
+       struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
+       int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
        /* priv data path fields - end */
 
        unsigned long              state;
        struct mutex               state_lock; /* Protects Interface state */
        struct mlx5e_rq            drop_rq;
 
-       struct mlx5e_channel     **channel;
+       struct mlx5e_channels      channels;
        u32                        tisn[MLX5E_MAX_NUM_TC];
        struct mlx5e_rqt           indir_rqt;
        struct mlx5e_tir           indir_tir[MLX5E_NUM_INDIR_TIRS];
@@ -712,7 +737,6 @@ struct mlx5e_priv {
        struct mlx5e_flow_steering fs;
        struct mlx5e_vxlan_db      vxlan;
 
-       struct mlx5e_params        params;
        struct workqueue_struct    *wq;
        struct work_struct         update_carrier_work;
        struct work_struct         set_rx_mode_work;
@@ -732,9 +756,24 @@ struct mlx5e_priv {
        void                      *ppriv;
 };
 
+struct mlx5e_profile {
+       void    (*init)(struct mlx5_core_dev *mdev,
+                       struct net_device *netdev,
+                       const struct mlx5e_profile *profile, void *ppriv);
+       void    (*cleanup)(struct mlx5e_priv *priv);
+       int     (*init_rx)(struct mlx5e_priv *priv);
+       void    (*cleanup_rx)(struct mlx5e_priv *priv);
+       int     (*init_tx)(struct mlx5e_priv *priv);
+       void    (*cleanup_tx)(struct mlx5e_priv *priv);
+       void    (*enable)(struct mlx5e_priv *priv);
+       void    (*disable)(struct mlx5e_priv *priv);
+       void    (*update_stats)(struct mlx5e_priv *priv);
+       int     (*max_nch)(struct mlx5_core_dev *mdev);
+       int     max_tc;
+};
+
 void mlx5e_build_ptys2ethtool_map(void);
 
-void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw);
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                       void *accel_priv, select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
@@ -744,7 +783,9 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
-void mlx5e_free_sq_descs(struct mlx5e_sq *sq);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
 
 void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
                        bool recycle);
@@ -792,7 +833,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
                             struct ptp_clock_event *event);
 int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
 int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
-void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
 
 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
                          u16 vid);
@@ -801,14 +842,40 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
 void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
 void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
 
-int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd);
+int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd);
 
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc,
-                                   enum mlx5e_traffic_types tt);
+struct mlx5e_redirect_rqt_param {
+       bool is_rss;
+       union {
+               u32 rqn; /* Direct RQN (Non-RSS) */
+               struct {
+                       u8 hfunc;
+                       struct mlx5e_channels *channels;
+               } rss; /* RSS data */
+       };
+};
+
+int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
+                      struct mlx5e_redirect_rqt_param rrp);
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
+                                   enum mlx5e_traffic_types tt,
+                                   void *tirc);
 
 int mlx5e_open_locked(struct net_device *netdev);
 int mlx5e_close_locked(struct net_device *netdev);
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+                       struct mlx5e_channels *chs);
+void mlx5e_close_channels(struct mlx5e_channels *chs);
+
+/* Function pointer used to modify HW settings while
+ * switching channels.
+ */
+typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
+void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+                               struct mlx5e_channels *new_chs,
+                               mlx5e_fp_hw_modify hw_modify);
+
 void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
                                   u32 *indirection_rqt, int len,
                                   int num_channels);
@@ -816,30 +883,43 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type);
+void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
+                             struct mlx5e_params *params, u8 rq_type);
 
-static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
-                                     struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)
+static inline
+struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
 {
-       u16 ofst = sq->bf_offset;
+       u16                         pi   = *pc & wq->sz_m1;
+       struct mlx5e_tx_wqe        *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+       struct mlx5_wqe_ctrl_seg   *cseg = &wqe->ctrl;
+
+       memset(cseg, 0, sizeof(*cseg));
+
+       cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+       cseg->qpn_ds           = cpu_to_be32((sqn << 8) | 0x01);
+
+       (*pc)++;
 
+       return wqe;
+}
+
+static inline
+void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
+                    void __iomem *uar_map,
+                    struct mlx5_wqe_ctrl_seg *ctrl)
+{
+       ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
        /* ensure wqe is visible to device before updating doorbell record */
        dma_wmb();
 
-       *sq->wq.db = cpu_to_be32(sq->pc);
+       *wq->db = cpu_to_be32(pc);
 
        /* ensure doorbell record is visible to device before ringing the
         * doorbell
         */
        wmb();
-       if (bf_sz)
-               __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz);
-       else
-               mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL);
-       /* flush the write-combining mapped buffer */
-       wmb();
 
-       sq->bf_offset ^= sq->bf_buf_size;
+       mlx5_write64((__be32 *)ctrl, uar_map, NULL);
 }
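
mlx5e_notify_hw() above enforces a two-step publish order: the WQE must be visible before the doorbell record (dma_wmb()), and the record before the MMIO doorbell write (wmb()). Below is a user-space analogue of that discipline built on C11 release fences; it illustrates only the ordering and does not model real DMA or MMIO:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t wqe[16];            /* stands in for the WQE memory */
static _Atomic uint32_t db_record;  /* doorbell record polled by "HW" */
static _Atomic uint32_t doorbell;   /* MMIO doorbell stand-in */

static void notify_hw(uint32_t pc)
{
	/* ensure the WQE is visible before the doorbell record
	 * (kernel: dma_wmb())
	 */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&db_record, pc, memory_order_relaxed);

	/* ensure the record is visible before ringing the doorbell
	 * (kernel: wmb())
	 */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&doorbell, pc, memory_order_relaxed);
}

int main(void)
{
	wqe[0] = 0xcafe; /* step 1: fill the WQE payload */
	notify_hw(1);
	printf("db_record=%u doorbell=%u\n",
	       (unsigned int)atomic_load(&db_record),
	       (unsigned int)atomic_load(&doorbell));
	return 0;
}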
 
 static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
@@ -895,8 +975,7 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
                       struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
-int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
-                                    bool enable_uc_lb);
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
 
 struct mlx5_eswitch_rep;
 int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
@@ -928,10 +1007,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
 int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
-void mlx5e_add_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti);
-void mlx5e_del_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti);
 
 int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
                            void *sp);
index 68419a01db36e33765b1cc366455da8b55420da7..c4e9cc79f5c77054029748c6d9785d62f82c8b41 100644
@@ -174,13 +174,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                                 enum arfs_type type)
 {
        struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
-       struct mlx5_flow_act flow_act = {
-               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
-               .encap_id = 0,
-       };
-       struct mlx5_flow_destination dest;
        struct mlx5e_tir *tir = priv->indir_tir;
+       struct mlx5_flow_destination dest;
+       MLX5_DECLARE_FLOW_ACT(flow_act);
        struct mlx5_flow_spec *spec;
        int err = 0;
 
@@ -469,15 +465,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
 static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
                                              struct arfs_rule *arfs_rule)
 {
-       struct mlx5_flow_act flow_act = {
-               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
-               .encap_id = 0,
-       };
        struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
        struct arfs_tuple *tuple = &arfs_rule->tuple;
        struct mlx5_flow_handle *rule = NULL;
        struct mlx5_flow_destination dest;
+       MLX5_DECLARE_FLOW_ACT(flow_act);
        struct arfs_table *arfs_table;
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_table *ft;
index 37e66eef6fb5ea62576e0a8b012b04e6ba579d56..e706a87fc8b2b06b8f620a96f66d316e90070e04 100644
@@ -90,6 +90,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct hwtstamp_config config;
+       int err;
 
        if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
                return -EOPNOTSUPP;
@@ -111,7 +112,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
        switch (config.rx_filter) {
        case HWTSTAMP_FILTER_NONE:
                /* Reset CQE compression to Admin default */
-               mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def);
+               mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
                break;
        case HWTSTAMP_FILTER_ALL:
        case HWTSTAMP_FILTER_SOME:
@@ -129,7 +130,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
                /* Disable CQE compression */
                netdev_warn(dev, "Disabling cqe compression");
-               mlx5e_modify_rx_cqe_compression_locked(priv, false);
+               err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+               if (err) {
+                       netdev_err(dev, "Failed disabling cqe compression err=%d\n", err);
+                       mutex_unlock(&priv->state_lock);
+                       return err;
+               }
                config.rx_filter = HWTSTAMP_FILTER_ALL;
                break;
        default:
index bd898d8deda0ce0c4d6dca7f1ac26722eacf96c4..f1f17f7a3cd049de412abdff58b00f314160be22 100644
@@ -107,10 +107,18 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
                goto err_dealloc_transport_domain;
        }
 
+       err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false);
+       if (err) {
+               mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err);
+               goto err_destroy_mkey;
+       }
+
        INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
 
        return 0;
 
+err_destroy_mkey:
+       mlx5_core_destroy_mkey(mdev, &res->mkey);
 err_dealloc_transport_domain:
        mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
 err_dealloc_pd:
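
The bfreg hunk above slots a new allocation into the usual kernel goto-unwind ladder: a failure jumps to the label that releases everything allocated before it, in reverse order, and each new resource adds one label (here err_destroy_mkey). A generic, self-contained sketch of the idiom with illustrative names:

#include <stdio.h>

static int alloc_a(void)  { puts("alloc a"); return 0; }
static void free_a(void)  { puts("free a"); }
static int alloc_b(void)  { puts("alloc b"); return 0; }
static void free_b(void)  { puts("free b"); }
static int alloc_c(void)  { puts("alloc c"); return -1; } /* fails */

static int create_resources(void)
{
	int err;

	err = alloc_a();
	if (err)
		goto out;
	err = alloc_b();
	if (err)
		goto err_free_a;
	err = alloc_c();
	if (err)
		goto err_free_b; /* like the new err_destroy_mkey label */
	return 0;

err_free_b:
	free_b();
err_free_a:
	free_a();
out:
	return err;
}

int main(void)
{
	printf("create_resources() = %d\n", create_resources());
	return 0;
}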
@@ -122,23 +130,26 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 {
        struct mlx5e_resources *res = &mdev->mlx5e_res;
 
+       mlx5_free_bfreg(mdev, &res->bfreg);
        mlx5_core_destroy_mkey(mdev, &res->mkey);
        mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
        mlx5_core_dealloc_pd(mdev, res->pdn);
 }
 
-int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
-                                    bool enable_uc_lb)
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
 {
+       struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_tir *tir;
-       void *in;
+       int err  = -ENOMEM;
+       u32 tirn = 0;
        int inlen;
-       int err = 0;
+       void *in;
+
 
        inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
        in = mlx5_vzalloc(inlen);
        if (!in)
-               return -ENOMEM;
+               goto out;
 
        if (enable_uc_lb)
                MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
@@ -147,13 +158,16 @@ int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
        MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
        list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
-               err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
+               tirn = tir->tirn;
+               err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
                if (err)
                        goto out;
        }
 
 out:
        kvfree(in);
+       if (err)
+               netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
 
        return err;
 }
index a004a5a1a4c22a742ef3f9939769c6b5c9445f46..af039b6c0799d4f2e988ce91939ed388dca86375 100644
@@ -152,12 +152,9 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
 }
 
 #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter))
-#define MLX5E_NUM_RQ_STATS(priv) \
-       (NUM_RQ_STATS * priv->params.num_channels * \
-        test_bit(MLX5E_STATE_OPENED, &priv->state))
+#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num)
 #define MLX5E_NUM_SQ_STATS(priv) \
-       (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \
-        test_bit(MLX5E_STATE_OPENED, &priv->state))
+       (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc)
 #define MLX5E_NUM_PFC_COUNTERS(priv) \
        ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \
          NUM_PPORT_PER_PRIO_PFC_COUNTERS)
@@ -262,17 +259,17 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
                return;
 
        /* per channel counters */
-       for (i = 0; i < priv->params.num_channels; i++)
+       for (i = 0; i < priv->channels.num; i++)
                for (j = 0; j < NUM_RQ_STATS; j++)
                        sprintf(data + (idx++) * ETH_GSTRING_LEN,
                                rq_stats_desc[j].format, i);
 
-       for (tc = 0; tc < priv->params.num_tc; tc++)
-               for (i = 0; i < priv->params.num_channels; i++)
+       for (tc = 0; tc < priv->channels.params.num_tc; tc++)
+               for (i = 0; i < priv->channels.num; i++)
                        for (j = 0; j < NUM_SQ_STATS; j++)
                                sprintf(data + (idx++) * ETH_GSTRING_LEN,
                                        sq_stats_desc[j].format,
-                                       priv->channeltc_to_txq_map[i][tc]);
+                                       priv->channel_tc2txq[i][tc]);
 }
 
 static void mlx5e_get_strings(struct net_device *dev,
@@ -303,6 +300,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                                    struct ethtool_stats *stats, u64 *data)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_channels *channels;
        struct mlx5_priv *mlx5_priv;
        int i, j, tc, prio, idx = 0;
        unsigned long pfc_combined;
@@ -313,6 +311,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
        mutex_lock(&priv->state_lock);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_update_stats(priv);
+       channels = &priv->channels;
        mutex_unlock(&priv->state_lock);
 
        for (i = 0; i < NUM_SW_COUNTERS; i++)
@@ -382,16 +381,16 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                return;
 
        /* per channel counters */
-       for (i = 0; i < priv->params.num_channels; i++)
+       for (i = 0; i < channels->num; i++)
                for (j = 0; j < NUM_RQ_STATS; j++)
                        data[idx++] =
-                              MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats,
+                              MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats,
                                                    rq_stats_desc, j);
 
-       for (tc = 0; tc < priv->params.num_tc; tc++)
-               for (i = 0; i < priv->params.num_channels; i++)
+       for (tc = 0; tc < priv->channels.params.num_tc; tc++)
+               for (i = 0; i < channels->num; i++)
                        for (j = 0; j < NUM_SQ_STATS; j++)
-                               data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats,
+                               data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats,
                                                                   sq_stats_desc, j);
 }
 
@@ -406,8 +405,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
        if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
                return num_wqe;
 
-       stride_size = 1 << priv->params.mpwqe_log_stride_sz;
-       num_strides = 1 << priv->params.mpwqe_log_num_strides;
+       stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz;
+       num_strides = 1 << priv->channels.params.mpwqe_log_num_strides;
        wqe_size = stride_size * num_strides;
 
        packets_per_wqe = wqe_size /
@@ -427,8 +426,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
        if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
                return num_packets;
 
-       stride_size = 1 << priv->params.mpwqe_log_stride_sz;
-       num_strides = 1 << priv->params.mpwqe_log_num_strides;
+       stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz;
+       num_strides = 1 << priv->channels.params.mpwqe_log_num_strides;
        wqe_size = stride_size * num_strides;
 
        num_packets = (1 << order_base_2(num_packets));
@@ -443,26 +442,25 @@ static void mlx5e_get_ringparam(struct net_device *dev,
                                struct ethtool_ringparam *param)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       int rq_wq_type = priv->params.rq_wq_type;
+       int rq_wq_type = priv->channels.params.rq_wq_type;
 
        param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
                                                         1 << mlx5_max_log_rq_size(rq_wq_type));
        param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
        param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-                                                    1 << priv->params.log_rq_size);
-       param->tx_pending     = 1 << priv->params.log_sq_size;
+                                                    1 << priv->channels.params.log_rq_size);
+       param->tx_pending     = 1 << priv->channels.params.log_sq_size;
 }
 
 static int mlx5e_set_ringparam(struct net_device *dev,
                               struct ethtool_ringparam *param)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       bool was_opened;
-       int rq_wq_type = priv->params.rq_wq_type;
+       int rq_wq_type = priv->channels.params.rq_wq_type;
+       struct mlx5e_channels new_channels = {};
        u32 rx_pending_wqes;
        u32 min_rq_size;
        u32 max_rq_size;
-       u16 min_rx_wqes;
        u8 log_rq_size;
        u8 log_sq_size;
        u32 num_mtts;
@@ -500,7 +498,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
        }
 
        num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes);
-       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+       if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
            !MLX5E_VALID_NUM_MTTS(num_mtts)) {
                netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
                            __func__, param->rx_pending);
@@ -522,26 +520,29 @@ static int mlx5e_set_ringparam(struct net_device *dev,
 
        log_rq_size = order_base_2(rx_pending_wqes);
        log_sq_size = order_base_2(param->tx_pending);
-       min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes);
 
-       if (log_rq_size == priv->params.log_rq_size &&
-           log_sq_size == priv->params.log_sq_size &&
-           min_rx_wqes == priv->params.min_rx_wqes)
+       if (log_rq_size == priv->channels.params.log_rq_size &&
+           log_sq_size == priv->channels.params.log_sq_size)
                return 0;
 
        mutex_lock(&priv->state_lock);
 
-       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened)
-               mlx5e_close_locked(dev);
+       new_channels.params = priv->channels.params;
+       new_channels.params.log_rq_size = log_rq_size;
+       new_channels.params.log_sq_size = log_sq_size;
 
-       priv->params.log_rq_size = log_rq_size;
-       priv->params.log_sq_size = log_sq_size;
-       priv->params.min_rx_wqes = min_rx_wqes;
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               goto unlock;
+       }
+
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               goto unlock;
 
-       if (was_opened)
-               err = mlx5e_open_locked(dev);
+       mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
+unlock:
        mutex_unlock(&priv->state_lock);
 
        return err;
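
The rewritten set_ringparam is the template the rest of this patch repeats: stage the wanted parameters in a local mlx5e_channels, record them directly if the netdev is closed, otherwise open the fresh channel set first and only then switch, so a failed allocation leaves the old channels serving traffic. A minimal standalone model of that idiom, with a pthread mutex and stub open/switch calls standing in for the driver's:

```c
#include <pthread.h>
#include <stdbool.h>

struct params { int log_rq_size, log_sq_size; };
struct channels { struct params params; /* would also hold HW queues */ };
struct priv {
	pthread_mutex_t state_lock;
	bool opened;
	struct channels channels;
};

/* stand-ins: only the shape matches the driver calls */
static int open_channels(struct priv *p, struct channels *c)
{
	(void)p; (void)c;
	return 0;
}

static void switch_channels(struct priv *p, struct channels *c)
{
	p->channels = *c; /* swap in new set; old one would be closed here */
}

static int reconfigure(struct priv *priv, const struct params *wanted)
{
	struct channels new_channels = { .params = *wanted };
	int err = 0;

	pthread_mutex_lock(&priv->state_lock);

	if (!priv->opened) {
		priv->channels.params = new_channels.params; /* record only */
		goto unlock;
	}

	err = open_channels(priv, &new_channels); /* build new set first */
	if (err)
		goto unlock;                      /* old channels untouched */

	switch_channels(priv, &new_channels);     /* swap, then close old */
unlock:
	pthread_mutex_unlock(&priv->state_lock);
	return err;
}
```
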
@@ -553,7 +554,7 @@ static void mlx5e_get_channels(struct net_device *dev,
        struct mlx5e_priv *priv = netdev_priv(dev);
 
        ch->max_combined   = priv->profile->max_nch(priv->mdev);
-       ch->combined_count = priv->params.num_channels;
+       ch->combined_count = priv->channels.params.num_channels;
 }
 
 static int mlx5e_set_channels(struct net_device *dev,
@@ -561,8 +562,8 @@ static int mlx5e_set_channels(struct net_device *dev,
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        unsigned int count = ch->combined_count;
+       struct mlx5e_channels new_channels = {};
        bool arfs_enabled;
-       bool was_opened;
        int err = 0;
 
        if (!count) {
@@ -571,27 +572,32 @@ static int mlx5e_set_channels(struct net_device *dev,
                return -EINVAL;
        }
 
-       if (priv->params.num_channels == count)
+       if (priv->channels.params.num_channels == count)
                return 0;
 
        mutex_lock(&priv->state_lock);
 
-       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened)
-               mlx5e_close_locked(dev);
+       new_channels.params = priv->channels.params;
+       new_channels.params.num_channels = count;
+       mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt,
+                                     MLX5E_INDIR_RQT_SIZE, count);
+
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               goto out;
+       }
+
+       /* Create fresh channels with new parameters */
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               goto out;
 
        arfs_enabled = dev->features & NETIF_F_NTUPLE;
        if (arfs_enabled)
                mlx5e_arfs_disable(priv);
 
-       priv->params.num_channels = count;
-       mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt,
-                                     MLX5E_INDIR_RQT_SIZE, count);
-
-       if (was_opened)
-               err = mlx5e_open_locked(dev);
-       if (err)
-               goto out;
+       /* Switch to new channels, set new parameters and close old ones */
+       mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
        if (arfs_enabled) {
                err = mlx5e_arfs_enable(priv);
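
Note that the indirection table is rebuilt from the new channel count before the fresh channels are opened. A plausible default for such a table — and, as far as I recall, what mlx5e_build_default_indir_rqt amounts to, though that is an assumption here — is round-robin modulo the channel count:

```c
#include <stdint.h>

#define INDIR_RQT_SIZE 128 /* assumed table size, for illustration */

static void build_default_indir(uint32_t *indir, int len, int num_channels)
{
	int i;

	for (i = 0; i < len; i++)
		indir[i] = i % num_channels; /* spread slots across channels */
}
```

With num_channels = 6 each slot cycles 0..5, so every receive queue owns roughly len / num_channels entries of the RSS table.
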
@@ -614,49 +620,24 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
        if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
                return -EOPNOTSUPP;
 
-       coal->rx_coalesce_usecs       = priv->params.rx_cq_moderation.usec;
-       coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts;
-       coal->tx_coalesce_usecs       = priv->params.tx_cq_moderation.usec;
-       coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts;
-       coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled;
+       coal->rx_coalesce_usecs       = priv->channels.params.rx_cq_moderation.usec;
+       coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts;
+       coal->tx_coalesce_usecs       = priv->channels.params.tx_cq_moderation.usec;
+       coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts;
+       coal->use_adaptive_rx_coalesce = priv->channels.params.rx_am_enabled;
 
        return 0;
 }
 
-static int mlx5e_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+static void
+mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
 {
-       struct mlx5e_priv *priv    = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_channel *c;
-       bool restart =
-               !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled;
-       bool was_opened;
-       int err = 0;
        int tc;
        int i;
 
-       if (!MLX5_CAP_GEN(mdev, cq_moderation))
-               return -EOPNOTSUPP;
-
-       mutex_lock(&priv->state_lock);
-
-       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened && restart) {
-               mlx5e_close_locked(netdev);
-               priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce;
-       }
-
-       priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs;
-       priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames;
-       priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs;
-       priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames;
-
-       if (!was_opened || restart)
-               goto out;
-
-       for (i = 0; i < priv->params.num_channels; ++i) {
-               c = priv->channel[i];
+       for (i = 0; i < priv->channels.num; ++i) {
+               struct mlx5e_channel *c = priv->channels.c[i];
 
                for (tc = 0; tc < c->num_tc; tc++) {
                        mlx5_core_modify_cq_moderation(mdev,
@@ -669,11 +650,50 @@ static int mlx5e_set_coalesce(struct net_device *netdev,
                                               coal->rx_coalesce_usecs,
                                               coal->rx_max_coalesced_frames);
        }
+}
 
-out:
-       if (was_opened && restart)
-               err = mlx5e_open_locked(netdev);
+static int mlx5e_set_coalesce(struct net_device *netdev,
+                             struct ethtool_coalesce *coal)
+{
+       struct mlx5e_priv *priv    = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_channels new_channels = {};
+       int err = 0;
+       bool reset;
+
+       if (!MLX5_CAP_GEN(mdev, cq_moderation))
+               return -EOPNOTSUPP;
+
+       mutex_lock(&priv->state_lock);
+       new_channels.params = priv->channels.params;
+
+       new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs;
+       new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames;
+       new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs;
+       new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames;
+       new_channels.params.rx_am_enabled         = !!coal->use_adaptive_rx_coalesce;
+
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               goto out;
+       }
+       /* netdev is opened, channels are active */
+
+       reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_am_enabled;
+       if (!reset) {
+               mlx5e_set_priv_channels_coalesce(priv, coal);
+               priv->channels.params = new_channels.params;
+               goto out;
+       }
+
+       /* open fresh channels with new coal parameters */
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               goto out;
+
+       mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
+out:
        mutex_unlock(&priv->state_lock);
        return err;
 }
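
The split above is the point of the refactor: plain usec/frame changes are written to the live CQs via mlx5e_set_priv_channels_coalesce(), and only toggling adaptive RX moderation pays for a full channel reset. A compilable standalone sketch of that decision; modify_cq() and reset_channels() are hypothetical stand-ins:

```c
#include <stdbool.h>
#include <stdio.h>

struct coal { unsigned rx_usecs, rx_frames; bool adaptive_rx; };

/* stand-in: would issue one MODIFY_CQ command per ring */
static void modify_cq(int ring, unsigned usecs, unsigned frames)
{
	printf("ring %d: %u us / %u frames\n", ring, usecs, frames);
}

/* stand-in: would open fresh channels, switch, close the old ones */
static int reset_channels(const struct coal *req)
{
	(void)req;
	return 0;
}

static int set_coalesce(struct coal *cur, const struct coal *req, int nrings)
{
	/* only flipping adaptive moderation forces a channel reset */
	bool reset = req->adaptive_rx != cur->adaptive_rx;
	int i;

	if (!reset) {
		for (i = 0; i < nrings; i++)
			modify_cq(i, req->rx_usecs, req->rx_frames);
		*cur = *req;
		return 0;
	}

	return reset_channels(req);
}
```
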
@@ -968,7 +988,7 @@ static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
-       return sizeof(priv->params.toeplitz_hash_key);
+       return sizeof(priv->channels.params.toeplitz_hash_key);
 }
 
 static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
@@ -982,15 +1002,15 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
        if (indir)
-               memcpy(indir, priv->params.indirection_rqt,
-                      sizeof(priv->params.indirection_rqt));
+               memcpy(indir, priv->channels.params.indirection_rqt,
+                      sizeof(priv->channels.params.indirection_rqt));
 
        if (key)
-               memcpy(key, priv->params.toeplitz_hash_key,
-                      sizeof(priv->params.toeplitz_hash_key));
+               memcpy(key, priv->channels.params.toeplitz_hash_key,
+                      sizeof(priv->channels.params.toeplitz_hash_key));
 
        if (hfunc)
-               *hfunc = priv->params.rss_hfunc;
+               *hfunc = priv->channels.params.rss_hfunc;
 
        return 0;
 }
@@ -1006,7 +1026,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                memset(tirc, 0, ctxlen);
-               mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt);
+               mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc);
                mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
        }
 }
@@ -1030,25 +1050,37 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 
        mutex_lock(&priv->state_lock);
 
-       if (indir) {
-               u32 rqtn = priv->indir_rqt.rqtn;
-
-               memcpy(priv->params.indirection_rqt, indir,
-                      sizeof(priv->params.indirection_rqt));
-               mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0);
-       }
-
        if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
-           hfunc != priv->params.rss_hfunc) {
-               priv->params.rss_hfunc = hfunc;
+           hfunc != priv->channels.params.rss_hfunc) {
+               priv->channels.params.rss_hfunc = hfunc;
                hash_changed = true;
        }
 
+       if (indir) {
+               memcpy(priv->channels.params.indirection_rqt, indir,
+                      sizeof(priv->channels.params.indirection_rqt));
+
+               if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+                       u32 rqtn = priv->indir_rqt.rqtn;
+                       struct mlx5e_redirect_rqt_param rrp = {
+                               .is_rss = true,
+                               {
+                                       .rss = {
+                                               .hfunc = priv->channels.params.rss_hfunc,
+                                               .channels  = &priv->channels,
+                                       },
+                               },
+                       };
+
+                       mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+               }
+       }
+
        if (key) {
-               memcpy(priv->params.toeplitz_hash_key, key,
-                      sizeof(priv->params.toeplitz_hash_key));
+               memcpy(priv->channels.params.toeplitz_hash_key, key,
+                      sizeof(priv->channels.params.toeplitz_hash_key));
                hash_changed = hash_changed ||
-                              priv->params.rss_hfunc == ETH_RSS_HASH_TOP;
+                              priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
        }
 
        if (hash_changed)
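
The redirect call now takes a parameter object whose union is selected by is_rss, built inline in the hunk above. Below is a minimal model of that tagged-union shape; the direct-RQN member reflects my reading of the series, not a quote of the header:

```c
#include <stdbool.h>
#include <stdint.h>

struct channels; /* opaque for the sketch */

struct redirect_rqt_param {
	bool is_rss;
	union {
		uint32_t rqn;              /* direct RQ number, when !is_rss */
		struct {
			uint8_t hfunc;     /* RSS hash function in use */
			struct channels *channels;
		} rss;                     /* valid when is_rss */
	};
};

static void fill_rss(struct redirect_rqt_param *rrp,
		     struct channels *chs, uint8_t hfunc)
{
	/* same initialization as the hunk, minus one brace level */
	*rrp = (struct redirect_rqt_param) {
		.is_rss = true,
		.rss = { .hfunc = hfunc, .channels = chs },
	};
}
```
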
@@ -1069,7 +1101,7 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
 
        switch (info->cmd) {
        case ETHTOOL_GRXRINGS:
-               info->data = priv->params.num_channels;
+               info->data = priv->channels.params.num_channels;
                break;
        case ETHTOOL_GRXCLSRLCNT:
                info->rule_cnt = priv->fs.ethtool.tot_num_rules;
@@ -1097,7 +1129,7 @@ static int mlx5e_get_tunable(struct net_device *dev,
 
        switch (tuna->id) {
        case ETHTOOL_TX_COPYBREAK:
-               *(u32 *)data = priv->params.tx_max_inline;
+               *(u32 *)data = priv->channels.params.tx_max_inline;
                break;
        default:
                err = -EINVAL;
@@ -1113,9 +1145,11 @@ static int mlx5e_set_tunable(struct net_device *dev,
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5_core_dev *mdev = priv->mdev;
-       bool was_opened;
-       u32 val;
+       struct mlx5e_channels new_channels = {};
        int err = 0;
+       u32 val;
+
+       mutex_lock(&priv->state_lock);
 
        switch (tuna->id) {
        case ETHTOOL_TX_COPYBREAK:
@@ -1125,24 +1159,26 @@ static int mlx5e_set_tunable(struct net_device *dev,
                        break;
                }
 
-               mutex_lock(&priv->state_lock);
+               new_channels.params = priv->channels.params;
+               new_channels.params.tx_max_inline = val;
 
-               was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-               if (was_opened)
-                       mlx5e_close_locked(dev);
-
-               priv->params.tx_max_inline = val;
+               if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+                       priv->channels.params = new_channels.params;
+                       break;
+               }
 
-               if (was_opened)
-                       err = mlx5e_open_locked(dev);
+               err = mlx5e_open_channels(priv, &new_channels);
+               if (err)
+                       break;
+               mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
-               mutex_unlock(&priv->state_lock);
                break;
        default:
                err = -EINVAL;
                break;
        }
 
+       mutex_unlock(&priv->state_lock);
        return err;
 }
 
@@ -1442,15 +1478,15 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_channels new_channels = {};
        bool rx_mode_changed;
        u8 rx_cq_period_mode;
        int err = 0;
-       bool reset;
 
        rx_cq_period_mode = enable ?
                MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
                MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
-       rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode;
+       rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode;
 
        if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
            !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
@@ -1459,16 +1495,51 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
        if (!rx_mode_changed)
                return 0;
 
-       reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (reset)
-               mlx5e_close_locked(netdev);
+       new_channels.params = priv->channels.params;
+       mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode);
 
-       mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode);
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               return 0;
+       }
 
-       if (reset)
-               err = mlx5e_open_locked(netdev);
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               return err;
 
-       return err;
+       mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+       return 0;
+}
+
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val)
+{
+       bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
+       struct mlx5e_channels new_channels = {};
+       int err = 0;
+
+       if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+               return new_val ? -EOPNOTSUPP : 0;
+
+       if (curr_val == new_val)
+               return 0;
+
+       new_channels.params = priv->channels.params;
+       MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+
+       mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
+                                new_channels.params.rq_wq_type);
+
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               return 0;
+       }
+
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               return err;
+
+       mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+       return 0;
 }
 
 static int set_pflag_rx_cqe_compress(struct net_device *netdev,
@@ -1486,8 +1557,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
        }
 
        mlx5e_modify_rx_cqe_compression_locked(priv, enable);
-       priv->params.rx_cqe_compress_def = enable;
-       mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type);
+       priv->channels.params.rx_cqe_compress_def = enable;
 
        return 0;
 }
@@ -1499,7 +1569,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        bool enable = !!(wanted_flags & flag);
-       u32 changes = wanted_flags ^ priv->params.pflags;
+       u32 changes = wanted_flags ^ priv->channels.params.pflags;
        int err;
 
        if (!(changes & flag))
@@ -1512,7 +1582,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
                return err;
        }
 
-       MLX5E_SET_PFLAG(priv, flag, enable);
+       MLX5E_SET_PFLAG(&priv->channels.params, flag, enable);
        return 0;
 }
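
mlx5e_handle_pflag() keeps the usual private-flags pattern: XOR the wanted word against the current one, act only if the requested bit actually changed, and commit the bit once the handler succeeds. A self-contained version of that pattern, with illustrative names:

```c
#include <stdbool.h>
#include <stdint.h>

static uint32_t pflags; /* stands in for params->pflags */

static void set_pflag(uint32_t flag, bool enable)
{
	if (enable)
		pflags |= flag;
	else
		pflags &= ~flag;
}

static int handle_pflag(uint32_t wanted, uint32_t flag,
			int (*handler)(bool enable))
{
	uint32_t changes = wanted ^ pflags;
	bool enable = wanted & flag;
	int err;

	if (!(changes & flag))  /* requested bit unchanged: nothing to do */
		return 0;

	err = handler(enable);
	if (err)                /* handler failed: leave the flag word alone */
		return err;

	set_pflag(flag, enable);
	return 0;
}
```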
 
@@ -1541,7 +1611,7 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
-       return priv->params.pflags;
+       return priv->channels.params.pflags;
 }
 
 static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
index f2762e45c8ae2aadd5366ea467a5ce3b4edb3d7e..5376d69a6b1a7b164567fac89ef48adadf5aba48 100644 (file)
@@ -159,14 +159,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
                                 enum mlx5e_vlan_rule_type rule_type,
                                 u16 vid, struct mlx5_flow_spec *spec)
 {
-       struct mlx5_flow_act flow_act = {
-               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
-               .encap_id = 0,
-       };
        struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
        struct mlx5_flow_destination dest;
        struct mlx5_flow_handle **rule_p;
+       MLX5_DECLARE_FLOW_ACT(flow_act);
        int err = 0;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -659,11 +655,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
                        u16 etype,
                        u8 proto)
 {
-       struct mlx5_flow_act flow_act = {
-               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
-               .encap_id = 0,
-       };
+       MLX5_DECLARE_FLOW_ACT(flow_act);
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        int err = 0;
@@ -848,13 +840,9 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
                                  struct mlx5e_l2_rule *ai, int type)
 {
-       struct mlx5_flow_act flow_act = {
-               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
-               .encap_id = 0,
-       };
        struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
        struct mlx5_flow_destination dest;
+       MLX5_DECLARE_FLOW_ACT(flow_act);
        struct mlx5_flow_spec *spec;
        int err = 0;
        u8 *mc_dmac;
index d55fff0ba388f746809ac601fc3863e94309fc12..e73c97fea55c8cfb9e84d878a0115fd090c0a7b4 100644 (file)
@@ -390,7 +390,7 @@ static int validate_flow(struct mlx5e_priv *priv,
        if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
                return -EINVAL;
 
-       if (fs->ring_cookie >= priv->params.num_channels &&
+       if (fs->ring_cookie >= priv->channels.params.num_channels &&
            fs->ring_cookie != RX_CLS_FLOW_DISC)
                return -EINVAL;
 
index 8ef64c4db2c21ad6a752338cb32b054a5e5f3968..ec389b1b51cbdf6f50b0ef299aac588b52fc7e72 100644 (file)
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
        struct mlx5_wq_param    wq;
-       bool                    am_enabled;
 };
 
 struct mlx5e_sq_param {
        u32                        sqc[MLX5_ST_SZ_DW(sqc)];
        struct mlx5_wq_param       wq;
-       u16                        max_inline;
-       u8                         min_inline_mode;
-       enum mlx5e_sq_type         type;
 };
 
 struct mlx5e_cq_param {
@@ -79,49 +75,47 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
                MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
+void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
+                             struct mlx5e_params *params, u8 rq_type)
 {
-       priv->params.rq_wq_type = rq_type;
-       priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
-       switch (priv->params.rq_wq_type) {
+       params->rq_wq_type = rq_type;
+       params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+       switch (params->rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               priv->params.log_rq_size = is_kdump_kernel() ?
+               params->log_rq_size = is_kdump_kernel() ?
                        MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
                        MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-               priv->params.mpwqe_log_stride_sz =
-                       MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-                       MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) :
-                       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev);
-               priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
-                       priv->params.mpwqe_log_stride_sz;
+               params->mpwqe_log_stride_sz =
+                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
+                       MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
+                       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+               params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+                       params->mpwqe_log_stride_sz;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
-               priv->params.log_rq_size = is_kdump_kernel() ?
+               params->log_rq_size = is_kdump_kernel() ?
                        MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
                        MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 
                /* Extra room needed for build_skb */
-               priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM +
+               params->lro_wqe_sz -= MLX5_RX_HEADROOM +
                        SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        }
-       priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
-                                              BIT(priv->params.log_rq_size));
 
-       mlx5_core_info(priv->mdev,
-                      "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
-                      priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
-                      BIT(priv->params.log_rq_size),
-                      BIT(priv->params.mpwqe_log_stride_sz),
-                      MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS));
+       mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+                      params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+                      BIT(params->log_rq_size),
+                      BIT(params->mpwqe_log_stride_sz),
+                      MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
-       u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) &&
-                   !priv->xdp_prog ?
+       u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
+                   !params->xdp_prog ?
                    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                    MLX5_WQ_TYPE_LINKED_LIST;
-       mlx5e_set_rq_type_params(priv, rq_type);
+       mlx5e_set_rq_type_params(mdev, params, rq_type);
 }
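
mlx5e_set_rq_type_params() keeps stride and WQE geometry as log2 values, so strides-per-WQE is just a subtraction of logs and the byte sizes come back via 1 << log. A worked example with hypothetical constants (the driver's MLX5_MPWRQ_LOG_WQE_SZ may differ):

```c
#include <stdio.h>

#define LOG_WQE_SZ 14 /* assumed: a 16 KB work-queue element */

int main(void)
{
	int log_stride_sz   = 6;                     /* 64-byte strides */
	int log_num_strides = LOG_WQE_SZ - log_stride_sz;

	printf("stride=%d B, strides/wqe=%d, wqe=%d B\n",
	       1 << log_stride_sz, 1 << log_num_strides,
	       (1 << log_stride_sz) * (1 << log_num_strides));
	/* -> stride=64 B, strides/wqe=256, wqe=16384 B */
	return 0;
}
```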
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -181,8 +175,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
        int i, j;
 
        memset(s, 0, sizeof(*s));
-       for (i = 0; i < priv->params.num_channels; i++) {
-               rq_stats = &priv->channel[i]->rq.stats;
+       for (i = 0; i < priv->channels.num; i++) {
+               struct mlx5e_channel *c = priv->channels.c[i];
+
+               rq_stats = &c->rq.stats;
 
                s->rx_packets   += rq_stats->packets;
                s->rx_bytes     += rq_stats->bytes;
@@ -204,8 +200,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
                s->rx_cache_empty += rq_stats->cache_empty;
                s->rx_cache_busy  += rq_stats->cache_busy;
 
-               for (j = 0; j < priv->params.num_tc; j++) {
-                       sq_stats = &priv->channel[i]->sq[j].stats;
+               for (j = 0; j < priv->channels.params.num_tc; j++) {
+                       sq_stats = &c->sq[j].stats;
 
                        s->tx_packets           += sq_stats->packets;
                        s->tx_bytes             += sq_stats->bytes;
@@ -402,8 +398,10 @@ static inline int mlx5e_get_wqe_mtt_sz(void)
                     MLX5_UMR_MTT_ALIGNMENT);
 }
 
-static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq,
-                                      struct mlx5e_umr_wqe *wqe, u16 ix)
+static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
+                                      struct mlx5e_icosq *sq,
+                                      struct mlx5e_umr_wqe *wqe,
+                                      u16 ix)
 {
        struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
        struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
@@ -493,11 +491,10 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
        kfree(rq->mpwqe.info);
 }
 
-static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv,
+static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
                                 u64 npages, u8 page_shift,
                                 struct mlx5_core_mkey *umr_mkey)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        void *mkc;
        u32 *in;
@@ -531,21 +528,20 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv,
        return err;
 }
 
-static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq)
+static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
 {
-       struct mlx5e_priv *priv = rq->priv;
-       u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size));
+       u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->wq));
 
-       return mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+       return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
 }
 
-static int mlx5e_create_rq(struct mlx5e_channel *c,
-                          struct mlx5e_rq_param *param,
-                          struct mlx5e_rq *rq)
+static int mlx5e_alloc_rq(struct mlx5e_channel *c,
+                         struct mlx5e_params *params,
+                         struct mlx5e_rq_param *rqp,
+                         struct mlx5e_rq *rq)
 {
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
-       void *rqc = param->rqc;
+       struct mlx5_core_dev *mdev = c->mdev;
+       void *rqc = rqp->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
        u32 byte_count;
        u32 frag_sz;
@@ -554,9 +550,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
        int err;
        int i;
 
-       param->wq.db_numa_node = cpu_to_node(c->cpu);
+       rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
-       err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
+       err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
                                &rq->wq_ctrl);
        if (err)
                return err;
@@ -565,15 +561,15 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 
        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 
-       rq->wq_type = priv->params.rq_wq_type;
+       rq->wq_type = params->rq_wq_type;
        rq->pdev    = c->pdev;
        rq->netdev  = c->netdev;
-       rq->tstamp  = &priv->tstamp;
+       rq->tstamp  = c->tstamp;
        rq->channel = c;
        rq->ix      = c->ix;
-       rq->priv    = c->priv;
+       rq->mdev    = mdev;
 
-       rq->xdp_prog = priv->xdp_prog ? bpf_prog_inc(priv->xdp_prog) : NULL;
+       rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
        if (IS_ERR(rq->xdp_prog)) {
                err = PTR_ERR(rq->xdp_prog);
                rq->xdp_prog = NULL;
@@ -588,9 +584,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->rx_headroom = MLX5_RX_HEADROOM;
        }
 
-       switch (priv->params.rq_wq_type) {
+       switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               if (mlx5e_is_vf_vport_rep(priv)) {
+               if (mlx5e_is_vf_vport_rep(c->priv)) {
                        err = -EINVAL;
                        goto err_rq_wq_destroy;
                }
@@ -599,13 +595,13 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-               rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
-               rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+               rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz);
+               rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides);
 
                rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                byte_count = rq->buff.wqe_sz;
 
-               err = mlx5e_create_rq_umr_mkey(rq);
+               err = mlx5e_create_rq_umr_mkey(mdev, rq);
                if (err)
                        goto err_rq_wq_destroy;
                rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
@@ -622,7 +618,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                        goto err_rq_wq_destroy;
                }
 
-               if (mlx5e_is_vf_vport_rep(priv))
+               if (mlx5e_is_vf_vport_rep(c->priv))
                        rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep;
                else
                        rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
@@ -630,9 +626,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->alloc_wqe = mlx5e_alloc_rx_wqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
-               rq->buff.wqe_sz = (priv->params.lro_en) ?
-                               priv->params.lro_wqe_sz :
-                               MLX5E_SW2HW_MTU(priv->netdev->mtu);
+               rq->buff.wqe_sz = params->lro_en ?
+                               params->lro_wqe_sz :
+                               MLX5E_SW2HW_MTU(c->netdev->mtu);
                byte_count = rq->buff.wqe_sz;
 
                /* calc the required page order */
@@ -656,8 +652,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
        }
 
        INIT_WORK(&rq->am.work, mlx5e_rx_am_work);
-       rq->am.mode = priv->params.rx_cq_period_mode;
-
+       rq->am.mode = params->rx_cq_period_mode;
        rq->page_cache.head = 0;
        rq->page_cache.tail = 0;
 
@@ -674,7 +669,7 @@ err_rq_wq_destroy:
        return err;
 }
 
-static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
+static void mlx5e_free_rq(struct mlx5e_rq *rq)
 {
        int i;
 
@@ -684,7 +679,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                mlx5e_rq_free_mpwqe_info(rq);
-               mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey);
+               mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                kfree(rq->dma_info);
@@ -699,10 +694,10 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
        mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
-static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
+static int mlx5e_create_rq(struct mlx5e_rq *rq,
+                          struct mlx5e_rq_param *param)
 {
-       struct mlx5e_priv *priv = rq->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5_core_dev *mdev = rq->mdev;
 
        void *in;
        void *rqc;
@@ -723,7 +718,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
 
        MLX5_SET(rqc,  rqc, cqn,                rq->cq.mcq.cqn);
        MLX5_SET(rqc,  rqc, state,              MLX5_RQC_STATE_RST);
-       MLX5_SET(rqc,  rqc, vsd, priv->params.vlan_strip_disable);
        MLX5_SET(wq,   wq,  log_wq_pg_sz,       rq->wq_ctrl.buf.page_shift -
                                                MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq,  dbr_addr,           rq->wq_ctrl.db.dma);
@@ -742,8 +736,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
                                 int next_state)
 {
        struct mlx5e_channel *c = rq->channel;
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5_core_dev *mdev = c->mdev;
 
        void *in;
        void *rqc;
@@ -770,9 +763,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 {
        struct mlx5e_channel *c = rq->channel;
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
-
+       struct mlx5_core_dev *mdev = c->mdev;
        void *in;
        void *rqc;
        int inlen;
@@ -798,25 +789,28 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
        return err;
 }
 
-static void mlx5e_disable_rq(struct mlx5e_rq *rq)
+static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
 {
-       mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn);
+       mlx5_core_destroy_rq(rq->mdev, rq->rqn);
 }
 
 static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
 {
        unsigned long exp_time = jiffies + msecs_to_jiffies(20000);
        struct mlx5e_channel *c = rq->channel;
-       struct mlx5e_priv *priv = c->priv;
+
        struct mlx5_wq_ll *wq = &rq->wq;
+       u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5_wq_ll_get_size(wq));
 
        while (time_before(jiffies, exp_time)) {
-               if (wq->cur_sz >= priv->params.min_rx_wqes)
+               if (wq->cur_sz >= min_wqes)
                        return 0;
 
                msleep(20);
        }
 
+       netdev_warn(c->netdev, "Failed to get min RX wqes on RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
+                   rq->rqn, wq->cur_sz, min_wqes);
        return -ETIMEDOUT;
 }
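
The wait loop above now derives the threshold from the WQ itself and warns with the RQN on timeout; the shape is a classic deadline poll. A userspace analogue of the same 20-second budget, with clock_gettime()/usleep() in place of jiffies/msleep():

```c
#include <stdbool.h>
#include <time.h>
#include <unistd.h>

static bool wait_for_min_wqes(volatile int *cur_sz, int min_wqes)
{
	struct timespec now, deadline;

	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_sec += 20;                       /* 20000 ms budget */

	for (;;) {
		if (*cur_sz >= min_wqes)
			return true;                 /* threshold reached */

		clock_gettime(CLOCK_MONOTONIC, &now);
		if (now.tv_sec > deadline.tv_sec ||
		    (now.tv_sec == deadline.tv_sec &&
		     now.tv_nsec >= deadline.tv_nsec))
			return false;                /* -ETIMEDOUT analogue */

		usleep(20 * 1000);                   /* msleep(20) analogue */
	}
}
```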
 
@@ -842,83 +836,128 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 }
 
 static int mlx5e_open_rq(struct mlx5e_channel *c,
+                        struct mlx5e_params *params,
                         struct mlx5e_rq_param *param,
                         struct mlx5e_rq *rq)
 {
-       struct mlx5e_sq *sq = &c->icosq;
-       u16 pi = sq->pc & sq->wq.sz_m1;
        int err;
 
-       err = mlx5e_create_rq(c, param, rq);
+       err = mlx5e_alloc_rq(c, params, param, rq);
        if (err)
                return err;
 
-       err = mlx5e_enable_rq(rq, param);
+       err = mlx5e_create_rq(rq, param);
        if (err)
-               goto err_destroy_rq;
+               goto err_free_rq;
 
-       set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
        err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err)
-               goto err_disable_rq;
+               goto err_destroy_rq;
 
-       if (param->am_enabled)
+       if (params->rx_am_enabled)
                set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
-       sq->db.ico_wqe[pi].opcode     = MLX5_OPCODE_NOP;
-       sq->db.ico_wqe[pi].num_wqebbs = 1;
-       mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
-
        return 0;
 
-err_disable_rq:
-       clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-       mlx5e_disable_rq(rq);
 err_destroy_rq:
        mlx5e_destroy_rq(rq);
+err_free_rq:
+       mlx5e_free_rq(rq);
 
        return err;
 }
 
-static void mlx5e_close_rq(struct mlx5e_rq *rq)
+static void mlx5e_activate_rq(struct mlx5e_rq *rq)
+{
+       struct mlx5e_icosq *sq = &rq->channel->icosq;
+       u16 pi = sq->pc & sq->wq.sz_m1;
+       struct mlx5e_tx_wqe *nopwqe;
+
+       set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+       sq->db.ico_wqe[pi].opcode     = MLX5_OPCODE_NOP;
+       sq->db.ico_wqe[pi].num_wqebbs = 1;
+       nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+       mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+}
+
+static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
 {
        clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
        napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
-       cancel_work_sync(&rq->am.work);
+}
 
-       mlx5e_disable_rq(rq);
-       mlx5e_free_rx_descs(rq);
+static void mlx5e_close_rq(struct mlx5e_rq *rq)
+{
+       cancel_work_sync(&rq->am.work);
        mlx5e_destroy_rq(rq);
+       mlx5e_free_rx_descs(rq);
+       mlx5e_free_rq(rq);
 }
 
-static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq)
+static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
 {
-       kfree(sq->db.xdp.di);
-       kfree(sq->db.xdp.wqe_info);
+       kfree(sq->db.di);
 }
 
-static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa)
+static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 {
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-       sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz,
+       sq->db.di = kzalloc_node(sizeof(*sq->db.di) * wq_sz,
                                     GFP_KERNEL, numa);
-       sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz,
-                                          GFP_KERNEL, numa);
-       if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) {
-               mlx5e_free_sq_xdp_db(sq);
+       if (!sq->db.di) {
+               mlx5e_free_xdpsq_db(sq);
                return -ENOMEM;
        }
 
        return 0;
 }
 
-static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq)
+static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+                            struct mlx5e_params *params,
+                            struct mlx5e_sq_param *param,
+                            struct mlx5e_xdpsq *sq)
+{
+       void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
+       struct mlx5_core_dev *mdev = c->mdev;
+       int err;
+
+       sq->pdev      = c->pdev;
+       sq->mkey_be   = c->mkey_be;
+       sq->channel   = c;
+       sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+       sq->min_inline_mode = params->tx_min_inline_mode;
+
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
+       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
+       if (err)
+               return err;
+       sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
+
+       err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
+       if (err)
+               goto err_sq_wq_destroy;
+
+       return 0;
+
+err_sq_wq_destroy:
+       mlx5_wq_destroy(&sq->wq_ctrl);
+
+       return err;
+}
+
+static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
+{
+       mlx5e_free_xdpsq_db(sq);
+       mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
 {
        kfree(sq->db.ico_wqe);
 }
 
-static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa)
+static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 {
        u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
@@ -930,155 +969,131 @@ static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa)
        return 0;
 }
 
-static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq)
+static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+                            struct mlx5e_sq_param *param,
+                            struct mlx5e_icosq *sq)
 {
-       kfree(sq->db.txq.wqe_info);
-       kfree(sq->db.txq.dma_fifo);
-       kfree(sq->db.txq.skb);
-}
+       void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
+       struct mlx5_core_dev *mdev = c->mdev;
+       int err;
 
-static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa)
-{
-       int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
-       int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+       sq->pdev      = c->pdev;
+       sq->mkey_be   = c->mkey_be;
+       sq->channel   = c;
+       sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-       sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb),
-                                     GFP_KERNEL, numa);
-       sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo),
-                                          GFP_KERNEL, numa);
-       sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info),
-                                          GFP_KERNEL, numa);
-       if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) {
-               mlx5e_free_sq_txq_db(sq);
-               return -ENOMEM;
-       }
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
+       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
+       if (err)
+               return err;
+       sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       sq->dma_fifo_mask = df_sz - 1;
+       err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
+       if (err)
+               goto err_sq_wq_destroy;
+
+       sq->edge = (sq->wq.sz_m1 + 1) - MLX5E_ICOSQ_MAX_WQEBBS;
 
        return 0;
+
+err_sq_wq_destroy:
+       mlx5_wq_destroy(&sq->wq_ctrl);
+
+       return err;
 }
 
-static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
 {
-       switch (sq->type) {
-       case MLX5E_SQ_TXQ:
-               mlx5e_free_sq_txq_db(sq);
-               break;
-       case MLX5E_SQ_ICO:
-               mlx5e_free_sq_ico_db(sq);
-               break;
-       case MLX5E_SQ_XDP:
-               mlx5e_free_sq_xdp_db(sq);
-               break;
-       }
+       mlx5e_free_icosq_db(sq);
+       mlx5_wq_destroy(&sq->wq_ctrl);
 }
 
-static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
 {
-       switch (sq->type) {
-       case MLX5E_SQ_TXQ:
-               return mlx5e_alloc_sq_txq_db(sq, numa);
-       case MLX5E_SQ_ICO:
-               return mlx5e_alloc_sq_ico_db(sq, numa);
-       case MLX5E_SQ_XDP:
-               return mlx5e_alloc_sq_xdp_db(sq, numa);
-       }
-
-       return 0;
+       kfree(sq->db.wqe_info);
+       kfree(sq->db.dma_fifo);
+       kfree(sq->db.skb);
 }
 
-static int mlx5e_sq_get_max_wqebbs(u8 sq_type)
+static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 {
-       switch (sq_type) {
-       case MLX5E_SQ_ICO:
-               return MLX5E_ICOSQ_MAX_WQEBBS;
-       case MLX5E_SQ_XDP:
-               return MLX5E_XDP_TX_WQEBBS;
+       int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+       int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+       sq->db.skb = kzalloc_node(wq_sz * sizeof(*sq->db.skb),
+                                     GFP_KERNEL, numa);
+       sq->db.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.dma_fifo),
+                                          GFP_KERNEL, numa);
+       sq->db.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.wqe_info),
+                                          GFP_KERNEL, numa);
+       if (!sq->db.skb || !sq->db.dma_fifo || !sq->db.wqe_info) {
+               mlx5e_free_txqsq_db(sq);
+               return -ENOMEM;
        }
-       return MLX5_SEND_WQE_MAX_WQEBBS;
+
+       sq->dma_fifo_mask = df_sz - 1;
+
+       return 0;
 }
 
-static int mlx5e_create_sq(struct mlx5e_channel *c,
-                          int tc,
-                          struct mlx5e_sq_param *param,
-                          struct mlx5e_sq *sq)
+static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
+                            int txq_ix,
+                            struct mlx5e_params *params,
+                            struct mlx5e_sq_param *param,
+                            struct mlx5e_txqsq *sq)
 {
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
-
-       void *sqc = param->sqc;
-       void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+       void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
+       struct mlx5_core_dev *mdev = c->mdev;
        int err;
 
-       sq->type      = param->type;
        sq->pdev      = c->pdev;
-       sq->tstamp    = &priv->tstamp;
+       sq->tstamp    = c->tstamp;
        sq->mkey_be   = c->mkey_be;
        sq->channel   = c;
-       sq->tc        = tc;
+       sq->txq_ix    = txq_ix;
+       sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+       sq->max_inline      = params->tx_max_inline;
+       sq->min_inline_mode = params->tx_min_inline_mode;
 
-       err = mlx5_alloc_bfreg(mdev, &sq->bfreg, MLX5_CAP_GEN(mdev, bf), false);
-       if (err)
-               return err;
-
-       sq->uar_map = sq->bfreg.map;
        param->wq.db_numa_node = cpu_to_node(c->cpu);
-
-       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
-                                &sq->wq_ctrl);
+       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
-               goto err_unmap_free_uar;
-
-       sq->wq.db       = &sq->wq.db[MLX5_SND_DBR];
-       if (sq->bfreg.wc)
-               set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state);
-
-       sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
-       sq->max_inline  = param->max_inline;
-       sq->min_inline_mode = param->min_inline_mode;
+               return err;
+       sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
+       err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
-       if (sq->type == MLX5E_SQ_TXQ) {
-               int txq_ix;
-
-               txq_ix = c->ix + tc * priv->params.num_channels;
-               sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix);
-               priv->txq_to_sq_map[txq_ix] = sq;
-       }
-
-       sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type);
-       sq->bf_budget = MLX5E_SQ_BF_BUDGET;
+       sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
 
        return 0;
 
 err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);
 
-err_unmap_free_uar:
-       mlx5_free_bfreg(mdev, &sq->bfreg);
-
        return err;
 }
 
-static void mlx5e_destroy_sq(struct mlx5e_sq *sq)
+static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
 {
-       struct mlx5e_channel *c = sq->channel;
-       struct mlx5e_priv *priv = c->priv;
-
-       mlx5e_free_sq_db(sq);
+       mlx5e_free_txqsq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
-       mlx5_free_bfreg(priv->mdev, &sq->bfreg);
 }
 
-static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
-{
-       struct mlx5e_channel *c = sq->channel;
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
+struct mlx5e_create_sq_param {
+       struct mlx5_wq_ctrl        *wq_ctrl;
+       u32                         cqn;
+       u32                         tisn;
+       u8                          tis_lst_sz;
+       u8                          min_inline_mode;
+};
 
+static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
+                          struct mlx5e_sq_param *param,
+                          struct mlx5e_create_sq_param *csp,
+                          u32 *sqn)
+{
        void *in;
        void *sqc;
        void *wq;
@@ -1086,7 +1101,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
        int err;
 
        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
-               sizeof(u64) * sq->wq_ctrl.buf.npages;
+               sizeof(u64) * csp->wq_ctrl->buf.npages;
        in = mlx5_vzalloc(inlen);
        if (!in)
                return -ENOMEM;
@@ -1095,40 +1110,40 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
        wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
        memcpy(sqc, param->sqc, sizeof(param->sqc));
-
-       MLX5_SET(sqc,  sqc, tis_num_0, param->type == MLX5E_SQ_ICO ?
-                                      0 : priv->tisn[sq->tc]);
-       MLX5_SET(sqc,  sqc, cqn,                sq->cq.mcq.cqn);
+       MLX5_SET(sqc,  sqc, tis_lst_sz, csp->tis_lst_sz);
+       MLX5_SET(sqc,  sqc, tis_num_0, csp->tisn);
+       MLX5_SET(sqc,  sqc, cqn, csp->cqn);
 
        if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-               MLX5_SET(sqc,  sqc, min_wqe_inline_mode, sq->min_inline_mode);
+               MLX5_SET(sqc,  sqc, min_wqe_inline_mode, csp->min_inline_mode);
 
-       MLX5_SET(sqc,  sqc, state,              MLX5_SQC_STATE_RST);
-       MLX5_SET(sqc,  sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);
+       MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
 
        MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
-       MLX5_SET(wq,   wq, uar_page,      sq->bfreg.index);
-       MLX5_SET(wq,   wq, log_wq_pg_sz,  sq->wq_ctrl.buf.page_shift -
+       MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.bfreg.index);
+       MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
                                          MLX5_ADAPTER_PAGE_SHIFT);
-       MLX5_SET64(wq, wq, dbr_addr,      sq->wq_ctrl.db.dma);
+       MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
 
-       mlx5_fill_page_array(&sq->wq_ctrl.buf,
-                            (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+       mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 
-       err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+       err = mlx5_core_create_sq(mdev, in, inlen, sqn);
 
        kvfree(in);
 
        return err;
 }
 
-static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state,
-                          int next_state, bool update_rl, int rl_index)
-{
-       struct mlx5e_channel *c = sq->channel;
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
+struct mlx5e_modify_sq_param {
+       int curr_state;
+       int next_state;
+       bool rl_update;
+       int rl_index;
+};
 
+static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+                          struct mlx5e_modify_sq_param *p)
+{
        void *in;
        void *sqc;
        int inlen;
@@ -1141,68 +1156,94 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state,
 
        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
 
-       MLX5_SET(modify_sq_in, in, sq_state, curr_state);
-       MLX5_SET(sqc, sqc, state, next_state);
-       if (update_rl && next_state == MLX5_SQC_STATE_RDY) {
+       MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
+       MLX5_SET(sqc, sqc, state, p->next_state);
+       if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
                MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
-               MLX5_SET(sqc,  sqc, packet_pacing_rate_limit_index, rl_index);
+               MLX5_SET(sqc,  sqc, packet_pacing_rate_limit_index, p->rl_index);
        }
 
-       err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen);
+       err = mlx5_core_modify_sq(mdev, sqn, in, inlen);
 
        kvfree(in);
 
        return err;
 }
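
SQ state changes now travel through an mlx5e_modify_sq_param bundle instead of four positional arguments, which lets mlx5e_create_sq_rdy() express RST -> RDY (and, for rate limiting, an optional pacing index) without churning every call site. A reduced model of that parameter-object choice, with a stub command body:

```c
#include <stdbool.h>
#include <stdint.h>

struct modify_sq_param {
	int  curr_state;
	int  next_state;
	bool rl_update;
	int  rl_index;
};

static int modify_sq(uint32_t sqn, const struct modify_sq_param *p)
{
	/* would build and post a MODIFY_SQ command here */
	(void)sqn; (void)p;
	return 0;
}

static int create_sq_rdy(uint32_t sqn)
{
	struct modify_sq_param msp = {
		.curr_state = 0, /* RST */
		.next_state = 1, /* RDY */
	};

	return modify_sq(sqn, &msp); /* unset fields stay zeroed */
}
```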
 
-static void mlx5e_disable_sq(struct mlx5e_sq *sq)
+static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
 {
-       struct mlx5e_channel *c = sq->channel;
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
-
-       mlx5_core_destroy_sq(mdev, sq->sqn);
-       if (sq->rate_limit)
-               mlx5_rl_remove_rate(mdev, sq->rate_limit);
+       mlx5_core_destroy_sq(mdev, sqn);
 }
 
-static int mlx5e_open_sq(struct mlx5e_channel *c,
-                        int tc,
-                        struct mlx5e_sq_param *param,
-                        struct mlx5e_sq *sq)
+static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+                              struct mlx5e_sq_param *param,
+                              struct mlx5e_create_sq_param *csp,
+                              u32 *sqn)
 {
+       struct mlx5e_modify_sq_param msp = {0};
        int err;
 
-       err = mlx5e_create_sq(c, tc, param, sq);
+       err = mlx5e_create_sq(mdev, param, csp, sqn);
        if (err)
                return err;
 
-       err = mlx5e_enable_sq(sq, param);
+       msp.curr_state = MLX5_SQC_STATE_RST;
+       msp.next_state = MLX5_SQC_STATE_RDY;
+       err = mlx5e_modify_sq(mdev, *sqn, &msp);
        if (err)
-               goto err_destroy_sq;
+               mlx5e_destroy_sq(mdev, *sqn);
 
-       set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-       err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY,
-                             false, 0);
+       return err;
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+                               struct mlx5e_txqsq *sq, u32 rate);
+
+static int mlx5e_open_txqsq(struct mlx5e_channel *c,
+                           u32 tisn,
+                           int txq_ix,
+                           struct mlx5e_params *params,
+                           struct mlx5e_sq_param *param,
+                           struct mlx5e_txqsq *sq)
+{
+       struct mlx5e_create_sq_param csp = {};
+       u32 tx_rate;
+       int err;
+
+       err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq);
        if (err)
-               goto err_disable_sq;
+               return err;
 
-       if (sq->txq) {
-               netdev_tx_reset_queue(sq->txq);
-               netif_tx_start_queue(sq->txq);
-       }
+       csp.tisn            = tisn;
+       csp.tis_lst_sz      = 1;
+       csp.cqn             = sq->cq.mcq.cqn;
+       csp.wq_ctrl         = &sq->wq_ctrl;
+       csp.min_inline_mode = sq->min_inline_mode;
+       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+       if (err)
+               goto err_free_txqsq;
+
+       tx_rate = c->priv->tx_rates[sq->txq_ix];
+       if (tx_rate)
+               mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
 
        return 0;
 
-err_disable_sq:
+err_free_txqsq:
        clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-       mlx5e_disable_sq(sq);
-err_destroy_sq:
-       mlx5e_destroy_sq(sq);
+       mlx5e_free_txqsq(sq);
 
        return err;
 }
 
+static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+{
+       sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
+       set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       netdev_tx_reset_queue(sq->txq);
+       netif_tx_start_queue(sq->txq);
+}
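
With mlx5e_activate_txqsq split out, a TXQ SQ now moves through four distinct stages: alloc (host memory), create (firmware object, brought to RDY), activate (attach to the netdev queue and start it), and the mirror-image deactivate/destroy/free on teardown. Keeping activation separate from creation is what later lets a new channel set be opened while the old one is still serving traffic. A sketch of the symmetric ordering, with purely illustrative stubs:

/* Build-up and teardown run the same stages in opposite order.
 * (Illustrative stubs, not the driver's functions.) */
static int  sq_alloc(void)      { return 0; } /* host: WQ buffer, arrays  */
static int  sq_create(void)     { return 0; } /* device: create, RST->RDY */
static void sq_activate(void)   { }           /* netdev: reset + start txq */
static void sq_deactivate(void) { }           /* netdev: quiesce, stop txq */
static void sq_destroy(void)    { }           /* device: destroy object    */
static void sq_free(void)       { }           /* host: release memory      */

int main(void)
{
	if (sq_alloc())
		return 1;
	if (sq_create()) {
		sq_free();
		return 1;
	}
	sq_activate();

	/* ... traffic flows ... */

	sq_deactivate();
	sq_destroy();
	sq_free();
	return 0;
}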
+
 static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 {
        __netif_tx_lock_bh(txq);
@@ -1210,33 +1251,148 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
        __netif_tx_unlock_bh(txq);
 }
 
-static void mlx5e_close_sq(struct mlx5e_sq *sq)
+static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
 {
+       struct mlx5e_channel *c = sq->channel;
+
        clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
        /* prevent netif_tx_wake_queue */
-       napi_synchronize(&sq->channel->napi);
+       napi_synchronize(&c->napi);
 
-       if (sq->txq) {
-               netif_tx_disable_queue(sq->txq);
+       netif_tx_disable_queue(sq->txq);
 
-               /* last doorbell out, godspeed .. */
-               if (mlx5e_sq_has_room_for(sq, 1)) {
-                       sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL;
-                       mlx5e_send_nop(sq, true);
-               }
+       /* last doorbell out, godspeed .. */
+       if (mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1)) {
+               struct mlx5e_tx_wqe *nop;
+
+               sq->db.skb[(sq->pc & sq->wq.sz_m1)] = NULL;
+               nop = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+               mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nop->ctrl);
        }
+}
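
The final-NOP path above relies on mlx5e_wqc_has_room_for: with free-running producer (pc) and consumer (cc) counters on a power-of-two ring, occupancy is pc - cc (unsigned wraparound keeps this correct), there is room for n more entries when size - (pc - cc) >= n, and the slot index is pc masked with sz_m1. A self-contained worked example of that arithmetic (values are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Room check for a power-of-two ring with free-running counters;
 * unsigned subtraction keeps the math correct across 16-bit wrap. */
static bool wqc_has_room_for(uint16_t sz, uint16_t cc, uint16_t pc, uint16_t n)
{
	return (uint16_t)(sz - (pc - cc)) >= n;
}

int main(void)
{
	uint16_t sz = 64, sz_m1 = sz - 1;
	uint16_t cc = 65530, pc = 3;            /* producer has wrapped */
	uint16_t occ = pc - cc;                 /* 9 entries in flight  */

	printf("occupancy=%u room_for_1=%d slot=%u\n",
	       occ,
	       wqc_has_room_for(sz, cc, pc, 1), /* 1: 55 slots still free */
	       pc & sz_m1);                     /* 3 & 63 == 3            */
	return 0;
}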
+
+static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
+{
+       struct mlx5e_channel *c = sq->channel;
+       struct mlx5_core_dev *mdev = c->mdev;
 
-       mlx5e_disable_sq(sq);
-       mlx5e_free_sq_descs(sq);
-       mlx5e_destroy_sq(sq);
+       mlx5e_destroy_sq(mdev, sq->sqn);
+       if (sq->rate_limit)
+               mlx5_rl_remove_rate(mdev, sq->rate_limit);
+       mlx5e_free_txqsq_descs(sq);
+       mlx5e_free_txqsq(sq);
 }
 
-static int mlx5e_create_cq(struct mlx5e_channel *c,
-                          struct mlx5e_cq_param *param,
-                          struct mlx5e_cq *cq)
+static int mlx5e_open_icosq(struct mlx5e_channel *c,
+                           struct mlx5e_params *params,
+                           struct mlx5e_sq_param *param,
+                           struct mlx5e_icosq *sq)
 {
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_create_sq_param csp = {};
+       int err;
+
+       err = mlx5e_alloc_icosq(c, param, sq);
+       if (err)
+               return err;
+
+       csp.cqn             = sq->cq.mcq.cqn;
+       csp.wq_ctrl         = &sq->wq_ctrl;
+       csp.min_inline_mode = params->tx_min_inline_mode;
+       set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+       if (err)
+               goto err_free_icosq;
+
+       return 0;
+
+err_free_icosq:
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       mlx5e_free_icosq(sq);
+
+       return err;
+}
+
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+{
+       struct mlx5e_channel *c = sq->channel;
+
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       napi_synchronize(&c->napi);
+
+       mlx5e_destroy_sq(c->mdev, sq->sqn);
+       mlx5e_free_icosq(sq);
+}
+
+static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
+                           struct mlx5e_params *params,
+                           struct mlx5e_sq_param *param,
+                           struct mlx5e_xdpsq *sq)
+{
+       unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+       struct mlx5e_create_sq_param csp = {};
+       unsigned int inline_hdr_sz = 0;
+       int err;
+       int i;
+
+       err = mlx5e_alloc_xdpsq(c, params, param, sq);
+       if (err)
+               return err;
+
+       csp.tis_lst_sz      = 1;
+       csp.tisn            = c->priv->tisn[0]; /* tc = 0 */
+       csp.cqn             = sq->cq.mcq.cqn;
+       csp.wq_ctrl         = &sq->wq_ctrl;
+       csp.min_inline_mode = sq->min_inline_mode;
+       set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+       if (err)
+               goto err_free_xdpsq;
+
+       if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+               inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+               ds_cnt++;
+       }
+
+       /* Pre initialize fixed WQE fields */
+       for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+               struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+               struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+               struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+               struct mlx5_wqe_data_seg *dseg;
+
+               cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+               eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+
+               dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+               dseg->lkey = sq->mkey_be;
+       }
+
+       return 0;
+
+err_free_xdpsq:
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       mlx5e_free_xdpsq(sq);
+
+       return err;
+}
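
Because every XDP transmit WQE has the same shape (control segment, optional inline header, one data segment), mlx5e_open_xdpsq walks the whole ring once at open time and pre-writes the fields that never change: the qpn_ds word, the inline header size, and the data segment's lkey. The hot path then only fills the per-packet address and length. A sketch of the precompute idea over a plain array (the layout below is hypothetical, not the real WQE format):

#include <stdint.h>
#include <string.h>

struct fake_wqe {           /* stand-in for the fixed part of a TX WQE */
	uint32_t qpn_ds;    /* constant: SQ number + descriptor count  */
	uint16_t inline_sz; /* constant: inline header size            */
	uint32_t lkey;      /* constant: memory key for the data seg   */
	uint64_t addr;      /* per-packet                              */
	uint32_t len;       /* per-packet                              */
};

static void prefill_ring(struct fake_wqe *ring, int n,
			 uint32_t sqn, int ds_cnt, uint32_t lkey)
{
	for (int i = 0; i < n; i++) {  /* one-time cost at open */
		ring[i].qpn_ds    = (sqn << 8) | ds_cnt;
		ring[i].inline_sz = 0;
		ring[i].lkey      = lkey;
	}
}

int main(void)
{
	struct fake_wqe ring[8];

	memset(ring, 0, sizeof(ring));
	prefill_ring(ring, 8, 7, 3, 0x1234);
	/* hot path now writes only ring[slot].addr and ring[slot].len */
	return 0;
}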
+
+static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+{
+       struct mlx5e_channel *c = sq->channel;
+
+       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+       napi_synchronize(&c->napi);
+
+       mlx5e_destroy_sq(c->mdev, sq->sqn);
+       mlx5e_free_xdpsq_descs(sq);
+       mlx5e_free_xdpsq(sq);
+}
+
+static int mlx5e_alloc_cq(struct mlx5e_channel *c,
+                         struct mlx5e_cq_param *param,
+                         struct mlx5e_cq *cq)
+{
+       struct mlx5_core_dev *mdev = c->mdev;
        struct mlx5_core_cq *mcq = &cq->mcq;
        int eqn_not_used;
        unsigned int irqn;
@@ -1273,20 +1429,19 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
        }
 
        cq->channel = c;
-       cq->priv = priv;
+       cq->mdev = mdev;
 
        return 0;
 }
 
-static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
+static void mlx5e_free_cq(struct mlx5e_cq *cq)
 {
        mlx5_cqwq_destroy(&cq->wq_ctrl);
 }
 
-static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
+static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 {
-       struct mlx5e_priv *priv = cq->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5_core_dev *mdev = cq->mdev;
        struct mlx5_core_cq *mcq = &cq->mcq;
 
        void *in;
@@ -1330,47 +1485,41 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
        return 0;
 }
 
-static void mlx5e_disable_cq(struct mlx5e_cq *cq)
+static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
 {
-       struct mlx5e_priv *priv = cq->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
-
-       mlx5_core_destroy_cq(mdev, &cq->mcq);
+       mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
 }
 
 static int mlx5e_open_cq(struct mlx5e_channel *c,
+                        struct mlx5e_cq_moder moder,
                         struct mlx5e_cq_param *param,
-                        struct mlx5e_cq *cq,
-                        struct mlx5e_cq_moder moderation)
+                        struct mlx5e_cq *cq)
 {
+       struct mlx5_core_dev *mdev = c->mdev;
        int err;
-       struct mlx5e_priv *priv = c->priv;
-       struct mlx5_core_dev *mdev = priv->mdev;
 
-       err = mlx5e_create_cq(c, param, cq);
+       err = mlx5e_alloc_cq(c, param, cq);
        if (err)
                return err;
 
-       err = mlx5e_enable_cq(cq, param);
+       err = mlx5e_create_cq(cq, param);
        if (err)
-               goto err_destroy_cq;
+               goto err_free_cq;
 
        if (MLX5_CAP_GEN(mdev, cq_moderation))
-               mlx5_core_modify_cq_moderation(mdev, &cq->mcq,
-                                              moderation.usec,
-                                              moderation.pkts);
+               mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
        return 0;
 
-err_destroy_cq:
-       mlx5e_destroy_cq(cq);
+err_free_cq:
+       mlx5e_free_cq(cq);
 
        return err;
 }
 
 static void mlx5e_close_cq(struct mlx5e_cq *cq)
 {
-       mlx5e_disable_cq(cq);
        mlx5e_destroy_cq(cq);
+       mlx5e_free_cq(cq);
 }
 
 static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
@@ -1379,15 +1528,15 @@ static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
 }
 
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
+                            struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
 {
-       struct mlx5e_priv *priv = c->priv;
        int err;
        int tc;
 
        for (tc = 0; tc < c->num_tc; tc++) {
-               err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
-                                   priv->params.tx_cq_moderation);
+               err = mlx5e_open_cq(c, params->tx_cq_moderation,
+                                   &cparam->tx_cq, &c->sq[tc].cq);
                if (err)
                        goto err_close_tx_cqs;
        }
@@ -1410,13 +1559,17 @@ static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
 }
 
 static int mlx5e_open_sqs(struct mlx5e_channel *c,
+                         struct mlx5e_params *params,
                          struct mlx5e_channel_param *cparam)
 {
        int err;
        int tc;
 
-       for (tc = 0; tc < c->num_tc; tc++) {
-               err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
+       for (tc = 0; tc < params->num_tc; tc++) {
+               int txq_ix = c->ix + tc * params->num_channels;
+
+               err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix,
+                                      params, &cparam->sq, &c->sq[tc]);
                if (err)
                        goto err_close_sqs;
        }
@@ -1425,7 +1578,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
 
 err_close_sqs:
        for (tc--; tc >= 0; tc--)
-               mlx5e_close_sq(&c->sq[tc]);
+               mlx5e_close_txqsq(&c->sq[tc]);
 
        return err;
 }
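
The txq index a TXQ SQ binds to is now computed inline as txq_ix = ix + tc * num_channels, so all TC-0 queues come first, then all TC-1 queues, and so on; mlx5e_build_channels_tx_maps later records the inverse in txq2sq. A self-contained worked example of the layout (sizes are illustrative):

#include <stdio.h>

int main(void)
{
	int num_channels = 4, num_tc = 2;

	/* txq layout: tc0 of every channel, then tc1 of every channel */
	for (int tc = 0; tc < num_tc; tc++)
		for (int ix = 0; ix < num_channels; ix++)
			printf("channel %d tc %d -> txq %d\n",
			       ix, tc, ix + tc * num_channels);

	/* inverse: txq 6 -> channel 6 % 4 == 2, tc 6 / 4 == 1 */
	return 0;
}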
@@ -1435,23 +1588,15 @@ static void mlx5e_close_sqs(struct mlx5e_channel *c)
        int tc;
 
        for (tc = 0; tc < c->num_tc; tc++)
-               mlx5e_close_sq(&c->sq[tc]);
-}
-
-static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix)
-{
-       int i;
-
-       for (i = 0; i < priv->profile->max_tc; i++)
-               priv->channeltc_to_txq_map[ix][i] =
-                       ix + i * priv->params.num_channels;
+               mlx5e_close_txqsq(&c->sq[tc]);
 }
 
 static int mlx5e_set_sq_maxrate(struct net_device *dev,
-                               struct mlx5e_sq *sq, u32 rate)
+                               struct mlx5e_txqsq *sq, u32 rate)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_modify_sq_param msp = {0};
        u16 rl_index = 0;
        int err;
 
@@ -1474,8 +1619,11 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
                }
        }
 
-       err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
-                             MLX5_SQC_STATE_RDY, true, rl_index);
+       msp.curr_state = MLX5_SQC_STATE_RDY;
+       msp.next_state = MLX5_SQC_STATE_RDY;
+       msp.rl_index   = rl_index;
+       msp.rl_update  = true;
+       err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
        if (err) {
                netdev_err(dev, "Failed configuring rate %u: %d\n",
                           rate, err);
@@ -1493,7 +1641,7 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_sq *sq = priv->txq_to_sq_map[index];
+       struct mlx5e_txqsq *sq = priv->txq2sq[index];
        int err = 0;
 
        if (!mlx5_rl_is_supported(mdev)) {
@@ -1529,105 +1677,86 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 }
 
 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+                             struct mlx5e_params *params,
                              struct mlx5e_channel_param *cparam,
                              struct mlx5e_channel **cp)
 {
-       struct mlx5e_cq_moder icosq_cq_moder = {0, 0};
+       struct mlx5e_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
-       struct mlx5e_cq_moder rx_cq_profile;
        int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
-       struct mlx5e_sq *sq;
        int err;
-       int i;
 
        c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
        if (!c)
                return -ENOMEM;
 
        c->priv     = priv;
+       c->mdev     = priv->mdev;
+       c->tstamp   = &priv->tstamp;
        c->ix       = ix;
        c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
-       c->num_tc   = priv->params.num_tc;
-       c->xdp      = !!priv->xdp_prog;
-
-       if (priv->params.rx_am_enabled)
-               rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode);
-       else
-               rx_cq_profile = priv->params.rx_cq_moderation;
-
-       mlx5e_build_channeltc_to_txq_map(priv, ix);
+       c->num_tc   = params->num_tc;
+       c->xdp      = !!params->xdp_prog;
 
        netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
 
-       err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder);
+       err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
        if (err)
                goto err_napi_del;
 
-       err = mlx5e_open_tx_cqs(c, cparam);
+       err = mlx5e_open_tx_cqs(c, params, cparam);
        if (err)
                goto err_close_icosq_cq;
 
-       err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
-                           rx_cq_profile);
+       err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
        if (err)
                goto err_close_tx_cqs;
 
        /* XDP SQ CQ params are same as normal TXQ sq CQ params */
-       err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
-                                    priv->params.tx_cq_moderation) : 0;
+       err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
+                                    &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
        if (err)
                goto err_close_rx_cq;
 
        napi_enable(&c->napi);
 
-       err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq);
+       err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
        if (err)
                goto err_disable_napi;
 
-       err = mlx5e_open_sqs(c, cparam);
+       err = mlx5e_open_sqs(c, params, cparam);
        if (err)
                goto err_close_icosq;
 
-       for (i = 0; i < priv->params.num_tc; i++) {
-               u32 txq_ix = priv->channeltc_to_txq_map[ix][i];
-
-               if (priv->tx_rates[txq_ix]) {
-                       sq = priv->txq_to_sq_map[txq_ix];
-                       mlx5e_set_sq_maxrate(priv->netdev, sq,
-                                            priv->tx_rates[txq_ix]);
-               }
-       }
-
-       err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0;
+       err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq) : 0;
        if (err)
                goto err_close_sqs;
 
-       err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
+       err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq);
        if (err)
                goto err_close_xdp_sq;
 
-       netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix);
        *cp = c;
 
        return 0;
 err_close_xdp_sq:
        if (c->xdp)
-               mlx5e_close_sq(&c->xdp_sq);
+               mlx5e_close_xdpsq(&c->rq.xdpsq);
 
 err_close_sqs:
        mlx5e_close_sqs(c);
 
 err_close_icosq:
-       mlx5e_close_sq(&c->icosq);
+       mlx5e_close_icosq(&c->icosq);
 
 err_disable_napi:
        napi_disable(&c->napi);
        if (c->xdp)
-               mlx5e_close_cq(&c->xdp_sq.cq);
+               mlx5e_close_cq(&c->rq.xdpsq.cq);
 
 err_close_rx_cq:
        mlx5e_close_cq(&c->rq.cq);
@@ -1645,16 +1774,35 @@ err_napi_del:
        return err;
 }
 
+static void mlx5e_activate_channel(struct mlx5e_channel *c)
+{
+       int tc;
+
+       for (tc = 0; tc < c->num_tc; tc++)
+               mlx5e_activate_txqsq(&c->sq[tc]);
+       mlx5e_activate_rq(&c->rq);
+       netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
+}
+
+static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
+{
+       int tc;
+
+       mlx5e_deactivate_rq(&c->rq);
+       for (tc = 0; tc < c->num_tc; tc++)
+               mlx5e_deactivate_txqsq(&c->sq[tc]);
+}
+
 static void mlx5e_close_channel(struct mlx5e_channel *c)
 {
        mlx5e_close_rq(&c->rq);
        if (c->xdp)
-               mlx5e_close_sq(&c->xdp_sq);
+               mlx5e_close_xdpsq(&c->rq.xdpsq);
        mlx5e_close_sqs(c);
-       mlx5e_close_sq(&c->icosq);
+       mlx5e_close_icosq(&c->icosq);
        napi_disable(&c->napi);
        if (c->xdp)
-               mlx5e_close_cq(&c->xdp_sq.cq);
+               mlx5e_close_cq(&c->rq.xdpsq.cq);
        mlx5e_close_cq(&c->rq.cq);
        mlx5e_close_tx_cqs(c);
        mlx5e_close_cq(&c->icosq.cq);
@@ -1664,17 +1812,16 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
 }
 
 static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+                                struct mlx5e_params *params,
                                 struct mlx5e_rq_param *param)
 {
        void *rqc = param->rqc;
        void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
-       switch (priv->params.rq_wq_type) {
+       switch (params->rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               MLX5_SET(wq, wq, log_wqe_num_of_strides,
-                        priv->params.mpwqe_log_num_strides - 9);
-               MLX5_SET(wq, wq, log_wqe_stride_size,
-                        priv->params.mpwqe_log_stride_sz - 6);
+               MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9);
+               MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6);
                MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1683,14 +1830,13 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 
        MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
        MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
-       MLX5_SET(wq, wq, log_wq_sz,        priv->params.log_rq_size);
+       MLX5_SET(wq, wq, log_wq_sz,        params->log_rq_size);
        MLX5_SET(wq, wq, pd,               priv->mdev->mlx5e_res.pdn);
        MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
+       MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 
        param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
        param->wq.linear = 1;
-
-       param->am_enabled = priv->params.rx_am_enabled;
 }
 
 static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param)
@@ -1715,17 +1861,14 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
+                                struct mlx5e_params *params,
                                 struct mlx5e_sq_param *param)
 {
        void *sqc = param->sqc;
        void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
        mlx5e_build_sq_param_common(priv, param);
-       MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);
-
-       param->max_inline = priv->params.tx_max_inline;
-       param->min_inline_mode = priv->params.tx_min_inline_mode;
-       param->type = MLX5E_SQ_TXQ;
+       MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -1737,37 +1880,40 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+                                   struct mlx5e_params *params,
                                    struct mlx5e_cq_param *param)
 {
        void *cqc = param->cqc;
        u8 log_cq_size;
 
-       switch (priv->params.rq_wq_type) {
+       switch (params->rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               log_cq_size = priv->params.log_rq_size +
-                       priv->params.mpwqe_log_num_strides;
+               log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
-               log_cq_size = priv->params.log_rq_size;
+               log_cq_size = params->log_rq_size;
        }
 
        MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
-       if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+       if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
                MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
                MLX5_SET(cqc, cqc, cqe_comp_en, 1);
        }
 
        mlx5e_build_common_cq_param(priv, param);
 
-       param->cq_period_mode = priv->params.rx_cq_period_mode;
+       if (params->rx_am_enabled)
+               params->rx_cq_moderation =
+                       mlx5e_am_get_def_profile(params->rx_cq_period_mode);
 }
 
 static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+                                   struct mlx5e_params *params,
                                    struct mlx5e_cq_param *param)
 {
        void *cqc = param->cqc;
 
-       MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
+       MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
 
        mlx5e_build_common_cq_param(priv, param);
 
@@ -1775,8 +1921,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
-                                    struct mlx5e_cq_param *param,
-                                    u8 log_wq_size)
+                                    u8 log_wq_size,
+                                    struct mlx5e_cq_param *param)
 {
        void *cqc = param->cqc;
 
@@ -1788,8 +1934,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
-                                   struct mlx5e_sq_param *param,
-                                   u8 log_wq_size)
+                                   u8 log_wq_size,
+                                   struct mlx5e_sq_param *param)
 {
        void *sqc = param->sqc;
        void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -1798,162 +1944,119 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
 
        MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
        MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
-
-       param->type = MLX5E_SQ_ICO;
 }
 
 static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+                                   struct mlx5e_params *params,
                                    struct mlx5e_sq_param *param)
 {
        void *sqc = param->sqc;
        void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
        mlx5e_build_sq_param_common(priv, param);
-       MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);
-
-       param->max_inline = priv->params.tx_max_inline;
-       param->min_inline_mode = priv->params.tx_min_inline_mode;
-       param->type = MLX5E_SQ_XDP;
+       MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 }
 
-static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam)
+static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+                                     struct mlx5e_params *params,
+                                     struct mlx5e_channel_param *cparam)
 {
        u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 
-       mlx5e_build_rq_param(priv, &cparam->rq);
-       mlx5e_build_sq_param(priv, &cparam->sq);
-       mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq);
-       mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz);
-       mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
-       mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
-       mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz);
+       mlx5e_build_rq_param(priv, params, &cparam->rq);
+       mlx5e_build_sq_param(priv, params, &cparam->sq);
+       mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+       mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
+       mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq);
+       mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+       mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
 }
 
-static int mlx5e_open_channels(struct mlx5e_priv *priv)
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+                       struct mlx5e_channels *chs)
 {
        struct mlx5e_channel_param *cparam;
-       int nch = priv->params.num_channels;
        int err = -ENOMEM;
        int i;
-       int j;
 
-       priv->channel = kcalloc(nch, sizeof(struct mlx5e_channel *),
-                               GFP_KERNEL);
-
-       priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc,
-                                     sizeof(struct mlx5e_sq *), GFP_KERNEL);
+       chs->num = chs->params.num_channels;
 
+       chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
        cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
+       if (!chs->c || !cparam)
+               goto err_free;
 
-       if (!priv->channel || !priv->txq_to_sq_map || !cparam)
-               goto err_free_txq_to_sq_map;
-
-       mlx5e_build_channel_param(priv, cparam);
-
-       for (i = 0; i < nch; i++) {
-               err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]);
-               if (err)
-                       goto err_close_channels;
-       }
-
-       for (j = 0; j < nch; j++) {
-               err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
+       mlx5e_build_channel_param(priv, &chs->params, cparam);
+       for (i = 0; i < chs->num; i++) {
+               err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]);
                if (err)
                        goto err_close_channels;
        }
 
-       /* FIXME: This is a W/A for tx timeout watch dog false alarm when
-        * polling for inactive tx queues.
-        */
-       netif_tx_start_all_queues(priv->netdev);
-
        kfree(cparam);
        return 0;
 
 err_close_channels:
        for (i--; i >= 0; i--)
-               mlx5e_close_channel(priv->channel[i]);
+               mlx5e_close_channel(chs->c[i]);
 
-err_free_txq_to_sq_map:
-       kfree(priv->txq_to_sq_map);
-       kfree(priv->channel);
+err_free:
+       kfree(chs->c);
        kfree(cparam);
-
+       chs->num = 0;
        return err;
 }
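
mlx5e_open_channels now builds into a caller-supplied struct mlx5e_channels (the channel array, its count, and the params that produced it) rather than into priv directly, which is what allows a second, candidate set to exist alongside the live one. On failure it closes the channels already opened, in reverse, and zeroes num so the container is inert. A sketch of that container-and-unwind shape with hypothetical stubs:

#include <stdlib.h>

struct channels {
	int    num;
	void **c; /* opaque per-channel objects */
};

static void *open_one(int ix)   { (void)ix; return malloc(1); } /* stub */
static void  close_one(void *c) { free(c); }

static int open_channels(struct channels *chs, int n)
{
	chs->num = n;
	chs->c = calloc(n, sizeof(*chs->c));
	if (!chs->c)
		goto err;
	for (int i = 0; i < n; i++) {
		chs->c[i] = open_one(i);
		if (!chs->c[i]) {
			while (--i >= 0)   /* unwind in reverse */
				close_one(chs->c[i]);
			goto err;
		}
	}
	return 0;
err:
	free(chs->c);
	chs->num = 0;                      /* container is inert again */
	return -1;
}

int main(void)
{
	struct channels chs;

	if (open_channels(&chs, 4))
		return 1;
	for (int i = 0; i < chs.num; i++)
		close_one(chs.c[i]);
	free(chs.c);
	return 0;
}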
 
-static void mlx5e_close_channels(struct mlx5e_priv *priv)
+static void mlx5e_activate_channels(struct mlx5e_channels *chs)
 {
        int i;
 
-       /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
-        * polling for inactive tx queues.
-        */
-       netif_tx_stop_all_queues(priv->netdev);
-       netif_tx_disable(priv->netdev);
-
-       for (i = 0; i < priv->params.num_channels; i++)
-               mlx5e_close_channel(priv->channel[i]);
-
-       kfree(priv->txq_to_sq_map);
-       kfree(priv->channel);
-}
-
-static int mlx5e_rx_hash_fn(int hfunc)
-{
-       return (hfunc == ETH_RSS_HASH_TOP) ?
-              MLX5_RX_HASH_FN_TOEPLITZ :
-              MLX5_RX_HASH_FN_INVERTED_XOR8;
+       for (i = 0; i < chs->num; i++)
+               mlx5e_activate_channel(chs->c[i]);
 }
 
-static int mlx5e_bits_invert(unsigned long a, int size)
+static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
 {
-       int inv = 0;
+       int err = 0;
        int i;
 
-       for (i = 0; i < size; i++)
-               inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+       for (i = 0; i < chs->num; i++) {
+               err = mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq);
+               if (err)
+                       break;
+       }
 
-       return inv;
+       return err;
 }
 
-static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc)
+static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
 {
        int i;
 
-       for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
-               int ix = i;
-               u32 rqn;
-
-               if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR)
-                       ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE);
-
-               ix = priv->params.indirection_rqt[ix];
-               rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ?
-                               priv->channel[ix]->rq.rqn :
-                               priv->drop_rq.rqn;
-               MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
-       }
+       for (i = 0; i < chs->num; i++)
+               mlx5e_deactivate_channel(chs->c[i]);
 }
 
-static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc,
-                                     int ix)
+void mlx5e_close_channels(struct mlx5e_channels *chs)
 {
-       u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ?
-                       priv->channel[ix]->rq.rqn :
-                       priv->drop_rq.rqn;
+       int i;
 
-       MLX5_SET(rqtc, rqtc, rq_num[0], rqn);
+       for (i = 0; i < chs->num; i++)
+               mlx5e_close_channel(chs->c[i]);
+
+       kfree(chs->c);
+       chs->num = 0;
 }
 
-static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz,
-                           int ix, struct mlx5e_rqt *rqt)
+static int
+mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
        int inlen;
        int err;
        u32 *in;
+       int i;
 
        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = mlx5_vzalloc(inlen);
@@ -1965,10 +2068,8 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz,
        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
 
-       if (sz > 1) /* RSS */
-               mlx5e_fill_indir_rqt_rqns(priv, rqtc);
-       else
-               mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix);
+       for (i = 0; i < sz; i++)
+               MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
 
        err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
        if (!err)
@@ -1988,7 +2089,7 @@ static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv)
 {
        struct mlx5e_rqt *rqt = &priv->indir_rqt;
 
-       return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
+       return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
 }
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
@@ -1999,7 +2100,7 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
 
        for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
                rqt = &priv->direct_tir[ix].rqt;
-               err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt);
+               err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
                if (err)
                        goto err_destroy_rqts;
        }
@@ -2013,7 +2114,49 @@ err_destroy_rqts:
        return err;
 }
 
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix)
+static int mlx5e_rx_hash_fn(int hfunc)
+{
+       return (hfunc == ETH_RSS_HASH_TOP) ?
+              MLX5_RX_HASH_FN_TOEPLITZ :
+              MLX5_RX_HASH_FN_INVERTED_XOR8;
+}
+
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+       int inv = 0;
+       int i;
+
+       for (i = 0; i < size; i++)
+               inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+       return inv;
+}
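
mlx5e_bits_invert reverses the low size bits of a: for the XOR8 hash function the indirection table is consumed in bit-reversed order, so filling it through the inverted index keeps the spread even. For example with size = 3, index 6 (0b110) maps to 3 (0b011). A standalone equivalent of the same loop (shifts instead of test_bit, same result for small values):

#include <stdio.h>

/* Reverse the low `size` bits of `a` (e.g. size=3: 110 -> 011). */
static int bits_invert(unsigned long a, int size)
{
	int inv = 0;

	for (int i = 0; i < size; i++)
		inv |= ((a >> (size - i - 1)) & 1) << i;
	return inv;
}

int main(void)
{
	for (unsigned long a = 0; a < 8; a++)
		printf("%lu -> %d\n", a, bits_invert(a, 3));
	return 0;
}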
+
+static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
+                               struct mlx5e_redirect_rqt_param rrp, void *rqtc)
+{
+       int i;
+
+       for (i = 0; i < sz; i++) {
+               u32 rqn;
+
+               if (rrp.is_rss) {
+                       int ix = i;
+
+                       if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
+                               ix = mlx5e_bits_invert(i, ilog2(sz));
+
+                       ix = priv->channels.params.indirection_rqt[ix];
+                       rqn = rrp.rss.channels->c[ix]->rq.rqn;
+               } else {
+                       rqn = rrp.rqn;
+               }
+               MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
+       }
+}
+
+int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
+                      struct mlx5e_redirect_rqt_param rrp)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
@@ -2029,41 +2172,86 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix)
        rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
 
        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
-       if (sz > 1) /* RSS */
-               mlx5e_fill_indir_rqt_rqns(priv, rqtc);
-       else
-               mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix);
-
        MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
-
+       mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
        err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
 
        kvfree(in);
-
        return err;
 }
 
-static void mlx5e_redirect_rqts(struct mlx5e_priv *priv)
+static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
+                               struct mlx5e_redirect_rqt_param rrp)
+{
+       if (!rrp.is_rss)
+               return rrp.rqn;
+
+       if (ix >= rrp.rss.channels->num)
+               return priv->drop_rq.rqn;
+
+       return rrp.rss.channels->c[ix]->rq.rqn;
+}
+
+static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
+                               struct mlx5e_redirect_rqt_param rrp)
 {
        u32 rqtn;
        int ix;
 
        if (priv->indir_rqt.enabled) {
+               /* RSS RQ table */
                rqtn = priv->indir_rqt.rqtn;
-               mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0);
+               mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
        }
 
-       for (ix = 0; ix < priv->params.num_channels; ix++) {
+       for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+               struct mlx5e_redirect_rqt_param direct_rrp = {
+                       .is_rss = false,
+                       {
+                               .rqn    = mlx5e_get_direct_rqn(priv, ix, rrp)
+                       },
+               };
+
+               /* Direct RQ Tables */
                if (!priv->direct_tir[ix].rqt.enabled)
                        continue;
+
                rqtn = priv->direct_tir[ix].rqt.rqtn;
-               mlx5e_redirect_rqt(priv, rqtn, 1, ix);
+               mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
        }
 }
 
-static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv)
+static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
+                                           struct mlx5e_channels *chs)
+{
+       struct mlx5e_redirect_rqt_param rrp = {
+               .is_rss        = true,
+               {
+                       .rss = {
+                               .channels  = chs,
+                               .hfunc     = chs->params.rss_hfunc,
+                       }
+               },
+       };
+
+       mlx5e_redirect_rqts(priv, rrp);
+}
+
+static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
 {
-       if (!priv->params.lro_en)
+       struct mlx5e_redirect_rqt_param drop_rrp = {
+               .is_rss = false,
+               {
+                       .rqn = priv->drop_rq.rqn,
+               },
+       };
+
+       mlx5e_redirect_rqts(priv, drop_rrp);
+}
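
struct mlx5e_redirect_rqt_param behaves as a small tagged union: is_rss selects between an RSS view (a channel set plus hash function, resolved slot by slot through the indirection table) and a direct view (one fixed RQN, used both for per-channel tables and for the drop RQ). The two wrappers above just build the two variants. A minimal sketch of the shape (types and names here are hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct redirect_param {
	bool is_rss;                           /* tag */
	union {
		unsigned int rqn;              /* direct: one fixed RQ   */
		struct {
			const unsigned int *rqns; /* rss: resolve per slot */
			int num;
		} rss;
	};
};

static unsigned int resolve(const struct redirect_param *p, int slot)
{
	return p->is_rss ? p->rss.rqns[slot % p->rss.num] : p->rqn;
}

int main(void)
{
	unsigned int chan_rqns[] = { 10, 11, 12, 13 };
	struct redirect_param rss  = { .is_rss = true,
				       .rss = { chan_rqns, 4 } };
	struct redirect_param drop = { .is_rss = false, .rqn = 99 };

	printf("slot 5 -> %u, drop -> %u\n",
	       resolve(&rss, 5), resolve(&drop, 0));
	return 0;
}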
+
+static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
+{
+       if (!params->lro_en)
                return;
 
 #define ROUGH_MAX_L2_L3_HDR_SZ 256
@@ -2072,13 +2260,13 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv)
                 MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
                 MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
        MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
-                (priv->params.lro_wqe_sz -
-                 ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
-       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout);
+                (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
 }
 
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc,
-                                   enum mlx5e_traffic_types tt)
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
+                                   enum mlx5e_traffic_types tt,
+                                   void *tirc)
 {
        void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
 
@@ -2094,16 +2282,15 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc,
                                 MLX5_HASH_FIELD_SEL_DST_IP   |\
                                 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
 
-       MLX5_SET(tirc, tirc, rx_hash_fn,
-                mlx5e_rx_hash_fn(priv->params.rss_hfunc));
-       if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
+       MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
+       if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
                void *rss_key = MLX5_ADDR_OF(tirc, tirc,
                                             rx_hash_toeplitz_key);
                size_t len = MLX5_FLD_SZ_BYTES(tirc,
                                               rx_hash_toeplitz_key);
 
                MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
-               memcpy(rss_key, priv->params.toeplitz_hash_key, len);
+               memcpy(rss_key, params->toeplitz_hash_key, len);
        }
 
        switch (tt) {
@@ -2208,7 +2395,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
        MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
        tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
 
-       mlx5e_build_tir_ctx_lro(tirc, priv);
+       mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in,
@@ -2258,9 +2445,9 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu)
        *mtu = MLX5E_HW2SW_MTU(hw_mtu);
 }
 
-static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
+static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
 {
-       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct net_device *netdev = priv->netdev;
        u16 mtu;
        int err;
 
@@ -2280,8 +2467,8 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
 static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       int nch = priv->params.num_channels;
-       int ntc = priv->params.num_tc;
+       int nch = priv->channels.params.num_channels;
+       int ntc = priv->channels.params.num_tc;
        int tc;
 
        netdev_reset_tc(netdev);
@@ -2298,53 +2485,112 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
                netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
+static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv)
+{
+       struct mlx5e_channel *c;
+       struct mlx5e_txqsq *sq;
+       int i, tc;
+
+       for (i = 0; i < priv->channels.num; i++)
+               for (tc = 0; tc < priv->profile->max_tc; tc++)
+                       priv->channel_tc2txq[i][tc] = i + tc * priv->channels.num;
+
+       for (i = 0; i < priv->channels.num; i++) {
+               c = priv->channels.c[i];
+               for (tc = 0; tc < c->num_tc; tc++) {
+                       sq = &c->sq[tc];
+                       priv->txq2sq[sq->txq_ix] = sq;
+               }
+       }
+}
+
+static void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
+{
+       int num_txqs = priv->channels.num * priv->channels.params.num_tc;
+       struct net_device *netdev = priv->netdev;
+
+       mlx5e_netdev_set_tcs(netdev);
+       if (netdev->real_num_tx_queues != num_txqs)
+               netif_set_real_num_tx_queues(netdev, num_txqs);
+       if (netdev->real_num_rx_queues != priv->channels.num)
+               netif_set_real_num_rx_queues(netdev, priv->channels.num);
+
+       mlx5e_build_channels_tx_maps(priv);
+       mlx5e_activate_channels(&priv->channels);
+       netif_tx_start_all_queues(priv->netdev);
+
+       if (MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+               mlx5e_add_sqs_fwd_rules(priv);
+
+       mlx5e_wait_channels_min_rx_wqes(&priv->channels);
+       mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
+}
+
+static void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
+{
+       mlx5e_redirect_rqts_to_drop(priv);
+
+       if (MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+               mlx5e_remove_sqs_fwd_rules(priv);
+
+       /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
+        * polling for inactive tx queues.
+        */
+       netif_tx_stop_all_queues(priv->netdev);
+       netif_tx_disable(priv->netdev);
+       mlx5e_deactivate_channels(&priv->channels);
+}
+
+void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+                               struct mlx5e_channels *new_chs,
+                               mlx5e_fp_hw_modify hw_modify)
+{
+       struct net_device *netdev = priv->netdev;
+       int new_num_txqs;
+
+       new_num_txqs = new_chs->num * new_chs->params.num_tc;
+
+       netif_carrier_off(netdev);
+
+       if (new_num_txqs < netdev->real_num_tx_queues)
+               netif_set_real_num_tx_queues(netdev, new_num_txqs);
+
+       mlx5e_deactivate_priv_channels(priv);
+       mlx5e_close_channels(&priv->channels);
+
+       priv->channels = *new_chs;
+
+       /* New channels are ready to roll, modify HW settings if needed */
+       if (hw_modify)
+               hw_modify(priv);
+
+       mlx5e_refresh_tirs(priv, false);
+       mlx5e_activate_priv_channels(priv);
+
+       mlx5e_update_carrier(priv);
+}
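
mlx5e_switch_priv_channels is the heart of the refactor: callers first open a complete candidate set (new_chs) while the old one is still running, and only then does the switch quiesce and close the old set, install the new one, apply any HW-side follow-up through hw_modify, and reactivate. If opening the candidate set fails, the running configuration is never touched. A sketch of that make-before-break flow with hypothetical stubs:

#include <stdio.h>

struct channels { int num; };

typedef void (*hw_modify_fn)(void);

static int  open_channels(struct channels *c, int n) { c->num = n; return 0; }
static void deactivate(struct channels *c)     { (void)c; }
static void close_channels(struct channels *c) { c->num = 0; }
static void activate(struct channels *c)       { (void)c; }

static struct channels live;

/* The old set keeps serving until the new one is fully opened. */
static int switch_channels(int new_num, hw_modify_fn hw_modify)
{
	struct channels next;

	if (open_channels(&next, new_num)) /* make ... */
		return -1;                 /* live set untouched on error */

	deactivate(&live);                 /* ... before break */
	close_channels(&live);
	live = next;
	if (hw_modify)
		hw_modify();               /* e.g. re-apply LRO or port MTU */
	activate(&live);
	return 0;
}

int main(void)
{
	open_channels(&live, 4);
	activate(&live);
	if (!switch_channels(8, NULL))
		printf("now %d channels\n", live.num);
	return 0;
}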
+
 int mlx5e_open_locked(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5_core_dev *mdev = priv->mdev;
-       int num_txqs;
        int err;
 
        set_bit(MLX5E_STATE_OPENED, &priv->state);
 
-       mlx5e_netdev_set_tcs(netdev);
-
-       num_txqs = priv->params.num_channels * priv->params.num_tc;
-       netif_set_real_num_tx_queues(netdev, num_txqs);
-       netif_set_real_num_rx_queues(netdev, priv->params.num_channels);
-
-       err = mlx5e_open_channels(priv);
-       if (err) {
-               netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n",
-                          __func__, err);
+       err = mlx5e_open_channels(priv, &priv->channels);
+       if (err)
                goto err_clear_state_opened_flag;
-       }
-
-       err = mlx5e_refresh_tirs_self_loopback(priv->mdev, false);
-       if (err) {
-               netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
-                          __func__, err);
-               goto err_close_channels;
-       }
 
-       mlx5e_redirect_rqts(priv);
+       mlx5e_refresh_tirs(priv, false);
+       mlx5e_activate_priv_channels(priv);
        mlx5e_update_carrier(priv);
        mlx5e_timestamp_init(priv);
-#ifdef CONFIG_RFS_ACCEL
-       priv->netdev->rx_cpu_rmap = priv->mdev->rmap;
-#endif
+
        if (priv->profile->update_stats)
                queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
 
-       if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
-               err = mlx5e_add_sqs_fwd_rules(priv);
-               if (err)
-                       goto err_close_channels;
-       }
        return 0;
 
-err_close_channels:
-       mlx5e_close_channels(priv);
 err_clear_state_opened_flag:
        clear_bit(MLX5E_STATE_OPENED, &priv->state);
        return err;
@@ -2365,7 +2611,6 @@ int mlx5e_open(struct net_device *netdev)
 int mlx5e_close_locked(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5_core_dev *mdev = priv->mdev;
 
        /* May already be CLOSED in case a previous configuration operation
         * (e.g RX/TX queue size change) that involves close&open failed.
@@ -2375,13 +2620,10 @@ int mlx5e_close_locked(struct net_device *netdev)
 
        clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
-       if (MLX5_CAP_GEN(mdev, vport_group_manager))
-               mlx5e_remove_sqs_fwd_rules(priv);
-
        mlx5e_timestamp_cleanup(priv);
        netif_carrier_off(priv->netdev);
-       mlx5e_redirect_rqts(priv);
-       mlx5e_close_channels(priv);
+       mlx5e_deactivate_priv_channels(priv);
+       mlx5e_close_channels(&priv->channels);
 
        return 0;
 }
@@ -2401,11 +2643,10 @@ int mlx5e_close(struct net_device *netdev)
        return err;
 }
 
-static int mlx5e_create_drop_rq(struct mlx5e_priv *priv,
-                               struct mlx5e_rq *rq,
-                               struct mlx5e_rq_param *param)
+static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
+                              struct mlx5e_rq *rq,
+                              struct mlx5e_rq_param *param)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        void *rqc = param->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
        int err;
@@ -2417,16 +2658,15 @@ static int mlx5e_create_drop_rq(struct mlx5e_priv *priv,
        if (err)
                return err;
 
-       rq->priv = priv;
+       rq->mdev = mdev;
 
        return 0;
 }
 
-static int mlx5e_create_drop_cq(struct mlx5e_priv *priv,
-                               struct mlx5e_cq *cq,
-                               struct mlx5e_cq_param *param)
+static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
+                              struct mlx5e_cq *cq,
+                              struct mlx5e_cq_param *param)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5_core_cq *mcq = &cq->mcq;
        int eqn_not_used;
        unsigned int irqn;
@@ -2449,59 +2689,57 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv,
        mcq->event      = mlx5e_cq_error_event;
        mcq->irqn       = irqn;
 
-       cq->priv = priv;
+       cq->mdev = mdev;
 
        return 0;
 }
 
-static int mlx5e_open_drop_rq(struct mlx5e_priv *priv)
+static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev,
+                             struct mlx5e_rq *drop_rq)
 {
-       struct mlx5e_cq_param cq_param;
-       struct mlx5e_rq_param rq_param;
-       struct mlx5e_rq *rq = &priv->drop_rq;
-       struct mlx5e_cq *cq = &priv->drop_rq.cq;
+       struct mlx5e_cq_param cq_param = {};
+       struct mlx5e_rq_param rq_param = {};
+       struct mlx5e_cq *cq = &drop_rq->cq;
        int err;
 
-       memset(&cq_param, 0, sizeof(cq_param));
-       memset(&rq_param, 0, sizeof(rq_param));
        mlx5e_build_drop_rq_param(&rq_param);
 
-       err = mlx5e_create_drop_cq(priv, cq, &cq_param);
+       err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
        if (err)
                return err;
 
-       err = mlx5e_enable_cq(cq, &cq_param);
+       err = mlx5e_create_cq(cq, &cq_param);
        if (err)
-               goto err_destroy_cq;
+               goto err_free_cq;
 
-       err = mlx5e_create_drop_rq(priv, rq, &rq_param);
+       err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
        if (err)
-               goto err_disable_cq;
+               goto err_destroy_cq;
 
-       err = mlx5e_enable_rq(rq, &rq_param);
+       err = mlx5e_create_rq(drop_rq, &rq_param);
        if (err)
-               goto err_destroy_rq;
+               goto err_free_rq;
 
        return 0;
 
-err_destroy_rq:
-       mlx5e_destroy_rq(&priv->drop_rq);
-
-err_disable_cq:
-       mlx5e_disable_cq(&priv->drop_rq.cq);
+err_free_rq:
+       mlx5e_free_rq(drop_rq);
 
 err_destroy_cq:
-       mlx5e_destroy_cq(&priv->drop_rq.cq);
+       mlx5e_destroy_cq(cq);
+
+err_free_cq:
+       mlx5e_free_cq(cq);
 
        return err;
 }
 
-static void mlx5e_close_drop_rq(struct mlx5e_priv *priv)
+static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
 {
-       mlx5e_disable_rq(&priv->drop_rq);
-       mlx5e_destroy_rq(&priv->drop_rq);
-       mlx5e_disable_cq(&priv->drop_rq.cq);
-       mlx5e_destroy_cq(&priv->drop_rq.cq);
+       mlx5e_destroy_rq(drop_rq);
+       mlx5e_free_rq(drop_rq);
+       mlx5e_destroy_cq(&drop_rq->cq);
+       mlx5e_free_cq(&drop_rq->cq);
 }
 
 static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc)
@@ -2552,24 +2790,24 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
                mlx5e_destroy_tis(priv, tc);
 }
 
-static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
-                                     enum mlx5e_traffic_types tt)
+static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
+                                     enum mlx5e_traffic_types tt,
+                                     u32 *tirc)
 {
        MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 
-       mlx5e_build_tir_ctx_lro(tirc, priv);
+       mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
 
        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
        MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
-       mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt);
+       mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc);
 }
 
-static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
-                                      u32 rqtn)
+static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
 {
        MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 
-       mlx5e_build_tir_ctx_lro(tirc, priv);
+       mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
 
        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
        MLX5_SET(tirc, tirc, indirect_table, rqtn);
@@ -2594,7 +2832,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
                memset(in, 0, inlen);
                tir = &priv->indir_tir[tt];
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-               mlx5e_build_indir_tir_ctx(priv, tirc, tt);
+               mlx5e_build_indir_tir_ctx(priv, tt, tirc);
                err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
                if (err)
                        goto err_destroy_tirs;
@@ -2632,8 +2870,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
                memset(in, 0, inlen);
                tir = &priv->direct_tir[ix];
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-               mlx5e_build_direct_tir_ctx(priv, tirc,
-                                          priv->direct_tir[ix].rqt.rqtn);
+               mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc);
                err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
                if (err)
                        goto err_destroy_ch_tirs;
@@ -2669,16 +2906,13 @@ void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
                mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
 }
 
-int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd)
+int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 {
        int err = 0;
        int i;
 
-       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-               return 0;
-
-       for (i = 0; i < priv->params.num_channels; i++) {
-               err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd);
+       for (i = 0; i < chs->num; i++) {
+               err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
                if (err)
                        return err;
        }
@@ -2689,7 +2923,7 @@ int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd)
 static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       bool was_opened;
+       struct mlx5e_channels new_channels = {};
        int err = 0;
 
        if (tc && tc != MLX5E_MAX_NUM_TC)
@@ -2697,17 +2931,21 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
 
        mutex_lock(&priv->state_lock);
 
-       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened)
-               mlx5e_close_locked(priv->netdev);
+       new_channels.params = priv->channels.params;
+       new_channels.params.num_tc = tc ? tc : 1;
 
-       priv->params.num_tc = tc ? tc : 1;
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               goto out;
+       }
 
-       if (was_opened)
-               err = mlx5e_open_locked(priv->netdev);
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               goto out;
 
+       mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+out:
        mutex_unlock(&priv->state_lock);
-
        return err;
 }
 
@@ -2737,7 +2975,9 @@ mqprio:
        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       return mlx5e_setup_tc(dev, tc->tc);
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+       return mlx5e_setup_tc(dev, tc->mqprio->num_tc);
 }
 
 static void
@@ -2822,26 +3062,31 @@ typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
 static int set_feature_lro(struct net_device *netdev, bool enable)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       int err;
+       struct mlx5e_channels new_channels = {};
+       int err = 0;
+       bool reset;
 
        mutex_lock(&priv->state_lock);
 
-       if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST))
-               mlx5e_close_locked(priv->netdev);
+       reset = (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST);
+       reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
 
-       priv->params.lro_en = enable;
-       err = mlx5e_modify_tirs_lro(priv);
-       if (err) {
-               netdev_err(netdev, "lro modify failed, %d\n", err);
-               priv->params.lro_en = !enable;
+       new_channels.params = priv->channels.params;
+       new_channels.params.lro_en = enable;
+
+       if (!reset) {
+               priv->channels.params = new_channels.params;
+               err = mlx5e_modify_tirs_lro(priv);
+               goto out;
        }
 
-       if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST))
-               mlx5e_open_locked(priv->netdev);
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err)
+               goto out;
 
+       mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
+out:
        mutex_unlock(&priv->state_lock);
-
        return err;
 }
 
@@ -2881,15 +3126,19 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable)
 static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       int err;
+       int err = 0;
 
        mutex_lock(&priv->state_lock);
 
-       priv->params.vlan_strip_disable = !enable;
-       err = mlx5e_modify_rqs_vsd(priv, !enable);
+       priv->channels.params.vlan_strip_disable = !enable;
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+               goto unlock;
+
+       err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
        if (err)
-               priv->params.vlan_strip_disable = enable;
+               priv->channels.params.vlan_strip_disable = enable;
 
+unlock:
        mutex_unlock(&priv->state_lock);
 
        return err;
@@ -2960,28 +3209,38 @@ static int mlx5e_set_features(struct net_device *netdev,
 static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       bool was_opened;
+       struct mlx5e_channels new_channels = {};
+       int curr_mtu;
        int err = 0;
        bool reset;
 
        mutex_lock(&priv->state_lock);
 
-       reset = !priv->params.lro_en &&
-               (priv->params.rq_wq_type !=
+       reset = !priv->channels.params.lro_en &&
+               (priv->channels.params.rq_wq_type !=
                 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 
-       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened && reset)
-               mlx5e_close_locked(netdev);
+       reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
 
+       curr_mtu    = netdev->mtu;
        netdev->mtu = new_mtu;
-       mlx5e_set_dev_port_mtu(netdev);
 
-       if (was_opened && reset)
-               err = mlx5e_open_locked(netdev);
+       if (!reset) {
+               mlx5e_set_dev_port_mtu(priv);
+               goto out;
+       }
 
-       mutex_unlock(&priv->state_lock);
+       new_channels.params = priv->channels.params;
+       err = mlx5e_open_channels(priv, &new_channels);
+       if (err) {
+               netdev->mtu = curr_mtu;
+               goto out;
+       }
 
+       mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_set_dev_port_mtu);
+
+out:
+       mutex_unlock(&priv->state_lock);
        return err;
 }
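
mlx5e_switch_priv_channels additionally takes a hw_modify callback (NULL in the earlier hunk, mlx5e_modify_tirs_lro in set_feature_lro, mlx5e_set_dev_port_mtu here) for HW state that must be reprogrammed as part of the switch. A hedged sketch of that callback shape, using hypothetical stand-in names:

#include <stdio.h>

struct priv { int mtu; };

/* runs after the new channels are opened but before traffic moves over */
typedef void (*hw_modify_fn)(struct priv *priv);

static void set_port_mtu(struct priv *priv)
{
        printf("programming port MTU %d\n", priv->mtu);
}

static void switch_channels(struct priv *priv, hw_modify_fn hw_modify)
{
        /* ... activate new channels ... */
        if (hw_modify)
                hw_modify(priv);
        /* ... deactivate and free old channels ... */
}

int main(void)
{
        struct priv p = { .mtu = 9000 };

        switch_channels(&p, set_port_mtu);  /* MTU path */
        switch_channels(&p, NULL);          /* paths with no extra HW work */
        return 0;
}

Note also how mlx5e_change_mtu saves curr_mtu up front, so a failed mlx5e_open_channels leaves netdev->mtu exactly as it was.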
 
@@ -3100,8 +3359,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
                                            vf_stats);
 }
 
-void mlx5e_add_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti)
+static void mlx5e_add_vxlan_port(struct net_device *netdev,
+                                struct udp_tunnel_info *ti)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -3114,8 +3373,8 @@ void mlx5e_add_vxlan_port(struct net_device *netdev,
        mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
 }
 
-void mlx5e_del_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti)
+static void mlx5e_del_vxlan_port(struct net_device *netdev,
+                                struct udp_tunnel_info *ti)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -3186,8 +3445,8 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 
        netdev_err(dev, "TX timeout detected\n");
 
-       for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
-               struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
+       for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
+               struct mlx5e_txqsq *sq = priv->txq2sq[i];
 
                if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
                        continue;
@@ -3219,7 +3478,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
        /* no need for full reset when exchanging programs */
-       reset = (!priv->xdp_prog || !prog);
+       reset = (!priv->channels.params.xdp_prog || !prog);
 
        if (was_opened && reset)
                mlx5e_close_locked(netdev);
@@ -3227,7 +3486,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
                /* num_channels is invariant here, so we can take the
                 * batched reference right upfront.
                 */
-               prog = bpf_prog_add(prog, priv->params.num_channels);
+               prog = bpf_prog_add(prog, priv->channels.num);
                if (IS_ERR(prog)) {
                        err = PTR_ERR(prog);
                        goto unlock;
@@ -3237,12 +3496,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
        /* exchange programs; we keep the extra prog reference from the
         * caller as long as we don't fail from this point onwards.
         */
-       old_prog = xchg(&priv->xdp_prog, prog);
+       old_prog = xchg(&priv->channels.params.xdp_prog, prog);
        if (old_prog)
                bpf_prog_put(old_prog);
 
        if (reset) /* change RQ type according to priv->channels.params.xdp_prog */
-               mlx5e_set_rq_priv_params(priv);
+               mlx5e_set_rq_params(priv->mdev, &priv->channels.params);
 
        if (was_opened && reset)
                mlx5e_open_locked(netdev);
@@ -3253,8 +3512,8 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
        /* when exchanging programs without a reset, we update the ref
         * counts on behalf of the channels' RQs here.
         */
-       for (i = 0; i < priv->params.num_channels; i++) {
-               struct mlx5e_channel *c = priv->channel[i];
+       for (i = 0; i < priv->channels.num; i++) {
+               struct mlx5e_channel *c = priv->channels.c[i];
 
                clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
                napi_synchronize(&c->napi);
@@ -3280,7 +3539,7 @@ static bool mlx5e_xdp_attached(struct net_device *dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
 
-       return !!priv->xdp_prog;
+       return !!priv->channels.params.xdp_prog;
 }
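
When the RQ type does not change, the new program replaces the old one without a channel reset; the diff grabs priv->channels.num references in a single bpf_prog_add() call and then swaps each RQ's pointer. A user-space model of that batched refcount hand-off (per-RQ references only; stub prog type, not the kernel's bpf_prog):

#include <stdio.h>

struct prog { int refcnt; };

static void prog_add(struct prog *p, int n) { p->refcnt += n; }
static void prog_put(struct prog *p)        { p->refcnt--; }

int main(void)
{
        enum { NUM_CHANNELS = 4 };
        struct prog old = { .refcnt = NUM_CHANNELS };  /* one ref per RQ  */
        struct prog new = { .refcnt = 1 };             /* caller's ref    */
        struct prog *rq_prog[NUM_CHANNELS];
        int i;

        for (i = 0; i < NUM_CHANNELS; i++)
                rq_prog[i] = &old;

        /* one batched grab up front: num_channels references at once */
        prog_add(&new, NUM_CHANNELS);

        /* per-RQ swap: each RQ drops its old ref and keeps a new one */
        for (i = 0; i < NUM_CHANNELS; i++) {
                prog_put(rq_prog[i]);
                rq_prog[i] = &new;
        }

        printf("old refs=%d new refs=%d\n", old.refcnt, new.refcnt);
        return 0;
}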
 
 static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp)
@@ -3303,10 +3562,12 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp)
 static void mlx5e_netpoll(struct net_device *dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_channels *chs = &priv->channels;
+
        int i;
 
-       for (i = 0; i < priv->params.num_channels; i++)
-               napi_schedule(&priv->channel[i]->napi);
+       for (i = 0; i < chs->num; i++)
+               napi_schedule(&chs->c[i]->napi);
 }
 #endif
 
@@ -3475,6 +3736,9 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
        if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
                params->rx_cq_moderation.usec =
                        MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+       MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+                       params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 }
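
MLX5E_SET_PFLAG now operates on a struct mlx5e_params rather than on priv. The macro itself is not shown in this diff; below is a plausible sketch of how such a set/get pair over a pflags bitmask could look (an assumption for illustration, not the driver's actual definition):

#include <stdbool.h>
#include <stdio.h>

enum { PFLAG_RX_CQE_BASED_MODER, PFLAG_RX_CQE_COMPRESS };

struct params { unsigned int pflags; };

/* assumed shape: set or clear one flag bit depending on 'enable' */
#define SET_PFLAG(p, bit, enable)                          \
        do {                                               \
                if (enable)                                \
                        (p)->pflags |= 1U << (bit);        \
                else                                       \
                        (p)->pflags &= ~(1U << (bit));     \
        } while (0)

#define GET_PFLAG(p, bit) (!!((p)->pflags & (1U << (bit))))

int main(void)
{
        struct params params = { 0 };

        SET_PFLAG(&params, PFLAG_RX_CQE_BASED_MODER, true);
        printf("cqe-based moderation: %d\n",
               GET_PFLAG(&params, PFLAG_RX_CQE_BASED_MODER));
        return 0;
}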
 
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
@@ -3489,75 +3753,79 @@ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
        return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
 }
 
-static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
-                                       struct net_device *netdev,
-                                       const struct mlx5e_profile *profile,
-                                       void *ppriv)
+static void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+                                  struct mlx5e_params *params,
+                                  u16 max_channels)
 {
-       struct mlx5e_priv *priv = netdev_priv(netdev);
+       u8 cq_period_mode = 0;
        u32 link_speed = 0;
        u32 pci_bw = 0;
-       u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
-                                        MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
-                                        MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
-       priv->mdev                         = mdev;
-       priv->netdev                       = netdev;
-       priv->params.num_channels          = profile->max_nch(mdev);
-       priv->profile                      = profile;
-       priv->ppriv                        = ppriv;
+       params->num_channels = max_channels;
+       params->num_tc       = 1;
 
-       priv->params.lro_timeout =
-               mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
-
-       priv->params.log_sq_size = is_kdump_kernel() ?
+       /* SQ */
+       params->log_sq_size = is_kdump_kernel() ?
                MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
                MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
        /* set CQE compression */
-       priv->params.rx_cqe_compress_def = false;
+       params->rx_cqe_compress_def = false;
        if (MLX5_CAP_GEN(mdev, cqe_compression) &&
-           MLX5_CAP_GEN(mdev, vport_group_manager)) {
+            MLX5_CAP_GEN(mdev, vport_group_manager)) {
                mlx5e_get_max_linkspeed(mdev, &link_speed);
                mlx5e_get_pci_bw(mdev, &pci_bw);
                mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
-                             link_speed, pci_bw);
-               priv->params.rx_cqe_compress_def =
-                       cqe_compress_heuristic(link_speed, pci_bw);
+                              link_speed, pci_bw);
+               params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw);
        }
-
-       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS,
-                       priv->params.rx_cqe_compress_def);
-
-       mlx5e_set_rq_priv_params(priv);
-       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-               priv->params.lro_en = true;
-
-       priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
-       mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
-
-       priv->params.tx_cq_moderation.usec =
-               MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
-       priv->params.tx_cq_moderation.pkts =
-               MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
-       priv->params.tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
-       mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
-       if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE &&
+       MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+
+       /* RQ */
+       mlx5e_set_rq_params(mdev, params);
+
+       /* HW LRO */
+       if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+               params->lro_en = true;
+       params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+
+       /* CQ moderation params */
+       cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+                       MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+                       MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+       params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+       mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
+
+       params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+       params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+
+       /* TX inline */
+       params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
+       mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+       if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE &&
            !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
-               priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2;
+               params->tx_min_inline_mode = MLX5_INLINE_MODE_L2;
 
-       priv->params.num_tc                = 1;
-       priv->params.rss_hfunc             = ETH_RSS_HASH_XOR;
+       /* RSS */
+       params->rss_hfunc = ETH_RSS_HASH_XOR;
+       netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
+       mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt,
+                                     MLX5E_INDIR_RQT_SIZE, max_channels);
+}
 
-       netdev_rss_key_fill(priv->params.toeplitz_hash_key,
-                           sizeof(priv->params.toeplitz_hash_key));
+static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
+                                       struct net_device *netdev,
+                                       const struct mlx5e_profile *profile,
+                                       void *ppriv)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
 
-       mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt,
-                                     MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev));
+       priv->mdev        = mdev;
+       priv->netdev      = netdev;
+       priv->profile     = profile;
+       priv->ppriv       = ppriv;
 
-       /* Initialize pflags */
-       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
-                       priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+       mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
 
        mutex_init(&priv->state_lock);
 
@@ -3643,7 +3911,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
                netdev->hw_features |= NETIF_F_RXALL;
 
        netdev->features          = netdev->hw_features;
-       if (!priv->params.lro_en)
+       if (!priv->channels.params.lro_en)
                netdev->features  &= ~NETIF_F_LRO;
 
        if (fcs_enabled)
@@ -3708,8 +3976,8 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
        mlx5e_vxlan_cleanup(priv);
 
-       if (priv->xdp_prog)
-               bpf_prog_put(priv->xdp_prog);
+       if (priv->channels.params.xdp_prog)
+               bpf_prog_put(priv->channels.params.xdp_prog);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -3872,6 +4140,10 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
                return NULL;
        }
 
+#ifdef CONFIG_RFS_ACCEL
+       netdev->rx_cpu_rmap = mdev->rmap;
+#endif
+
        profile->init(mdev, netdev, profile, ppriv);
 
        netif_carrier_off(netdev);
@@ -3906,7 +4178,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
        if (err)
                goto out;
 
-       err = mlx5e_open_drop_rq(priv);
+       err = mlx5e_open_drop_rq(mdev, &priv->drop_rq);
        if (err) {
                mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
                goto err_cleanup_tx;
@@ -3925,7 +4197,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
        mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
        netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu);
 
-       mlx5e_set_dev_port_mtu(netdev);
+       mlx5e_set_dev_port_mtu(priv);
 
        if (profile->enable)
                profile->enable(priv);
@@ -3939,7 +4211,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
        return 0;
 
 err_close_drop_rq:
-       mlx5e_close_drop_rq(priv);
+       mlx5e_close_drop_rq(&priv->drop_rq);
 
 err_cleanup_tx:
        profile->cleanup_tx(priv);
@@ -4003,7 +4275,7 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 
        mlx5e_destroy_q_counter(priv);
        profile->cleanup_rx(priv);
-       mlx5e_close_drop_rq(priv);
+       mlx5e_close_drop_rq(&priv->drop_rq);
        profile->cleanup_tx(priv);
        cancel_delayed_work_sync(&priv->update_stats_work);
 }
index 2c864574a9d5faeaa3b329f3bc0ab0d4e0cc7b55..53db5ec2c1225a7960e78965403aced80eecc94d 100644 (file)
@@ -102,14 +102,16 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
        int i, j;
 
        memset(s, 0, sizeof(*s));
-       for (i = 0; i < priv->params.num_channels; i++) {
-               rq_stats = &priv->channel[i]->rq.stats;
+       for (i = 0; i < priv->channels.num; i++) {
+               struct mlx5e_channel *c = priv->channels.c[i];
+
+               rq_stats = &c->rq.stats;
 
                s->rx_packets   += rq_stats->packets;
                s->rx_bytes     += rq_stats->bytes;
 
-               for (j = 0; j < priv->params.num_tc; j++) {
-                       sq_stats = &priv->channel[i]->sq[j].stats;
+               for (j = 0; j < priv->channels.params.num_tc; j++) {
+                       sq_stats = &c->sq[j].stats;
 
                        s->tx_packets           += sq_stats->packets;
                        s->tx_bytes             += sq_stats->bytes;
@@ -187,22 +189,26 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_eswitch_rep *rep = priv->ppriv;
        struct mlx5e_channel *c;
-       int n, tc, err, num_sqs = 0;
+       int n, tc, num_sqs = 0;
+       int err = -ENOMEM;
        u16 *sqs;
 
-       sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL);
+       sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(u16), GFP_KERNEL);
        if (!sqs)
-               return -ENOMEM;
+               goto out;
 
-       for (n = 0; n < priv->params.num_channels; n++) {
-               c = priv->channel[n];
+       for (n = 0; n < priv->channels.num; n++) {
+               c = priv->channels.c[n];
                for (tc = 0; tc < c->num_tc; tc++)
                        sqs[num_sqs++] = c->sq[tc].sqn;
        }
 
        err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs);
-
        kfree(sqs);
+
+out:
+       if (err)
+               netdev_warn(priv->netdev, "Failed to add SQs FWD rules, err %d\n", err);
        return err;
 }
 
@@ -393,48 +399,27 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
        .ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
        .ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
        .ndo_get_stats64         = mlx5e_rep_get_stats,
-       .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
-       .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
        .ndo_has_offload_stats   = mlx5e_has_offload_stats,
        .ndo_get_offload_stats   = mlx5e_get_offload_stats,
 };
 
-static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev,
-                                       struct net_device *netdev,
-                                       const struct mlx5e_profile *profile,
-                                       void *ppriv)
+static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
+                                  struct mlx5e_params *params)
 {
-       struct mlx5e_priv *priv = netdev_priv(netdev);
        u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
                                         MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
                                         MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
-       priv->params.log_sq_size           =
-               MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
-       priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
-       priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
-
-       priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
-                                           BIT(priv->params.log_rq_size));
-
-       priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
-       mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
+       params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+       params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
+       params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
 
-       priv->params.tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
-       priv->params.num_tc                = 1;
+       params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+       mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
 
-       priv->params.lro_wqe_sz            =
-               MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
-
-       priv->mdev                         = mdev;
-       priv->netdev                       = netdev;
-       priv->params.num_channels          = profile->max_nch(mdev);
-       priv->profile                      = profile;
-       priv->ppriv                        = ppriv;
-
-       mutex_init(&priv->state_lock);
-
-       INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+       params->tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
+       params->num_tc                = 1;
+       params->lro_wqe_sz            = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev)
@@ -460,7 +445,19 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
                           const struct mlx5e_profile *profile,
                           void *ppriv)
 {
-       mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv);
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+
+       priv->mdev                         = mdev;
+       priv->netdev                       = netdev;
+       priv->profile                      = profile;
+       priv->ppriv                        = ppriv;
+
+       mutex_init(&priv->state_lock);
+
+       INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+       priv->channels.params.num_channels = profile->max_nch(mdev);
+       mlx5e_build_rep_params(mdev, &priv->channels.params);
        mlx5e_build_rep_netdev(netdev);
 }
 
@@ -505,7 +502,7 @@ err_del_flow_rule:
 err_destroy_direct_tirs:
        mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
-       for (i = 0; i < priv->params.num_channels; i++)
+       for (i = 0; i < priv->channels.params.num_channels; i++)
                mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
        return err;
 }
@@ -518,7 +515,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
        mlx5e_tc_cleanup(priv);
        mlx5_del_flow_rules(rep->vport_rx_rule);
        mlx5e_destroy_direct_tirs(priv);
-       for (i = 0; i < priv->params.num_channels; i++)
+       for (i = 0; i < priv->channels.params.num_channels; i++)
                mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
 }
 
index 3d371688fbbbf3544170468840829e15095ea3a0..1a9532b31635f3dca7d8d6d57e590b1c18122315 100644 (file)
@@ -156,28 +156,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
        return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
 }
 
-void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val)
-{
-       bool was_opened;
-
-       if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
-               return;
-
-       if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val)
-               return;
-
-       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened)
-               mlx5e_close_locked(priv->netdev);
-
-       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val);
-       mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type);
-
-       if (was_opened)
-               mlx5e_open_locked(priv->netdev);
-
-}
-
 #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)
 
 static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
@@ -331,7 +309,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
 static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-       struct mlx5e_sq *sq = &rq->channel->icosq;
+       struct mlx5e_icosq *sq = &rq->channel->icosq;
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *wqe;
        u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
@@ -341,7 +319,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
        while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
                sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
                sq->db.ico_wqe[pi].num_wqebbs = 1;
-               mlx5e_send_nop(sq, false);
+               mlx5e_post_nop(wq, sq->sqn, &sq->pc);
        }
 
        wqe = mlx5_wq_cyc_get_wqe(wq, pi);
@@ -353,7 +331,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
        sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
        sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
        sq->pc += num_wqebbs;
-       mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
+       mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
 }
 
 static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
@@ -601,6 +579,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
        if (lro_num_seg > 1) {
                mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
                skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+               /* Subtract one since we already counted this as one
+                * "regular" packet in mlx5e_complete_rx_cqe()
+                */
+               rq->stats.packets += lro_num_seg - 1;
                rq->stats.lro_packets++;
                rq->stats.lro_bytes += cqe_bcnt;
        }
@@ -633,37 +615,36 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
        mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
 }
 
-static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
 {
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_tx_wqe *wqe;
-       u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */
+       u16 pi = (sq->pc - 1) & wq->sz_m1; /* last pi */
 
        wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
 
-       wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
-       mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
+       mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
 }
 
 static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
                                        struct mlx5e_dma_info *di,
                                        const struct xdp_buff *xdp)
 {
-       struct mlx5e_sq          *sq   = &rq->channel->xdp_sq;
+       struct mlx5e_xdpsq       *sq   = &rq->xdpsq;
        struct mlx5_wq_cyc       *wq   = &sq->wq;
-       u16                      pi    = sq->pc & wq->sz_m1;
+       u16                       pi   = sq->pc & wq->sz_m1;
        struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-       struct mlx5e_sq_wqe_info *wi   = &sq->db.xdp.wqe_info[pi];
 
        struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
        struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
        struct mlx5_wqe_data_seg *dseg;
-       u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT;
 
        ptrdiff_t data_offset = xdp->data - xdp->data_hard_start;
        dma_addr_t dma_addr  = di->addr + data_offset;
        unsigned int dma_len = xdp->data_end - xdp->data;
 
+       prefetchw(wqe);
+
        if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE ||
                     MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) {
                rq->stats.xdp_drop++;
@@ -671,48 +652,42 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
                return false;
        }
 
-       if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
-               if (sq->db.xdp.doorbell) {
+       if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
+               if (sq->db.doorbell) {
                        /* SQ is full, ring doorbell */
                        mlx5e_xmit_xdp_doorbell(sq);
-                       sq->db.xdp.doorbell = false;
+                       sq->db.doorbell = false;
                }
                rq->stats.xdp_tx_full++;
                mlx5e_page_release(rq, di, true);
                return false;
        }
 
-       dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
-                                  PCI_DMA_TODEVICE);
+       dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE);
 
-       memset(wqe, 0, sizeof(*wqe));
+       cseg->fm_ce_se = 0;
 
        dseg = (struct mlx5_wqe_data_seg *)eseg + 1;
+
        /* copy the inline part if required */
        if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
                memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
                eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
                dma_len  -= MLX5E_XDP_MIN_INLINE;
                dma_addr += MLX5E_XDP_MIN_INLINE;
-
-               ds_cnt   += MLX5E_XDP_IHS_DS_COUNT;
                dseg++;
        }
 
        /* write the dma part */
        dseg->addr       = cpu_to_be64(dma_addr);
        dseg->byte_count = cpu_to_be32(dma_len);
-       dseg->lkey       = sq->mkey_be;
 
        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
-       cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
-       sq->db.xdp.di[pi] = *di;
-       wi->opcode     = MLX5_OPCODE_SEND;
-       wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
-       sq->pc += MLX5E_XDP_TX_WQEBBS;
+       sq->db.di[pi] = *di;
+       sq->pc++;
 
-       sq->db.xdp.doorbell = true;
+       sq->db.doorbell = true;
        rq->stats.xdp_tx++;
        return true;
 }
@@ -946,7 +921,7 @@ mpwrq_cqe_out:
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-       struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
+       struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
        int work_done = 0;
 
        if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
@@ -973,9 +948,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                rq->handle_rx_cqe(rq, cqe);
        }
 
-       if (xdp_sq->db.xdp.doorbell) {
-               mlx5e_xmit_xdp_doorbell(xdp_sq);
-               xdp_sq->db.xdp.doorbell = false;
+       if (xdpsq->db.doorbell) {
+               mlx5e_xmit_xdp_doorbell(xdpsq);
+               xdpsq->db.doorbell = false;
        }
 
        mlx5_cqwq_update_db_record(&cq->wq);
@@ -985,3 +960,74 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 
        return work_done;
 }
+
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+{
+       struct mlx5e_xdpsq *sq;
+       struct mlx5e_rq *rq;
+       u16 sqcc;
+       int i;
+
+       sq = container_of(cq, struct mlx5e_xdpsq, cq);
+
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+               return false;
+
+       rq = container_of(sq, struct mlx5e_rq, xdpsq);
+
+       /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+        * otherwise a cq overrun may occur
+        */
+       sqcc = sq->cc;
+
+       for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
+               struct mlx5_cqe64 *cqe;
+               u16 wqe_counter;
+               bool last_wqe;
+
+               cqe = mlx5e_get_cqe(cq);
+               if (!cqe)
+                       break;
+
+               mlx5_cqwq_pop(&cq->wq);
+
+               wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+               do {
+                       struct mlx5e_dma_info *di;
+                       u16 ci;
+
+                       last_wqe = (sqcc == wqe_counter);
+
+                       ci = sqcc & sq->wq.sz_m1;
+                       di = &sq->db.di[ci];
+
+                       sqcc++;
+                       /* Recycle RX page */
+                       mlx5e_page_release(rq, di, true);
+               } while (!last_wqe);
+       }
+
+       mlx5_cqwq_update_db_record(&cq->wq);
+
+       /* ensure cq space is freed before enabling more cqes */
+       wmb();
+
+       sq->cc = sqcc;
+       return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+{
+       struct mlx5e_rq *rq = container_of(sq, struct mlx5e_rq, xdpsq);
+       struct mlx5e_dma_info *di;
+       u16 ci;
+
+       while (sq->cc != sq->pc) {
+               ci = sq->cc & sq->wq.sz_m1;
+               di = &sq->db.di[ci];
+               sq->cc++;
+
+               mlx5e_page_release(rq, di, false);
+       }
+}
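
mlx5e_poll_xdpsq_cq and mlx5e_free_xdpsq_descs both walk the gap between the consumer counter (cc) and producer counter (pc), masking with sz_m1 to index the ring. A standalone sketch of that free-running counter scheme (power-of-two ring assumed, as in the driver's cyclic WQ):

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8                     /* must be a power of two */
#define SZ_M1     (RING_SIZE - 1)

int main(void)
{
        int ring[RING_SIZE];
        uint16_t pc = 0, cc = 0;        /* free-running, wrap naturally */
        int i;

        for (i = 0; i < 5; i++)         /* produce five entries */
                ring[pc++ & SZ_M1] = i;

        /* consume everything outstanding, exactly like the cleanup loop:
         * while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; ... }
         */
        while (cc != pc) {
                int ci = cc & SZ_M1;
                printf("releasing entry %d at slot %d\n", ring[ci], ci);
                cc++;
        }
        return 0;
}
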
index cbfac06b7ffd1d5140226ccb87331db57d4880d8..02dd3a95ed8f013d0d4795d5054bd79ae4ca1201 100644 (file)
@@ -293,7 +293,7 @@ void mlx5e_rx_am_work(struct work_struct *work)
        struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am);
        struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix];
 
-       mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq,
+       mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq,
                                       cur_profile.usec, cur_profile.pkts);
 
        am->state = MLX5E_AM_START_MEASURE;
index 5621dcfda4f1868c6bccdff9cd220b7a43d46dd3..5225f2226a67cc25761d265e3d5f69b510d0df3b 100644 (file)
@@ -236,12 +236,9 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
 {
        int err = 0;
 
-       err = mlx5e_refresh_tirs_self_loopback(priv->mdev, true);
-       if (err) {
-               netdev_err(priv->netdev,
-                          "\tFailed to enable UC loopback err(%d)\n", err);
+       err = mlx5e_refresh_tirs(priv, true);
+       if (err)
                return err;
-       }
 
        lbtp->loopback_ok = false;
        init_completion(&lbtp->comp);
@@ -258,7 +255,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
                                        struct mlx5e_lbt_priv *lbtp)
 {
        dev_remove_pack(&lbtp->pt);
-       mlx5e_refresh_tirs_self_loopback(priv->mdev, false);
+       mlx5e_refresh_tirs(priv, false);
 }
 
 #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
index 79481f4cf264320648d0ea9c335cf6a3b23faf01..9dec11c00a49b379bc0524ecfaebf72d775d3b2f 100644 (file)
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_pedit.h>
 #include <net/vxlan.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
 
+struct mlx5_nic_flow_attr {
+       u32 action;
+       u32 flow_tag;
+       u32 mod_hdr_id;
+};
+
 enum {
        MLX5E_TC_FLOW_ESWITCH   = BIT(0),
+       MLX5E_TC_FLOW_NIC       = BIT(1),
 };
 
 struct mlx5e_tc_flow {
@@ -58,7 +66,16 @@ struct mlx5e_tc_flow {
        u8                      flags;
        struct mlx5_flow_handle *rule;
        struct list_head        encap; /* flows sharing the same encap */
-       struct mlx5_esw_flow_attr *attr;
+       union {
+               struct mlx5_esw_flow_attr esw_attr[0];
+               struct mlx5_nic_flow_attr nic_attr[0];
+       };
+};
+
+struct mlx5e_tc_flow_parse_attr {
+       struct mlx5_flow_spec spec;
+       int num_mod_hdr_actions;
+       void *mod_hdr_actions;
 };
 
 enum {
@@ -71,24 +88,26 @@ enum {
 
 static struct mlx5_flow_handle *
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-                     struct mlx5_flow_spec *spec,
-                     u32 action, u32 flow_tag)
+                     struct mlx5e_tc_flow_parse_attr *parse_attr,
+                     struct mlx5e_tc_flow *flow)
 {
+       struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
-       struct mlx5_flow_destination dest = { 0 };
+       struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {
-               .action = action,
-               .flow_tag = flow_tag,
+               .action = attr->action,
+               .flow_tag = attr->flow_tag,
                .encap_id = 0,
        };
        struct mlx5_fc *counter = NULL;
        struct mlx5_flow_handle *rule;
        bool table_created = false;
+       int err;
 
-       if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest.ft = priv->fs.vlan.ft.t;
-       } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+       } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter))
                        return ERR_CAST(counter);
@@ -97,6 +116,19 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                dest.counter = counter;
        }
 
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+               err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL,
+                                              parse_attr->num_mod_hdr_actions,
+                                              parse_attr->mod_hdr_actions,
+                                              &attr->mod_hdr_id);
+               flow_act.modify_id = attr->mod_hdr_id;
+               kfree(parse_attr->mod_hdr_actions);
+               if (err) {
+                       rule = ERR_PTR(err);
+                       goto err_create_mod_hdr_id;
+               }
+       }
+
        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                priv->fs.tc.t =
                        mlx5_create_auto_grouped_flow_table(priv->fs.ns,
@@ -114,8 +146,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                table_created = true;
        }
 
-       spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-       rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);
+       parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
+                                  &flow_act, &dest, 1);
 
        if (IS_ERR(rule))
                goto err_add_rule;
@@ -128,28 +161,104 @@ err_add_rule:
                priv->fs.tc.t = NULL;
        }
 err_create_ft:
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+               mlx5_modify_header_dealloc(priv->mdev,
+                                          attr->mod_hdr_id);
+err_create_mod_hdr_id:
        mlx5_fc_destroy(dev, counter);
 
        return rule;
 }
 
+static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
+                                 struct mlx5e_tc_flow *flow)
+{
+       struct mlx5_fc *counter = NULL;
+
+       counter = mlx5_flow_rule_counter(flow->rule);
+       mlx5_del_flow_rules(flow->rule);
+       mlx5_fc_destroy(priv->mdev, counter);
+
+       if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
+               mlx5_destroy_flow_table(priv->fs.tc.t);
+               priv->fs.tc.t = NULL;
+       }
+
+       if (flow->nic_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+               mlx5_modify_header_dealloc(priv->mdev,
+                                          flow->nic_attr->mod_hdr_id);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+                              struct mlx5e_tc_flow *flow);
+
 static struct mlx5_flow_handle *
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
-                     struct mlx5_flow_spec *spec,
-                     struct mlx5_esw_flow_attr *attr)
+                     struct mlx5e_tc_flow_parse_attr *parse_attr,
+                     struct mlx5e_tc_flow *flow)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+       struct mlx5_flow_handle *rule;
        int err;
 
        err = mlx5_eswitch_add_vlan_action(esw, attr);
-       if (err)
-               return ERR_PTR(err);
+       if (err) {
+               rule = ERR_PTR(err);
+               goto err_add_vlan;
+       }
+
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+               err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB,
+                                              parse_attr->num_mod_hdr_actions,
+                                              parse_attr->mod_hdr_actions,
+                                              &attr->mod_hdr_id);
+               kfree(parse_attr->mod_hdr_actions);
+               if (err) {
+                       rule = ERR_PTR(err);
+                       goto err_mod_hdr;
+               }
+       }
+
+       rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
+       if (IS_ERR(rule))
+               goto err_add_rule;
+
+       return rule;
+
+err_add_rule:
+       if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+               mlx5_modify_header_dealloc(priv->mdev,
+                                          attr->mod_hdr_id);
+err_mod_hdr:
+       mlx5_eswitch_del_vlan_action(esw, attr);
+err_add_vlan:
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+               mlx5e_detach_encap(priv, flow);
+       return rule;
+}
+
+static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+                                 struct mlx5e_tc_flow *flow)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+
+       mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
+
+       mlx5_eswitch_del_vlan_action(esw, flow->esw_attr);
 
-       return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+       if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+               mlx5e_detach_encap(priv, flow);
+
+       if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+               mlx5_modify_header_dealloc(priv->mdev,
+                                          attr->mod_hdr_id);
 }
 
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                              struct mlx5e_tc_flow *flow) {
+                              struct mlx5e_tc_flow *flow)
+{
        struct list_head *next = flow->encap.next;
 
        list_del(&flow->encap);
@@ -166,32 +275,13 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
        }
 }
 
-/* we get here also when setting rule to the FW failed, etc. It means that the
- * flow rule itself might not exist, but some offloading related to the actions
- * should be cleaned.
- */
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
 {
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5_fc *counter = NULL;
-
-       if (!IS_ERR(flow->rule)) {
-               counter = mlx5_flow_rule_counter(flow->rule);
-               mlx5_del_flow_rules(flow->rule);
-               mlx5_fc_destroy(priv->mdev, counter);
-       }
-
-       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
-               mlx5_eswitch_del_vlan_action(esw, flow->attr);
-               if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-                       mlx5e_detach_encap(priv, flow);
-       }
-
-       if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
-               mlx5_destroy_flow_table(priv->fs.tc.t);
-               priv->fs.tc.t = NULL;
-       }
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+               mlx5e_tc_del_fdb_flow(priv, flow);
+       else
+               mlx5e_tc_del_nic_flow(priv, flow);
 }
 
 static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
@@ -248,12 +338,15 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                  f->mask);
+               struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+               struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+               struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 
                /* Full udp dst port must be given */
                if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
                        goto vxlan_match_offload_err;
 
-               if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
+               if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
                    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
                        parse_vxlan_attr(spec, f);
                else {
@@ -628,29 +721,313 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
        return err;
 }
 
+struct pedit_headers {
+       struct ethhdr  eth;
+       struct iphdr   ip4;
+       struct ipv6hdr ip6;
+       struct tcphdr  tcp;
+       struct udphdr  udp;
+};
+
+static int pedit_header_offsets[] = {
+       [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+       [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+       [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+       [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+       [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+                        struct pedit_headers *masks,
+                        struct pedit_headers *vals)
+{
+       u32 *curr_pmask, *curr_pval;
+
+       if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
+               goto out_err;
+
+       curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
+       curr_pval  = (u32 *)(pedit_header(vals, hdr_type) + offset);
+
+       if (*curr_pmask & mask)  /* disallow acting twice on the same location */
+               goto out_err;
+
+       *curr_pmask |= mask;
+       *curr_pval  |= (val & mask);
+
+       return 0;
+
+out_err:
+       return -EOPNOTSUPP;
+}
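
set_pedit_val accumulates each SW pedit key into per-header mask/value shadows and rejects a second write to bits already claimed. A small standalone model of that overlap check (plain uint32_t shadows instead of the pedit_headers struct):

#include <stdint.h>
#include <stdio.h>

static int set_val(uint32_t *pmask, uint32_t *pval, uint32_t mask, uint32_t val)
{
        if (*pmask & mask)      /* disallow acting twice on the same bits */
                return -1;
        *pmask |= mask;
        *pval  |= val & mask;
        return 0;
}

int main(void)
{
        uint32_t mask = 0, val = 0;

        /* first key rewrites the low 16 bits */
        printf("first:  %d\n", set_val(&mask, &val, 0x0000ffff, 0x1234));
        /* second key overlaps bits 0..7 -> rejected */
        printf("second: %d\n", set_val(&mask, &val, 0x000000ff, 0x56));
        printf("mask=%08x val=%08x\n", mask, val);
        return 0;
}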
+
+struct mlx5_fields {
+       u8  field;
+       u8  size;
+       u32 offset;
+};
+
+static struct mlx5_fields fields[] = {
+       {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])},
+       {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_dest[4])},
+       {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])},
+       {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_source[4])},
+       {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE,  2, offsetof(struct pedit_headers, eth.h_proto)},
+
+       {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
+       {MLX5_ACTION_IN_FIELD_OUT_IP_TTL,  1, offsetof(struct pedit_headers, ip4.ttl)},
+       {MLX5_ACTION_IN_FIELD_OUT_SIPV4,   4, offsetof(struct pedit_headers, ip4.saddr)},
+       {MLX5_ACTION_IN_FIELD_OUT_DIPV4,   4, offsetof(struct pedit_headers, ip4.daddr)},
+
+       {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])},
+       {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64,  4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])},
+       {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32,  4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])},
+       {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0,   4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])},
+       {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])},
+       {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64,  4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])},
+       {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32,  4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])},
+       {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0,   4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])},
+
+       {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)},
+       {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)},
+       {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5},
+
+       {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)},
+       {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)},
+};
+
+/* On input, parse_attr->num_mod_hdr_actions holds the maximum number of HW
+ * actions that can be parsed from the SW pedit action. On success, it holds
+ * the number of HW actions that were actually parsed.
+ */
+static int offload_pedit_fields(struct pedit_headers *masks,
+                               struct pedit_headers *vals,
+                               struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+       struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+       int i, action_size, nactions, max_actions, first, last;
+       void *s_masks_p, *a_masks_p, *vals_p;
+       u32 s_mask, a_mask, val;
+       struct mlx5_fields *f;
+       u8 cmd, field_bsize;
+       unsigned long mask;
+       void *action;
+
+       set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
+       add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
+       set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
+       add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
+
+       action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+       action = parse_attr->mod_hdr_actions;
+       max_actions = parse_attr->num_mod_hdr_actions;
+       nactions = 0;
+
+       for (i = 0; i < ARRAY_SIZE(fields); i++) {
+               f = &fields[i];
+               /* avoid seeing bits set from previous iterations */
+               s_mask = a_mask = mask = val = 0;
+
+               s_masks_p = (void *)set_masks + f->offset;
+               a_masks_p = (void *)add_masks + f->offset;
+
+               memcpy(&s_mask, s_masks_p, f->size);
+               memcpy(&a_mask, a_masks_p, f->size);
+
+               if (!s_mask && !a_mask) /* nothing to offload here */
+                       continue;
+
+               if (s_mask && a_mask) {
+                       printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
+                       return -EOPNOTSUPP;
+               }
+
+               if (nactions == max_actions) {
+                       printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
+                       return -EOPNOTSUPP;
+               }
+
+               if (s_mask) {
+                       cmd  = MLX5_ACTION_TYPE_SET;
+                       mask = s_mask;
+                       vals_p = (void *)set_vals + f->offset;
+                       /* clear to denote we consumed this field */
+                       memset(s_masks_p, 0, f->size);
+               } else {
+                       cmd  = MLX5_ACTION_TYPE_ADD;
+                       mask = a_mask;
+                       vals_p = (void *)add_vals + f->offset;
+                       /* clear to denote we consumed this field */
+                       memset(a_masks_p, 0, f->size);
+               }
+
+               memcpy(&val, vals_p, f->size);
+
+               field_bsize = f->size * BITS_PER_BYTE;
+               first = find_first_bit(&mask, field_bsize);
+               last  = find_last_bit(&mask, field_bsize);
+               if (first > 0 || last != (field_bsize - 1)) {
+                       printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
+                              mask);
+                       return -EOPNOTSUPP;
+               }
+
+               MLX5_SET(set_action_in, action, action_type, cmd);
+               MLX5_SET(set_action_in, action, field, f->field);
+
+               if (cmd == MLX5_ACTION_TYPE_SET) {
+                       MLX5_SET(set_action_in, action, offset, 0);
+                       /* length is the number of bits to be written; zero means 32 */
+                       MLX5_SET(set_action_in, action, length, field_bsize);
+               }
+
+               if (field_bsize == 32)
+                       MLX5_SET(set_action_in, action, data, ntohl(val));
+               else if (field_bsize == 16)
+                       MLX5_SET(set_action_in, action, data, ntohs(val));
+               else if (field_bsize == 8)
+                       MLX5_SET(set_action_in, action, data, val);
+
+               action += action_size;
+               nactions++;
+       }
+
+       parse_attr->num_mod_hdr_actions = nactions;
+       return 0;
+}
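
offload_pedit_fields only offloads whole-field rewrites: the mask must cover the field from bit 0 through field_bsize - 1, which the first/last set-bit test above enforces via find_first_bit()/find_last_bit(). A quick standalone check mirroring that test (GCC builtins stand in for the kernel bit helpers):

#include <stdint.h>
#include <stdio.h>

/* 1 if 'mask' covers all 'bsize' low bits, 0 for a partial rewrite;
 * mask must be nonzero (ctz/clz are undefined for 0)
 */
static int full_field_mask(uint32_t mask, int bsize)
{
        int first = __builtin_ctz(mask);             /* lowest set bit  */
        int last  = 31 - __builtin_clz(mask);        /* highest set bit */

        return first == 0 && last == bsize - 1;
}

int main(void)
{
        printf("%d\n", full_field_mask(0xffff, 16));      /* 1: full 16-bit field */
        printf("%d\n", full_field_mask(0x00ff, 16));      /* 0: partial rewrite   */
        printf("%d\n", full_field_mask(0xffffffff, 32));  /* 1: full 32-bit field */
        return 0;
}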
+
+static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
+                                const struct tc_action *a, int namespace,
+                                struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+       int nkeys, action_size, max_actions;
+
+       nkeys = tcf_pedit_nkeys(a);
+       action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+
+       if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+               max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
+       else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+               max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
+
+       /* a single 32-bit pedit SW key can expand to as many as 16 HW actions */
+       max_actions = min(max_actions, nkeys * 16);
+
+       parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
+       if (!parse_attr->mod_hdr_actions)
+               return -ENOMEM;
+
+       parse_attr->num_mod_hdr_actions = max_actions;
+       return 0;
+}
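
The allocation above is sized to min(device cap, nkeys * 16), matching the comment that one 32-bit SW key may expand to up to 16 HW actions. A small standalone sketch of that sizing (the action_size and hw_cap values here are hypothetical):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const int action_size = 8;   /* hypothetical per-HW-action size */
        const int hw_cap = 128;      /* hypothetical device cap         */
        int nkeys = 3;               /* SW pedit keys in the TC action  */
        int max_actions = hw_cap < nkeys * 16 ? hw_cap : nkeys * 16;
        void *actions = calloc(max_actions, action_size);

        if (!actions)
                return 1;
        printf("room for %d HW actions (%d bytes)\n",
               max_actions, max_actions * action_size);
        free(actions);
        return 0;
}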
+
+static const struct pedit_headers zero_masks = {};
+
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
+                                const struct tc_action *a, int namespace,
+                                struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+       struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
+       int nkeys, i, err = -EOPNOTSUPP;
+       u32 mask, val, offset;
+       u8 cmd, htype;
+
+       nkeys = tcf_pedit_nkeys(a);
+
+       memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+       memset(vals,  0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+
+       for (i = 0; i < nkeys; i++) {
+               htype = tcf_pedit_htype(a, i);
+               cmd = tcf_pedit_cmd(a, i);
+               err = -EOPNOTSUPP; /* assume unsupported until the checks below pass */
+
+               if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
+                       printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n");
+                       goto out_err;
+               }
+
+               if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
+                       printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd);
+                       goto out_err;
+               }
+
+               mask = tcf_pedit_mask(a, i);
+               val = tcf_pedit_val(a, i);
+               offset = tcf_pedit_offset(a, i);
+
+               err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
+               if (err)
+                       goto out_err;
+       }
+
+       err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
+       if (err)
+               goto out_err;
+
+       err = offload_pedit_fields(masks, vals, parse_attr);
+       if (err < 0)
+               goto out_dealloc_parsed_actions;
+
+       for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
+               cmd_masks = &masks[cmd];
+               if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+                       printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n",
+                              cmd);
+                       print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
+                                      16, 1, cmd_masks, sizeof(zero_masks), true);
+                       err = -EOPNOTSUPP;
+                       goto out_dealloc_parsed_actions;
+               }
+       }
+
+       return 0;
+
+out_dealloc_parsed_actions:
+       kfree(parse_attr->mod_hdr_actions);
+out_err:
+       return err;
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-                               u32 *action, u32 *flow_tag)
+                               struct mlx5e_tc_flow_parse_attr *parse_attr,
+                               struct mlx5e_tc_flow *flow)
 {
+       struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        const struct tc_action *a;
        LIST_HEAD(actions);
+       int err;
 
        if (tc_no_actions(exts))
                return -EINVAL;
 
-       *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
-       *action = 0;
+       attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       attr->action = 0;
 
        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
                /* Only support a single action per rule */
-               if (*action)
+               if (attr->action)
                        return -EINVAL;
 
                if (is_tcf_gact_shot(a)) {
-                       *action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
                                               flow_table_properties_nic_receive.flow_counter))
-                               *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       continue;
+               }
+
+               if (is_tcf_pedit(a)) {
+                       err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
+                                                   parse_attr);
+                       if (err)
+                               return err;
+
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                        continue;
                }
 
@@ -663,8 +1040,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                return -EINVAL;
                        }
 
-                       *flow_tag = mark;
-                       *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+                       attr->flow_tag = mark;
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                        continue;
                }
 
@@ -976,6 +1353,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5_esw_flow_attr *attr)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+       struct mlx5e_priv *up_priv = netdev_priv(up_dev);
        unsigned short family = ip_tunnel_info_af(tun_info);
        struct ip_tunnel_key *key = &tun_info->key;
        struct mlx5_encap_entry *e;
@@ -996,7 +1375,7 @@ vxlan_encap_offload_err:
                return -EOPNOTSUPP;
        }
 
-       if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
+       if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
            MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
                tunnel_type = MLX5_HEADER_TYPE_VXLAN;
        } else {
@@ -1047,9 +1426,10 @@ out_err:
 }
 
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+                               struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow)
 {
-       struct mlx5_esw_flow_attr *attr = flow->attr;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct ip_tunnel_info *info = NULL;
        const struct tc_action *a;
        LIST_HEAD(actions);
@@ -1070,6 +1450,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        continue;
                }
 
+               if (is_tcf_pedit(a)) {
+                       err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
+                                                   parse_attr);
+                       if (err)
+                               return err;
+
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+                       continue;
+               }
+
                if (is_tcf_mirred_egress_redirect(a)) {
                        int ifindex = tcf_mirred_ifindex(a);
                        struct net_device *out_dev;
@@ -1112,14 +1502,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                }
 
                if (is_tcf_vlan(a)) {
-                       if (tcf_vlan_action(a) == VLAN_F_POP) {
+                       if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
-                       } else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
+                       } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
                                if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
                                        return -EOPNOTSUPP;
 
                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
                                attr->vlan = tcf_vlan_push_vid(a);
+                       } else { /* action is TCA_VLAN_ACT_MODIFY */
+                               return -EOPNOTSUPP;
                        }
                        continue;
                }
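
The vlan hunk also fixes a constants mix-up: tcf_vlan_action() returns the TCA_VLAN_ACT_* values from the tc vlan action UAPI, not the VLAN_F_* flags the old code compared against, so the pop/push matching was testing the wrong namespace. The new else branch explicitly rejects TCA_VLAN_ACT_MODIFY, which this path cannot offload. A runnable sketch of the dispatch, with the enum values as defined in include/uapi/linux/tc_act/tc_vlan.h:

    #include <stdio.h>

    /* Values from the tc vlan action UAPI (tc_vlan.h). */
    enum { TCA_VLAN_ACT_POP = 1, TCA_VLAN_ACT_PUSH = 2, TCA_VLAN_ACT_MODIFY = 3 };

    static int offload_vlan_action(int act)
    {
            switch (act) {
            case TCA_VLAN_ACT_POP:  /* strip the outer tag in hardware */
            case TCA_VLAN_ACT_PUSH: /* push a new 802.1Q tag */
                    return 0;
            default:                /* TCA_VLAN_ACT_MODIFY and future actions */
                    return -95;     /* -EOPNOTSUPP */
            }
    }

    int main(void)
    {
            printf("pop: %d, modify: %d\n",
                   offload_vlan_action(TCA_VLAN_ACT_POP),
                   offload_vlan_action(TCA_VLAN_ACT_MODIFY));
            return 0;
    }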
@@ -1137,22 +1529,24 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
                           struct tc_cls_flower_offload *f)
 {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_tc_table *tc = &priv->fs.tc;
-       int err, attr_size = 0;
-       u32 flow_tag, action;
        struct mlx5e_tc_flow *flow;
-       struct mlx5_flow_spec *spec;
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       int attr_size, err = 0;
        u8 flow_flags = 0;
 
        if (esw && esw->mode == SRIOV_OFFLOADS) {
                flow_flags = MLX5E_TC_FLOW_ESWITCH;
                attr_size  = sizeof(struct mlx5_esw_flow_attr);
+       } else {
+               flow_flags = MLX5E_TC_FLOW_NIC;
+               attr_size  = sizeof(struct mlx5_nic_flow_attr);
        }
 
        flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
-       spec = mlx5_vzalloc(sizeof(*spec));
-       if (!spec || !flow) {
+       parse_attr = mlx5_vzalloc(sizeof(*parse_attr));
+       if (!parse_attr || !flow) {
                err = -ENOMEM;
                goto err_free;
        }
@@ -1160,26 +1554,25 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
        flow->cookie = f->cookie;
        flow->flags = flow_flags;
 
-       err = parse_cls_flower(priv, flow, spec, f);
+       err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
        if (err < 0)
                goto err_free;
 
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
-               flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
-               err = parse_tc_fdb_actions(priv, f->exts, flow);
+               err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
                if (err < 0)
                        goto err_free;
-               flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
+               flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
        } else {
-               err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
+               err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
                if (err < 0)
                        goto err_free;
-               flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
+               flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
        }
 
        if (IS_ERR(flow->rule)) {
                err = PTR_ERR(flow->rule);
-               goto err_del_rule;
+               goto err_free;
        }
 
        err = rhashtable_insert_fast(&tc->ht, &flow->node,
@@ -1195,7 +1588,7 @@ err_del_rule:
 err_free:
        kfree(flow);
 out:
-       kvfree(spec);
+       kvfree(parse_attr);
        return err;
 }
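
mlx5e_configure_flower() now sizes one allocation for both the flow and its mode-specific attribute: attr_size is chosen per eswitch mode, flow->nic_attr / flow->esw_attr point into the trailing bytes, and the bulky parse state (match spec plus pedit scratch) lives in a separately vzalloc'd parse_attr that is kvfree'd once the rule has been installed or rejected. A small user-space sketch of the trailing-attribute pattern, with illustrative types:

    #include <stdio.h>
    #include <stdlib.h>

    struct nic_attr { unsigned int action, flow_tag; };
    struct flow {
            unsigned long cookie;
            unsigned char flags;
            /* mode-specific attribute follows the struct in memory */
    };

    int main(void)
    {
            size_t attr_size = sizeof(struct nic_attr);
            struct flow *flow = calloc(1, sizeof(*flow) + attr_size);
            struct nic_attr *attr;

            if (!flow)
                    return 1;
            attr = (struct nic_attr *)(flow + 1); /* trailing storage */
            attr->flow_tag = 0xffffff; /* e.g. MLX5_FS_DEFAULT_FLOW_TAG */
            printf("flow=%p attr=%p\n", (void *)flow, (void *)attr);
            free(flow);
            return 0;
    }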
 
index f193128bac4b8c18504ec1f5905def3baa5c4633..5bbc313e70c553e51b6e0fe806e82c3a9dba2e44 100644 (file)
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
                            MLX5E_SQ_NOPS_ROOM)
 
-void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw)
-{
-       struct mlx5_wq_cyc                *wq  = &sq->wq;
-
-       u16 pi = sq->pc & wq->sz_m1;
-       struct mlx5e_tx_wqe              *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-
-       struct mlx5_wqe_ctrl_seg         *cseg = &wqe->ctrl;
-
-       memset(cseg, 0, sizeof(*cseg));
-
-       cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
-       cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | 0x01);
-
-       sq->pc++;
-       sq->stats.nop++;
-
-       if (notify_hw) {
-               cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
-               mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
-       }
-}
-
 static inline void mlx5e_tx_dma_unmap(struct device *pdev,
                                      struct mlx5e_sq_dma *dma)
 {
@@ -76,25 +53,25 @@ static inline void mlx5e_tx_dma_unmap(struct device *pdev,
        }
 }
 
-static inline void mlx5e_dma_push(struct mlx5e_sq *sq,
+static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
                                  dma_addr_t addr,
                                  u32 size,
                                  enum mlx5e_dma_map_type map_type)
 {
        u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask;
 
-       sq->db.txq.dma_fifo[i].addr = addr;
-       sq->db.txq.dma_fifo[i].size = size;
-       sq->db.txq.dma_fifo[i].type = map_type;
+       sq->db.dma_fifo[i].addr = addr;
+       sq->db.dma_fifo[i].size = size;
+       sq->db.dma_fifo[i].type = map_type;
        sq->dma_fifo_pc++;
 }
 
-static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
 {
-       return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask];
+       return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
 }
 
-static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma)
+static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
 {
        int i;
 
@@ -111,6 +88,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        int channel_ix = fallback(dev, skb);
+       u16 num_channels;
        int up = 0;
 
        if (!netdev_get_num_tc(dev))
@@ -122,11 +100,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
        /* channel_ix can be larger than num_channels since
         * dev->num_real_tx_queues = num_channels * num_tc
         */
-       if (channel_ix >= priv->params.num_channels)
-               channel_ix = reciprocal_scale(channel_ix,
-                                             priv->params.num_channels);
+       num_channels = priv->channels.params.num_channels;
+       if (channel_ix >= num_channels)
+               channel_ix = reciprocal_scale(channel_ix, num_channels);
 
-       return priv->channeltc_to_txq_map[channel_ix][up];
+       return priv->channel_tc2txq[channel_ix][up];
 }
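
reciprocal_scale() maps an arbitrary 32-bit queue index into [0, num_channels) without a division; it is defined in linux/kernel.h as (u32)(((u64)val * ep_ro) >> 32). A runnable sketch of the clamp used above:

    #include <stdio.h>
    #include <stdint.h>

    /* Same arithmetic as the kernel's reciprocal_scale(). */
    static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
    {
            return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
    }

    int main(void)
    {
            uint32_t num_channels = 12;
            uint32_t channel_ix = 0xdeadbeef; /* out-of-range index */

            if (channel_ix >= num_channels)
                    channel_ix = reciprocal_scale(channel_ix, num_channels);
            printf("channel_ix=%u (< %u)\n", channel_ix, num_channels);
            return 0;
    }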
 
 static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
@@ -175,25 +153,6 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
        }
 }
 
-static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
-                                           struct sk_buff *skb, bool bf)
-{
-       /* Some NIC TX decisions, e.g loopback, are based on the packet
-        * headers and occur before the data gather.
-        * Therefore these headers must be copied into the WQE
-        */
-       if (bf) {
-               u16 ihs = skb_headlen(skb);
-
-               if (skb_vlan_tag_present(skb))
-                       ihs += VLAN_HLEN;
-
-               if (ihs <= sq->max_inline)
-                       return skb_headlen(skb);
-       }
-       return mlx5e_calc_min_inline(sq->min_inline_mode, skb);
-}
-
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
                                            unsigned int *skb_len,
                                            unsigned int len)
@@ -218,13 +177,13 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs,
        mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz);
 }
 
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
+static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
 {
        struct mlx5_wq_cyc       *wq   = &sq->wq;
 
        u16 pi = sq->pc & wq->sz_m1;
        struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-       struct mlx5e_tx_wqe_info *wi   = &sq->db.txq.wqe_info[pi];
+       struct mlx5e_tx_wqe_info *wi   = &sq->db.wqe_info[pi];
 
        struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
        struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
@@ -235,7 +194,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
        u8  opcode = MLX5_OPCODE_SEND;
        dma_addr_t dma_addr = 0;
        unsigned int num_bytes;
-       bool bf = false;
        u16 headlen;
        u16 ds_cnt;
        u16 ihs;
@@ -255,11 +213,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
        } else
                sq->stats.csum_none++;
 
-       if (sq->cc != sq->prev_cc) {
-               sq->prev_cc = sq->cc;
-               sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0;
-       }
-
        if (skb_is_gso(skb)) {
                eseg->mss    = cpu_to_be16(skb_shinfo(skb)->gso_size);
                opcode       = MLX5_OPCODE_LSO;
@@ -274,15 +227,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
                        sq->stats.tso_bytes += skb->len - ihs;
                }
 
+               sq->stats.packets += skb_shinfo(skb)->gso_segs;
                num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
        } else {
-               bf = sq->bf_budget &&
-                    !skb->xmit_more &&
-                    !skb_shinfo(skb)->nr_frags;
-               ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
+               ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+               sq->stats.packets++;
                num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
        }
 
+       sq->stats.bytes += num_bytes;
        wi->num_bytes = num_bytes;
 
        ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
@@ -346,7 +299,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
        cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
-       sq->db.txq.skb[pi] = skb;
+       sq->db.skb[pi] = skb;
 
        wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
        sq->pc += wi->num_wqebbs;
@@ -356,33 +309,23 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
-       if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) {
+       if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+                                            MLX5E_SQ_STOP_ROOM))) {
                netif_tx_stop_queue(sq->txq);
                sq->stats.stopped++;
        }
 
        sq->stats.xmit_more += skb->xmit_more;
-       if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
-               int bf_sz = 0;
-
-               if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state))
-                       bf_sz = wi->num_wqebbs << 3;
-
-               cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
-               mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz);
-       }
+       if (!skb->xmit_more || netif_xmit_stopped(sq->txq))
+               mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
 
        /* fill sq edge with nops to avoid wqe wrap around */
        while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
-               sq->db.txq.skb[pi] = NULL;
-               mlx5e_send_nop(sq, false);
+               sq->db.skb[pi] = NULL;
+               mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+               sq->stats.nop++;
        }
 
-       if (bf)
-               sq->bf_budget--;
-
-       sq->stats.packets++;
-       sq->stats.bytes += num_bytes;
        return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
@@ -397,21 +340,21 @@ dma_unmap_wqe_err:
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       struct mlx5e_sq *sq = priv->txq_to_sq_map[skb_get_queue_mapping(skb)];
+       struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
 
        return mlx5e_sq_xmit(sq, skb);
 }
 
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
-       struct mlx5e_sq *sq;
+       struct mlx5e_txqsq *sq;
        u32 dma_fifo_cc;
        u32 nbytes;
        u16 npkts;
        u16 sqcc;
        int i;
 
-       sq = container_of(cq, struct mlx5e_sq, cq);
+       sq = container_of(cq, struct mlx5e_txqsq, cq);
 
        if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                return false;
@@ -449,8 +392,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                        last_wqe = (sqcc == wqe_counter);
 
                        ci = sqcc & sq->wq.sz_m1;
-                       skb = sq->db.txq.skb[ci];
-                       wi = &sq->db.txq.wqe_info[ci];
+                       skb = sq->db.skb[ci];
+                       wi = &sq->db.wqe_info[ci];
 
                        if (unlikely(!skb)) { /* nop */
                                sqcc++;
@@ -491,7 +434,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
        if (netif_tx_queue_stopped(sq->txq) &&
-           mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) {
+           mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) {
                netif_tx_wake_queue(sq->txq);
                sq->stats.wake++;
        }
@@ -499,7 +442,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
 
-static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq)
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
 {
        struct mlx5e_tx_wqe_info *wi;
        struct sk_buff *skb;
@@ -508,8 +451,8 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq)
 
        while (sq->cc != sq->pc) {
                ci = sq->cc & sq->wq.sz_m1;
-               skb = sq->db.txq.skb[ci];
-               wi = &sq->db.txq.wqe_info[ci];
+               skb = sq->db.skb[ci];
+               wi = &sq->db.wqe_info[ci];
 
                if (!skb) { /* nop */
                        sq->cc++;
@@ -527,37 +470,3 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq)
                sq->cc += wi->num_wqebbs;
        }
 }
-
-static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq)
-{
-       struct mlx5e_sq_wqe_info *wi;
-       struct mlx5e_dma_info *di;
-       u16 ci;
-
-       while (sq->cc != sq->pc) {
-               ci = sq->cc & sq->wq.sz_m1;
-               di = &sq->db.xdp.di[ci];
-               wi = &sq->db.xdp.wqe_info[ci];
-
-               if (wi->opcode == MLX5_OPCODE_NOP) {
-                       sq->cc++;
-                       continue;
-               }
-
-               sq->cc += wi->num_wqebbs;
-
-               mlx5e_page_release(&sq->channel->rq, di, false);
-       }
-}
-
-void mlx5e_free_sq_descs(struct mlx5e_sq *sq)
-{
-       switch (sq->type) {
-       case MLX5E_SQ_TXQ:
-               mlx5e_free_txq_sq_descs(sq);
-               break;
-       case MLX5E_SQ_XDP:
-               mlx5e_free_xdp_sq_descs(sq);
-               break;
-       }
-}
index e5c12a732aa1212274943183ed83696ce2606639..43729ec35dfca585e4827175051671c2a662abcc 100644 (file)
@@ -44,14 +44,14 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
                return NULL;
 
        /* ensure cqe content is read after cqe ownership bit */
-       rmb();
+       dma_rmb();
 
        return cqe;
 }
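
The rmb() to dma_rmb() change is a correctness-preserving relaxation: the CQE lives in coherent DMA memory, so the read only needs to be ordered against the device's writes, and dma_rmb() is cheaper than a full rmb() on several architectures. The ownership handshake, sketched in kernel style (surrounding mlx5 definitions and sw_ownership_val assumed from context):

    /* Sketch of the CQE ownership check (kernel context assumed). */
    cqe = mlx5_cqwq_get_wqe(&cq->wq, mlx5_cqwq_get_ci(&cq->wq));
    if ((cqe->op_own & MLX5_CQE_OWNER_MASK) != sw_ownership_val)
            return NULL;    /* hardware still owns this entry */
    dma_rmb();              /* order CQE payload reads after the own bit */
    return cqe;             /* safe to parse the completion now */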
 
 static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 {
-       struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq);
+       struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
        struct mlx5_wq_cyc *wq;
        struct mlx5_cqe64 *cqe;
        u16 sqcc;
@@ -105,66 +105,6 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
        sq->cc = sqcc;
 }
 
-static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq)
-{
-       struct mlx5e_sq *sq;
-       u16 sqcc;
-       int i;
-
-       sq = container_of(cq, struct mlx5e_sq, cq);
-
-       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
-               return false;
-
-       /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
-        * otherwise a cq overrun may occur
-        */
-       sqcc = sq->cc;
-
-       for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
-               struct mlx5_cqe64 *cqe;
-               u16 wqe_counter;
-               bool last_wqe;
-
-               cqe = mlx5e_get_cqe(cq);
-               if (!cqe)
-                       break;
-
-               mlx5_cqwq_pop(&cq->wq);
-
-               wqe_counter = be16_to_cpu(cqe->wqe_counter);
-
-               do {
-                       struct mlx5e_sq_wqe_info *wi;
-                       struct mlx5e_dma_info *di;
-                       u16 ci;
-
-                       last_wqe = (sqcc == wqe_counter);
-
-                       ci = sqcc & sq->wq.sz_m1;
-                       di = &sq->db.xdp.di[ci];
-                       wi = &sq->db.xdp.wqe_info[ci];
-
-                       if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) {
-                               sqcc++;
-                               continue;
-                       }
-
-                       sqcc += wi->num_wqebbs;
-                       /* Recycle RX page */
-                       mlx5e_page_release(&sq->channel->rq, di, true);
-               } while (!last_wqe);
-       }
-
-       mlx5_cqwq_update_db_record(&cq->wq);
-
-       /* ensure cq space is freed before enabling more cqes */
-       wmb();
-
-       sq->cc = sqcc;
-       return (i == MLX5E_TX_CQ_POLL_BUDGET);
-}
-
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
        struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -178,12 +118,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
        for (i = 0; i < c->num_tc; i++)
                busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
 
+       if (c->xdp)
+               busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+
        work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
        busy |= work_done == budget;
 
-       if (c->xdp)
-               busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq);
-
        mlx5e_poll_ico_cq(&c->icosq.cq);
 
        busy |= mlx5e_post_rx_wqes(&c->rq);
@@ -224,8 +164,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event)
 {
        struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
        struct mlx5e_channel *c = cq->channel;
-       struct mlx5e_priv *priv = c->priv;
-       struct net_device *netdev = priv->netdev;
+       struct net_device *netdev = c->netdev;
 
        netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n",
                   __func__, mcq->cqn, event);
index 5b78883d565413ec59a00ecba4ddb483e4eecd3f..1f56ed9f5a6f78e20e76d63d8cba9bef847518a1 100644 (file)
@@ -209,6 +209,7 @@ struct mlx5_esw_offload {
        struct mlx5_eswitch_rep *vport_reps;
        DECLARE_HASHTABLE(encap_tbl, 8);
        u8 inline_mode;
+       u64 num_flows;
 };
 
 struct mlx5_eswitch {
@@ -271,6 +272,11 @@ struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                struct mlx5_flow_spec *spec,
                                struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+                               struct mlx5_flow_handle *rule,
+                               struct mlx5_esw_flow_attr *attr);
+
 struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
@@ -279,8 +285,8 @@ enum {
        SET_VLAN_INSERT = BIT(1)
 };
 
-#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x40
-#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80
+#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x4000
+#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000
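
Renumbering the driver-private VLAN action bits from 0x40/0x80 to 0x4000/0x8000 keeps them clear of the firmware-defined flow context action bits, which now extend through MLX5_FLOW_CONTEXT_ACTION_MOD_HDR (0x40, added for the modify-header support in this series); both sets are OR-ed into the same attr->action word, so they must not collide. A runnable check of the new layout (bit values as in mlx5_ifc.h at this point in the series):

    #include <assert.h>

    /* Firmware-defined flow context action bits (mlx5_ifc.h). */
    #define ACTION_ENCAP   0x10
    #define ACTION_DECAP   0x20
    #define ACTION_MOD_HDR 0x40     /* new in this series */

    /* Driver-private bits, moved above the firmware-defined range. */
    #define ACTION_VLAN_POP  0x4000
    #define ACTION_VLAN_PUSH 0x8000

    int main(void)
    {
            unsigned int fw = ACTION_ENCAP | ACTION_DECAP | ACTION_MOD_HDR;

            /* The old 0x40 VLAN_POP value would have aliased MOD_HDR. */
            assert(!(fw & (ACTION_VLAN_POP | ACTION_VLAN_PUSH)));
            return 0;
    }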
 
 struct mlx5_encap_entry {
        struct hlist_node encap_hlist;
@@ -302,6 +308,7 @@ struct mlx5_esw_flow_attr {
        u16     vlan;
        bool    vlan_handled;
        struct mlx5_encap_entry *encap;
+       u32     mod_hdr_id;
 };
 
 int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
index 4f5b0d47d5f38237129a7c90a1240b8615615d32..fff962dac8e310fe4f3d9ab8af0412188ed190cd 100644 (file)
@@ -68,8 +68,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        }
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(esw->dev, true);
-               if (IS_ERR(counter))
-                       return ERR_CAST(counter);
+               if (IS_ERR(counter)) {
+                       rule = ERR_CAST(counter);
+                       goto err_counter_alloc;
+               }
                dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[i].counter = counter;
                i++;
@@ -86,15 +88,38 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
                spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
-       if (attr->encap)
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+               flow_act.modify_id = attr->mod_hdr_id;
+
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
                flow_act.encap_id = attr->encap->encap_id;
 
        rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,
                                   spec, &flow_act, dest, i);
        if (IS_ERR(rule))
-               mlx5_fc_destroy(esw->dev, counter);
+               goto err_add_rule;
+       else
+               esw->offloads.num_flows++;
 
        return rule;
+
+err_add_rule:
+       mlx5_fc_destroy(esw->dev, counter);
+err_counter_alloc:
+       return rule;
+}
+
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+                               struct mlx5_flow_handle *rule,
+                               struct mlx5_esw_flow_attr *attr)
+{
+       struct mlx5_fc *counter = NULL;
+
+       counter = mlx5_flow_rule_counter(rule);
+       mlx5_del_flow_rules(rule);
+       mlx5_fc_destroy(esw->dev, counter);
+       esw->offloads.num_flows--;
 }
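
mlx5_eswitch_del_offloaded_rule() is the teardown mirror of mlx5_eswitch_add_offloaded_rule(): it drops the rule, releases its counter, and decrements offloads.num_flows, which the inline-mode setter below uses to refuse reconfiguration while offloaded flows exist. A hedged caller-side sketch (kernel context assumed):

    /* Install: counts the flow and allocates its counter internally. */
    rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
    if (IS_ERR(rule))
            return PTR_ERR(rule);

    /* ... later, on tc filter removal ... */
    mlx5_eswitch_del_offloaded_rule(esw, rule, attr); /* num_flows-- */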
 
 static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
@@ -908,6 +933,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
            MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
                return -EOPNOTSUPP;
 
+       if (esw->offloads.num_flows > 0) {
+               esw_warn(dev, "Can't set inline mode when flows are configured\n");
+               return -EOPNOTSUPP;
+       }
+
        err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
        if (err)
                goto out;
index b64a781c7e855fd1d38cb7303d26a27073626435..c6178ea1a46161223ba73598c328cfd7c36ffa06 100644 (file)
@@ -249,6 +249,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
        MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
        MLX5_SET(flow_context, in_flow_context, action, fte->action);
        MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
+       MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id);
        in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
                                      match_value);
        memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
@@ -515,3 +516,69 @@ void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
 
        mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+                            u8 namespace, u8 num_actions,
+                            void *modify_actions, u32 *modify_header_id)
+{
+       u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
+       int max_actions, actions_size, inlen, err;
+       void *actions_in;
+       u8 table_type;
+       u32 *in;
+
+       switch (namespace) {
+       case MLX5_FLOW_NAMESPACE_FDB:
+               max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions);
+               table_type = FS_FT_FDB;
+               break;
+       case MLX5_FLOW_NAMESPACE_KERNEL:
+               max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
+               table_type = FS_FT_NIC_RX;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       if (num_actions > max_actions) {
+               mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n",
+                              num_actions, max_actions);
+               return -EOPNOTSUPP;
+       }
+
+       actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions;
+       inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size;
+
+       in = kzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(alloc_modify_header_context_in, in, opcode,
+                MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+       MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+       MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions);
+
+       actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+       memcpy(actions_in, modify_actions, actions_size);
+
+       memset(out, 0, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+       *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+       kfree(in);
+       return err;
+}
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
+{
+       u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
+       u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+
+       memset(in, 0, sizeof(in));
+       MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+                MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+       MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+                modify_header_id);
+
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
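
A typical caller packs num_actions set_action_in entries (the layout the parsed pedit fields use earlier in the series), allocates one modify-header context per rule, and stores the returned id in the flow attribute so mlx5_cmd_set_fte() can program it; the id is released with the dealloc call when the rule dies. A hedged usage sketch; the MLX5_SET field names for set_action_in follow the firmware interface and are assumptions here, as are the field_id/bit_len/new_value placeholders:

    /* One SET action rewriting a header field (kernel context assumed). */
    u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
    u32 mod_hdr_id;
    int err;

    MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
    MLX5_SET(set_action_in, action, field, field_id);   /* which header field */
    MLX5_SET(set_action_in, action, length, bit_len);   /* rewrite width */
    MLX5_SET(set_action_in, action, data, new_value);

    err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_FDB,
                                   1, action, &mod_hdr_id);
    if (err)
            return err;
    attr->mod_hdr_id = mod_hdr_id;  /* consumed via flow_act.modify_id */
    /* ... on teardown: */
    mlx5_modify_header_dealloc(dev, mod_hdr_id);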
index ded27bb9a3b6049ff4bad1606443dbeff53be8f1..27ff815600f78dfc0e363fe3852663e1518d09c2 100644 (file)
@@ -476,6 +476,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
        fte->index = index;
        fte->action = flow_act->action;
        fte->encap_id = flow_act->encap_id;
+       fte->modify_id = flow_act->modify_id;
 
        return fte;
 }
index 8e668c63f69ec4afefb197f1f4c0a32ca3760179..03af2e7989f375943db11845536f5646a8d46fa3 100644 (file)
@@ -152,6 +152,7 @@ struct fs_fte {
        u32                             index;
        u32                             action;
        u32                             encap_id;
+       u32                             modify_id;
        enum fs_fte_status              status;
        struct mlx5_fc                  *counter;
 };
index e2bd600d19de09c8e4049e0b83ec2b9eae645729..60154a175bd3866f2b461a357c60d86283afde12 100644 (file)
@@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = {
        [2] = {
                .mask           = MLX5_PROF_MASK_QP_SIZE |
                                  MLX5_PROF_MASK_MR_CACHE,
-               .log_max_qp     = 17,
+               .log_max_qp     = 18,
                .mr_cache[0]    = {
                        .size   = 500,
                        .limit  = 250
index b3dabe6e88366133fd07dab68f059d4f5d7e5e3a..fbc6e9e9e3053a7527cf49e7d00c54104acf50af 100644 (file)
@@ -141,6 +141,11 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev,
                     u32 *encap_id);
 void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
 
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+                            u8 namespace, u8 num_actions,
+                            void *modify_actions, u32 *modify_header_id);
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
+
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
 int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
index 6b6c30deee83ca289ba0bfcb924678efe55e65e7..2fb8c6585ac711c748d18355e9d4877f779ccbf5 100644 (file)
@@ -15,7 +15,8 @@ obj-$(CONFIG_MLXSW_SPECTRUM)  += mlxsw_spectrum.o
 mlxsw_spectrum-objs            := spectrum.o spectrum_buffers.o \
                                   spectrum_switchdev.o spectrum_router.o \
                                   spectrum_kvdl.o spectrum_acl_tcam.o \
-                                  spectrum_acl.o spectrum_flower.o
+                                  spectrum_acl.o spectrum_flower.o \
+                                  spectrum_cnt.o spectrum_dpipe.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)    += spectrum_dcb.o
 obj-$(CONFIG_MLXSW_MINIMAL)    += mlxsw_minimal.o
 mlxsw_minimal-objs             := minimal.o
index a1b48421648a3c11e25e7a5c148a25bf07d322c0..479511cf79bc1ca3f02ec3dd1b8cb578416f1cc8 100644 (file)
@@ -1043,13 +1043,6 @@ MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4);
  */
 MLXSW_ITEM32(cmd_mbox, sw2hw_cq, c_eqn, 0x00, 24, 1);
 
-/* cmd_mbox_sw2hw_cq_oi
- * When set, overrun ignore is enabled. When set, updates of
- * CQ consumer counter (poll for completion) or Request completion
- * notifications (Arm CQ) DoorBells should not be rung on that CQ.
- */
-MLXSW_ITEM32(cmd_mbox, sw2hw_cq, oi, 0x00, 12, 1);
-
 /* cmd_mbox_sw2hw_cq_st
  * Event delivery state machine
  * 0x0 - FIRED
@@ -1132,11 +1125,6 @@ static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core,
  */
 MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1);
 
-/* cmd_mbox_sw2hw_eq_oi
- * When set, overrun ignore is enabled.
- */
-MLXSW_ITEM32(cmd_mbox, sw2hw_eq, oi, 0x00, 12, 1);
-
 /* cmd_mbox_sw2hw_eq_st
  * Event delivery state machine
  * 0x0 - FIRED
index a4c07841aaf6254c844eb8d8512687b447928ba8..affe84eb4bff5717e5ddba4835395a8a8989f8ca 100644 (file)
@@ -40,9 +40,6 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/if_link.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/u64_stats_sync.h>
 #include <linux/netdevice.h>
 #include <linux/completion.h>
 #include <linux/skbuff.h>
@@ -74,23 +71,9 @@ static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock);
 
 static const char mlxsw_core_driver_name[] = "mlxsw_core";
 
-static struct dentry *mlxsw_core_dbg_root;
-
 static struct workqueue_struct *mlxsw_wq;
 static struct workqueue_struct *mlxsw_owq;
 
-struct mlxsw_core_pcpu_stats {
-       u64                     trap_rx_packets[MLXSW_TRAP_ID_MAX];
-       u64                     trap_rx_bytes[MLXSW_TRAP_ID_MAX];
-       u64                     port_rx_packets[MLXSW_PORT_MAX_PORTS];
-       u64                     port_rx_bytes[MLXSW_PORT_MAX_PORTS];
-       struct u64_stats_sync   syncp;
-       u32                     trap_rx_dropped[MLXSW_TRAP_ID_MAX];
-       u32                     port_rx_dropped[MLXSW_PORT_MAX_PORTS];
-       u32                     trap_rx_invalid;
-       u32                     port_rx_invalid;
-};
-
 struct mlxsw_core_port {
        struct devlink_port devlink_port;
        void *port_driver_priv;
@@ -121,23 +104,48 @@ struct mlxsw_core {
                spinlock_t trans_list_lock; /* protects trans_list writes */
                bool use_emad;
        } emad;
-       struct mlxsw_core_pcpu_stats __percpu *pcpu_stats;
-       struct dentry *dbg_dir;
-       struct {
-               struct debugfs_blob_wrapper vsd_blob;
-               struct debugfs_blob_wrapper psid_blob;
-       } dbg;
        struct {
                u8 *mapping; /* lag_id+port_index to local_port mapping */
        } lag;
        struct mlxsw_res res;
        struct mlxsw_hwmon *hwmon;
        struct mlxsw_thermal *thermal;
-       struct mlxsw_core_port ports[MLXSW_PORT_MAX_PORTS];
+       struct mlxsw_core_port *ports;
+       unsigned int max_ports;
        unsigned long driver_priv[0];
        /* driver_priv has to be always the last item */
 };
 
+#define MLXSW_PORT_MAX_PORTS_DEFAULT   0x40
+
+static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core)
+{
+       /* Switch ports are numbered from 1 to the queried value */
+       if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT))
+               mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core,
+                                                          MAX_SYSTEM_PORT) + 1;
+       else
+               mlxsw_core->max_ports = MLXSW_PORT_MAX_PORTS_DEFAULT + 1;
+
+       mlxsw_core->ports = kcalloc(mlxsw_core->max_ports,
+                                   sizeof(struct mlxsw_core_port), GFP_KERNEL);
+       if (!mlxsw_core->ports)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core)
+{
+       kfree(mlxsw_core->ports);
+}
+
+unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core)
+{
+       return mlxsw_core->max_ports;
+}
+EXPORT_SYMBOL(mlxsw_core_max_ports);
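
With the per-ASIC port count now queried at init (the MAX_SYSTEM_PORT resource, with a 0x40 fallback), every user of the old compile-time MLXSW_PORT_MAX_PORTS constant switches to mlxsw_core_max_ports(); validation follows the same shape throughout the patch:

    /* Bounds check against the queried port range (kernel context). */
    if (local_port >= mlxsw_core_max_ports(mlxsw_core))
            return -EINVAL; /* port number out of range for this ASIC */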
+
 void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core)
 {
        return mlxsw_core->driver_priv;
@@ -703,91 +711,6 @@ err_out:
  * Core functions
  *****************/
 
-static int mlxsw_core_rx_stats_dbg_read(struct seq_file *file, void *data)
-{
-       struct mlxsw_core *mlxsw_core = file->private;
-       struct mlxsw_core_pcpu_stats *p;
-       u64 rx_packets, rx_bytes;
-       u64 tmp_rx_packets, tmp_rx_bytes;
-       u32 rx_dropped, rx_invalid;
-       unsigned int start;
-       int i;
-       int j;
-       static const char hdr[] =
-               "     NUM   RX_PACKETS     RX_BYTES RX_DROPPED\n";
-
-       seq_printf(file, hdr);
-       for (i = 0; i < MLXSW_TRAP_ID_MAX; i++) {
-               rx_packets = 0;
-               rx_bytes = 0;
-               rx_dropped = 0;
-               for_each_possible_cpu(j) {
-                       p = per_cpu_ptr(mlxsw_core->pcpu_stats, j);
-                       do {
-                               start = u64_stats_fetch_begin(&p->syncp);
-                               tmp_rx_packets = p->trap_rx_packets[i];
-                               tmp_rx_bytes = p->trap_rx_bytes[i];
-                       } while (u64_stats_fetch_retry(&p->syncp, start));
-
-                       rx_packets += tmp_rx_packets;
-                       rx_bytes += tmp_rx_bytes;
-                       rx_dropped += p->trap_rx_dropped[i];
-               }
-               seq_printf(file, "trap %3d %12llu %12llu %10u\n",
-                          i, rx_packets, rx_bytes, rx_dropped);
-       }
-       rx_invalid = 0;
-       for_each_possible_cpu(j) {
-               p = per_cpu_ptr(mlxsw_core->pcpu_stats, j);
-               rx_invalid += p->trap_rx_invalid;
-       }
-       seq_printf(file, "trap INV                           %10u\n",
-                  rx_invalid);
-
-       for (i = 0; i < MLXSW_PORT_MAX_PORTS; i++) {
-               rx_packets = 0;
-               rx_bytes = 0;
-               rx_dropped = 0;
-               for_each_possible_cpu(j) {
-                       p = per_cpu_ptr(mlxsw_core->pcpu_stats, j);
-                       do {
-                               start = u64_stats_fetch_begin(&p->syncp);
-                               tmp_rx_packets = p->port_rx_packets[i];
-                               tmp_rx_bytes = p->port_rx_bytes[i];
-                       } while (u64_stats_fetch_retry(&p->syncp, start));
-
-                       rx_packets += tmp_rx_packets;
-                       rx_bytes += tmp_rx_bytes;
-                       rx_dropped += p->port_rx_dropped[i];
-               }
-               seq_printf(file, "port %3d %12llu %12llu %10u\n",
-                          i, rx_packets, rx_bytes, rx_dropped);
-       }
-       rx_invalid = 0;
-       for_each_possible_cpu(j) {
-               p = per_cpu_ptr(mlxsw_core->pcpu_stats, j);
-               rx_invalid += p->port_rx_invalid;
-       }
-       seq_printf(file, "port INV                           %10u\n",
-                  rx_invalid);
-       return 0;
-}
-
-static int mlxsw_core_rx_stats_dbg_open(struct inode *inode, struct file *f)
-{
-       struct mlxsw_core *mlxsw_core = inode->i_private;
-
-       return single_open(f, mlxsw_core_rx_stats_dbg_read, mlxsw_core);
-}
-
-static const struct file_operations mlxsw_core_rx_stats_dbg_ops = {
-       .owner = THIS_MODULE,
-       .open = mlxsw_core_rx_stats_dbg_open,
-       .release = single_release,
-       .read = seq_read,
-       .llseek = seq_lseek
-};
-
 int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver)
 {
        spin_lock(&mlxsw_core_driver_list_lock);
@@ -835,39 +758,13 @@ static void mlxsw_core_driver_put(const char *kind)
        spin_unlock(&mlxsw_core_driver_list_lock);
 }
 
-static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core)
-{
-       const struct mlxsw_bus_info *bus_info = mlxsw_core->bus_info;
-
-       mlxsw_core->dbg_dir = debugfs_create_dir(bus_info->device_name,
-                                                mlxsw_core_dbg_root);
-       if (!mlxsw_core->dbg_dir)
-               return -ENOMEM;
-       debugfs_create_file("rx_stats", S_IRUGO, mlxsw_core->dbg_dir,
-                           mlxsw_core, &mlxsw_core_rx_stats_dbg_ops);
-       mlxsw_core->dbg.vsd_blob.data = (void *) &bus_info->vsd;
-       mlxsw_core->dbg.vsd_blob.size = sizeof(bus_info->vsd);
-       debugfs_create_blob("vsd", S_IRUGO, mlxsw_core->dbg_dir,
-                           &mlxsw_core->dbg.vsd_blob);
-       mlxsw_core->dbg.psid_blob.data = (void *) &bus_info->psid;
-       mlxsw_core->dbg.psid_blob.size = sizeof(bus_info->psid);
-       debugfs_create_blob("psid", S_IRUGO, mlxsw_core->dbg_dir,
-                           &mlxsw_core->dbg.psid_blob);
-       return 0;
-}
-
-static void mlxsw_core_debugfs_fini(struct mlxsw_core *mlxsw_core)
-{
-       debugfs_remove_recursive(mlxsw_core->dbg_dir);
-}
-
 static int mlxsw_devlink_port_split(struct devlink *devlink,
                                    unsigned int port_index,
                                    unsigned int count)
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 
-       if (port_index >= MLXSW_PORT_MAX_PORTS)
+       if (port_index >= mlxsw_core->max_ports)
                return -EINVAL;
        if (!mlxsw_core->driver->port_split)
                return -EOPNOTSUPP;
@@ -879,7 +776,7 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink,
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 
-       if (port_index >= MLXSW_PORT_MAX_PORTS)
+       if (port_index >= mlxsw_core->max_ports)
                return -EINVAL;
        if (!mlxsw_core->driver->port_unsplit)
                return -EOPNOTSUPP;
@@ -1101,18 +998,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
        mlxsw_core->bus_priv = bus_priv;
        mlxsw_core->bus_info = mlxsw_bus_info;
 
-       mlxsw_core->pcpu_stats =
-               netdev_alloc_pcpu_stats(struct mlxsw_core_pcpu_stats);
-       if (!mlxsw_core->pcpu_stats) {
-               err = -ENOMEM;
-               goto err_alloc_stats;
-       }
-
        err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
                              &mlxsw_core->res);
        if (err)
                goto err_bus_init;
 
+       err = mlxsw_ports_init(mlxsw_core);
+       if (err)
+               goto err_ports_init;
+
        if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG) &&
            MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) {
                alloc_size = sizeof(u8) *
@@ -1148,15 +1042,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                        goto err_driver_init;
        }
 
-       err = mlxsw_core_debugfs_init(mlxsw_core);
-       if (err)
-               goto err_debugfs_init;
-
        return 0;
 
-err_debugfs_init:
-       if (mlxsw_core->driver->fini)
-               mlxsw_core->driver->fini(mlxsw_core);
 err_driver_init:
        mlxsw_thermal_fini(mlxsw_core->thermal);
 err_thermal_init:
@@ -1167,10 +1054,10 @@ err_devlink_register:
 err_emad_init:
        kfree(mlxsw_core->lag.mapping);
 err_alloc_lag_mapping:
+       mlxsw_ports_fini(mlxsw_core);
+err_ports_init:
        mlxsw_bus->fini(bus_priv);
 err_bus_init:
-       free_percpu(mlxsw_core->pcpu_stats);
-err_alloc_stats:
        devlink_free(devlink);
 err_devlink_alloc:
        mlxsw_core_driver_put(device_kind);
@@ -1183,15 +1070,14 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core)
        const char *device_kind = mlxsw_core->bus_info->device_kind;
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
-       mlxsw_core_debugfs_fini(mlxsw_core);
        if (mlxsw_core->driver->fini)
                mlxsw_core->driver->fini(mlxsw_core);
        mlxsw_thermal_fini(mlxsw_core->thermal);
        devlink_unregister(devlink);
        mlxsw_emad_fini(mlxsw_core);
        kfree(mlxsw_core->lag.mapping);
+       mlxsw_ports_fini(mlxsw_core);
        mlxsw_core->bus->fini(mlxsw_core->bus_priv);
-       free_percpu(mlxsw_core->pcpu_stats);
        devlink_free(devlink);
        mlxsw_core_driver_put(device_kind);
 }
@@ -1639,7 +1525,6 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
 {
        struct mlxsw_rx_listener_item *rxl_item;
        const struct mlxsw_rx_listener *rxl;
-       struct mlxsw_core_pcpu_stats *pcpu_stats;
        u8 local_port;
        bool found = false;
 
@@ -1661,7 +1546,7 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
                            __func__, local_port, rx_info->trap_id);
 
        if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) ||
-           (local_port >= MLXSW_PORT_MAX_PORTS))
+           (local_port >= mlxsw_core->max_ports))
                goto drop;
 
        rcu_read_lock();
@@ -1678,26 +1563,10 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
        if (!found)
                goto drop;
 
-       pcpu_stats = this_cpu_ptr(mlxsw_core->pcpu_stats);
-       u64_stats_update_begin(&pcpu_stats->syncp);
-       pcpu_stats->port_rx_packets[local_port]++;
-       pcpu_stats->port_rx_bytes[local_port] += skb->len;
-       pcpu_stats->trap_rx_packets[rx_info->trap_id]++;
-       pcpu_stats->trap_rx_bytes[rx_info->trap_id] += skb->len;
-       u64_stats_update_end(&pcpu_stats->syncp);
-
        rxl->func(skb, local_port, rxl_item->priv);
        return;
 
 drop:
-       if (rx_info->trap_id >= MLXSW_TRAP_ID_MAX)
-               this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_invalid);
-       else
-               this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_dropped[rx_info->trap_id]);
-       if (local_port >= MLXSW_PORT_MAX_PORTS)
-               this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_invalid);
-       else
-               this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_dropped[local_port]);
        dev_kfree_skb(skb);
 }
 EXPORT_SYMBOL(mlxsw_core_skb_receive);
@@ -1926,15 +1795,8 @@ static int __init mlxsw_core_module_init(void)
                err = -ENOMEM;
                goto err_alloc_ordered_workqueue;
        }
-       mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL);
-       if (!mlxsw_core_dbg_root) {
-               err = -ENOMEM;
-               goto err_debugfs_create_dir;
-       }
        return 0;
 
-err_debugfs_create_dir:
-       destroy_workqueue(mlxsw_owq);
 err_alloc_ordered_workqueue:
        destroy_workqueue(mlxsw_wq);
        return err;
@@ -1942,7 +1804,6 @@ err_alloc_ordered_workqueue:
 
 static void __exit mlxsw_core_module_exit(void)
 {
-       debugfs_remove_recursive(mlxsw_core_dbg_root);
        destroy_workqueue(mlxsw_owq);
        destroy_workqueue(mlxsw_wq);
 }
index cf38cf9027f80a95a4f8a744de7551cb0810bf51..7fb35395adf52076ead77cbcbe0381317c197627 100644 (file)
@@ -57,6 +57,8 @@ struct mlxsw_driver;
 struct mlxsw_bus;
 struct mlxsw_bus_info;
 
+unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core);
+
 void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
 
 int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver);
index 5f337715a4da64dcd94178bd627189648d6f775b..a984c361926c7841cfcd12c9a7c5a2e17bfdc15c 100644 (file)
@@ -567,6 +567,89 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block,
        return oneact + MLXSW_AFA_PAYLOAD_OFFSET;
 }
 
+/* VLAN Action
+ * -----------
+ * VLAN action is used for manipulating VLANs. It can be used to implement QinQ,
+ * VLAN translation, change of PCP bits of the VLAN tag, push, pop and swap VLANs
+ * and more.
+ */
+
+#define MLXSW_AFA_VLAN_CODE 0x02
+#define MLXSW_AFA_VLAN_SIZE 1
+
+enum mlxsw_afa_vlan_vlan_tag_cmd {
+       MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
+       MLXSW_AFA_VLAN_VLAN_TAG_CMD_PUSH_TAG,
+       MLXSW_AFA_VLAN_VLAN_TAG_CMD_POP_TAG,
+};
+
+enum mlxsw_afa_vlan_cmd {
+       MLXSW_AFA_VLAN_CMD_NOP,
+       MLXSW_AFA_VLAN_CMD_SET_OUTER,
+       MLXSW_AFA_VLAN_CMD_SET_INNER,
+       MLXSW_AFA_VLAN_CMD_COPY_OUTER_TO_INNER,
+       MLXSW_AFA_VLAN_CMD_COPY_INNER_TO_OUTER,
+       MLXSW_AFA_VLAN_CMD_SWAP,
+};
+
+/* afa_vlan_vlan_tag_cmd
+ * Tag command: push, pop, nop VLAN header.
+ */
+MLXSW_ITEM32(afa, vlan, vlan_tag_cmd, 0x00, 29, 3);
+
+/* afa_vlan_vid_cmd */
+MLXSW_ITEM32(afa, vlan, vid_cmd, 0x04, 29, 3);
+
+/* afa_vlan_vid */
+MLXSW_ITEM32(afa, vlan, vid, 0x04, 0, 12);
+
+/* afa_vlan_ethertype_cmd */
+MLXSW_ITEM32(afa, vlan, ethertype_cmd, 0x08, 29, 3);
+
+/* afa_vlan_ethertype
+ * Index to EtherTypes in Switch VLAN EtherType Register (SVER).
+ */
+MLXSW_ITEM32(afa, vlan, ethertype, 0x08, 24, 3);
+
+/* afa_vlan_pcp_cmd */
+MLXSW_ITEM32(afa, vlan, pcp_cmd, 0x08, 13, 3);
+
+/* afa_vlan_pcp */
+MLXSW_ITEM32(afa, vlan, pcp, 0x08, 8, 3);
+
+static inline void
+mlxsw_afa_vlan_pack(char *payload,
+                   enum mlxsw_afa_vlan_vlan_tag_cmd vlan_tag_cmd,
+                   enum mlxsw_afa_vlan_cmd vid_cmd, u16 vid,
+                   enum mlxsw_afa_vlan_cmd pcp_cmd, u8 pcp,
+                   enum mlxsw_afa_vlan_cmd ethertype_cmd, u8 ethertype)
+{
+       mlxsw_afa_vlan_vlan_tag_cmd_set(payload, vlan_tag_cmd);
+       mlxsw_afa_vlan_vid_cmd_set(payload, vid_cmd);
+       mlxsw_afa_vlan_vid_set(payload, vid);
+       mlxsw_afa_vlan_pcp_cmd_set(payload, pcp_cmd);
+       mlxsw_afa_vlan_pcp_set(payload, pcp);
+       mlxsw_afa_vlan_ethertype_cmd_set(payload, ethertype_cmd);
+       mlxsw_afa_vlan_ethertype_set(payload, ethertype);
+}
+
+int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
+                                      u16 vid, u8 pcp, u8 et)
+{
+       char *act = mlxsw_afa_block_append_action(block,
+                                                 MLXSW_AFA_VLAN_CODE,
+                                                 MLXSW_AFA_VLAN_SIZE);
+
+       if (!act)
+               return -ENOBUFS;
+       mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
+                           MLXSW_AFA_VLAN_CMD_SET_OUTER, vid,
+                           MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp,
+                           MLXSW_AFA_VLAN_CMD_SET_OUTER, et);
+       return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify);
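
A consumer building a flexible action block (for example the spectrum flower offload) can now append a VLAN modify that rewrites the outer tag in a single action slot; note the ethertype argument is an index into the SVER register, not a raw EtherType. A hedged usage sketch (kernel context assumed):

    /* Rewrite the outer VLAN of matching packets.
     * et = 0 picks the first EtherType configured in SVER. */
    err = mlxsw_afa_block_append_vlan_modify(rulei->act_block,
                                             new_vid, new_pcp, 0);
    if (err)
            return err;     /* -ENOBUFS if the block is out of space */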
+
 /* Trap / Discard Action
  * ---------------------
  * The Trap / Discard action enables trapping / mirroring packets to the CPU
@@ -677,3 +760,54 @@ err_append_action:
        return err;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_fwd);
+
+/* Policing and Counting Action
+ * ----------------------------
+ * Policing and Counting action is used for binding policer and counter
+ * to ACL rules.
+ */
+
+#define MLXSW_AFA_POLCNT_CODE 0x08
+#define MLXSW_AFA_POLCNT_SIZE 1
+
+enum mlxsw_afa_polcnt_counter_set_type {
+       /* No count */
+       MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+       /* Count packets and bytes */
+       MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+       /* Count only packets */
+       MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
+/* afa_polcnt_counter_set_type
+ * Counter set type for flow counters.
+ */
+MLXSW_ITEM32(afa, polcnt, counter_set_type, 0x04, 24, 8);
+
+/* afa_polcnt_counter_index
+ * Counter index for flow counters.
+ */
+MLXSW_ITEM32(afa, polcnt, counter_index, 0x04, 0, 24);
+
+static inline void
+mlxsw_afa_polcnt_pack(char *payload,
+                     enum mlxsw_afa_polcnt_counter_set_type set_type,
+                     u32 counter_index)
+{
+       mlxsw_afa_polcnt_counter_set_type_set(payload, set_type);
+       mlxsw_afa_polcnt_counter_index_set(payload, counter_index);
+}
+
+int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
+                                  u32 counter_index)
+{
+       char *act = mlxsw_afa_block_append_action(block,
+                                                 MLXSW_AFA_POLCNT_CODE,
+                                                 MLXSW_AFA_POLCNT_SIZE);
+       if (!act)
+               return -ENOBUFS;
+       mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES,
+                             counter_index);
+       return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_counter);
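
Counter binding works the same way: once a flow counter index has been allocated from the new counter pool (spectrum_cnt.c in the Makefile hunk earlier), a single polcnt action ties it to the rule, counting both packets and bytes. A hedged sketch; the allocation helper named here is illustrative, not necessarily the final API:

    /* Bind a packets+bytes flow counter to an ACL rule (kernel context).
     * mlxsw_sp_flow_counter_alloc() is an illustrative name for the
     * counter-pool allocator introduced alongside this action. */
    err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &counter_index);
    if (err)
            return err;
    err = mlxsw_afa_block_append_counter(rulei->act_block, counter_index);
    if (err)
            return err;     /* -ENOBUFS if the block is out of space */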
index 43f78dcfe3942b87c5167054eff93f4f45e19a52..a03362c1ef3245cff5aba5c2dd07232614df094e 100644 (file)
@@ -62,5 +62,9 @@ void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
 int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
                               u8 local_port, bool in_port);
+int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
+                                      u16 vid, u8 pcp, u8 et);
+int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
+                                  u32 counter_index);
 
 #endif
index e4fcba7c2af202002e9382cee5a0a83857707f6b..c75e9141e3ec57b9ca47f1b35cc717c4dae14c83 100644 (file)
@@ -54,6 +54,8 @@ enum mlxsw_afk_element {
        MLXSW_AFK_ELEMENT_DST_IP6_LO,
        MLXSW_AFK_ELEMENT_DST_L4_PORT,
        MLXSW_AFK_ELEMENT_SRC_L4_PORT,
+       MLXSW_AFK_ELEMENT_VID,
+       MLXSW_AFK_ELEMENT_PCP,
        MLXSW_AFK_ELEMENT_MAX,
 };
 
@@ -88,7 +90,7 @@ struct mlxsw_afk_element_info {
        MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF,                      \
                               _element, _offset, 0, _size)
 
-/* For the purpose of the driver, define a internal storage scratchpad
+/* For the purpose of the driver, define an internal storage scratchpad
  * that will be used to store key/mask values. For each defined element type
  * define an internal storage geometry.
  */
@@ -98,6 +100,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_BUF(SMAC, 0x0A, 6),
        MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8),
+       MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
+       MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
        MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32),
        MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8),
index a223c85dfde064eee873eb6ffd6aae818a4f46ba..eaa3e3bf5a2bcf7d4e479e9d958e9bf9f23887cb 100644 (file)
@@ -44,8 +44,6 @@
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/log2.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
 #include <linux/string.h>
 
 #include "pci_hw.h"
@@ -57,8 +55,6 @@
 
 static const char mlxsw_pci_driver_name[] = "mlxsw_pci";
 
-static struct dentry *mlxsw_pci_dbg_root;
-
 #define mlxsw_pci_write32(mlxsw_pci, reg, val) \
        iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
 #define mlxsw_pci_read32(mlxsw_pci, reg) \
@@ -71,21 +67,6 @@ enum mlxsw_pci_queue_type {
        MLXSW_PCI_QUEUE_TYPE_EQ,
 };
 
-static const char *mlxsw_pci_queue_type_str(enum mlxsw_pci_queue_type q_type)
-{
-       switch (q_type) {
-       case MLXSW_PCI_QUEUE_TYPE_SDQ:
-               return "sdq";
-       case MLXSW_PCI_QUEUE_TYPE_RDQ:
-               return "rdq";
-       case MLXSW_PCI_QUEUE_TYPE_CQ:
-               return "cq";
-       case MLXSW_PCI_QUEUE_TYPE_EQ:
-               return "eq";
-       }
-       BUG();
-}
-
 #define MLXSW_PCI_QUEUE_TYPE_COUNT     4
 
 static const u16 mlxsw_pci_doorbell_type_offset[] = {
@@ -174,7 +155,6 @@ struct mlxsw_pci {
                } comp;
        } cmd;
        struct mlxsw_bus_info bus_info;
-       struct dentry *dbg_dir;
 };
 
 static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
@@ -261,21 +241,11 @@ static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci)
        return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ);
 }
 
-static u8 mlxsw_pci_rdq_count(struct mlxsw_pci *mlxsw_pci)
-{
-       return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_RDQ);
-}
-
 static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci)
 {
        return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ);
 }
 
-static u8 mlxsw_pci_eq_count(struct mlxsw_pci *mlxsw_pci)
-{
-       return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ);
-}
-
 static struct mlxsw_pci_queue *
 __mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci,
                      enum mlxsw_pci_queue_type q_type, u8 q_num)
@@ -390,26 +360,6 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
        mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num);
 }
 
-static int mlxsw_pci_sdq_dbg_read(struct seq_file *file, void *data)
-{
-       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
-       struct mlxsw_pci_queue *q;
-       int i;
-       static const char hdr[] =
-               "NUM PROD_COUNT CONS_COUNT COUNT\n";
-
-       seq_printf(file, hdr);
-       for (i = 0; i < mlxsw_pci_sdq_count(mlxsw_pci); i++) {
-               q = mlxsw_pci_sdq_get(mlxsw_pci, i);
-               spin_lock_bh(&q->lock);
-               seq_printf(file, "%3d %10d %10d %5d\n",
-                          i, q->producer_counter, q->consumer_counter,
-                          q->count);
-               spin_unlock_bh(&q->lock);
-       }
-       return 0;
-}
-
 static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
                                  int index, char *frag_data, size_t frag_len,
                                  int direction)
@@ -544,26 +494,6 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
        }
 }
 
-static int mlxsw_pci_rdq_dbg_read(struct seq_file *file, void *data)
-{
-       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
-       struct mlxsw_pci_queue *q;
-       int i;
-       static const char hdr[] =
-               "NUM PROD_COUNT CONS_COUNT COUNT\n";
-
-       seq_printf(file, hdr);
-       for (i = 0; i < mlxsw_pci_rdq_count(mlxsw_pci); i++) {
-               q = mlxsw_pci_rdq_get(mlxsw_pci, i);
-               spin_lock_bh(&q->lock);
-               seq_printf(file, "%3d %10d %10d %5d\n",
-                          i, q->producer_counter, q->consumer_counter,
-                          q->count);
-               spin_unlock_bh(&q->lock);
-       }
-       return 0;
-}
-
 static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
                             struct mlxsw_pci_queue *q)
 {
@@ -580,7 +510,6 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 
        mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */
        mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
-       mlxsw_cmd_mbox_sw2hw_cq_oi_set(mbox, 0);
        mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
        mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
        for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
@@ -602,27 +531,6 @@ static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
        mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
 }
 
-static int mlxsw_pci_cq_dbg_read(struct seq_file *file, void *data)
-{
-       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
-
-       struct mlxsw_pci_queue *q;
-       int i;
-       static const char hdr[] =
-               "NUM CONS_INDEX  SDQ_COUNT  RDQ_COUNT COUNT\n";
-
-       seq_printf(file, hdr);
-       for (i = 0; i < mlxsw_pci_cq_count(mlxsw_pci); i++) {
-               q = mlxsw_pci_cq_get(mlxsw_pci, i);
-               spin_lock_bh(&q->lock);
-               seq_printf(file, "%3d %10d %10d %10d %5d\n",
-                          i, q->consumer_counter, q->u.cq.comp_sdq_count,
-                          q->u.cq.comp_rdq_count, q->count);
-               spin_unlock_bh(&q->lock);
-       }
-       return 0;
-}
-
 static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
                                     struct mlxsw_pci_queue *q,
                                     u16 consumer_counter_limit,
@@ -755,7 +663,6 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
        }
 
        mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */
-       mlxsw_cmd_mbox_sw2hw_eq_oi_set(mbox, 0);
        mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */
        mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count));
        for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
@@ -777,27 +684,6 @@ static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci,
        mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num);
 }
 
-static int mlxsw_pci_eq_dbg_read(struct seq_file *file, void *data)
-{
-       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
-       struct mlxsw_pci_queue *q;
-       int i;
-       static const char hdr[] =
-               "NUM CONS_COUNT     EV_CMD    EV_COMP   EV_OTHER COUNT\n";
-
-       seq_printf(file, hdr);
-       for (i = 0; i < mlxsw_pci_eq_count(mlxsw_pci); i++) {
-               q = mlxsw_pci_eq_get(mlxsw_pci, i);
-               spin_lock_bh(&q->lock);
-               seq_printf(file, "%3d %10d %10d %10d %10d %5d\n",
-                          i, q->consumer_counter, q->u.eq.ev_cmd_count,
-                          q->u.eq.ev_comp_count, q->u.eq.ev_other_count,
-                          q->count);
-               spin_unlock_bh(&q->lock);
-       }
-       return 0;
-}
-
 static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe)
 {
        mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe);
@@ -868,7 +754,6 @@ struct mlxsw_pci_queue_ops {
        void (*fini)(struct mlxsw_pci *mlxsw_pci,
                     struct mlxsw_pci_queue *q);
        void (*tasklet)(unsigned long data);
-       int (*dbg_read)(struct seq_file *s, void *data);
        u16 elem_count;
        u8 elem_size;
 };
@@ -877,7 +762,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = {
        .type           = MLXSW_PCI_QUEUE_TYPE_SDQ,
        .init           = mlxsw_pci_sdq_init,
        .fini           = mlxsw_pci_sdq_fini,
-       .dbg_read       = mlxsw_pci_sdq_dbg_read,
        .elem_count     = MLXSW_PCI_WQE_COUNT,
        .elem_size      = MLXSW_PCI_WQE_SIZE,
 };
@@ -886,7 +770,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = {
        .type           = MLXSW_PCI_QUEUE_TYPE_RDQ,
        .init           = mlxsw_pci_rdq_init,
        .fini           = mlxsw_pci_rdq_fini,
-       .dbg_read       = mlxsw_pci_rdq_dbg_read,
        .elem_count     = MLXSW_PCI_WQE_COUNT,
        .elem_size      = MLXSW_PCI_WQE_SIZE
 };
@@ -896,7 +779,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
        .init           = mlxsw_pci_cq_init,
        .fini           = mlxsw_pci_cq_fini,
        .tasklet        = mlxsw_pci_cq_tasklet,
-       .dbg_read       = mlxsw_pci_cq_dbg_read,
        .elem_count     = MLXSW_PCI_CQE_COUNT,
        .elem_size      = MLXSW_PCI_CQE_SIZE
 };
@@ -906,7 +788,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
        .init           = mlxsw_pci_eq_init,
        .fini           = mlxsw_pci_eq_fini,
        .tasklet        = mlxsw_pci_eq_tasklet,
-       .dbg_read       = mlxsw_pci_eq_dbg_read,
        .elem_count     = MLXSW_PCI_EQE_COUNT,
        .elem_size      = MLXSW_PCI_EQE_SIZE
 };
@@ -984,9 +865,7 @@ static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
                                      const struct mlxsw_pci_queue_ops *q_ops,
                                      u8 num_qs)
 {
-       struct pci_dev *pdev = mlxsw_pci->pdev;
        struct mlxsw_pci_queue_type_group *queue_group;
-       char tmp[16];
        int i;
        int err;
 
@@ -1003,10 +882,6 @@ static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
        }
        queue_group->count = num_qs;
 
-       sprintf(tmp, "%s_stats", mlxsw_pci_queue_type_str(q_ops->type));
-       debugfs_create_devm_seqfile(&pdev->dev, tmp, mlxsw_pci->dbg_dir,
-                                   q_ops->dbg_read);
-
        return 0;
 
 err_queue_init:
@@ -1852,14 +1727,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
        mlxsw_pci->bus_info.dev = &pdev->dev;
 
-       mlxsw_pci->dbg_dir = debugfs_create_dir(mlxsw_pci->bus_info.device_name,
-                                               mlxsw_pci_dbg_root);
-       if (!mlxsw_pci->dbg_dir) {
-               dev_err(&pdev->dev, "Failed to create debugfs dir\n");
-               err = -ENOMEM;
-               goto err_dbg_create_dir;
-       }
-
        err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
                                             &mlxsw_pci_bus, mlxsw_pci);
        if (err) {
@@ -1870,8 +1737,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        return 0;
 
 err_bus_device_register:
-       debugfs_remove_recursive(mlxsw_pci->dbg_dir);
-err_dbg_create_dir:
        pci_disable_msix(mlxsw_pci->pdev);
 err_msix_init:
 err_sw_reset:
@@ -1892,7 +1757,6 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
        struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
 
        mlxsw_core_bus_device_unregister(mlxsw_pci->core);
-       debugfs_remove_recursive(mlxsw_pci->dbg_dir);
        pci_disable_msix(mlxsw_pci->pdev);
        iounmap(mlxsw_pci->hw_addr);
        pci_release_regions(mlxsw_pci->pdev);
@@ -1916,15 +1780,11 @@ EXPORT_SYMBOL(mlxsw_pci_driver_unregister);
 
 static int __init mlxsw_pci_module_init(void)
 {
-       mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL);
-       if (!mlxsw_pci_dbg_root)
-               return -ENOMEM;
        return 0;
 }
 
 static void __exit mlxsw_pci_module_exit(void)
 {
-       debugfs_remove_recursive(mlxsw_pci_dbg_root);
 }
 
 module_init(mlxsw_pci_module_init);
index 3d42146473b30a786629ec06091eb4364cdfde37..c580abba8d342b844b8f776e32fd8d683722da18 100644 (file)
 
 #define MLXSW_PORT_MID                 0xd000
 
-#define MLXSW_PORT_MAX_PHY_PORTS       0x40
-#define MLXSW_PORT_MAX_PORTS           (MLXSW_PORT_MAX_PHY_PORTS + 1)
-
 #define MLXSW_PORT_MAX_IB_PHY_PORTS    36
 #define MLXSW_PORT_MAX_IB_PORTS                (MLXSW_PORT_MAX_IB_PHY_PORTS + 1)
 
-#define MLXSW_PORT_DEVID_BITS_OFFSET   10
-#define MLXSW_PORT_PHY_BITS_OFFSET     4
-#define MLXSW_PORT_PHY_BITS_MASK       (MLXSW_PORT_MAX_PHY_PORTS - 1)
-
 #define MLXSW_PORT_CPU_PORT            0x0
-#define MLXSW_PORT_ROUTER_PORT         (MLXSW_PORT_MAX_PHY_PORTS + 2)
 
-#define MLXSW_PORT_DONT_CARE           (MLXSW_PORT_MAX_PORTS)
+#define MLXSW_PORT_DONT_CARE           0xFF
 
 #define MLXSW_PORT_MODULE_MAX_WIDTH    4
 
index d9616daf8a705645eb5b14e5af889e0b5020ab0d..83b277c8090e3d16a4b32e380eee387aac95a1a0 100644 (file)
@@ -4125,6 +4125,60 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
  */
 MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12);
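Each MLXSW_ITEM32()/MLXSW_ITEM64() declaration in this file generates a typed get/set accessor pair for one named register field, addressed by byte offset, LSB position and bit width within the big-endian payload buffer. As a rough user-space model only (the real macros live in item.h and also handle indexed and multi-word fields), the 32-bit item semantics look like this:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>  /* htonl/ntohl: payload words are big-endian */

/* Model of MLXSW_ITEM32 semantics: a field of `width` bits whose least
 * significant bit sits at `lsb` of the 32-bit big-endian word stored at
 * byte `offset` of the payload.
 */
static uint32_t item32_get(const char *payload, unsigned int offset,
                           unsigned int lsb, unsigned int width)
{
        uint32_t mask = width == 32 ? 0xFFFFFFFFu : (1u << width) - 1;
        uint32_t word;

        memcpy(&word, payload + offset, sizeof(word));
        return (ntohl(word) >> lsb) & mask;
}

static void item32_set(char *payload, unsigned int offset, unsigned int lsb,
                       unsigned int width, uint32_t val)
{
        uint32_t mask = width == 32 ? 0xFFFFFFFFu : (1u << width) - 1;
        uint32_t word;

        memcpy(&word, payload + offset, sizeof(word));
        word = ntohl(word);
        word = (word & ~(mask << lsb)) | ((val & mask) << lsb);
        word = htonl(word);
        memcpy(payload + offset, &word, sizeof(word));
}

int main(void)
{
        char payload[0x40] = {0};

        /* Equivalent of mlxsw_reg_ritr_sp_if_vid_set(payload, 100):
         * offset 0x18, lsb 0, width 12, per the item just above.
         */
        item32_set(payload, 0x18, 0, 12, 100);
        printf("sp_if_vid = %u\n", item32_get(payload, 0x18, 0, 12));
        return 0;
}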
 
+/* Shared between ingress/egress */
+enum mlxsw_reg_ritr_counter_set_type {
+       /* No Count. */
+       MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT = 0x0,
+       /* Basic. Used for router interfaces, counting the following:
+        *      - Error and Discard counters.
+        *      - Unicast, Multicast and Broadcast counters. Sharing the
+        *        same set of counters for the different types of traffic
+        *        (IPv4, IPv6 and MPLS).
+        */
+       MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC = 0x9,
+};
+
+/* reg_ritr_ingress_counter_index
+ * Counter Index for flow counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ingress_counter_index, 0x38, 0, 24);
+
+/* reg_ritr_ingress_counter_set_type
+ * Ingress Counter Set Type for router interface counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ingress_counter_set_type, 0x38, 24, 8);
+
+/* reg_ritr_egress_counter_index
+ * Counter Index for flow counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, egress_counter_index, 0x3C, 0, 24);
+
+/* reg_ritr_egress_counter_set_type
+ * Egress Counter Set Type for router interface counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, egress_counter_set_type, 0x3C, 24, 8);
+
+static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index,
+                                              bool enable, bool egress)
+{
+       enum mlxsw_reg_ritr_counter_set_type set_type;
+
+       if (enable)
+               set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC;
+       else
+               set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT;
+
+       if (egress) {
+               mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type);
+               mlxsw_reg_ritr_egress_counter_index_set(payload, index);
+       } else {
+               mlxsw_reg_ritr_ingress_counter_set_type_set(payload, set_type);
+               mlxsw_reg_ritr_ingress_counter_index_set(payload, index);
+       }
+}
+
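With the new fields in place, binding an already-allocated flow counter to a RIF follows the usual RITR read-modify-write pattern. A minimal sketch, assuming the counter index was obtained elsewhere; the wrapper name and call site are illustrative, not part of this patch:

/* Sketch only: enable a basic egress counter on an existing RIF. */
static int example_rif_egress_counter_enable(struct mlxsw_sp *mlxsw_sp,
                                             u16 rif, u32 counter_index)
{
        char ritr_pl[MLXSW_REG_RITR_LEN];
        int err;

        /* Fetch the current RITR entry for this RIF, then rewrite it
         * with the counter bound on the egress side.
         */
        mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
        if (err)
                return err;

        mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, true, true);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}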
 static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif)
 {
        MLXSW_REG_ZERO(ritr, payload);
@@ -4141,7 +4195,8 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag,
 
 static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
                                       enum mlxsw_reg_ritr_if_type type,
-                                      u16 rif, u16 mtu, const char *mac)
+                                      u16 rif, u16 vr_id, u16 mtu,
+                                      const char *mac)
 {
        bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL;
 
@@ -4153,6 +4208,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
        mlxsw_reg_ritr_rif_set(payload, rif);
        mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
        mlxsw_reg_ritr_lb_en_set(payload, 1);
+       mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
        mlxsw_reg_ritr_mtu_set(payload, mtu);
        mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
@@ -4285,6 +4341,129 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
        mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
 }
 
+/* RICNT - Router Interface Counter Register
+ * -----------------------------------------
+ * The RICNT register retrieves per-port performance counters.
+ */
+#define MLXSW_REG_RICNT_ID 0x800B
+#define MLXSW_REG_RICNT_LEN 0x100
+
+MLXSW_REG_DEFINE(ricnt, MLXSW_REG_RICNT_ID, MLXSW_REG_RICNT_LEN);
+
+/* reg_ricnt_counter_index
+ * Counter index
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ricnt, counter_index, 0x04, 0, 24);
+
+enum mlxsw_reg_ricnt_counter_set_type {
+       /* No Count. */
+       MLXSW_REG_RICNT_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+       /* Basic. Used for router interfaces, counting the following:
+        *      - Error and Discard counters.
+        *      - Unicast, Multicast and Broadcast counters. Sharing the
+        *        same set of counters for the different types of traffic
+        *        (IPv4, IPv6 and MPLS).
+        */
+       MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC = 0x09,
+};
+
+/* reg_ricnt_counter_set_type
+ * Counter Set Type for router interface counter
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ricnt, counter_set_type, 0x04, 24, 8);
+
+enum mlxsw_reg_ricnt_opcode {
+       /* Nop. Supported only for read access. */
+       MLXSW_REG_RICNT_OPCODE_NOP = 0x00,
+       /* Clear. Setting the clr bit will reset the counter value for
+        * all counters of the specified Router Interface.
+        */
+       MLXSW_REG_RICNT_OPCODE_CLEAR = 0x08,
+};
+
+/* reg_ricnt_op
+ * Opcode
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ricnt, op, 0x00, 28, 4);
+
+/* reg_ricnt_good_unicast_packets
+ * good unicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_unicast_packets, 0x08, 0, 64);
+
+/* reg_ricnt_good_multicast_packets
+ * good multicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_multicast_packets, 0x10, 0, 64);
+
+/* reg_ricnt_good_broadcast_packets
+ * good broadcast packets
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_broadcast_packets, 0x18, 0, 64);
+
+/* reg_ricnt_good_unicast_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for good unicast frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_unicast_bytes, 0x20, 0, 64);
+
+/* reg_ricnt_good_multicast_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for good multicast frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_multicast_bytes, 0x28, 0, 64);
+
+/* reg_ricnt_good_broadcast_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for good broadcast frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_broadcast_bytes, 0x30, 0, 64);
+
+/* reg_ricnt_error_packets
+ * A count of errored frames that do not pass the router checks.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, error_packets, 0x38, 0, 64);
+
+/* reg_ricnt_discard_packets
+ * A count of non-errored frames that do not pass the router checks.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, discard_packets, 0x40, 0, 64);
+
+/* reg_ricnt_error_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for errored frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, error_bytes, 0x48, 0, 64);
+
+/* reg_ricnt_discard_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for non-errored frames that do not pass the router checks.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, discard_bytes, 0x50, 0, 64);
+
+static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index,
+                                       enum mlxsw_reg_ricnt_opcode op)
+{
+       MLXSW_REG_ZERO(ricnt, payload);
+       mlxsw_reg_ricnt_op_set(payload, op);
+       mlxsw_reg_ricnt_counter_index_set(payload, index);
+       mlxsw_reg_ricnt_counter_set_type_set(payload,
+                                            MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC);
+}
+
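Reading the basic set back is a query followed by the generated field getters. A hedged sketch; the wrapper is illustrative, while the mlxsw_reg_ricnt_*_get() names follow mechanically from the MLXSW_ITEM64() definitions above:

static int example_ricnt_read(struct mlxsw_sp *mlxsw_sp, u32 counter_index,
                              u64 *p_good_unicast, u64 *p_discards)
{
        char ricnt_pl[MLXSW_REG_RICNT_LEN];
        int err;

        mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
                             MLXSW_REG_RICNT_OPCODE_NOP);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
        if (err)
                return err;

        *p_good_unicast = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
        *p_discards = mlxsw_reg_ricnt_discard_packets_get(ricnt_pl);
        return 0;
}

Passing MLXSW_REG_RICNT_OPCODE_CLEAR instead performs a destructive read, resetting all counters of the interface as described in the opcode enum.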
 /* RALTA - Router Algorithmic LPM Tree Allocation Register
  * -------------------------------------------------------
  * RALTA is used to allocate the LPM trees of the SHSPM method.
@@ -5504,6 +5683,70 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e,
        mlxsw_reg_mpsc_rate_set(payload, rate);
 }
 
+/* MGPC - Monitoring General Purpose Counter Set Register
+ * -------------------------------------------------------
+ * The MGPC register retrieves and sets the General Purpose Counter Set.
+ */
+#define MLXSW_REG_MGPC_ID 0x9081
+#define MLXSW_REG_MGPC_LEN 0x18
+
+MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN);
+
+enum mlxsw_reg_mgpc_counter_set_type {
+       /* No count */
+       MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+       /* Count packets and bytes */
+       MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+       /* Count only packets */
+       MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
+/* reg_mgpc_counter_set_type
+ * Counter set type.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mgpc, counter_set_type, 0x00, 24, 8);
+
+/* reg_mgpc_counter_index
+ * Counter index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mgpc, counter_index, 0x00, 0, 24);
+
+enum mlxsw_reg_mgpc_opcode {
+       /* Nop */
+       MLXSW_REG_MGPC_OPCODE_NOP = 0x00,
+       /* Clear counters */
+       MLXSW_REG_MGPC_OPCODE_CLEAR = 0x08,
+};
+
+/* reg_mgpc_opcode
+ * Opcode.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mgpc, opcode, 0x04, 28, 4);
+
+/* reg_mgpc_byte_counter
+ * Byte counter value.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mgpc, byte_counter, 0x08, 0, 64);
+
+/* reg_mgpc_packet_counter
+ * Packet counter value.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64);
+
+static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index,
+                                      enum mlxsw_reg_mgpc_opcode opcode,
+                                      enum mlxsw_reg_mgpc_counter_set_type set_type)
+{
+       MLXSW_REG_ZERO(mgpc, payload);
+       mlxsw_reg_mgpc_counter_index_set(payload, counter_index);
+       mlxsw_reg_mgpc_counter_set_type_set(payload, set_type);
+       mlxsw_reg_mgpc_opcode_set(payload, opcode);
+}
+
 /* SBPR - Shared Buffer Pools Register
  * -----------------------------------
  * The SBPR configures and retrieves the shared buffer pools and configuration.
@@ -5960,6 +6203,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(rgcr),
        MLXSW_REG(ritr),
        MLXSW_REG(ratr),
+       MLXSW_REG(ricnt),
        MLXSW_REG(ralta),
        MLXSW_REG(ralst),
        MLXSW_REG(raltb),
@@ -5977,6 +6221,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(mpar),
        MLXSW_REG(mlcr),
        MLXSW_REG(mpsc),
+       MLXSW_REG(mgpc),
        MLXSW_REG(sbpr),
        MLXSW_REG(sbcm),
        MLXSW_REG(sbpm),
index bce8c2e006302db45ce4eaedf8b3a368ec2660c4..9556d934714b0871119d52258813a23f94227d09 100644 (file)
@@ -43,11 +43,15 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
        MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
        MLXSW_RES_ID_MAX_TRAP_GROUPS,
+       MLXSW_RES_ID_COUNTER_POOL_SIZE,
        MLXSW_RES_ID_MAX_SPAN,
+       MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES,
+       MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC,
        MLXSW_RES_ID_MAX_SYSTEM_PORT,
        MLXSW_RES_ID_MAX_LAG,
        MLXSW_RES_ID_MAX_LAG_MEMBERS,
        MLXSW_RES_ID_MAX_BUFFER_SIZE,
+       MLXSW_RES_ID_CELL_SIZE,
        MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS,
        MLXSW_RES_ID_ACL_MAX_TCAM_RULES,
        MLXSW_RES_ID_ACL_MAX_REGIONS,
@@ -59,6 +63,7 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_MAX_CPU_POLICERS,
        MLXSW_RES_ID_MAX_VRS,
        MLXSW_RES_ID_MAX_RIFS,
+       MLXSW_RES_ID_MAX_LPM_TREES,
 
        /* Internal resources.
         * Determined by the SW, not queried from the HW.
@@ -75,11 +80,15 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
        [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
        [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
+       [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410,
        [MLXSW_RES_ID_MAX_SPAN] = 0x2420,
+       [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443,
+       [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449,
        [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502,
        [MLXSW_RES_ID_MAX_LAG] = 0x2520,
        [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
        [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802,        /* Bytes */
+       [MLXSW_RES_ID_CELL_SIZE] = 0x2803,      /* Bytes */
        [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901,
        [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902,
        [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903,
@@ -91,6 +100,7 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
        [MLXSW_RES_ID_MAX_VRS] = 0x2C01,
        [MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
+       [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
 };
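The new IDs are consumed through the existing MLXSW_CORE_RES_GET() mechanism, the same one the driver already uses for MAX_RIFS. Illustrative only; the real consumer is spectrum_cnt.c, which this hunk does not show:

static unsigned int example_counter_pool_size(struct mlxsw_core *core)
{
        /* Number of general-purpose counter entries, as reported by
         * firmware through the 0x2410 resource above.
         */
        return MLXSW_CORE_RES_GET(core, COUNTER_POOL_SIZE);
}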
 
 struct mlxsw_res {
index 16484f24b7dbbaa2fe10170bd7cb46fee9832938..b031f09bf4e64bc08d99c6fc91104234a4ff93cb 100644 (file)
@@ -66,6 +66,8 @@
 #include "port.h"
 #include "trap.h"
 #include "txheader.h"
+#include "spectrum_cnt.h"
+#include "spectrum_dpipe.h"
 
 static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum";
 static const char mlxsw_sp_driver_version[] = "1.0";
@@ -138,6 +140,60 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16);
  */
 MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
 
+int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
+                             unsigned int counter_index, u64 *packets,
+                             u64 *bytes)
+{
+       char mgpc_pl[MLXSW_REG_MGPC_LEN];
+       int err;
+
+       mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
+                           MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
+       if (err)
+               return err;
+       *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
+       *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
+       return 0;
+}
+
+static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp,
+                                      unsigned int counter_index)
+{
+       char mgpc_pl[MLXSW_REG_MGPC_LEN];
+
+       mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR,
+                           MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
+}
+
+int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                               unsigned int *p_counter_index)
+{
+       int err;
+
+       err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+                                    p_counter_index);
+       if (err)
+               return err;
+       err = mlxsw_sp_flow_counter_clear(mlxsw_sp, *p_counter_index);
+       if (err)
+               goto err_counter_clear;
+       return 0;
+
+err_counter_clear:
+       mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+                             *p_counter_index);
+       return err;
+}
+
+void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
+                               unsigned int counter_index)
+{
+       mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+                             counter_index);
+}
+
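Taken together, these helpers give a simple allocate/read/free lifecycle, with the clear-on-alloc guaranteeing that the first read starts from zero. A minimal usage sketch (the call site and error handling are illustrative):

static void example_flow_counter_lifecycle(struct mlxsw_sp *mlxsw_sp)
{
        unsigned int counter_index;
        u64 packets, bytes;

        if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &counter_index))
                return;

        /* Counter is zeroed on allocation; read it at any later point. */
        if (!mlxsw_sp_flow_counter_get(mlxsw_sp, counter_index,
                                       &packets, &bytes))
                pr_info("flow counter: %llu packets, %llu bytes\n",
                        packets, bytes);

        mlxsw_sp_flow_counter_free(mlxsw_sp, counter_index);
}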
 static void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
                                     const struct mlxsw_tx_info *tx_info)
 {
@@ -304,9 +360,10 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
        return false;
 }
 
-static int mlxsw_sp_span_mtu_to_buffsize(int mtu)
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+                                        int mtu)
 {
-       return MLXSW_SP_BYTES_TO_CELLS(mtu * 5 / 2) + 1;
+       return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
 }
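The hard-coded MLXSW_SP_BYTES_TO_CELLS() macro gives way to mlxsw_sp_bytes_cells(), which divides by the cell size queried through the new CELL_SIZE resource. A standalone model of the mirror-buffer arithmetic, assuming round-up division and an illustrative 96-byte cell:

#include <stdio.h>

/* Assumed semantics of mlxsw_sp_bytes_cells(): round up to whole cells. */
static unsigned int bytes_to_cells(unsigned int bytes, unsigned int cell_size)
{
        return (bytes + cell_size - 1) / cell_size;
}

/* The egress mirror buffer is sized at 2.5x the MTU, plus one cell. */
static unsigned int span_mtu_to_buffsize(unsigned int mtu,
                                         unsigned int cell_size)
{
        return bytes_to_cells(mtu * 5 / 2, cell_size) + 1;
}

int main(void)
{
        /* e.g. MTU 1518 with 96-byte cells */
        printf("buffsize = %u cells\n", span_mtu_to_buffsize(1518, 96));
        return 0;
}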
 
 static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
@@ -319,8 +376,9 @@ static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
         * updated according to the mtu value
         */
        if (mlxsw_sp_span_is_egress_mirror(port)) {
-               mlxsw_reg_sbib_pack(sbib_pl, port->local_port,
-                                   mlxsw_sp_span_mtu_to_buffsize(mtu));
+               u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
                if (err) {
                        netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
@@ -357,8 +415,10 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
 
        /* if it is an egress SPAN, bind a shared buffer to it */
        if (type == MLXSW_SP_SPAN_EGRESS) {
-               mlxsw_reg_sbib_pack(sbib_pl, port->local_port,
-                                   mlxsw_sp_span_mtu_to_buffsize(port->dev->mtu));
+               u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+                                                            port->dev->mtu);
+
+               mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
                if (err) {
                        netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
@@ -745,19 +805,47 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p)
        return 0;
 }
 
-static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu,
-                                bool pause_en, bool pfc_en, u16 delay)
+static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp,
+                                        int mtu)
 {
-       u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu);
+       return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu);
+}
 
-       delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) :
-                        MLXSW_SP_PAUSE_DELAY;
+#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */
+
+static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu,
+                                 u16 delay)
+{
+       delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay,
+                                                           BITS_PER_BYTE));
+       return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp,
+                                                                  mtu);
+}
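The DCB-supplied PFC delay is expressed in bit times, so it is rounded up to bytes, converted to cells, scaled by MLXSW_SP_CELL_FACTOR and padded with one MTU worth of cells. A standalone model of the computation (the 96-byte cell size is illustrative):

#include <stdio.h>

#define BITS_PER_BYTE   8
#define CELL_FACTOR     2       /* 2 * cell_size / (IPG + cell_size + 1) */

static unsigned int bytes_to_cells(unsigned int bytes, unsigned int cell_size)
{
        return (bytes + cell_size - 1) / cell_size;
}

static unsigned int pfc_delay_cells(unsigned int mtu, unsigned int delay_bits,
                                    unsigned int cell_size)
{
        /* Round the bit-time delay up to whole bytes first. */
        unsigned int delay_bytes = (delay_bits + BITS_PER_BYTE - 1) /
                                   BITS_PER_BYTE;

        return CELL_FACTOR * bytes_to_cells(delay_bytes, cell_size) +
               bytes_to_cells(mtu, cell_size);
}

int main(void)
{
        /* e.g. MTU 1500, 32768 bit times of PFC delay, 96-byte cells */
        printf("delay = %u cells\n", pfc_delay_cells(1500, 32768, 96));
        return 0;
}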
 
-       if (pause_en || pfc_en)
-               mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index,
-                                                   pg_size + delay, pg_size);
+/* Maximum delay buffer needed in case of PAUSE frames, in bytes.
+ * Assumes 100m cable and maximum MTU.
+ */
+#define MLXSW_SP_PAUSE_DELAY 58752
+
+static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu,
+                                    u16 delay, bool pfc, bool pause)
+{
+       if (pfc)
+               return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay);
+       else if (pause)
+               return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY);
        else
-               mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size);
+               return 0;
+}
+
+static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres,
+                                bool lossy)
+{
+       if (lossy)
+               mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size);
+       else
+               mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size,
+                                                   thres);
 }
 
 int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
@@ -778,6 +866,8 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                bool configure = false;
                bool pfc = false;
+               bool lossy;
+               u16 thres;
 
                for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) {
                        if (prio_tc[j] == i) {
@@ -789,7 +879,12 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
 
                if (!configure)
                        continue;
-               mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay);
+
+               lossy = !(pfc || pause_en);
+               thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu);
+               delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc,
+                                                 pause_en);
+               mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy);
        }
 
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
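Putting the three helpers together, each mapped priority group gets a buffer of threshold + delay cells, with the threshold doubling as the xoff point for lossless groups. A compact standalone model of that policy (all cell values illustrative):

#include <stdbool.h>
#include <stdio.h>

struct pg_buf {
        unsigned int size;      /* total buffer, in cells */
        unsigned int thres;     /* xoff threshold, in cells (lossless) */
        bool lossy;
};

/* Model of the loop above: thres = 2 * cells(mtu); the delay term is
 * only added when PFC or global pause makes the group lossless.
 */
static struct pg_buf pg_buf_get(unsigned int mtu_cells,
                                unsigned int delay_cells,
                                bool pfc, bool pause)
{
        struct pg_buf buf;
        unsigned int thres = 2 * mtu_cells;

        buf.lossy = !(pfc || pause);
        buf.thres = thres;
        buf.size = thres + (buf.lossy ? 0 : delay_cells);
        return buf;
}

int main(void)
{
        struct pg_buf b = pg_buf_get(16, 700, true, false);

        printf("size=%u cells, thres=%u cells, lossy=%d\n",
               b.size, b.thres, b.lossy);
        return 0;
}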
@@ -1368,7 +1463,7 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle,
                                                       tc->cls_mall);
                        return 0;
                default:
-                       return -EINVAL;
+                       return -EOPNOTSUPP;
                }
        case TC_SETUP_CLSFLOWER:
                switch (tc->cls_flower->command) {
@@ -1379,6 +1474,9 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle,
                        mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress,
                                                tc->cls_flower);
                        return 0;
+               case TC_CLSFLOWER_STATS:
+                       return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress,
+                                                    tc->cls_flower);
                default:
                        return -EOPNOTSUPP;
                }
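The new TC_CLSFLOWER_STATS case routes stats queries to mlxsw_sp_flower_stats(), which lives in spectrum_flower.c and is not shown in this diff. As a hedged sketch, such a callback typically reads the rule's flow counter and reports it back to the TC core; the rule-to-counter lookup is elided here, and tcf_exts_stats_update() is assumed as the reporting helper:

static int example_flower_stats(struct mlxsw_sp *mlxsw_sp,
                                unsigned int counter_index,
                                struct tc_cls_flower_offload *f)
{
        u64 packets, bytes;
        int err;

        err = mlxsw_sp_flow_counter_get(mlxsw_sp, counter_index,
                                        &packets, &bytes);
        if (err)
                return err;

        /* Assumption: the real callback also tracks last-use per rule;
         * jiffies stands in for that here.
         */
        tcf_exts_stats_update(f->exts, bytes, packets, jiffies);
        return 0;
}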
@@ -1492,6 +1590,7 @@ err_port_pause_configure:
 struct mlxsw_sp_port_hw_stats {
        char str[ETH_GSTRING_LEN];
        u64 (*getter)(const char *payload);
+       bool cells_bytes;
 };
 
 static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = {
@@ -1612,17 +1711,11 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = {
 
 #define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats)
 
-static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(const char *ppcnt_pl)
-{
-       u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl);
-
-       return MLXSW_SP_CELLS_TO_BYTES(transmit_queue);
-}
-
 static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = {
        {
                .str = "tc_transmit_queue_tc",
-               .getter = mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get,
+               .getter = mlxsw_reg_ppcnt_tc_transmit_queue_get,
+               .cells_bytes = true,
        },
        {
                .str = "tc_no_buffer_discard_uc_tc",
@@ -1734,6 +1827,8 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev,
                                      enum mlxsw_reg_ppcnt_grp grp, int prio,
                                      u64 *data, int data_index)
 {
+       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        struct mlxsw_sp_port_hw_stats *hw_stats;
        char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
        int i, len;
@@ -1743,8 +1838,13 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev,
        if (err)
                return;
        mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl);
-       for (i = 0; i < len; i++)
+       for (i = 0; i < len; i++) {
                data[data_index + i] = hw_stats[i].getter(ppcnt_pl);
+               if (!hw_stats[i].cells_bytes)
+                       continue;
+               data[data_index + i] = mlxsw_sp_cells_bytes(mlxsw_sp,
+                                                           data[data_index + i]);
+       }
 }
 
 static void mlxsw_sp_port_get_stats(struct net_device *dev,
@@ -2537,25 +2637,33 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
 {
        int i;
 
-       for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
+       for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
                if (mlxsw_sp_port_created(mlxsw_sp, i))
                        mlxsw_sp_port_remove(mlxsw_sp, i);
+       kfree(mlxsw_sp->port_to_module);
        kfree(mlxsw_sp->ports);
 }
 
 static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 {
+       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
        u8 module, width, lane;
        size_t alloc_size;
        int i;
        int err;
 
-       alloc_size = sizeof(struct mlxsw_sp_port *) * MLXSW_PORT_MAX_PORTS;
+       alloc_size = sizeof(struct mlxsw_sp_port *) * max_ports;
        mlxsw_sp->ports = kzalloc(alloc_size, GFP_KERNEL);
        if (!mlxsw_sp->ports)
                return -ENOMEM;
 
-       for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) {
+       mlxsw_sp->port_to_module = kcalloc(max_ports, sizeof(u8), GFP_KERNEL);
+       if (!mlxsw_sp->port_to_module) {
+               err = -ENOMEM;
+               goto err_port_to_module_alloc;
+       }
+
+       for (i = 1; i < max_ports; i++) {
                err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module,
                                                    &width, &lane);
                if (err)
@@ -2575,6 +2683,8 @@ err_port_module_info_get:
        for (i--; i >= 1; i--)
                if (mlxsw_sp_port_created(mlxsw_sp, i))
                        mlxsw_sp_port_remove(mlxsw_sp, i);
+       kfree(mlxsw_sp->port_to_module);
+err_port_to_module_alloc:
        kfree(mlxsw_sp->ports);
        return err;
 }
@@ -3224,6 +3334,18 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_acl_init;
        }
 
+       err = mlxsw_sp_counter_pool_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
+               goto err_counter_pool_init;
+       }
+
+       err = mlxsw_sp_dpipe_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n");
+               goto err_dpipe_init;
+       }
+
        err = mlxsw_sp_ports_create(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
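mlxsw_sp_counter_alloc()/mlxsw_sp_counter_free(), initialized just above via mlxsw_sp_counter_pool_init(), come from the new spectrum_cnt.c, which this section of the diff does not show. A plausible shape, sketched here purely for illustration, is a per-sub-pool bitmap allocator sized from the COUNTER_POOL_SIZE resource:

struct example_counter_pool {
        unsigned long *usage;           /* one bit per counter entry */
        unsigned int pool_size;         /* from COUNTER_POOL_SIZE */
};

static int example_counter_alloc(struct example_counter_pool *pool,
                                 unsigned int *p_counter_index)
{
        unsigned int index;

        index = find_first_zero_bit(pool->usage, pool->pool_size);
        if (index == pool->pool_size)
                return -ENOBUFS;
        __set_bit(index, pool->usage);
        *p_counter_index = index;
        return 0;
}

static void example_counter_free(struct example_counter_pool *pool,
                                 unsigned int counter_index)
{
        __clear_bit(counter_index, pool->usage);
}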
@@ -3233,6 +3355,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
        return 0;
 
 err_ports_create:
+       mlxsw_sp_dpipe_fini(mlxsw_sp);
+err_dpipe_init:
+       mlxsw_sp_counter_pool_fini(mlxsw_sp);
+err_counter_pool_init:
        mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
        mlxsw_sp_span_fini(mlxsw_sp);
@@ -3255,6 +3381,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
        mlxsw_sp_ports_remove(mlxsw_sp);
+       mlxsw_sp_dpipe_fini(mlxsw_sp);
+       mlxsw_sp_counter_pool_fini(mlxsw_sp);
        mlxsw_sp_acl_fini(mlxsw_sp);
        mlxsw_sp_span_fini(mlxsw_sp);
        mlxsw_sp_router_fini(mlxsw_sp);
@@ -3326,13 +3454,13 @@ bool mlxsw_sp_port_dev_check(const struct net_device *dev)
        return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
 }
 
-static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data)
+static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data)
 {
-       struct mlxsw_sp_port **port = data;
+       struct mlxsw_sp_port **p_mlxsw_sp_port = data;
        int ret = 0;
 
        if (mlxsw_sp_port_dev_check(lower_dev)) {
-               *port = netdev_priv(lower_dev);
+               *p_mlxsw_sp_port = netdev_priv(lower_dev);
                ret = 1;
        }
 
@@ -3341,18 +3469,18 @@ static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data)
 
 static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev)
 {
-       struct mlxsw_sp_port *port;
+       struct mlxsw_sp_port *mlxsw_sp_port;
 
        if (mlxsw_sp_port_dev_check(dev))
                return netdev_priv(dev);
 
-       port = NULL;
-       netdev_walk_all_lower_dev(dev, mlxsw_lower_dev_walk, &port);
+       mlxsw_sp_port = NULL;
+       netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port);
 
-       return port;
+       return mlxsw_sp_port;
 }
 
-static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
+struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
 
@@ -3362,15 +3490,16 @@ static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
 
 static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev)
 {
-       struct mlxsw_sp_port *port;
+       struct mlxsw_sp_port *mlxsw_sp_port;
 
        if (mlxsw_sp_port_dev_check(dev))
                return netdev_priv(dev);
 
-       port = NULL;
-       netdev_walk_all_lower_dev_rcu(dev, mlxsw_lower_dev_walk, &port);
+       mlxsw_sp_port = NULL;
+       netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk,
+                                     &mlxsw_sp_port);
 
-       return port;
+       return mlxsw_sp_port;
 }
 
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
@@ -3390,546 +3519,6 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
        dev_put(mlxsw_sp_port->dev);
 }
 
-static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r,
-                                      unsigned long event)
-{
-       switch (event) {
-       case NETDEV_UP:
-               if (!r)
-                       return true;
-               r->ref_count++;
-               return false;
-       case NETDEV_DOWN:
-               if (r && --r->ref_count == 0)
-                       return true;
-               /* It is possible we already removed the RIF ourselves
-                * if it was assigned to a netdev that is now a bridge
-                * or LAG slave.
-                */
-               return false;
-       }
-
-       return false;
-}
-
-static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
-{
-       int i;
-
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
-               if (!mlxsw_sp->rifs[i])
-                       return i;
-
-       return MLXSW_SP_INVALID_RIF;
-}
-
-static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                          bool *p_lagged, u16 *p_system_port)
-{
-       u8 local_port = mlxsw_sp_vport->local_port;
-
-       *p_lagged = mlxsw_sp_vport->lagged;
-       *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
-}
-
-static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                   struct net_device *l3_dev, u16 rif,
-                                   bool create)
-{
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
-       bool lagged = mlxsw_sp_vport->lagged;
-       char ritr_pl[MLXSW_REG_RITR_LEN];
-       u16 system_port;
-
-       mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif,
-                           l3_dev->mtu, l3_dev->dev_addr);
-
-       mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
-       mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
-                                 mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
-
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-}
-
-static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
-
-static struct mlxsw_sp_fid *
-mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
-{
-       struct mlxsw_sp_fid *f;
-
-       f = kzalloc(sizeof(*f), GFP_KERNEL);
-       if (!f)
-               return NULL;
-
-       f->leave = mlxsw_sp_vport_rif_sp_leave;
-       f->ref_count = 0;
-       f->dev = l3_dev;
-       f->fid = fid;
-
-       return f;
-}
-
-static struct mlxsw_sp_rif *
-mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f)
-{
-       struct mlxsw_sp_rif *r;
-
-       r = kzalloc(sizeof(*r), GFP_KERNEL);
-       if (!r)
-               return NULL;
-
-       INIT_LIST_HEAD(&r->nexthop_list);
-       INIT_LIST_HEAD(&r->neigh_list);
-       ether_addr_copy(r->addr, l3_dev->dev_addr);
-       r->mtu = l3_dev->mtu;
-       r->ref_count = 1;
-       r->dev = l3_dev;
-       r->rif = rif;
-       r->f = f;
-
-       return r;
-}
-
-static struct mlxsw_sp_rif *
-mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
-                            struct net_device *l3_dev)
-{
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
-       struct mlxsw_sp_fid *f;
-       struct mlxsw_sp_rif *r;
-       u16 fid, rif;
-       int err;
-
-       rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
-       if (rif == MLXSW_SP_INVALID_RIF)
-               return ERR_PTR(-ERANGE);
-
-       err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true);
-       if (err)
-               return ERR_PTR(err);
-
-       fid = mlxsw_sp_rif_sp_to_fid(rif);
-       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
-       if (err)
-               goto err_rif_fdb_op;
-
-       f = mlxsw_sp_rfid_alloc(fid, l3_dev);
-       if (!f) {
-               err = -ENOMEM;
-               goto err_rfid_alloc;
-       }
-
-       r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
-       if (!r) {
-               err = -ENOMEM;
-               goto err_rif_alloc;
-       }
-
-       f->r = r;
-       mlxsw_sp->rifs[rif] = r;
-
-       return r;
-
-err_rif_alloc:
-       kfree(f);
-err_rfid_alloc:
-       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
-err_rif_fdb_op:
-       mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false);
-       return ERR_PTR(err);
-}
-
-static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                         struct mlxsw_sp_rif *r)
-{
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
-       struct net_device *l3_dev = r->dev;
-       struct mlxsw_sp_fid *f = r->f;
-       u16 fid = f->fid;
-       u16 rif = r->rif;
-
-       mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r);
-
-       mlxsw_sp->rifs[rif] = NULL;
-       f->r = NULL;
-
-       kfree(r);
-
-       kfree(f);
-
-       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
-
-       mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false);
-}
-
-static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                     struct net_device *l3_dev)
-{
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
-       struct mlxsw_sp_rif *r;
-
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
-       if (!r) {
-               r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
-               if (IS_ERR(r))
-                       return PTR_ERR(r);
-       }
-
-       mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f);
-       r->f->ref_count++;
-
-       netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid);
-
-       return 0;
-}
-
-static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
-{
-       struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
-
-       netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
-
-       mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
-       if (--f->ref_count == 0)
-               mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r);
-}
-
-static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
-                                        struct net_device *port_dev,
-                                        unsigned long event, u16 vid)
-{
-       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
-       struct mlxsw_sp_port *mlxsw_sp_vport;
-
-       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
-       if (WARN_ON(!mlxsw_sp_vport))
-               return -EINVAL;
-
-       switch (event) {
-       case NETDEV_UP:
-               return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
-       case NETDEV_DOWN:
-               mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
-               break;
-       }
-
-       return 0;
-}
-
-static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
-                                       unsigned long event)
-{
-       if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev))
-               return 0;
-
-       return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
-}
-
-static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
-                                        struct net_device *lag_dev,
-                                        unsigned long event, u16 vid)
-{
-       struct net_device *port_dev;
-       struct list_head *iter;
-       int err;
-
-       netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
-               if (mlxsw_sp_port_dev_check(port_dev)) {
-                       err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
-                                                           event, vid);
-                       if (err)
-                               return err;
-               }
-       }
-
-       return 0;
-}
-
-static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
-                                      unsigned long event)
-{
-       if (netif_is_bridge_port(lag_dev))
-               return 0;
-
-       return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
-}
-
-static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
-                                                   struct net_device *l3_dev)
-{
-       u16 fid;
-
-       if (is_vlan_dev(l3_dev))
-               fid = vlan_dev_vlan_id(l3_dev);
-       else if (mlxsw_sp->master_bridge.dev == l3_dev)
-               fid = 1;
-       else
-               return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
-
-       return mlxsw_sp_fid_find(mlxsw_sp, fid);
-}
-
-static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
-{
-       return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
-              MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
-}
-
-static u16 mlxsw_sp_flood_table_index_get(u16 fid)
-{
-       return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
-}
-
-static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
-                                         bool set)
-{
-       enum mlxsw_flood_table_type table_type;
-       char *sftr_pl;
-       u16 index;
-       int err;
-
-       sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
-       if (!sftr_pl)
-               return -ENOMEM;
-
-       table_type = mlxsw_sp_flood_table_type_get(fid);
-       index = mlxsw_sp_flood_table_index_get(fid);
-       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
-                           1, MLXSW_PORT_ROUTER_PORT, set);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
-
-       kfree(sftr_pl);
-       return err;
-}
-
-static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
-{
-       if (mlxsw_sp_fid_is_vfid(fid))
-               return MLXSW_REG_RITR_FID_IF;
-       else
-               return MLXSW_REG_RITR_VLAN_IF;
-}
-
-static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp,
-                                 struct net_device *l3_dev,
-                                 u16 fid, u16 rif,
-                                 bool create)
-{
-       enum mlxsw_reg_ritr_if_type rif_type;
-       char ritr_pl[MLXSW_REG_RITR_LEN];
-
-       rif_type = mlxsw_sp_rif_type_get(fid);
-       mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu,
-                           l3_dev->dev_addr);
-       mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
-
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-}
-
-static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
-                                     struct net_device *l3_dev,
-                                     struct mlxsw_sp_fid *f)
-{
-       struct mlxsw_sp_rif *r;
-       u16 rif;
-       int err;
-
-       rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
-       if (rif == MLXSW_SP_INVALID_RIF)
-               return -ERANGE;
-
-       err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
-       if (err)
-               return err;
-
-       err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
-       if (err)
-               goto err_rif_bridge_op;
-
-       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
-       if (err)
-               goto err_rif_fdb_op;
-
-       r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
-       if (!r) {
-               err = -ENOMEM;
-               goto err_rif_alloc;
-       }
-
-       f->r = r;
-       mlxsw_sp->rifs[rif] = r;
-
-       netdev_dbg(l3_dev, "RIF=%d created\n", rif);
-
-       return 0;
-
-err_rif_alloc:
-       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
-err_rif_fdb_op:
-       mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
-err_rif_bridge_op:
-       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
-       return err;
-}
-
-void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
-                                struct mlxsw_sp_rif *r)
-{
-       struct net_device *l3_dev = r->dev;
-       struct mlxsw_sp_fid *f = r->f;
-       u16 rif = r->rif;
-
-       mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r);
-
-       mlxsw_sp->rifs[rif] = NULL;
-       f->r = NULL;
-
-       kfree(r);
-
-       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
-
-       mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
-
-       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
-
-       netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
-}
-
-static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
-                                         struct net_device *br_dev,
-                                         unsigned long event)
-{
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
-       struct mlxsw_sp_fid *f;
-
-       /* FID can either be an actual FID if the L3 device is the
-        * VLAN-aware bridge or a VLAN device on top. Otherwise, the
-        * L3 device is a VLAN-unaware bridge and we get a vFID.
-        */
-       f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
-       if (WARN_ON(!f))
-               return -EINVAL;
-
-       switch (event) {
-       case NETDEV_UP:
-               return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
-       case NETDEV_DOWN:
-               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
-               break;
-       }
-
-       return 0;
-}
-
-static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
-                                       unsigned long event)
-{
-       struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
-       u16 vid = vlan_dev_vlan_id(vlan_dev);
-
-       if (mlxsw_sp_port_dev_check(real_dev))
-               return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
-                                                    vid);
-       else if (netif_is_lag_master(real_dev))
-               return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
-                                                    vid);
-       else if (netif_is_bridge_master(real_dev) &&
-                mlxsw_sp->master_bridge.dev == real_dev)
-               return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
-                                                     event);
-
-       return 0;
-}
-
-static int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
-                                  unsigned long event, void *ptr)
-{
-       struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
-       struct net_device *dev = ifa->ifa_dev->dev;
-       struct mlxsw_sp *mlxsw_sp;
-       struct mlxsw_sp_rif *r;
-       int err = 0;
-
-       mlxsw_sp = mlxsw_sp_lower_get(dev);
-       if (!mlxsw_sp)
-               goto out;
-
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
-       if (!mlxsw_sp_rif_should_config(r, event))
-               goto out;
-
-       if (mlxsw_sp_port_dev_check(dev))
-               err = mlxsw_sp_inetaddr_port_event(dev, event);
-       else if (netif_is_lag_master(dev))
-               err = mlxsw_sp_inetaddr_lag_event(dev, event);
-       else if (netif_is_bridge_master(dev))
-               err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
-       else if (is_vlan_dev(dev))
-               err = mlxsw_sp_inetaddr_vlan_event(dev, event);
-
-out:
-       return notifier_from_errno(err);
-}
-
-static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif,
-                            const char *mac, int mtu)
-{
-       char ritr_pl[MLXSW_REG_RITR_LEN];
-       int err;
-
-       mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
-       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-       if (err)
-               return err;
-
-       mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
-       mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
-       mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-}
-
-static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
-{
-       struct mlxsw_sp *mlxsw_sp;
-       struct mlxsw_sp_rif *r;
-       int err;
-
-       mlxsw_sp = mlxsw_sp_lower_get(dev);
-       if (!mlxsw_sp)
-               return 0;
-
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
-       if (!r)
-               return 0;
-
-       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false);
-       if (err)
-               return err;
-
-       err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu);
-       if (err)
-               goto err_rif_edit;
-
-       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true);
-       if (err)
-               goto err_rif_fdb_op;
-
-       ether_addr_copy(r->addr, dev->dev_addr);
-       r->mtu = dev->mtu;
-
-       netdev_dbg(dev, "Updated RIF=%d\n", r->rif);
-
-       return 0;
-
-err_rif_fdb_op:
-       mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu);
-err_rif_edit:
-       mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true);
-       return err;
-}
-
 static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port,
                                         u16 fid)
 {
@@ -4220,7 +3809,7 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
 
 static void
 mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
-                                 u16 lag_id)
+                                 struct net_device *lag_dev, u16 lag_id)
 {
        struct mlxsw_sp_port *mlxsw_sp_vport;
        struct mlxsw_sp_fid *f;
@@ -4238,6 +3827,7 @@ mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
 
        mlxsw_sp_vport->lag_id = lag_id;
        mlxsw_sp_vport->lagged = 1;
+       mlxsw_sp_vport->dev = lag_dev;
 }
 
 static void
@@ -4254,6 +3844,7 @@ mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port)
        if (f)
                f->leave(mlxsw_sp_vport);
 
+       mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
        mlxsw_sp_vport->lagged = 0;
 }
 
@@ -4293,7 +3884,7 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
        mlxsw_sp_port->lagged = 1;
        lag->ref_count++;
 
-       mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id);
+       mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_dev, lag_id);
 
        return 0;
 
@@ -4421,7 +4012,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                upper_dev = info->upper_dev;
                if (!is_vlan_dev(upper_dev) &&
                    !netif_is_lag_master(upper_dev) &&
-                   !netif_is_bridge_master(upper_dev))
+                   !netif_is_bridge_master(upper_dev) &&
+                   !netif_is_l3_master(upper_dev))
                        return -EINVAL;
                if (!info->linking)
                        break;
@@ -4461,6 +4053,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                        else
                                mlxsw_sp_port_lag_leave(mlxsw_sp_port,
                                                        upper_dev);
+               } else if (netif_is_l3_master(upper_dev)) {
+                       if (info->linking)
+                               err = mlxsw_sp_port_vrf_join(mlxsw_sp_port);
+                       else
+                               mlxsw_sp_port_vrf_leave(mlxsw_sp_port);
                } else {
                        err = -EINVAL;
                        WARN_ON(1);
@@ -4552,8 +4149,8 @@ static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp_fid *f;
 
        f = mlxsw_sp_fid_find(mlxsw_sp, fid);
-       if (f && f->r)
-               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+       if (f && f->rif)
+               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
        if (f && --f->ref_count == 0)
                mlxsw_sp_fid_destroy(mlxsw_sp, f);
 }
@@ -4564,33 +4161,46 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
        struct netdev_notifier_changeupper_info *info;
        struct net_device *upper_dev;
        struct mlxsw_sp *mlxsw_sp;
-       int err;
+       int err = 0;
 
        mlxsw_sp = mlxsw_sp_lower_get(br_dev);
        if (!mlxsw_sp)
                return 0;
-       if (br_dev != mlxsw_sp->master_bridge.dev)
-               return 0;
 
        info = ptr;
 
        switch (event) {
+       case NETDEV_PRECHANGEUPPER:
+               upper_dev = info->upper_dev;
+               if (!is_vlan_dev(upper_dev) && !netif_is_l3_master(upper_dev))
+                       return -EINVAL;
+               if (is_vlan_dev(upper_dev) &&
+                   br_dev != mlxsw_sp->master_bridge.dev)
+                       return -EINVAL;
+               break;
        case NETDEV_CHANGEUPPER:
                upper_dev = info->upper_dev;
-               if (!is_vlan_dev(upper_dev))
-                       break;
-               if (info->linking) {
-                       err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp,
-                                                              upper_dev);
-                       if (err)
-                               return err;
+               if (is_vlan_dev(upper_dev)) {
+                       if (info->linking)
+                               err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp,
+                                                                      upper_dev);
+                       else
+                               mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp,
+                                                                  upper_dev);
+               } else if (netif_is_l3_master(upper_dev)) {
+                       if (info->linking)
+                               err = mlxsw_sp_bridge_vrf_join(mlxsw_sp,
+                                                              br_dev);
+                       else
+                               mlxsw_sp_bridge_vrf_leave(mlxsw_sp, br_dev);
                } else {
-                       mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev);
+                       err = -EINVAL;
+                       WARN_ON(1);
                }
                break;
        }
 
-       return 0;
+       return err;
 }
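
These handlers all follow the two-phase changeupper contract: NETDEV_PRECHANGEUPPER
fires before the upper device is linked, so returning an error there vetoes the
operation; NETDEV_CHANGEUPPER fires after the linking decision, so the handler can
only react and report failure. A minimal sketch of the contract, with the
notifier's event/info in scope and hypothetical upper_is_supported(), do_join()
and do_leave() helpers:

        switch (event) {
        case NETDEV_PRECHANGEUPPER:
                /* Veto phase: nothing has changed yet, so an error
                 * return makes the core abort the operation.
                 */
                if (!upper_is_supported(info->upper_dev))
                        return -EINVAL;
                break;
        case NETDEV_CHANGEUPPER:
                /* Commit phase: the stack already linked (or unlinked)
                 * the devices; an error can no longer cancel it.
                 */
                if (info->linking)
                        err = do_join(info->upper_dev);
                else
                        do_leave(info->upper_dev);
                break;
        }
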
 
 static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp)
@@ -4657,8 +4267,8 @@ static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
        clear_bit(vfid, mlxsw_sp->vfids.mapped);
        list_del(&f->list);
 
-       if (f->r)
-               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+       if (f->rif)
+               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
 
        kfree(f);
 
@@ -4810,33 +4420,43 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
        int err = 0;
 
        mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+       if (!mlxsw_sp_vport)
+               return 0;
 
        switch (event) {
        case NETDEV_PRECHANGEUPPER:
                upper_dev = info->upper_dev;
-               if (!netif_is_bridge_master(upper_dev))
+               if (!netif_is_bridge_master(upper_dev) &&
+                   !netif_is_l3_master(upper_dev))
                        return -EINVAL;
                if (!info->linking)
                        break;
                /* We can't have multiple VLAN interfaces configured on
                 * the same port and being members in the same bridge.
                 */
-               if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port,
+               if (netif_is_bridge_master(upper_dev) &&
+                   !mlxsw_sp_port_master_bridge_check(mlxsw_sp_port,
                                                       upper_dev))
                        return -EINVAL;
                break;
        case NETDEV_CHANGEUPPER:
                upper_dev = info->upper_dev;
-               if (info->linking) {
-                       if (WARN_ON(!mlxsw_sp_vport))
-                               return -EINVAL;
-                       err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport,
-                                                        upper_dev);
+               if (netif_is_bridge_master(upper_dev)) {
+                       if (info->linking)
+                               err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport,
+                                                                upper_dev);
+                       else
+                               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport);
+               } else if (netif_is_l3_master(upper_dev)) {
+                       if (info->linking)
+                               err = mlxsw_sp_vport_vrf_join(mlxsw_sp_vport);
+                       else
+                               mlxsw_sp_vport_vrf_leave(mlxsw_sp_vport);
                } else {
-                       if (!mlxsw_sp_vport)
-                               return 0;
-                       mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport);
+                       err = -EINVAL;
+                       WARN_ON(1);
                }
+               break;
        }
 
        return err;
@@ -4862,6 +4482,47 @@ static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev,
        return 0;
 }
 
+static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
+                                               unsigned long event, void *ptr)
+{
+       struct netdev_notifier_changeupper_info *info;
+       struct mlxsw_sp *mlxsw_sp;
+       int err = 0;
+
+       mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
+       if (!mlxsw_sp)
+               return 0;
+
+       info = ptr;
+
+       switch (event) {
+       case NETDEV_PRECHANGEUPPER:
+               /* VLAN devices are only allowed on top of the
+                * VLAN-aware bridge.
+                */
+               if (WARN_ON(vlan_dev_real_dev(vlan_dev) !=
+                           mlxsw_sp->master_bridge.dev))
+                       return -EINVAL;
+               if (!netif_is_l3_master(info->upper_dev))
+                       return -EINVAL;
+               break;
+       case NETDEV_CHANGEUPPER:
+               if (netif_is_l3_master(info->upper_dev)) {
+                       if (info->linking)
+                               err = mlxsw_sp_bridge_vrf_join(mlxsw_sp,
+                                                              vlan_dev);
+                       else
+                               mlxsw_sp_bridge_vrf_leave(mlxsw_sp, vlan_dev);
+               } else {
+                       err = -EINVAL;
+                       WARN_ON(1);
+               }
+               break;
+       }
+
+       return err;
+}
+
 static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
                                         unsigned long event, void *ptr)
 {
@@ -4874,6 +4535,9 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
        else if (netif_is_lag_master(real_dev))
                return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr,
                                                          vid);
+       else if (netif_is_bridge_master(real_dev))
+               return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, event,
+                                                           ptr);
 
        return 0;
 }
index 13ec85e7c392f8941ecf6441333d416ba4609f3a..c245e4c3d9adc3a36a1a8451f3348eeaa68af5ce 100644 (file)
 #define MLXSW_SP_VFID_MAX 1024 /* Bridged VLAN interfaces */
 
 #define MLXSW_SP_RFID_BASE 15360
-#define MLXSW_SP_INVALID_RIF 0xffff
 
 #define MLXSW_SP_MID_MAX 7000
 
 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
 
-#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */
-#define MLXSW_SP_LPM_TREE_MAX 22
-#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN)
-
 #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */
 
-#define MLXSW_SP_BYTES_PER_CELL 96
-
-#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL)
-#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL)
-
 #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
 #define MLXSW_SP_KVD_GRANULARITY 128
 
-/* Maximum delay buffer needed in case of PAUSE frames, in cells.
- * Assumes 100m cable and maximum MTU.
- */
-#define MLXSW_SP_PAUSE_DELAY 612
-
-#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */
-
-static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay)
-{
-       delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE));
-       return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu);
-}
-
 struct mlxsw_sp_port;
+struct mlxsw_sp_rif;
 
 struct mlxsw_sp_upper {
        struct net_device *dev;
@@ -103,21 +81,10 @@ struct mlxsw_sp_fid {
        struct list_head list;
        unsigned int ref_count;
        struct net_device *dev;
-       struct mlxsw_sp_rif *r;
+       struct mlxsw_sp_rif *rif;
        u16 fid;
 };
 
-struct mlxsw_sp_rif {
-       struct list_head nexthop_list;
-       struct list_head neigh_list;
-       struct net_device *dev;
-       unsigned int ref_count;
-       struct mlxsw_sp_fid *f;
-       unsigned char addr[ETH_ALEN];
-       int mtu;
-       u16 rif;
-};
-
 struct mlxsw_sp_mid {
        struct list_head list;
        unsigned char addr[ETH_ALEN];
@@ -141,16 +108,6 @@ static inline bool mlxsw_sp_fid_is_vfid(u16 fid)
        return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE;
 }
 
-static inline bool mlxsw_sp_fid_is_rfid(u16 fid)
-{
-       return fid >= MLXSW_SP_RFID_BASE;
-}
-
-static inline u16 mlxsw_sp_rif_sp_to_fid(u16 rif)
-{
-       return MLXSW_SP_RFID_BASE + rif;
-}
-
 struct mlxsw_sp_sb_pr {
        enum mlxsw_reg_sbpr_mode mode;
        u32 size;
@@ -177,12 +134,15 @@ struct mlxsw_sp_sb_pm {
 #define MLXSW_SP_SB_POOL_COUNT 4
 #define MLXSW_SP_SB_TC_COUNT   8
 
+struct mlxsw_sp_sb_port {
+       struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
+       struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
+};
+
 struct mlxsw_sp_sb {
        struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT];
-       struct {
-               struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
-               struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
-       } ports[MLXSW_PORT_MAX_PORTS];
+       struct mlxsw_sp_sb_port *ports;
+       u32 cell_size;
 };
 
 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE)
@@ -207,11 +167,9 @@ struct mlxsw_sp_fib;
 
 struct mlxsw_sp_vr {
        u16 id; /* virtual router ID */
-       bool used;
-       enum mlxsw_sp_l3proto proto;
        u32 tb_id; /* kernel fib table id */
-       struct mlxsw_sp_lpm_tree *lpm_tree;
-       struct mlxsw_sp_fib *fib;
+       unsigned int rif_count;
+       struct mlxsw_sp_fib *fib4;
 };
 
 enum mlxsw_sp_span_type {
@@ -253,11 +211,14 @@ struct mlxsw_sp_port_mall_tc_entry {
 };
 
 struct mlxsw_sp_router {
-       struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
        struct mlxsw_sp_vr *vrs;
        struct rhashtable neigh_ht;
        struct rhashtable nexthop_group_ht;
        struct rhashtable nexthop_ht;
+       struct {
+               struct mlxsw_sp_lpm_tree *trees;
+               unsigned int tree_count;
+       } lpm;
        struct {
                struct delayed_work dw;
                unsigned long interval; /* ms */
@@ -269,6 +230,7 @@ struct mlxsw_sp_router {
 };
 
 struct mlxsw_sp_acl;
+struct mlxsw_sp_counter_pool;
 
 struct mlxsw_sp {
        struct {
@@ -296,7 +258,7 @@ struct mlxsw_sp {
        u32 ageing_time;
        struct mlxsw_sp_upper master_bridge;
        struct mlxsw_sp_upper *lags;
-       u8 port_to_module[MLXSW_PORT_MAX_PORTS];
+       u8 *port_to_module;
        struct mlxsw_sp_sb sb;
        struct mlxsw_sp_router router;
        struct mlxsw_sp_acl *acl;
@@ -304,6 +266,7 @@ struct mlxsw_sp {
                DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
        } kvdl;
 
+       struct mlxsw_sp_counter_pool *counter_pool;
        struct {
                struct mlxsw_sp_span_entry *entries;
                int entries_count;
@@ -317,6 +280,18 @@ mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
        return &mlxsw_sp->lags[lag_id];
 }
 
+static inline u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp,
+                                      u32 cells)
+{
+       return mlxsw_sp->sb.cell_size * cells;
+}
+
+static inline u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp,
+                                      u32 bytes)
+{
+       return DIV_ROUND_UP(bytes, mlxsw_sp->sb.cell_size);
+}
+
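The fixed MLXSW_SP_BYTES_PER_CELL constant is gone: the cell size is now read from
the device's CELL_SIZE resource at init time (see mlxsw_sp_buffers_init() further
down) and these two inlines convert in both directions. For illustration, assuming
a device-reported cell size of 96 bytes:

        /* mlxsw_sp->sb.cell_size == 96 is an illustrative value only */
        u32 cells = mlxsw_sp_bytes_cells(mlxsw_sp, 1500);  /* DIV_ROUND_UP(1500, 96) == 16 */
        u32 bytes = mlxsw_sp_cells_bytes(mlxsw_sp, cells); /* 16 * 96 == 1536 */

Note the round-up direction: converting bytes to cells never under-allocates.
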
 struct mlxsw_sp_port_pcpu_stats {
        u64                     rx_packets;
        u64                     rx_bytes;
@@ -386,6 +361,7 @@ struct mlxsw_sp_port {
 };
 
 bool mlxsw_sp_port_dev_check(const struct net_device *dev);
+struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev);
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
 void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
 
@@ -497,19 +473,6 @@ mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp,
        return NULL;
 }
 
-static inline struct mlxsw_sp_rif *
-mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
-                        const struct net_device *dev)
-{
-       int i;
-
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
-               if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
-                       return mlxsw_sp->rifs[i];
-
-       return NULL;
-}
-
 enum mlxsw_sp_flood_table {
        MLXSW_SP_FLOOD_TABLE_UC,
        MLXSW_SP_FLOOD_TABLE_BC,
@@ -570,8 +533,6 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
                        bool adding);
 struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid);
 void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f);
-void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
-                                struct mlxsw_sp_rif *r);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
                          enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
                          bool dwrr, u8 dwrr_weight);
@@ -608,10 +569,22 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
                                   unsigned long event, void *ptr);
-void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
-                                  struct mlxsw_sp_rif *r);
-
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
+int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
+int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
+                           unsigned long event, void *ptr);
+void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_rif *rif);
+int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport);
+void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
+int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_bridge_vrf_join(struct mlxsw_sp *mlxsw_sp,
+                            struct net_device *l3_dev);
+void mlxsw_sp_bridge_vrf_leave(struct mlxsw_sp *mlxsw_sp,
+                              struct net_device *l3_dev);
+
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
+                       u32 *p_entry_index);
 void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
 
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
@@ -620,6 +593,8 @@ struct mlxsw_sp_acl_rule_info {
        unsigned int priority;
        struct mlxsw_afk_element_values values;
        struct mlxsw_afa_block *act_block;
+       unsigned int counter_index;
+       bool counter_valid;
 };
 
 enum mlxsw_sp_acl_profile {
@@ -639,6 +614,8 @@ struct mlxsw_sp_acl_profile_ops {
                        void *ruleset_priv, void *rule_priv,
                        struct mlxsw_sp_acl_rule_info *rulei);
        void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
+       int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
+                                bool *activity);
 };
 
 struct mlxsw_sp_acl_ops {
@@ -679,6 +656,11 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_rule_info *rulei,
                               struct net_device *out_dev);
+int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_rule_info *rulei,
+                               u32 action, u16 vid, u16 proto, u8 prio);
+int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_rule_info *rulei);
 
 struct mlxsw_sp_acl_rule;
 
@@ -698,6 +680,9 @@ mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp,
                         unsigned long cookie);
 struct mlxsw_sp_acl_rule_info *
 mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule);
+int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_rule *rule,
+                               u64 *packets, u64 *bytes, u64 *last_use);
 
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
@@ -708,5 +693,14 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
                            __be16 protocol, struct tc_cls_flower_offload *f);
 void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
                             struct tc_cls_flower_offload *f);
+int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
+                         struct tc_cls_flower_offload *f);
+int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
+                             unsigned int counter_index, u64 *packets,
+                             u64 *bytes);
+int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                               unsigned int *p_counter_index);
+void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
+                               unsigned int counter_index);
 
 #endif
index 8a18b3aa70dc20d7464a14805e60cc838a87ce17..d3b791f69f5bb8b0332a9321cc258a7d7573d3d3 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/string.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <net/tc_act/tc_vlan.h>
 
 #include "reg.h"
 #include "core.h"
 #include "spectrum_acl_flex_keys.h"
 
 struct mlxsw_sp_acl {
+       struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_afk *afk;
        struct mlxsw_afa *afa;
        const struct mlxsw_sp_acl_ops *ops;
        struct rhashtable ruleset_ht;
+       struct list_head rules;
+       struct {
+               struct delayed_work dw;
+               unsigned long interval; /* ms */
+#define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000
+       } rule_activity_update;
        unsigned long priv[0];
        /* priv has to be always the last item */
 };
@@ -79,9 +87,13 @@ struct mlxsw_sp_acl_ruleset {
 
 struct mlxsw_sp_acl_rule {
        struct rhash_head ht_node; /* Member of rule HT */
+       struct list_head list;
        unsigned long cookie; /* HT key */
        struct mlxsw_sp_acl_ruleset *ruleset;
        struct mlxsw_sp_acl_rule_info *rulei;
+       u64 last_used;
+       u64 last_packets;
+       u64 last_bytes;
        unsigned long priv[0];
        /* priv has to be always the last item */
 };
@@ -237,6 +249,27 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
 }
 
+static int
+mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_rule_info *rulei)
+{
+       int err;
+
+       err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &rulei->counter_index);
+       if (err)
+               return err;
+       rulei->counter_valid = true;
+       return 0;
+}
+
+static void
+mlxsw_sp_acl_rulei_counter_free(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_rule_info *rulei)
+{
+       rulei->counter_valid = false;
+       mlxsw_sp_flow_counter_free(mlxsw_sp, rulei->counter_index);
+}
+
 struct mlxsw_sp_acl_rule_info *
 mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl)
 {
@@ -335,6 +368,41 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
                                          local_port, in_port);
 }
 
+int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_rule_info *rulei,
+                               u32 action, u16 vid, u16 proto, u8 prio)
+{
+       u8 ethertype;
+
+       if (action == TCA_VLAN_ACT_MODIFY) {
+               switch (proto) {
+               case ETH_P_8021Q:
+                       ethertype = 0;
+                       break;
+               case ETH_P_8021AD:
+                       ethertype = 1;
+                       break;
+               default:
+                       dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN protocol %#04x\n",
+                               proto);
+                       return -EINVAL;
+               }
+
+               return mlxsw_afa_block_append_vlan_modify(rulei->act_block,
+                                                         vid, prio, ethertype);
+       } else {
+               dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN action\n");
+               return -EINVAL;
+       }
+}
+
+int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_rule_info *rulei)
+{
+       return mlxsw_afa_block_append_counter(rulei->act_block,
+                                             rulei->counter_index);
+}
+
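Together with the counter-pool code added later in this diff, the flow for a
counted rule is: allocate a flow counter into the rule info, append a count action
that references it, and later read the counter back by index. A sketch using only
functions introduced in this patch (error handling trimmed; mlxsw_sp and rulei
assumed in scope):

        u64 packets, bytes;

        /* At rule-creation time (mlxsw_sp_acl_rule_create() does this): */
        mlxsw_sp_flow_counter_alloc(mlxsw_sp, &rulei->counter_index);
        rulei->counter_valid = true;
        mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei);  /* bind the action */

        /* At stats-query time: */
        mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index,
                                  &packets, &bytes);
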
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_acl_ruleset *ruleset,
@@ -358,8 +426,14 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
                err = PTR_ERR(rule->rulei);
                goto err_rulei_create;
        }
+
+       err = mlxsw_sp_acl_rulei_counter_alloc(mlxsw_sp, rule->rulei);
+       if (err)
+               goto err_counter_alloc;
        return rule;
 
+err_counter_alloc:
+       mlxsw_sp_acl_rulei_destroy(rule->rulei);
 err_rulei_create:
        kfree(rule);
 err_alloc:
@@ -372,6 +446,7 @@ void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 
+       mlxsw_sp_acl_rulei_counter_free(mlxsw_sp, rule->rulei);
        mlxsw_sp_acl_rulei_destroy(rule->rulei);
        kfree(rule);
        mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
@@ -393,6 +468,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_rhashtable_insert;
 
+       list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
        return 0;
 
 err_rhashtable_insert:
@@ -406,6 +482,7 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
        const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
 
+       list_del(&rule->list);
        rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
                               mlxsw_sp_acl_rule_ht_params);
        ops->rule_del(mlxsw_sp, rule->priv);
@@ -426,6 +503,90 @@ mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule)
        return rule->rulei;
 }
 
+static int mlxsw_sp_acl_rule_activity_update(struct mlxsw_sp *mlxsw_sp,
+                                            struct mlxsw_sp_acl_rule *rule)
+{
+       struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
+       const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+       bool active;
+       int err;
+
+       err = ops->rule_activity_get(mlxsw_sp, rule->priv, &active);
+       if (err)
+               return err;
+       if (active)
+               rule->last_used = jiffies;
+       return 0;
+}
+
+static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl)
+{
+       struct mlxsw_sp_acl_rule *rule;
+       int err;
+
+       /* Protect internal structures from changes */
+       rtnl_lock();
+       list_for_each_entry(rule, &acl->rules, list) {
+               err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp,
+                                                       rule);
+               if (err)
+                       goto err_rule_update;
+       }
+       rtnl_unlock();
+       return 0;
+
+err_rule_update:
+       rtnl_unlock();
+       return err;
+}
+
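The rtnl_lock() here is what makes the plain list walk safe: rule add/del comes
from the TC offload path, which runs under RTNL, so holding it excludes concurrent
list_add_tail()/list_del() on acl->rules. A sketch of the invariant (the
ASSERT_RTNL() is illustrative, not part of the patch):

        /* Writers (mlxsw_sp_acl_rule_add/del) only run under RTNL: */
        ASSERT_RTNL();
        list_add_tail(&rule->list, &mlxsw_sp->acl->rules);

        /* so a reader that takes RTNL needs no extra locking: */
        rtnl_lock();
        list_for_each_entry(rule, &acl->rules, list)
                /* ... per-rule work ... */;
        rtnl_unlock();
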
+static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl)
+{
+       unsigned long interval = acl->rule_activity_update.interval;
+
+       mlxsw_core_schedule_dw(&acl->rule_activity_update.dw,
+                              msecs_to_jiffies(interval));
+}
+
+static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work)
+{
+       struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl,
+                                               rule_activity_update.dw.work);
+       int err;
+
+       err = mlxsw_sp_acl_rules_activity_update(acl);
+       if (err)
+               dev_err(acl->mlxsw_sp->bus_info->dev, "Could not update acl activity\n");
+
+       mlxsw_sp_acl_rule_activity_work_schedule(acl);
+}
+
+int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_rule *rule,
+                               u64 *packets, u64 *bytes, u64 *last_use)
+{
+       struct mlxsw_sp_acl_rule_info *rulei;
+       u64 current_packets;
+       u64 current_bytes;
+       int err;
+
+       rulei = mlxsw_sp_acl_rule_rulei(rule);
+       err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index,
+                                       &current_packets, &current_bytes);
+       if (err)
+               return err;
+
+       *packets = current_packets - rule->last_packets;
+       *bytes = current_bytes - rule->last_bytes;
+       *last_use = rule->last_used;
+
+       rule->last_bytes = current_bytes;
+       rule->last_packets = current_packets;
+
+       return 0;
+}
+
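Note the delta semantics: hardware flow counters are cumulative, so the function
returns the growth since the previous call and then advances the cached baseline.
With illustrative numbers: if the hardware reports 1000 packets and
rule->last_packets is 600, the caller sees packets == 400 and the baseline moves
to 1000. This incremental form is what mlxsw_sp_flower_stats() (declared in
spectrum.h above) needs when feeding results back to the TC stack.
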
 #define MLXSW_SP_KDVL_ACT_EXT_SIZE 1
 
 static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
@@ -434,7 +595,6 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
        struct mlxsw_sp *mlxsw_sp = priv;
        char pefa_pl[MLXSW_REG_PEFA_LEN];
        u32 kvdl_index;
-       int ret;
        int err;
 
        /* The first action set of a TCAM entry is stored directly in TCAM,
@@ -443,10 +603,10 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
        if (is_first)
                return 0;
 
-       ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE);
-       if (ret < 0)
-               return ret;
-       kvdl_index = ret;
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE,
+                                 &kvdl_index);
+       if (err)
+               return err;
        mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
        if (err)
@@ -475,13 +635,11 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
        struct mlxsw_sp *mlxsw_sp = priv;
        char ppbs_pl[MLXSW_REG_PPBS_LEN];
        u32 kvdl_index;
-       int ret;
        int err;
 
-       ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1);
-       if (ret < 0)
-               return ret;
-       kvdl_index = ret;
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
+       if (err)
+               return err;
        mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl);
        if (err)
@@ -518,7 +676,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
        if (!acl)
                return -ENOMEM;
        mlxsw_sp->acl = acl;
-
+       acl->mlxsw_sp = mlxsw_sp;
        acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
                                                       ACL_FLEX_KEYS),
                                    mlxsw_sp_afk_blocks,
@@ -541,11 +699,18 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
        if (err)
                goto err_rhashtable_init;
 
+       INIT_LIST_HEAD(&acl->rules);
        err = acl_ops->init(mlxsw_sp, acl->priv);
        if (err)
                goto err_acl_ops_init;
 
        acl->ops = acl_ops;
+
+       /* Create the delayed work for rule activity updates */
+       INIT_DELAYED_WORK(&acl->rule_activity_update.dw,
+                         mlxsw_sp_acl_rule_activity_update_work);
+       acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS;
+       mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0);
        return 0;
 
 err_acl_ops_init:
@@ -564,7 +729,9 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
        struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
        const struct mlxsw_sp_acl_ops *acl_ops = acl->ops;
 
+       cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw);
        acl_ops->fini(mlxsw_sp, acl->priv);
+       WARN_ON(!list_empty(&acl->rules));
        rhashtable_destroy(&acl->ruleset_ht);
        mlxsw_afa_destroy(acl->afa);
        mlxsw_afk_destroy(acl->afk);
index 82b81cf7f4a7de875191c1b7dcf69b0fd27854d1..af7b7bad48df7746946c9d0dab44cb8bdf13f6b6 100644 (file)
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
        MLXSW_AFK_ELEMENT_INST_BUF(DMAC, 0x00, 6),
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
        MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
        MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x00, 6),
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
        MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
 };
 
@@ -65,6 +69,8 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
        MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
        MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16),
 };
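
The new instances follow the MLXSW_AFK_ELEMENT_INST_U32(element, offset, shift,
size) geometry: each entry pins an abstract key element to a byte offset, bit
shift and bit width inside the flex-key block, so in the L2 blocks VID and PCP
share one 32-bit word. Extracting them from that word would look roughly like
this (illustrative only; 'block' is a hypothetical key buffer and the real
packing is done by the flex-key core):

        u32 word = get_unaligned_be32(block + 0x08);
        u16 vid = word & GENMASK(11, 0);         /* VID: shift 0,  12 bits */
        u8 pcp = (word >> 13) & GENMASK(2, 0);   /* PCP: shift 13,  3 bits */
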
index 7382832215faa0d2211625a53ee6d7f328686ba2..3a24289979d9a0bf41ec87d008e0e55c404bfba6 100644 (file)
@@ -561,6 +561,24 @@ mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
 }
 
+static int
+mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                           struct mlxsw_sp_acl_tcam_region *region,
+                                           unsigned int offset,
+                                           bool *activity)
+{
+       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+       int err;
+
+       mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
+                            region->tcam_region_info, offset);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+       if (err)
+               return err;
+       *activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
+       return 0;
+}
+
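The MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ opcode makes the activity bit a
"hit since last query" flag that resets as a side effect of being read. That is
why the periodic poll in spectrum_acl.c can maintain rule->last_used without ever
writing the bit back: each pass observes exactly the activity that occurred since
the previous pass.
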
 #define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
 
 static int
@@ -940,6 +958,19 @@ static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
 }
 
+static int
+mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp_acl_tcam_entry *entry,
+                                    bool *activity)
+{
+       struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
+       struct mlxsw_sp_acl_tcam_region *region = chunk->region;
+
+       return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region,
+                                                          entry->parman_item.index,
+                                                          activity);
+}
+
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
        MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
        MLXSW_AFK_ELEMENT_DMAC,
@@ -950,6 +981,8 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
        MLXSW_AFK_ELEMENT_DST_IP4,
        MLXSW_AFK_ELEMENT_DST_L4_PORT,
        MLXSW_AFK_ELEMENT_SRC_L4_PORT,
+       MLXSW_AFK_ELEMENT_VID,
+       MLXSW_AFK_ELEMENT_PCP,
 };
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
@@ -1046,6 +1079,16 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv)
        mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry);
 }
 
+static int
+mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                          void *rule_priv, bool *activity)
+{
+       struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
+
+       return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry,
+                                                   activity);
+}
+
 static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
        .ruleset_priv_size      = sizeof(struct mlxsw_sp_acl_tcam_flower_ruleset),
        .ruleset_add            = mlxsw_sp_acl_tcam_flower_ruleset_add,
@@ -1055,6 +1098,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
        .rule_priv_size         = sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
        .rule_add               = mlxsw_sp_acl_tcam_flower_rule_add,
        .rule_del               = mlxsw_sp_acl_tcam_flower_rule_del,
+       .rule_activity_get      = mlxsw_sp_acl_tcam_flower_rule_activity_get,
 };
 
 static const struct mlxsw_sp_acl_profile_ops *
index a7468262f118979c8914e8acea042361b838f69e..997189cfe7fd58c7e93419d69cf609420eb1633f 100644 (file)
@@ -162,8 +162,8 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 }
 
 static const u16 mlxsw_sp_pbs[] = {
-       [0] = 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN),
-       [9] = 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU),
+       [0] = 2 * ETH_FRAME_LEN,
+       [9] = 2 * MLXSW_PORT_MAX_MTU,
 };
 
 #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs)
@@ -171,20 +171,22 @@ static const u16 mlxsw_sp_pbs[] = {
 
 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char pbmc_pl[MLXSW_REG_PBMC_LEN];
        int i;
 
        mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port,
                            0xffff, 0xffff / 2);
        for (i = 0; i < MLXSW_SP_PBS_LEN; i++) {
+               u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]);
+
                if (i == MLXSW_SP_PB_UNUSED)
                        continue;
-               mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, mlxsw_sp_pbs[i]);
+               mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size);
        }
        mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl,
                                         MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0);
-       return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core,
-                              MLXSW_REG(pbmc), pbmc_pl);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
 }
 
 static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -209,11 +211,25 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port)
        return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port);
 }
 
-#define MLXSW_SP_SB_PR_INGRESS_SIZE                            \
-       (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS))
+static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp)
+{
+       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+
+       mlxsw_sp->sb.ports = kcalloc(max_ports, sizeof(struct mlxsw_sp_sb_port),
+                                    GFP_KERNEL);
+       if (!mlxsw_sp->sb.ports)
+               return -ENOMEM;
+       return 0;
+}
+
+static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       kfree(mlxsw_sp->sb.ports);
+}
+
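This replaces the fixed ports[MLXSW_PORT_MAX_PORTS] array that used to be embedded
in struct mlxsw_sp_sb (see the header change above) with an allocation sized by
mlxsw_core_max_ports(), matching the port_to_module change and the loop-bound
changes below, so the port count comes from the device rather than from a
compile-time constant. kcalloc() is the right call here: it checks the count *
size multiplication for overflow and returns zeroed memory, so every per-port
occupancy structure starts out cleared.
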
+#define MLXSW_SP_SB_PR_INGRESS_SIZE    12440000
 #define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000)
-#define MLXSW_SP_SB_PR_EGRESS_SIZE                             \
-       (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS))
+#define MLXSW_SP_SB_PR_EGRESS_SIZE     13232000
 
 #define MLXSW_SP_SB_PR(_mode, _size)   \
        {                               \
@@ -223,18 +239,17 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port)
 
 static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = {
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                      MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_SIZE)),
+                      MLXSW_SP_SB_PR_INGRESS_SIZE),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                      MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_MNG_SIZE)),
+                      MLXSW_SP_SB_PR_INGRESS_MNG_SIZE),
 };
 
 #define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress)
 
 static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = {
-       MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-                      MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_EGRESS_SIZE)),
+       MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
        MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
@@ -251,11 +266,9 @@ static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
        int err;
 
        for (i = 0; i < prs_len; i++) {
-               const struct mlxsw_sp_sb_pr *pr;
+               u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size);
 
-               pr = &prs[i];
-               err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir,
-                                          pr->mode, pr->size);
+               err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size);
                if (err)
                        return err;
        }
@@ -284,7 +297,7 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp)
        }
 
 static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 8, 0),
+       MLXSW_SP_SB_CM(10000, 8, 0),
        MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
        MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
        MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
@@ -293,20 +306,20 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
        MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
        MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
        MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(20000), 1, 3),
+       MLXSW_SP_SB_CM(20000, 1, 3),
 };
 
 #define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress)
 
 static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
+       MLXSW_SP_SB_CM(1500, 9, 0),
        MLXSW_SP_SB_CM(0, 0, 0),
        MLXSW_SP_SB_CM(0, 0, 0),
        MLXSW_SP_SB_CM(0, 0, 0),
@@ -330,7 +343,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0),
+       MLXSW_SP_SB_CM(10000, 0, 0),
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
@@ -370,13 +383,17 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 
        for (i = 0; i < cms_len; i++) {
                const struct mlxsw_sp_sb_cm *cm;
+               u32 min_buff;
 
                if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS)
                        continue; /* PG number 8 does not exist, skip it */
                cm = &cms[i];
+               /* All pools are initialized using dynamic thresholds,
+                * therefore 'max_buff' isn't specified in cells.
+                */
+               min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff);
                err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir,
-                                          cm->min_buff, cm->max_buff,
-                                          cm->pool);
+                                          min_buff, cm->max_buff, cm->pool);
                if (err)
                        return err;
        }
@@ -484,21 +501,21 @@ struct mlxsw_sp_sb_mm {
        }
 
 static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
-       MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
+       MLXSW_SP_SB_MM(20000, 0xff, 0),
 };
 
 #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms)
@@ -511,10 +528,15 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
 
        for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) {
                const struct mlxsw_sp_sb_mm *mc;
+               u32 min_buff;
 
                mc = &mlxsw_sp_sb_mms[i];
-               mlxsw_reg_sbmm_pack(sbmm_pl, i, mc->min_buff,
-                                   mc->max_buff, mc->pool);
+               /* All pools are initialized using dynamic thresholds,
+                * therefore 'max_buff' isn't specified in cells.
+                */
+               min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff);
+               mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff,
+                                   mc->pool);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl);
                if (err)
                        return err;
@@ -522,32 +544,53 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
        return 0;
 }
 
-#define MLXSW_SP_SB_SIZE (16 * 1024 * 1024)
-
 int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
 {
+       u64 sb_size;
        int err;
 
-       err = mlxsw_sp_sb_prs_init(mlxsw_sp);
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
+               return -EIO;
+       mlxsw_sp->sb.cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
+               return -EIO;
+       sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE);
+
+       err = mlxsw_sp_sb_ports_init(mlxsw_sp);
        if (err)
                return err;
+       err = mlxsw_sp_sb_prs_init(mlxsw_sp);
+       if (err)
+               goto err_sb_prs_init;
        err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp);
        if (err)
-               return err;
+               goto err_sb_cpu_port_sb_cms_init;
        err = mlxsw_sp_sb_mms_init(mlxsw_sp);
        if (err)
-               return err;
-       return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0,
-                                  MLXSW_SP_SB_SIZE,
-                                  MLXSW_SP_SB_POOL_COUNT,
-                                  MLXSW_SP_SB_POOL_COUNT,
-                                  MLXSW_SP_SB_TC_COUNT,
-                                  MLXSW_SP_SB_TC_COUNT);
+               goto err_sb_mms_init;
+       err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size,
+                                 MLXSW_SP_SB_POOL_COUNT,
+                                 MLXSW_SP_SB_POOL_COUNT,
+                                 MLXSW_SP_SB_TC_COUNT,
+                                 MLXSW_SP_SB_TC_COUNT);
+       if (err)
+               goto err_devlink_sb_register;
+
+       return 0;
+
+err_devlink_sb_register:
+err_sb_mms_init:
+err_sb_cpu_port_sb_cms_init:
+err_sb_prs_init:
+       mlxsw_sp_sb_ports_fini(mlxsw_sp);
+       return err;
 }
 
 void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp)
 {
        devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0);
+       mlxsw_sp_sb_ports_fini(mlxsw_sp);
 }
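
mlxsw_sp_buffers_init() now follows the standard goto-unwind ladder: each failure
label undoes, in reverse order, only the steps that already succeeded. All four
labels fall through to a single mlxsw_sp_sb_ports_fini() call because the ports
array is the only step that allocates anything; the shared-buffer register writes
leave nothing to undo.
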
 
 int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -596,7 +639,7 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
        struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
 
        pool_info->pool_type = (enum devlink_sb_pool_type) dir;
-       pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size);
+       pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size);
        pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
        return 0;
 }
@@ -606,9 +649,9 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
                         enum devlink_sb_threshold_type threshold_type)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size);
        u8 pool = pool_get(pool_index);
        enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
-       u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size);
        enum mlxsw_reg_sbpr_mode mode;
 
        if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
@@ -627,7 +670,7 @@ static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool,
 
        if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC)
                return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
-       return MLXSW_SP_CELLS_TO_BYTES(max_buff);
+       return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff);
 }
 
 static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool,
@@ -645,7 +688,7 @@ static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool,
                        return -EINVAL;
                *p_max_buff = val;
        } else {
-               *p_max_buff = MLXSW_SP_BYTES_TO_CELLS(threshold);
+               *p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold);
        }
        return 0;
 }
@@ -761,7 +804,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
 
        masked_count = 0;
        for (local_port = cb_ctx.local_port_1;
-            local_port < MLXSW_PORT_MAX_PORTS; local_port++) {
+            local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
                if (!mlxsw_sp->ports[local_port])
                        continue;
                for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
@@ -775,7 +818,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
        }
        masked_count = 0;
        for (local_port = cb_ctx.local_port_1;
-            local_port < MLXSW_PORT_MAX_PORTS; local_port++) {
+            local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
                if (!mlxsw_sp->ports[local_port])
                        continue;
                for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
@@ -817,7 +860,7 @@ next_batch:
                mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
                mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
        }
-       for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) {
+       for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
                if (!mlxsw_sp->ports[local_port])
                        continue;
                mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
@@ -847,7 +890,7 @@ do_query:
                                    cb_priv);
        if (err)
                goto out;
-       if (local_port < MLXSW_PORT_MAX_PORTS)
+       if (local_port < mlxsw_core_max_ports(mlxsw_core))
                goto next_batch;
 
 out:
@@ -882,7 +925,7 @@ next_batch:
                mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
                mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
        }
-       for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) {
+       for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
                if (!mlxsw_sp->ports[local_port])
                        continue;
                mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
@@ -908,7 +951,7 @@ do_query:
                                    &bulk_list, NULL, 0);
        if (err)
                goto out;
-       if (local_port < MLXSW_PORT_MAX_PORTS)
+       if (local_port < mlxsw_core_max_ports(mlxsw_core))
                goto next_batch;
 
 out:
@@ -932,8 +975,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
        struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
                                                       pool, dir);
 
-       *p_cur = MLXSW_SP_CELLS_TO_BYTES(pm->occ.cur);
-       *p_max = MLXSW_SP_CELLS_TO_BYTES(pm->occ.max);
+       *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur);
+       *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max);
        return 0;
 }
 
@@ -951,7 +994,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port,
        struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
                                                       pg_buff, dir);
 
-       *p_cur = MLXSW_SP_CELLS_TO_BYTES(cm->occ.cur);
-       *p_max = MLXSW_SP_CELLS_TO_BYTES(cm->occ.max);
+       *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.cur);
+       *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.max);
        return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c
new file mode 100644 (file)
index 0000000..0f46775
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum_cnt.h"
+
+#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096
+
+struct mlxsw_sp_counter_sub_pool {
+       unsigned int base_index;
+       unsigned int size;
+       unsigned int entry_size;
+       unsigned int bank_count;
+};
+
+struct mlxsw_sp_counter_pool {
+       unsigned int pool_size;
+       unsigned long *usage; /* Usage bitmap */
+       struct mlxsw_sp_counter_sub_pool *sub_pools;
+};
+
+static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = {
+       [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = {
+               .bank_count = 6,
+       },
+       [MLXSW_SP_COUNTER_SUB_POOL_RIF] = {
+               .bank_count = 2,
+       }
+};
+
+static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp)
+{
+       unsigned int total_bank_config = 0;
+       unsigned int pool_size;
+       int i;
+
+       pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE);
+       /* Check the config is valid: no bank oversubscription */
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++)
+               total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count;
+       if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1)
+               return -EINVAL;
+       return 0;
+}
+
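A worked example of the check, with an illustrative pool size: if the device
reports COUNTER_POOL_SIZE == 32768, the pool holds 32768 / 4096 = 8 banks, and the
static request of 6 (flow) + 2 (RIF) = 8 banks passes the "at most banks + 1"
test. The + 1 tolerates a final, partially backed bank when the pool size is not
bank-aligned; mlxsw_sp_counter_pool_init() below truncates the last sub-pool for
exactly that case.
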
+static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_counter_sub_pool *sub_pool;
+
+       /* Prepare generic flow pool */
+       sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW];
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES))
+               return -EIO;
+       sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                                 COUNTER_SIZE_PACKETS_BYTES);
+       /* Prepare erif pool */
+       sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF];
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC))
+               return -EIO;
+       sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                                 COUNTER_SIZE_ROUTER_BASIC);
+       return 0;
+}
+
+int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_counter_sub_pool *sub_pool;
+       struct mlxsw_sp_counter_pool *pool;
+       unsigned int base_index;
+       unsigned int map_size;
+       int i;
+       int err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE))
+               return -EIO;
+
+       err = mlxsw_sp_counter_pool_validate(mlxsw_sp);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp);
+       if (err)
+               return err;
+
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool)
+               return -ENOMEM;
+
+       pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE);
+       map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long);
+
+       pool->usage = kzalloc(map_size, GFP_KERNEL);
+       if (!pool->usage) {
+               err = -ENOMEM;
+               goto err_usage_alloc;
+       }
+
+       pool->sub_pools = mlxsw_sp_counter_sub_pools;
+       /* Allocation is based on the bank count, which is specified
+        * statically for each sub-pool.
+        */
+       base_index = 0;
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) {
+               sub_pool = &pool->sub_pools[i];
+               sub_pool->size = sub_pool->bank_count *
+                                MLXSW_SP_COUNTER_POOL_BANK_SIZE;
+               sub_pool->base_index = base_index;
+               base_index += sub_pool->size;
+               /* The last sub-pool may extend past the end of the pool,
+                * in which case it is truncated and its last bank can't be
+                * fully used.
+                */
+               if (sub_pool->base_index + sub_pool->size > pool->pool_size)
+                       sub_pool->size = pool->pool_size - sub_pool->base_index;
+       }
+
+       mlxsw_sp->counter_pool = pool;
+       return 0;
+
+err_usage_alloc:
+       kfree(pool);
+       return err;
+}
+
+void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool;
+
+       WARN_ON(find_first_bit(pool->usage, pool->pool_size) !=
+                              pool->pool_size);
+       kfree(pool->usage);
+       kfree(pool);
+}
+
+int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                          enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+                          unsigned int *p_counter_index)
+{
+       struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool;
+       struct mlxsw_sp_counter_sub_pool *sub_pool;
+       unsigned int entry_index;
+       unsigned int stop_index;
+       int i;
+
+       sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id];
+       stop_index = sub_pool->base_index + sub_pool->size;
+       entry_index = sub_pool->base_index;
+
+       entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index);
+       if (entry_index == stop_index)
+               return -ENOBUFS;
+       /* A sub-pool's size is not necessarily a multiple of the entry
+        * size, so check that the entry does not overflow the sub-pool
+        */
+       if (entry_index + sub_pool->entry_size > stop_index)
+               return -ENOBUFS;
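+       /* A counter occupies entry_size consecutive indices in the bitmap */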
+       for (i = 0; i < sub_pool->entry_size; i++)
+               __set_bit(entry_index + i, pool->usage);
+
+       *p_counter_index = entry_index;
+       return 0;
+}
+
+void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp,
+                          enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+                          unsigned int counter_index)
+{
+       struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool;
+       struct mlxsw_sp_counter_sub_pool *sub_pool;
+       int i;
+
+       if (WARN_ON(counter_index >= pool->pool_size))
+               return;
+       sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id];
+       for (i = 0; i < sub_pool->entry_size; i++)
+               __clear_bit(counter_index + i, pool->usage);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
new file mode 100644 (file)
index 0000000..fd34d0a
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_CNT_H
+#define _MLXSW_SPECTRUM_CNT_H
+
+#include "spectrum.h"
+
+enum mlxsw_sp_counter_sub_pool_id {
+       MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+       MLXSW_SP_COUNTER_SUB_POOL_RIF,
+};
+
+int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                          enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+                          unsigned int *p_counter_index);
+void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp,
+                          enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+                          unsigned int counter_index);
+int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp);
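+
+/* Illustrative usage sketch (not taken from this patch): allocate an index
+ * from a sub-pool and later release it with the same sub-pool id:
+ *
+ *     unsigned int counter_index;
+ *     int err;
+ *
+ *     err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ *                                  &counter_index);
+ *     if (!err)
+ *             mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ *                                   counter_index);
+ */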
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
new file mode 100644 (file)
index 0000000..ea56f6a
--- /dev/null
@@ -0,0 +1,351 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <net/devlink.h>
+
+#include "spectrum.h"
+#include "spectrum_dpipe.h"
+#include "spectrum_router.h"
+
+enum mlxsw_sp_field_metadata_id {
+       MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
+       MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
+       MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+};
+
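+/* Driver-specific metadata fields exposed through devlink dpipe. erif_port
+ * carries the RIF index and is mapped to the ifindex of the RIF's netdevice.
+ */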
+static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = {
+       { .name = "erif_port",
+         .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
+         .bitwidth = 32,
+         .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
+       },
+       { .name = "l3_forward",
+         .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
+         .bitwidth = 1,
+       },
+       { .name = "l3_drop",
+         .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+         .bitwidth = 1,
+       },
+};
+
+enum mlxsw_sp_dpipe_header_id {
+       MLXSW_SP_DPIPE_HEADER_METADATA,
+};
+
+static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = {
+       .name = "mlxsw_meta",
+       .id = MLXSW_SP_DPIPE_HEADER_METADATA,
+       .fields = mlxsw_sp_dpipe_fields_metadata,
+       .fields_count = ARRAY_SIZE(mlxsw_sp_dpipe_fields_metadata),
+};
+
+static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = {
+       &mlxsw_sp_dpipe_header_metadata,
+};
+
+static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = {
+       .headers = mlxsw_dpipe_headers,
+       .headers_count = ARRAY_SIZE(mlxsw_dpipe_headers),
+};
+
+static int mlxsw_sp_dpipe_table_erif_actions_dump(void *priv,
+                                                 struct sk_buff *skb)
+{
+       struct devlink_dpipe_action action = {0};
+       int err;
+
+       action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+       action.header = &mlxsw_sp_dpipe_header_metadata;
+       action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD;
+
+       err = devlink_dpipe_action_put(skb, &action);
+       if (err)
+               return err;
+
+       action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+       action.header = &mlxsw_sp_dpipe_header_metadata;
+       action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP;
+
+       return devlink_dpipe_action_put(skb, &action);
+}
+
+static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv,
+                                                 struct sk_buff *skb)
+{
+       struct devlink_dpipe_match match = {0};
+
+       match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+       match.header = &mlxsw_sp_dpipe_header_metadata;
+       match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+       return devlink_dpipe_match_put(skb, &match);
+}
+
+static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry)
+{
+       unsigned int value_count, value_index;
+       struct devlink_dpipe_value *value;
+
+       value = entry->action_values;
+       value_count = entry->action_values_count;
+       for (value_index = 0; value_index < value_count; value_index++) {
+               kfree(value[value_index].value);
+               kfree(value[value_index].mask);
+       }
+
+       value = entry->match_values;
+       value_count = entry->match_values_count;
+       for (value_index = 0; value_index < value_count; value_index++) {
+               kfree(value[value_index].value);
+               kfree(value[value_index].mask);
+       }
+}
+
+static void
+mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match,
+                                  struct devlink_dpipe_action *action)
+{
+       action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+       action->header = &mlxsw_sp_dpipe_header_metadata;
+       action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD;
+
+       match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+       match->header = &mlxsw_sp_dpipe_header_metadata;
+       match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+}
+
+static int mlxsw_sp_erif_entry_prepare(struct devlink_dpipe_entry *entry,
+                                      struct devlink_dpipe_value *match_value,
+                                      struct devlink_dpipe_match *match,
+                                      struct devlink_dpipe_value *action_value,
+                                      struct devlink_dpipe_action *action)
+{
+       entry->match_values = match_value;
+       entry->match_values_count = 1;
+
+       entry->action_values = action_value;
+       entry->action_values_count = 1;
+
+       match_value->match = match;
+       match_value->value_size = sizeof(u32);
+       match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+       if (!match_value->value)
+               return -ENOMEM;
+
+       action_value->action = action;
+       action_value->value_size = sizeof(u32);
+       action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+       if (!action_value->value)
+               goto err_action_alloc;
+       return 0;
+
+err_action_alloc:
+       kfree(match_value->value);
+       return -ENOMEM;
+}
+
+static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp,
+                                  struct devlink_dpipe_entry *entry,
+                                  struct mlxsw_sp_rif *rif,
+                                  bool counters_enabled)
+{
+       u32 *action_value;
+       u32 *rif_value;
+       u64 cnt;
+       int err;
+
+       /* Set Match RIF index */
+       rif_value = entry->match_values->value;
+       *rif_value = mlxsw_sp_rif_index(rif);
+       entry->match_values->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+       entry->match_values->mapping_valid = true;
+
+       /* Set Action Forwarding */
+       action_value = entry->action_values->value;
+       *action_value = 1;
+
+       entry->counter_valid = false;
+       entry->counter = 0;
+       if (!counters_enabled)
+               return 0;
+
+       entry->index = mlxsw_sp_rif_index(rif);
+       err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
+                                            MLXSW_SP_RIF_COUNTER_EGRESS,
+                                            &cnt);
+       if (!err) {
+               entry->counter = cnt;
+               entry->counter_valid = true;
+       }
+       return 0;
+}
+
+static int
+mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled,
+                                struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+       struct devlink_dpipe_value match_value = {{0}}, action_value = {{0}};
+       struct devlink_dpipe_action action = {0};
+       struct devlink_dpipe_match match = {0};
+       struct devlink_dpipe_entry entry = {0};
+       struct mlxsw_sp *mlxsw_sp = priv;
+       unsigned int rif_count;
+       int i, j;
+       int err;
+
+       mlxsw_sp_erif_match_action_prepare(&match, &action);
+       err = mlxsw_sp_erif_entry_prepare(&entry, &match_value, &match,
+                                         &action_value, &action);
+       if (err)
+               return err;
+
+       rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+       rtnl_lock();
+       i = 0;
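+       /* Dump the table in chunks: when the current skb fills up
+        * (-EMSGSIZE), close the dump context and continue from the same
+        * RIF index with a fresh one. j counts the entries appended to
+        * the current chunk.
+        */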
+start_again:
+       err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+       if (err)
+               goto err_ctx_prepare;
+       j = 0;
+       for (; i < rif_count; i++) {
+               if (!mlxsw_sp->rifs[i])
+                       continue;
+               err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry,
+                                             mlxsw_sp->rifs[i],
+                                             counters_enabled);
+               if (err)
+                       goto err_entry_get;
+               err = devlink_dpipe_entry_ctx_append(dump_ctx, &entry);
+               if (err) {
+                       if (err == -EMSGSIZE) {
+                               if (!j)
+                                       goto err_entry_append;
+                               break;
+                       }
+                       goto err_entry_append;
+               }
+               j++;
+       }
+
+       devlink_dpipe_entry_ctx_close(dump_ctx);
+       if (i != rif_count)
+               goto start_again;
+       rtnl_unlock();
+
+       mlxsw_sp_erif_entry_clear(&entry);
+       return 0;
+err_ctx_prepare:
+err_entry_append:
+err_entry_get:
+       rtnl_unlock();
+       mlxsw_sp_erif_entry_clear(&entry);
+       return err;
+}
+
+static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable)
+{
+       struct mlxsw_sp *mlxsw_sp = priv;
+       int i;
+
+       rtnl_lock();
+       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+               if (!mlxsw_sp->rifs[i])
+                       continue;
+               if (enable)
+                       mlxsw_sp_rif_counter_alloc(mlxsw_sp,
+                                                  mlxsw_sp->rifs[i],
+                                                  MLXSW_SP_RIF_COUNTER_EGRESS);
+               else
+                       mlxsw_sp_rif_counter_free(mlxsw_sp,
+                                                 mlxsw_sp->rifs[i],
+                                                 MLXSW_SP_RIF_COUNTER_EGRESS);
+       }
+       rtnl_unlock();
+       return 0;
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = {
+       .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump,
+       .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump,
+       .entries_dump = mlxsw_sp_table_erif_entries_dump,
+       .counters_set_update = mlxsw_sp_table_erif_counters_update,
+};
+
+static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+       u64 table_size;
+
+       table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+       return devlink_dpipe_table_register(devlink,
+                                           MLXSW_SP_DPIPE_TABLE_NAME_ERIF,
+                                           &mlxsw_sp_erif_ops,
+                                           mlxsw_sp, table_size,
+                                           false);
+}
+
+static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+       devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF);
+}
+
+int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+       int err;
+
+       err = devlink_dpipe_headers_register(devlink,
+                                            &mlxsw_sp_dpipe_headers);
+       if (err)
+               return err;
+       err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp);
+       if (err)
+               goto err_erif_register;
+       return 0;
+
+err_erif_register:
+       devlink_dpipe_headers_unregister(devlink);
+       return err;
+}
+
+void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+       mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
+       devlink_dpipe_headers_unregister(devlink);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
new file mode 100644 (file)
index 0000000..d208929
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_PIPELINE_H_
+#define _MLXSW_PIPELINE_H_
+
+int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp);
+
+#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
+
+#endif /* _MLXSW_PIPELINE_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index ae6cccc666e4619bbb1a8360b5cc090d303da60b..3e7a0bcbba72d56d74eb5f3c7d42056487bbc4cd 100644 (file)
@@ -39,6 +39,7 @@
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
 
 #include "spectrum.h"
 #include "core_acl_flex_keys.h"
@@ -55,6 +56,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
        if (tc_no_actions(exts))
                return 0;
 
+       /* Count action is inserted first */
+       err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei);
+       if (err)
+               return err;
+
        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
                if (is_tcf_gact_shot(a)) {
@@ -73,6 +79,15 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                                                         out_dev);
                        if (err)
                                return err;
+               } else if (is_tcf_vlan(a)) {
+                       u16 proto = be16_to_cpu(tcf_vlan_push_proto(a));
+                       u32 action = tcf_vlan_action(a);
+                       u8 prio = tcf_vlan_push_prio(a);
+                       u16 vid = tcf_vlan_push_vid(a);
+
+                       return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei,
+                                                          action, vid,
+                                                          proto, prio);
                } else {
                        dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");
                        return -EOPNOTSUPP;
@@ -173,7 +188,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
-             BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+             BIT(FLOW_DISSECTOR_KEY_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN))) {
                dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n");
                return -EOPNOTSUPP;
        }
@@ -234,6 +250,27 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
                                               sizeof(key->src));
        }
 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_dissector_key_vlan *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->key);
+               struct flow_dissector_key_vlan *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->mask);
+               if (mask->vlan_id != 0)
+                       mlxsw_sp_acl_rulei_keymask_u32(rulei,
+                                                      MLXSW_AFK_ELEMENT_VID,
+                                                      key->vlan_id,
+                                                      mask->vlan_id);
+               if (mask->vlan_priority != 0)
+                       mlxsw_sp_acl_rulei_keymask_u32(rulei,
+                                                      MLXSW_AFK_ELEMENT_PCP,
+                                                      key->vlan_priority,
+                                                      mask->vlan_priority);
+       }
+
        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
                mlxsw_sp_flower_parse_ipv4(rulei, f);
 
@@ -314,3 +351,47 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
 
        mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
 }
+
+int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
+                         struct tc_cls_flower_offload *f)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       struct mlxsw_sp_acl_ruleset *ruleset;
+       struct mlxsw_sp_acl_rule *rule;
+       struct tc_action *a;
+       LIST_HEAD(actions);
+       u64 packets;
+       u64 lastuse;
+       u64 bytes;
+       int err;
+
+       ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
+                                          ingress,
+                                          MLXSW_SP_ACL_PROFILE_FLOWER);
+       if (WARN_ON(IS_ERR(ruleset)))
+               return -EINVAL;
+
+       rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
+       if (!rule)
+               return -EINVAL;
+
+       err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes,
+                                         &lastuse);
+       if (err)
+               goto err_rule_get_stats;
+
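+       /* The action stats are updated through per-CPU counters, so keep
+        * preemption disabled across the update.
+        */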
+       preempt_disable();
+
+       tcf_exts_to_list(f->exts, &actions);
+       list_for_each_entry(a, &actions, list)
+               tcf_action_stats_update(a, bytes, packets, lastuse);
+
+       preempt_enable();
+
+       mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+       return 0;
+
+err_rule_get_stats:
+       mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+       return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index ac321e8e5c1ac4cb85ce90b53b7907cddd3e859b..26c26cd30c3d4038948fcd79028a3b853d3651c7 100644 (file)
@@ -45,7 +45,8 @@
        (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE)
 #define MLXSW_SP_CHUNK_MAX 32
 
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count)
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
+                       u32 *p_entry_index)
 {
        int entry_index;
        int size;
@@ -72,7 +73,8 @@ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count)
 
                for (i = 0; i < type_entries; i++)
                        set_bit(entry_index + i, mlxsw_sp->kvdl.usage);
-               return entry_index;
+               *p_entry_index = entry_index;
+               return 0;
        }
        return -ENOBUFS;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index bd8de6b9be718f967ca6967a06c00be21d2e3b6c..c70c59181014d96ce169231000eed109644b0c79 100644 (file)
 #include <linux/in6.h>
 #include <linux/notifier.h>
 #include <linux/inetdevice.h>
+#include <linux/netdevice.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
 #include <net/ip_fib.h>
+#include <net/fib_rules.h>
+#include <net/l3mdev.h>
 
 #include "spectrum.h"
 #include "core.h"
 #include "reg.h"
+#include "spectrum_cnt.h"
+#include "spectrum_dpipe.h"
+#include "spectrum_router.h"
+
+struct mlxsw_sp_rif {
+       struct list_head nexthop_list;
+       struct list_head neigh_list;
+       struct net_device *dev;
+       struct mlxsw_sp_fid *f;
+       unsigned char addr[ETH_ALEN];
+       int mtu;
+       u16 rif_index;
+       u16 vr_id;
+       unsigned int counter_ingress;
+       bool counter_ingress_valid;
+       unsigned int counter_egress;
+       bool counter_egress_valid;
+};
+
+static unsigned int *
+mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
+                          enum mlxsw_sp_rif_counter_dir dir)
+{
+       switch (dir) {
+       case MLXSW_SP_RIF_COUNTER_EGRESS:
+               return &rif->counter_egress;
+       case MLXSW_SP_RIF_COUNTER_INGRESS:
+               return &rif->counter_ingress;
+       }
+       return NULL;
+}
+
+static bool
+mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
+                              enum mlxsw_sp_rif_counter_dir dir)
+{
+       switch (dir) {
+       case MLXSW_SP_RIF_COUNTER_EGRESS:
+               return rif->counter_egress_valid;
+       case MLXSW_SP_RIF_COUNTER_INGRESS:
+               return rif->counter_ingress_valid;
+       }
+       return false;
+}
+
+static void
+mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
+                              enum mlxsw_sp_rif_counter_dir dir,
+                              bool valid)
+{
+       switch (dir) {
+       case MLXSW_SP_RIF_COUNTER_EGRESS:
+               rif->counter_egress_valid = valid;
+               break;
+       case MLXSW_SP_RIF_COUNTER_INGRESS:
+               rif->counter_ingress_valid = valid;
+               break;
+       }
+}
+
+static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+                                    unsigned int counter_index, bool enable,
+                                    enum mlxsw_sp_rif_counter_dir dir)
+{
+       char ritr_pl[MLXSW_REG_RITR_LEN];
+       bool is_egress = false;
+       int err;
+
+       if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
+               is_egress = true;
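+       /* Read back the current RIF configuration and re-write it with
+        * only the counter binding changed.
+        */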
+       mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+       if (err)
+               return err;
+
+       mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
+                                   is_egress);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_rif *rif,
+                                  enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
+{
+       char ricnt_pl[MLXSW_REG_RICNT_LEN];
+       unsigned int *p_counter_index;
+       bool valid;
+       int err;
+
+       valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
+       if (!valid)
+               return -EINVAL;
+
+       p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+       if (!p_counter_index)
+               return -EINVAL;
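+       /* Query with the NOP opcode so the counter is read without
+        * being cleared.
+        */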
+       mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
+                            MLXSW_REG_RICNT_OPCODE_NOP);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
+       if (err)
+               return err;
+       *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
+       return 0;
+}
+
+static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
+                                     unsigned int counter_index)
+{
+       char ricnt_pl[MLXSW_REG_RICNT_LEN];
+
+       mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
+                            MLXSW_REG_RICNT_OPCODE_CLEAR);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
+}
+
+int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_rif *rif,
+                              enum mlxsw_sp_rif_counter_dir dir)
+{
+       unsigned int *p_counter_index;
+       int err;
+
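+       /* Allocate an index from the RIF sub-pool, clear the hardware
+        * counter at that index and only then bind it to the RIF.
+        */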
+       p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+       if (!p_counter_index)
+               return -EINVAL;
+       err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+                                    p_counter_index);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
+       if (err)
+               goto err_counter_clear;
+
+       err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
+                                       *p_counter_index, true, dir);
+       if (err)
+               goto err_counter_edit;
+       mlxsw_sp_rif_counter_valid_set(rif, dir, true);
+       return 0;
+
+err_counter_edit:
+err_counter_clear:
+       mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+                             *p_counter_index);
+       return err;
+}
+
+void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_rif *rif,
+                              enum mlxsw_sp_rif_counter_dir dir)
+{
+       unsigned int *p_counter_index;
+
+       p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+       if (WARN_ON(!p_counter_index))
+               return;
+       mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
+                                 *p_counter_index, false, dir);
+       mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+                             *p_counter_index);
+       mlxsw_sp_rif_counter_valid_set(rif, dir, false);
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+                        const struct net_device *dev);
 
 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
@@ -88,12 +258,6 @@ mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
        memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
 }
 
-static void
-mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
-{
-       memset(prefix_usage, 0, sizeof(*prefix_usage));
-}
-
 static void
 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
                          unsigned char prefix_len)
@@ -125,7 +289,7 @@ struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        struct list_head list;
        struct rhash_head ht_node;
-       struct mlxsw_sp_vr *vr;
+       struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
 };
 
@@ -149,13 +313,17 @@ struct mlxsw_sp_fib_entry {
 struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
+       struct mlxsw_sp_vr *vr;
+       struct mlxsw_sp_lpm_tree *lpm_tree;
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        struct mlxsw_sp_prefix_usage prefix_usage;
+       enum mlxsw_sp_l3proto proto;
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
 
-static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
+static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
+                                               enum mlxsw_sp_l3proto proto)
 {
        struct mlxsw_sp_fib *fib;
        int err;
@@ -167,6 +335,8 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
        if (err)
                goto err_rhashtable_init;
        INIT_LIST_HEAD(&fib->node_list);
+       fib->proto = proto;
+       fib->vr = vr;
        return fib;
 
 err_rhashtable_init:
@@ -177,24 +347,21 @@ err_rhashtable_init:
 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
 {
        WARN_ON(!list_empty(&fib->node_list));
+       WARN_ON(fib->lpm_tree);
        rhashtable_destroy(&fib->ht);
        kfree(fib);
 }
 
 static struct mlxsw_sp_lpm_tree *
-mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
+mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
 {
        static struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;
 
-       for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
-               lpm_tree = &mlxsw_sp->router.lpm_trees[i];
-               if (lpm_tree->ref_count == 0) {
-                       if (one_reserved)
-                               one_reserved = false;
-                       else
-                               return lpm_tree;
-               }
+       for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
+               lpm_tree = &mlxsw_sp->router.lpm.trees[i];
+               if (lpm_tree->ref_count == 0)
+                       return lpm_tree;
        }
        return NULL;
 }
@@ -248,12 +415,12 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
 static struct mlxsw_sp_lpm_tree *
 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_prefix_usage *prefix_usage,
-                        enum mlxsw_sp_l3proto proto, bool one_reserved)
+                        enum mlxsw_sp_l3proto proto)
 {
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int err;
 
-       lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
+       lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
        if (!lpm_tree)
                return ERR_PTR(-EBUSY);
        lpm_tree->proto = proto;
@@ -283,13 +450,13 @@ static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
 static struct mlxsw_sp_lpm_tree *
 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
                      struct mlxsw_sp_prefix_usage *prefix_usage,
-                     enum mlxsw_sp_l3proto proto, bool one_reserved)
+                     enum mlxsw_sp_l3proto proto)
 {
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;
 
-       for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
-               lpm_tree = &mlxsw_sp->router.lpm_trees[i];
+       for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
+               lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                if (lpm_tree->ref_count != 0 &&
                    lpm_tree->proto == proto &&
                    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
@@ -297,7 +464,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
                        goto inc_ref_count;
        }
        lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
-                                           proto, one_reserved);
+                                           proto);
        if (IS_ERR(lpm_tree))
                return lpm_tree;
 
@@ -314,15 +481,41 @@ static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
-static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
+#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */
+
+static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_lpm_tree *lpm_tree;
+       u64 max_trees;
        int i;
 
-       for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
-               lpm_tree = &mlxsw_sp->router.lpm_trees[i];
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
+               return -EIO;
+
+       max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
+       mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
+       mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count,
+                                            sizeof(struct mlxsw_sp_lpm_tree),
+                                            GFP_KERNEL);
+       if (!mlxsw_sp->router.lpm.trees)
+               return -ENOMEM;
+
+       for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
+               lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
        }
+
+       return 0;
+}
+
+static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       kfree(mlxsw_sp->router.lpm.trees);
+}
+
+static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
+{
+       return !!vr->fib4;
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -332,31 +525,31 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 
        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
-               if (!vr->used)
+               if (!mlxsw_sp_vr_is_used(vr))
                        return vr;
        }
        return NULL;
 }
 
 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
-                                    struct mlxsw_sp_vr *vr)
+                                    const struct mlxsw_sp_fib *fib)
 {
        char raltb_pl[MLXSW_REG_RALTB_LEN];
 
-       mlxsw_reg_raltb_pack(raltb_pl, vr->id,
-                            (enum mlxsw_reg_ralxx_protocol) vr->proto,
-                            vr->lpm_tree->id);
+       mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
+                            (enum mlxsw_reg_ralxx_protocol) fib->proto,
+                            fib->lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 }
 
 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
-                                      struct mlxsw_sp_vr *vr)
+                                      const struct mlxsw_sp_fib *fib)
 {
        char raltb_pl[MLXSW_REG_RALTB_LEN];
 
        /* Bind to tree 0 which is default */
-       mlxsw_reg_raltb_pack(raltb_pl, vr->id,
-                            (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
+       mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
+                            (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 }
 
@@ -369,8 +562,7 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
-                                           u32 tb_id,
-                                           enum mlxsw_sp_l3proto proto)
+                                           u32 tb_id)
 {
        struct mlxsw_sp_vr *vr;
        int i;
@@ -379,69 +571,50 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
 
        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
-               if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
+               if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
                        return vr;
        }
        return NULL;
 }
 
+static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
+                                           enum mlxsw_sp_l3proto proto)
+{
+       switch (proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               return vr->fib4;
+       case MLXSW_SP_L3_PROTO_IPV6:
+               BUG_ON(1);
+       }
+       return NULL;
+}
+
 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
-                                             unsigned char prefix_len,
-                                             u32 tb_id,
-                                             enum mlxsw_sp_l3proto proto)
+                                             u32 tb_id)
 {
-       struct mlxsw_sp_prefix_usage req_prefix_usage;
-       struct mlxsw_sp_lpm_tree *lpm_tree;
        struct mlxsw_sp_vr *vr;
-       int err;
 
        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
        if (!vr)
                return ERR_PTR(-EBUSY);
-       vr->fib = mlxsw_sp_fib_create();
-       if (IS_ERR(vr->fib))
-               return ERR_CAST(vr->fib);
-
-       vr->proto = proto;
+       vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
+       if (IS_ERR(vr->fib4))
+               return ERR_CAST(vr->fib4);
        vr->tb_id = tb_id;
-       mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
-       mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
-       lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
-                                        proto, true);
-       if (IS_ERR(lpm_tree)) {
-               err = PTR_ERR(lpm_tree);
-               goto err_tree_get;
-       }
-       vr->lpm_tree = lpm_tree;
-       err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
-       if (err)
-               goto err_tree_bind;
-
-       vr->used = true;
        return vr;
-
-err_tree_bind:
-       mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
-err_tree_get:
-       mlxsw_sp_fib_destroy(vr->fib);
-
-       return ERR_PTR(err);
 }
 
-static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
-                               struct mlxsw_sp_vr *vr)
+static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
 {
-       mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
-       mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
-       mlxsw_sp_fib_destroy(vr->fib);
-       vr->used = false;
+       mlxsw_sp_fib_destroy(vr->fib4);
+       vr->fib4 = NULL;
 }
 
 static int
-mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
+mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib,
                           struct mlxsw_sp_prefix_usage *req_prefix_usage)
 {
-       struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
+       struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree;
        struct mlxsw_sp_lpm_tree *new_tree;
        int err;
 
@@ -449,7 +622,7 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
                return 0;
 
        new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
-                                        vr->proto, false);
+                                        fib->proto);
        if (IS_ERR(new_tree)) {
                /* We failed to get a tree according to the required
                 * prefix usage. However, the current tree might be still good
@@ -463,8 +636,8 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
        }
 
        /* Prevent packet loss by overwriting existing binding */
-       vr->lpm_tree = new_tree;
-       err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
+       fib->lpm_tree = new_tree;
+       err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
        if (err)
                goto err_tree_bind;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
@@ -472,53 +645,26 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
        return 0;
 
 err_tree_bind:
-       vr->lpm_tree = lpm_tree;
+       fib->lpm_tree = lpm_tree;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
        return err;
 }
 
-static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
-                                          unsigned char prefix_len,
-                                          u32 tb_id,
-                                          enum mlxsw_sp_l3proto proto)
+static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
 {
        struct mlxsw_sp_vr *vr;
-       int err;
 
        tb_id = mlxsw_sp_fix_tb_id(tb_id);
-       vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
-       if (!vr) {
-               vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
-               if (IS_ERR(vr))
-                       return vr;
-       } else {
-               struct mlxsw_sp_prefix_usage req_prefix_usage;
-
-               mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
-                                         &vr->fib->prefix_usage);
-               mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
-               /* Need to replace LPM tree in case new prefix is required. */
-               err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
-                                                &req_prefix_usage);
-               if (err)
-                       return ERR_PTR(err);
-       }
+       vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+       if (!vr)
+               vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
        return vr;
 }
 
-static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
+static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
 {
-       /* Destroy virtual router entity in case the associated FIB is empty
-        * and allow it to be used for other tables in future. Otherwise,
-        * check if some prefix usage did not disappear and change tree if
-        * that is the case. Note that in case new, smaller tree cannot be
-        * allocated, the original one will be kept being used.
-        */
-       if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
-               mlxsw_sp_vr_destroy(mlxsw_sp, vr);
-       else
-               mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
-                                          &vr->fib->prefix_usage);
+       if (!vr->rif_count && list_empty(&vr->fib4->node_list))
+               mlxsw_sp_vr_destroy(vr);
 }
 
 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
@@ -627,14 +773,14 @@ static struct mlxsw_sp_neigh_entry *
 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry;
-       struct mlxsw_sp_rif *r;
+       struct mlxsw_sp_rif *rif;
        int err;
 
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
-       if (!r)
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
+       if (!rif)
                return ERR_PTR(-EINVAL);
 
-       neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
+       neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
        if (!neigh_entry)
                return ERR_PTR(-ENOMEM);
 
@@ -642,7 +788,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
        if (err)
                goto err_neigh_entry_insert;
 
-       list_add(&neigh_entry->rif_list_node, &r->neigh_list);
+       list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
 
        return neigh_entry;
 
@@ -1050,22 +1196,22 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 }
 
 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-                                   const struct mlxsw_sp_rif *r)
+                                   const struct mlxsw_sp_rif *rif)
 {
        char rauht_pl[MLXSW_REG_RAUHT_LEN];
 
        mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-                            r->rif, r->addr);
+                            rif->rif_index, rif->addr);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
 }
 
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
-                                        struct mlxsw_sp_rif *r)
+                                        struct mlxsw_sp_rif *rif)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-       mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
-       list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
+       mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
+       list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
                                 rif_list_node)
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
 }
@@ -1082,7 +1228,7 @@ struct mlxsw_sp_nexthop {
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
-       struct mlxsw_sp_rif *r;
+       struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
@@ -1109,7 +1255,7 @@ struct mlxsw_sp_nexthop_group {
        u16 ecmp_size;
        u16 count;
        struct mlxsw_sp_nexthop nexthops[0];
-#define nh_rif nexthops[0].r
+#define nh_rif nexthops[0].rif
 };
 
 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
@@ -1171,7 +1317,7 @@ mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
-                                            struct mlxsw_sp_vr *vr,
+                                            const struct mlxsw_sp_fib *fib,
                                             u32 adj_index, u16 ecmp_size,
                                             u32 new_adj_index,
                                             u16 new_ecmp_size)
@@ -1179,8 +1325,8 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
        char raleu_pl[MLXSW_REG_RALEU_LEN];
 
        mlxsw_reg_raleu_pack(raleu_pl,
-                            (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
-                            adj_index, ecmp_size, new_adj_index,
+                            (enum mlxsw_reg_ralxx_protocol) fib->proto,
+                            fib->vr->id, adj_index, ecmp_size, new_adj_index,
                             new_ecmp_size);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
 }
@@ -1190,14 +1336,14 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
                                          u32 old_adj_index, u16 old_ecmp_size)
 {
        struct mlxsw_sp_fib_entry *fib_entry;
-       struct mlxsw_sp_vr *vr = NULL;
+       struct mlxsw_sp_fib *fib = NULL;
        int err;
 
        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
-               if (vr == fib_entry->fib_node->vr)
+               if (fib == fib_entry->fib_node->fib)
                        continue;
-               vr = fib_entry->fib_node->vr;
-               err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
+               fib = fib_entry->fib_node->fib;
+               err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
                                                        old_adj_index,
                                                        old_ecmp_size,
                                                        nh_grp->adj_index,
@@ -1280,7 +1426,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
        bool old_adj_index_valid;
        u32 old_adj_index;
        u16 old_ecmp_size;
-       int ret;
        int i;
        int err;
 
@@ -1318,15 +1463,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                 */
                goto set_trap;
 
-       ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
-       if (ret < 0) {
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
+       if (err) {
                /* We ran out of KVD linear space, just set the
                 * trap and let everything flow through kernel.
                 */
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
                goto set_trap;
        }
-       adj_index = ret;
        old_adj_index_valid = nh_grp->adj_index_valid;
        old_adj_index = nh_grp->adj_index;
        old_ecmp_size = nh_grp->ecmp_size;
@@ -1399,22 +1543,22 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
-                                     struct mlxsw_sp_rif *r)
+                                     struct mlxsw_sp_rif *rif)
 {
-       if (nh->r)
+       if (nh->rif)
                return;
 
-       nh->r = r;
-       list_add(&nh->rif_list_node, &r->nexthop_list);
+       nh->rif = rif;
+       list_add(&nh->rif_list_node, &rif->nexthop_list);
 }
 
 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
 {
-       if (!nh->r)
+       if (!nh->rif)
                return;
 
        list_del(&nh->rif_list_node);
-       nh->r = NULL;
+       nh->rif = NULL;
 }
 
 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
@@ -1505,7 +1649,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
 {
        struct net_device *dev = fib_nh->nh_dev;
        struct in_device *in_dev;
-       struct mlxsw_sp_rif *r;
+       struct mlxsw_sp_rif *rif;
        int err;
 
        nh->nh_grp = nh_grp;
@@ -1514,15 +1658,18 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
        if (err)
                return err;
 
+       if (!dev)
+               return 0;
+
        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
            fib_nh->nh_flags & RTNH_F_LINKDOWN)
                return 0;
 
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
-       if (!r)
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+       if (!rif)
                return 0;
-       mlxsw_sp_nexthop_rif_init(nh, r);
+       mlxsw_sp_nexthop_rif_init(nh, rif);
 
        err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
        if (err)
@@ -1548,7 +1695,7 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_nexthop *nh;
-       struct mlxsw_sp_rif *r;
+       struct mlxsw_sp_rif *rif;
 
        if (mlxsw_sp->router.aborted)
                return;
@@ -1558,13 +1705,13 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
        if (WARN_ON_ONCE(!nh))
                return;
 
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
-       if (!r)
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
+       if (!rif)
                return;
 
        switch (event) {
        case FIB_EVENT_NH_ADD:
-               mlxsw_sp_nexthop_rif_init(nh, r);
+               mlxsw_sp_nexthop_rif_init(nh, rif);
                mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
                break;
        case FIB_EVENT_NH_DEL:
@@ -1577,11 +1724,11 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
-                                          struct mlxsw_sp_rif *r)
+                                          struct mlxsw_sp_rif *rif)
 {
        struct mlxsw_sp_nexthop *nh, *tmp;
 
-       list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
+       list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
                mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
                mlxsw_sp_nexthop_rif_fini(nh);
                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
@@ -1699,7 +1846,7 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 {
        fib_entry->offloaded = true;
 
-       switch (fib_entry->fib_node->vr->proto) {
+       switch (fib_entry->fib_node->fib->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                fib_info_offload_inc(fib_entry->nh_group->key.fi);
                break;
@@ -1711,7 +1858,7 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 static void
 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 {
-       switch (fib_entry->fib_node->vr->proto) {
+       switch (fib_entry->fib_node->fib->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                fib_info_offload_dec(fib_entry->nh_group->key.fi);
                break;
@@ -1751,8 +1898,8 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
                                         enum mlxsw_reg_ralue_op op)
 {
        char ralue_pl[MLXSW_REG_RALUE_LEN];
+       struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
        u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
-       struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
        enum mlxsw_reg_ralue_trap_action trap_action;
        u16 trap_id = 0;
        u32 adjacency_index = 0;
@@ -1772,8 +1919,8 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
        }
 
        mlxsw_reg_ralue_pack4(ralue_pl,
-                             (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
-                             vr->id, fib_entry->fib_node->key.prefix_len,
+                             (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
+                             fib->vr->id, fib_entry->fib_node->key.prefix_len,
                              *p_dip);
        mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
                                        adjacency_index, ecmp_size);
@@ -1784,27 +1931,28 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_fib_entry *fib_entry,
                                        enum mlxsw_reg_ralue_op op)
 {
-       struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
+       struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
+       struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
        enum mlxsw_reg_ralue_trap_action trap_action;
        char ralue_pl[MLXSW_REG_RALUE_LEN];
        u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
-       struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
        u16 trap_id = 0;
-       u16 rif = 0;
+       u16 rif_index = 0;
 
        if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
-               rif = r->rif;
+               rif_index = rif->rif_index;
        } else {
                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
        }
 
        mlxsw_reg_ralue_pack4(ralue_pl,
-                             (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
-                             vr->id, fib_entry->fib_node->key.prefix_len,
+                             (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
+                             fib->vr->id, fib_entry->fib_node->key.prefix_len,
                              *p_dip);
-       mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
+       mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
+                                      rif_index);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
@@ -1812,13 +1960,13 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_fib_entry *fib_entry,
                                       enum mlxsw_reg_ralue_op op)
 {
+       struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
        char ralue_pl[MLXSW_REG_RALUE_LEN];
        u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
-       struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
 
        mlxsw_reg_ralue_pack4(ralue_pl,
-                             (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
-                             vr->id, fib_entry->fib_node->key.prefix_len,
+                             (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
+                             fib->vr->id, fib_entry->fib_node->key.prefix_len,
                              *p_dip);
        mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
@@ -1845,7 +1993,7 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
 {
        int err = -EINVAL;
 
-       switch (fib_entry->fib_node->vr->proto) {
+       switch (fib_entry->fib_node->fib->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
                break;
@@ -1877,17 +2025,29 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 {
        struct fib_info *fi = fen_info->fi;
 
-       if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
+       switch (fen_info->type) {
+       case RTN_BROADCAST: /* fall through */
+       case RTN_LOCAL:
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
                return 0;
-       }
-       if (fen_info->type != RTN_UNICAST)
-               return -EINVAL;
-       if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
+       case RTN_UNREACHABLE: /* fall through */
+       case RTN_BLACKHOLE: /* fall through */
+       case RTN_PROHIBIT:
+               /* Packets hitting these routes need to be trapped, but
+                * can be handled with a lower priority than packets
+                * directed at the host, so use action type local
+                * instead of trap.
+                */
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
-       else
-               fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
-       return 0;
+               return 0;
+       case RTN_UNICAST:
+               if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
+                       fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+               else
+                       fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+               return 0;
+       default:
+               return -EINVAL;
+       }
 }
 
 static struct mlxsw_sp_fib_entry *
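
mlxsw_sp_fib4_entry_type_set() grows from an if-ladder into an explicit switch over the route type: host-bound types (broadcast, local) keep trapping to the CPU at high priority, the drop-like types (unreachable, blackhole, prohibit) become offloadable by reusing the local action at a lower trap priority, and unicast chooses local or remote by nexthop scope. Drop-like routes also carry no nexthop device, which is presumably why mlxsw_sp_nexthop_init() above gained its "if (!dev)" early return. A condensed restatement in illustrative C, not driver code:

#include <stdio.h>

enum rtn_type { T_UNICAST, T_LOCAL, T_BROADCAST, T_BLACKHOLE,
		T_UNREACHABLE, T_PROHIBIT };
enum entry_type { E_TRAP, E_LOCAL, E_REMOTE, E_INVAL };

static enum entry_type classify(enum rtn_type type, int nh_scope_is_link)
{
	switch (type) {
	case T_BROADCAST:
	case T_LOCAL:
		return E_TRAP;		/* host-bound, highest priority */
	case T_BLACKHOLE:
	case T_UNREACHABLE:
	case T_PROHIBIT:
		return E_LOCAL;		/* trapped at lower priority */
	case T_UNICAST:			/* roughly: does it have a gateway? */
		return nh_scope_is_link ? E_REMOTE : E_LOCAL;
	default:
		return E_INVAL;
	}
}

int main(void)
{
	printf("%d\n", classify(T_BLACKHOLE, 0));	/* 1 == E_LOCAL */
	return 0;
}
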
@@ -1996,7 +2156,7 @@ mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
 }
 
 static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
+mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
                         size_t addr_len, unsigned char prefix_len)
 {
        struct mlxsw_sp_fib_node *fib_node;
@@ -2006,18 +2166,15 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
                return NULL;
 
        INIT_LIST_HEAD(&fib_node->entry_list);
-       list_add(&fib_node->list, &vr->fib->node_list);
+       list_add(&fib_node->list, &fib->node_list);
        memcpy(fib_node->key.addr, addr, addr_len);
        fib_node->key.prefix_len = prefix_len;
-       mlxsw_sp_fib_node_insert(vr->fib, fib_node);
-       fib_node->vr = vr;
 
        return fib_node;
 }
 
 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
 {
-       mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
        list_del(&fib_node->list);
        WARN_ON(!list_empty(&fib_node->entry_list));
        kfree(fib_node);
@@ -2034,7 +2191,7 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
 {
        unsigned char prefix_len = fib_node->key.prefix_len;
-       struct mlxsw_sp_fib *fib = fib_node->vr->fib;
+       struct mlxsw_sp_fib *fib = fib_node->fib;
 
        if (fib->prefix_ref_count[prefix_len]++ == 0)
                mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
@@ -2043,32 +2200,98 @@ static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
 {
        unsigned char prefix_len = fib_node->key.prefix_len;
-       struct mlxsw_sp_fib *fib = fib_node->vr->fib;
+       struct mlxsw_sp_fib *fib = fib_node->fib;
 
        if (--fib->prefix_ref_count[prefix_len] == 0)
                mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
 }
 
+static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_fib_node *fib_node,
+                                 struct mlxsw_sp_fib *fib)
+{
+       struct mlxsw_sp_prefix_usage req_prefix_usage;
+       struct mlxsw_sp_lpm_tree *lpm_tree;
+       int err;
+
+       err = mlxsw_sp_fib_node_insert(fib, fib_node);
+       if (err)
+               return err;
+       fib_node->fib = fib;
+
+       mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage);
+       mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
+
+       if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
+               err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib,
+                                                &req_prefix_usage);
+               if (err)
+                       goto err_tree_check;
+       } else {
+               lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+                                                fib->proto);
+               if (IS_ERR(lpm_tree))
+                       return PTR_ERR(lpm_tree);
+               fib->lpm_tree = lpm_tree;
+               err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
+               if (err)
+                       goto err_tree_bind;
+       }
+
+       mlxsw_sp_fib_node_prefix_inc(fib_node);
+
+       return 0;
+
+err_tree_bind:
+       fib->lpm_tree = NULL;
+       mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+err_tree_check:
+       fib_node->fib = NULL;
+       mlxsw_sp_fib_node_remove(fib, fib_node);
+       return err;
+}
+
+static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_fib_node *fib_node)
+{
+       struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
+       struct mlxsw_sp_fib *fib = fib_node->fib;
+
+       mlxsw_sp_fib_node_prefix_dec(fib_node);
+
+       if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
+               mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
+               fib->lpm_tree = NULL;
+               mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+       } else {
+               mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage);
+       }
+
+       fib_node->fib = NULL;
+       mlxsw_sp_fib_node_remove(fib, fib_node);
+}
+
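
The new init/fini pair moves LPM tree management to FIB-node granularity: the first node inserted into a FIB either rechecks an existing tree binding or acquires and binds a tree matching the requested prefix usage, and removing the last node unbinds and releases the tree. A toy model of that first-in/last-out lifecycle, with all names invented:

#include <stdio.h>

struct fib { int node_count; int have_tree; };

static int fib_node_init(struct fib *fib)
{
	if (fib->node_count++ == 0) {
		fib->have_tree = 1;	/* roughly: lpm_tree_get() + bind */
		printf("tree bound\n");
	}
	return 0;
}

static void fib_node_fini(struct fib *fib)
{
	if (--fib->node_count == 0) {
		fib->have_tree = 0;	/* roughly: unbind + lpm_tree_put() */
		printf("tree released\n");
	}
}

int main(void)
{
	struct fib fib = { 0, 0 };

	fib_node_init(&fib);
	fib_node_init(&fib);	/* tree already bound, just rechecked */
	fib_node_fini(&fib);
	fib_node_fini(&fib);	/* last one out releases the tree */
	return 0;
}
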
 static struct mlxsw_sp_fib_node *
 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
                       const struct fib_entry_notifier_info *fen_info)
 {
        struct mlxsw_sp_fib_node *fib_node;
+       struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_vr *vr;
        int err;
 
-       vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
-                            MLXSW_SP_L3_PROTO_IPV4);
+       vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
        if (IS_ERR(vr))
                return ERR_CAST(vr);
+       fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
 
-       fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
+       fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
                                            sizeof(fen_info->dst),
                                            fen_info->dst_len);
        if (fib_node)
                return fib_node;
 
-       fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
+       fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
                                            sizeof(fen_info->dst),
                                            fen_info->dst_len);
        if (!fib_node) {
@@ -2076,22 +2299,29 @@ mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
                goto err_fib_node_create;
        }
 
+       err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
+       if (err)
+               goto err_fib_node_init;
+
        return fib_node;
 
+err_fib_node_init:
+       mlxsw_sp_fib_node_destroy(fib_node);
 err_fib_node_create:
-       mlxsw_sp_vr_put(mlxsw_sp, vr);
+       mlxsw_sp_vr_put(vr);
        return ERR_PTR(err);
 }
 
 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_fib_node *fib_node)
 {
-       struct mlxsw_sp_vr *vr = fib_node->vr;
+       struct mlxsw_sp_vr *vr = fib_node->fib->vr;
 
        if (!list_empty(&fib_node->entry_list))
                return;
+       mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
        mlxsw_sp_fib_node_destroy(fib_node);
-       mlxsw_sp_vr_put(mlxsw_sp, vr);
+       mlxsw_sp_vr_put(vr);
 }
 
 static struct mlxsw_sp_fib_entry *
@@ -2236,8 +2466,6 @@ static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_fib4_node_entry_add;
 
-       mlxsw_sp_fib_node_prefix_inc(fib_node);
-
        return 0;
 
 err_fib4_node_entry_add:
@@ -2251,7 +2479,6 @@ mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
 
-       mlxsw_sp_fib_node_prefix_dec(fib_node);
        mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
        mlxsw_sp_fib4_node_list_remove(fib_entry);
 }
@@ -2340,9 +2567,7 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
 {
        char ralta_pl[MLXSW_REG_RALTA_LEN];
        char ralst_pl[MLXSW_REG_RALST_LEN];
-       char raltb_pl[MLXSW_REG_RALTB_LEN];
-       char ralue_pl[MLXSW_REG_RALUE_LEN];
-       int err;
+       int i, err;
 
        mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
                             MLXSW_SP_LPM_TREE_MIN);
@@ -2355,16 +2580,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
        if (err)
                return err;
 
-       mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
-                            MLXSW_SP_LPM_TREE_MIN);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
-       if (err)
-               return err;
+       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+               struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
+               char raltb_pl[MLXSW_REG_RALTB_LEN];
+               char ralue_pl[MLXSW_REG_RALUE_LEN];
 
-       mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
-                             MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
-       mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+               if (!mlxsw_sp_vr_is_used(vr))
+                       continue;
+
+               mlxsw_reg_raltb_pack(raltb_pl, vr->id,
+                                    MLXSW_REG_RALXX_PROTOCOL_IPV4,
+                                    MLXSW_SP_LPM_TREE_MIN);
+               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
+                                     raltb_pl);
+               if (err)
+                       return err;
+
+               mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
+                                     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
+                                     0);
+               mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
+                                     ralue_pl);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
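
With multiple VRs in play, the abort trap can no longer be programmed for VR 0 alone: every in-use VR gets the minimal LPM tree bound and a default ip2me route installed, so that after an abort traffic from any VRF still traps to the CPU. The shape of the loop, reduced to its essentials with an invented structure:

#include <stdio.h>

struct vr { int used; int id; };

int main(void)
{
	struct vr vrs[4] = { { 1, 0 }, { 0, 1 }, { 1, 2 }, { 0, 3 } };
	int i;

	for (i = 0; i < 4; i++) {
		if (!vrs[i].used)
			continue;
		/* bind minimal LPM tree + install default ip2me route */
		printf("abort trap installed in VR %d\n", vrs[i].id);
	}
	return 0;
}
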
 
 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
@@ -2390,7 +2632,7 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_fib_node *fib_node)
 {
-       switch (fib_node->vr->proto) {
+       switch (fib_node->fib->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
                break;
@@ -2400,26 +2642,32 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
        }
 }
 
-static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_vr *vr,
+                                 enum mlxsw_sp_l3proto proto)
 {
+       struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
        struct mlxsw_sp_fib_node *fib_node, *tmp;
-       struct mlxsw_sp_vr *vr;
+
+       list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
+               bool do_break = &tmp->list == &fib->node_list;
+
+               mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
+               if (do_break)
+                       break;
+       }
+}
+
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
+{
        int i;
 
        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
-               vr = &mlxsw_sp->router.vrs[i];
+               struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
 
-               if (!vr->used)
+               if (!mlxsw_sp_vr_is_used(vr))
                        continue;
-
-               list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
-                                        list) {
-                       bool do_break = &tmp->list == &vr->fib->node_list;
-
-                       mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
-                       if (do_break)
-                               break;
-               }
+               mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
        }
 }
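
The flush loop keeps its slightly unusual do_break test, now applied per FIB: flushing the last node drops the VR's last reference, which tears down the FIB together with its list head, so "is this the last node?" must be computed before the flush and the head must not be dereferenced afterwards. A heap-allocated toy showing the same ordering constraint:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; };
struct fib { struct node *head; };

/* Frees @n and, when it was the last node, the container too, mirroring
 * how flushing the last fib_node drops the VR reference and the FIB. */
static void flush(struct fib **fibp, struct node *n)
{
	struct fib *fib = *fibp;

	fib->head = n->next;
	free(n);
	if (!fib->head) {
		free(fib);
		*fibp = NULL;
	}
}

int main(void)
{
	struct fib *fib = calloc(1, sizeof(*fib));
	int i;

	for (i = 0; i < 3; i++) {
		struct node *n = calloc(1, sizeof(*n));

		n->next = fib->head;
		fib->head = n;
	}

	while (fib) {
		struct node *n = fib->head;
		int do_break = !n->next;	/* decide before flushing */

		flush(&fib, n);
		if (do_break)
			break;	/* head may be freed; don't touch it */
	}
	printf("flushed all\n");
	return 0;
}
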
 
@@ -2437,86 +2685,24 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
 }
 
-static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
-{
-       char ritr_pl[MLXSW_REG_RITR_LEN];
-       int err;
-
-       mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
-       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-       if (WARN_ON_ONCE(err))
-               return err;
-
-       mlxsw_reg_ritr_enable_set(ritr_pl, false);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-}
-
-void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
-                                  struct mlxsw_sp_rif *r)
-{
-       mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
-       mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
-       mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
-}
+struct mlxsw_sp_fib_event_work {
+       struct work_struct work;
+       union {
+               struct fib_entry_notifier_info fen_info;
+               struct fib_rule_notifier_info fr_info;
+               struct fib_nh_notifier_info fnh_info;
+       };
+       struct mlxsw_sp *mlxsw_sp;
+       unsigned long event;
+};
 
-static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
 {
-       char rgcr_pl[MLXSW_REG_RGCR_LEN];
-       u64 max_rifs;
-       int err;
-
-       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
-               return -EIO;
-
-       max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
-       mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
-                                GFP_KERNEL);
-       if (!mlxsw_sp->rifs)
-               return -ENOMEM;
-
-       mlxsw_reg_rgcr_pack(rgcr_pl, true);
-       mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
-       if (err)
-               goto err_rgcr_fail;
-
-       return 0;
-
-err_rgcr_fail:
-       kfree(mlxsw_sp->rifs);
-       return err;
-}
-
-static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
-{
-       char rgcr_pl[MLXSW_REG_RGCR_LEN];
-       int i;
-
-       mlxsw_reg_rgcr_pack(rgcr_pl, false);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
-
-       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
-               WARN_ON_ONCE(mlxsw_sp->rifs[i]);
-
-       kfree(mlxsw_sp->rifs);
-}
-
-struct mlxsw_sp_fib_event_work {
-       struct work_struct work;
-       union {
-               struct fib_entry_notifier_info fen_info;
-               struct fib_nh_notifier_info fnh_info;
-       };
-       struct mlxsw_sp *mlxsw_sp;
-       unsigned long event;
-};
-
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
-{
-       struct mlxsw_sp_fib_event_work *fib_work =
-               container_of(work, struct mlxsw_sp_fib_event_work, work);
-       struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
-       bool replace, append;
+       struct mlxsw_sp_fib_event_work *fib_work =
+               container_of(work, struct mlxsw_sp_fib_event_work, work);
+       struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+       struct fib_rule *rule;
+       bool replace, append;
        int err;
 
        /* Protect internal structures from changes */
@@ -2539,7 +2725,10 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
                break;
        case FIB_EVENT_RULE_ADD: /* fall through */
        case FIB_EVENT_RULE_DEL:
-               mlxsw_sp_router_fib4_abort(mlxsw_sp);
+               rule = fib_work->fr_info.rule;
+               if (!fib4_rule_default(rule) && !rule->l3mdev)
+                       mlxsw_sp_router_fib4_abort(mlxsw_sp);
+               fib_rule_put(rule);
                break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
@@ -2582,6 +2771,11 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
+       case FIB_EVENT_RULE_ADD: /* fall through */
+       case FIB_EVENT_RULE_DEL:
+               memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
+               fib_rule_get(fib_work->fr_info.rule);
+               break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
                memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
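
FIB rule events now get the same deferral treatment as route and nexthop events: the notifier runs in atomic context, so it snapshots the fib_rule_notifier_info and takes a reference on the rule before queueing the work item, and the handler aborts offload only for rules it cannot honour, that is anything that is neither a default rule (fib4_rule_default()) nor an l3mdev rule, dropping the reference when done. The get-in-producer, put-in-consumer shape in miniature, illustrative only:

#include <stdio.h>

struct rule { int refcnt; };

static void rule_get(struct rule *r) { r->refcnt++; }

static void rule_put(struct rule *r)
{
	if (--r->refcnt == 0)
		printf("rule freed\n");
}

int main(void)
{
	struct rule r = { .refcnt = 1 };

	rule_get(&r);	/* notifier: keep the rule alive across the deferral */
	rule_put(&r);	/* work handler: done with the rule */
	rule_put(&r);	/* original owner drops its reference */
	return 0;
}
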
@@ -2594,6 +2788,716 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
        return NOTIFY_DONE;
 }
 
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+                        const struct net_device *dev)
+{
+       int i;
+
+       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+               if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
+                       return mlxsw_sp->rifs[i];
+
+       return NULL;
+}
+
+static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
+{
+       char ritr_pl[MLXSW_REG_RITR_LEN];
+       int err;
+
+       mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+       if (WARN_ON_ONCE(err))
+               return err;
+
+       mlxsw_reg_ritr_enable_set(ritr_pl, false);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
+                                         struct mlxsw_sp_rif *rif)
+{
+       mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
+       mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
+       mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
+}
+
+static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
+                                      const struct in_device *in_dev,
+                                      unsigned long event)
+{
+       switch (event) {
+       case NETDEV_UP:
+               if (!rif)
+                       return true;
+               return false;
+       case NETDEV_DOWN:
+               if (rif && !in_dev->ifa_list &&
+                   !netif_is_l3_slave(rif->dev))
+                       return true;
+               /* It is possible we already removed the RIF ourselves
+                * if it was assigned to a netdev that is now a bridge
+                * or LAG slave.
+                */
+               return false;
+       }
+
+       return false;
+}
+
+#define MLXSW_SP_INVALID_INDEX_RIF 0xffff
+static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+
+       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+               if (!mlxsw_sp->rifs[i])
+                       return i;
+
+       return MLXSW_SP_INVALID_INDEX_RIF;
+}
+
+static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                          bool *p_lagged, u16 *p_system_port)
+{
+       u8 local_port = mlxsw_sp_vport->local_port;
+
+       *p_lagged = mlxsw_sp_vport->lagged;
+       *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
+}
+
+static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                   u16 vr_id, struct net_device *l3_dev,
+                                   u16 rif_index, bool create)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+       bool lagged = mlxsw_sp_vport->lagged;
+       char ritr_pl[MLXSW_REG_RITR_LEN];
+       u16 system_port;
+
+       mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index,
+                           vr_id, l3_dev->mtu, l3_dev->dev_addr);
+
+       mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
+       mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
+                                 mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
+
+static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index)
+{
+       return MLXSW_SP_RFID_BASE + rif_index;
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
+{
+       struct mlxsw_sp_fid *f;
+
+       f = kzalloc(sizeof(*f), GFP_KERNEL);
+       if (!f)
+               return NULL;
+
+       f->leave = mlxsw_sp_vport_rif_sp_leave;
+       f->ref_count = 0;
+       f->dev = l3_dev;
+       f->fid = fid;
+
+       return f;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev,
+                  struct mlxsw_sp_fid *f)
+{
+       struct mlxsw_sp_rif *rif;
+
+       rif = kzalloc(sizeof(*rif), GFP_KERNEL);
+       if (!rif)
+               return NULL;
+
+       INIT_LIST_HEAD(&rif->nexthop_list);
+       INIT_LIST_HEAD(&rif->neigh_list);
+       ether_addr_copy(rif->addr, l3_dev->dev_addr);
+       rif->mtu = l3_dev->mtu;
+       rif->vr_id = vr_id;
+       rif->dev = l3_dev;
+       rif->rif_index = rif_index;
+       rif->f = f;
+
+       return rif;
+}
+
+u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
+{
+       return rif->rif_index;
+}
+
+int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
+{
+       return rif->dev->ifindex;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
+                            struct net_device *l3_dev)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+       u32 tb_id = l3mdev_fib_table(l3_dev);
+       struct mlxsw_sp_vr *vr;
+       struct mlxsw_sp_fid *f;
+       struct mlxsw_sp_rif *rif;
+       u16 fid, rif_index;
+       int err;
+
+       rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
+       if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
+               return ERR_PTR(-ERANGE);
+
+       vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
+       if (IS_ERR(vr))
+               return ERR_CAST(vr);
+
+       err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev,
+                                      rif_index, true);
+       if (err)
+               goto err_vport_rif_sp_op;
+
+       fid = mlxsw_sp_rif_sp_to_fid(rif_index);
+       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
+       if (err)
+               goto err_rif_fdb_op;
+
+       f = mlxsw_sp_rfid_alloc(fid, l3_dev);
+       if (!f) {
+               err = -ENOMEM;
+               goto err_rfid_alloc;
+       }
+
+       rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
+       if (!rif) {
+               err = -ENOMEM;
+               goto err_rif_alloc;
+       }
+
+       if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core),
+                                               MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) {
+               err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
+                                                MLXSW_SP_RIF_COUNTER_EGRESS);
+               if (err)
+                       netdev_dbg(mlxsw_sp_vport->dev,
+                                  "Counter alloc Failed err=%d\n", err);
+       }
+
+       f->rif = rif;
+       mlxsw_sp->rifs[rif_index] = rif;
+       vr->rif_count++;
+
+       return rif;
+
+err_rif_alloc:
+       kfree(f);
+err_rfid_alloc:
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
+err_rif_fdb_op:
+       mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
+                                false);
+err_vport_rif_sp_op:
+       mlxsw_sp_vr_put(vr);
+       return ERR_PTR(err);
+}
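
mlxsw_sp_vport_rif_sp_create() follows the usual kernel unwind ladder: each failure label undoes, in reverse order, exactly the steps that succeeded before it, while the new egress counter is treated as best effort and a failure there only logs. A stripped-down model of the ladder with invented resources:

#include <stdlib.h>

/* Undo completed steps in reverse order; the toy keeps no state, so the
 * success path also releases what it built. */
static int setup(int fail_late)
{
	void *a, *b;

	a = malloc(16);
	if (!a)
		return -1;

	b = malloc(16);
	if (!b)
		goto err_b;

	if (fail_late)
		goto err_late;

	free(b);
	free(a);
	return 0;

err_late:
	free(b);
err_b:
	free(a);
	return -1;
}

int main(void)
{
	return setup(0) || setup(1) == 0;	/* expects 0 then -1 */
}
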
+
+static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                         struct mlxsw_sp_rif *rif)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+       struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
+       struct net_device *l3_dev = rif->dev;
+       struct mlxsw_sp_fid *f = rif->f;
+       u16 rif_index = rif->rif_index;
+       u16 fid = f->fid;
+
+       mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
+
+       mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+       mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS);
+
+       vr->rif_count--;
+       mlxsw_sp->rifs[rif_index] = NULL;
+       f->rif = NULL;
+
+       kfree(rif);
+
+       kfree(f);
+
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
+
+       mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
+                                false);
+       mlxsw_sp_vr_put(vr);
+}
+
+static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                     struct net_device *l3_dev)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+       struct mlxsw_sp_rif *rif;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
+       if (!rif) {
+               rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
+               if (IS_ERR(rif))
+                       return PTR_ERR(rif);
+       }
+
+       mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f);
+       rif->f->ref_count++;
+
+       netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid);
+
+       return 0;
+}
+
+static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
+
+       netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
+
+       mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
+       if (--f->ref_count == 0)
+               mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif);
+}
+
+static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
+                                        struct net_device *port_dev,
+                                        unsigned long event, u16 vid)
+{
+       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+       if (WARN_ON(!mlxsw_sp_vport))
+               return -EINVAL;
+
+       switch (event) {
+       case NETDEV_UP:
+               return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
+       case NETDEV_DOWN:
+               mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
+               break;
+       }
+
+       return 0;
+}
+
+static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
+                                       unsigned long event)
+{
+       if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev))
+               return 0;
+
+       return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
+}
+
+static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
+                                        struct net_device *lag_dev,
+                                        unsigned long event, u16 vid)
+{
+       struct net_device *port_dev;
+       struct list_head *iter;
+       int err;
+
+       netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
+               if (mlxsw_sp_port_dev_check(port_dev)) {
+                       err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
+                                                           event, vid);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
+static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
+                                      unsigned long event)
+{
+       if (netif_is_bridge_port(lag_dev))
+               return 0;
+
+       return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
+}
+
+static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
+                                                   struct net_device *l3_dev)
+{
+       u16 fid;
+
+       if (is_vlan_dev(l3_dev))
+               fid = vlan_dev_vlan_id(l3_dev);
+       else if (mlxsw_sp->master_bridge.dev == l3_dev)
+               fid = 1;
+       else
+               return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
+
+       return mlxsw_sp_fid_find(mlxsw_sp, fid);
+}
+
+static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
+{
+       return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
+}
+
+static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
+{
+       return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
+              MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
+}
+
+static u16 mlxsw_sp_flood_table_index_get(u16 fid)
+{
+       return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
+}
+
+static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
+                                         bool set)
+{
+       u8 router_port = mlxsw_sp_router_port(mlxsw_sp);
+       enum mlxsw_flood_table_type table_type;
+       char *sftr_pl;
+       u16 index;
+       int err;
+
+       sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
+       if (!sftr_pl)
+               return -ENOMEM;
+
+       table_type = mlxsw_sp_flood_table_type_get(fid);
+       index = mlxsw_sp_flood_table_index_get(fid);
+       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
+                           1, router_port, set);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
+
+       kfree(sftr_pl);
+       return err;
+}
+
+static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
+{
+       if (mlxsw_sp_fid_is_vfid(fid))
+               return MLXSW_REG_RITR_FID_IF;
+       else
+               return MLXSW_REG_RITR_VLAN_IF;
+}
+
+static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
+                                 struct net_device *l3_dev,
+                                 u16 fid, u16 rif,
+                                 bool create)
+{
+       enum mlxsw_reg_ritr_if_type rif_type;
+       char ritr_pl[MLXSW_REG_RITR_LEN];
+
+       rif_type = mlxsw_sp_rif_type_get(fid);
+       mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu,
+                           l3_dev->dev_addr);
+       mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
+                                     struct net_device *l3_dev,
+                                     struct mlxsw_sp_fid *f)
+{
+       u32 tb_id = l3mdev_fib_table(l3_dev);
+       struct mlxsw_sp_rif *rif;
+       struct mlxsw_sp_vr *vr;
+       u16 rif_index;
+       int err;
+
+       rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
+       if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
+               return -ERANGE;
+
+       vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
+       if (IS_ERR(vr))
+               return PTR_ERR(vr);
+
+       err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
+       if (err)
+               goto err_port_flood_set;
+
+       err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid,
+                                    rif_index, true);
+       if (err)
+               goto err_rif_bridge_op;
+
+       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
+       if (err)
+               goto err_rif_fdb_op;
+
+       rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
+       if (!rif) {
+               err = -ENOMEM;
+               goto err_rif_alloc;
+       }
+
+       f->rif = rif;
+       mlxsw_sp->rifs[rif_index] = rif;
+       vr->rif_count++;
+
+       netdev_dbg(l3_dev, "RIF=%d created\n", rif_index);
+
+       return 0;
+
+err_rif_alloc:
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
+err_rif_fdb_op:
+       mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
+                              false);
+err_rif_bridge_op:
+       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
+err_port_flood_set:
+       mlxsw_sp_vr_put(vr);
+       return err;
+}
+
+void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_rif *rif)
+{
+       struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
+       struct net_device *l3_dev = rif->dev;
+       struct mlxsw_sp_fid *f = rif->f;
+       u16 rif_index = rif->rif_index;
+
+       mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
+
+       vr->rif_count--;
+       mlxsw_sp->rifs[rif_index] = NULL;
+       f->rif = NULL;
+
+       kfree(rif);
+
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
+
+       mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
+                              false);
+
+       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
+
+       mlxsw_sp_vr_put(vr);
+
+       netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index);
+}
+
+static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
+                                         struct net_device *br_dev,
+                                         unsigned long event)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
+       struct mlxsw_sp_fid *f;
+
+       /* FID can either be an actual FID if the L3 device is the
+        * VLAN-aware bridge or a VLAN device on top. Otherwise, the
+        * L3 device is a VLAN-unaware bridge and we get a vFID.
+        */
+       f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
+       if (WARN_ON(!f))
+               return -EINVAL;
+
+       switch (event) {
+       case NETDEV_UP:
+               return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
+       case NETDEV_DOWN:
+               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
+               break;
+       }
+
+       return 0;
+}
+
+static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
+                                       unsigned long event)
+{
+       struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
+       u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+       if (mlxsw_sp_port_dev_check(real_dev))
+               return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
+                                                    vid);
+       else if (netif_is_lag_master(real_dev))
+               return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
+                                                    vid);
+       else if (netif_is_bridge_master(real_dev) &&
+                mlxsw_sp->master_bridge.dev == real_dev)
+               return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
+                                                     event);
+
+       return 0;
+}
+
+int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
+                           unsigned long event, void *ptr)
+{
+       struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
+       struct net_device *dev = ifa->ifa_dev->dev;
+       struct mlxsw_sp *mlxsw_sp;
+       struct mlxsw_sp_rif *rif;
+       int err = 0;
+
+       mlxsw_sp = mlxsw_sp_lower_get(dev);
+       if (!mlxsw_sp)
+               goto out;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+       if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
+               goto out;
+
+       if (mlxsw_sp_port_dev_check(dev))
+               err = mlxsw_sp_inetaddr_port_event(dev, event);
+       else if (netif_is_lag_master(dev))
+               err = mlxsw_sp_inetaddr_lag_event(dev, event);
+       else if (netif_is_bridge_master(dev))
+               err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
+       else if (is_vlan_dev(dev))
+               err = mlxsw_sp_inetaddr_vlan_event(dev, event);
+
+out:
+       return notifier_from_errno(err);
+}
+
+static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+                            const char *mac, int mtu)
+{
+       char ritr_pl[MLXSW_REG_RITR_LEN];
+       int err;
+
+       mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+       if (err)
+               return err;
+
+       mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
+       mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
+       mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
+{
+       struct mlxsw_sp *mlxsw_sp;
+       struct mlxsw_sp_rif *rif;
+       int err;
+
+       mlxsw_sp = mlxsw_sp_lower_get(dev);
+       if (!mlxsw_sp)
+               return 0;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+       if (!rif)
+               return 0;
+
+       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
+                               dev->mtu);
+       if (err)
+               goto err_rif_edit;
+
+       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true);
+       if (err)
+               goto err_rif_fdb_op;
+
+       ether_addr_copy(rif->addr, dev->dev_addr);
+       rif->mtu = dev->mtu;
+
+       netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
+
+       return 0;
+
+err_rif_fdb_op:
+       mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
+err_rif_edit:
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true);
+       return err;
+}
+
+int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
+       struct net_device *dev = mlxsw_sp_vport->dev;
+
+       /* If the vPort already has a RIF, we need to drop it.
+        * A new one will be created using the VRF's VR.
+        */
+       if (f && f->rif)
+               mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
+
+       return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, dev);
+}
+
+void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
+}
+
+int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);
+       if (WARN_ON(!mlxsw_sp_vport))
+               return -EINVAL;
+
+       return mlxsw_sp_vport_vrf_join(mlxsw_sp_vport);
+}
+
+void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);
+       if (WARN_ON(!mlxsw_sp_vport))
+               return;
+
+       mlxsw_sp_vport_vrf_leave(mlxsw_sp_vport);
+}
+
+int mlxsw_sp_bridge_vrf_join(struct mlxsw_sp *mlxsw_sp,
+                            struct net_device *l3_dev)
+{
+       struct mlxsw_sp_fid *f;
+
+       f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
+       if (WARN_ON(!f))
+               return -EINVAL;
+
+       if (f->rif)
+               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
+
+       return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
+}
+
+void mlxsw_sp_bridge_vrf_leave(struct mlxsw_sp *mlxsw_sp,
+                              struct net_device *l3_dev)
+{
+       struct mlxsw_sp_fid *f;
+
+       f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
+       if (WARN_ON(!f))
+               return;
+       mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
+}
+
 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
 {
        struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
@@ -2606,6 +3510,48 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
        mlxsw_sp_router_fib_flush(mlxsw_sp);
 }
 
+static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+       char rgcr_pl[MLXSW_REG_RGCR_LEN];
+       u64 max_rifs;
+       int err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
+               return -EIO;
+
+       max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+       mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
+                                GFP_KERNEL);
+       if (!mlxsw_sp->rifs)
+               return -ENOMEM;
+
+       mlxsw_reg_rgcr_pack(rgcr_pl, true);
+       mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+       if (err)
+               goto err_rgcr_fail;
+
+       return 0;
+
+err_rgcr_fail:
+       kfree(mlxsw_sp->rifs);
+       return err;
+}
+
+static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       char rgcr_pl[MLXSW_REG_RGCR_LEN];
+       int i;
+
+       mlxsw_reg_rgcr_pack(rgcr_pl, false);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+
+       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+               WARN_ON_ONCE(mlxsw_sp->rifs[i]);
+
+       kfree(mlxsw_sp->rifs);
+}
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
        int err;
@@ -2625,7 +3571,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
        if (err)
                goto err_nexthop_group_ht_init;
 
-       mlxsw_sp_lpm_init(mlxsw_sp);
+       err = mlxsw_sp_lpm_init(mlxsw_sp);
+       if (err)
+               goto err_lpm_init;
+
        err = mlxsw_sp_vrs_init(mlxsw_sp);
        if (err)
                goto err_vrs_init;
@@ -2647,6 +3596,8 @@ err_register_fib_notifier:
 err_neigh_init:
        mlxsw_sp_vrs_fini(mlxsw_sp);
 err_vrs_init:
+       mlxsw_sp_lpm_fini(mlxsw_sp);
+err_lpm_init:
        rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
 err_nexthop_group_ht_init:
        rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
@@ -2660,6 +3611,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
        unregister_fib_notifier(&mlxsw_sp->fib_nb);
        mlxsw_sp_neigh_fini(mlxsw_sp);
        mlxsw_sp_vrs_fini(mlxsw_sp);
+       mlxsw_sp_lpm_fini(mlxsw_sp);
        rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
        rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
        __mlxsw_sp_router_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
new file mode 100644 (file)
index 0000000..c3095fe
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_ROUTER_H_
+#define _MLXSW_ROUTER_H_
+
+#include "spectrum.h"
+
+enum mlxsw_sp_rif_counter_dir {
+       MLXSW_SP_RIF_COUNTER_INGRESS,
+       MLXSW_SP_RIF_COUNTER_EGRESS,
+};
+
+u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_rif *rif,
+                                  enum mlxsw_sp_rif_counter_dir dir,
+                                  u64 *cnt);
+void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_rif *rif,
+                              enum mlxsw_sp_rif_counter_dir dir);
+int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_rif *rif,
+                              enum mlxsw_sp_rif_counter_dir dir);
+
+#endif /* _MLXSW_ROUTER_H_*/
index 598727d578c16e924ac5b25a98a7d622e02dc06a..05eaa15ad9d5458c9b67c64ad999eac919a4b0d9 100644 (file)
@@ -568,8 +568,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f)
 
        list_del(&f->list);
 
-       if (f->r)
-               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+       if (f->rif)
+               mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
 
        kfree(f);
 
@@ -1012,7 +1012,7 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
 
        mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add);
        if (clear_all_ports) {
-               for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
+               for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
                        if (mlxsw_sp->ports[i])
                                mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
        }
index ec1e886d4566fb098aefc6e4d82d6f69ea62173b..3b0f72455681663514d4725b50ffebe696ada240 100644 (file)
@@ -1321,7 +1321,7 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx)
 {
        int i;
 
-       for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
+       for (i = 1; i < mlxsw_core_max_ports(mlxsw_sx->core); i++)
                if (mlxsw_sx_port_created(mlxsw_sx, i))
                        mlxsw_sx_port_remove(mlxsw_sx, i);
        kfree(mlxsw_sx->ports);
@@ -1329,17 +1329,18 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx)
 
 static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
 {
+       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sx->core);
        size_t alloc_size;
        u8 module, width;
        int i;
        int err;
 
-       alloc_size = sizeof(struct mlxsw_sx_port *) * MLXSW_PORT_MAX_PORTS;
+       alloc_size = sizeof(struct mlxsw_sx_port *) * max_ports;
        mlxsw_sx->ports = kzalloc(alloc_size, GFP_KERNEL);
        if (!mlxsw_sx->ports)
                return -ENOMEM;
 
-       for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) {
+       for (i = 1; i < max_ports; i++) {
                err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module,
                                                    &width);
                if (err)
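
The mlxsw_sx changes mirror the spectrum_switchdev hunk above: the compile-time MLXSW_PORT_MAX_PORTS bound gives way to mlxsw_core_max_ports(), a per-device value queried at runtime, and the ports array is sized from it. In miniature, with invented names:

#include <stdlib.h>

struct port;

/* Size the port table from a runtime query instead of a compile-time
 * constant; max_ports stands in for mlxsw_core_max_ports(). */
static struct port **alloc_ports(unsigned int max_ports)
{
	return calloc(max_ports, sizeof(struct port *));
}

int main(void)
{
	unsigned int max_ports = 64;	/* would come from the device */
	struct port **ports = alloc_ports(max_ports);

	if (!ports)
		return 1;
	free(ports);
	return 0;
}
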
index 279ee4612981b0af8d482674c6f1f5525988a6f7..20358f87de57053b6e8a0cdd5078334260e1be6b 100644 (file)
@@ -211,25 +211,6 @@ static void ks8851_wrreg8(struct ks8851_net *ks, unsigned reg, unsigned val)
                netdev_err(ks->netdev, "spi_sync() failed\n");
 }
 
-/**
- * ks8851_rx_1msg - select whether to use one or two messages for spi read
- * @ks: The device structure
- *
- * Return whether to generate a single message with a tx and rx buffer
- * supplied to spi_sync(), or alternatively send the tx and rx buffers
- * as separate messages.
- *
- * Depending on the hardware in use, a single message may be more efficient
- * on interrupts or work done by the driver.
- *
- * This currently always returns true until we add some per-device data passed
- * from the platform code to specify which mode is better.
- */
-static inline bool ks8851_rx_1msg(struct ks8851_net *ks)
-{
-       return true;
-}
-
 /**
  * ks8851_rdreg - issue read register command and return the data
  * @ks: The device state
@@ -251,14 +232,7 @@ static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
 
        txb[0] = cpu_to_le16(op | KS_SPIOP_RD);
 
-       if (ks8851_rx_1msg(ks)) {
-               msg = &ks->spi_msg1;
-               xfer = &ks->spi_xfer1;
-
-               xfer->tx_buf = txb;
-               xfer->rx_buf = trx;
-               xfer->len = rxl + 2;
-       } else {
+       if (ks->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) {
                msg = &ks->spi_msg2;
                xfer = ks->spi_xfer2;
 
@@ -270,15 +244,22 @@ static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
                xfer->tx_buf = NULL;
                xfer->rx_buf = trx;
                xfer->len = rxl;
+       } else {
+               msg = &ks->spi_msg1;
+               xfer = &ks->spi_xfer1;
+
+               xfer->tx_buf = txb;
+               xfer->rx_buf = trx;
+               xfer->len = rxl + 2;
        }
 
        ret = spi_sync(ks->spidev, msg);
        if (ret < 0)
                netdev_err(ks->netdev, "read: spi_sync() failed\n");
-       else if (ks8851_rx_1msg(ks))
-               memcpy(rxb, trx + 2, rxl);
-       else
+       else if (ks->spidev->master->flags & SPI_MASTER_HALF_DUPLEX)
                memcpy(rxb, trx, rxl);
+       else
+               memcpy(rxb, trx + 2, rxl);
 }
 
 /**
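
The ks8851 change above drops the always-true ks8851_rx_1msg() helper and instead keys the read path off the controller's advertised capabilities. A condensed sketch of the two paths, assuming the driver's spi, txb, trx and rxl locals and using the stock SPI core API (spi_message_init(), spi_message_add_tail(), spi_sync()):

    struct spi_transfer xfer[2] = {};
    struct spi_message msg;
    int ret;

    spi_message_init(&msg);
    if (spi->master->flags & SPI_MASTER_HALF_DUPLEX) {
            /* half duplex: clock the command out, then the data in */
            xfer[0].tx_buf = txb;   /* 2-byte read command */
            xfer[0].len = 2;
            xfer[1].rx_buf = trx;   /* register data */
            xfer[1].len = rxl;
            spi_message_add_tail(&xfer[0], &msg);
            spi_message_add_tail(&xfer[1], &msg);
    } else {
            /* full duplex: data clocks in while the command clocks out,
             * so the caller skips the first two received bytes
             */
            xfer[0].tx_buf = txb;
            xfer[0].rx_buf = trx;
            xfer[0].len = rxl + 2;
            spi_message_add_tail(&xfer[0], &msg);
    }
    ret = spi_sync(spi, &msg);
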
index 6933afa69df2e28b8ed24ced53d7108ccc308647..4a5d13ef92a4f431eebeaf06bf72134185a736d8 100644 (file)
@@ -6,6 +6,7 @@ nfp-objs := \
            nfpcore/nfp_cpplib.o \
            nfpcore/nfp_hwinfo.o \
            nfpcore/nfp_mip.o \
+           nfpcore/nfp_mutex.o \
            nfpcore/nfp_nffw.o \
            nfpcore/nfp_nsp.o \
            nfpcore/nfp_nsp_eth.o \
index e614a376b595280148494e8a1029fb2328057ca4..8e04aa0e6e87519afceac23982ed759f49e1f909 100644 (file)
 
 #include "nfp_net_ctrl.h"
 
-#define nn_err(nn, fmt, args...)  netdev_err((nn)->netdev, fmt, ## args)
-#define nn_warn(nn, fmt, args...) netdev_warn((nn)->netdev, fmt, ## args)
-#define nn_info(nn, fmt, args...) netdev_info((nn)->netdev, fmt, ## args)
-#define nn_dbg(nn, fmt, args...)  netdev_dbg((nn)->netdev, fmt, ## args)
-#define nn_warn_ratelimit(nn, fmt, args...)                            \
+#define nn_err(nn, fmt, args...)  netdev_err((nn)->dp.netdev, fmt, ## args)
+#define nn_warn(nn, fmt, args...) netdev_warn((nn)->dp.netdev, fmt, ## args)
+#define nn_info(nn, fmt, args...) netdev_info((nn)->dp.netdev, fmt, ## args)
+#define nn_dbg(nn, fmt, args...)  netdev_dbg((nn)->dp.netdev, fmt, ## args)
+#define nn_dp_warn(dp, fmt, args...)                                   \
        do {                                                            \
                if (unlikely(net_ratelimit()))                          \
-                       netdev_warn((nn)->netdev, fmt, ## args);        \
+                       netdev_warn((dp)->netdev, fmt, ## args);        \
        } while (0)
 
 /* Max time to wait for NFP to respond on updates (in seconds) */
 
 /* Forward declarations */
 struct nfp_cpp;
+struct nfp_eth_table_port;
 struct nfp_net;
 struct nfp_net_r_vector;
 
@@ -306,17 +307,13 @@ struct nfp_net_rx_buf {
  * @rd_p:       FL/RX ring read pointer (free running)
  * @idx:        Ring index from Linux's perspective
  * @fl_qcidx:   Queue Controller Peripheral (QCP) queue index for the freelist
- * @rx_qcidx:   Queue Controller Peripheral (QCP) queue index for the RX queue
  * @qcp_fl:     Pointer to base of the QCP freelist queue
- * @qcp_rx:     Pointer to base of the QCP RX queue
  * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer
  *              (used for free list batching)
  * @rxbufs:     Array of allocated FL/RX buffers
  * @rxds:       Virtual address of FL/RX ring in host memory
  * @dma:        DMA address of the FL/RX ring
  * @size:       Size, in bytes, of the FL/RX ring (needed to free)
- * @bufsz:     Buffer allocation size for convenience of management routines
- *             (NOTE: this is in second cache line, do not use on fast path!)
  */
 struct nfp_net_rx_ring {
        struct nfp_net_r_vector *r_vec;
@@ -325,20 +322,17 @@ struct nfp_net_rx_ring {
        u32 wr_p;
        u32 rd_p;
 
-       u16 idx;
-       u16 wr_ptr_add;
+       u32 idx;
+       u32 wr_ptr_add;
 
        int fl_qcidx;
-       int rx_qcidx;
        u8 __iomem *qcp_fl;
-       u8 __iomem *qcp_rx;
 
        struct nfp_net_rx_buf *rxbufs;
        struct nfp_net_rx_desc *rxds;
 
        dma_addr_t dma;
        unsigned int size;
-       unsigned int bufsz;
 } ____cacheline_aligned;
 
 /**
@@ -433,19 +427,76 @@ struct nfp_stat_pair {
 };
 
 /**
- * struct nfp_net - NFP network device structure
- * @pdev:               Backpointer to PCI device
- * @netdev:             Backpointer to net_device structure
- * @is_vf:              Is the driver attached to a VF?
+ * struct nfp_net_dp - NFP network device datapath data structure
+ * @dev:               Backpointer to struct device
+ * @netdev:            Backpointer to net_device structure
+ * @is_vf:             Is the driver attached to a VF?
  * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
  * @bpf_offload_xdp:   Offloaded BPF program is XDP
- * @ctrl:               Local copy of the control register/word.
- * @fl_bufsz:           Currently configured size of the freelist buffers
+ * @chained_metadata_format:  Firmware will use new metadata format
+ * @rx_dma_dir:                Mapping direction for RX buffers
+ * @rx_dma_off:                Offset at which DMA packets are mapped (for XDP headroom)
  * @rx_offset:         Offset in the RX buffers where packet data starts
+ * @ctrl:              Local copy of the control register/word.
+ * @fl_bufsz:          Currently configured size of the freelist buffers
  * @xdp_prog:          Installed XDP program
- * @fw_ver:             Firmware version
+ * @tx_rings:          Array of pre-allocated TX ring structures
+ * @rx_rings:          Array of pre-allocated RX ring structures
+ * @ctrl_bar:          Pointer to mapped control BAR
+ *
+ * @txd_cnt:           Size of the TX ring in number of descriptors
+ * @rxd_cnt:           Size of the RX ring in number of descriptors
+ * @num_r_vecs:                Number of used ring vectors
+ * @num_tx_rings:      Currently configured number of TX rings
+ * @num_stack_tx_rings:        Number of TX rings used by the stack (not XDP)
+ * @num_rx_rings:      Currently configured number of RX rings
+ * @mtu:               Device MTU
+ */
+struct nfp_net_dp {
+       struct device *dev;
+       struct net_device *netdev;
+
+       u8 is_vf:1;
+       u8 bpf_offload_skip_sw:1;
+       u8 bpf_offload_xdp:1;
+       u8 chained_metadata_format:1;
+
+       u8 rx_dma_dir;
+       u8 rx_dma_off;
+
+       u8 rx_offset;
+
+       u32 ctrl;
+       u32 fl_bufsz;
+
+       struct bpf_prog *xdp_prog;
+
+       struct nfp_net_tx_ring *tx_rings;
+       struct nfp_net_rx_ring *rx_rings;
+
+       u8 __iomem *ctrl_bar;
+
+       /* Cold data follows */
+
+       unsigned int txd_cnt;
+       unsigned int rxd_cnt;
+
+       unsigned int num_r_vecs;
+
+       unsigned int num_tx_rings;
+       unsigned int num_stack_tx_rings;
+       unsigned int num_rx_rings;
+
+       unsigned int mtu;
+};
+
+/**
+ * struct nfp_net - NFP network device structure
+ * @dp:                        Datapath structure
+ * @fw_ver:            Firmware version
  * @cap:                Capabilities advertised by the Firmware
  * @max_mtu:            Maximum supported MTU advertised by the Firmware
+ * @rss_hfunc:         Selected RSS hash function
  * @rss_cfg:            RSS configuration
  * @rss_key:            RSS secret key
  * @rss_itbl:           RSS indirection table
@@ -454,17 +505,9 @@ struct nfp_stat_pair {
  * @rx_filter_change:  Jiffies when statistics last changed
  * @rx_filter_stats_timer:  Timer for polling filter offload statistics
  * @rx_filter_lock:    Lock protecting timer state changes (teardown)
+ * @max_r_vecs:                Number of allocated interrupt vectors for RX/TX
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
- * @num_tx_rings:       Currently configured number of TX rings
- * @num_stack_tx_rings:        Number of TX rings used by the stack (not XDP)
- * @num_rx_rings:       Currently configured number of RX rings
- * @txd_cnt:            Size of the TX ring in number of descriptors
- * @rxd_cnt:            Size of the RX ring in number of descriptors
- * @tx_rings:           Array of pre-allocated TX ring structures
- * @rx_rings:           Array of pre-allocated RX ring structures
- * @max_r_vecs:                Number of allocated interrupt vectors for RX/TX
- * @num_r_vecs:         Number of used ring vectors
  * @r_vecs:             Pre-allocated array of ring vectors
  * @irq_entries:        Pre-allocated array of MSI-X entries
  * @lsc_handler:        Handler for Link State Change interrupt
@@ -488,36 +531,24 @@ struct nfp_stat_pair {
  * @vxlan_ports:       VXLAN ports for RX inner csum offload communicated to HW
  * @vxlan_usecnt:      IPv4/IPv6 VXLAN port use counts
  * @qcp_cfg:            Pointer to QCP queue used for configuration notification
- * @ctrl_bar:           Pointer to mapped control BAR
  * @tx_bar:             Pointer to mapped TX queues
  * @rx_bar:             Pointer to mapped FL/RX queues
  * @debugfs_dir:       Device directory in debugfs
  * @ethtool_dump_flag: Ethtool dump flag
  * @port_list:         Entry on device port list
+ * @pdev:              Backpointer to PCI device
  * @cpp:               CPP device handle if available
+ * @eth_port:          Translated ETH Table port entry
  */
 struct nfp_net {
-       struct pci_dev *pdev;
-       struct net_device *netdev;
-
-       unsigned is_vf:1;
-       unsigned bpf_offload_skip_sw:1;
-       unsigned bpf_offload_xdp:1;
-
-       u32 ctrl;
-       u32 fl_bufsz;
-
-       u32 rx_offset;
-
-       struct bpf_prog *xdp_prog;
-
-       struct nfp_net_tx_ring *tx_rings;
-       struct nfp_net_rx_ring *rx_rings;
+       struct nfp_net_dp dp;
 
        struct nfp_net_fw_version fw_ver;
+
        u32 cap;
        u32 max_mtu;
 
+       u8 rss_hfunc;
        u32 rss_cfg;
        u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
        u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
@@ -530,18 +561,10 @@ struct nfp_net {
        unsigned int max_tx_rings;
        unsigned int max_rx_rings;
 
-       unsigned int num_tx_rings;
-       unsigned int num_stack_tx_rings;
-       unsigned int num_rx_rings;
-
        int stride_tx;
        int stride_rx;
 
-       int txd_cnt;
-       int rxd_cnt;
-
        unsigned int max_r_vecs;
-       unsigned int num_r_vecs;
        struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS];
        struct msix_entry irq_entries[NFP_NET_MAX_IRQS];
 
@@ -575,7 +598,6 @@ struct nfp_net {
 
        u8 __iomem *qcp_cfg;
 
-       u8 __iomem *ctrl_bar;
        u8 __iomem *tx_bar;
        u8 __iomem *rx_bar;
 
@@ -584,14 +606,10 @@ struct nfp_net {
 
        struct list_head port_list;
 
+       struct pci_dev *pdev;
        struct nfp_cpp *cpp;
-};
 
-struct nfp_net_ring_set {
-       unsigned int n_rings;
-       unsigned int mtu;
-       unsigned int dcnt;
-       void *rings;
+       struct nfp_eth_table_port *eth_port;
 };
 
 /* Functions to read/write from/to a BAR
@@ -599,42 +617,42 @@ struct nfp_net_ring_set {
  */
 static inline u8 nn_readb(struct nfp_net *nn, int off)
 {
-       return readb(nn->ctrl_bar + off);
+       return readb(nn->dp.ctrl_bar + off);
 }
 
 static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
 {
-       writeb(val, nn->ctrl_bar + off);
+       writeb(val, nn->dp.ctrl_bar + off);
 }
 
 static inline u16 nn_readw(struct nfp_net *nn, int off)
 {
-       return readw(nn->ctrl_bar + off);
+       return readw(nn->dp.ctrl_bar + off);
 }
 
 static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
 {
-       writew(val, nn->ctrl_bar + off);
+       writew(val, nn->dp.ctrl_bar + off);
 }
 
 static inline u32 nn_readl(struct nfp_net *nn, int off)
 {
-       return readl(nn->ctrl_bar + off);
+       return readl(nn->dp.ctrl_bar + off);
 }
 
 static inline void nn_writel(struct nfp_net *nn, int off, u32 val)
 {
-       writel(val, nn->ctrl_bar + off);
+       writel(val, nn->dp.ctrl_bar + off);
 }
 
 static inline u64 nn_readq(struct nfp_net *nn, int off)
 {
-       return readq(nn->ctrl_bar + off);
+       return readq(nn->dp.ctrl_bar + off);
 }
 
 static inline void nn_writeq(struct nfp_net *nn, int off, u64 val)
 {
-       writeq(val, nn->ctrl_bar + off);
+       writeq(val, nn->dp.ctrl_bar + off);
 }
 
 /* Flush posted PCI writes by reading something without side effects */
@@ -776,6 +794,7 @@ void nfp_net_netdev_clean(struct net_device *netdev);
 void nfp_net_set_ethtool_ops(struct net_device *netdev);
 void nfp_net_info(struct nfp_net *nn);
 int nfp_net_reconfig(struct nfp_net *nn, u32 update);
+unsigned int nfp_net_rss_key_sz(struct nfp_net *nn);
 void nfp_net_rss_write_itbl(struct nfp_net *nn);
 void nfp_net_rss_write_key(struct nfp_net *nn);
 void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
@@ -787,9 +806,9 @@ void nfp_net_irqs_disable(struct pci_dev *pdev);
 void
 nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
                    unsigned int n);
-int
-nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
-                     struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx);
+
+struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn);
+int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new);
 
 #ifdef CONFIG_NFP_DEBUG
 void nfp_net_debugfs_create(void);
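
The net effect of the nfp_net.h reshuffle above is that every field the fast path touches now lives in struct nfp_net_dp, and reconfiguration becomes copy-modify-swap through the two new entry points. A sketch of the assumed calling pattern (the real callers, e.g. the ethtool ring-size ops, are outside this hunk):

    static int example_set_txd_cnt(struct nfp_net *nn, unsigned int txd_cnt)
    {
            struct nfp_net_dp *new;

            new = nfp_net_clone_dp(nn);     /* duplicate the live datapath */
            if (!new)
                    return -ENOMEM;

            new->txd_cnt = txd_cnt;         /* change only what differs */

            /* allocates fresh rings, swaps them in, frees the old state */
            return nfp_net_ring_reconfig(nn, new);
    }
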
index 9179a99563afa86f4ed7bbcb41b045c2568243de..8f2da128ce0f2554622afc8ba7eb617074a03d8d 100644 (file)
@@ -41,6 +41,7 @@
  *          Chris Telfer <chris.telfer@netronome.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/module.h>
@@ -66,6 +67,7 @@
 #include <net/pkt_cls.h>
 #include <net/vxlan.h>
 
+#include "nfpcore/nfp_nsp_eth.h"
 #include "nfp_net_ctrl.h"
 #include "nfp_net.h"
 
@@ -83,20 +85,18 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
        put_unaligned_le32(reg, fw_ver);
 }
 
-static dma_addr_t
-nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz,
-                  int direction)
+static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
 {
-       return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM,
-                             bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+       return dma_map_single(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
+                             dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+                             dp->rx_dma_dir);
 }
 
-static void
-nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr,
-                    unsigned int bufsz, int direction)
+static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
 {
-       dma_unmap_single(&nn->pdev->dev, dma_addr,
-                        bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+       dma_unmap_single(dp->dev, dma_addr,
+                        dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+                        dp->rx_dma_dir);
 }
 
 /* Firmware reconfig
@@ -327,19 +327,22 @@ void
 nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
                    unsigned int n)
 {
+       struct nfp_net_dp *dp = &nn->dp;
+
        nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
-       nn->num_r_vecs = nn->max_r_vecs;
+       dp->num_r_vecs = nn->max_r_vecs;
 
        memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
 
-       if (nn->num_rx_rings > nn->num_r_vecs ||
-           nn->num_tx_rings > nn->num_r_vecs)
-               nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n",
-                       nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs);
+       if (dp->num_rx_rings > dp->num_r_vecs ||
+           dp->num_tx_rings > dp->num_r_vecs)
+               dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
+                        dp->num_rx_rings, dp->num_tx_rings,
+                        dp->num_r_vecs);
 
-       nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings);
-       nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings);
-       nn->num_stack_tx_rings = nn->num_tx_rings;
+       dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
+       dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
+       dp->num_stack_tx_rings = dp->num_tx_rings;
 }
 
 /**
@@ -394,11 +397,11 @@ static void nfp_net_read_link_status(struct nfp_net *nn)
        nn->link_up = link_up;
 
        if (nn->link_up) {
-               netif_carrier_on(nn->netdev);
-               netdev_info(nn->netdev, "NIC Link is Up\n");
+               netif_carrier_on(nn->dp.netdev);
+               netdev_info(nn->dp.netdev, "NIC Link is Up\n");
        } else {
-               netif_carrier_off(nn->netdev);
-               netdev_info(nn->netdev, "NIC Link is Down\n");
+               netif_carrier_off(nn->dp.netdev);
+               netdev_info(nn->dp.netdev, "NIC Link is Down\n");
        }
 out:
        spin_unlock_irqrestore(&nn->link_status_lock, flags);
@@ -476,10 +479,7 @@ nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
        rx_ring->r_vec = r_vec;
 
        rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
-       rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1);
-
        rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
-       rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx);
 }
 
 /**
@@ -530,7 +530,7 @@ nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 
        entry = &nn->irq_entries[vector_idx];
 
-       snprintf(name, name_sz, format, netdev_name(nn->netdev));
+       snprintf(name, name_sz, format, netdev_name(nn->dp.netdev));
        err = request_irq(entry->vector, handler, 0, name, nn);
        if (err) {
                nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
@@ -617,7 +617,6 @@ static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
 
 /**
  * nfp_net_tx_tso() - Set up Tx descriptor for LSO
- * @nn:  NFP Net device
  * @r_vec: per-ring structure
  * @txbuf: Pointer to driver soft TX descriptor
  * @txd: Pointer to HW TX descriptor
@@ -626,7 +625,7 @@ static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
  * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
  * Return error on packet header greater than maximum supported LSO header size.
  */
-static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
                           struct nfp_net_tx_buf *txbuf,
                           struct nfp_net_tx_desc *txd, struct sk_buff *skb)
 {
@@ -657,7 +656,7 @@ static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 
 /**
  * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
- * @nn:  NFP Net device
+ * @dp:  NFP Net data path struct
  * @r_vec: per-ring structure
  * @txbuf: Pointer to driver soft TX descriptor
  * @txd: Pointer to TX descriptor
@@ -666,7 +665,8 @@ static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
  * This function sets the TX checksum flags in the TX descriptor based
  * on the configuration and the protocol of the packet to be transmitted.
  */
-static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+static void nfp_net_tx_csum(struct nfp_net_dp *dp,
+                           struct nfp_net_r_vector *r_vec,
                            struct nfp_net_tx_buf *txbuf,
                            struct nfp_net_tx_desc *txd, struct sk_buff *skb)
 {
@@ -674,7 +674,7 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
        struct iphdr *iph;
        u8 l4_hdr;
 
-       if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
+       if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
                return;
 
        if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -693,8 +693,7 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
        } else if (ipv6h->version == 6) {
                l4_hdr = ipv6h->nexthdr;
        } else {
-               nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n",
-                                 iph->version);
+               nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
                return;
        }
 
@@ -706,8 +705,7 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
                txd->flags |= PCIE_DESC_TX_UDP_CSUM;
                break;
        default:
-               nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n",
-                                 l4_hdr);
+               nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
                return;
        }
 
@@ -737,28 +735,31 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 {
        struct nfp_net *nn = netdev_priv(netdev);
        const struct skb_frag_struct *frag;
-       struct nfp_net_r_vector *r_vec;
        struct nfp_net_tx_desc *txd, txdg;
-       struct nfp_net_tx_buf *txbuf;
        struct nfp_net_tx_ring *tx_ring;
+       struct nfp_net_r_vector *r_vec;
+       struct nfp_net_tx_buf *txbuf;
        struct netdev_queue *nd_q;
+       struct nfp_net_dp *dp;
        dma_addr_t dma_addr;
        unsigned int fsize;
        int f, nr_frags;
        int wr_idx;
        u16 qidx;
 
+       dp = &nn->dp;
        qidx = skb_get_queue_mapping(skb);
-       tx_ring = &nn->tx_rings[qidx];
+       tx_ring = &dp->tx_rings[qidx];
        r_vec = tx_ring->r_vec;
-       nd_q = netdev_get_tx_queue(nn->netdev, qidx);
+       nd_q = netdev_get_tx_queue(dp->netdev, qidx);
 
        nr_frags = skb_shinfo(skb)->nr_frags;
 
        if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
-               nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n",
-                                 qidx, tx_ring->wr_p, tx_ring->rd_p);
+               nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
+                          qidx, tx_ring->wr_p, tx_ring->rd_p);
                netif_tx_stop_queue(nd_q);
+               nfp_net_tx_xmit_more_flush(tx_ring);
                u64_stats_update_begin(&r_vec->tx_sync);
                r_vec->tx_busy++;
                u64_stats_update_end(&r_vec->tx_sync);
@@ -766,9 +767,9 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
        }
 
        /* Start with the head skbuff */
-       dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb),
+       dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
                                  DMA_TO_DEVICE);
-       if (dma_mapping_error(&nn->pdev->dev, dma_addr))
+       if (dma_mapping_error(dp->dev, dma_addr))
                goto err_free;
 
        wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
@@ -792,11 +793,11 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
        txd->mss = 0;
        txd->l4_offset = 0;
 
-       nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb);
+       nfp_net_tx_tso(r_vec, txbuf, txd, skb);
 
-       nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb);
+       nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
 
-       if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
+       if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
                txd->flags |= PCIE_DESC_TX_VLAN;
                txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
        }
@@ -810,9 +811,9 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
                        frag = &skb_shinfo(skb)->frags[f];
                        fsize = skb_frag_size(frag);
 
-                       dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0,
+                       dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
                                                    fsize, DMA_TO_DEVICE);
-                       if (dma_mapping_error(&nn->pdev->dev, dma_addr))
+                       if (dma_mapping_error(dp->dev, dma_addr))
                                goto err_unmap;
 
                        wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1);
@@ -851,8 +852,7 @@ err_unmap:
        --f;
        while (f >= 0) {
                frag = &skb_shinfo(skb)->frags[f];
-               dma_unmap_page(&nn->pdev->dev,
-                              tx_ring->txbufs[wr_idx].dma_addr,
+               dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                               skb_frag_size(frag), DMA_TO_DEVICE);
                tx_ring->txbufs[wr_idx].skb = NULL;
                tx_ring->txbufs[wr_idx].dma_addr = 0;
@@ -861,13 +861,14 @@ err_unmap:
                if (wr_idx < 0)
                        wr_idx += tx_ring->cnt;
        }
-       dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr,
+       dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                         skb_headlen(skb), DMA_TO_DEVICE);
        tx_ring->txbufs[wr_idx].skb = NULL;
        tx_ring->txbufs[wr_idx].dma_addr = 0;
        tx_ring->txbufs[wr_idx].fidx = -2;
 err_free:
-       nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n");
+       nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
+       nfp_net_tx_xmit_more_flush(tx_ring);
        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_errors++;
        u64_stats_update_end(&r_vec->tx_sync);
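
The nfp_net_tx_xmit_more_flush() calls added to the busy and error paths above matter because of xmit_more batching: earlier descriptors may be queued with the doorbell deliberately deferred, and an early return would otherwise strand them. The helper's body is outside this hunk; its assumed shape, using the driver's existing nfp_qcp_wr_ptr_add() doorbell accessor:

    static void example_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
    {
            /* descriptor writes must be visible before the kick */
            wmb();
            nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
            tx_ring->wr_ptr_add = 0;
    }
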
@@ -884,7 +885,7 @@ err_free:
 static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
+       struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        const struct skb_frag_struct *frag;
        struct netdev_queue *nd_q;
        u32 done_pkts = 0, done_bytes = 0;
@@ -918,8 +919,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 
                if (fidx == -1) {
                        /* unmap head */
-                       dma_unmap_single(&nn->pdev->dev,
-                                        tx_ring->txbufs[idx].dma_addr,
+                       dma_unmap_single(dp->dev, tx_ring->txbufs[idx].dma_addr,
                                         skb_headlen(skb), DMA_TO_DEVICE);
 
                        done_pkts += tx_ring->txbufs[idx].pkt_cnt;
@@ -927,8 +927,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
                } else {
                        /* unmap fragment */
                        frag = &skb_shinfo(skb)->frags[fidx];
-                       dma_unmap_page(&nn->pdev->dev,
-                                      tx_ring->txbufs[idx].dma_addr,
+                       dma_unmap_page(dp->dev, tx_ring->txbufs[idx].dma_addr,
                                       skb_frag_size(frag), DMA_TO_DEVICE);
                }
 
@@ -948,7 +947,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
        r_vec->tx_pkts += done_pkts;
        u64_stats_update_end(&r_vec->tx_sync);
 
-       nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
+       nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
        netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
        if (nfp_net_tx_ring_should_wake(tx_ring)) {
                /* Make sure TX thread will see updated tx_ring->rd_p */
@@ -966,7 +965,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
+       struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        u32 done_pkts = 0, done_bytes = 0;
        int idx, todo;
        u32 qcp_rd_p;
@@ -989,8 +988,7 @@ static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
                if (!tx_ring->txbufs[idx].frag)
                        continue;
 
-               nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr,
-                                    nn->fl_bufsz, DMA_BIDIRECTIONAL);
+               nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[idx].dma_addr);
                __free_page(virt_to_page(tx_ring->txbufs[idx].frag));
 
                done_pkts++;
@@ -1015,17 +1013,16 @@ static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
 
 /**
  * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
- * @nn:                NFP Net device
+ * @dp:                NFP Net data path struct
  * @tx_ring:   TX ring structure
  *
  * Assumes that the device is stopped
  */
 static void
-nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
+nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        const struct skb_frag_struct *frag;
-       struct pci_dev *pdev = nn->pdev;
        struct netdev_queue *nd_q;
 
        while (tx_ring->rd_p != tx_ring->wr_p) {
@@ -1036,8 +1033,7 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
                tx_buf = &tx_ring->txbufs[idx];
 
                if (tx_ring == r_vec->xdp_ring) {
-                       nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr,
-                                            nn->fl_bufsz, DMA_BIDIRECTIONAL);
+                       nfp_net_dma_unmap_rx(dp, tx_buf->dma_addr);
                        __free_page(virt_to_page(tx_ring->txbufs[idx].frag));
                } else {
                        struct sk_buff *skb = tx_ring->txbufs[idx].skb;
@@ -1045,13 +1041,13 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
 
                        if (tx_buf->fidx == -1) {
                                /* unmap head */
-                               dma_unmap_single(&pdev->dev, tx_buf->dma_addr,
+                               dma_unmap_single(dp->dev, tx_buf->dma_addr,
                                                 skb_headlen(skb),
                                                 DMA_TO_DEVICE);
                        } else {
                                /* unmap fragment */
                                frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
-                               dma_unmap_page(&pdev->dev, tx_buf->dma_addr,
+                               dma_unmap_page(dp->dev, tx_buf->dma_addr,
                                               skb_frag_size(frag),
                                               DMA_TO_DEVICE);
                        }
@@ -1078,7 +1074,7 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
        if (tx_ring == r_vec->xdp_ring)
                return;
 
-       nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
+       nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
        netdev_tx_reset_queue(nd_q);
 }
 
@@ -1087,7 +1083,7 @@ static void nfp_net_tx_timeout(struct net_device *netdev)
        struct nfp_net *nn = netdev_priv(netdev);
        int i;
 
-       for (i = 0; i < nn->netdev->real_num_tx_queues; i++) {
+       for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) {
                if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
                        continue;
                nn_warn(nn, "TX timeout on ring: %d\n", i);
@@ -1098,16 +1094,17 @@ static void nfp_net_tx_timeout(struct net_device *netdev)
 /* Receive processing
  */
 static unsigned int
-nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu)
+nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
 {
        unsigned int fl_bufsz;
 
        fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
-       if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
+       fl_bufsz += dp->rx_dma_off;
+       if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
                fl_bufsz += NFP_NET_MAX_PREPEND;
        else
-               fl_bufsz += nn->rx_offset;
-       fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu;
+               fl_bufsz += dp->rx_offset;
+       fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
 
        fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
        fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
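
For reference, the freelist buffer size above decomposes as follows (symbolic, since several of these constants are configuration-dependent):

    fl_bufsz = NFP_NET_RX_BUF_HEADROOM   /* driver headroom */
             + dp->rx_dma_off            /* extra XDP headroom, often 0 */
             + dp->rx_offset             /* or NFP_NET_MAX_PREPEND when dynamic */
             + ETH_HLEN + 2 * VLAN_HLEN  /* 14 + 8 bytes of L2 overhead */
             + dp->mtu;                  /* e.g. 1500 */
    fl_bufsz = SKB_DATA_ALIGN(fl_bufsz)
             + SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

The trailing skb_shared_info term is what lets the RX path hand the fragment straight to build_skb() without copying.
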
@@ -1126,62 +1123,56 @@ nfp_net_free_frag(void *frag, bool xdp)
 
 /**
  * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
+ * @dp:                NFP Net data path struct
  * @rx_ring:   RX ring structure of the skb
  * @dma_addr:  Pointer to storage for DMA address (output param)
- * @fl_bufsz:  size of freelist buffers
- * @xdp:       Whether XDP is enabled
  *
  * This function will allocate a new page frag and map it for DMA.
  *
  * Return: allocated page frag or NULL on failure.
  */
 static void *
-nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
-                    unsigned int fl_bufsz, bool xdp)
+nfp_net_rx_alloc_one(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
+                    dma_addr_t *dma_addr)
 {
-       struct nfp_net *nn = rx_ring->r_vec->nfp_net;
-       int direction;
        void *frag;
 
-       if (!xdp)
-               frag = netdev_alloc_frag(fl_bufsz);
+       if (!dp->xdp_prog)
+               frag = netdev_alloc_frag(dp->fl_bufsz);
        else
                frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD));
        if (!frag) {
-               nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
+               nn_dp_warn(dp, "Failed to alloc receive page frag\n");
                return NULL;
        }
 
-       direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-
-       *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction);
-       if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
-               nfp_net_free_frag(frag, xdp);
-               nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+       *dma_addr = nfp_net_dma_map_rx(dp, frag);
+       if (dma_mapping_error(dp->dev, *dma_addr)) {
+               nfp_net_free_frag(frag, dp->xdp_prog);
+               nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
                return NULL;
        }
 
        return frag;
 }
 
-static void *
-nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr)
+static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
 {
        void *frag;
 
-       if (!nn->xdp_prog)
-               frag = napi_alloc_frag(nn->fl_bufsz);
+       if (!dp->xdp_prog)
+               frag = napi_alloc_frag(dp->fl_bufsz);
        else
                frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD));
        if (!frag) {
-               nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
+               nn_dp_warn(dp, "Failed to alloc receive page frag\n");
                return NULL;
        }
 
-       *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction);
-       if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
-               nfp_net_free_frag(frag, nn->xdp_prog);
-               nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+       *dma_addr = nfp_net_dma_map_rx(dp, frag);
+       if (dma_mapping_error(dp->dev, *dma_addr)) {
+               nfp_net_free_frag(frag, dp->xdp_prog);
+               nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
                return NULL;
        }
 
@@ -1190,11 +1181,13 @@ nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr)
 
 /**
  * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
+ * @dp:                NFP Net data path struct
  * @rx_ring:   RX ring structure
  * @frag:      page fragment buffer
  * @dma_addr:  DMA address of skb mapping
  */
-static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
+static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
+                               struct nfp_net_rx_ring *rx_ring,
                                void *frag, dma_addr_t dma_addr)
 {
        unsigned int wr_idx;
@@ -1208,7 +1201,8 @@ static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
        /* Fill freelist descriptor */
        rx_ring->rxds[wr_idx].fld.reserved = 0;
        rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
-       nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr);
+       nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
+                             dma_addr + dp->rx_dma_off);
 
        rx_ring->wr_p++;
        rx_ring->wr_ptr_add++;
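
Note the dma_addr + dp->rx_dma_off written into the freelist descriptor above: this is how XDP headroom is reserved in hardware. The NIC begins writing packet data rx_dma_off bytes into the mapped region, leaving the leading span untouched for a later bpf_xdp_adjust_head() to grow into:

    [ dma_addr, dma_addr + rx_dma_off )   headroom, never written by the device
    [ dma_addr + rx_dma_off, ... )        packet data as DMA-ed by the NIC
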
@@ -1249,19 +1243,17 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
 
 /**
  * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
- * @nn:                NFP Net device
+ * @dp:                NFP Net data path struct
  * @rx_ring:   RX ring to remove buffers from
- * @xdp:       Whether XDP is enabled
  *
  * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
  * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
  * to restore required ring geometry.
  */
 static void
-nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
-                         bool xdp)
+nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
+                         struct nfp_net_rx_ring *rx_ring)
 {
-       int direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
        unsigned int i;
 
        for (i = 0; i < rx_ring->cnt - 1; i++) {
@@ -1272,9 +1264,8 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
                if (!rx_ring->rxbufs[i].frag)
                        continue;
 
-               nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr,
-                                    rx_ring->bufsz, direction);
-               nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp);
+               nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
+               nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
                rx_ring->rxbufs[i].dma_addr = 0;
                rx_ring->rxbufs[i].frag = NULL;
        }
@@ -1282,13 +1273,12 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
 
 /**
  * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
- * @nn:                NFP Net device
+ * @dp:                NFP Net data path struct
  * @rx_ring:   RX ring to allocate buffers for
- * @xdp:       Whether XDP is enabled
  */
 static int
-nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
-                          bool xdp)
+nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
+                          struct nfp_net_rx_ring *rx_ring)
 {
        struct nfp_net_rx_buf *rxbufs;
        unsigned int i;
@@ -1297,10 +1287,9 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
 
        for (i = 0; i < rx_ring->cnt - 1; i++) {
                rxbufs[i].frag =
-                       nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
-                                            rx_ring->bufsz, xdp);
+                       nfp_net_rx_alloc_one(dp, rx_ring, &rxbufs[i].dma_addr);
                if (!rxbufs[i].frag) {
-                       nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp);
+                       nfp_net_rx_ring_bufs_free(dp, rx_ring);
                        return -ENOMEM;
                }
        }
@@ -1310,14 +1299,17 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
 
 /**
  * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
+ * @dp:             NFP Net data path struct
  * @rx_ring: RX ring to fill
  */
-static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
+static void
+nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
+                             struct nfp_net_rx_ring *rx_ring)
 {
        unsigned int i;
 
        for (i = 0; i < rx_ring->cnt - 1; i++)
-               nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag,
+               nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
                                    rx_ring->rxbufs[i].dma_addr);
 }
 
@@ -1337,17 +1329,18 @@ static int nfp_net_rx_csum_has_errors(u16 flags)
 
 /**
  * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
- * @nn:  NFP Net device
+ * @dp:  NFP Net data path struct
  * @r_vec: per-ring structure
  * @rxd: Pointer to RX descriptor
  * @skb: Pointer to SKB
  */
-static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+static void nfp_net_rx_csum(struct nfp_net_dp *dp,
+                           struct nfp_net_r_vector *r_vec,
                            struct nfp_net_rx_desc *rxd, struct sk_buff *skb)
 {
        skb_checksum_none_assert(skb);
 
-       if (!(nn->netdev->features & NETIF_F_RXCSUM))
+       if (!(dp->netdev->features & NETIF_F_RXCSUM))
                return;
 
        if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
@@ -1398,24 +1391,21 @@ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
 
 static void
 nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
-                     struct nfp_net_rx_desc *rxd)
+                     void *data, struct nfp_net_rx_desc *rxd)
 {
-       struct nfp_net_rx_hash *rx_hash;
+       struct nfp_net_rx_hash *rx_hash = data;
 
        if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
                return;
 
-       rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
-
        nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
                         &rx_hash->hash);
 }
 
 static void *
 nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
-                  int meta_len)
+                  void *data, int meta_len)
 {
-       u8 *data = skb->data - meta_len;
        u32 meta_info;
 
        meta_info = get_unaligned_be32(data);
@@ -1445,8 +1435,9 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
 }
 
 static void
-nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
-               struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb)
+nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
+               struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
+               struct sk_buff *skb)
 {
        u64_stats_update_begin(&r_vec->rx_sync);
        r_vec->rx_drops++;
@@ -1458,15 +1449,15 @@ nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
        if (skb && rxbuf && skb->head == rxbuf->frag)
                page_ref_inc(virt_to_head_page(rxbuf->frag));
        if (rxbuf)
-               nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr);
+               nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
        if (skb)
                dev_kfree_skb_any(skb);
 }
 
 static bool
-nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
                   struct nfp_net_tx_ring *tx_ring,
-                  struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off,
+                  struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
                   unsigned int pkt_len)
 {
        struct nfp_net_tx_buf *txbuf;
@@ -1476,16 +1467,16 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
        int wr_idx;
 
        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
-               nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+               nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
                return false;
        }
 
-       new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr);
+       new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
        if (unlikely(!new_frag)) {
-               nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+               nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
                return false;
        }
-       nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+       nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
 
        wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
 
@@ -1497,14 +1488,14 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
        txbuf->pkt_cnt = 1;
        txbuf->real_len = pkt_len;
 
-       dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off,
+       dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
                                   pkt_len, DMA_BIDIRECTIONAL);
 
        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = PCIE_DESC_TX_EOP;
        txd->dma_len = cpu_to_le16(pkt_len);
-       nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off);
+       nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
        txd->data_len = cpu_to_le16(pkt_len);
 
        txd->flags = 0;
@@ -1516,14 +1507,24 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
        return true;
 }
 
-static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
+static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
+                          unsigned int *off, unsigned int *len)
 {
        struct xdp_buff xdp;
+       void *orig_data;
+       int ret;
+
+       xdp.data_hard_start = hard_start;
+       xdp.data = data + *off;
+       xdp.data_end = data + *off + *len;
+
+       orig_data = xdp.data;
+       ret = bpf_prog_run_xdp(prog, &xdp);
 
-       xdp.data = data;
-       xdp.data_end = data + len;
+       *len -= xdp.data - orig_data;
+       *off += xdp.data - orig_data;
 
-       return bpf_prog_run_xdp(prog, &xdp);
+       return ret;
 }
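
The rewrite of nfp_net_run_xdp() above is what makes bpf_xdp_adjust_head() usable: the program may move xdp.data anywhere within [xdp.data_hard_start, xdp.data_end], and the driver folds that movement back into its own offset and length. Worked through for a program that pulls the head back 16 bytes into the headroom:

    delta = xdp.data - orig_data;  /* -16 after bpf_xdp_adjust_head(ctx, -16) */
    *len -= delta;                 /* packet is now 16 bytes longer */
    *off += delta;                 /* and starts 16 bytes earlier in the frag */
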
 
 /**
@@ -1540,27 +1541,27 @@ static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
 static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 {
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
+       struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        struct nfp_net_tx_ring *tx_ring;
        struct bpf_prog *xdp_prog;
        unsigned int true_bufsz;
        struct sk_buff *skb;
        int pkts_polled = 0;
-       int rx_dma_map_dir;
        int idx;
 
        rcu_read_lock();
-       xdp_prog = READ_ONCE(nn->xdp_prog);
-       rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-       true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz;
+       xdp_prog = READ_ONCE(dp->xdp_prog);
+       true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
        tx_ring = r_vec->xdp_ring;
 
        while (pkts_polled < budget) {
-               unsigned int meta_len, data_len, data_off, pkt_len, pkt_off;
+               unsigned int meta_len, data_len, data_off, pkt_len;
+               u8 meta_prepend[NFP_NET_MAX_PREPEND];
                struct nfp_net_rx_buf *rxbuf;
                struct nfp_net_rx_desc *rxd;
                dma_addr_t new_dma_addr;
                void *new_frag;
+               u8 *meta;
 
                idx = rx_ring->rd_p & (rx_ring->cnt - 1);
 
@@ -1593,11 +1594,11 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                data_len = le16_to_cpu(rxd->rxd.data_len);
                pkt_len = data_len - meta_len;
 
-               if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
-                       pkt_off = meta_len;
+               if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
+                       data_off = NFP_NET_RX_BUF_HEADROOM + meta_len;
                else
-                       pkt_off = nn->rx_offset;
-               data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off;
+                       data_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_offset;
+               data_off += dp->rx_dma_off;
 
                /* Stats update */
                u64_stats_update_begin(&r_vec->rx_sync);
@@ -1605,30 +1606,55 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                r_vec->rx_bytes += pkt_len;
                u64_stats_update_end(&r_vec->rx_sync);
 
+               /* Pointer to start of metadata */
+               meta = rxbuf->frag + data_off - meta_len;
+
+               if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
+                            (dp->rx_offset && meta_len > dp->rx_offset))) {
+                       nn_dp_warn(dp, "oversized RX packet metadata %u\n",
+                                  meta_len);
+                       nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
+                       continue;
+               }
+
                if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
-                                 nn->bpf_offload_xdp)) {
+                                 dp->bpf_offload_xdp)) {
+                       unsigned int dma_off;
+                       void *hard_start;
                        int act;
 
-                       dma_sync_single_for_cpu(&nn->pdev->dev,
-                                               rxbuf->dma_addr + pkt_off,
-                                               pkt_len, DMA_BIDIRECTIONAL);
-                       act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off,
-                                             pkt_len);
+                       hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+                       dma_off = data_off - NFP_NET_RX_BUF_HEADROOM;
+                       dma_sync_single_for_cpu(dp->dev, rxbuf->dma_addr,
+                                               dma_off + pkt_len,
+                                               DMA_BIDIRECTIONAL);
+
+                       /* Move prepend out of the way */
+                       if (xdp_prog->xdp_adjust_head) {
+                               memcpy(meta_prepend, meta, meta_len);
+                               meta = meta_prepend;
+                       }
+
+                       act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
+                                             &data_off, &pkt_len);
                        switch (act) {
                        case XDP_PASS:
                                break;
                        case XDP_TX:
-                               if (unlikely(!nfp_net_tx_xdp_buf(nn, rx_ring,
+                               dma_off = data_off - NFP_NET_RX_BUF_HEADROOM;
+                               if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
                                                                 tx_ring, rxbuf,
-                                                                pkt_off, pkt_len)))
-                                       trace_xdp_exception(nn->netdev, xdp_prog, act);
+                                                                dma_off,
+                                                                pkt_len)))
+                                       trace_xdp_exception(dp->netdev,
+                                                           xdp_prog, act);
                                continue;
                        default:
                                bpf_warn_invalid_xdp_action(act);
                        case XDP_ABORTED:
-                               trace_xdp_exception(nn->netdev, xdp_prog, act);
+                               trace_xdp_exception(dp->netdev, xdp_prog, act);
                        case XDP_DROP:
-                               nfp_net_rx_give_one(rx_ring, rxbuf->frag,
+                               nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
                                                    rxbuf->dma_addr);
                                continue;
                        }
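
The metadata handling above relies on the prepend sitting immediately before the packet data in the same fragment:

    frag --> [ headroom ][ meta (meta_len) ][ packet (pkt_len) ]
                         ^meta              ^frag + data_off

    meta = rxbuf->frag + data_off - meta_len;

Copying it into meta_prepend before running XDP keeps the prepend readable even if the program grows the packet into that region with bpf_xdp_adjust_head().
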
@@ -1636,41 +1662,40 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
                skb = build_skb(rxbuf->frag, true_bufsz);
                if (unlikely(!skb)) {
-                       nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL);
+                       nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
                        continue;
                }
-               new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir,
-                                                 &new_dma_addr);
+               new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
                if (unlikely(!new_frag)) {
-                       nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb);
+                       nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
                        continue;
                }
 
-               nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz,
-                                    rx_dma_map_dir);
+               nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
 
-               nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+               nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
 
                skb_reserve(skb, data_off);
                skb_put(skb, pkt_len);
 
-               if (nn->fw_ver.major <= 3) {
-                       nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+               if (!dp->chained_metadata_format) {
+                       nfp_net_set_hash_desc(dp->netdev, skb, meta, rxd);
                } else if (meta_len) {
                        void *end;
 
-                       end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
-                       if (unlikely(end != skb->data)) {
-                               nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
-                               nfp_net_rx_drop(r_vec, rx_ring, NULL, skb);
+                       end = nfp_net_parse_meta(dp->netdev, skb, meta,
+                                                meta_len);
+                       if (unlikely(end != meta + meta_len)) {
+                               nn_dp_warn(dp, "invalid RX packet metadata\n");
+                               nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
                                continue;
                        }
                }
 
                skb_record_rx_queue(skb, rx_ring->idx);
-               skb->protocol = eth_type_trans(skb, nn->netdev);
+               skb->protocol = eth_type_trans(skb, dp->netdev);
 
-               nfp_net_rx_csum(nn, r_vec, rxd, skb);
+               nfp_net_rx_csum(dp, r_vec, rxd, skb);
 
                if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
                        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
@@ -1707,10 +1732,9 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
                        nfp_net_xdp_complete(r_vec->xdp_ring);
        }
 
-       if (pkts_polled < budget) {
-               napi_complete_done(napi, pkts_polled);
-               nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
-       }
+       if (pkts_polled < budget)
+               if (napi_complete_done(napi, pkts_polled))
+                       nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
 
        return pkts_polled;
 }
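
The poll-exit change above adopts the napi_complete_done() contract: it returns false when NAPI must stay scheduled (for instance while busy polling owns the context), and the device interrupt may only be unmasked on a true return. The generic idiom, with a hypothetical unmask helper standing in for the driver's own:

    if (work_done < budget && napi_complete_done(napi, work_done))
            enable_ring_irq(ring);  /* hypothetical: re-arm the device IRQ */
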
@@ -1725,13 +1749,12 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
 static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
-       struct pci_dev *pdev = nn->pdev;
+       struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
 
        kfree(tx_ring->txbufs);
 
        if (tx_ring->txds)
-               dma_free_coherent(&pdev->dev, tx_ring->size,
+               dma_free_coherent(dp->dev, tx_ring->size,
                                  tx_ring->txds, tx_ring->dma);
 
        tx_ring->cnt = 0;
@@ -1743,24 +1766,23 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 
 /**
  * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
+ * @dp:        NFP Net data path struct
  * @tx_ring:   TX Ring structure to allocate
- * @cnt:       Ring buffer count
  * @is_xdp:    True if ring will be used for XDP
  *
  * Return: 0 on success, negative errno otherwise.
  */
 static int
-nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp)
+nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring,
+                     bool is_xdp)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
-       struct pci_dev *pdev = nn->pdev;
        int sz;
 
-       tx_ring->cnt = cnt;
+       tx_ring->cnt = dp->txd_cnt;
 
        tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
-       tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
+       tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
                                            &tx_ring->dma, GFP_KERNEL);
        if (!tx_ring->txds)
                goto err_alloc;
@@ -1771,14 +1793,9 @@ nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp)
                goto err_alloc;
 
        if (!is_xdp)
-               netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask,
+               netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
                                    tx_ring->idx);
 
-       nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n",
-              tx_ring->idx, tx_ring->qcidx,
-              tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds,
-              is_xdp ? "XDP" : "");
-
        return 0;
 
 err_alloc:
@@ -1786,62 +1803,45 @@ err_alloc:
        return -ENOMEM;
 }
 
-static struct nfp_net_tx_ring *
-nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
-                           unsigned int num_stack_tx_rings)
+static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
 {
-       struct nfp_net_tx_ring *rings;
        unsigned int r;
 
-       rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
-       if (!rings)
-               return NULL;
+       dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
+                              GFP_KERNEL);
+       if (!dp->tx_rings)
+               return -ENOMEM;
 
-       for (r = 0; r < s->n_rings; r++) {
+       for (r = 0; r < dp->num_tx_rings; r++) {
                int bias = 0;
 
-               if (r >= num_stack_tx_rings)
-                       bias = num_stack_tx_rings;
+               if (r >= dp->num_stack_tx_rings)
+                       bias = dp->num_stack_tx_rings;
 
-               nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r);
+               nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
+                                    r);
 
-               if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias))
+               if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r], bias))
                        goto err_free_prev;
        }
 
-       return s->rings = rings;
+       return 0;
 
 err_free_prev:
        while (r--)
-               nfp_net_tx_ring_free(&rings[r]);
-       kfree(rings);
-       return NULL;
-}
-
-static void
-nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
-{
-       struct nfp_net_ring_set new = *s;
-
-       s->dcnt = nn->txd_cnt;
-       s->rings = nn->tx_rings;
-       s->n_rings = nn->num_tx_rings;
-
-       nn->txd_cnt = new.dcnt;
-       nn->tx_rings = new.rings;
-       nn->num_tx_rings = new.n_rings;
+               nfp_net_tx_ring_free(&dp->tx_rings[r]);
+       kfree(dp->tx_rings);
+       return -ENOMEM;
 }
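
nfp_net_tx_rings_prepare() folds the old ring-set preparation into the data
path struct and keeps the usual partial-failure unwind: on the first ring that
fails to allocate, walk back over the rings that already succeeded. The
pattern, condensed with hypothetical names:

    static int rings_prepare(struct ring *rings, unsigned int n)
    {
            unsigned int r;

            for (r = 0; r < n; r++)
                    if (ring_alloc(&rings[r]))
                            goto err_free_prev;
            return 0;

    err_free_prev:
            while (r--)             /* frees only rings 0..r-1 */
                    ring_free(&rings[r]);
            return -ENOMEM;
    }
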
 
-static void
-nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s)
+static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
 {
-       struct nfp_net_tx_ring *rings = s->rings;
        unsigned int r;
 
-       for (r = 0; r < s->n_rings; r++)
-               nfp_net_tx_ring_free(&rings[r]);
+       for (r = 0; r < dp->num_tx_rings; r++)
+               nfp_net_tx_ring_free(&dp->tx_rings[r]);
 
-       kfree(rings);
+       kfree(dp->tx_rings);
 }
 
 /**
@@ -1851,13 +1851,12 @@ nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s)
 static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 {
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
-       struct pci_dev *pdev = nn->pdev;
+       struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
 
        kfree(rx_ring->rxbufs);
 
        if (rx_ring->rxds)
-               dma_free_coherent(&pdev->dev, rx_ring->size,
+               dma_free_coherent(dp->dev, rx_ring->size,
                                  rx_ring->rxds, rx_ring->dma);
 
        rx_ring->cnt = 0;
@@ -1869,26 +1868,19 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 
 /**
  * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
+ * @dp:              NFP Net data path struct
  * @rx_ring:  RX ring to allocate
- * @fl_bufsz: Size of buffers to allocate
- * @cnt:      Ring buffer count
  *
  * Return: 0 on success, negative errno otherwise.
  */
 static int
-nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz,
-                     u32 cnt)
+nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
 {
-       struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
-       struct nfp_net *nn = r_vec->nfp_net;
-       struct pci_dev *pdev = nn->pdev;
        int sz;
 
-       rx_ring->cnt = cnt;
-       rx_ring->bufsz = fl_bufsz;
-
+       rx_ring->cnt = dp->rxd_cnt;
        rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
-       rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size,
+       rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
                                            &rx_ring->dma, GFP_KERNEL);
        if (!rx_ring->rxds)
                goto err_alloc;
@@ -1898,10 +1890,6 @@ nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz,
        if (!rx_ring->rxbufs)
                goto err_alloc;
 
-       nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n",
-              rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx,
-              rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds);
-
        return 0;
 
 err_alloc:
@@ -1909,82 +1897,59 @@ err_alloc:
        return -ENOMEM;
 }
 
-static struct nfp_net_rx_ring *
-nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
-                           bool xdp)
+static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
 {
-       unsigned int fl_bufsz = nfp_net_calc_fl_bufsz(nn, s->mtu);
-       struct nfp_net_rx_ring *rings;
        unsigned int r;
 
-       rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
-       if (!rings)
-               return NULL;
+       dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
+                              GFP_KERNEL);
+       if (!dp->rx_rings)
+               return -ENOMEM;
 
-       for (r = 0; r < s->n_rings; r++) {
-               nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r);
+       for (r = 0; r < dp->num_rx_rings; r++) {
+               nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r);
 
-               if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt))
+               if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r]))
                        goto err_free_prev;
 
-               if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp))
+               if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r]))
                        goto err_free_ring;
        }
 
-       return s->rings = rings;
+       return 0;
 
 err_free_prev:
        while (r--) {
-               nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
+               nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
 err_free_ring:
-               nfp_net_rx_ring_free(&rings[r]);
+               nfp_net_rx_ring_free(&dp->rx_rings[r]);
        }
-       kfree(rings);
-       return NULL;
-}
-
-static void
-nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
-{
-       struct nfp_net_ring_set new = *s;
-
-       s->mtu = nn->netdev->mtu;
-       s->dcnt = nn->rxd_cnt;
-       s->rings = nn->rx_rings;
-       s->n_rings = nn->num_rx_rings;
-
-       nn->netdev->mtu = new.mtu;
-       nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu);
-       nn->rxd_cnt = new.dcnt;
-       nn->rx_rings = new.rings;
-       nn->num_rx_rings = new.n_rings;
+       kfree(dp->rx_rings);
+       return -ENOMEM;
 }
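
The RX variant needs a two-stage unwind because each ring owns two resources
(descriptor memory and buffers); the err_free_ring label sits inside the
rollback loop so the half-initialized ring has only its descriptors freed
before the loop continues with fully initialized ones. Sketched with
hypothetical names:

    for (r = 0; r < n; r++) {
            if (ring_alloc(&rings[r]))
                    goto err_free_prev;     /* nothing of ring r allocated */
            if (ring_bufs_alloc(&rings[r]))
                    goto err_free_ring;     /* descriptors done, buffers not */
    }
    return 0;

    err_free_prev:
            while (r--) {
                    ring_bufs_free(&rings[r]);
    err_free_ring:
                    ring_free(&rings[r]);
            }
            return -ENOMEM;
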
 
-static void
-nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s,
-                        bool xdp)
+static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
 {
-       struct nfp_net_rx_ring *rings = s->rings;
        unsigned int r;
 
-       for (r = 0; r < s->n_rings; r++) {
-               nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
-               nfp_net_rx_ring_free(&rings[r]);
+       for (r = 0; r < dp->num_rx_rings; r++) {
+               nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
+               nfp_net_rx_ring_free(&dp->rx_rings[r]);
        }
 
-       kfree(rings);
+       kfree(dp->rx_rings);
 }
 
 static void
-nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
-                           int idx)
+nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
+                           struct nfp_net_r_vector *r_vec, int idx)
 {
-       r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL;
+       r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
        r_vec->tx_ring =
-               idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL;
+               idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
 
-       r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ?
-               &nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL;
+       r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
+               &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
 }
 
 static int
@@ -1994,11 +1959,11 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
        int err;
 
        /* Setup NAPI */
-       netif_napi_add(nn->netdev, &r_vec->napi,
+       netif_napi_add(nn->dp.netdev, &r_vec->napi,
                       nfp_net_poll, NAPI_POLL_WEIGHT);
 
        snprintf(r_vec->name, sizeof(r_vec->name),
-                "%s-rxtx-%d", nn->netdev->name, idx);
+                "%s-rxtx-%d", nn->dp.netdev->name, idx);
        err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
                          r_vec);
        if (err) {
@@ -2045,7 +2010,7 @@ void nfp_net_rss_write_key(struct nfp_net *nn)
 {
        int i;
 
-       for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4)
+       for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
                nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
                          get_unaligned_le32(nn->rss_key + i));
 }
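
The key-write loop is now bounded by nfp_net_rss_key_sz() (defined later in
this patch) rather than the fixed NFP_NET_CFG_RSS_KEY_SZ, since the XOR and
CRC32 hash functions need a shorter key or none at all. The access pattern,
assuming hypothetical bar/key locals:

    /* Copy a byte-array key into consecutive 32-bit LE registers. */
    for (i = 0; i < key_sz; i += 4)
            writel(get_unaligned_le32(key + i),
                   bar + NFP_NET_CFG_RSS_KEY + i);
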
@@ -2069,13 +2034,13 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
        /* copy RX interrupt coalesce parameters */
        value = (nn->rx_coalesce_max_frames << 16) |
                (factor * nn->rx_coalesce_usecs);
-       for (i = 0; i < nn->num_rx_rings; i++)
+       for (i = 0; i < nn->dp.num_rx_rings; i++)
                nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
 
        /* copy TX interrupt coalesce parameters */
        value = (nn->tx_coalesce_max_frames << 16) |
                (factor * nn->tx_coalesce_usecs);
-       for (i = 0; i < nn->num_tx_rings; i++)
+       for (i = 0; i < nn->dp.num_tx_rings; i++)
                nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
 }
 
@@ -2090,9 +2055,9 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
 static void nfp_net_write_mac_addr(struct nfp_net *nn)
 {
        nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
-                 get_unaligned_be32(nn->netdev->dev_addr));
+                 get_unaligned_be32(nn->dp.netdev->dev_addr));
        nn_writew(nn, NFP_NET_CFG_MACADDR + 6,
-                 get_unaligned_be16(nn->netdev->dev_addr + 4));
+                 get_unaligned_be16(nn->dp.netdev->dev_addr + 4));
 }
 
 static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
@@ -2116,7 +2081,7 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
        unsigned int r;
        int err;
 
-       new_ctrl = nn->ctrl;
+       new_ctrl = nn->dp.ctrl;
        new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
        update = NFP_NET_CFG_UPDATE_GEN;
        update |= NFP_NET_CFG_UPDATE_MSIX;
@@ -2133,14 +2098,14 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
        if (err)
                nn_err(nn, "Could not disable device: %d\n", err);
 
-       for (r = 0; r < nn->num_rx_rings; r++)
-               nfp_net_rx_ring_reset(&nn->rx_rings[r]);
-       for (r = 0; r < nn->num_tx_rings; r++)
-               nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]);
-       for (r = 0; r < nn->num_r_vecs; r++)
+       for (r = 0; r < nn->dp.num_rx_rings; r++)
+               nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
+       for (r = 0; r < nn->dp.num_tx_rings; r++)
+               nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
+       for (r = 0; r < nn->dp.num_r_vecs; r++)
                nfp_net_vec_clear_ring_data(nn, r);
 
-       nn->ctrl = new_ctrl;
+       nn->dp.ctrl = new_ctrl;
 }
 
 static void
@@ -2162,13 +2127,17 @@ nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
        nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
 }
 
-static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
+/**
+ * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
+ * @nn:      NFP Net device to reconfigure
+ */
+static int nfp_net_set_config_and_enable(struct nfp_net *nn)
 {
        u32 new_ctrl, update = 0;
        unsigned int r;
        int err;
 
-       new_ctrl = nn->ctrl;
+       new_ctrl = nn->dp.ctrl;
 
        if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
                nfp_net_rss_write_key(nn);
@@ -2184,22 +2153,22 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
                update |= NFP_NET_CFG_UPDATE_IRQMOD;
        }
 
-       for (r = 0; r < nn->num_tx_rings; r++)
-               nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r);
-       for (r = 0; r < nn->num_rx_rings; r++)
-               nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r);
+       for (r = 0; r < nn->dp.num_tx_rings; r++)
+               nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
+       for (r = 0; r < nn->dp.num_rx_rings; r++)
+               nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
 
-       nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
-                 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
+       nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ?
+                 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1);
 
-       nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
-                 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
+       nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ?
+                 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1);
 
        nfp_net_write_mac_addr(nn);
 
-       nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
+       nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.netdev->mtu);
        nn_writel(nn, NFP_NET_CFG_FLBUFSZ,
-                 nn->fl_bufsz - NFP_NET_RX_BUF_NON_DATA);
+                 nn->dp.fl_bufsz - NFP_NET_RX_BUF_NON_DATA);
 
        /* Enable device */
        new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
@@ -2211,37 +2180,26 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 
        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
        err = nfp_net_reconfig(nn, update);
+       if (err) {
+               nfp_net_clear_config_and_disable(nn);
+               return err;
+       }
 
-       nn->ctrl = new_ctrl;
+       nn->dp.ctrl = new_ctrl;
 
-       for (r = 0; r < nn->num_rx_rings; r++)
-               nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]);
+       for (r = 0; r < nn->dp.num_rx_rings; r++)
+               nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
 
        /* Since reconfiguration requests while NFP is down are ignored we
         * have to wipe the entire VXLAN configuration and reinitialize it.
         */
-       if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
+       if (nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN) {
                memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
                memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
-               udp_tunnel_get_rx_info(nn->netdev);
+               udp_tunnel_get_rx_info(nn->dp.netdev);
        }
 
-       return err;
-}
-
-/**
- * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
- * @nn:      NFP Net device to reconfigure
- */
-static int nfp_net_set_config_and_enable(struct nfp_net *nn)
-{
-       int err;
-
-       err = __nfp_net_set_config_and_enable(nn);
-       if (err)
-               nfp_net_clear_config_and_disable(nn);
-
-       return err;
+       return 0;
 }
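
The TXRS/RXRS enable writes above build an n-bit ring mask and must
special-case n == 64, because shifting a u64 by 64 is undefined behaviour in
C. The same guard as a hypothetical helper:

    /* Mask with the low @n bits set; @n may legally be 0..64. */
    static u64 ring_enable_mask(unsigned int n)
    {
            return n == 64 ? ~0ULL : (1ULL << n) - 1;
    }
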
 
 /**
@@ -2252,12 +2210,12 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 {
        unsigned int r;
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
+       for (r = 0; r < nn->dp.num_r_vecs; r++) {
                napi_enable(&nn->r_vecs[r].napi);
                enable_irq(nn->r_vecs[r].irq_vector);
        }
 
-       netif_tx_wake_all_queues(nn->netdev);
+       netif_tx_wake_all_queues(nn->dp.netdev);
 
        enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
        nfp_net_read_link_status(nn);
@@ -2266,22 +2224,8 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 static int nfp_net_netdev_open(struct net_device *netdev)
 {
        struct nfp_net *nn = netdev_priv(netdev);
-       struct nfp_net_ring_set rx = {
-               .n_rings = nn->num_rx_rings,
-               .mtu = nn->netdev->mtu,
-               .dcnt = nn->rxd_cnt,
-       };
-       struct nfp_net_ring_set tx = {
-               .n_rings = nn->num_tx_rings,
-               .dcnt = nn->txd_cnt,
-       };
        int err, r;
 
-       if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
-               nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl);
-               return -EBUSY;
-       }
-
        /* Step 1: Allocate resources for rings and the like
         * - Request interrupts
         * - Allocate RX and TX ring resources
@@ -2299,33 +2243,28 @@ static int nfp_net_netdev_open(struct net_device *netdev)
                goto err_free_exn;
        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
+       for (r = 0; r < nn->dp.num_r_vecs; r++) {
                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
                if (err)
                        goto err_cleanup_vec_p;
        }
 
-       nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog);
-       if (!nn->rx_rings) {
-               err = -ENOMEM;
+       err = nfp_net_rx_rings_prepare(nn, &nn->dp);
+       if (err)
                goto err_cleanup_vec;
-       }
 
-       nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx,
-                                                  nn->num_stack_tx_rings);
-       if (!nn->tx_rings) {
-               err = -ENOMEM;
+       err = nfp_net_tx_rings_prepare(nn, &nn->dp);
+       if (err)
                goto err_free_rx_rings;
-       }
 
        for (r = 0; r < nn->max_r_vecs; r++)
-               nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
+               nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
 
-       err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings);
+       err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
        if (err)
                goto err_free_rings;
 
-       err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings);
+       err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
        if (err)
                goto err_free_rings;
 
@@ -2351,11 +2290,11 @@ static int nfp_net_netdev_open(struct net_device *netdev)
        return 0;
 
 err_free_rings:
-       nfp_net_tx_ring_set_free(nn, &tx);
+       nfp_net_tx_rings_free(&nn->dp);
 err_free_rx_rings:
-       nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog);
+       nfp_net_rx_rings_free(&nn->dp);
 err_cleanup_vec:
-       r = nn->num_r_vecs;
+       r = nn->dp.num_r_vecs;
 err_cleanup_vec_p:
        while (r--)
                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
@@ -2374,15 +2313,15 @@ static void nfp_net_close_stack(struct nfp_net *nn)
        unsigned int r;
 
        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
-       netif_carrier_off(nn->netdev);
+       netif_carrier_off(nn->dp.netdev);
        nn->link_up = false;
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
+       for (r = 0; r < nn->dp.num_r_vecs; r++) {
                disable_irq(nn->r_vecs[r].irq_vector);
                napi_disable(&nn->r_vecs[r].napi);
        }
 
-       netif_tx_disable(nn->netdev);
+       netif_tx_disable(nn->dp.netdev);
 }
 
 /**
@@ -2393,17 +2332,17 @@ static void nfp_net_close_free_all(struct nfp_net *nn)
 {
        unsigned int r;
 
-       for (r = 0; r < nn->num_rx_rings; r++) {
-               nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog);
-               nfp_net_rx_ring_free(&nn->rx_rings[r]);
+       for (r = 0; r < nn->dp.num_rx_rings; r++) {
+               nfp_net_rx_ring_bufs_free(&nn->dp, &nn->dp.rx_rings[r]);
+               nfp_net_rx_ring_free(&nn->dp.rx_rings[r]);
        }
-       for (r = 0; r < nn->num_tx_rings; r++)
-               nfp_net_tx_ring_free(&nn->tx_rings[r]);
-       for (r = 0; r < nn->num_r_vecs; r++)
+       for (r = 0; r < nn->dp.num_tx_rings; r++)
+               nfp_net_tx_ring_free(&nn->dp.tx_rings[r]);
+       for (r = 0; r < nn->dp.num_r_vecs; r++)
                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 
-       kfree(nn->rx_rings);
-       kfree(nn->tx_rings);
+       kfree(nn->dp.rx_rings);
+       kfree(nn->dp.tx_rings);
 
        nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
        nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
@@ -2417,11 +2356,6 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 {
        struct nfp_net *nn = netdev_priv(netdev);
 
-       if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
-               nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
-               return 0;
-       }
-
        /* Step 1: Disable RX and TX rings from the Linux kernel perspective
         */
        nfp_net_close_stack(nn);
@@ -2443,7 +2377,7 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
        struct nfp_net *nn = netdev_priv(netdev);
        u32 new_ctrl;
 
-       new_ctrl = nn->ctrl;
+       new_ctrl = nn->dp.ctrl;
 
        if (netdev->flags & IFF_PROMISC) {
                if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
@@ -2454,13 +2388,13 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
                new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
        }
 
-       if (new_ctrl == nn->ctrl)
+       if (new_ctrl == nn->dp.ctrl)
                return;
 
        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
        nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
 
-       nn->ctrl = new_ctrl;
+       nn->dp.ctrl = new_ctrl;
 }
 
 static void nfp_net_rss_init_itbl(struct nfp_net *nn)
@@ -2469,61 +2403,76 @@ static void nfp_net_rss_init_itbl(struct nfp_net *nn)
 
        for (i = 0; i < sizeof(nn->rss_itbl); i++)
                nn->rss_itbl[i] =
-                       ethtool_rxfh_indir_default(i, nn->num_rx_rings);
+                       ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
 }
 
-static int
-nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs,
-                        unsigned int *stack_tx_rings,
-                        struct bpf_prog **xdp_prog,
-                        struct nfp_net_ring_set *rx,
-                        struct nfp_net_ring_set *tx)
+static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
+{
+       struct nfp_net_dp new_dp = *dp;
+
+       *dp = nn->dp;
+       nn->dp = new_dp;
+
+       nn->dp.netdev->mtu = new_dp.mtu;
+
+       if (!netif_is_rxfh_configured(nn->dp.netdev))
+               nfp_net_rss_init_itbl(nn);
+}
+
+static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
 {
        unsigned int r;
        int err;
 
-       if (rx)
-               nfp_net_rx_ring_set_swap(nn, rx);
-       if (tx)
-               nfp_net_tx_ring_set_swap(nn, tx);
-
-       swap(*num_vecs, nn->num_r_vecs);
-       swap(*stack_tx_rings, nn->num_stack_tx_rings);
-       *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
+       nfp_net_dp_swap(nn, dp);
 
        for (r = 0; r < nn->max_r_vecs; r++)
-               nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
+               nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
 
-       if (!netif_is_rxfh_configured(nn->netdev))
-               nfp_net_rss_init_itbl(nn);
-
-       err = netif_set_real_num_rx_queues(nn->netdev,
-                                          nn->num_rx_rings);
+       err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
        if (err)
                return err;
 
-       if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) {
-               err = netif_set_real_num_tx_queues(nn->netdev,
-                                                  nn->num_stack_tx_rings);
+       if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
+               err = netif_set_real_num_tx_queues(nn->dp.netdev,
+                                                  nn->dp.num_stack_tx_rings);
                if (err)
                        return err;
        }
 
-       return __nfp_net_set_config_and_enable(nn);
+       return nfp_net_set_config_and_enable(nn);
 }
 
-static int
-nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog,
-                    struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
+{
+       struct nfp_net_dp *new;
+
+       new = kmalloc(sizeof(*new), GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+       *new = nn->dp;
+
+       /* Clear things which need to be recomputed */
+       new->fl_bufsz = 0;
+       new->tx_rings = NULL;
+       new->rx_rings = NULL;
+       new->num_r_vecs = 0;
+       new->num_stack_tx_rings = 0;
+
+       return new;
+}
+
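
Together with nfp_net_ring_reconfig() below, nfp_net_clone_dp() replaces the
old ring-set juggling with a copy-modify-commit flow: callers duplicate the
live data path, adjust only the fields they care about, and pass ownership of
the copy to the reconfig routine, which kfree()s it on every exit path. A
typical caller, mirroring the MTU path later in this patch:

    dp = nfp_net_clone_dp(nn);
    if (!dp)
            return -ENOMEM;
    dp->mtu = new_mtu;                     /* modify the copy only */
    return nfp_net_ring_reconfig(nn, dp);  /* consumes (frees) dp */
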
+static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp)
 {
        /* XDP-enabled tests */
-       if (!xdp_prog)
+       if (!dp->xdp_prog)
                return 0;
-       if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) {
+       if (dp->fl_bufsz > PAGE_SIZE) {
                nn_warn(nn, "MTU too large w/ XDP enabled\n");
                return -EINVAL;
        }
-       if (tx && tx->n_rings > nn->max_tx_rings) {
+       if (dp->num_tx_rings > nn->max_tx_rings) {
                nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n");
                return -EINVAL;
        }
@@ -2531,119 +2480,94 @@ nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog,
        return 0;
 }
 
-static void
-nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog,
-                          struct nfp_net_ring_set *rx,
-                          struct nfp_net_ring_set *tx,
-                          unsigned int stack_tx_rings, unsigned int num_vecs)
-{
-       nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu;
-       nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu);
-       nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt;
-       nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt;
-       nn->num_rx_rings = rx ? rx->n_rings : nn->num_rx_rings;
-       nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
-       nn->num_stack_tx_rings = stack_tx_rings;
-       nn->num_r_vecs = num_vecs;
-       *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
-
-       if (!netif_is_rxfh_configured(nn->netdev))
-               nfp_net_rss_init_itbl(nn);
-}
-
-int
-nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
-                     struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp)
 {
-       unsigned int stack_tx_rings, num_vecs, r;
-       int err;
+       int r, err;
+
+       dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
 
-       stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
-       if (*xdp_prog)
-               stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings;
+       dp->num_stack_tx_rings = dp->num_tx_rings;
+       if (dp->xdp_prog)
+               dp->num_stack_tx_rings -= dp->num_rx_rings;
 
-       num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings);
+       dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
 
-       err = nfp_net_check_config(nn, *xdp_prog, rx, tx);
+       err = nfp_net_check_config(nn, dp);
        if (err)
-               return err;
+               goto exit_free_dp;
 
-       if (!netif_running(nn->netdev)) {
-               nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx,
-                                          stack_tx_rings, num_vecs);
-               return 0;
+       if (!netif_running(dp->netdev)) {
+               nfp_net_dp_swap(nn, dp);
+               err = 0;
+               goto exit_free_dp;
        }
 
        /* Prepare new rings */
-       for (r = nn->num_r_vecs; r < num_vecs; r++) {
+       for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
                if (err) {
-                       num_vecs = r;
-                       goto err_cleanup_vecs;
-               }
-       }
-       if (rx) {
-               if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) {
-                       err = -ENOMEM;
+                       dp->num_r_vecs = r;
                        goto err_cleanup_vecs;
                }
        }
-       if (tx) {
-               if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) {
-                       err = -ENOMEM;
-                       goto err_free_rx;
-               }
-       }
+
+       err = nfp_net_rx_rings_prepare(nn, dp);
+       if (err)
+               goto err_cleanup_vecs;
+
+       err = nfp_net_tx_rings_prepare(nn, dp);
+       if (err)
+               goto err_free_rx;
 
        /* Stop device, swap in new rings, try to start the firmware */
        nfp_net_close_stack(nn);
        nfp_net_clear_config_and_disable(nn);
 
-       err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
-                                      xdp_prog, rx, tx);
+       err = nfp_net_dp_swap_enable(nn, dp);
        if (err) {
                int err2;
 
                nfp_net_clear_config_and_disable(nn);
 
                /* Try with old configuration and old rings */
-               err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
-                                               xdp_prog, rx, tx);
+               err2 = nfp_net_dp_swap_enable(nn, dp);
                if (err2)
                        nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
                               err, err2);
        }
-       for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+       for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 
-       if (rx)
-               nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
-       if (tx)
-               nfp_net_tx_ring_set_free(nn, tx);
+       nfp_net_rx_rings_free(dp);
+       nfp_net_tx_rings_free(dp);
 
        nfp_net_open_stack(nn);
+exit_free_dp:
+       kfree(dp);
 
        return err;
 
 err_free_rx:
-       if (rx)
-               nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
+       nfp_net_rx_rings_free(dp);
 err_cleanup_vecs:
-       for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+       for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+       kfree(dp);
        return err;
 }
 
 static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct nfp_net *nn = netdev_priv(netdev);
-       struct nfp_net_ring_set rx = {
-               .n_rings = nn->num_rx_rings,
-               .mtu = new_mtu,
-               .dcnt = nn->rxd_cnt,
-       };
+       struct nfp_net_dp *dp;
+
+       dp = nfp_net_clone_dp(nn);
+       if (!dp)
+               return -ENOMEM;
+
+       dp->mtu = new_mtu;
 
-       return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL);
+       return nfp_net_ring_reconfig(nn, dp);
 }
 
 static void nfp_net_stat64(struct net_device *netdev,
@@ -2652,7 +2576,7 @@ static void nfp_net_stat64(struct net_device *netdev,
        struct nfp_net *nn = netdev_priv(netdev);
        int r;
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
+       for (r = 0; r < nn->dp.num_r_vecs; r++) {
                struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
                u64 data[3];
                unsigned int start;
@@ -2699,7 +2623,7 @@ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
                return -ENOTSUPP;
 
        if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
-               if (!nn->bpf_offload_xdp)
+               if (!nn->dp.bpf_offload_xdp)
                        return nfp_net_bpf_offload(nn, tc->cls_bpf);
                else
                        return -EBUSY;
@@ -2718,7 +2642,7 @@ static int nfp_net_set_features(struct net_device *netdev,
 
        /* Assume this is not called with features we have not advertised */
 
-       new_ctrl = nn->ctrl;
+       new_ctrl = nn->dp.ctrl;
 
        if (changed & NETIF_F_RXCSUM) {
                if (features & NETIF_F_RXCSUM)
@@ -2762,7 +2686,7 @@ static int nfp_net_set_features(struct net_device *netdev,
                        new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
        }
 
-       if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+       if (changed & NETIF_F_HW_TC && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
                nn_err(nn, "Cannot disable HW TC offload while in use\n");
                return -EBUSY;
        }
@@ -2770,16 +2694,16 @@ static int nfp_net_set_features(struct net_device *netdev,
        nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
               netdev->features, features, changed);
 
-       if (new_ctrl == nn->ctrl)
+       if (new_ctrl == nn->dp.ctrl)
                return 0;
 
-       nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl);
+       nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
        if (err)
                return err;
 
-       nn->ctrl = new_ctrl;
+       nn->dp.ctrl = new_ctrl;
 
        return 0;
 }
@@ -2830,6 +2754,26 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
        return features;
 }
 
+static int
+nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+       int err;
+
+       if (!nn->eth_port)
+               return -EOPNOTSUPP;
+
+       if (!nn->eth_port->is_split)
+               err = snprintf(name, len, "p%d", nn->eth_port->label_port);
+       else
+               err = snprintf(name, len, "p%ds%d", nn->eth_port->label_port,
+                              nn->eth_port->label_subport);
+       if (err >= len)
+               return -EINVAL;
+
+       return 0;
+}
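
The truncation check works because snprintf() returns the length the fully
formatted string would have needed, not the number of bytes written; a return
value >= len therefore means the name did not fit. In miniature:

    n = snprintf(name, len, "p%ds%d", port, subport);  /* hypothetical args */
    if (n >= len)           /* would have been truncated */
            return -EINVAL;
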
+
 /**
  * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
  * @nn:   NFP Net device to reconfigure
@@ -2842,7 +2786,7 @@ static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
 
        nn->vxlan_ports[idx] = port;
 
-       if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN))
+       if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN))
                return;
 
        BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
@@ -2921,8 +2865,8 @@ static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog)
        if (!nfp_net_ebpf_capable(nn))
                return -EINVAL;
 
-       if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
-               if (!nn->bpf_offload_xdp)
+       if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
+               if (!nn->dp.bpf_offload_xdp)
                        return prog ? -EBUSY : 0;
                cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
        } else {
@@ -2935,48 +2879,47 @@ static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog)
        /* Stop offload if replace not possible */
        if (ret && cmd.command == TC_CLSBPF_REPLACE)
                nfp_net_xdp_offload(nn, NULL);
-       nn->bpf_offload_xdp = prog && !ret;
+       nn->dp.bpf_offload_xdp = prog && !ret;
        return ret;
 }
 
 static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
 {
-       struct nfp_net_ring_set rx = {
-               .n_rings = nn->num_rx_rings,
-               .mtu = nn->netdev->mtu,
-               .dcnt = nn->rxd_cnt,
-       };
-       struct nfp_net_ring_set tx = {
-               .n_rings = nn->num_tx_rings,
-               .dcnt = nn->txd_cnt,
-       };
+       struct bpf_prog *old_prog = nn->dp.xdp_prog;
+       struct nfp_net_dp *dp;
        int err;
 
-       if (prog && prog->xdp_adjust_head) {
-               nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
-               return -EOPNOTSUPP;
-       }
-       if (!prog && !nn->xdp_prog)
+       if (!prog && !nn->dp.xdp_prog)
                return 0;
-       if (prog && nn->xdp_prog) {
-               prog = xchg(&nn->xdp_prog, prog);
+       if (prog && nn->dp.xdp_prog) {
+               prog = xchg(&nn->dp.xdp_prog, prog);
                bpf_prog_put(prog);
-               nfp_net_xdp_offload(nn, nn->xdp_prog);
+               nfp_net_xdp_offload(nn, nn->dp.xdp_prog);
                return 0;
        }
 
-       tx.n_rings += prog ? nn->num_rx_rings : -nn->num_rx_rings;
+       dp = nfp_net_clone_dp(nn);
+       if (!dp)
+               return -ENOMEM;
+
+       dp->xdp_prog = prog;
+       dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
+       dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+       if (prog)
+               dp->rx_dma_off = XDP_PACKET_HEADROOM -
+                       (nn->dp.rx_offset ?: NFP_NET_MAX_PREPEND);
+       else
+               dp->rx_dma_off = 0;
 
        /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
-       err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx);
+       err = nfp_net_ring_reconfig(nn, dp);
        if (err)
                return err;
 
-       /* @prog got swapped and is now the old one */
-       if (prog)
-               bpf_prog_put(prog);
+       if (old_prog)
+               bpf_prog_put(old_prog);
 
-       nfp_net_xdp_offload(nn, nn->xdp_prog);
+       nfp_net_xdp_offload(nn, nn->dp.xdp_prog);
 
        return 0;
 }
@@ -2989,7 +2932,7 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
        case XDP_SETUP_PROG:
                return nfp_net_xdp_setup(nn, xdp->prog);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!nn->xdp_prog;
+               xdp->prog_attached = !!nn->dp.xdp_prog;
                return 0;
        default:
                return -EINVAL;
@@ -3008,6 +2951,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_set_features       = nfp_net_set_features,
        .ndo_features_check     = nfp_net_features_check,
+       .ndo_get_phys_port_name = nfp_net_get_phys_port_name,
        .ndo_udp_tunnel_add     = nfp_net_add_vxlan_port,
        .ndo_udp_tunnel_del     = nfp_net_del_vxlan_port,
        .ndo_xdp                = nfp_net_xdp,
@@ -3020,9 +2964,9 @@ static const struct net_device_ops nfp_net_netdev_ops = {
 void nfp_net_info(struct nfp_net *nn)
 {
        nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
-               nn->is_vf ? "VF " : "",
-               nn->num_tx_rings, nn->max_tx_rings,
-               nn->num_rx_rings, nn->max_rx_rings);
+               nn->dp.is_vf ? "VF " : "",
+               nn->dp.num_tx_rings, nn->max_tx_rings,
+               nn->dp.num_rx_rings, nn->max_rx_rings);
        nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
                nn->fw_ver.resv, nn->fw_ver.class,
                nn->fw_ver.major, nn->fw_ver.minor,
@@ -3074,21 +3018,24 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
        SET_NETDEV_DEV(netdev, &pdev->dev);
        nn = netdev_priv(netdev);
 
-       nn->netdev = netdev;
+       nn->dp.netdev = netdev;
+       nn->dp.dev = &pdev->dev;
        nn->pdev = pdev;
 
        nn->max_tx_rings = max_tx_rings;
        nn->max_rx_rings = max_rx_rings;
 
-       nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus());
-       nn->num_rx_rings = min_t(unsigned int, max_rx_rings,
+       nn->dp.num_tx_rings = min_t(unsigned int,
+                                   max_tx_rings, num_online_cpus());
+       nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
                                 netif_get_num_default_rss_queues());
 
-       nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings);
-       nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus());
+       nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
+       nn->dp.num_r_vecs = min_t(unsigned int,
+                                 nn->dp.num_r_vecs, num_online_cpus());
 
-       nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
-       nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
+       nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
+       nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
 
        spin_lock_init(&nn->reconfig_lock);
        spin_lock_init(&nn->rx_filter_lock);
@@ -3108,7 +3055,28 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
  */
 void nfp_net_netdev_free(struct nfp_net *nn)
 {
-       free_netdev(nn->netdev);
+       free_netdev(nn->dp.netdev);
+}
+
+/**
+ * nfp_net_rss_key_sz() - Get current size of the RSS key
+ * @nn:                NFP Net device instance
+ *
+ * Return: size of the RSS key for currently selected hash function.
+ */
+unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
+{
+       switch (nn->rss_hfunc) {
+       case ETH_RSS_HASH_TOP:
+               return NFP_NET_CFG_RSS_KEY_SZ;
+       case ETH_RSS_HASH_XOR:
+               return 0;
+       case ETH_RSS_HASH_CRC32:
+               return 4;
+       }
+
+       nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
+       return 0;
 }
 
 /**
@@ -3117,14 +3085,32 @@ void nfp_net_netdev_free(struct nfp_net *nn)
  */
 static void nfp_net_rss_init(struct nfp_net *nn)
 {
-       netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
+       unsigned long func_bit, rss_cap_hfunc;
+       u32 reg;
+
+       /* Read the RSS function capability and select first supported func */
+       reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
+       rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
+       if (!rss_cap_hfunc)
+               rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
+                                         NFP_NET_CFG_RSS_TOEPLITZ);
+
+       func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
+       if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
+               dev_warn(nn->dp.dev,
+                        "Bad RSS config, defaulting to Toeplitz hash\n");
+               func_bit = ETH_RSS_HASH_TOP_BIT;
+       }
+       nn->rss_hfunc = 1 << func_bit;
+
+       netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
 
        nfp_net_rss_init_itbl(nn);
 
        /* Enable IPv4/IPv6 TCP by default */
        nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
                      NFP_NET_CFG_RSS_IPV6_TCP |
-                     NFP_NET_CFG_RSS_TOEPLITZ |
+                     FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
                      NFP_NET_CFG_RSS_MASK;
 }
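
FIELD_GET()/FIELD_PREP() from <linux/bitfield.h> extract and place values
under the NFP_NET_CFG_RSS_HFUNC mask (bits 31:24), which lets the same
ETH_RSS_HASH_* bit move between the capability word and the control word. A
minimal sketch, assuming hypothetical bar/offset names:

    #define RSS_HFUNC_MASK  0xff000000      /* bits 31:24 of cap and ctrl */

    unsigned long cap_hfunc;
    unsigned int bit;
    u32 ctrl;

    cap_hfunc = FIELD_GET(RSS_HFUNC_MASK, readl(bar + rss_cap_off));
    bit = find_first_bit(&cap_hfunc, 3);    /* 3 defined hash functions */
    /* bit == 3 would mean the device advertised no supported function */
    ctrl = FIELD_PREP(RSS_HFUNC_MASK, 1 << bit) | other_rss_flags;
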
 
@@ -3151,6 +3137,17 @@ int nfp_net_netdev_init(struct net_device *netdev)
        struct nfp_net *nn = netdev_priv(netdev);
        int err;
 
+       /* XDP calls for 256 byte packet headroom which wouldn't fit in a u8.
+        * We, however, reuse the metadata prepend space for XDP buffers which
+        * is at least 1 byte long and as long as XDP headroom doesn't increase
+        * above 256 the *extra* XDP headroom will fit on 8 bits.
+        */
+       BUILD_BUG_ON(XDP_PACKET_HEADROOM > 256);
+
+       nn->dp.chained_metadata_format = nn->fw_ver.major > 3;
+
+       nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
+
        /* Get some of the read-only fields from the BAR */
        nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
        nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
@@ -3158,17 +3155,26 @@ int nfp_net_netdev_init(struct net_device *netdev)
        nfp_net_write_mac_addr(nn);
 
        /* Determine RX packet/metadata boundary offset */
-       if (nn->fw_ver.major >= 2)
-               nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
-       else
-               nn->rx_offset = NFP_NET_RX_OFFSET;
+       if (nn->fw_ver.major >= 2) {
+               u32 reg;
+
+               reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
+               if (reg > NFP_NET_MAX_PREPEND) {
+                       nn_err(nn, "Invalid rx offset: %d\n", reg);
+                       return -EINVAL;
+               }
+               nn->dp.rx_offset = reg;
+       } else {
+               nn->dp.rx_offset = NFP_NET_RX_OFFSET;
+       }
 
        /* Set default MTU and Freelist buffer size */
        if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
                netdev->mtu = nn->max_mtu;
        else
                netdev->mtu = NFP_NET_DEFAULT_MTU;
-       nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu);
+       nn->dp.mtu = netdev->mtu;
+       nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
 
        /* Advertise/enable offloads based on capabilities
         *
@@ -3179,31 +3185,31 @@ int nfp_net_netdev_init(struct net_device *netdev)
        netdev->hw_features = NETIF_F_HIGHDMA;
        if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) {
                netdev->hw_features |= NETIF_F_RXCSUM;
-               nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
        }
        if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
                netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-               nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
        }
        if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
                netdev->hw_features |= NETIF_F_SG;
-               nn->ctrl |= NFP_NET_CFG_CTRL_GATHER;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
        }
        if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) {
                netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
-               nn->ctrl |= NFP_NET_CFG_CTRL_LSO;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_LSO;
        }
        if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
                netdev->hw_features |= NETIF_F_RXHASH;
                nfp_net_rss_init(nn);
-               nn->ctrl |= NFP_NET_CFG_CTRL_RSS;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_RSS;
        }
        if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
            nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
                if (nn->cap & NFP_NET_CFG_CTRL_LSO)
                        netdev->hw_features |= NETIF_F_GSO_GRE |
                                               NETIF_F_GSO_UDP_TUNNEL;
-               nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
 
                netdev->hw_enc_features = netdev->hw_features;
        }
@@ -3212,11 +3218,11 @@ int nfp_net_netdev_init(struct net_device *netdev)
 
        if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
                netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-               nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
        }
        if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
                netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
-               nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
        }
 
        netdev->features = netdev->hw_features;
@@ -3229,14 +3235,14 @@ int nfp_net_netdev_init(struct net_device *netdev)
 
        /* Allow L2 Broadcast and Multicast through by default, if supported */
        if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
-               nn->ctrl |= NFP_NET_CFG_CTRL_L2BC;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
        if (nn->cap & NFP_NET_CFG_CTRL_L2MC)
-               nn->ctrl |= NFP_NET_CFG_CTRL_L2MC;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2MC;
 
        /* Allow IRQ moderation, if supported */
        if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
                nfp_net_irqmod_init(nn);
-               nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
+               nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
        }
 
        /* Stash the re-configuration queue away.  First odd queue in TX Bar */
@@ -3275,9 +3281,9 @@ void nfp_net_netdev_clean(struct net_device *netdev)
 {
        struct nfp_net *nn = netdev_priv(netdev);
 
-       if (nn->xdp_prog)
-               bpf_prog_put(nn->xdp_prog);
-       if (nn->bpf_offload_xdp)
+       if (nn->dp.xdp_prog)
+               bpf_prog_put(nn->dp.xdp_prog);
+       if (nn->dp.bpf_offload_xdp)
                nfp_net_xdp_offload(nn, NULL);
-       unregister_netdev(nn->netdev);
+       unregister_netdev(nn->dp.netdev);
 }
index 385ba355c965c35cf81ecd09f25e3c70c29b76e7..71d86171b4eeca4ecc2ad17369a8926584af9002 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Netronome Systems, Inc.
+ * Copyright (C) 2015-2017 Netronome Systems, Inc.
  *
 * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
 #define NFP_NET_CFG_RX_OFFSET          0x0050
 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC          0       /* Prepend mode */
 
+/**
+ * RSS capabilities
+ * @NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
+ *                             @NFP_NET_CFG_RSS_HFUNC)
+ */
+#define NFP_NET_CFG_RSS_CAP            0x0054
+#define   NFP_NET_CFG_RSS_CAP_HFUNC      0xff000000
+
 /**
  * VXLAN/UDP encap configuration
  * @NFP_NET_CFG_VXLAN_PORT:    Base address of table of tunnels' UDP dst ports
 #define   NFP_NET_CFG_RSS_IPV4_UDP        (1 << 11) /* RSS for IPv4/UDP */
 #define   NFP_NET_CFG_RSS_IPV6_TCP        (1 << 12) /* RSS for IPv6/TCP */
 #define   NFP_NET_CFG_RSS_IPV6_UDP        (1 << 13) /* RSS for IPv6/UDP */
+#define   NFP_NET_CFG_RSS_HFUNC                  0xff000000
 #define   NFP_NET_CFG_RSS_TOEPLITZ        (1 << 24) /* Use Toeplitz hash */
+#define   NFP_NET_CFG_RSS_XOR            (1 << 25) /* Use XOR as hash */
+#define   NFP_NET_CFG_RSS_CRC32                  (1 << 26) /* Use CRC32 as hash */
+#define   NFP_NET_CFG_RSS_HFUNCS         3
 #define NFP_NET_CFG_RSS_KEY             (NFP_NET_CFG_RSS_BASE + 0x4)
 #define NFP_NET_CFG_RSS_KEY_SZ          0x28
 #define NFP_NET_CFG_RSS_ITBL            (NFP_NET_CFG_RSS_BASE + 0x4 + \
index 6e9372a1837579928bb24b5435e2062dc0c534b8..4077c59bf782ea595420c1b72ad86f72ebbac1a4 100644
@@ -40,9 +40,9 @@ static struct dentry *nfp_dir;
 
 static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
 {
-       int fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p, rxd_cnt;
        struct nfp_net_r_vector *r_vec = file->private;
        struct nfp_net_rx_ring *rx_ring;
+       int fl_rd_p, fl_wr_p, rxd_cnt;
        struct nfp_net_rx_desc *rxd;
        struct nfp_net *nn;
        void *frag;
@@ -54,19 +54,18 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
                goto out;
        nn = r_vec->nfp_net;
        rx_ring = r_vec->rx_ring;
-       if (!netif_running(nn->netdev))
+       if (!netif_running(nn->dp.netdev))
                goto out;
 
        rxd_cnt = rx_ring->cnt;
 
        fl_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_fl);
        fl_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_fl);
-       rx_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_rx);
-       rx_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx);
 
-       seq_printf(file, "RX[%02d]: H_RD=%d H_WR=%d FL_RD=%d FL_WR=%d RX_RD=%d RX_WR=%d\n",
-                  rx_ring->idx, rx_ring->rd_p, rx_ring->wr_p,
-                  fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p);
+       seq_printf(file, "RX[%02d,%02d]: cnt=%d dma=%pad host=%p   H_RD=%d H_WR=%d FL_RD=%d FL_WR=%d\n",
+                  rx_ring->idx, rx_ring->fl_qcidx,
+                  rx_ring->cnt, &rx_ring->dma, rx_ring->rxds,
+                  rx_ring->rd_p, rx_ring->wr_p, fl_rd_p, fl_wr_p);
 
        for (i = 0; i < rxd_cnt; i++) {
                rxd = &rx_ring->rxds[i];
@@ -89,10 +88,6 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
                        seq_puts(file, " FL_RD");
                if (i == fl_wr_p % rxd_cnt)
                        seq_puts(file, " FL_WR");
-               if (i == rx_rd_p % rxd_cnt)
-                       seq_puts(file, " RX_RD");
-               if (i == rx_wr_p % rxd_cnt)
-                       seq_puts(file, " RX_WR");
 
                seq_putc(file, '\n');
        }
@@ -143,7 +138,7 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
        if (!r_vec->nfp_net || !tx_ring)
                goto out;
        nn = r_vec->nfp_net;
-       if (!netif_running(nn->netdev))
+       if (!netif_running(nn->dp.netdev))
                goto out;
 
        txd_cnt = tx_ring->cnt;
@@ -151,8 +146,11 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
        d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
        d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q);
 
-       seq_printf(file, "TX[%02d]: H_RD=%d H_WR=%d D_RD=%d D_WR=%d\n",
-                  tx_ring->idx, tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p);
+       seq_printf(file, "TX[%02d,%02d%s]: cnt=%d dma=%pad host=%p   H_RD=%d H_WR=%d D_RD=%d D_WR=%d\n",
+                  tx_ring->idx, tx_ring->qcidx,
+                  tx_ring == r_vec->tx_ring ? "" : "xdp",
+                  tx_ring->cnt, &tx_ring->dma, tx_ring->txds,
+                  tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p);
 
        for (i = 0; i < txd_cnt; i++) {
                txd = &tx_ring->txds[i];
index 2649f7523c81f11ddbb9c0b9bdba78dd220d7c6a..ed22a813e5791af7ed1ef8783f02f50779d69e30 100644
@@ -40,6 +40,7 @@
  *          Brad Petrus <brad.petrus@netronome.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -126,9 +127,9 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
 };
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
-#define NN_ET_RVEC_STATS_LEN (nn->num_r_vecs * 3)
+#define NN_ET_RVEC_STATS_LEN (nn->dp.num_r_vecs * 3)
 #define NN_ET_RVEC_GATHER_STATS 7
-#define NN_ET_QUEUE_STATS_LEN ((nn->num_tx_rings + nn->num_rx_rings) * 2)
+#define NN_ET_QUEUE_STATS_LEN ((nn->dp.num_tx_rings + nn->dp.num_rx_rings) * 2)
 #define NN_ET_STATS_LEN (NN_ET_GLOBAL_STATS_LEN + NN_ET_RVEC_GATHER_STATS + \
                         NN_ET_RVEC_STATS_LEN + NN_ET_QUEUE_STATS_LEN)
 
@@ -179,30 +180,22 @@ static void nfp_net_get_ringparam(struct net_device *netdev,
 
        ring->rx_max_pending = NFP_NET_MAX_RX_DESCS;
        ring->tx_max_pending = NFP_NET_MAX_TX_DESCS;
-       ring->rx_pending = nn->rxd_cnt;
-       ring->tx_pending = nn->txd_cnt;
+       ring->rx_pending = nn->dp.rxd_cnt;
+       ring->tx_pending = nn->dp.txd_cnt;
 }
 
 static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
 {
-       struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
-       struct nfp_net_ring_set rx = {
-               .n_rings = nn->num_rx_rings,
-               .mtu = nn->netdev->mtu,
-               .dcnt = rxd_cnt,
-       };
-       struct nfp_net_ring_set tx = {
-               .n_rings = nn->num_tx_rings,
-               .dcnt = txd_cnt,
-       };
+       struct nfp_net_dp *dp;
+
+       dp = nfp_net_clone_dp(nn);
+       if (!dp)
+               return -ENOMEM;
 
-       if (nn->rxd_cnt != rxd_cnt)
-               reconfig_rx = &rx;
-       if (nn->txd_cnt != txd_cnt)
-               reconfig_tx = &tx;
+       dp->rxd_cnt = rxd_cnt;
+       dp->txd_cnt = txd_cnt;
 
-       return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
-                                    reconfig_rx, reconfig_tx);
+       return nfp_net_ring_reconfig(nn, dp);
 }
 
 static int nfp_net_set_ringparam(struct net_device *netdev,
@@ -223,11 +216,11 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
            txd_cnt < NFP_NET_MIN_TX_DESCS || txd_cnt > NFP_NET_MAX_TX_DESCS)
                return -EINVAL;
 
-       if (nn->rxd_cnt == rxd_cnt && nn->txd_cnt == txd_cnt)
+       if (nn->dp.rxd_cnt == rxd_cnt && nn->dp.txd_cnt == txd_cnt)
                return 0;
 
        nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n",
-              nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt);
+              nn->dp.rxd_cnt, rxd_cnt, nn->dp.txd_cnt, txd_cnt);
 
        return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
@@ -245,7 +238,7 @@ static void nfp_net_get_strings(struct net_device *netdev,
                        memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN);
                        p += ETH_GSTRING_LEN;
                }
-               for (i = 0; i < nn->num_r_vecs; i++) {
+               for (i = 0; i < nn->dp.num_r_vecs; i++) {
                        sprintf(p, "rvec_%u_rx_pkts", i);
                        p += ETH_GSTRING_LEN;
                        sprintf(p, "rvec_%u_tx_pkts", i);
@@ -267,13 +260,13 @@ static void nfp_net_get_strings(struct net_device *netdev,
                p += ETH_GSTRING_LEN;
                strncpy(p, "tx_lso", ETH_GSTRING_LEN);
                p += ETH_GSTRING_LEN;
-               for (i = 0; i < nn->num_tx_rings; i++) {
+               for (i = 0; i < nn->dp.num_tx_rings; i++) {
                        sprintf(p, "txq_%u_pkts", i);
                        p += ETH_GSTRING_LEN;
                        sprintf(p, "txq_%u_bytes", i);
                        p += ETH_GSTRING_LEN;
                }
-               for (i = 0; i < nn->num_rx_rings; i++) {
+               for (i = 0; i < nn->dp.num_rx_rings; i++) {
                        sprintf(p, "rxq_%u_pkts", i);
                        p += ETH_GSTRING_LEN;
                        sprintf(p, "rxq_%u_bytes", i);
@@ -306,12 +299,12 @@ static void nfp_net_get_stats(struct net_device *netdev,
                        break;
 
                case NFP_NET_DEV_ET_STATS:
-                       io_p = nn->ctrl_bar + nfp_net_et_stats[i].off;
+                       io_p = nn->dp.ctrl_bar + nfp_net_et_stats[i].off;
                        data[i] = readq(io_p);
                        break;
                }
        }
-       for (j = 0; j < nn->num_r_vecs; j++) {
+       for (j = 0; j < nn->dp.num_r_vecs; j++) {
                unsigned int start;
 
                do {
@@ -337,16 +330,16 @@ static void nfp_net_get_stats(struct net_device *netdev,
        }
        for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++)
                data[i++] = gathered_stats[j];
-       for (j = 0; j < nn->num_tx_rings; j++) {
-               io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j);
+       for (j = 0; j < nn->dp.num_tx_rings; j++) {
+               io_p = nn->dp.ctrl_bar + NFP_NET_CFG_TXR_STATS(j);
                data[i++] = readq(io_p);
-               io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8;
+               io_p = nn->dp.ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8;
                data[i++] = readq(io_p);
        }
-       for (j = 0; j < nn->num_rx_rings; j++) {
-               io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j);
+       for (j = 0; j < nn->dp.num_rx_rings; j++) {
+               io_p = nn->dp.ctrl_bar + NFP_NET_CFG_RXR_STATS(j);
                data[i++] = readq(io_p);
-               io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8;
+               io_p = nn->dp.ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8;
                data[i++] = readq(io_p);
        }
 }
@@ -410,7 +403,7 @@ static int nfp_net_get_rxnfc(struct net_device *netdev,
 
        switch (cmd->cmd) {
        case ETHTOOL_GRXRINGS:
-               cmd->data = nn->num_rx_rings;
+               cmd->data = nn->dp.num_rx_rings;
                return 0;
        case ETHTOOL_GRXFH:
                return nfp_net_get_rss_hash_opts(nn, cmd);
@@ -454,13 +447,13 @@ static int nfp_net_set_rss_hash_opt(struct nfp_net *nn,
                return -EINVAL;
        }
 
-       new_rss_cfg |= NFP_NET_CFG_RSS_TOEPLITZ;
+       new_rss_cfg |= FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc);
        new_rss_cfg |= NFP_NET_CFG_RSS_MASK;
 
        if (new_rss_cfg == nn->rss_cfg)
                return 0;
 
-       writel(new_rss_cfg, nn->ctrl_bar + NFP_NET_CFG_RSS_CTRL);
+       writel(new_rss_cfg, nn->dp.ctrl_bar + NFP_NET_CFG_RSS_CTRL);
        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS);
        if (err)
                return err;
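This is what the new <linux/bitfield.h> include is for: FIELD_PREP() shifts a value into the bit positions named by a constant mask, which lets the configurable hash-function field replace the hard-coded Toeplitz bit. A minimal sketch with a made-up register layout:

#include <linux/bitfield.h>
#include <linux/bitops.h>

#define EXAMPLE_RSS_HFUNC	GENMASK(27, 24)	/* hypothetical field */

static u32 example_pack_hfunc(u32 rss_cfg, u32 hfunc)
{
	rss_cfg &= ~EXAMPLE_RSS_HFUNC;		/* clear the old value */
	return rss_cfg | FIELD_PREP(EXAMPLE_RSS_HFUNC, hfunc);
}

FIELD_GET() performs the inverse extraction, as used for the port labels later in this series.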
@@ -496,7 +489,12 @@ static u32 nfp_net_get_rxfh_indir_size(struct net_device *netdev)
 
 static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev)
 {
-       return NFP_NET_CFG_RSS_KEY_SZ;
+       struct nfp_net *nn = netdev_priv(netdev);
+
+       if (!(nn->cap & NFP_NET_CFG_CTRL_RSS))
+               return -EOPNOTSUPP;
+
+       return nfp_net_rss_key_sz(nn);
 }
 
 static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
@@ -512,9 +510,12 @@ static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
                for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++)
                        indir[i] = nn->rss_itbl[i];
        if (key)
-               memcpy(key, nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
-       if (hfunc)
-               *hfunc = ETH_RSS_HASH_TOP;
+               memcpy(key, nn->rss_key, nfp_net_rss_key_sz(nn));
+       if (hfunc) {
+               *hfunc = nn->rss_hfunc;
+               if (*hfunc >= 1 << ETH_RSS_HASH_FUNCS_COUNT)
+                       *hfunc = ETH_RSS_HASH_UNKNOWN;
+       }
 
        return 0;
 }
@@ -527,14 +528,14 @@ static int nfp_net_set_rxfh(struct net_device *netdev,
        int i;
 
        if (!(nn->cap & NFP_NET_CFG_CTRL_RSS) ||
-           !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == ETH_RSS_HASH_TOP))
+           !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == nn->rss_hfunc))
                return -EOPNOTSUPP;
 
        if (!key && !indir)
                return 0;
 
        if (key) {
-               memcpy(nn->rss_key, key, NFP_NET_CFG_RSS_KEY_SZ);
+               memcpy(nn->rss_key, key, nfp_net_rss_key_sz(nn));
                nfp_net_rss_write_key(nn);
        }
        if (indir) {
@@ -564,7 +565,7 @@ static void nfp_net_get_regs(struct net_device *netdev,
        regs->version = nn_readl(nn, NFP_NET_CFG_VERSION);
 
        for (i = 0; i < NFP_NET_CFG_BAR_SZ / sizeof(u32); i++)
-               regs_buf[i] = readl(nn->ctrl_bar + (i * sizeof(u32)));
+               regs_buf[i] = readl(nn->dp.ctrl_bar + (i * sizeof(u32)));
 }
 
 static int nfp_net_get_coalesce(struct net_device *netdev,
@@ -736,16 +737,16 @@ static void nfp_net_get_channels(struct net_device *netdev,
        struct nfp_net *nn = netdev_priv(netdev);
        unsigned int num_tx_rings;
 
-       num_tx_rings = nn->num_tx_rings;
-       if (nn->xdp_prog)
-               num_tx_rings -= nn->num_rx_rings;
+       num_tx_rings = nn->dp.num_tx_rings;
+       if (nn->dp.xdp_prog)
+               num_tx_rings -= nn->dp.num_rx_rings;
 
        channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs);
        channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs);
        channel->max_combined = min(channel->max_rx, channel->max_tx);
        channel->max_other = NFP_NET_NON_Q_VECTORS;
-       channel->combined_count = min(nn->num_rx_rings, num_tx_rings);
-       channel->rx_count = nn->num_rx_rings - channel->combined_count;
+       channel->combined_count = min(nn->dp.num_rx_rings, num_tx_rings);
+       channel->rx_count = nn->dp.num_rx_rings - channel->combined_count;
        channel->tx_count = num_tx_rings - channel->combined_count;
        channel->other_count = NFP_NET_NON_Q_VECTORS;
 }
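The subtraction reflects the XDP ring accounting: while a program is attached, each RX ring owns a dedicated TX ring for XDP_TX that the stack never sees. In isolation (a sketch, counts hypothetical):

static unsigned int stack_tx_rings(unsigned int total_tx,
				   unsigned int total_rx, bool xdp_on)
{
	/* one TX ring per RX ring is reserved while XDP is attached */
	return xdp_on ? total_tx - total_rx : total_tx;
}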
@@ -753,29 +754,19 @@ static void nfp_net_get_channels(struct net_device *netdev,
 static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx,
                                 unsigned int total_tx)
 {
-       struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
-       struct nfp_net_ring_set rx = {
-               .n_rings = total_rx,
-               .mtu = nn->netdev->mtu,
-               .dcnt = nn->rxd_cnt,
-       };
-       struct nfp_net_ring_set tx = {
-               .n_rings = total_tx,
-               .dcnt = nn->txd_cnt,
-       };
+       struct nfp_net_dp *dp;
 
-       if (nn->num_rx_rings != total_rx)
-               reconfig_rx = &rx;
-       if (nn->num_stack_tx_rings != total_tx ||
-           (nn->xdp_prog && reconfig_rx))
-               reconfig_tx = &tx;
+       dp = nfp_net_clone_dp(nn);
+       if (!dp)
+               return -ENOMEM;
 
-       /* nfp_net_check_config() will catch tx.n_rings > nn->max_tx_rings */
-       if (nn->xdp_prog)
-               tx.n_rings += total_rx;
+       dp->num_rx_rings = total_rx;
+       dp->num_tx_rings = total_tx;
+       /* nfp_net_check_config() will catch num_tx_rings > nn->max_tx_rings */
+       if (dp->xdp_prog)
+               dp->num_tx_rings += total_rx;
 
-       return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
-                                    reconfig_rx, reconfig_tx);
+       return nfp_net_ring_reconfig(nn, dp);
 }
 
 static int nfp_net_set_channels(struct net_device *netdev,
index 3afcdc11480c82c7d19f2252cae29a40066cfef8..2025cb7c6d90599078acbf5b9929ee1e5eb47c15 100644 (file)
@@ -130,7 +130,7 @@ err_area:
 }
 
 static void
-nfp_net_get_mac_addr_hwinfo(struct nfp_net *nn, struct nfp_cpp *cpp,
+nfp_net_get_mac_addr_hwinfo(struct nfp_net_dp *dp, struct nfp_cpp *cpp,
                            unsigned int id)
 {
        u8 mac_addr[ETH_ALEN];
@@ -141,23 +141,22 @@ nfp_net_get_mac_addr_hwinfo(struct nfp_net *nn, struct nfp_cpp *cpp,
 
        mac_str = nfp_hwinfo_lookup(cpp, name);
        if (!mac_str) {
-               dev_warn(&nn->pdev->dev,
-                        "Can't lookup MAC address. Generate\n");
-               eth_hw_addr_random(nn->netdev);
+               dev_warn(dp->dev, "Can't look up MAC address. Generate.\n");
+               eth_hw_addr_random(dp->netdev);
                return;
        }
 
        if (sscanf(mac_str, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
                   &mac_addr[0], &mac_addr[1], &mac_addr[2],
                   &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) {
-               dev_warn(&nn->pdev->dev,
+               dev_warn(dp->dev,
                         "Can't parse MAC address (%s). Generate.\n", mac_str);
-               eth_hw_addr_random(nn->netdev);
+               eth_hw_addr_random(dp->netdev);
                return;
        }
 
-       ether_addr_copy(nn->netdev->dev_addr, mac_addr);
-       ether_addr_copy(nn->netdev->perm_addr, mac_addr);
+       ether_addr_copy(dp->netdev->dev_addr, mac_addr);
+       ether_addr_copy(dp->netdev->perm_addr, mac_addr);
 }
 
 /**
@@ -178,12 +177,14 @@ nfp_net_get_mac_addr(struct nfp_net *nn, struct nfp_pf *pf, unsigned int id)
                if (pf->eth_tbl->ports[i].eth_index == id) {
                        const u8 *mac_addr = pf->eth_tbl->ports[i].mac_addr;
 
-                       ether_addr_copy(nn->netdev->dev_addr, mac_addr);
-                       ether_addr_copy(nn->netdev->perm_addr, mac_addr);
+                       nn->eth_port = &pf->eth_tbl->ports[i];
+
+                       ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
+                       ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
                        return;
                }
 
-       nfp_net_get_mac_addr_hwinfo(nn, pf->cpp, id);
+       nfp_net_get_mac_addr_hwinfo(&nn->dp, pf->cpp, id);
 }
 
 static unsigned int nfp_net_pf_get_num_ports(struct nfp_pf *pf)
@@ -305,10 +306,10 @@ nfp_net_pf_alloc_port_netdev(struct nfp_pf *pf, void __iomem *ctrl_bar,
 
        nn->cpp = pf->cpp;
        nn->fw_ver = *fw_ver;
-       nn->ctrl_bar = ctrl_bar;
+       nn->dp.ctrl_bar = ctrl_bar;
        nn->tx_bar = tx_bar;
        nn->rx_bar = rx_bar;
-       nn->is_vf = 0;
+       nn->dp.is_vf = 0;
        nn->stride_rx = stride;
        nn->stride_tx = stride;
 
@@ -330,7 +331,7 @@ nfp_net_pf_init_port_netdev(struct nfp_pf *pf, struct nfp_net *nn,
         */
        nn->me_freq_mhz = 1200;
 
-       err = nfp_net_netdev_init(nn->netdev);
+       err = nfp_net_netdev_init(nn->dp.netdev);
        if (err)
                return err;
 
@@ -399,7 +400,7 @@ nfp_net_pf_spawn_netdevs(struct nfp_pf *pf,
        /* Get MSI-X vectors */
        wanted_irqs = 0;
        list_for_each_entry(nn, &pf->ports, port_list)
-               wanted_irqs += NFP_NET_NON_Q_VECTORS + nn->num_r_vecs;
+               wanted_irqs += NFP_NET_NON_Q_VECTORS + nn->dp.num_r_vecs;
        pf->irq_entries = kcalloc(wanted_irqs, sizeof(*pf->irq_entries),
                                  GFP_KERNEL);
        if (!pf->irq_entries) {
@@ -444,7 +445,7 @@ nfp_net_pf_spawn_netdevs(struct nfp_pf *pf,
 err_prev_deinit:
        list_for_each_entry_continue_reverse(nn, &pf->ports, port_list) {
                nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
-               nfp_net_netdev_clean(nn->netdev);
+               nfp_net_netdev_clean(nn->dp.netdev);
        }
        nfp_net_irqs_disable(pf->pdev);
 err_vec_free:
@@ -570,7 +571,7 @@ void nfp_net_pci_remove(struct nfp_pf *pf)
        list_for_each_entry(nn, &pf->ports, port_list) {
                nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
 
-               nfp_net_netdev_clean(nn->netdev);
+               nfp_net_netdev_clean(nn->dp.netdev);
        }
 
        nfp_net_pf_free_netdevs(pf);
index 18a851eb35084397dd6fa003b3def76ed42a6960..b5b6f69d1e0f8489aa91284052dca2eb092d0135 100644 (file)
@@ -58,7 +58,7 @@ void nfp_net_filter_stats_timer(unsigned long data)
 
        spin_lock_bh(&nn->rx_filter_lock);
 
-       if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+       if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
                mod_timer(&nn->rx_filter_stats_timer,
                          jiffies + NFP_NET_STAT_POLL_IVL);
 
@@ -132,7 +132,7 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
                        return NN_ACT_TC_DROP;
 
                if (is_tcf_mirred_egress_redirect(a) &&
-                   tcf_mirred_ifindex(a) == nn->netdev->ifindex)
+                   tcf_mirred_ifindex(a) == nn->dp.netdev->ifindex)
                        return NN_ACT_TC_REDIR;
        }
 
@@ -160,7 +160,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn,
        act = ret;
 
        max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
-       if (max_mtu < nn->netdev->mtu) {
+       if (max_mtu < nn->dp.netdev->mtu) {
                nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
                return -ENOTSUPP;
        }
@@ -168,8 +168,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn,
        start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
        done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
 
-       *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
-                                   GFP_KERNEL);
+       *code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL);
        if (!*code)
                return -ENOMEM;
 
@@ -181,7 +180,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn,
        return 0;
 
 out:
-       dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
+       dma_free_coherent(nn->dp.dev, code_sz, *code, *dma_addr);
        return ret;
 }
 
@@ -194,7 +193,7 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
        u64 bpf_addr = dma_addr;
        int err;
 
-       nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
+       nn->dp.bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
 
        if (dense_mode)
                bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
@@ -208,13 +207,13 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
                nn_err(nn, "FW command error while loading BPF: %d\n", err);
 
        /* Enable passing packets through BPF function */
-       nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
-       nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+       nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF;
+       nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl);
        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
        if (err)
                nn_err(nn, "FW command error while enabling BPF: %d\n", err);
 
-       dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
+       dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr);
 
        nfp_net_bpf_stats_reset(nn);
        mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
@@ -222,16 +221,16 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
 
 static int nfp_net_bpf_stop(struct nfp_net *nn)
 {
-       if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
+       if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF))
                return 0;
 
        spin_lock_bh(&nn->rx_filter_lock);
-       nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
+       nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF;
        spin_unlock_bh(&nn->rx_filter_lock);
-       nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+       nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl);
 
        del_timer_sync(&nn->rx_filter_stats_timer);
-       nn->bpf_offload_skip_sw = 0;
+       nn->dp.bpf_offload_skip_sw = 0;
 
        return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 }
@@ -255,7 +254,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
                 * frames which didn't have BPF applied in the hardware should
                 * be fine if software fallback is available, though.
                 */
-               if (nn->bpf_offload_skip_sw)
+               if (nn->dp.bpf_offload_skip_sw)
                        return -EBUSY;
 
                err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
@@ -270,7 +269,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
                return 0;
 
        case TC_CLSBPF_ADD:
-               if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+               if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
                        return -EBUSY;
 
                err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
index 39407f7cc586c948319b963aae3fb455a7a6cb8f..86e61be6f35c11f8c7b932844b15c989cd6db079 100644 (file)
@@ -84,12 +84,12 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn)
        put_unaligned_be16(nn_readw(nn, NFP_NET_CFG_MACADDR + 6), &mac_addr[4]);
 
        if (!is_valid_ether_addr(mac_addr)) {
-               eth_hw_addr_random(nn->netdev);
+               eth_hw_addr_random(nn->dp.netdev);
                return;
        }
 
-       ether_addr_copy(nn->netdev->dev_addr, mac_addr);
-       ether_addr_copy(nn->netdev->perm_addr, mac_addr);
+       ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
+       ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
 }
 
 static int nfp_netvf_pci_probe(struct pci_dev *pdev,
@@ -210,8 +210,8 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
        vf->nn = nn;
 
        nn->fw_ver = fw_ver;
-       nn->ctrl_bar = ctrl_bar;
-       nn->is_vf = 1;
+       nn->dp.ctrl_bar = ctrl_bar;
+       nn->dp.is_vf = 1;
        nn->stride_tx = stride;
        nn->stride_rx = stride;
 
@@ -268,7 +268,8 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 
        num_irqs = nfp_net_irqs_alloc(pdev, vf->irq_entries,
                                      NFP_NET_MIN_PORT_IRQS,
-                                     NFP_NET_NON_Q_VECTORS + nn->num_r_vecs);
+                                     NFP_NET_NON_Q_VECTORS +
+                                     nn->dp.num_r_vecs);
        if (!num_irqs) {
                nn_warn(nn, "Unable to allocate MSI-X Vectors. Exiting\n");
                err = -EIO;
@@ -282,7 +283,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
         */
        nn->me_freq_mhz = 1200;
 
-       err = nfp_net_netdev_init(nn->netdev);
+       err = nfp_net_netdev_init(nn->dp.netdev);
        if (err)
                goto err_irqs_disable;
 
@@ -327,7 +328,7 @@ static void nfp_netvf_pci_remove(struct pci_dev *pdev)
        nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
        nfp_net_debugfs_dir_clean(&vf->ddir);
 
-       nfp_net_netdev_clean(nn->netdev);
+       nfp_net_netdev_clean(nn->dp.netdev);
 
        nfp_net_irqs_disable(pdev);
 
@@ -337,7 +338,7 @@ static void nfp_netvf_pci_remove(struct pci_dev *pdev)
        } else {
                iounmap(vf->q_bar);
        }
-       iounmap(nn->ctrl_bar);
+       iounmap(nn->dp.ctrl_bar);
 
        nfp_net_netdev_free(nn);
 
index 42cb720b696d17b6dfb580a145237245a8217412..f7ca8e374923c36580ab046e2d9b933e844fd9e6 100644 (file)
@@ -66,14 +66,7 @@ int nfp_nsp_write_eth_table(struct nfp_nsp *state,
 
 /* Implemented in nfp_resource.c */
 
-#define NFP_RESOURCE_TBL_TARGET                NFP_CPP_TARGET_MU
-#define NFP_RESOURCE_TBL_BASE          0x8100000000ULL
-
-/* NFP Resource Table self-identifier */
-#define NFP_RESOURCE_TBL_NAME          "nfp.res"
-#define NFP_RESOURCE_TBL_KEY           0x00000000 /* Special key for entry 0 */
-
-/* All other keys are CRC32-POSIX of the 8-byte identification string */
+/* All keys are CRC32-POSIX of the 8-byte identification string */
 
 /* ARM/PCI vNIC Interfaces 0..3 */
 #define NFP_RESOURCE_VNIC_PCI_0                "vnic.p0"
index 15cc3e77cf6acddfec0afe8906e2a61f3f958aa5..43dc68e01274225c79fea7760ac303a8a2409a85 100644 (file)
@@ -217,7 +217,7 @@ static resource_size_t nfp_bar_resource_start(struct nfp_bar *bar)
 #define TARGET_WIDTH_64    8
 
 static int
-compute_bar(struct nfp6000_pcie *nfp, struct nfp_bar *bar,
+compute_bar(const struct nfp6000_pcie *nfp, const struct nfp_bar *bar,
            u32 *bar_config, u64 *bar_base,
            int tgt, int act, int tok, u64 offset, size_t size, int width)
 {
@@ -410,35 +410,36 @@ find_matching_bar(struct nfp6000_pcie *nfp,
 
 /* Return EAGAIN if no resource is available */
 static int
-find_unused_bar_noblock(struct nfp6000_pcie *nfp,
+find_unused_bar_noblock(const struct nfp6000_pcie *nfp,
                        int tgt, int act, int tok,
                        u64 offset, size_t size, int width)
 {
-       int n, invalid = 0;
+       int n, busy = 0;
 
        for (n = 0; n < nfp->bars; n++) {
-               struct nfp_bar *bar = &nfp->bar[n];
+               const struct nfp_bar *bar = &nfp->bar[n];
                int err;
 
-               if (bar->bitsize == 0) {
-                       invalid++;
-                       continue;
-               }
-
-               if (atomic_read(&bar->refcnt) != 0)
+               if (!bar->bitsize)
                        continue;
 
                /* Just check to see if we can make it fit... */
                err = compute_bar(nfp, bar, NULL, NULL,
                                  tgt, act, tok, offset, size, width);
+               if (err)
+                       continue;
 
-               if (err < 0)
-                       invalid++;
-               else
+               if (!atomic_read(&bar->refcnt))
                        return n;
+
+               busy++;
        }
 
-       return (n == invalid) ? -EINVAL : -EAGAIN;
+       if (WARN(!busy, "No suitable BAR found for request tgt:0x%x act:0x%x tok:0x%x off:0x%llx size:%zu width:%d\n",
+                tgt, act, tok, offset, size, width))
+               return -EINVAL;
+
+       return -EAGAIN;
 }
 
 static int
index 40108e66c65480fcf8e002379bf68ec948cd22ee..e2abba4c3a3fd84225f443d82e61454d41093819 100644 (file)
@@ -65,39 +65,49 @@ struct nfp_cpp_resource {
        u64 end;
 };
 
-struct nfp_cpp_mutex {
-       struct list_head list;
-       struct nfp_cpp *cpp;
-       int target;
-       u16 usage;
-       u16 depth;
-       unsigned long long address;
-       u32 key;
-};
-
+/**
+ * struct nfp_cpp - main nfpcore device structure
+ * The following fields are read-only after probe() exits or netdevs are spawned.
+ * @dev:               embedded device structure
+ * @op:                        low-level implementation ops
+ * @priv:              private data of the low-level implementation
+ * @model:             chip model
+ * @interface:         chip interface id we are using to reach it
+ * @serial:            chip serial number
+ * @imb_cat_table:     CPP Mapping Table
+ *
+ * The following fields can be used only in probe() or with the rtnl lock held:
+ * @hwinfo:            HWInfo database fetched from the device
+ * @rtsym:             firmware run-time symbols
+ *
+ * The following fields use explicit locking:
+ * @resource_list:     NFP CPP resource list
+ * @resource_lock:     protects @resource_list
+ *
+ * @area_cache_list:   cached areas for cpp/xpb read/write speed up
+ * @area_cache_mutex:  protects @area_cache_list
+ *
+ * @waitq:             area wait queue
+ */
 struct nfp_cpp {
        struct device dev;
 
-       void *priv; /* Private data of the low-level implementation */
+       void *priv;
 
        u32 model;
        u16 interface;
        u8 serial[NFP_SERIAL_LEN];
 
        const struct nfp_cpp_operations *op;
-       struct list_head resource_list; /* NFP CPP resource list */
-       struct list_head mutex_cache;   /* Mutex cache */
+       struct list_head resource_list;
        rwlock_t resource_lock;
        wait_queue_head_t waitq;
 
-       /* NFP6000 CPP Mapping Table */
        u32 imb_cat_table[16];
 
-       /* Cached areas for cpp/xpb readl/writel speedups */
-       struct mutex area_cache_mutex;  /* Lock for the area cache */
+       struct mutex area_cache_mutex;
        struct list_head area_cache_list;
 
-       /* Cached information */
        void *hwinfo;
        void *rtsym;
 };
@@ -187,24 +197,6 @@ void nfp_cpp_free(struct nfp_cpp *cpp)
 {
        struct nfp_cpp_area_cache *cache, *ctmp;
        struct nfp_cpp_resource *res, *rtmp;
-       struct nfp_cpp_mutex *mutex, *mtmp;
-
-       /* There should be no mutexes in the cache at this point. */
-       WARN_ON(!list_empty(&cpp->mutex_cache));
-       /* .. but if there are, unlock them and complain. */
-       list_for_each_entry_safe(mutex, mtmp, &cpp->mutex_cache, list) {
-               dev_err(cpp->dev.parent, "Dangling mutex: @%d::0x%llx, %d locks held by %d owners\n",
-                       mutex->target, (unsigned long long)mutex->address,
-                       mutex->depth, mutex->usage);
-
-               /* Forcing an unlock */
-               mutex->depth = 1;
-               nfp_cpp_mutex_unlock(mutex);
-
-               /* Forcing a free */
-               mutex->usage = 1;
-               nfp_cpp_mutex_free(mutex);
-       }
 
        /* Remove all caches */
        list_for_each_entry_safe(cache, ctmp, &cpp->area_cache_list, entry) {
@@ -419,9 +411,43 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
  */
 void nfp_cpp_area_free(struct nfp_cpp_area *area)
 {
+       if (atomic_read(&area->refcount))
+               nfp_warn(area->cpp, "Warning: freeing busy area\n");
        nfp_cpp_area_put(area);
 }
 
+static bool nfp_cpp_area_acquire_try(struct nfp_cpp_area *area, int *status)
+{
+       *status = area->cpp->op->area_acquire(area);
+
+       return *status != -EAGAIN;
+}
+
+static int __nfp_cpp_area_acquire(struct nfp_cpp_area *area)
+{
+       int err, status;
+
+       if (atomic_inc_return(&area->refcount) > 1)
+               return 0;
+
+       if (!area->cpp->op->area_acquire)
+               return 0;
+
+       err = wait_event_interruptible(area->cpp->waitq,
+                                      nfp_cpp_area_acquire_try(area, &status));
+       if (!err)
+               err = status;
+       if (err) {
+               nfp_warn(area->cpp, "Warning: area wait failed: %d\n", err);
+               atomic_dec(&area->refcount);
+               return err;
+       }
+
+       nfp_cpp_area_get(area);
+
+       return 0;
+}
+
 /**
  * nfp_cpp_area_acquire() - lock down a CPP area for access
  * @area:      CPP area handle
@@ -433,27 +459,13 @@ void nfp_cpp_area_free(struct nfp_cpp_area *area)
  */
 int nfp_cpp_area_acquire(struct nfp_cpp_area *area)
 {
-       mutex_lock(&area->mutex);
-       if (atomic_inc_return(&area->refcount) == 1) {
-               int (*a_a)(struct nfp_cpp_area *);
-
-               a_a = area->cpp->op->area_acquire;
-               if (a_a) {
-                       int err;
+       int ret;
 
-                       wait_event_interruptible(area->cpp->waitq,
-                                                (err = a_a(area)) != -EAGAIN);
-                       if (err < 0) {
-                               atomic_dec(&area->refcount);
-                               mutex_unlock(&area->mutex);
-                               return err;
-                       }
-               }
-       }
+       mutex_lock(&area->mutex);
+       ret = __nfp_cpp_area_acquire(area);
        mutex_unlock(&area->mutex);
 
-       nfp_cpp_area_get(area);
-       return 0;
+       return ret;
 }
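The split into a _try helper exists because wait_event_interruptible() re-evaluates its condition expression on every wakeup; routing the acquire result out through a status pointer lets the caller return the decisive attempt's error rather than a stale one. The idiom in isolation (struct widget and widget_op() are hypothetical stand-ins):

#include <linux/errno.h>
#include <linux/wait.h>

struct widget {
	wait_queue_head_t waitq;	/* woken when a retry may succeed */
};

static int widget_op(struct widget *w);	/* may report -EAGAIN */

static bool example_try(struct widget *w, int *status)
{
	*status = widget_op(w);
	return *status != -EAGAIN;	/* anything else ends the wait */
}

static int example_acquire(struct widget *w)
{
	int err, status;

	err = wait_event_interruptible(w->waitq, example_try(w, &status));
	if (!err)
		err = status;		/* result of the final attempt */
	return err;
}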
 
 /**
@@ -829,10 +841,7 @@ area_cache_get(struct nfp_cpp *cpp, u32 id,
         * the need for special case code below when
         * checking against available cache size.
         */
-       if (length == 0)
-               return NULL;
-
-       if (list_empty(&cpp->area_cache_list) || id == 0)
+       if (length == 0 || id == 0)
                return NULL;
 
        /* Remap from cpp_island to cpp_target */
@@ -840,10 +849,15 @@ area_cache_get(struct nfp_cpp *cpp, u32 id,
        if (err < 0)
                return NULL;
 
-       addr += *offset;
-
        mutex_lock(&cpp->area_cache_mutex);
 
+       if (list_empty(&cpp->area_cache_list)) {
+               mutex_unlock(&cpp->area_cache_mutex);
+               return NULL;
+       }
+
+       addr += *offset;
+
        /* See if we have a match */
        list_for_each_entry(cache, &cpp->area_cache_list, entry) {
                if (id == cache->id &&
@@ -937,12 +951,14 @@ int nfp_cpp_read(struct nfp_cpp *cpp, u32 destination,
                        return -ENOMEM;
 
                err = nfp_cpp_area_acquire(area);
-               if (err)
-                       goto out;
+               if (err) {
+                       nfp_cpp_area_free(area);
+                       return err;
+               }
        }
 
        err = nfp_cpp_area_read(area, offset, kernel_vaddr, length);
-out:
+
        if (cache)
                area_cache_put(cpp, cache);
        else
@@ -979,13 +995,14 @@ int nfp_cpp_write(struct nfp_cpp *cpp, u32 destination,
                        return -ENOMEM;
 
                err = nfp_cpp_area_acquire(area);
-               if (err)
-                       goto out;
+               if (err) {
+                       nfp_cpp_area_free(area);
+                       return err;
+               }
        }
 
        err = nfp_cpp_area_write(area, offset, kernel_vaddr, length);
 
-out:
        if (cache)
                area_cache_put(cpp, cache);
        else
@@ -1127,7 +1144,6 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
        rwlock_init(&cpp->resource_lock);
        init_waitqueue_head(&cpp->waitq);
        lockdep_set_class(&cpp->resource_lock, &nfp_cpp_resource_lock_key);
-       INIT_LIST_HEAD(&cpp->mutex_cache);
        INIT_LIST_HEAD(&cpp->resource_list);
        INIT_LIST_HEAD(&cpp->area_cache_list);
        mutex_init(&cpp->area_cache_mutex);
@@ -1425,322 +1441,3 @@ void *nfp_cpp_explicit_priv(struct nfp_cpp_explicit *cpp_explicit)
 {
        return &cpp_explicit[1];
 }
-
-/* THIS FUNCTION IS NOT EXPORTED */
-static u32 nfp_mutex_locked(u16 interface)
-{
-       return (u32)interface << 16 | 0x000f;
-}
-
-static u32 nfp_mutex_unlocked(u16 interface)
-{
-       return (u32)interface << 16 | 0x0000;
-}
-
-static bool nfp_mutex_is_locked(u32 val)
-{
-       return (val & 0xffff) == 0x000f;
-}
-
-static bool nfp_mutex_is_unlocked(u32 val)
-{
-       return (val & 0xffff) == 0000;
-}
-
-/* If you need more than 65536 recursive locks, please rethink your code. */
-#define MUTEX_DEPTH_MAX         0xffff
-
-static int
-nfp_cpp_mutex_validate(u16 interface, int *target, unsigned long long address)
-{
-       /* Not permitted on invalid interfaces */
-       if (NFP_CPP_INTERFACE_TYPE_of(interface) ==
-           NFP_CPP_INTERFACE_TYPE_INVALID)
-               return -EINVAL;
-
-       /* Address must be 64-bit aligned */
-       if (address & 7)
-               return -EINVAL;
-
-       if (*target != NFP_CPP_TARGET_MU)
-               return -EINVAL;
-
-       return 0;
-}
-
-/**
- * nfp_cpp_mutex_init() - Initialize a mutex location
- * @cpp:       NFP CPP handle
- * @target:    NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
- * @address:   Offset into the address space of the NFP CPP target ID
- * @key:       Unique 32-bit value for this mutex
- *
- * The CPP target:address must point to a 64-bit aligned location, and
- * will initialize 64 bits of data at the location.
- *
- * This creates the initial mutex state, as locked by this
- * nfp_cpp_interface().
- *
- * This function should only be called when setting up
- * the initial lock state upon boot-up of the system.
- *
- * Return: 0 on success, or -errno on failure
- */
-int nfp_cpp_mutex_init(struct nfp_cpp *cpp,
-                      int target, unsigned long long address, u32 key)
-{
-       const u32 muw = NFP_CPP_ID(target, 4, 0);    /* atomic_write */
-       u16 interface = nfp_cpp_interface(cpp);
-       int err;
-
-       err = nfp_cpp_mutex_validate(interface, &target, address);
-       if (err)
-               return err;
-
-       err = nfp_cpp_writel(cpp, muw, address + 4, key);
-       if (err)
-               return err;
-
-       err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_locked(interface));
-       if (err)
-               return err;
-
-       return 0;
-}
-
-/**
- * nfp_cpp_mutex_alloc() - Create a mutex handle
- * @cpp:       NFP CPP handle
- * @target:    NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
- * @address:   Offset into the address space of the NFP CPP target ID
- * @key:       32-bit unique key (must match the key at this location)
- *
- * The CPP target:address must point to a 64-bit aligned location, and
- * reserve 64 bits of data at the location for use by the handle.
- *
- * Only target/address pairs that point to entities that support the
- * MU Atomic Engine's CmpAndSwap32 command are supported.
- *
- * Return:     A non-NULL struct nfp_cpp_mutex * on success, NULL on failure.
- */
-struct nfp_cpp_mutex *nfp_cpp_mutex_alloc(struct nfp_cpp *cpp, int target,
-                                         unsigned long long address, u32 key)
-{
-       const u32 mur = NFP_CPP_ID(target, 3, 0);    /* atomic_read */
-       u16 interface = nfp_cpp_interface(cpp);
-       struct nfp_cpp_mutex *mutex;
-       int err;
-       u32 tmp;
-
-       err = nfp_cpp_mutex_validate(interface, &target, address);
-       if (err)
-               return NULL;
-
-       /* Look for mutex on cache list */
-       list_for_each_entry(mutex, &cpp->mutex_cache, list) {
-               if (mutex->target == target && mutex->address == address) {
-                       mutex->usage++;
-                       return mutex;
-               }
-       }
-
-       err = nfp_cpp_readl(cpp, mur, address + 4, &tmp);
-       if (err < 0)
-               return NULL;
-
-       if (tmp != key)
-               return NULL;
-
-       mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-       if (!mutex)
-               return NULL;
-
-       mutex->cpp = cpp;
-       mutex->target = target;
-       mutex->address = address;
-       mutex->key = key;
-       mutex->depth = 0;
-       mutex->usage = 1;
-
-       /* Add mutex to cache list */
-       list_add(&mutex->list, &cpp->mutex_cache);
-
-       return mutex;
-}
-
-/**
- * nfp_cpp_mutex_free() - Free a mutex handle - does not alter the lock state
- * @mutex:     NFP CPP Mutex handle
- */
-void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex)
-{
-       if (--mutex->usage)
-               return;
-
-       /* Remove mutex from cache */
-       list_del(&mutex->list);
-       kfree(mutex);
-}
-
-/**
- * nfp_cpp_mutex_lock() - Lock a mutex handle, using the NFP MU Atomic Engine
- * @mutex:     NFP CPP Mutex handle
- *
- * Return: 0 on success, or -errno on failure
- */
-int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex)
-{
-       unsigned long warn_at = jiffies + 15 * HZ;
-       unsigned int timeout_ms = 1;
-       int err;
-
-       /* We can't use a waitqueue here, because the unlocker
-        * might be on a separate CPU.
-        *
-        * So just wait for now.
-        */
-       for (;;) {
-               err = nfp_cpp_mutex_trylock(mutex);
-               if (err != -EBUSY)
-                       break;
-
-               err = msleep_interruptible(timeout_ms);
-               if (err != 0)
-                       return -ERESTARTSYS;
-
-               if (time_is_before_eq_jiffies(warn_at)) {
-                       warn_at = jiffies + 60 * HZ;
-                       dev_warn(mutex->cpp->dev.parent,
-                                "Warning: waiting for NFP mutex [usage:%hd depth:%hd target:%d addr:%llx key:%08x]\n",
-                                mutex->usage, mutex->depth,
-                                mutex->target, mutex->address, mutex->key);
-               }
-       }
-
-       return err;
-}
-
-/**
- * nfp_cpp_mutex_unlock() - Unlock a mutex handle, using the MU Atomic Engine
- * @mutex:     NFP CPP Mutex handle
- *
- * Return: 0 on success, or -errno on failure
- */
-int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex)
-{
-       const u32 muw = NFP_CPP_ID(mutex->target, 4, 0);    /* atomic_write */
-       const u32 mur = NFP_CPP_ID(mutex->target, 3, 0);    /* atomic_read */
-       struct nfp_cpp *cpp = mutex->cpp;
-       u32 key, value;
-       u16 interface;
-       int err;
-
-       interface = nfp_cpp_interface(cpp);
-
-       if (mutex->depth > 1) {
-               mutex->depth--;
-               return 0;
-       }
-
-       err = nfp_cpp_readl(mutex->cpp, mur, mutex->address + 4, &key);
-       if (err < 0)
-               return err;
-
-       if (key != mutex->key)
-               return -EPERM;
-
-       err = nfp_cpp_readl(mutex->cpp, mur, mutex->address, &value);
-       if (err < 0)
-               return err;
-
-       if (value != nfp_mutex_locked(interface))
-               return -EACCES;
-
-       err = nfp_cpp_writel(cpp, muw, mutex->address,
-                            nfp_mutex_unlocked(interface));
-       if (err < 0)
-               return err;
-
-       mutex->depth = 0;
-       return 0;
-}
-
-/**
- * nfp_cpp_mutex_trylock() - Attempt to lock a mutex handle
- * @mutex:     NFP CPP Mutex handle
- *
- * Return:      0 if the lock succeeded, -errno on failure
- */
-int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex)
-{
-       const u32 muw = NFP_CPP_ID(mutex->target, 4, 0);    /* atomic_write */
-       const u32 mus = NFP_CPP_ID(mutex->target, 5, 3);    /* test_set_imm */
-       const u32 mur = NFP_CPP_ID(mutex->target, 3, 0);    /* atomic_read */
-       struct nfp_cpp *cpp = mutex->cpp;
-       u32 key, value, tmp;
-       int err;
-
-       if (mutex->depth > 0) {
-               if (mutex->depth == MUTEX_DEPTH_MAX)
-                       return -E2BIG;
-               mutex->depth++;
-               return 0;
-       }
-
-       /* Verify that the lock marker is not damaged */
-       err = nfp_cpp_readl(cpp, mur, mutex->address + 4, &key);
-       if (err < 0)
-               return err;
-
-       if (key != mutex->key)
-               return -EPERM;
-
-       /* Compare against the unlocked state, and if true,
-        * write the interface id into the top 16 bits, and
-        * mark as locked.
-        */
-       value = nfp_mutex_locked(nfp_cpp_interface(cpp));
-
-       /* We use test_set_imm here, as it implies a read
-        * of the current state, and sets the bits in the
-        * bytemask of the command to 1s. Since the mutex
-        * is guaranteed to be 64-bit aligned, the bytemask
-        * of this 32-bit command is ensured to be 8'b00001111,
-        * which implies that the lower 4 bits will be set to
-        * ones regardless of the initial state.
-        *
-        * Since this is a 'Readback' operation, with no Pull
-        * data, we can treat this as a normal Push (read)
-        * atomic, which returns the original value.
-        */
-       err = nfp_cpp_readl(cpp, mus, mutex->address, &tmp);
-       if (err < 0)
-               return err;
-
-       /* Was it unlocked? */
-       if (nfp_mutex_is_unlocked(tmp)) {
-               /* The read value can only be 0x....0000 in the unlocked state.
-                * If there was another contending for this lock, then
-                * the lock state would be 0x....000f
-                */
-
-               /* Write our owner ID into the lock
-                * While not strictly necessary, this helps with
-                * debug and bookkeeping.
-                */
-               err = nfp_cpp_writel(cpp, muw, mutex->address, value);
-               if (err < 0)
-                       return err;
-
-               mutex->depth = 1;
-               return 0;
-       }
-
-       /* Already locked by us? Success! */
-       if (tmp == value) {
-               mutex->depth = 1;
-               return 0;
-       }
-
-       return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL;
-}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
new file mode 100644 (file)
index 0000000..8a99c18
--- /dev/null
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include "nfp_cpp.h"
+#include "nfp6000/nfp6000.h"
+
+struct nfp_cpp_mutex {
+       struct nfp_cpp *cpp;
+       int target;
+       u16 depth;
+       unsigned long long address;
+       u32 key;
+};
+
+static u32 nfp_mutex_locked(u16 interface)
+{
+       return (u32)interface << 16 | 0x000f;
+}
+
+static u32 nfp_mutex_unlocked(u16 interface)
+{
+       return (u32)interface << 16 | 0x0000;
+}
+
+static bool nfp_mutex_is_locked(u32 val)
+{
+       return (val & 0xffff) == 0x000f;
+}
+
+static bool nfp_mutex_is_unlocked(u32 val)
+{
+       return (val & 0xffff) == 0x0000;
+}
+
+/* If you need more than 65536 recursive locks, please rethink your code. */
+#define NFP_MUTEX_DEPTH_MAX         0xffff
+
+static int
+nfp_cpp_mutex_validate(u16 interface, int *target, unsigned long long address)
+{
+       /* Not permitted on invalid interfaces */
+       if (NFP_CPP_INTERFACE_TYPE_of(interface) ==
+           NFP_CPP_INTERFACE_TYPE_INVALID)
+               return -EINVAL;
+
+       /* Address must be 64-bit aligned */
+       if (address & 7)
+               return -EINVAL;
+
+       if (*target != NFP_CPP_TARGET_MU)
+               return -EINVAL;
+
+       return 0;
+}
+
+/**
+ * nfp_cpp_mutex_init() - Initialize a mutex location
+ * @cpp:       NFP CPP handle
+ * @target:    NFP CPP target ID (i.e. NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
+ * @address:   Offset into the address space of the NFP CPP target ID
+ * @key:       Unique 32-bit value for this mutex
+ *
+ * The CPP target:address must point to a 64-bit aligned location, and
+ * will initialize 64 bits of data at the location.
+ *
+ * This creates the initial mutex state, as locked by this
+ * nfp_cpp_interface().
+ *
+ * This function should only be called when setting up
+ * the initial lock state upon boot-up of the system.
+ *
+ * Return: 0 on success, or -errno on failure
+ */
+int nfp_cpp_mutex_init(struct nfp_cpp *cpp,
+                      int target, unsigned long long address, u32 key)
+{
+       const u32 muw = NFP_CPP_ID(target, 4, 0);    /* atomic_write */
+       u16 interface = nfp_cpp_interface(cpp);
+       int err;
+
+       err = nfp_cpp_mutex_validate(interface, &target, address);
+       if (err)
+               return err;
+
+       err = nfp_cpp_writel(cpp, muw, address + 4, key);
+       if (err)
+               return err;
+
+       err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_locked(interface));
+       if (err)
+               return err;
+
+       return 0;
+}
+
+/**
+ * nfp_cpp_mutex_alloc() - Create a mutex handle
+ * @cpp:       NFP CPP handle
+ * @target:    NFP CPP target ID (i.e. NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
+ * @address:   Offset into the address space of the NFP CPP target ID
+ * @key:       32-bit unique key (must match the key at this location)
+ *
+ * The CPP target:address must point to a 64-bit aligned location, and
+ * reserve 64 bits of data at the location for use by the handle.
+ *
+ * Only target/address pairs that point to entities that support the
+ * MU Atomic Engine's CmpAndSwap32 command are supported.
+ *
+ * Return:     A non-NULL struct nfp_cpp_mutex * on success, NULL on failure.
+ */
+struct nfp_cpp_mutex *nfp_cpp_mutex_alloc(struct nfp_cpp *cpp, int target,
+                                         unsigned long long address, u32 key)
+{
+       const u32 mur = NFP_CPP_ID(target, 3, 0);    /* atomic_read */
+       u16 interface = nfp_cpp_interface(cpp);
+       struct nfp_cpp_mutex *mutex;
+       int err;
+       u32 tmp;
+
+       err = nfp_cpp_mutex_validate(interface, &target, address);
+       if (err)
+               return NULL;
+
+       err = nfp_cpp_readl(cpp, mur, address + 4, &tmp);
+       if (err < 0)
+               return NULL;
+
+       if (tmp != key)
+               return NULL;
+
+       mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
+       if (!mutex)
+               return NULL;
+
+       mutex->cpp = cpp;
+       mutex->target = target;
+       mutex->address = address;
+       mutex->key = key;
+       mutex->depth = 0;
+
+       return mutex;
+}
+
+/**
+ * nfp_cpp_mutex_free() - Free a mutex handle - does not alter the lock state
+ * @mutex:     NFP CPP Mutex handle
+ */
+void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex)
+{
+       kfree(mutex);
+}
+
+/**
+ * nfp_cpp_mutex_lock() - Lock a mutex handle, using the NFP MU Atomic Engine
+ * @mutex:     NFP CPP Mutex handle
+ *
+ * Return: 0 on success, or -errno on failure
+ */
+int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex)
+{
+       unsigned long warn_at = jiffies + 15 * HZ;
+       unsigned int timeout_ms = 1;
+       int err;
+
+       /* We can't use a waitqueue here, because the unlocker
+        * might be on a separate CPU.
+        *
+        * So just wait for now.
+        */
+       for (;;) {
+               err = nfp_cpp_mutex_trylock(mutex);
+               if (err != -EBUSY)
+                       break;
+
+               err = msleep_interruptible(timeout_ms);
+               if (err != 0)
+                       return -ERESTARTSYS;
+
+               if (time_is_before_eq_jiffies(warn_at)) {
+                       warn_at = jiffies + 60 * HZ;
+                       nfp_warn(mutex->cpp,
+                                "Warning: waiting for NFP mutex [depth:%hu target:%d addr:%llx key:%08x]\n",
+                                mutex->depth,
+                                mutex->target, mutex->address, mutex->key);
+               }
+       }
+
+       return err;
+}
+
+/**
+ * nfp_cpp_mutex_unlock() - Unlock a mutex handle, using the MU Atomic Engine
+ * @mutex:     NFP CPP Mutex handle
+ *
+ * Return: 0 on success, or -errno on failure
+ */
+int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex)
+{
+       const u32 muw = NFP_CPP_ID(mutex->target, 4, 0);    /* atomic_write */
+       const u32 mur = NFP_CPP_ID(mutex->target, 3, 0);    /* atomic_read */
+       struct nfp_cpp *cpp = mutex->cpp;
+       u32 key, value;
+       u16 interface;
+       int err;
+
+       interface = nfp_cpp_interface(cpp);
+
+       if (mutex->depth > 1) {
+               mutex->depth--;
+               return 0;
+       }
+
+       err = nfp_cpp_readl(mutex->cpp, mur, mutex->address + 4, &key);
+       if (err < 0)
+               return err;
+
+       if (key != mutex->key)
+               return -EPERM;
+
+       err = nfp_cpp_readl(mutex->cpp, mur, mutex->address, &value);
+       if (err < 0)
+               return err;
+
+       if (value != nfp_mutex_locked(interface))
+               return -EACCES;
+
+       err = nfp_cpp_writel(cpp, muw, mutex->address,
+                            nfp_mutex_unlocked(interface));
+       if (err < 0)
+               return err;
+
+       mutex->depth = 0;
+       return 0;
+}
+
+/**
+ * nfp_cpp_mutex_trylock() - Attempt to lock a mutex handle
+ * @mutex:     NFP CPP Mutex handle
+ *
+ * Return:      0 if the lock succeeded, -errno on failure
+ */
+int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex)
+{
+       const u32 muw = NFP_CPP_ID(mutex->target, 4, 0);    /* atomic_write */
+       const u32 mus = NFP_CPP_ID(mutex->target, 5, 3);    /* test_set_imm */
+       const u32 mur = NFP_CPP_ID(mutex->target, 3, 0);    /* atomic_read */
+       struct nfp_cpp *cpp = mutex->cpp;
+       u32 key, value, tmp;
+       int err;
+
+       if (mutex->depth > 0) {
+               if (mutex->depth == NFP_MUTEX_DEPTH_MAX)
+                       return -E2BIG;
+               mutex->depth++;
+               return 0;
+       }
+
+       /* Verify that the lock marker is not damaged */
+       err = nfp_cpp_readl(cpp, mur, mutex->address + 4, &key);
+       if (err < 0)
+               return err;
+
+       if (key != mutex->key)
+               return -EPERM;
+
+       /* Compare against the unlocked state, and if true,
+        * write the interface id into the top 16 bits, and
+        * mark as locked.
+        */
+       value = nfp_mutex_locked(nfp_cpp_interface(cpp));
+
+       /* We use test_set_imm here, as it implies a read
+        * of the current state, and sets the bits in the
+        * bytemask of the command to 1s. Since the mutex
+        * is guaranteed to be 64-bit aligned, the bytemask
+        * of this 32-bit command is ensured to be 8'b00001111,
+        * which implies that the lower 4 bits will be set to
+        * ones regardless of the initial state.
+        *
+        * Since this is a 'Readback' operation, with no Pull
+        * data, we can treat this as a normal Push (read)
+        * atomic, which returns the original value.
+        */
+       err = nfp_cpp_readl(cpp, mus, mutex->address, &tmp);
+       if (err < 0)
+               return err;
+
+       /* Was it unlocked? */
+       if (nfp_mutex_is_unlocked(tmp)) {
+               /* The read value can only be 0x....0000 in the unlocked state.
+                * If another owner were contending for this lock, the
+                * lock state would be 0x....000f.
+                */
+
+               /* Write our owner ID into the lock.
+                * While not strictly necessary, this helps with
+                * debugging and bookkeeping.
+                */
+               err = nfp_cpp_writel(cpp, muw, mutex->address, value);
+               if (err < 0)
+                       return err;
+
+               mutex->depth = 1;
+               return 0;
+       }
+
+       return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL;
+}
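For reference while reading this file: the 32-bit lock word packs the owner's interface id into the top 16 bits and a locked/unlocked marker into the bottom 16, per the helpers at the top; the second word at address + 4 holds the key that every operation verifies first. With a hypothetical interface id:

u16 ifc = 0x1234;			/* hypothetical interface id */
u32 locked = (u32)ifc << 16 | 0x000f;	/* 0x1234000f: held by 0x1234 */
u32 unlocked = (u32)ifc << 16;		/* 0x12340000: free */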
index 34c50987c377c6e721d82621b51702457a629781..17822ae4a17f35c5da2f1127785072e154dd0b58 100644 (file)
@@ -209,9 +209,8 @@ nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg,
                if ((*reg & mask) == val)
                        return 0;
 
-               err = msleep_interruptible(100);
-               if (err)
-                       return err;
+               if (msleep_interruptible(25))
+                       return -ERESTARTSYS;
 
                if (time_after(start_time, wait_until))
                        return -ETIMEDOUT;
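This hunk also fixes a latent bug: msleep_interruptible() returns the number of milliseconds left to sleep when a signal arrives, never an errno, so the old `return err` could leak a positive millisecond count to callers as a pseudo error code. The correct translation is:

/* msleep_interruptible() -> 0 after a full sleep, or the remaining
 * time in ms if a signal cut it short; map the latter to an errno. */
if (msleep_interruptible(25))
	return -ERESTARTSYS;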
@@ -228,7 +227,7 @@ nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg,
  *
  * Return: 0 for success with no result
  *
- *      1..255 for NSP completion with a result code
+ *      positive value for NSP completion with a result code
  *
  *     -EAGAIN if the NSP is not yet present
  *     -ENODEV if the NSP is not a supported model
@@ -380,9 +379,10 @@ int nfp_nsp_wait(struct nfp_nsp *state)
                if (err != -EAGAIN)
                        break;
 
-               err = msleep_interruptible(100);
-               if (err)
+               if (msleep_interruptible(25)) {
+                       err = -ERESTARTSYS;
                        break;
+               }
 
                if (time_after(start_time, wait_until)) {
                        err = -ETIMEDOUT;
index 1ece1f8ae4b30c0c74a7f630487749d91d5b5620..38bd80077e33fe82438a993e333af3162b78acce 100644 (file)
@@ -134,9 +134,32 @@ nfp_eth_port_translate(const struct eth_table_entry *src, unsigned int index,
 
        nfp_eth_copy_mac_reverse(dst->mac_addr, src->mac_addr);
 
-       snprintf(dst->label, sizeof(dst->label) - 1, "%llu.%llu",
-                FIELD_GET(NSP_ETH_PORT_PHYLABEL, port),
-                FIELD_GET(NSP_ETH_PORT_LABEL, port));
+       dst->label_port = FIELD_GET(NSP_ETH_PORT_PHYLABEL, port);
+       dst->label_subport = FIELD_GET(NSP_ETH_PORT_LABEL, port);
+}
+
+static void
+nfp_eth_mark_split_ports(struct nfp_cpp *cpp, struct nfp_eth_table *table)
+{
+       unsigned int i, j;
+
+       for (i = 0; i < table->count; i++)
+               for (j = 0; j < table->count; j++) {
+                       if (i == j)
+                               continue;
+                       if (table->ports[i].label_port !=
+                           table->ports[j].label_port)
+                               continue;
+                       if (table->ports[i].label_subport ==
+                           table->ports[j].label_subport)
+                               nfp_warn(cpp,
+                                        "Port %d subport %d is a duplicate\n",
+                                        table->ports[i].label_port,
+                                        table->ports[i].label_subport);
+
+                       table->ports[i].is_split = true;
+                       break;
+               }
 }
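The scan flags a port as split whenever any other table entry shares its physical label. Worked through with hypothetical labels:

/* entries labelled 1.0 and 1.1 share label_port 1 -> both is_split;
 * a lone 2.0 matches no other entry -> stays unsplit;
 * two entries both labelled 1.0 would also trip the duplicate-subport
 * warning above before the split flag is set. */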
 
 /**
@@ -168,8 +191,7 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp)
 {
        struct eth_table_entry *entries;
        struct nfp_eth_table *table;
-       unsigned int cnt;
-       int i, j, ret;
+       int i, j, ret, cnt = 0;
 
        entries = kzalloc(NSP_ETH_TABLE_SIZE, GFP_KERNEL);
        if (!entries)
@@ -178,24 +200,27 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp)
        ret = nfp_nsp_read_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE);
        if (ret < 0) {
                nfp_err(cpp, "reading port table failed %d\n", ret);
-               kfree(entries);
-               return NULL;
+               goto err;
        }
 
-       /* Some versions of flash will give us 0 instead of port count */
-       cnt = ret;
-       if (!cnt) {
-               for (i = 0; i < NSP_ETH_MAX_COUNT; i++)
-                       if (entries[i].port & NSP_ETH_PORT_LANES_MASK)
-                               cnt++;
+       for (i = 0; i < NSP_ETH_MAX_COUNT; i++)
+               if (entries[i].port & NSP_ETH_PORT_LANES_MASK)
+                       cnt++;
+
+       /* Some versions of flash will give us 0 instead of port count.
+        * For those that give a port count, verify it against the value
+        * calculated above.
+        */
+       if (ret && ret != cnt) {
+               nfp_err(cpp, "table entry count reported (%d) does not match entries present (%d)\n",
+                       ret, cnt);
+               goto err;
        }
 
        table = kzalloc(sizeof(*table) +
                        sizeof(struct nfp_eth_table_port) * cnt, GFP_KERNEL);
-       if (!table) {
-               kfree(entries);
-               return NULL;
-       }
+       if (!table)
+               goto err;
 
        table->count = cnt;
        for (i = 0, j = 0; i < NSP_ETH_MAX_COUNT; i++)
@@ -203,9 +228,15 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp)
                        nfp_eth_port_translate(&entries[i], i,
                                               &table->ports[j++]);
 
+       nfp_eth_mark_split_ports(cpp, table);
+
        kfree(entries);
 
        return table;
+
+err:
+       kfree(entries);
+       return NULL;
 }
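
In effect the count check above behaves as follows (values invented):

    /* ret == 0 (buggy flash): trust the computed cnt, e.g. cnt = 2.
     * ret == 2, cnt == 2    : counts agree, proceed.
     * ret == 3, cnt == 2    : mismatch -> nfp_err() and fail the read.
     */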
 
 /**
index edf703d319c8a9c7d98386b74237017b38d795cc..325e841ca90a9a660057bab5bdacc12ff3615dc7 100644 (file)
  * @lanes:     number of channels
  * @speed:     interface speed (in Mbps)
  * @mac_addr:  interface MAC address
- * @label:     interface id string
+ * @label_port:        port id
+ * @label_subport:  id of interface within port (for split ports)
  * @enabled:   is enabled?
  * @tx_enabled:        is TX enabled?
  * @rx_enabled:        is RX enabled?
+ *
+ * @is_split:  is the interface part of a split port?

  */
 struct nfp_eth_table {
        unsigned int count;
@@ -65,14 +68,22 @@ struct nfp_eth_table {
                unsigned int speed;
 
                u8 mac_addr[ETH_ALEN];
-               char label[8];
+
+               u8 label_port;
+               u8 label_subport;
 
                bool enabled;
                bool tx_enabled;
                bool rx_enabled;
+
+               /* Computed fields */
+               bool is_split;
        } ports[0];
 };
 
+struct nfp_cpp;
+struct nfp_nsp;
+
 struct nfp_eth_table *nfp_eth_read_ports(struct nfp_cpp *cpp);
 struct nfp_eth_table *
 __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp);
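
A hedged caller-side sketch of the API declared above; the field names come from this header, while freeing the kzalloc()ed table with a plain kfree() is an assumption:

    struct nfp_eth_table *tbl;
    unsigned int i;

    tbl = nfp_eth_read_ports(cpp);     /* NULL on read/validation failure */
    if (!tbl)
            return -EIO;
    for (i = 0; i < tbl->count; i++)
            if (tbl->ports[i].is_split)
                    pr_info("port %u.%u belongs to a split port\n",
                            tbl->ports[i].label_port,
                            tbl->ports[i].label_subport);
    kfree(tbl);                        /* assumed ownership convention */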
index a2850344f8b44179dac1501e1f0a5369b6dddc6a..2d15a7c9d0de33d6b3773e3c7da7ed49c694095d 100644 (file)
 #include "nfp_cpp.h"
 #include "nfp6000/nfp6000.h"
 
+#define NFP_RESOURCE_TBL_TARGET                NFP_CPP_TARGET_MU
+#define NFP_RESOURCE_TBL_BASE          0x8100000000ULL
+
+/* NFP Resource Table self-identifier */
+#define NFP_RESOURCE_TBL_NAME          "nfp.res"
+#define NFP_RESOURCE_TBL_KEY           0x00000000 /* Special key for entry 0 */
+
 #define NFP_RESOURCE_ENTRY_NAME_SZ     8
 
 /**
@@ -100,9 +107,11 @@ static int nfp_cpp_resource_find(struct nfp_cpp *cpp, struct nfp_resource *res)
        strncpy(name_pad, res->name, sizeof(name_pad));
 
        /* Search for a matching entry */
-       key = NFP_RESOURCE_TBL_KEY;
-       if (memcmp(name_pad, NFP_RESOURCE_TBL_NAME "\0\0\0\0\0\0\0\0", 8))
-               key = crc32_posix(name_pad, sizeof(name_pad));
+       if (!memcmp(name_pad, NFP_RESOURCE_TBL_NAME "\0\0\0\0\0\0\0\0", 8)) {
+               nfp_err(cpp, "Grabbing device lock not supported\n");
+               return -EOPNOTSUPP;
+       }
+       key = crc32_posix(name_pad, sizeof(name_pad));
 
        for (i = 0; i < NFP_RESOURCE_TBL_ENTRIES; i++) {
                u64 addr = NFP_RESOURCE_TBL_BASE +
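
In short, the lookup above now behaves like this (the name "nfp.nffw" is illustrative):

    /* name_pad is res->name zero-padded to 8 bytes. "nfp.res" names the
     * table's own lock (entry 0, NFP_RESOURCE_TBL_KEY) and is refused with
     * -EOPNOTSUPP; any other name, e.g. "nfp.nffw", is hashed with
     * crc32_posix(name_pad, sizeof(name_pad)) and the table entries are
     * scanned for a matching key.
     */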
index 7b43a3b4abdcbc7bc1cdfd4d13c611563e2760a2..3dd973475125c0f856d4eccacba4ce4c0c34cc50 100644 (file)
@@ -1375,13 +1375,8 @@ netxen_receive_peg_ready(struct netxen_adapter *adapter)
 
        } while (--retries);
 
-       if (!retries) {
-               printk(KERN_ERR "Receive Peg initialization not "
-                             "complete, state: 0x%x.\n", val);
-               return -EIO;
-       }
-
-       return 0;
+       pr_err("Receive Peg initialization not complete, state: 0x%x.\n", val);
+       return -EIO;
 }
 
 int netxen_init_firmware(struct netxen_adapter *adapter)
index 00c17fa6545bd5752a427e3660b062dc26ba57db..d8bcc21a4f697b68c7d7086a7a9ecd53aa41c716 100644 (file)
 #include "qed_hsi.h"
 
 extern const struct qed_common_ops qed_common_ops_pass;
-#define DRV_MODULE_VERSION "8.10.10.20"
+
+#define QED_MAJOR_VERSION               8
+#define QED_MINOR_VERSION               10
+#define QED_REVISION_VERSION            10
+#define QED_ENGINEERING_VERSION 21
+
+#define QED_VERSION                                             \
+       ((QED_MAJOR_VERSION << 24) | (QED_MINOR_VERSION << 16) | \
+        (QED_REVISION_VERSION << 8) | QED_ENGINEERING_VERSION)
+
+#define STORM_FW_VERSION                                      \
+       ((FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | \
+        (FW_REVISION_VERSION << 8) | FW_ENGINEERING_VERSION)
 
 #define MAX_HWFNS_PER_DEVICE    (4)
 #define NAME_SIZE 16
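
For reference, the packed driver version under the scheme above works out to:

    /* QED_VERSION = (8 << 24) | (10 << 16) | (10 << 8) | 21 = 0x080a0a15 */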
@@ -59,8 +71,6 @@ extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_WFQ_UNIT   100
 
-#define ISCSI_BDQ_ID(_port_id) (_port_id)
-#define FCOE_BDQ_ID(_port_id) ((_port_id) + 2)
 #define QED_WID_SIZE            (1024)
 #define QED_PF_DEMS_SIZE        (4)
 
@@ -76,6 +86,15 @@ union qed_mcp_protocol_stats;
 enum qed_mcp_protocol_type;
 
 /* helpers */
+#define QED_MFW_GET_FIELD(name, field) \
+       (((name) & (field ## _MASK)) >> (field ## _SHIFT))
+
+#define QED_MFW_SET_FIELD(name, field, value)                                 \
+       do {                                                                   \
+               (name)  &= ~((field ## _MASK) << (field ## _SHIFT));           \
+               (name)  |= (((value) << (field ## _SHIFT)) & (field ## _MASK));\
+       } while (0)
+
 static inline u32 qed_db_addr(u32 cid, u32 DEMS)
 {
        u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) |
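
A hedged expansion of QED_MFW_GET_FIELD with illustrative values; the real DCBX_ETS_MAX_TCS_MASK/_SHIFT live in the firmware HSI headers, not in this hunk:

    /* Assuming DCBX_ETS_MAX_TCS_MASK = 0xf0, DCBX_ETS_MAX_TCS_SHIFT = 4:
     * QED_MFW_GET_FIELD(flags, DCBX_ETS_MAX_TCS)
     *         -> ((flags) & 0xf0) >> 4
     * i.e. mask the field in place, then shift it down to bit 0.
     */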
@@ -198,6 +217,7 @@ enum qed_resources {
        QED_LL2_QUEUE,
        QED_CMDQS_CQS,
        QED_RDMA_STATS_QUEUE,
+       QED_BDQ,
        QED_MAX_RESC,
 };
 
@@ -219,7 +239,9 @@ enum QED_PORT_MODE {
        QED_PORT_MODE_DE_4X20G,
        QED_PORT_MODE_DE_1X40G,
        QED_PORT_MODE_DE_2X25G,
-       QED_PORT_MODE_DE_1X25G
+       QED_PORT_MODE_DE_1X25G,
+       QED_PORT_MODE_DE_4X25G,
+       QED_PORT_MODE_DE_2X10G,
 };
 
 enum qed_dev_cap {
@@ -249,9 +271,14 @@ struct qed_hw_info {
                                 RESC_NUM(_p_hwfn, resc))
 #define FEAT_NUM(_p_hwfn, resc) ((_p_hwfn)->hw_info.feat_num[resc])
 
-       u8                              num_tc;
+       /* Number of traffic classes the HW supports */
+       u8 num_hw_tc;
+
+       /* Number of TCs which should be active according to DCBx or the
+        * upper layer driver configuration.
+        */
+       u8 num_active_tc;
        u8                              offload_tc;
-       u8                              non_offload_tc;
 
        u32                             concrete_fid;
        u16                             opaque_fid;
@@ -314,15 +341,19 @@ struct qed_qm_info {
        struct init_qm_port_params      *qm_port_params;
        u16                             start_pq;
        u8                              start_vport;
-       u8                              pure_lb_pq;
-       u8                              offload_pq;
-       u8                              pure_ack_pq;
-       u8 ooo_pq;
-       u8                              vf_queues_offset;
+       u16                             pure_lb_pq;
+       u16                             offload_pq;
+       u16                             low_latency_pq;
+       u16                             pure_ack_pq;
+       u16                             ooo_pq;
+       u16                             first_vf_pq;
+       u16                             first_mcos_pq;
+       u16                             first_rl_pq;
        u16                             num_pqs;
        u16                             num_vf_pqs;
        u8                              num_vports;
        u8                              max_phys_tcs_per_port;
+       u8                              ooo_tc;
        bool                            pf_rl_en;
        bool                            pf_wfq_en;
        bool                            vport_rl_en;
@@ -353,6 +384,12 @@ struct qed_fw_data {
        u32                     init_ops_size;
 };
 
+#define DRV_MODULE_VERSION                   \
+       __stringify(QED_MAJOR_VERSION) "."    \
+       __stringify(QED_MINOR_VERSION) "."    \
+       __stringify(QED_REVISION_VERSION) "." \
+       __stringify(QED_ENGINEERING_VERSION)
+
 struct qed_simd_fp_handler {
        void    *token;
        void    (*func)(void *);
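
With the component macros defined earlier in this file, the stringified form evaluates to:

    /* DRV_MODULE_VERSION -> "8" "." "10" "." "10" "." "21" -> "8.10.10.21" */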
@@ -364,7 +401,8 @@ struct qed_hwfn {
 #define IS_LEAD_HWFN(edev)              (!((edev)->my_id))
        u8                              rel_pf_id;      /* Relative to engine*/
        u8                              abs_pf_id;
-#define QED_PATH_ID(_p_hwfn)           ((_p_hwfn)->abs_pf_id & 1)
+#define QED_PATH_ID(_p_hwfn) \
+       (QED_IS_K2((_p_hwfn)->cdev) ? 0 : ((_p_hwfn)->abs_pf_id & 1))
        u8                              port_id;
        bool                            b_active;
 
@@ -523,9 +561,7 @@ struct qed_dev {
        u8      dp_level;
        char    name[NAME_SIZE];
 
-       u8      type;
-#define QED_DEV_TYPE_BB (0 << 0)
-#define QED_DEV_TYPE_AH BIT(0)
+       enum    qed_dev_type type;
 /* Translate type/revision combo into the proper conditions */
 #define QED_IS_BB(dev)  ((dev)->type == QED_DEV_TYPE_BB)
 #define QED_IS_BB_A0(dev)       (QED_IS_BB(dev) && \
@@ -540,6 +576,9 @@ struct qed_dev {
 
        u16     vendor_id;
        u16     device_id;
+#define QED_DEV_ID_MASK                0xff00
+#define QED_DEV_ID_MASK_BB     0x1600
+#define QED_DEV_ID_MASK_AH     0x8000
 
        u16     chip_num;
 #define CHIP_NUM_MASK                   0xffff
@@ -654,10 +693,16 @@ struct qed_dev {
        u32 rdma_max_srq_sge;
 };
 
-#define NUM_OF_VFS(dev)         MAX_NUM_VFS_BB
-#define NUM_OF_L2_QUEUES(dev)  MAX_NUM_L2_QUEUES_BB
-#define NUM_OF_SBS(dev)         MAX_SB_PER_PATH_BB
-#define NUM_OF_ENG_PFS(dev)     MAX_NUM_PFS_BB
+#define NUM_OF_VFS(dev)         (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \
+                                               : MAX_NUM_VFS_K2)
+#define NUM_OF_L2_QUEUES(dev)   (QED_IS_BB(dev) ? MAX_NUM_L2_QUEUES_BB \
+                                               : MAX_NUM_L2_QUEUES_K2)
+#define NUM_OF_PORTS(dev)       (QED_IS_BB(dev) ? MAX_NUM_PORTS_BB \
+                                               : MAX_NUM_PORTS_K2)
+#define NUM_OF_SBS(dev)         (QED_IS_BB(dev) ? MAX_SB_PER_PATH_BB \
+                                               : MAX_SB_PER_PATH_K2)
+#define NUM_OF_ENG_PFS(dev)     (QED_IS_BB(dev) ? MAX_NUM_PFS_BB \
+                                               : MAX_NUM_PFS_K2)
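
A one-line illustration of the BB/K2 dispatch these macros now perform (QED_IS_BB() is defined earlier in this file):

    /* On an AH device (dev->type == QED_DEV_TYPE_AH), QED_IS_BB(dev) is
     * false, so e.g. NUM_OF_ENG_PFS(dev) evaluates to MAX_NUM_PFS_K2.
     */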
 
 /**
  * @brief qed_concrete_to_sw_fid - get the sw function id from
@@ -693,6 +738,25 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
                                         u32 min_pf_rate);
 
 void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+int qed_device_num_engines(struct qed_dev *cdev);
+
+/* Flags for indication of required queues */
+#define PQ_FLAGS_RLS    (BIT(0))
+#define PQ_FLAGS_MCOS   (BIT(1))
+#define PQ_FLAGS_LB     (BIT(2))
+#define PQ_FLAGS_OOO    (BIT(3))
+#define PQ_FLAGS_ACK    (BIT(4))
+#define PQ_FLAGS_OFLD   (BIT(5))
+#define PQ_FLAGS_VFS    (BIT(6))
+#define PQ_FLAGS_LLT    (BIT(7))
+
+/* physical queue index for cm context initialization */
+u16 qed_get_cm_pq_idx(struct qed_hwfn *p_hwfn, u32 pq_flags);
+u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc);
+u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf);
+
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
 
 /* Other Linux specific common definitions */
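
The flags feed the lookup helpers declared above; the pure-LB lookup is used by qed_cm_init_pf() further down in this diff:

    u16 pq = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);  /* pure loopback PQ */
    STORE_RT_REG(p_hwfn, XCM_REG_CON_PHY_Q3_RT_OFFSET, pq);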
@@ -721,5 +785,6 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
                            enum qed_mcp_protocol_type type,
                            union qed_mcp_protocol_stats *stats);
 int qed_slowpath_irq_req(struct qed_hwfn *hwfn);
+void qed_slowpath_irq_sync(struct qed_hwfn *p_hwfn);
 
 #endif /* _QED_H */
index 7e3a6fed3da6d94fe47139aef697563b56726950..485b8b22ec7a94f8080cdf53f4aed94af78be30e 100644 (file)
@@ -71,8 +71,7 @@
 #define TM_ALIGN        BIT(TM_SHIFT)
 #define TM_ELEM_SIZE    4
 
-/* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */
-#define ILT_DEFAULT_HW_P_SIZE  (IS_ENABLED(CONFIG_QED_RDMA) ? 4 : 3)
+#define ILT_DEFAULT_HW_P_SIZE  4
 
 #define ILT_PAGE_IN_BYTES(hw_p_size)   (1U << ((hw_p_size) + 12))
 #define ILT_CFG_REG(cli, reg)  PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
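
With the default p_size of 4 chosen above, each ILT page is therefore:

    /* ILT_PAGE_IN_BYTES(4) = 1U << (4 + 12) = 65536 bytes (64K) */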
@@ -242,8 +241,7 @@ struct qed_cxt_mngr {
 static bool src_proto(enum protocol_type type)
 {
        return type == PROTOCOLID_ISCSI ||
-              type == PROTOCOLID_FCOE ||
-              type == PROTOCOLID_ROCE;
+              type == PROTOCOLID_FCOE;
 }
 
 static bool tm_cid_proto(enum protocol_type type)
@@ -304,16 +302,34 @@ struct qed_tm_iids {
        u32 per_vf_tids;
 };
 
-static void qed_cxt_tm_iids(struct qed_cxt_mngr *p_mngr,
+static void qed_cxt_tm_iids(struct qed_hwfn *p_hwfn,
+                           struct qed_cxt_mngr *p_mngr,
                            struct qed_tm_iids *iids)
 {
-       u32 i, j;
-
-       for (i = 0; i < MAX_CONN_TYPES; i++) {
+       bool tm_vf_required = false;
+       bool tm_required = false;
+       int i, j;
+
+       /* Timers are a special case -> we don't count how many cids require
+        * timers, but rather the max cid that will be used by the timer block.
+        * Therefore we traverse in reverse order, and once we hit a protocol
+        * that requires the timers memory, we sum all the protocols up
+        * to that one.
+        */
+       for (i = MAX_CONN_TYPES - 1; i >= 0; i--) {
                struct qed_conn_type_cfg *p_cfg = &p_mngr->conn_cfg[i];
 
-               if (tm_cid_proto(i)) {
+               if (tm_cid_proto(i) || tm_required) {
+                       if (p_cfg->cid_count)
+                               tm_required = true;
+
                        iids->pf_cids += p_cfg->cid_count;
+               }
+
+               if (tm_cid_proto(i) || tm_vf_required) {
+                       if (p_cfg->cids_per_vf)
+                               tm_vf_required = true;
+
                        iids->per_vf_cids += p_cfg->cids_per_vf;
                }
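
An illustrative walk of the reverse scan above (protocol indices and counts invented):

    /* Suppose only protocol index 3 is a tm_cid_proto() with cid_count > 0.
     * Scanning i = MAX_CONN_TYPES-1 .. 0, nothing is summed until i == 3,
     * where tm_required becomes true; indices 2..0 are then summed too,
     * because the timer block is sized by the max cid in use, not by the
     * number of cids that actually need timers.
     */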
 
@@ -527,7 +543,22 @@ static u32 qed_ilt_get_dynamic_line_cnt(struct qed_hwfn *p_hwfn,
        return lines_to_skip;
 }
 
-int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
+static struct qed_ilt_client_cfg *
+qed_cxt_set_cli(struct qed_ilt_client_cfg *p_cli)
+{
+       p_cli->active = false;
+       p_cli->first.val = 0;
+       p_cli->last.val = 0;
+       return p_cli;
+}
+
+static struct qed_ilt_cli_blk *qed_cxt_set_blk(struct qed_ilt_cli_blk *p_blk)
+{
+       p_blk->total_size = 0;
+       return p_blk;
+}
+
+int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *line_count)
 {
        struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
        u32 curr_line, total, i, task_size, line;
@@ -551,7 +582,8 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                   p_hwfn->my_id, p_hwfn->p_cxt_mngr->pf_start_line);
 
        /* CDUC */
-       p_cli = &p_mngr->clients[ILT_CLI_CDUC];
+       p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_CDUC]);
+
        curr_line = p_mngr->pf_start_line;
 
        /* CDUC PF */
@@ -560,7 +592,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
        /* get the counters for the CDUC and QM clients  */
        qed_cxt_cdu_iids(p_mngr, &cdu_iids);
 
-       p_blk = &p_cli->pf_blks[CDUC_BLK];
+       p_blk = qed_cxt_set_blk(&p_cli->pf_blks[CDUC_BLK]);
 
        total = cdu_iids.pf_cids * CONN_CXT_SIZE(p_hwfn);
 
@@ -574,7 +606,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                                                               ILT_CLI_CDUC);
 
        /* CDUC VF */
-       p_blk = &p_cli->vf_blks[CDUC_BLK];
+       p_blk = qed_cxt_set_blk(&p_cli->vf_blks[CDUC_BLK]);
        total = cdu_iids.per_vf_cids * CONN_CXT_SIZE(p_hwfn);
 
        qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line,
@@ -588,7 +620,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                                     ILT_CLI_CDUC);
 
        /* CDUT PF */
-       p_cli = &p_mngr->clients[ILT_CLI_CDUT];
+       p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_CDUT]);
        p_cli->first.val = curr_line;
 
        /* first the 'working' task memory */
@@ -597,7 +629,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                if (!p_seg || p_seg->count == 0)
                        continue;
 
-               p_blk = &p_cli->pf_blks[CDUT_SEG_BLK(i)];
+               p_blk = qed_cxt_set_blk(&p_cli->pf_blks[CDUT_SEG_BLK(i)]);
                total = p_seg->count * p_mngr->task_type_size[p_seg->type];
                qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, total,
                                     p_mngr->task_type_size[p_seg->type]);
@@ -612,7 +644,8 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                if (!p_seg || p_seg->count == 0)
                        continue;
 
-               p_blk = &p_cli->pf_blks[CDUT_FL_SEG_BLK(i, PF)];
+               p_blk =
+                   qed_cxt_set_blk(&p_cli->pf_blks[CDUT_FL_SEG_BLK(i, PF)]);
 
                if (!p_seg->has_fl_mem) {
                        /* The segment is active (total size of 'working'
@@ -657,7 +690,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                /* 'working' memory */
                total = p_seg->count * p_mngr->task_type_size[p_seg->type];
 
-               p_blk = &p_cli->vf_blks[CDUT_SEG_BLK(0)];
+               p_blk = qed_cxt_set_blk(&p_cli->vf_blks[CDUT_SEG_BLK(0)]);
                qed_ilt_cli_blk_fill(p_cli, p_blk,
                                     curr_line, total,
                                     p_mngr->task_type_size[p_seg->type]);
@@ -666,7 +699,8 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                                     ILT_CLI_CDUT);
 
                /* 'init' memory */
-               p_blk = &p_cli->vf_blks[CDUT_FL_SEG_BLK(0, VF)];
+               p_blk =
+                   qed_cxt_set_blk(&p_cli->vf_blks[CDUT_FL_SEG_BLK(0, VF)]);
                if (!p_seg->has_fl_mem) {
                        /* see comment above */
                        line = p_cli->vf_blks[CDUT_SEG_BLK(0)].start_line;
@@ -694,8 +728,8 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
        }
 
        /* QM */
-       p_cli = &p_mngr->clients[ILT_CLI_QM];
-       p_blk = &p_cli->pf_blks[0];
+       p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_QM]);
+       p_blk = qed_cxt_set_blk(&p_cli->pf_blks[0]);
 
        qed_cxt_qm_iids(p_hwfn, &qm_iids);
        total = qed_qm_pf_mem_size(p_hwfn->rel_pf_id, qm_iids.cids,
@@ -719,7 +753,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
        p_cli->pf_total_lines = curr_line - p_blk->start_line;
 
        /* SRC */
-       p_cli = &p_mngr->clients[ILT_CLI_SRC];
+       p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_SRC]);
        qed_cxt_src_iids(p_mngr, &src_iids);
 
        /* Both the PF and VFs searcher connections are stored in the per PF
@@ -733,7 +767,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
 
                total = roundup_pow_of_two(local_max);
 
-               p_blk = &p_cli->pf_blks[0];
+               p_blk = qed_cxt_set_blk(&p_cli->pf_blks[0]);
                qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line,
                                     total * sizeof(struct src_ent),
                                     sizeof(struct src_ent));
@@ -744,11 +778,11 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
        }
 
        /* TM PF */
-       p_cli = &p_mngr->clients[ILT_CLI_TM];
-       qed_cxt_tm_iids(p_mngr, &tm_iids);
+       p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_TM]);
+       qed_cxt_tm_iids(p_hwfn, p_mngr, &tm_iids);
        total = tm_iids.pf_cids + tm_iids.pf_tids_total;
        if (total) {
-               p_blk = &p_cli->pf_blks[0];
+               p_blk = qed_cxt_set_blk(&p_cli->pf_blks[0]);
                qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line,
                                     total * TM_ELEM_SIZE, TM_ELEM_SIZE);
 
@@ -760,14 +794,14 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
        /* TM VF */
        total = tm_iids.per_vf_cids + tm_iids.per_vf_tids;
        if (total) {
-               p_blk = &p_cli->vf_blks[0];
+               p_blk = qed_cxt_set_blk(&p_cli->vf_blks[0]);
                qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line,
                                     total * TM_ELEM_SIZE, TM_ELEM_SIZE);
 
                qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line,
                                     ILT_CLI_TM);
-               p_cli->pf_total_lines = curr_line - p_blk->start_line;
 
+               p_cli->vf_total_lines = curr_line - p_blk->start_line;
                for (i = 1; i < p_mngr->vf_count; i++)
                        qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line,
                                             ILT_CLI_TM);
@@ -777,8 +811,8 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
        total = qed_cxt_get_srq_count(p_hwfn);
 
        if (total) {
-               p_cli = &p_mngr->clients[ILT_CLI_TSDM];
-               p_blk = &p_cli->pf_blks[SRQ_BLK];
+               p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_TSDM]);
+               p_blk = qed_cxt_set_blk(&p_cli->pf_blks[SRQ_BLK]);
                qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line,
                                     total * SRQ_CXT_SIZE, SRQ_CXT_SIZE);
 
@@ -787,13 +821,50 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn)
                p_cli->pf_total_lines = curr_line - p_blk->start_line;
        }
 
+       *line_count = curr_line - p_hwfn->p_cxt_mngr->pf_start_line;
+
        if (curr_line - p_hwfn->p_cxt_mngr->pf_start_line >
-           RESC_NUM(p_hwfn, QED_ILT)) {
-               DP_ERR(p_hwfn, "too many ilt lines...#lines=%d\n",
-                      curr_line - p_hwfn->p_cxt_mngr->pf_start_line);
+           RESC_NUM(p_hwfn, QED_ILT))
                return -EINVAL;
+
+       return 0;
+}
+
+u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines)
+{
+       struct qed_ilt_client_cfg *p_cli;
+       u32 excess_lines, available_lines;
+       struct qed_cxt_mngr *p_mngr;
+       u32 ilt_page_size, elem_size;
+       struct qed_tid_seg *p_seg;
+       int i;
+
+       available_lines = RESC_NUM(p_hwfn, QED_ILT);
+       excess_lines = used_lines - available_lines;
+
+       if (!excess_lines)
+               return 0;
+
+       if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+               return 0;
+
+       p_mngr = p_hwfn->p_cxt_mngr;
+       p_cli = &p_mngr->clients[ILT_CLI_CDUT];
+       ilt_page_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val);
+
+       for (i = 0; i < NUM_TASK_PF_SEGMENTS; i++) {
+               p_seg = qed_cxt_tid_seg_info(p_hwfn, i);
+               if (!p_seg || p_seg->count == 0)
+                       continue;
+
+               elem_size = p_mngr->task_type_size[p_seg->type];
+               if (!elem_size)
+                       continue;
+
+               return (ilt_page_size / elem_size) * excess_lines;
        }
 
+       DP_NOTICE(p_hwfn, "failed computing excess ILT lines\n");
        return 0;
 }
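
Worked numbers for the computation above (all values hypothetical):

    /* available_lines = 100, used_lines = 110 -> excess_lines = 10.
     * With ilt_page_size = 64K and elem_size = 128 in the first populated
     * CDUT segment: (65536 / 128) * 10 = 5120 tasks must be dropped.
     */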
 
@@ -1127,7 +1198,7 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn)
        clients[ILT_CLI_TSDM].first.reg = ILT_CFG_REG(TSDM, FIRST_ILT);
        clients[ILT_CLI_TSDM].last.reg = ILT_CFG_REG(TSDM, LAST_ILT);
        clients[ILT_CLI_TSDM].p_size.reg = ILT_CFG_REG(TSDM, P_SIZE);
-       /* default ILT page size for all clients is 32K */
+       /* default ILT page size for all clients is 64K */
        for (i = 0; i < ILT_CLI_MAX; i++)
                p_mngr->clients[i].p_size.val = ILT_DEFAULT_HW_P_SIZE;
 
@@ -1397,18 +1468,11 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn)
 }
 
 /* CM PF */
-static int qed_cm_init_pf(struct qed_hwfn *p_hwfn)
+void qed_cm_init_pf(struct qed_hwfn *p_hwfn)
 {
-       union qed_qm_pq_params pq_params;
-       u16 pq;
-
        /* XCM pure-LB queue */
-       memset(&pq_params, 0, sizeof(pq_params));
-       pq_params.core.tc = LB_TC;
-       pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params);
-       STORE_RT_REG(p_hwfn, XCM_REG_CON_PHY_Q3_RT_OFFSET, pq);
-
-       return 0;
+       STORE_RT_REG(p_hwfn, XCM_REG_CON_PHY_Q3_RT_OFFSET,
+                    qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB));
 }
 
 /* DQ PF */
@@ -1640,7 +1704,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn)
        u8 i;
 
        memset(&tm_iids, 0, sizeof(tm_iids));
-       qed_cxt_tm_iids(p_mngr, &tm_iids);
+       qed_cxt_tm_iids(p_hwfn, p_mngr, &tm_iids);
 
        /* @@@TBD No pre-scan for now */
 
@@ -1884,13 +1948,12 @@ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info)
 }
 
 static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
-                                  struct qed_rdma_pf_params *p_params)
+                                  struct qed_rdma_pf_params *p_params,
+                                  u32 num_tasks)
 {
-       u32 num_cons, num_tasks, num_qps, num_mrs, num_srqs;
+       u32 num_cons, num_qps, num_srqs;
        enum protocol_type proto;
 
-       num_mrs = min_t(u32, RDMA_MAX_TIDS, p_params->num_mrs);
-       num_tasks = num_mrs;    /* each mr uses a single task id */
        num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs);
 
        switch (p_hwfn->hw_info.personality) {
@@ -1919,7 +1982,7 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
        }
 }
 
-int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn)
+int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 {
        /* Set the number of required CORE connections */
        u32 core_cids = 1; /* SPQ */
@@ -1931,9 +1994,10 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn)
        switch (p_hwfn->hw_info.personality) {
        case QED_PCI_ETH_ROCE:
        {
-               qed_rdma_set_pf_params(p_hwfn,
-                                      &p_hwfn->
-                                      pf_params.rdma_pf_params);
+                       qed_rdma_set_pf_params(p_hwfn,
+                                              &p_hwfn->
+                                              pf_params.rdma_pf_params,
+                                              rdma_tasks);
                /* no need for break since RoCE coexist with Ethernet */
        }
        case QED_PCI_ETH:
index 8b010324268ad2b5b5313f6b8e62db226f7650e5..f34b2889f4bbb967a30bab93506d06923ec6391d 100644 (file)
@@ -105,19 +105,28 @@ u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
  * @brief qed_cxt_set_pf_params - Set the PF params for cxt init
  *
  * @param p_hwfn
- *
+ * @param rdma_tasks - requested maximum number of RDMA tasks
  * @return int
  */
-int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn);
+int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks);
 
 /**
  * @brief qed_cxt_cfg_ilt_compute - compute ILT init parameters
  *
  * @param p_hwfn
+ * @param last_line
  *
  * @return int
  */
-int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn);
+int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *last_line);
+
+/**
+ * @brief qed_cxt_cfg_ilt_compute_excess - how many lines can be decreased
+ *
+ * @param p_hwfn
+ * @param used_lines
+ *
+ * @return u32 - the excess, expressed as the number of tasks to drop
+ */
+u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines);
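
A hedged sketch of how the two routines above are meant to compose; the caller shape and 'requested_tasks' are assumptions, not code from this diff:

    u32 lines, excess;
    int rc;

    rc = qed_cxt_cfg_ilt_compute(p_hwfn, &lines);
    if (rc == -EINVAL) {
            /* too many ILT lines -> shrink the task count, recompute */
            excess = qed_cxt_cfg_ilt_compute_excess(p_hwfn, lines);
            rc = qed_cxt_set_pf_params(p_hwfn, requested_tasks - excess);
            if (!rc)
                    rc = qed_cxt_cfg_ilt_compute(p_hwfn, &lines);
    }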
 
 /**
  * @brief qed_cxt_mngr_alloc - Allocate and init the context manager struct
index 5bd36a4a8fcdfd201b40321c7fefb82776cd347d..2fc1fde824bdbe817b4d6e538b25de33fd8987bc 100644 (file)
@@ -183,7 +183,7 @@ qed_dcbx_dp_protocol(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data)
                           "%s info: update %d, enable %d, prio %d, tc %d, num_tc %d\n",
                           qed_dcbx_app_update[i].name, p_data->arr[id].update,
                           p_data->arr[id].enable, p_data->arr[id].priority,
-                          p_data->arr[id].tc, p_hwfn->hw_info.num_tc);
+                          p_data->arr[id].tc, p_hwfn->hw_info.num_active_tc);
        }
 }
 
@@ -204,12 +204,8 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data,
        p_data->arr[type].tc = tc;
 
        /* QM reconf data */
-       if (p_info->personality == personality) {
-               if (personality == QED_PCI_ETH)
-                       p_info->non_offload_tc = tc;
-               else
-                       p_info->offload_tc = tc;
-       }
+       if (p_info->personality == personality)
+               p_info->offload_tc = tc;
 }
 
 /* Update app protocol data and hw_info fields with the TLV info */
@@ -376,7 +372,9 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
        if (rc)
                return rc;
 
-       p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
+       p_info->num_active_tc = QED_MFW_GET_FIELD(p_ets->flags,
+                                                 DCBX_ETS_MAX_TCS);
+       p_hwfn->qm_info.ooo_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_OOO_TC);
        data.pf_id = p_hwfn->rel_pf_id;
        data.dcbx_enabled = !!dcbx_version;
 
index 0fabe97f998d2c8e29b343f47ba90396d61f4398..2eb988fe1298dfd043fbd8c18d49187930a5c970 100644 (file)
@@ -85,9 +85,6 @@ struct qed_dcbx_app_metadata {
        enum qed_pci_personality personality;
 };
 
-#define QED_MFW_GET_FIELD(name, field) \
-       (((name) & (field ## _MASK)) >> (field ## _SHIFT))
-
 struct qed_dcbx_info {
        struct lldp_status_params_s lldp_remote[LLDP_MAX_LLDP_AGENTS];
        struct lldp_config_params_s lldp_local[LLDP_MAX_LLDP_AGENTS];
index 68f19ca57f965b13d6fbf32c85e86d65e500b881..483241b4b05db2add64ff928ccc9419fe733355a 100644 (file)
@@ -17,7 +17,6 @@
 
 /* Chip IDs enum */
 enum chip_ids {
-       CHIP_RESERVED,
        CHIP_BB_B0,
        CHIP_K2,
        MAX_CHIP_IDS
@@ -40,6 +39,7 @@ enum mem_groups {
        MEM_GROUP_BTB_RAM,
        MEM_GROUP_RDIF_CTX,
        MEM_GROUP_TDIF_CTX,
+       MEM_GROUP_CFC_MEM,
        MEM_GROUP_CONN_CFC_MEM,
        MEM_GROUP_TASK_CFC_MEM,
        MEM_GROUP_CAU_PI,
@@ -72,6 +72,7 @@ static const char * const s_mem_group_names[] = {
        "BTB_RAM",
        "RDIF_CTX",
        "TDIF_CTX",
+       "CFC_MEM",
        "CONN_CFC_MEM",
        "TASK_CFC_MEM",
        "CAU_PI",
@@ -185,13 +186,16 @@ struct dbg_array {
        u32 size_in_dwords;
 };
 
+struct chip_platform_defs {
+       u8 num_ports;
+       u8 num_pfs;
+       u8 num_vfs;
+};
+
 /* Chip constant definitions */
 struct chip_defs {
        const char *name;
-       struct {
-               u8 num_ports;
-               u8 num_pfs;
-       } per_platform[MAX_PLATFORM_IDS];
+       struct chip_platform_defs per_platform[MAX_PLATFORM_IDS];
 };
 
 /* Platform constant definitions */
@@ -405,22 +409,23 @@ struct phy_defs {
 /***************************** Constant Arrays *******************************/
 
 /* Debug arrays */
-static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} };
+static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} };
 
 /* Chip constant definitions array */
 static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = {
-       { "reserved", { {0, 0}, {0, 0}, {0, 0}, {0, 0} } },
        { "bb_b0",
-         { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB}, {0, 0}, {0, 0}, {0, 0} } },
-       { "k2", { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2}, {0, 0}, {0, 0}, {0, 0} } }
+         { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB, MAX_NUM_VFS_BB}, {0, 0, 0},
+           {0, 0, 0}, {0, 0, 0} } },
+       { "k2",
+         { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2, MAX_NUM_VFS_K2}, {0, 0, 0},
+           {0, 0, 0}, {0, 0, 0} } }
 };
 
 /* Storm constant definitions array */
 static struct storm_defs s_storm_defs[] = {
        /* Tstorm */
        {'T', BLOCK_TSEM,
-        {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT,
-         DBG_BUS_CLIENT_RBCT}, true,
+        {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, true,
         TSEM_REG_FAST_MEMORY,
         TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE,
         TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG,
@@ -432,8 +437,7 @@ static struct storm_defs s_storm_defs[] = {
         4, TCM_REG_SM_TASK_CTX},
        /* Mstorm */
        {'M', BLOCK_MSEM,
-        {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT,
-         DBG_BUS_CLIENT_RBCM}, false,
+        {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, false,
         MSEM_REG_FAST_MEMORY,
         MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE,
         MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG,
@@ -445,8 +449,7 @@ static struct storm_defs s_storm_defs[] = {
         7, MCM_REG_SM_TASK_CTX},
        /* Ustorm */
        {'U', BLOCK_USEM,
-        {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU,
-         DBG_BUS_CLIENT_RBCU}, false,
+        {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, false,
         USEM_REG_FAST_MEMORY,
         USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE,
         USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG,
@@ -458,8 +461,7 @@ static struct storm_defs s_storm_defs[] = {
         3, UCM_REG_SM_TASK_CTX},
        /* Xstorm */
        {'X', BLOCK_XSEM,
-        {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX,
-         DBG_BUS_CLIENT_RBCX}, false,
+        {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, false,
         XSEM_REG_FAST_MEMORY,
         XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE,
         XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG,
@@ -471,8 +473,7 @@ static struct storm_defs s_storm_defs[] = {
         0, 0},
        /* Ystorm */
        {'Y', BLOCK_YSEM,
-        {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX,
-         DBG_BUS_CLIENT_RBCY}, false,
+        {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, false,
         YSEM_REG_FAST_MEMORY,
         YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE,
         YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG,
@@ -484,8 +485,7 @@ static struct storm_defs s_storm_defs[] = {
         12, YCM_REG_SM_TASK_CTX},
        /* Pstorm */
        {'P', BLOCK_PSEM,
-        {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS,
-         DBG_BUS_CLIENT_RBCS}, true,
+        {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, true,
         PSEM_REG_FAST_MEMORY,
         PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE,
         PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG,
@@ -499,8 +499,9 @@ static struct storm_defs s_storm_defs[] = {
 
 /* Block definitions array */
 static struct block_defs block_grc_defs = {
-       "grc", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
+       "grc",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
        GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE,
        GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID,
        GRC_REG_DBG_FORCE_FRAME,
@@ -508,29 +509,30 @@ static struct block_defs block_grc_defs = {
 };
 
 static struct block_defs block_miscs_defs = {
-       "miscs", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "miscs", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_misc_defs = {
-       "misc", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "misc", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_dbu_defs = {
-       "dbu", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "dbu", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_pglue_b_defs = {
-       "pglue_b", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH},
+       "pglue_b",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH},
        PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE,
        PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID,
        PGLUE_B_REG_DBG_FORCE_FRAME,
@@ -538,8 +540,9 @@ static struct block_defs block_pglue_b_defs = {
 };
 
 static struct block_defs block_cnig_defs = {
-       "cnig", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+       "cnig",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
        CNIG_REG_DBG_SELECT_K2, CNIG_REG_DBG_DWORD_ENABLE_K2,
        CNIG_REG_DBG_SHIFT_K2, CNIG_REG_DBG_FORCE_VALID_K2,
        CNIG_REG_DBG_FORCE_FRAME_K2,
@@ -547,15 +550,16 @@ static struct block_defs block_cnig_defs = {
 };
 
 static struct block_defs block_cpmu_defs = {
-       "cpmu", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "cpmu", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        true, false, DBG_RESET_REG_MISCS_PL_HV, 8
 };
 
 static struct block_defs block_ncsi_defs = {
-       "ncsi", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
+       "ncsi",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
        NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE,
        NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID,
        NCSI_REG_DBG_FORCE_FRAME,
@@ -563,15 +567,16 @@ static struct block_defs block_ncsi_defs = {
 };
 
 static struct block_defs block_opte_defs = {
-       "opte", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "opte", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        true, false, DBG_RESET_REG_MISCS_PL_HV, 4
 };
 
 static struct block_defs block_bmb_defs = {
-       "bmb", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB},
+       "bmb",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB},
        BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE,
        BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID,
        BMB_REG_DBG_FORCE_FRAME,
@@ -579,8 +584,9 @@ static struct block_defs block_bmb_defs = {
 };
 
 static struct block_defs block_pcie_defs = {
-       "pcie", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+       "pcie",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
        PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
        PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
        PCIE_REG_DBG_COMMON_FORCE_FRAME,
@@ -588,15 +594,16 @@ static struct block_defs block_pcie_defs = {
 };
 
 static struct block_defs block_mcp_defs = {
-       "mcp", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "mcp", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_mcp2_defs = {
-       "mcp2", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
+       "mcp2",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
        MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE,
        MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID,
        MCP2_REG_DBG_FORCE_FRAME,
@@ -604,8 +611,9 @@ static struct block_defs block_mcp2_defs = {
 };
 
 static struct block_defs block_pswhst_defs = {
-       "pswhst", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswhst",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE,
        PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID,
        PSWHST_REG_DBG_FORCE_FRAME,
@@ -613,8 +621,9 @@ static struct block_defs block_pswhst_defs = {
 };
 
 static struct block_defs block_pswhst2_defs = {
-       "pswhst2", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswhst2",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE,
        PSWHST2_REG_DBG_SHIFT, PSWHST2_REG_DBG_FORCE_VALID,
        PSWHST2_REG_DBG_FORCE_FRAME,
@@ -622,8 +631,9 @@ static struct block_defs block_pswhst2_defs = {
 };
 
 static struct block_defs block_pswrd_defs = {
-       "pswrd", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswrd",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE,
        PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID,
        PSWRD_REG_DBG_FORCE_FRAME,
@@ -631,8 +641,9 @@ static struct block_defs block_pswrd_defs = {
 };
 
 static struct block_defs block_pswrd2_defs = {
-       "pswrd2", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswrd2",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWRD2_REG_DBG_SELECT, PSWRD2_REG_DBG_DWORD_ENABLE,
        PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID,
        PSWRD2_REG_DBG_FORCE_FRAME,
@@ -640,8 +651,9 @@ static struct block_defs block_pswrd2_defs = {
 };
 
 static struct block_defs block_pswwr_defs = {
-       "pswwr", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswwr",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE,
        PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID,
        PSWWR_REG_DBG_FORCE_FRAME,
@@ -649,15 +661,16 @@ static struct block_defs block_pswwr_defs = {
 };
 
 static struct block_defs block_pswwr2_defs = {
-       "pswwr2", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "pswwr2", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        true, false, DBG_RESET_REG_MISC_PL_HV, 3
 };
 
 static struct block_defs block_pswrq_defs = {
-       "pswrq", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswrq",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE,
        PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID,
        PSWRQ_REG_DBG_FORCE_FRAME,
@@ -665,8 +678,9 @@ static struct block_defs block_pswrq_defs = {
 };
 
 static struct block_defs block_pswrq2_defs = {
-       "pswrq2", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "pswrq2",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE,
        PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID,
        PSWRQ2_REG_DBG_FORCE_FRAME,
@@ -674,8 +688,9 @@ static struct block_defs block_pswrq2_defs = {
 };
 
 static struct block_defs block_pglcs_defs = {
-       "pglcs", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+       "pglcs",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
        PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE,
        PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID,
        PGLCS_REG_DBG_FORCE_FRAME,
@@ -683,8 +698,9 @@ static struct block_defs block_pglcs_defs = {
 };
 
 static struct block_defs block_ptu_defs = {
-       "ptu", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "ptu",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE,
        PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID,
        PTU_REG_DBG_FORCE_FRAME,
@@ -692,8 +708,9 @@ static struct block_defs block_ptu_defs = {
 };
 
 static struct block_defs block_dmae_defs = {
-       "dmae", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "dmae",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE,
        DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID,
        DMAE_REG_DBG_FORCE_FRAME,
@@ -701,8 +718,9 @@ static struct block_defs block_dmae_defs = {
 };
 
 static struct block_defs block_tcm_defs = {
-       "tcm", {true, true, true}, true, DBG_TSTORM_ID,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       "tcm",
+       {true, true}, true, DBG_TSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
        TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE,
        TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID,
        TCM_REG_DBG_FORCE_FRAME,
@@ -710,8 +728,9 @@ static struct block_defs block_tcm_defs = {
 };
 
 static struct block_defs block_mcm_defs = {
-       "mcm", {true, true, true}, true, DBG_MSTORM_ID,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "mcm",
+       {true, true}, true, DBG_MSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE,
        MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID,
        MCM_REG_DBG_FORCE_FRAME,
@@ -719,8 +738,9 @@ static struct block_defs block_mcm_defs = {
 };
 
 static struct block_defs block_ucm_defs = {
-       "ucm", {true, true, true}, true, DBG_USTORM_ID,
-       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       "ucm",
+       {true, true}, true, DBG_USTORM_ID,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
        UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE,
        UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID,
        UCM_REG_DBG_FORCE_FRAME,
@@ -728,8 +748,9 @@ static struct block_defs block_ucm_defs = {
 };
 
 static struct block_defs block_xcm_defs = {
-       "xcm", {true, true, true}, true, DBG_XSTORM_ID,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       "xcm",
+       {true, true}, true, DBG_XSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
        XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE,
        XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID,
        XCM_REG_DBG_FORCE_FRAME,
@@ -737,8 +758,9 @@ static struct block_defs block_xcm_defs = {
 };
 
 static struct block_defs block_ycm_defs = {
-       "ycm", {true, true, true}, true, DBG_YSTORM_ID,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       "ycm",
+       {true, true}, true, DBG_YSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
        YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE,
        YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID,
        YCM_REG_DBG_FORCE_FRAME,
@@ -746,8 +768,9 @@ static struct block_defs block_ycm_defs = {
 };
 
 static struct block_defs block_pcm_defs = {
-       "pcm", {true, true, true}, true, DBG_PSTORM_ID,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       "pcm",
+       {true, true}, true, DBG_PSTORM_ID,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
        PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE,
        PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID,
        PCM_REG_DBG_FORCE_FRAME,
@@ -755,8 +778,9 @@ static struct block_defs block_pcm_defs = {
 };
 
 static struct block_defs block_qm_defs = {
-       "qm", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ},
+       "qm",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ},
        QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE,
        QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID,
        QM_REG_DBG_FORCE_FRAME,
@@ -764,8 +788,9 @@ static struct block_defs block_qm_defs = {
 };
 
 static struct block_defs block_tm_defs = {
-       "tm", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       "tm",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
        TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE,
        TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID,
        TM_REG_DBG_FORCE_FRAME,
@@ -773,8 +798,9 @@ static struct block_defs block_tm_defs = {
 };
 
 static struct block_defs block_dorq_defs = {
-       "dorq", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       "dorq",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
        DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE,
        DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID,
        DORQ_REG_DBG_FORCE_FRAME,
@@ -782,8 +808,9 @@ static struct block_defs block_dorq_defs = {
 };
 
 static struct block_defs block_brb_defs = {
-       "brb", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
+       "brb",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
        BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE,
        BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID,
        BRB_REG_DBG_FORCE_FRAME,
@@ -791,8 +818,9 @@ static struct block_defs block_brb_defs = {
 };
 
 static struct block_defs block_src_defs = {
-       "src", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       "src",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
        SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE,
        SRC_REG_DBG_SHIFT, SRC_REG_DBG_FORCE_VALID,
        SRC_REG_DBG_FORCE_FRAME,
@@ -800,8 +828,9 @@ static struct block_defs block_src_defs = {
 };
 
 static struct block_defs block_prs_defs = {
-       "prs", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
+       "prs",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
        PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE,
        PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID,
        PRS_REG_DBG_FORCE_FRAME,
@@ -809,8 +838,9 @@ static struct block_defs block_prs_defs = {
 };
 
 static struct block_defs block_tsdm_defs = {
-       "tsdm", {true, true, true}, true, DBG_TSTORM_ID,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       "tsdm",
+       {true, true}, true, DBG_TSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
        TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE,
        TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID,
        TSDM_REG_DBG_FORCE_FRAME,
@@ -818,8 +848,9 @@ static struct block_defs block_tsdm_defs = {
 };
 
 static struct block_defs block_msdm_defs = {
-       "msdm", {true, true, true}, true, DBG_MSTORM_ID,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "msdm",
+       {true, true}, true, DBG_MSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE,
        MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID,
        MSDM_REG_DBG_FORCE_FRAME,
@@ -827,8 +858,9 @@ static struct block_defs block_msdm_defs = {
 };
 
 static struct block_defs block_usdm_defs = {
-       "usdm", {true, true, true}, true, DBG_USTORM_ID,
-       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       "usdm",
+       {true, true}, true, DBG_USTORM_ID,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
        USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE,
        USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID,
        USDM_REG_DBG_FORCE_FRAME,
@@ -836,8 +868,9 @@ static struct block_defs block_usdm_defs = {
 };
 
 static struct block_defs block_xsdm_defs = {
-       "xsdm", {true, true, true}, true, DBG_XSTORM_ID,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       "xsdm",
+       {true, true}, true, DBG_XSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
        XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE,
        XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID,
        XSDM_REG_DBG_FORCE_FRAME,
@@ -845,8 +878,9 @@ static struct block_defs block_xsdm_defs = {
 };
 
 static struct block_defs block_ysdm_defs = {
-       "ysdm", {true, true, true}, true, DBG_YSTORM_ID,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       "ysdm",
+       {true, true}, true, DBG_YSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
        YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE,
        YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID,
        YSDM_REG_DBG_FORCE_FRAME,
@@ -854,8 +888,9 @@ static struct block_defs block_ysdm_defs = {
 };
 
 static struct block_defs block_psdm_defs = {
-       "psdm", {true, true, true}, true, DBG_PSTORM_ID,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       "psdm",
+       {true, true}, true, DBG_PSTORM_ID,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
        PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE,
        PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID,
        PSDM_REG_DBG_FORCE_FRAME,
@@ -863,8 +898,9 @@ static struct block_defs block_psdm_defs = {
 };
 
 static struct block_defs block_tsem_defs = {
-       "tsem", {true, true, true}, true, DBG_TSTORM_ID,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       "tsem",
+       {true, true}, true, DBG_TSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
        TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE,
        TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID,
        TSEM_REG_DBG_FORCE_FRAME,
@@ -872,8 +908,9 @@ static struct block_defs block_tsem_defs = {
 };
 
 static struct block_defs block_msem_defs = {
-       "msem", {true, true, true}, true, DBG_MSTORM_ID,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "msem",
+       {true, true}, true, DBG_MSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE,
        MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID,
        MSEM_REG_DBG_FORCE_FRAME,
@@ -881,8 +918,9 @@ static struct block_defs block_msem_defs = {
 };
 
 static struct block_defs block_usem_defs = {
-       "usem", {true, true, true}, true, DBG_USTORM_ID,
-       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       "usem",
+       {true, true}, true, DBG_USTORM_ID,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
        USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE,
        USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID,
        USEM_REG_DBG_FORCE_FRAME,
@@ -890,8 +928,9 @@ static struct block_defs block_usem_defs = {
 };
 
 static struct block_defs block_xsem_defs = {
-       "xsem", {true, true, true}, true, DBG_XSTORM_ID,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       "xsem",
+       {true, true}, true, DBG_XSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
        XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE,
        XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID,
        XSEM_REG_DBG_FORCE_FRAME,
@@ -899,8 +938,9 @@ static struct block_defs block_xsem_defs = {
 };
 
 static struct block_defs block_ysem_defs = {
-       "ysem", {true, true, true}, true, DBG_YSTORM_ID,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       "ysem",
+       {true, true}, true, DBG_YSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
        YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE,
        YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID,
        YSEM_REG_DBG_FORCE_FRAME,
@@ -908,8 +948,9 @@ static struct block_defs block_ysem_defs = {
 };
 
 static struct block_defs block_psem_defs = {
-       "psem", {true, true, true}, true, DBG_PSTORM_ID,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       "psem",
+       {true, true}, true, DBG_PSTORM_ID,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
        PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE,
        PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID,
        PSEM_REG_DBG_FORCE_FRAME,
@@ -917,8 +958,9 @@ static struct block_defs block_psem_defs = {
 };
 
 static struct block_defs block_rss_defs = {
-       "rss", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       "rss",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
        RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE,
        RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID,
        RSS_REG_DBG_FORCE_FRAME,
@@ -926,8 +968,9 @@ static struct block_defs block_rss_defs = {
 };
 
 static struct block_defs block_tmld_defs = {
-       "tmld", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "tmld",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE,
        TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID,
        TMLD_REG_DBG_FORCE_FRAME,
@@ -935,8 +978,9 @@ static struct block_defs block_tmld_defs = {
 };
 
 static struct block_defs block_muld_defs = {
-       "muld", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       "muld",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
        MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE,
        MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID,
        MULD_REG_DBG_FORCE_FRAME,
@@ -944,8 +988,9 @@ static struct block_defs block_muld_defs = {
 };
 
 static struct block_defs block_yuld_defs = {
-       "yuld", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       "yuld",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
        YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE,
        YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID,
        YULD_REG_DBG_FORCE_FRAME,
@@ -953,8 +998,9 @@ static struct block_defs block_yuld_defs = {
 };
 
 static struct block_defs block_xyld_defs = {
-       "xyld", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       "xyld",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
        XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE,
        XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID,
        XYLD_REG_DBG_FORCE_FRAME,
@@ -962,8 +1008,9 @@ static struct block_defs block_xyld_defs = {
 };
 
 static struct block_defs block_prm_defs = {
-       "prm", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "prm",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE,
        PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID,
        PRM_REG_DBG_FORCE_FRAME,
@@ -971,8 +1018,9 @@ static struct block_defs block_prm_defs = {
 };
 
 static struct block_defs block_pbf_pb1_defs = {
-       "pbf_pb1", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+       "pbf_pb1",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
        PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE,
        PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID,
        PBF_PB1_REG_DBG_FORCE_FRAME,
@@ -981,8 +1029,9 @@ static struct block_defs block_pbf_pb1_defs = {
 };
 
 static struct block_defs block_pbf_pb2_defs = {
-       "pbf_pb2", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+       "pbf_pb2",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
        PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE,
        PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID,
        PBF_PB2_REG_DBG_FORCE_FRAME,
@@ -991,8 +1040,9 @@ static struct block_defs block_pbf_pb2_defs = {
 };
 
 static struct block_defs block_rpb_defs = {
-       "rpb", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "rpb",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE,
        RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID,
        RPB_REG_DBG_FORCE_FRAME,
@@ -1000,8 +1050,9 @@ static struct block_defs block_rpb_defs = {
 };
 
 static struct block_defs block_btb_defs = {
-       "btb", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV},
+       "btb",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV},
        BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE,
        BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID,
        BTB_REG_DBG_FORCE_FRAME,
@@ -1009,8 +1060,9 @@ static struct block_defs block_btb_defs = {
 };
 
 static struct block_defs block_pbf_defs = {
-       "pbf", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+       "pbf",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
        PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE,
        PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID,
        PBF_REG_DBG_FORCE_FRAME,
@@ -1018,8 +1070,9 @@ static struct block_defs block_pbf_defs = {
 };
 
 static struct block_defs block_rdif_defs = {
-       "rdif", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       "rdif",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
        RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE,
        RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID,
        RDIF_REG_DBG_FORCE_FRAME,
@@ -1027,8 +1080,9 @@ static struct block_defs block_rdif_defs = {
 };
 
 static struct block_defs block_tdif_defs = {
-       "tdif", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       "tdif",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
        TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE,
        TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID,
        TDIF_REG_DBG_FORCE_FRAME,
@@ -1036,8 +1090,9 @@ static struct block_defs block_tdif_defs = {
 };
 
 static struct block_defs block_cdu_defs = {
-       "cdu", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       "cdu",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
        CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE,
        CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID,
        CDU_REG_DBG_FORCE_FRAME,
@@ -1045,8 +1100,9 @@ static struct block_defs block_cdu_defs = {
 };
 
 static struct block_defs block_ccfc_defs = {
-       "ccfc", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       "ccfc",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
        CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE,
        CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID,
        CCFC_REG_DBG_FORCE_FRAME,
@@ -1054,8 +1110,9 @@ static struct block_defs block_ccfc_defs = {
 };
 
 static struct block_defs block_tcfc_defs = {
-       "tcfc", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       "tcfc",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
        TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE,
        TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID,
        TCFC_REG_DBG_FORCE_FRAME,
@@ -1063,8 +1120,9 @@ static struct block_defs block_tcfc_defs = {
 };
 
 static struct block_defs block_igu_defs = {
-       "igu", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "igu",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE,
        IGU_REG_DBG_SHIFT, IGU_REG_DBG_FORCE_VALID,
        IGU_REG_DBG_FORCE_FRAME,
@@ -1072,8 +1130,9 @@ static struct block_defs block_igu_defs = {
 };
 
 static struct block_defs block_cau_defs = {
-       "cau", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       "cau",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
        CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE,
        CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID,
        CAU_REG_DBG_FORCE_FRAME,
@@ -1081,8 +1140,9 @@ static struct block_defs block_cau_defs = {
 };
 
 static struct block_defs block_umac_defs = {
-       "umac", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+       "umac",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
        UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE,
        UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID,
        UMAC_REG_DBG_FORCE_FRAME,
@@ -1090,22 +1150,23 @@ static struct block_defs block_umac_defs = {
 };
 
 static struct block_defs block_xmac_defs = {
-       "xmac", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "xmac", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_dbg_defs = {
-       "dbg", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "dbg", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3
 };
 
 static struct block_defs block_nig_defs = {
-       "nig", {true, true, true}, false, 0,
-       {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
+       "nig",
+       {true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
        NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE,
        NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID,
        NIG_REG_DBG_FORCE_FRAME,
@@ -1113,8 +1174,9 @@ static struct block_defs block_nig_defs = {
 };
 
 static struct block_defs block_wol_defs = {
-       "wol", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+       "wol",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
        WOL_REG_DBG_SELECT, WOL_REG_DBG_DWORD_ENABLE,
        WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID,
        WOL_REG_DBG_FORCE_FRAME,
@@ -1122,8 +1184,9 @@ static struct block_defs block_wol_defs = {
 };
 
 static struct block_defs block_bmbn_defs = {
-       "bmbn", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB},
+       "bmbn",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB},
        BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE,
        BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID,
        BMBN_REG_DBG_FORCE_FRAME,
@@ -1131,15 +1194,16 @@ static struct block_defs block_bmbn_defs = {
 };
 
 static struct block_defs block_ipc_defs = {
-       "ipc", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "ipc", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        true, false, DBG_RESET_REG_MISCS_PL_UA, 8
 };
 
 static struct block_defs block_nwm_defs = {
-       "nwm", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+       "nwm",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
        NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE,
        NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID,
        NWM_REG_DBG_FORCE_FRAME,
@@ -1147,22 +1211,29 @@ static struct block_defs block_nwm_defs = {
 };
 
 static struct block_defs block_nws_defs = {
-       "nws", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
-       0, 0, 0, 0, 0,
+       "nws",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+       NWS_REG_DBG_SELECT, NWS_REG_DBG_DWORD_ENABLE,
+       NWS_REG_DBG_SHIFT, NWS_REG_DBG_FORCE_VALID,
+       NWS_REG_DBG_FORCE_FRAME,
        true, false, DBG_RESET_REG_MISCS_PL_HV, 12
 };
 
 static struct block_defs block_ms_defs = {
-       "ms", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
-       0, 0, 0, 0, 0,
+       "ms",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+       MS_REG_DBG_SELECT, MS_REG_DBG_DWORD_ENABLE,
+       MS_REG_DBG_SHIFT, MS_REG_DBG_FORCE_VALID,
+       MS_REG_DBG_FORCE_FRAME,
        true, false, DBG_RESET_REG_MISCS_PL_HV, 13
 };
 
 static struct block_defs block_phy_pcie_defs = {
-       "phy_pcie", {false, false, true}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+       "phy_pcie",
+       {false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
        PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
        PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
        PCIE_REG_DBG_COMMON_FORCE_FRAME,
@@ -1170,22 +1241,57 @@ static struct block_defs block_phy_pcie_defs = {
 };
 
 static struct block_defs block_led_defs = {
-       "led", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "led", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 14
+};
+
+static struct block_defs block_avs_wrap_defs = {
+       "avs_wrap", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_UA, 11
+};
+
+static struct block_defs block_rgfs_defs = {
+       "rgfs", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
-       true, true, DBG_RESET_REG_MISCS_PL_HV, 14
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_tgfs_defs = {
+       "tgfs", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_ptld_defs = {
+       "ptld", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_ypld_defs = {
+       "ypld", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_misc_aeu_defs = {
-       "misc_aeu", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "misc_aeu", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
 
 static struct block_defs block_bar0_map_defs = {
-       "bar0_map", {false, false, false}, false, 0,
-       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       "bar0_map", {false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
        0, 0, 0, 0, 0,
        false, false, MAX_DBG_RESET_REGS, 0
 };
@@ -1269,6 +1375,11 @@ static struct block_defs *s_block_defs[MAX_BLOCK_ID] = {
        &block_ms_defs,
        &block_phy_pcie_defs,
        &block_led_defs,
+       &block_avs_wrap_defs,
+       &block_rgfs_defs,
+       &block_tgfs_defs,
+       &block_ptld_defs,
+       &block_ypld_defs,
        &block_misc_aeu_defs,
        &block_bar0_map_defs,
 };
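
All of the block_defs updates above shrink the per-chip arrays (the exists flags and the debug-bus client IDs) from three columns to two. Judging by which values survive, the dropped column is the leading one, leaving one entry for BB and one for K2; the MODE_BB_B0 to MODE_BB rename further down supports that reading. A minimal standalone sketch of the indexing this implies, with enum names assumed rather than copied from the qed headers:

	#include <stdbool.h>

	enum chip_ids_sketch { CHIP_BB, CHIP_K2, MAX_CHIP_IDS };  /* assumed */

	struct block_defs_sketch {
	        const char *name;
	        bool exists[MAX_CHIP_IDS];      /* one flag per chip, was three */
	        int dbg_client[MAX_CHIP_IDS];   /* per-chip debug bus client */
	};

	/* Per-chip lookups in the dump code select a column by chip id */
	static bool block_has_dbg_bus(const struct block_defs_sketch *blk,
	                              enum chip_ids_sketch chip)
	{
	        return blk->exists[chip];
	}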
@@ -1281,65 +1392,67 @@ static struct platform_defs s_platform_defs[] = {
 };
 
 static struct grc_param_defs s_grc_param_defs[] = {
-       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_TSTORM */
-       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_MSTORM */
-       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_USTORM */
-       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_XSTORM */
-       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_YSTORM */
-       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_PSTORM */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_REGS */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RAM */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PBUF */
-       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IOR */
-       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_VFC */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM_CTX */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_ILT */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RSS */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CAU */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_QM */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MCP */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_RESERVED */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CFC */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IGU */
-       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BRB */
-       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BTB */
-       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BMB */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_NIG */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MULD */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PRS */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DMAE */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_TM */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_SDM */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DIF */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_STATIC */
-       {{0, 0, 0}, 0, 1, false, 0, 0}, /* DBG_GRC_PARAM_UNSTALL */
-       {{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS,
+       {{1, 1}, 0, 1, false, 1, 1},    /* DBG_GRC_PARAM_DUMP_TSTORM */
+       {{1, 1}, 0, 1, false, 1, 1},    /* DBG_GRC_PARAM_DUMP_MSTORM */
+       {{1, 1}, 0, 1, false, 1, 1},    /* DBG_GRC_PARAM_DUMP_USTORM */
+       {{1, 1}, 0, 1, false, 1, 1},    /* DBG_GRC_PARAM_DUMP_XSTORM */
+       {{1, 1}, 0, 1, false, 1, 1},    /* DBG_GRC_PARAM_DUMP_YSTORM */
+       {{1, 1}, 0, 1, false, 1, 1},    /* DBG_GRC_PARAM_DUMP_PSTORM */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_REGS */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_RAM */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_PBUF */
+       {{0, 0}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_IOR */
+       {{0, 0}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_VFC */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_CM_CTX */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_ILT */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_RSS */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_CAU */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_QM */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_MCP */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_RESERVED */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_CFC */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_IGU */
+       {{0, 0}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_BRB */
+       {{0, 0}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_BTB */
+       {{0, 0}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_BMB */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_NIG */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_MULD */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_PRS */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_DMAE */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_TM */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_SDM */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_DIF */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_STATIC */
+       {{0, 0}, 0, 1, false, 0, 0},    /* DBG_GRC_PARAM_UNSTALL */
+       {{MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS,
         MAX_LCIDS},                    /* DBG_GRC_PARAM_NUM_LCIDS */
-       {{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS,
+       {{MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS,
         MAX_LTIDS},                    /* DBG_GRC_PARAM_NUM_LTIDS */
-       {{0, 0, 0}, 0, 1, true, 0, 0},  /* DBG_GRC_PARAM_EXCLUDE_ALL */
-       {{0, 0, 0}, 0, 1, true, 0, 0},  /* DBG_GRC_PARAM_CRASH */
-       {{0, 0, 0}, 0, 1, false, 1, 0}, /* DBG_GRC_PARAM_PARITY_SAFE */
-       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM */
-       {{1, 1, 1}, 0, 1, false, 0, 1}  /* DBG_GRC_PARAM_DUMP_PHY */
+       {{0, 0}, 0, 1, true, 0, 0},     /* DBG_GRC_PARAM_EXCLUDE_ALL */
+       {{0, 0}, 0, 1, true, 0, 0},     /* DBG_GRC_PARAM_CRASH */
+       {{0, 0}, 0, 1, false, 1, 0},    /* DBG_GRC_PARAM_PARITY_SAFE */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_CM */
+       {{1, 1}, 0, 1, false, 0, 1},    /* DBG_GRC_PARAM_DUMP_PHY */
+       {{0, 0}, 0, 1, false, 0, 0},    /* DBG_GRC_PARAM_NO_MCP */
+       {{0, 0}, 0, 1, false, 0, 0}     /* DBG_GRC_PARAM_NO_FW_VER */
 };
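
Each row of s_grc_param_defs above packs per-chip defaults followed by the parameter's bounds and preset behaviour. Only default_val's chip indexing is directly visible in this file (it is what the removed set_params_default code reads); the remaining field names below are illustrative. The last two columns appear to be the values a parameter takes under the EXCLUDE_ALL and CRASH presets, e.g. DUMP_REGS reads {{1, 1}, 0, 1, false, 0, 1}: default 1 on both chips, range 0..1, not itself a preset, 0 under EXCLUDE_ALL, 1 under CRASH.

	#include <stdbool.h>

	/* Illustrative layout only; the real struct lives in the qed sources */
	struct grc_param_defs_sketch {
	        unsigned int default_val[2];    /* per-chip default (BB, K2) */
	        unsigned int min;               /* lowest legal value */
	        unsigned int max;               /* highest legal value */
	        bool is_preset;                 /* true for preset selectors */
	        unsigned int exclude_all_val;   /* applied by EXCLUDE_ALL preset */
	        unsigned int crash_val;         /* applied by CRASH preset */
	};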
 
 static struct rss_mem_defs s_rss_mem_defs[] = {
        { "rss_mem_cid", "rss_cid", 0,
-         {256, 256, 320},
-         {32, 32, 32} },
+         {256, 320},
+         {32, 32} },
        { "rss_mem_key_msb", "rss_key", 1024,
-         {128, 128, 208},
-         {256, 256, 256} },
+         {128, 208},
+         {256, 256} },
        { "rss_mem_key_lsb", "rss_key", 2048,
-         {128, 128, 208},
-         {64, 64, 64} },
+         {128, 208},
+         {64, 64} },
        { "rss_mem_info", "rss_info", 3072,
-         {128, 128, 208},
-         {16, 16, 16} },
+         {128, 208},
+         {16, 16} },
        { "rss_mem_ind", "rss_ind", 4096,
-         {(128 * 128), (128 * 128), (128 * 208)},
-         {16, 16, 16} }
+         {(128 * 128), (128 * 208)},
+         {16, 16} }
 };
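
The two remaining columns in s_rss_mem_defs are per-chip entry counts, and the second array gives the entry width in bits; qed_grc_dump_rss() further down sizes the dump as entries * width / 32 dwords. A sizing sketch using the K2 column of rss_mem_key_msb (208 entries, 256 bits):

	/* Matches the total_dwords computation in qed_grc_dump_rss() below */
	static unsigned int rss_dump_dwords(unsigned int num_entries,
	                                    unsigned int entry_width_bits)
	{
	        /* rss_mem_key_msb on K2: (208 * 256) / 32 = 1664 dwords */
	        return (num_entries * entry_width_bits) / 32;
	}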
 
 static struct vfc_ram_defs s_vfc_ram_defs[] = {
@@ -1352,32 +1465,32 @@ static struct vfc_ram_defs s_vfc_ram_defs[] = {
 static struct big_ram_defs s_big_ram_defs[] = {
        { "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB,
          BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA,
-         {4800, 4800, 5632} },
+         {4800, 5632} },
        { "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB,
          BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA,
-         {2880, 2880, 3680} },
+         {2880, 3680} },
        { "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB,
          BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA,
-         {1152, 1152, 1152} }
+         {1152, 1152} }
 };
 
 static struct reset_reg_defs s_reset_regs_defs[] = {
        { MISCS_REG_RESET_PL_UA, 0x0,
-         {true, true, true} },         /* DBG_RESET_REG_MISCS_PL_UA */
+         {true, true} },               /* DBG_RESET_REG_MISCS_PL_UA */
        { MISCS_REG_RESET_PL_HV, 0x0,
-         {true, true, true} },         /* DBG_RESET_REG_MISCS_PL_HV */
+         {true, true} },               /* DBG_RESET_REG_MISCS_PL_HV */
        { MISCS_REG_RESET_PL_HV_2, 0x0,
-         {false, false, true} },       /* DBG_RESET_REG_MISCS_PL_HV_2 */
+         {false, true} },      /* DBG_RESET_REG_MISCS_PL_HV_2 */
        { MISC_REG_RESET_PL_UA, 0x0,
-         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_UA */
+         {true, true} },               /* DBG_RESET_REG_MISC_PL_UA */
        { MISC_REG_RESET_PL_HV, 0x0,
-         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_HV */
+         {true, true} },               /* DBG_RESET_REG_MISC_PL_HV */
        { MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040,
-         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */
+         {true, true} },               /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */
        { MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007,
-         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */
+         {true, true} },               /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */
        { MISC_REG_RESET_PL_PDA_VAUX, 0x2,
-         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_PDA_VAUX */
+         {true, true} },               /* DBG_RESET_REG_MISC_PL_PDA_VAUX */
 };
 
 static struct phy_defs s_phy_defs[] = {
@@ -1410,6 +1523,26 @@ static u32 qed_read_unaligned_dword(u8 *buf)
        return dword;
 }
 
+/* Returns the value of the specified GRC param */
+static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn,
+                            enum dbg_grc_params grc_param)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+       return dev_data->grc.param_val[grc_param];
+}
+
+/* Initializes the GRC parameters */
+static void qed_dbg_grc_init_params(struct qed_hwfn *p_hwfn)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+       if (!dev_data->grc.params_initialized) {
+               qed_dbg_grc_set_params_default(p_hwfn);
+               dev_data->grc.params_initialized = 1;
+       }
+}
+
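
qed_dbg_grc_init_params() replaces the explicit clear/set-default pair removed later in this patch with a one-shot latch: defaults are applied exactly once, and values already set by the user survive because qed_dbg_grc_set_params_default() skips user-set params. A standalone sketch of the pattern, with stand-in types:

	#include <stdbool.h>

	struct grc_sketch {
	        bool params_initialized;        /* the one-shot latch */
	        bool set_by_user[4];
	        unsigned int param_val[4];
	};

	static void set_defaults_sketch(struct grc_sketch *grc)
	{
	        unsigned int i;

	        for (i = 0; i < 4; i++)
	                if (!grc->set_by_user[i])  /* user values survive */
	                        grc->param_val[i] = 1;
	}

	static void init_params_once(struct grc_sketch *grc)
	{
	        if (!grc->params_initialized) {
	                set_defaults_sketch(grc);
	                grc->params_initialized = true;
	        }
	}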
 /* Initializes debug data for the specified device */
 static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn,
                                        struct qed_ptt *p_ptt)
@@ -1424,13 +1557,17 @@ static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn,
                dev_data->mode_enable[MODE_K2] = 1;
        } else if (QED_IS_BB_B0(p_hwfn->cdev)) {
                dev_data->chip_id = CHIP_BB_B0;
-               dev_data->mode_enable[MODE_BB_B0] = 1;
+               dev_data->mode_enable[MODE_BB] = 1;
        } else {
                return DBG_STATUS_UNKNOWN_CHIP;
        }
 
        dev_data->platform_id = PLATFORM_ASIC;
        dev_data->mode_enable[MODE_ASIC] = 1;
+
+       /* Initialize the GRC parameters */
+       qed_dbg_grc_init_params(p_hwfn);
+
        dev_data->initialized = true;
        return DBG_STATUS_OK;
 }
@@ -1561,7 +1698,7 @@ static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn,
        int printed_chars;
        u32 offset = 0;
 
-       if (dump) {
+       if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_FW_VER)) {
                /* Read FW image/version from PRAM in a non-reset SEMI */
                bool found = false;
                u8 storm_id;
@@ -1622,7 +1759,7 @@ static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn,
 {
        char mfw_ver_str[16] = EMPTY_FW_VERSION_STR;
 
-       if (dump) {
+       if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_FW_VER)) {
                u32 global_section_offsize, global_section_addr, mfw_ver;
                u32 public_data_addr, global_section_offsize_addr;
                int printed_chars;
@@ -1683,15 +1820,13 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn,
                                         bool dump,
                                         u8 num_specific_global_params)
 {
+       u8 num_params = NUM_COMMON_GLOBAL_PARAMS + num_specific_global_params;
        struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
        u32 offset = 0;
 
        /* Find platform string and dump global params section header */
        offset += qed_dump_section_hdr(dump_buf + offset,
-                                      dump,
-                                      "global_params",
-                                      NUM_COMMON_GLOBAL_PARAMS +
-                                      num_specific_global_params);
+                                      dump, "global_params", num_params);
 
        /* Store params */
        offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump);
@@ -1815,37 +1950,6 @@ static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset)
        }
 }
 
-/* Returns the value of the specified GRC param */
-static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn,
-                            enum dbg_grc_params grc_param)
-{
-       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-
-       return dev_data->grc.param_val[grc_param];
-}
-
-/* Clear all GRC params */
-static void qed_dbg_grc_clear_params(struct qed_hwfn *p_hwfn)
-{
-       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-       u32 i;
-
-       for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
-               dev_data->grc.param_set_by_user[i] = 0;
-}
-
-/* Assign default GRC param values */
-static void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
-{
-       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-       u32 i;
-
-       for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
-               if (!dev_data->grc.param_set_by_user[i])
-                       dev_data->grc.param_val[i] =
-                           s_grc_param_defs[i].default_val[dev_data->chip_id];
-}
-
 /* Returns true if the specified entity (indicated by GRC param) should be
  * included in the dump, false otherwise.
  */
@@ -1971,7 +2075,7 @@ static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn,
        }
 }
 
-/* Returns the attention name offsets of the specified block */
+/* Returns the attention block data of the specified block */
 static const struct dbg_attn_block_type_data *
 qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type)
 {
@@ -2040,7 +2144,7 @@ static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn,
  * The following parameters are dumped:
  * - 'count' = num_dumped_entries
  * - 'split' = split_type
- * - 'id'i = split_id (dumped only if split_id >= 0)
+ * - 'id' = split_id (dumped only if split_id >= 0)
  * - 'param_name' = param_val (user param, dumped only if param_name != NULL and
  *     param_val != NULL)
  */
@@ -2069,21 +2173,81 @@ static u32 qed_grc_dump_regs_hdr(u32 *dump_buf,
        return offset;
 }
 
-/* Dumps GRC register/memory. Returns the dumped size in dwords. */
+/* Dumps the GRC registers in the specified address range.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt, u32 *dump_buf,
+                                  bool dump, u32 addr, u32 len)
+{
+       u32 byte_addr = DWORDS_TO_BYTES(addr), offset = 0, i;
+
+       if (dump)
+               for (i = 0; i < len; i++, byte_addr += BYTES_IN_DWORD, offset++)
+                       *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr);
+       else
+               offset += len;
+       return offset;
+}
+
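
qed_grc_dump_addr_range() is the single read loop the rest of the patch funnels through, and it keeps the file's two-pass convention: with dump=false nothing is read and only the size in dwords is returned, so callers can size the buffer on a first pass and fill it on a second. A standalone sketch with the register read stubbed out (qed_rd() in the real code):

	#include <stdint.h>

	static uint32_t read_reg_stub(uint32_t byte_addr)
	{
	        return byte_addr;       /* stand-in for qed_rd() */
	}

	static uint32_t dump_addr_range_sketch(uint32_t *buf, int dump,
	                                       uint32_t addr, uint32_t len)
	{
	        uint32_t offset = 0, i;

	        if (dump)
	                for (i = 0; i < len; i++, offset++)
	                        buf[offset] = read_reg_stub((addr + i) * 4);
	        else
	                offset += len;  /* sizing pass: count, don't read */

	        return offset;
	}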
+/* Dumps a GRC register sequence header. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_reg_entry_hdr(u32 *dump_buf, bool dump, u32 addr,
+                                     u32 len)
+{
+       if (dump)
+               *dump_buf = addr | (len << REG_DUMP_LEN_SHIFT);
+       return 1;
+}
+
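
The one-dword header written by qed_grc_dump_reg_entry_hdr() packs the dword address in the low bits and the entry length above REG_DUMP_LEN_SHIFT. Assuming the shift is 24 (its value is not shown in this hunk), addr 0x1234 with len 8 encodes as 0x08001234, and a parser recovers both fields by masking:

	#include <assert.h>

	#define REG_DUMP_LEN_SHIFT_SKETCH 24    /* assumed value */

	int main(void)
	{
	        unsigned int addr = 0x1234, len = 8;
	        unsigned int hdr = addr | (len << REG_DUMP_LEN_SHIFT_SKETCH);

	        assert(hdr == 0x08001234);
	        assert(hdr >> REG_DUMP_LEN_SHIFT_SKETCH == len);
	        assert((hdr & 0x00ffffff) == addr);
	        return 0;
	}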
+/* Dumps a GRC register sequence. Returns the dumped size in dwords. */
 static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn,
                                  struct qed_ptt *p_ptt, u32 *dump_buf,
                                  bool dump, u32 addr, u32 len)
 {
-       u32 offset = 0, i;
+       u32 offset = 0;
+
+       offset += qed_grc_dump_reg_entry_hdr(dump_buf, dump, addr, len);
+       offset += qed_grc_dump_addr_range(p_hwfn,
+                                         p_ptt,
+                                         dump_buf + offset, dump, addr, len);
+       return offset;
+}
+
+/* Dumps a GRC register sequence with a skip cycle.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_reg_entry_skip(struct qed_hwfn *p_hwfn,
+                                      struct qed_ptt *p_ptt, u32 *dump_buf,
+                                      bool dump, u32 addr, u32 total_len,
+                                      u32 read_len, u32 skip_len)
+{
+       u32 offset = 0, reg_offset = 0;
 
+       offset += qed_grc_dump_reg_entry_hdr(dump_buf, dump, addr, total_len);
        if (dump) {
-               *(dump_buf + offset++) = addr | (len << REG_DUMP_LEN_SHIFT);
-               for (i = 0; i < len; i++, addr++, offset++)
-                       *(dump_buf + offset) = qed_rd(p_hwfn,
-                                                     p_ptt,
-                                                     DWORDS_TO_BYTES(addr));
+               while (reg_offset < total_len) {
+                       u32 curr_len = min_t(u32,
+                                            read_len,
+                                            total_len - reg_offset);
+                       offset += qed_grc_dump_addr_range(p_hwfn,
+                                                         p_ptt,
+                                                         dump_buf + offset,
+                                                         dump, addr, curr_len);
+                       reg_offset += curr_len;
+                       addr += curr_len;
+                       if (reg_offset < total_len) {
+                               curr_len = min_t(u32,
+                                                skip_len,
+                                                total_len - reg_offset);
+                               memset(dump_buf + offset, 0,
+                                      DWORDS_TO_BYTES(curr_len));
+                               offset += curr_len;
+                               reg_offset += curr_len;
+                               addr += curr_len;
+                       }
+               }
        } else {
-               offset += len + 1;
+               offset += total_len;
        }
 
        return offset;
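
With the read_len and skip_len values the RDIF/TDIF caller below passes (7 and 1), a 16-dword entry comes out as:

	dwords  0..6    read from hardware
	dword   7       zero-filled (skipped)
	dwords  8..14   read from hardware
	dword  15       zero-filled (skipped)

The output is therefore still exactly total_len dwords, and the header written above advertises total_len, so offsets in the dumped image stay aligned with the register file even though every eighth register is never read.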
@@ -2124,14 +2288,17 @@ static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn,
                                const struct dbg_dump_reg *reg =
                                    (const struct dbg_dump_reg *)
                                    &input_regs_arr.ptr[input_offset];
+                               u32 addr, len;
 
+                               addr = GET_FIELD(reg->data,
+                                                DBG_DUMP_REG_ADDRESS);
+                               len = GET_FIELD(reg->data, DBG_DUMP_REG_LENGTH);
                                offset +=
-                                       qed_grc_dump_reg_entry(p_hwfn, p_ptt,
-                                                   dump_buf + offset, dump,
-                                                   GET_FIELD(reg->data,
-                                                       DBG_DUMP_REG_ADDRESS),
-                                                   GET_FIELD(reg->data,
-                                                       DBG_DUMP_REG_LENGTH));
+                                   qed_grc_dump_reg_entry(p_hwfn, p_ptt,
+                                                          dump_buf + offset,
+                                                          dump,
+                                                          addr,
+                                                          len);
                                (*num_dumped_reg_entries)++;
                        }
                } else {
@@ -2194,8 +2361,14 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
                                  const char *param_name, const char *param_val)
 {
        struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       struct chip_platform_defs *p_platform_defs;
        u32 offset = 0, input_offset = 0;
-       u8 port_id, pf_id;
+       struct chip_defs *p_chip_defs;
+       u8 port_id, pf_id, vf_id;
+       u16 fid;
+
+       p_chip_defs = &s_chip_defs[dev_data->chip_id];
+       p_platform_defs = &p_chip_defs->per_platform[dev_data->platform_id];
 
        if (dump)
                DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n");
@@ -2214,7 +2387,6 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 
                switch (split_type_id) {
                case SPLIT_TYPE_NONE:
-               case SPLIT_TYPE_VF:
                        offset += qed_grc_dump_split_data(p_hwfn,
                                                          p_ptt,
                                                          curr_input_regs_arr,
@@ -2227,10 +2399,7 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
                                                          param_val);
                        break;
                case SPLIT_TYPE_PORT:
-                       for (port_id = 0;
-                            port_id <
-                            s_chip_defs[dev_data->chip_id].
-                            per_platform[dev_data->platform_id].num_ports;
+                       for (port_id = 0; port_id < p_platform_defs->num_ports;
                             port_id++) {
                                if (dump)
                                        qed_port_pretend(p_hwfn, p_ptt,
@@ -2247,20 +2416,48 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
                        break;
                case SPLIT_TYPE_PF:
                case SPLIT_TYPE_PORT_PF:
-                       for (pf_id = 0;
-                            pf_id <
-                            s_chip_defs[dev_data->chip_id].
-                            per_platform[dev_data->platform_id].num_pfs;
+                       for (pf_id = 0; pf_id < p_platform_defs->num_pfs;
                             pf_id++) {
-                               if (dump)
-                                       qed_fid_pretend(p_hwfn, p_ptt, pf_id);
-                               offset += qed_grc_dump_split_data(p_hwfn,
-                                                       p_ptt,
-                                                       curr_input_regs_arr,
-                                                       dump_buf + offset,
-                                                       dump, block_enable,
-                                                       "pf", pf_id, param_name,
-                                                       param_val);
+                               u8 pfid_shift =
+                                       PXP_PRETEND_CONCRETE_FID_PFID_SHIFT;
+
+                               if (dump) {
+                                       fid = pf_id << pfid_shift;
+                                       qed_fid_pretend(p_hwfn, p_ptt, fid);
+                               }
+
+                               offset +=
+                                   qed_grc_dump_split_data(p_hwfn, p_ptt,
+                                                           curr_input_regs_arr,
+                                                           dump_buf + offset,
+                                                           dump, block_enable,
+                                                           "pf", pf_id,
+                                                           param_name,
+                                                           param_val);
+                       }
+                       break;
+               case SPLIT_TYPE_VF:
+                       for (vf_id = 0; vf_id < p_platform_defs->num_vfs;
+                            vf_id++) {
+                               u8 vfvalid_shift =
+                                       PXP_PRETEND_CONCRETE_FID_VFVALID_SHIFT;
+                               u8 vfid_shift =
+                                       PXP_PRETEND_CONCRETE_FID_VFID_SHIFT;
+
+                               if (dump) {
+                                       fid = BIT(vfvalid_shift) |
+                                             (vf_id << vfid_shift);
+                                       qed_fid_pretend(p_hwfn, p_ptt, fid);
+                               }
+
+                               offset +=
+                                   qed_grc_dump_split_data(p_hwfn, p_ptt,
+                                                           curr_input_regs_arr,
+                                                           dump_buf + offset,
+                                                           dump, block_enable,
+                                                           "vf", vf_id,
+                                                           param_name,
+                                                           param_val);
                        }
                        break;
                default:
@@ -2271,8 +2468,11 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
        }
 
        /* Pretend to original PF */
-       if (dump)
-               qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
+       if (dump) {
+               fid = p_hwfn->rel_pf_id << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT;
+               qed_fid_pretend(p_hwfn, p_ptt, fid);
+       }
+
        return offset;
 }
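
The pretend FID built in the PF and VF loops above follows the concrete-FID layout from the PXP defines: a PF pretend is just the PF id shifted into place, a VF pretend sets the VF-valid bit plus the VF id, and the function exits by pretending back to p_hwfn->rel_pf_id so later GRC accesses are not redirected. A standalone sketch; the shift values here are placeholders, not the real PXP_PRETEND_CONCRETE_FID_* constants:

	#define BIT_SKETCH(n)           (1U << (n))
	#define PFID_SHIFT_SKETCH       0       /* placeholder */
	#define VFID_SHIFT_SKETCH       0       /* placeholder */
	#define VFVALID_SHIFT_SKETCH    8       /* placeholder */

	static unsigned short pf_pretend_fid(unsigned char pf_id)
	{
	        return pf_id << PFID_SHIFT_SKETCH;
	}

	static unsigned short vf_pretend_fid(unsigned char vf_id)
	{
	        return BIT_SKETCH(VFVALID_SHIFT_SKETCH) |
	               (vf_id << VFID_SHIFT_SKETCH);
	}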
 
@@ -2291,13 +2491,14 @@ static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn,
        /* Write reset registers */
        for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
                if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
+                       u32 addr = BYTES_TO_DWORDS(s_reset_regs_defs[i].addr);
+
                        offset += qed_grc_dump_reg_entry(p_hwfn,
                                                         p_ptt,
                                                         dump_buf + offset,
                                                         dump,
-                                                        BYTES_TO_DWORDS
-                                                        (s_reset_regs_defs
-                                                         [i].addr), 1);
+                                                        addr,
+                                                        1);
                        num_regs++;
                }
        }
@@ -2339,6 +2540,7 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
                                &attn_reg_arr[reg_idx];
                        u16 modes_buf_offset;
                        bool eval_mode;
+                       u32 addr;
 
                        /* Check mode */
                        eval_mode = GET_FIELD(reg_data->mode.data,
@@ -2349,19 +2551,23 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
                        if (!eval_mode ||
                            qed_is_mode_match(p_hwfn, &modes_buf_offset)) {
                                /* Mode match - read and dump registers */
-                               offset += qed_grc_dump_reg_entry(p_hwfn,
-                                                       p_ptt,
-                                                       dump_buf + offset,
-                                                       dump,
-                                                       reg_data->mask_address,
-                                                       1);
-                               offset += qed_grc_dump_reg_entry(p_hwfn,
-                                               p_ptt,
-                                               dump_buf + offset,
-                                               dump,
-                                               GET_FIELD(reg_data->data,
-                                                   DBG_ATTN_REG_STS_ADDRESS),
-                                               1);
+                               addr = reg_data->mask_address;
+                               offset +=
+                                   qed_grc_dump_reg_entry(p_hwfn,
+                                                          p_ptt,
+                                                          dump_buf + offset,
+                                                          dump,
+                                                          addr,
+                                                          1);
+                               addr = GET_FIELD(reg_data->data,
+                                                DBG_ATTN_REG_STS_ADDRESS);
+                               offset +=
+                                   qed_grc_dump_reg_entry(p_hwfn,
+                                                          p_ptt,
+                                                          dump_buf + offset,
+                                                          dump,
+                                                          addr,
+                                                          1);
                                num_reg_entries += 2;
                        }
                }
@@ -2369,18 +2575,21 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
 
        /* Write storm stall status registers */
        for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               u32 addr;
+
                if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] &&
                    dump)
                        continue;
 
+               addr =
+                   BYTES_TO_DWORDS(s_storm_defs[storm_id].sem_fast_mem_addr +
+                                   SEM_FAST_REG_STALLED);
                offset += qed_grc_dump_reg_entry(p_hwfn,
-                                       p_ptt,
-                                       dump_buf + offset,
-                                       dump,
-                                       BYTES_TO_DWORDS(s_storm_defs[storm_id].
-                                                       sem_fast_mem_addr +
-                                                       SEM_FAST_REG_STALLED),
-                                       1);
+                                                p_ptt,
+                                                dump_buf + offset,
+                                                dump,
+                                                addr,
+                                                1);
                num_reg_entries++;
        }
 
@@ -2392,11 +2601,47 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
        return offset;
 }
 
+/* Dumps registers that can't be represented in the debug arrays */
+static u32 qed_grc_dump_special_regs(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    u32 *dump_buf, bool dump)
+{
+       u32 offset = 0, addr;
+
+       offset += qed_grc_dump_regs_hdr(dump_buf,
+                                       dump, 2, "eng", -1, NULL, NULL);
+
+       /* Dump RDIF/TDIF_REG_DEBUG_ERROR_INFO (every 8th register is
+        * skipped).
+        */
+       addr = BYTES_TO_DWORDS(RDIF_REG_DEBUG_ERROR_INFO);
+       offset += qed_grc_dump_reg_entry_skip(p_hwfn,
+                                             p_ptt,
+                                             dump_buf + offset,
+                                             dump,
+                                             addr,
+                                             RDIF_REG_DEBUG_ERROR_INFO_SIZE,
+                                             7,
+                                             1);
+       addr = BYTES_TO_DWORDS(TDIF_REG_DEBUG_ERROR_INFO);
+       offset +=
+           qed_grc_dump_reg_entry_skip(p_hwfn,
+                                       p_ptt,
+                                       dump_buf + offset,
+                                       dump,
+                                       addr,
+                                       TDIF_REG_DEBUG_ERROR_INFO_SIZE,
+                                       7,
+                                       1);
+
+       return offset;
+}
+
 /* Dumps a GRC memory header (section and params).
  * The following parameters are dumped:
  * name - name is dumped only if it's not NULL.
- * addr - byte_addr is dumped only if name is NULL.
- * len - dword_len is always dumped.
+ * addr - addr is dumped only if name is NULL.
+ * len - len is always dumped.
  * width - bit_width is dumped if it's not zero.
  * packed - packed=1 is dumped if it's not false.
  * mem_group - mem_group is always dumped.
@@ -2408,8 +2653,8 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
                                u32 *dump_buf,
                                bool dump,
                                const char *name,
-                               u32 byte_addr,
-                               u32 dword_len,
+                               u32 addr,
+                               u32 len,
                                u32 bit_width,
                                bool packed,
                                const char *mem_group,
@@ -2419,7 +2664,7 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
        u32 offset = 0;
        char buf[64];
 
-       if (!dword_len)
+       if (!len)
                DP_NOTICE(p_hwfn,
                          "Unexpected GRC Dump error: dumped memory size must be non-zero\n");
        if (bit_width)
@@ -2446,20 +2691,21 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
                        DP_VERBOSE(p_hwfn,
                                   QED_MSG_DEBUG,
                                   "Dumping %d registers from %s...\n",
-                                  dword_len, buf);
+                                  len, buf);
        } else {
                /* Dump address */
                offset += qed_dump_num_param(dump_buf + offset,
-                                            dump, "addr", byte_addr);
-               if (dump && dword_len > 64)
+                                            dump, "addr",
+                                            DWORDS_TO_BYTES(addr));
+               if (dump && len > 64)
                        DP_VERBOSE(p_hwfn,
                                   QED_MSG_DEBUG,
                                   "Dumping %d registers from address 0x%x...\n",
-                                  dword_len, byte_addr);
+                                  len, (u32)DWORDS_TO_BYTES(addr));
        }
 
        /* Dump len */
-       offset += qed_dump_num_param(dump_buf + offset, dump, "len", dword_len);
+       offset += qed_dump_num_param(dump_buf + offset, dump, "len", len);
 
        /* Dump bit width */
        if (bit_width)
@@ -2492,8 +2738,8 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
                            u32 *dump_buf,
                            bool dump,
                            const char *name,
-                           u32 byte_addr,
-                           u32 dword_len,
+                           u32 addr,
+                           u32 len,
                            u32 bit_width,
                            bool packed,
                            const char *mem_group,
@@ -2505,21 +2751,14 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
                                       dump_buf + offset,
                                       dump,
                                       name,
-                                      byte_addr,
-                                      dword_len,
+                                      addr,
+                                      len,
                                       bit_width,
                                       packed,
                                       mem_group, is_storm, storm_letter);
-       if (dump) {
-               u32 i;
-
-               for (i = 0; i < dword_len;
-                    i++, byte_addr += BYTES_IN_DWORD, offset++)
-                       *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr);
-       } else {
-               offset += dword_len;
-       }
-
+       offset += qed_grc_dump_addr_range(p_hwfn,
+                                         p_ptt,
+                                         dump_buf + offset, dump, addr, len);
        return offset;
 }
 
@@ -2575,25 +2814,41 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn,
                        if (qed_grc_is_mem_included(p_hwfn,
                                        (enum block_id)cond_hdr->block_id,
                                        mem_group_id)) {
-                               u32 mem_byte_addr =
-                                       DWORDS_TO_BYTES(GET_FIELD(mem->dword0,
-                                                       DBG_DUMP_MEM_ADDRESS));
+                               u32 mem_addr = GET_FIELD(mem->dword0,
+                                                        DBG_DUMP_MEM_ADDRESS);
                                u32 mem_len = GET_FIELD(mem->dword1,
                                                        DBG_DUMP_MEM_LENGTH);
+                               enum dbg_grc_params grc_param;
                                char storm_letter = 'a';
                                bool is_storm = false;
 
                                /* Update memory length for CCFC/TCFC memories
                                 * according to number of LCIDs/LTIDs.
                                 */
-                               if (mem_group_id == MEM_GROUP_CONN_CFC_MEM)
+                               if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) {
+                                       if (mem_len % MAX_LCIDS != 0) {
+                                               DP_NOTICE(p_hwfn,
+                                                         "Invalid CCFC connection memory size\n");
+                                               return 0;
+                                       }
+
+                                       grc_param = DBG_GRC_PARAM_NUM_LCIDS;
                                        mem_len = qed_grc_get_param(p_hwfn,
-                                                       DBG_GRC_PARAM_NUM_LCIDS)
-                                                       * (mem_len / MAX_LCIDS);
-                               else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM)
+                                                                   grc_param) *
+                                                 (mem_len / MAX_LCIDS);
+                               } else if (mem_group_id ==
+                                          MEM_GROUP_TASK_CFC_MEM) {
+                                       if (mem_len % MAX_LTIDS != 0) {
+                                               DP_NOTICE(p_hwfn,
+                                                         "Invalid TCFC task memory size\n");
+                                               return 0;
+                                       }
+
+                                       grc_param = DBG_GRC_PARAM_NUM_LTIDS;
                                        mem_len = qed_grc_get_param(p_hwfn,
-                                                       DBG_GRC_PARAM_NUM_LTIDS)
-                                                       * (mem_len / MAX_LTIDS);
+                                                                   grc_param) *
+                                                 (mem_len / MAX_LTIDS);
+                               }
 
                                /* If memory is associated with Storm, update
                                 * Storm details.
@@ -2610,7 +2865,7 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn,
                                /* Dump memory */
                                offset += qed_grc_dump_mem(p_hwfn, p_ptt,
                                                dump_buf + offset, dump, NULL,
-                                               mem_byte_addr, mem_len, 0,
+                                               mem_addr, mem_len, 0,
                                                false,
                                                s_mem_group_names[mem_group_id],
                                                is_storm, storm_letter);
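
The CCFC/TCFC branch above first checks that the raw table length divides evenly by the maximum connection/task count, then rescales it to the configured count: mem_len = param * (mem_len / MAX). With illustrative numbers, a 640-dword CCFC table and a MAX_LCIDS of 320 give 2 dwords per connection, so setting DBG_GRC_PARAM_NUM_LCIDS to 64 dumps 64 * 2 = 128 dwords; the new modulo check rejects tables whose size is not a whole multiple instead of silently truncating.

	/* Rescaling sketch with illustrative sizes (not the real MAX_LCIDS) */
	static unsigned int scaled_cfc_len(unsigned int mem_len,
	                                   unsigned int max_ids,
	                                   unsigned int configured_ids)
	{
	        if (mem_len % max_ids != 0)
	                return 0;       /* invalid table size, as in the patch */
	        return configured_ids * (mem_len / max_ids);
	}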
@@ -2799,29 +3054,31 @@ static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
        u32 offset = 0;
 
        for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
-               if (qed_grc_is_storm_included(p_hwfn,
-                                             (enum dbg_storms)storm_id)) {
-                       for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) {
-                               u32 addr =
-                                   s_storm_defs[storm_id].sem_fast_mem_addr +
-                                   SEM_FAST_REG_STORM_REG_FILE +
-                                   DWORDS_TO_BYTES(IOR_SET_OFFSET(set_id));
+               struct storm_defs *storm = &s_storm_defs[storm_id];
 
-                               buf[strlen(buf) - 1] = '0' + set_id;
-                               offset += qed_grc_dump_mem(p_hwfn,
-                                                          p_ptt,
-                                                          dump_buf + offset,
-                                                          dump,
-                                                          buf,
-                                                          addr,
-                                                          IORS_PER_SET,
-                                                          32,
-                                                          false,
-                                                          "ior",
-                                                          true,
-                                                          s_storm_defs
-                                                          [storm_id].letter);
-                       }
+               if (!qed_grc_is_storm_included(p_hwfn,
+                                              (enum dbg_storms)storm_id))
+                       continue;
+
+               for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) {
+                       u32 dwords, addr;
+
+                       dwords = storm->sem_fast_mem_addr +
+                                SEM_FAST_REG_STORM_REG_FILE;
+                       addr = BYTES_TO_DWORDS(dwords) + IOR_SET_OFFSET(set_id);
+                       buf[strlen(buf) - 1] = '0' + set_id;
+                       offset += qed_grc_dump_mem(p_hwfn,
+                                                  p_ptt,
+                                                  dump_buf + offset,
+                                                  dump,
+                                                  buf,
+                                                  addr,
+                                                  IORS_PER_SET,
+                                                  32,
+                                                  false,
+                                                  "ior",
+                                                  true,
+                                                  storm->letter);
                }
        }
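
The rewritten IOR loop keeps the old trick of renaming the dump buffer in place: the trailing digit of a scratch name is overwritten with the set index on each iteration. A tiny sketch of just that trick, where NUM_IOR_SETS and the name itself are invented:

    #include <stdio.h>
    #include <string.h>

    #define NUM_IOR_SETS 2 /* hypothetical */

    int main(void)
    {
        char buf[8] = "ior0";
        int set_id;

        for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) {
            buf[strlen(buf) - 1] = '0' + set_id; /* "ior0", "ior1", ... */
            printf("%s\n", buf);
        }
        return 0;
    }
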
 
@@ -2990,34 +3247,39 @@ static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn,
                struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id];
                u32 num_entries = rss_defs->num_entries[dev_data->chip_id];
                u32 entry_width = rss_defs->entry_width[dev_data->chip_id];
-               u32 total_size = (num_entries * entry_width) / 32;
+               u32 total_dwords = (num_entries * entry_width) / 32;
+               u32 size = RSS_REG_RSS_RAM_DATA_SIZE;
                bool packed = (entry_width == 16);
-               u32 addr = rss_defs->addr;
-               u32 i, j;
+               u32 rss_addr = rss_defs->addr;
+               u32 i, addr;
 
                offset += qed_grc_dump_mem_hdr(p_hwfn,
                                               dump_buf + offset,
                                               dump,
                                               rss_defs->mem_name,
-                                              addr,
-                                              total_size,
+                                              0,
+                                              total_dwords,
                                               entry_width,
                                               packed,
                                               rss_defs->type_name, false, 0);
 
                if (!dump) {
-                       offset += total_size;
+                       offset += total_dwords;
                        continue;
                }
 
                /* Dump RSS data */
-               for (i = 0; i < BYTES_TO_DWORDS(total_size); i++, addr++) {
-                       qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, addr);
-                       for (j = 0; j < BYTES_IN_DWORD; j++, offset++)
-                               *(dump_buf + offset) =
-                                       qed_rd(p_hwfn, p_ptt,
-                                              RSS_REG_RSS_RAM_DATA +
-                                              DWORDS_TO_BYTES(j));
+               for (i = 0; i < total_dwords;
+                    i += RSS_REG_RSS_RAM_DATA_SIZE, rss_addr++) {
+                       addr = BYTES_TO_DWORDS(RSS_REG_RSS_RAM_DATA);
+                       qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, rss_addr);
+                       offset += qed_grc_dump_addr_range(p_hwfn,
+                                                         p_ptt,
+                                                         dump_buf + offset,
+                                                         dump,
+                                                         addr,
+                                                         size);
                }
        }
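
The RSS hunk replaces the open-coded dword reads with qed_grc_dump_addr_range(), but the underlying access is still the classic address/data indirect-register pattern: write an index to an ADDR register, then read a fixed-size window out of a DATA register. A sketch of that pattern against a fake RAM, with all names, sizes and data invented:

    #include <stdint.h>
    #include <stdio.h>

    #define RAM_DATA_SIZE 2 /* dwords visible per ADDR write (assumed) */

    static uint32_t fake_ram[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    static uint32_t ram_addr;

    static void wr_addr(uint32_t v) { ram_addr = v; }
    static uint32_t rd_data(uint32_t j)
    {
        return fake_ram[ram_addr * RAM_DATA_SIZE + j];
    }

    int main(void)
    {
        uint32_t dump[8];
        uint32_t total_dwords = 8, off = 0, i, j;

        for (i = 0; i < total_dwords; i += RAM_DATA_SIZE) {
            wr_addr(i / RAM_DATA_SIZE);         /* select the window */
            for (j = 0; j < RAM_DATA_SIZE; j++) /* then read it out */
                dump[off++] = rd_data(j);
        }

        for (i = 0; i < off; i++)
            printf("%u ", dump[i]);
        printf("\n");
        return 0;
    }
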
 
@@ -3030,19 +3292,19 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
                                u32 *dump_buf, bool dump, u8 big_ram_id)
 {
        struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 total_blocks, ram_size, offset = 0, i;
        char mem_name[12] = "???_BIG_RAM";
        char type_name[8] = "???_RAM";
-       u32 ram_size, total_blocks;
-       u32 offset = 0, i, j;
+       struct big_ram_defs *big_ram;
 
-       total_blocks =
-               s_big_ram_defs[big_ram_id].num_of_blocks[dev_data->chip_id];
+       big_ram = &s_big_ram_defs[big_ram_id];
+       total_blocks = big_ram->num_of_blocks[dev_data->chip_id];
        ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS;
 
-       strncpy(type_name, s_big_ram_defs[big_ram_id].instance_name,
-               strlen(s_big_ram_defs[big_ram_id].instance_name));
-       strncpy(mem_name, s_big_ram_defs[big_ram_id].instance_name,
-               strlen(s_big_ram_defs[big_ram_id].instance_name));
+       strncpy(type_name, big_ram->instance_name,
+               strlen(big_ram->instance_name));
+       strncpy(mem_name, big_ram->instance_name,
+               strlen(big_ram->instance_name));
 
        /* Dump memory header */
        offset += qed_grc_dump_mem_hdr(p_hwfn,
@@ -3059,13 +3321,17 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
 
        /* Read and dump Big RAM data */
        for (i = 0; i < total_blocks / 2; i++) {
-               qed_wr(p_hwfn, p_ptt, s_big_ram_defs[big_ram_id].addr_reg_addr,
-                      i);
-               for (j = 0; j < 2 * BIG_RAM_BLOCK_SIZE_DWORDS; j++, offset++)
-                       *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt,
-                                               s_big_ram_defs[big_ram_id].
-                                                       data_reg_addr +
-                                               DWORDS_TO_BYTES(j));
+               u32 addr, len;
+
+               qed_wr(p_hwfn, p_ptt, big_ram->addr_reg_addr, i);
+               addr = BYTES_TO_DWORDS(big_ram->data_reg_addr);
+               len = 2 * BIG_RAM_BLOCK_SIZE_DWORDS;
+               offset += qed_grc_dump_addr_range(p_hwfn,
+                                                 p_ptt,
+                                                 dump_buf + offset,
+                                                 dump,
+                                                 addr,
+                                                 len);
        }
 
        return offset;
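
qed_grc_dump_big_ram() fills the "???" prefix of its name templates with the instance name; because strncpy() is bounded by the source string's length, the "_BIG_RAM" suffix survives. A sketch of just that, with "BRB" as an assumed instance name:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char mem_name[12] = "???_BIG_RAM";
        const char *instance = "BRB"; /* hypothetical instance name */

        /* copies exactly strlen(instance) bytes, no terminating NUL */
        strncpy(mem_name, instance, strlen(instance));
        printf("%s\n", mem_name); /* BRB_BIG_RAM */
        return 0;
    }
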
@@ -3075,11 +3341,11 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
                            struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
 {
        bool block_enable[MAX_BLOCK_ID] = { 0 };
+       u32 offset = 0, addr;
        bool halted = false;
-       u32 offset = 0;
 
        /* Halt MCP */
-       if (dump) {
+       if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP)) {
                halted = !qed_mcp_halt(p_hwfn, p_ptt);
                if (!halted)
                        DP_NOTICE(p_hwfn, "MCP halt failed!\n");
@@ -3091,7 +3357,7 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
                                   dump_buf + offset,
                                   dump,
                                   NULL,
-                                  MCP_REG_SCRATCH,
+                                  BYTES_TO_DWORDS(MCP_REG_SCRATCH),
                                   MCP_REG_SCRATCH_SIZE,
                                   0, false, "MCP", false, 0);
 
@@ -3101,7 +3367,7 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
                                   dump_buf + offset,
                                   dump,
                                   NULL,
-                                  MCP_REG_CPU_REG_FILE,
+                                  BYTES_TO_DWORDS(MCP_REG_CPU_REG_FILE),
                                   MCP_REG_CPU_REG_FILE_SIZE,
                                   0, false, "MCP", false, 0);
 
@@ -3115,12 +3381,13 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
        /* Dump required non-MCP registers */
        offset += qed_grc_dump_regs_hdr(dump_buf + offset,
                                        dump, 1, "eng", -1, "block", "MCP");
+       addr = BYTES_TO_DWORDS(MISC_REG_SHARED_MEM_ADDR);
        offset += qed_grc_dump_reg_entry(p_hwfn,
                                         p_ptt,
                                         dump_buf + offset,
                                         dump,
-                                        BYTES_TO_DWORDS
-                                        (MISC_REG_SHARED_MEM_ADDR), 1);
+                                        addr,
+                                        1);
 
        /* Release MCP */
        if (halted && qed_mcp_resume(p_hwfn, p_ptt))
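
The MCP hunks bracket the dump with a halt/resume pair and now honor a NO_MCP parameter. A stub sketch of that control flow; every function here is a stand-in, with 0 meaning success as in the driver:

    #include <stdbool.h>
    #include <stdio.h>

    static bool no_mcp;                        /* DBG_GRC_PARAM_NO_MCP stand-in */
    static int mcp_halt(void)   { return 0; }  /* 0 = success */
    static int mcp_resume(void) { return 0; }

    int main(void)
    {
        bool dump = true, halted = false;

        if (dump && !no_mcp) {
            halted = !mcp_halt();
            if (!halted)
                printf("MCP halt failed!\n");
        }

        /* ... dump the MCP scratchpad and register file here ... */

        if (halted && mcp_resume() != 0)
            printf("Failed to resume MCP after halt!\n");
        return 0;
    }
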
@@ -3212,7 +3479,7 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
 {
        u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS;
        struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-       u32 offset = 0, block_id, line_id, addr, i;
+       u32 offset = 0, block_id, line_id;
        struct block_defs *p_block_defs;
 
        if (dump) {
@@ -3255,6 +3522,8 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
                if (dump && !dev_data->block_in_reset[block_id]) {
                        u8 dbg_client_id =
                                p_block_defs->dbg_client_id[dev_data->chip_id];
+                       u32 addr = BYTES_TO_DWORDS(DBG_REG_CALENDAR_OUT_DATA);
+                       u32 len = STATIC_DEBUG_LINE_DWORDS;
 
                        /* Enable block's client */
                        qed_bus_enable_clients(p_hwfn, p_ptt,
@@ -3270,11 +3539,13 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
                                                    0xf, 0, 0, 0);
 
                                /* Read debug line info */
-                               for (i = 0, addr = DBG_REG_CALENDAR_OUT_DATA;
-                                    i < STATIC_DEBUG_LINE_DWORDS;
-                                    i++, offset++, addr += BYTES_IN_DWORD)
-                                       dump_buf[offset] = qed_rd(p_hwfn, p_ptt,
-                                                                 addr);
+                               offset +=
+                                   qed_grc_dump_addr_range(p_hwfn,
+                                                           p_ptt,
+                                                           dump_buf + offset,
+                                                           dump,
+                                                           addr,
+                                                           len);
                        }
 
                        /* Disable block's client and debug output */
@@ -3311,14 +3582,8 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
        u8 i, port_mode = 0;
        u32 offset = 0;
 
-       /* Check if emulation platform */
        *num_dumped_dwords = 0;
 
-       /* Fill GRC parameters that were not set by the user with their default
-        * value.
-        */
-       qed_dbg_grc_set_params_default(p_hwfn);
-
        /* Find port mode */
        if (dump) {
                switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) {
@@ -3370,15 +3635,14 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
        }
 
        /* Disable all parities using MFW command */
-       if (dump) {
+       if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP)) {
                parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1);
                if (!parities_masked) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed to mask parities using MFW\n");
                        if (qed_grc_get_param
                            (p_hwfn, DBG_GRC_PARAM_PARITY_SAFE))
                                return DBG_STATUS_MCP_COULD_NOT_MASK_PRTY;
-                       else
-                               DP_NOTICE(p_hwfn,
-                                         "Failed to mask parities using MFW\n");
                }
        }
 
@@ -3409,6 +3673,11 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
                                                 offset,
                                                 dump,
                                                 block_enable, NULL, NULL);
+
+               /* Dump special registers */
+               offset += qed_grc_dump_special_regs(p_hwfn,
+                                                   p_ptt,
+                                                   dump_buf + offset, dump);
        }
 
        /* Dump memories */
@@ -3583,9 +3852,9 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
                        }
 
                        if (mode_match) {
-                               u32 grc_addr =
-                                       DWORDS_TO_BYTES(GET_FIELD(reg->data,
-                                               DBG_IDLE_CHK_INFO_REG_ADDRESS));
+                               u32 addr =
+                                   GET_FIELD(reg->data,
+                                             DBG_IDLE_CHK_INFO_REG_ADDRESS);
 
                                /* Write register header */
                                struct dbg_idle_chk_result_reg_hdr *reg_hdr =
@@ -3597,16 +3866,19 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
                                memset(reg_hdr, 0, sizeof(*reg_hdr));
                                reg_hdr->size = reg->size;
                                SET_FIELD(reg_hdr->data,
-                                       DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID,
-                                       rule->num_cond_regs + reg_id);
+                                         DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID,
+                                         rule->num_cond_regs + reg_id);
 
                                /* Write register values */
-                               for (i = 0; i < reg->size;
-                                    i++, offset++, grc_addr += 4)
-                                       dump_buf[offset] =
-                                               qed_rd(p_hwfn, p_ptt, grc_addr);
-                               }
+                               offset +=
+                                   qed_grc_dump_addr_range(p_hwfn,
+                                                           p_ptt,
+                                                           dump_buf + offset,
+                                                           dump,
+                                                           addr,
+                                                           reg->size);
                        }
+               }
        }
 
        return offset;
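
The idle-check hunk stops converting the packed register address to bytes and passes the dword value straight to qed_grc_dump_addr_range(). The address itself comes out of a GET_FIELD()-style mask-and-shift; a sketch with invented mask/shift values (the shift-then-mask form below is an assumption about the macro, not a quote of it):

    #include <stdint.h>
    #include <stdio.h>

    /* invented field definition: address lives in bits 0..23 */
    #define REG_ADDRESS_MASK  0xffffff
    #define REG_ADDRESS_SHIFT 0

    #define GET_FIELD(value, name) \
        (((value) >> name##_SHIFT) & name##_MASK)

    int main(void)
    {
        uint32_t data = 0x00123456;
        uint32_t addr = GET_FIELD(data, REG_ADDRESS); /* dword address */

        printf("addr=0x%x\n", addr);
        return 0;
    }
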
@@ -3621,7 +3893,7 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 {
        struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
        u32 cond_reg_values[IDLE_CHK_MAX_ENTRIES_SIZE];
-       u32 i, j, offset = 0;
+       u32 i, offset = 0;
        u16 entry_id;
        u8 reg_id;
 
@@ -3664,73 +3936,83 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
                if (!check_rule && dump)
                        continue;
 
+               if (!dump) {
+                       u32 entry_dump_size =
+                               qed_idle_chk_dump_failure(p_hwfn,
+                                                         p_ptt,
+                                                         dump_buf + offset,
+                                                         false,
+                                                         rule->rule_id,
+                                                         rule,
+                                                         0,
+                                                         NULL);
+
+                       offset += num_reg_entries * entry_dump_size;
+                       (*num_failing_rules) += num_reg_entries;
+                       continue;
+               }
+
                /* Go over all register entries (number of entries is the same
                 * for all condition registers).
                 */
                for (entry_id = 0; entry_id < num_reg_entries; entry_id++) {
                        /* Read current entry of all condition registers */
-                       if (dump) {
-                               u32 next_reg_offset = 0;
-
-                               for (reg_id = 0;
-                                    reg_id < rule->num_cond_regs;
-                                    reg_id++) {
-                                       const struct dbg_idle_chk_cond_reg
-                                               *reg = &cond_regs[reg_id];
-
-                                       /* Find GRC address (if it's a memory,
-                                        * the address of the specific entry is
-                                        * calculated).
-                                        */
-                                       u32 grc_addr =
-                                          DWORDS_TO_BYTES(
-                                               GET_FIELD(reg->data,
-                                                   DBG_IDLE_CHK_COND_REG_ADDRESS));
-
-                                       if (reg->num_entries > 1 ||
-                                           reg->start_entry > 0) {
-                                               u32 padded_entry_size =
-                                                       reg->entry_size > 1 ?
-                                                       roundup_pow_of_two
-                                                       (reg->entry_size) : 1;
-
-                                               grc_addr +=
-                                                       DWORDS_TO_BYTES(
-                                                               (reg->start_entry +
-                                                               entry_id)
-                                                               * padded_entry_size);
-                                       }
+                       u32 next_reg_offset = 0;
 
-                                       /* Read registers */
-                                       if (next_reg_offset + reg->entry_size >=
-                                           IDLE_CHK_MAX_ENTRIES_SIZE) {
-                                               DP_NOTICE(p_hwfn,
-                                                         "idle check registers entry is too large\n");
-                                               return 0;
-                                       }
+                       for (reg_id = 0; reg_id < rule->num_cond_regs;
+                            reg_id++) {
+                               const struct dbg_idle_chk_cond_reg *reg =
+                                       &cond_regs[reg_id];
 
-                                       for (j = 0; j < reg->entry_size;
-                                            j++, next_reg_offset++,
-                                            grc_addr += 4)
-                                            cond_reg_values[next_reg_offset] =
-                                               qed_rd(p_hwfn, p_ptt, grc_addr);
+                               /* Find GRC address (if it's a memory, the
+                                * address of the specific entry is calculated).
+                                */
+                               u32 addr =
+                                   GET_FIELD(reg->data,
+                                             DBG_IDLE_CHK_COND_REG_ADDRESS);
+
+                               if (reg->num_entries > 1 ||
+                                   reg->start_entry > 0) {
+                                       u32 padded_entry_size =
+                                          reg->entry_size > 1 ?
+                                          roundup_pow_of_two(reg->entry_size) :
+                                          1;
+
+                                       addr += (reg->start_entry + entry_id) *
+                                               padded_entry_size;
                                }
+
+                               /* Read registers */
+                               if (next_reg_offset + reg->entry_size >=
+                                   IDLE_CHK_MAX_ENTRIES_SIZE) {
+                                       DP_NOTICE(p_hwfn,
+                                                 "idle check registers entry is too large\n");
+                                       return 0;
+                               }
+
+                               next_reg_offset +=
+                                   qed_grc_dump_addr_range(p_hwfn,
+                                                           p_ptt,
+                                                           cond_reg_values +
+                                                           next_reg_offset,
+                                                           dump, addr,
+                                                           reg->entry_size);
                        }
 
                        /* Call rule's condition function - a return value of
                         * true indicates failure.
                         */
                        if ((*cond_arr[rule->cond_id])(cond_reg_values,
-                                                      imm_values) || !dump) {
+                                                      imm_values)) {
                                offset +=
-                                       qed_idle_chk_dump_failure(p_hwfn,
-                                                       p_ptt,
-                                                       dump_buf + offset,
-                                                       dump,
-                                                       rule->rule_id,
-                                                       rule,
-                                                       entry_id,
-                                                       cond_reg_values);
+                                   qed_idle_chk_dump_failure(p_hwfn,
+                                                             p_ptt,
+                                                             dump_buf + offset,
+                                                             dump,
+                                                             rule->rule_id,
+                                                             rule,
+                                                             entry_id,
+                                                             cond_reg_values);
                                (*num_failing_rules)++;
                                break;
                        }
@@ -3818,13 +4100,18 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
        struct mcp_file_att file_att;
 
        /* Call NVRAM get file command */
-       if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_GET_FILE_ATT,
-                              image_type, &ret_mcp_resp, &ret_mcp_param,
-                              &ret_txn_size, (u32 *)&file_att) != 0)
-               return DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
+       int nvm_result = qed_mcp_nvm_rd_cmd(p_hwfn,
+                                           p_ptt,
+                                           DRV_MSG_CODE_NVM_GET_FILE_ATT,
+                                           image_type,
+                                           &ret_mcp_resp,
+                                           &ret_mcp_param,
+                                           &ret_txn_size,
+                                           (u32 *)&file_att);
 
        /* Check response */
-       if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
+       if (nvm_result ||
+           (ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
                return DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
 
        /* Update return values */
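
The NVRAM hunk folds two failure modes into one check: the command call itself failing, or the call succeeding but returning a non-OK response code. A stub sketch of that shape; the constants and the helper are invented:

    #include <stdint.h>
    #include <stdio.h>

    #define FW_MSG_CODE_MASK   0xffff0000 /* invented */
    #define FW_MSG_CODE_NVM_OK 0x00010000 /* invented */

    static int nvm_rd_cmd(uint32_t *resp) /* stand-in, 0 = transport OK */
    {
        *resp = FW_MSG_CODE_NVM_OK;
        return 0;
    }

    int main(void)
    {
        uint32_t resp;
        int rc = nvm_rd_cmd(&resp);

        /* fail on either a transport error or a bad response code */
        if (rc || (resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) {
            printf("NVRAM get image failed\n");
            return 1;
        }
        printf("OK\n");
        return 0;
    }
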
@@ -3944,7 +4231,6 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
        u32 running_mfw_addr =
                MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize) +
                QED_SECTION_SIZE(spad_trace_offsize) + trace_data_size_bytes;
-       enum dbg_status status;
        u32 nvram_image_type;
 
        *running_bundle_id = qed_rd(p_hwfn, p_ptt, running_mfw_addr);
@@ -3955,30 +4241,12 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
        nvram_image_type =
            (*running_bundle_id ==
             DIR_ID_1) ? NVM_TYPE_MFW_TRACE1 : NVM_TYPE_MFW_TRACE2;
-       status = qed_find_nvram_image(p_hwfn,
-                                     p_ptt,
-                                     nvram_image_type,
-                                     trace_meta_offset_bytes,
-                                     trace_meta_size_bytes);
-
-       return status;
-}
-
-/* Reads the MCP Trace data from the specified GRC address into the specified
- * buffer.
- */
-static void qed_mcp_trace_read_data(struct qed_hwfn *p_hwfn,
-                                   struct qed_ptt *p_ptt,
-                                   u32 grc_addr, u32 size_in_dwords, u32 *buf)
-{
-       u32 i;
 
-       DP_VERBOSE(p_hwfn,
-                  QED_MSG_DEBUG,
-                  "mcp_trace_read_data: reading trace data of size %d dwords from GRC address 0x%x\n",
-                  size_in_dwords, grc_addr);
-       for (i = 0; i < size_in_dwords; i++, grc_addr += BYTES_IN_DWORD)
-               buf[i] = qed_rd(p_hwfn, p_ptt, grc_addr);
+       return qed_find_nvram_image(p_hwfn,
+                                   p_ptt,
+                                   nvram_image_type,
+                                   trace_meta_offset_bytes,
+                                   trace_meta_size_bytes);
 }
 
 /* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified
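
Selecting the trace-meta image stays keyed off the running bundle id: an A/B pair of NVM images, one per MFW bundle, with the live one chosen at dump time. A sketch of that selection with invented ids and image types:

    #include <stdint.h>
    #include <stdio.h>

    #define DIR_ID_1        1 /* invented */
    #define NVM_TYPE_TRACE1 7 /* invented */
    #define NVM_TYPE_TRACE2 8 /* invented */

    int main(void)
    {
        uint32_t running_bundle_id = DIR_ID_1; /* read from the scratchpad */
        uint32_t image = running_bundle_id == DIR_ID_1 ? NVM_TYPE_TRACE1
                                                       : NVM_TYPE_TRACE2;

        printf("image type %u\n", image);
        return 0;
    }
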
@@ -4034,11 +4302,14 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
                                          bool dump, u32 *num_dumped_dwords)
 {
        u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords;
-       u32 trace_meta_size_dwords, running_bundle_id, offset = 0;
-       u32 trace_meta_offset_bytes, trace_meta_size_bytes;
+       u32 trace_meta_size_dwords = 0, running_bundle_id, offset = 0;
+       u32 trace_meta_offset_bytes = 0, trace_meta_size_bytes = 0;
        enum dbg_status status;
+       bool mcp_access;
        int halted = 0;
 
+       mcp_access = !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP);
+
        *num_dumped_dwords = 0;
 
        /* Get trace data info */
@@ -4060,7 +4331,7 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
         * consistent if halt fails, MCP trace is taken anyway, with a small
         * risk that it may be corrupt.
         */
-       if (dump) {
+       if (dump && mcp_access) {
                halted = !qed_mcp_halt(p_hwfn, p_ptt);
                if (!halted)
                        DP_NOTICE(p_hwfn, "MCP halt failed!\n");
@@ -4078,13 +4349,12 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
                                     dump, "size", trace_data_size_dwords);
 
        /* Read trace data from scratchpad into dump buffer */
-       if (dump)
-               qed_mcp_trace_read_data(p_hwfn,
-                                       p_ptt,
-                                       trace_data_grc_addr,
-                                       trace_data_size_dwords,
-                                       dump_buf + offset);
-       offset += trace_data_size_dwords;
+       offset += qed_grc_dump_addr_range(p_hwfn,
+                                         p_ptt,
+                                         dump_buf + offset,
+                                         dump,
+                                         BYTES_TO_DWORDS(trace_data_grc_addr),
+                                         trace_data_size_dwords);
 
        /* Resume MCP (only if halt succeeded) */
        if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0)
@@ -4095,38 +4365,38 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
                                       dump, "mcp_trace_meta", 1);
 
        /* Read trace meta info */
-       status = qed_mcp_trace_get_meta_info(p_hwfn,
-                                            p_ptt,
-                                            trace_data_size_bytes,
-                                            &running_bundle_id,
-                                            &trace_meta_offset_bytes,
-                                            &trace_meta_size_bytes);
-       if (status != DBG_STATUS_OK)
-               return status;
+       if (mcp_access) {
+               status = qed_mcp_trace_get_meta_info(p_hwfn,
+                                                    p_ptt,
+                                                    trace_data_size_bytes,
+                                                    &running_bundle_id,
+                                                    &trace_meta_offset_bytes,
+                                                    &trace_meta_size_bytes);
+               if (status == DBG_STATUS_OK)
+                       trace_meta_size_dwords =
+                               BYTES_TO_DWORDS(trace_meta_size_bytes);
+       }
 
-       /* Dump trace meta size param (trace_meta_size_bytes is always
-        * dword-aligned).
-        */
-       trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes);
-       offset += qed_dump_num_param(dump_buf + offset, dump, "size",
-                                    trace_meta_size_dwords);
+       /* Dump trace meta size param */
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump, "size", trace_meta_size_dwords);
 
        /* Read trace meta image into dump buffer */
-       if (dump) {
+       if (dump && trace_meta_size_dwords)
                status = qed_mcp_trace_read_meta(p_hwfn,
-                                               p_ptt,
-                                               trace_meta_offset_bytes,
-                                               trace_meta_size_bytes,
-                                               dump_buf + offset);
-               if (status != DBG_STATUS_OK)
-                       return status;
-       }
-
-       offset += trace_meta_size_dwords;
+                                                p_ptt,
+                                                trace_meta_offset_bytes,
+                                                trace_meta_size_bytes,
+                                                dump_buf + offset);
+       if (status == DBG_STATUS_OK)
+               offset += trace_meta_size_dwords;
 
        *num_dumped_dwords = offset;
 
-       return DBG_STATUS_OK;
+       /* If no mcp access, indicate that the dump doesn't contain the meta
+        * data from NVRAM.
+        */
+       return mcp_access ? status : DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
 }
 
 /* Dump GRC FIFO */
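
A parsing-relevant detail of the rework above: the "size" param for the trace meta is now emitted unconditionally, defaulting to 0 when MCP/NVRAM access is unavailable, so an offline parser can still walk the dump. A sketch of that convention; the dump format here is invented:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        bool mcp_access = false;       /* e.g. NO_MCP was requested */
        uint32_t meta_size_dwords = 0;

        if (mcp_access)
            meta_size_dwords = 42;     /* would come from NVRAM */

        /* parser always finds a size param; 0 means "no meta follows" */
        printf("param size=%u\n", meta_size_dwords);
        return 0;
    }
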
@@ -4311,9 +4581,10 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
                               struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
 {
        struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       struct fw_asserts_ram_section *asserts;
        char storm_letter_str[2] = "?";
        struct fw_info fw_info;
-       u32 offset = 0, i;
+       u32 offset = 0;
        u8 storm_id;
 
        /* Dump global params */
@@ -4323,8 +4594,8 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
        offset += qed_dump_str_param(dump_buf + offset,
                                     dump, "dump-type", "fw-asserts");
        for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
-               u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx,
-                       last_list_idx, element_addr;
+               u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx;
+               u32 last_list_idx, addr;
 
                if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id])
                        continue;
@@ -4332,6 +4603,8 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
                /* Read FW info for the current Storm */
                qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info);
 
+               asserts = &fw_info.fw_asserts_section;
+
                /* Dump FW Asserts section header and params */
                storm_letter_str[0] = s_storm_defs[storm_id].letter;
                offset += qed_dump_section_hdr(dump_buf + offset, dump,
@@ -4339,12 +4612,10 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
                offset += qed_dump_str_param(dump_buf + offset, dump, "storm",
                                             storm_letter_str);
                offset += qed_dump_num_param(dump_buf + offset, dump, "size",
-                                            fw_info.fw_asserts_section.
-                                            list_element_dword_size);
+                                            asserts->list_element_dword_size);
 
                if (!dump) {
-                       offset += fw_info.fw_asserts_section.
-                                 list_element_dword_size;
+                       offset += asserts->list_element_dword_size;
                        continue;
                }
 
@@ -4352,28 +4623,22 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
                fw_asserts_section_addr =
                        s_storm_defs[storm_id].sem_fast_mem_addr +
                        SEM_FAST_REG_INT_RAM +
-                       RAM_LINES_TO_BYTES(fw_info.fw_asserts_section.
-                                          section_ram_line_offset);
+                       RAM_LINES_TO_BYTES(asserts->section_ram_line_offset);
                next_list_idx_addr =
                        fw_asserts_section_addr +
-                       DWORDS_TO_BYTES(fw_info.fw_asserts_section.
-                                       list_next_index_dword_offset);
+                       DWORDS_TO_BYTES(asserts->list_next_index_dword_offset);
                next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr);
                last_list_idx = (next_list_idx > 0
                                 ? next_list_idx
-                                : fw_info.fw_asserts_section.list_num_elements)
-                               - 1;
-               element_addr =
-                       fw_asserts_section_addr +
-                       DWORDS_TO_BYTES(fw_info.fw_asserts_section.
-                                       list_dword_offset) +
-                       last_list_idx *
-                       DWORDS_TO_BYTES(fw_info.fw_asserts_section.
-                                       list_element_dword_size);
-               for (i = 0;
-                    i < fw_info.fw_asserts_section.list_element_dword_size;
-                    i++, offset++, element_addr += BYTES_IN_DWORD)
-                       dump_buf[offset] = qed_rd(p_hwfn, p_ptt, element_addr);
+                                : asserts->list_num_elements) - 1;
+               addr = BYTES_TO_DWORDS(fw_asserts_section_addr) +
+                      asserts->list_dword_offset +
+                      last_list_idx * asserts->list_element_dword_size;
+               offset +=
+                   qed_grc_dump_addr_range(p_hwfn, p_ptt,
+                                           dump_buf + offset,
+                                           dump, addr,
+                                           asserts->list_element_dword_size);
        }
 
        /* Dump last section */
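
The FW-asserts hunk keeps the circular-list arithmetic: next_list_idx points one past the most recently written element, so the last element is next - 1, wrapping to num_elements - 1 when next is 0. A sketch with invented values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t num_elements = 8;
        uint32_t next_list_idx = 0; /* writer just wrapped around */
        uint32_t last = (next_list_idx > 0 ? next_list_idx
                                           : num_elements) - 1;

        printf("last=%u\n", last); /* 7 */
        return 0;
    }
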
@@ -4386,13 +4651,10 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr)
 {
        /* Convert binary data to debug arrays */
-       u32 num_of_buffers = *(u32 *)bin_ptr;
-       struct bin_buffer_hdr *buf_array;
+       struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr;
        u8 buf_id;
 
-       buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
-
-       for (buf_id = 0; buf_id < num_of_buffers; buf_id++) {
+       for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) {
                s_dbg_arrays[buf_id].ptr =
                    (u32 *)(bin_ptr + buf_array[buf_id].offset);
                s_dbg_arrays[buf_id].size_in_dwords =
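
Both set_bin_ptr hunks drop the leading buffer-count word: the blob now starts directly with a fixed-size header array, one entry per known buffer type, each giving an offset and length into the blob. A sketch of that layout; the struct fields and MAX_TYPES stand in for the driver's definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_TYPES 2 /* stands in for MAX_BIN_DBG_BUFFER_TYPE */

    struct bin_buffer_hdr {
        uint32_t offset; /* byte offset of the buffer in the blob */
        uint32_t length; /* byte length of the buffer */
    };

    int main(void)
    {
        /* fake blob: header array followed by two 4-byte payloads */
        uint32_t blob[2 * MAX_TYPES + 2] = { 16, 4, 20, 4, 0xaaaa, 0xbbbb };
        const uint8_t *base = (const uint8_t *)blob;
        const struct bin_buffer_hdr *hdrs =
            (const struct bin_buffer_hdr *)blob;
        int i;

        for (i = 0; i < MAX_TYPES; i++)
            printf("buf %d: ptr=%p size=%u dwords\n", i,
                   (const void *)(base + hdrs[i].offset),
                   hdrs[i].length / 4);
        return 0;
    }
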
@@ -4402,6 +4664,17 @@ enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr)
        return DBG_STATUS_OK;
 }
 
+/* Assign default GRC param values */
+void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 i;
+
+       for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+               dev_data->grc.param_val[i] =
+                   s_grc_param_defs[i].default_val[dev_data->chip_id];
+}
+
 enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
                                              struct qed_ptt *p_ptt,
                                              u32 *buf_size)
@@ -4441,8 +4714,9 @@ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
        /* GRC Dump */
        status = qed_grc_dump(p_hwfn, p_ptt, dump_buf, true, num_dumped_dwords);
 
-       /* Clear all GRC params */
-       qed_dbg_grc_clear_params(p_hwfn);
+       /* Revert GRC params to their default */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
        return status;
 }
 
@@ -4495,6 +4769,10 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
 
        /* Idle Check Dump */
        *num_dumped_dwords = qed_idle_chk_dump(p_hwfn, p_ptt, dump_buf, true);
+
+       /* Revert GRC params to their default */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
        return DBG_STATUS_OK;
 }
 
@@ -4519,11 +4797,15 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
        u32 needed_buf_size_in_dwords;
        enum dbg_status status;
 
-       status = qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt,
+       /* validate buffer size */
+       status =
+           qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt,
                                                &needed_buf_size_in_dwords);
 
-       if (status != DBG_STATUS_OK)
+       if (status != DBG_STATUS_OK &&
+           status != DBG_STATUS_NVRAM_GET_IMAGE_FAILED)
                return status;
+
        if (buf_size_in_dwords < needed_buf_size_in_dwords)
                return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
@@ -4531,8 +4813,13 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
        qed_update_blocks_reset_state(p_hwfn, p_ptt);
 
        /* Perform dump */
-       return qed_mcp_trace_dump(p_hwfn,
-                                 p_ptt, dump_buf, true, num_dumped_dwords);
+       status = qed_mcp_trace_dump(p_hwfn,
+                                   p_ptt, dump_buf, true, num_dumped_dwords);
+
+       /* Revert GRC params to their default */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
+       return status;
 }
 
 enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
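
From here on the dump entry points converge on one wrapper shape: validate the buffer size, refresh reset state, run the dump, then always restore the GRC params to their defaults so one caller's overrides don't leak into the next dump. A stub sketch of that shape; all helpers are invented:

    #include <stdio.h>

    enum status { STATUS_OK, STATUS_BUF_TOO_SMALL };

    static void set_params_default(void) { /* restore GRC param defaults */ }
    static enum status do_dump(void)     { return STATUS_OK; }

    static enum status dump_wrapper(unsigned int buf_dwords,
                                    unsigned int needed_dwords)
    {
        enum status rc;

        if (buf_dwords < needed_dwords)
            return STATUS_BUF_TOO_SMALL;

        rc = do_dump();

        /* revert GRC params to their defaults, success or not */
        set_params_default();
        return rc;
    }

    int main(void)
    {
        printf("%d\n", dump_wrapper(128, 64)); /* 0 == STATUS_OK */
        return 0;
    }
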
@@ -4567,8 +4854,14 @@ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 
        /* Update reset state */
        qed_update_blocks_reset_state(p_hwfn, p_ptt);
-       return qed_reg_fifo_dump(p_hwfn,
-                                p_ptt, dump_buf, true, num_dumped_dwords);
+
+       status = qed_reg_fifo_dump(p_hwfn,
+                                  p_ptt, dump_buf, true, num_dumped_dwords);
+
+       /* Revert GRC params to their default */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
+       return status;
 }
 
 enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
@@ -4603,8 +4896,13 @@ enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 
        /* Update reset state */
        qed_update_blocks_reset_state(p_hwfn, p_ptt);
-       return qed_igu_fifo_dump(p_hwfn,
-                                p_ptt, dump_buf, true, num_dumped_dwords);
+
+       status = qed_igu_fifo_dump(p_hwfn,
+                                  p_ptt, dump_buf, true, num_dumped_dwords);
+       /* Revert GRC params to their default */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
+       return status;
 }
 
 enum dbg_status
@@ -4641,9 +4939,16 @@ enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
 
        /* Update reset state */
        qed_update_blocks_reset_state(p_hwfn, p_ptt);
-       return qed_protection_override_dump(p_hwfn,
-                                           p_ptt,
-                                           dump_buf, true, num_dumped_dwords);
+
+       status = qed_protection_override_dump(p_hwfn,
+                                             p_ptt,
+                                             dump_buf,
+                                             true, num_dumped_dwords);
+
+       /* Revert GRC params to their default */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
+       return status;
 }
 
 enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
@@ -5045,13 +5350,10 @@ static char s_temp_buf[MAX_MSG_LEN];
 enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
 {
        /* Convert binary data to debug arrays */
-       u32 num_of_buffers = *(u32 *)bin_ptr;
-       struct bin_buffer_hdr *buf_array;
+       struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr;
        u8 buf_id;
 
-       buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
-
-       for (buf_id = 0; buf_id < num_of_buffers; buf_id++) {
+       for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) {
                s_dbg_arrays[buf_id].ptr =
                    (u32 *)(bin_ptr + buf_array[buf_id].offset);
                s_dbg_arrays[buf_id].size_in_dwords =
@@ -5874,16 +6176,16 @@ static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
                results_offset +=
                    sprintf(qed_get_buf_ptr(results_buf,
                                            results_offset),
-                           "raw: 0x%016llx, address: 0x%07llx, access: %-5s, pf: %2lld, vf: %s, port: %lld, privilege: %-3s, protection: %-12s, master: %-4s, errors: ",
+                           "raw: 0x%016llx, address: 0x%07x, access: %-5s, pf: %2d, vf: %s, port: %d, privilege: %-3s, protection: %-12s, master: %-4s, errors: ",
                            elements[i].data,
-                           GET_FIELD(elements[i].data,
+                           (u32)GET_FIELD(elements[i].data,
                                      REG_FIFO_ELEMENT_ADDRESS) *
                                      REG_FIFO_ELEMENT_ADDR_FACTOR,
                                      s_access_strs[GET_FIELD(elements[i].data,
                                                    REG_FIFO_ELEMENT_ACCESS)],
-                           GET_FIELD(elements[i].data,
-                                     REG_FIFO_ELEMENT_PF), vf_str,
-                           GET_FIELD(elements[i].data,
+                           (u32)GET_FIELD(elements[i].data,
+                                          REG_FIFO_ELEMENT_PF), vf_str,
+                           (u32)GET_FIELD(elements[i].data,
                                      REG_FIFO_ELEMENT_PORT),
                                      s_privilege_strs[GET_FIELD(elements[i].
                                      data,
@@ -6189,13 +6491,13 @@ qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
                results_offset +=
                    sprintf(qed_get_buf_ptr(results_buf,
                                            results_offset),
-                           "window %2d, address: 0x%07x, size: %7lld regs, read: %lld, write: %lld, read protection: %-12s, write protection: %-12s\n",
+                           "window %2d, address: 0x%07x, size: %7d regs, read: %d, write: %d, read protection: %-12s, write protection: %-12s\n",
                            i, address,
-                           GET_FIELD(elements[i].data,
+                           (u32)GET_FIELD(elements[i].data,
                                      PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE),
-                           GET_FIELD(elements[i].data,
+                           (u32)GET_FIELD(elements[i].data,
                                      PROTECTION_OVERRIDE_ELEMENT_READ),
-                           GET_FIELD(elements[i].data,
+                           (u32)GET_FIELD(elements[i].data,
                                      PROTECTION_OVERRIDE_ELEMENT_WRITE),
                            s_protection_strs[GET_FIELD(elements[i].data,
                                PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION)],
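
The two sprintf hunks fix a format/argument mismatch: GET_FIELD() on a 64-bit element yields a 64-bit value, which must not be fed to 32-bit conversions, so the patch casts each field to u32 and adjusts the format accordingly. A sketch of the before/after; the field definition is invented and the shift-then-mask macro form is an assumption:

    #include <stdint.h>
    #include <stdio.h>

    #define ADDR_MASK  0x7fffffULL /* invented 23-bit field */
    #define ADDR_SHIFT 0
    #define GET_FIELD(v, n) (((v) >> n##_SHIFT) & n##_MASK)

    int main(void)
    {
        uint64_t data = 0x0123456789abcdefULL;

        /* broken: 64-bit argument for a 32-bit %x conversion */
        /* printf("address: 0x%07x\n", GET_FIELD(data, ADDR)); */

        /* fixed: narrow the field first, as the patch does */
        printf("address: 0x%07x\n", (uint32_t)GET_FIELD(data, ADDR));
        return 0;
    }
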
@@ -6508,7 +6810,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
         */
        rc = qed_features_lookup[feature_idx].get_size(p_hwfn, p_ptt,
                                                       &buf_size_dwords);
-       if (rc != DBG_STATUS_OK)
+       if (rc != DBG_STATUS_OK && rc != DBG_STATUS_NVRAM_GET_IMAGE_FAILED)
                return rc;
        feature->buf_size = buf_size_dwords * sizeof(u32);
        feature->dump_buf = vmalloc(feature->buf_size);
index e518f914eab13f52d8f82a8e1a29a5a80a2f2b24..249878533fd9793fced4fcfa546775904c178a25 100644 (file)
@@ -186,195 +186,569 @@ void qed_resc_free(struct qed_dev *cdev)
        }
 }
 
-static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable)
+/******************** QM initialization *******************/
+#define ACTIVE_TCS_BMAP 0x9f
+#define ACTIVE_TCS_BMAP_4PORT_K2 0xf
+
+/* determines the physical queue flags for a given PF. */
+static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
 {
-       u8 num_vports, vf_offset = 0, i, vport_id, num_ports, curr_queue = 0;
-       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
-       struct init_qm_port_params *p_qm_port;
-       bool init_rdma_offload_pq = false;
-       bool init_pure_ack_pq = false;
-       bool init_ooo_pq = false;
-       u16 num_pqs, multi_cos_tcs = 1;
-       u8 pf_wfq = qm_info->pf_wfq;
-       u32 pf_rl = qm_info->pf_rl;
-       u16 num_pf_rls = 0;
-       u16 num_vfs = 0;
-
-#ifdef CONFIG_QED_SRIOV
-       if (p_hwfn->cdev->p_iov_info)
-               num_vfs = p_hwfn->cdev->p_iov_info->total_vfs;
-#endif
-       memset(qm_info, 0, sizeof(*qm_info));
+       u32 flags;
 
-       num_pqs = multi_cos_tcs + num_vfs + 1;  /* The '1' is for pure-LB */
-       num_vports = (u8)RESC_NUM(p_hwfn, QED_VPORT);
+       /* common flags */
+       flags = PQ_FLAGS_LB;
 
-       if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
-               num_pqs++;      /* for RoCE queue */
-               init_rdma_offload_pq = true;
-               /* we subtract num_vfs because each require a rate limiter,
-                * and one default rate limiter
-                */
-               if (p_hwfn->pf_params.rdma_pf_params.enable_dcqcn)
-                       num_pf_rls = RESC_NUM(p_hwfn, QED_RL) - num_vfs - 1;
+       /* feature flags */
+       if (IS_QED_SRIOV(p_hwfn->cdev))
+               flags |= PQ_FLAGS_VFS;
 
-               num_pqs += num_pf_rls;
-               qm_info->num_pf_rls = (u8) num_pf_rls;
+       /* protocol flags */
+       switch (p_hwfn->hw_info.personality) {
+       case QED_PCI_ETH:
+               flags |= PQ_FLAGS_MCOS;
+               break;
+       case QED_PCI_FCOE:
+               flags |= PQ_FLAGS_OFLD;
+               break;
+       case QED_PCI_ISCSI:
+               flags |= PQ_FLAGS_ACK | PQ_FLAGS_OOO | PQ_FLAGS_OFLD;
+               break;
+       case QED_PCI_ETH_ROCE:
+               flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT;
+               break;
+       default:
+               DP_ERR(p_hwfn,
+                      "unknown personality %d\n", p_hwfn->hw_info.personality);
+               return 0;
        }
 
-       if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
-               num_pqs += 2;   /* for iSCSI pure-ACK / OOO queue */
-               init_pure_ack_pq = true;
-               init_ooo_pq = true;
-       }
+       return flags;
+}
 
-       /* Sanity checking that setup requires legal number of resources */
-       if (num_pqs > RESC_NUM(p_hwfn, QED_PQ)) {
-               DP_ERR(p_hwfn,
-                      "Need too many Physical queues - 0x%04x when only %04x are available\n",
-                      num_pqs, RESC_NUM(p_hwfn, QED_PQ));
-               return -EINVAL;
-       }
+/* Getters for resource amounts necessary for qm initialization */
+u8 qed_init_qm_get_num_tcs(struct qed_hwfn *p_hwfn)
+{
+       return p_hwfn->hw_info.num_hw_tc;
+}
 
-       /* PQs will be arranged as follows: First per-TC PQ then pure-LB quete.
-        */
-       qm_info->qm_pq_params = kcalloc(num_pqs,
-                                       sizeof(struct init_qm_pq_params),
-                                       b_sleepable ? GFP_KERNEL : GFP_ATOMIC);
-       if (!qm_info->qm_pq_params)
-               goto alloc_err;
+u16 qed_init_qm_get_num_vfs(struct qed_hwfn *p_hwfn)
+{
+       return IS_QED_SRIOV(p_hwfn->cdev) ?
+              p_hwfn->cdev->p_iov_info->total_vfs : 0;
+}
 
-       qm_info->qm_vport_params = kcalloc(num_vports,
-                                          sizeof(struct init_qm_vport_params),
-                                          b_sleepable ? GFP_KERNEL
-                                                      : GFP_ATOMIC);
-       if (!qm_info->qm_vport_params)
-               goto alloc_err;
+#define NUM_DEFAULT_RLS 1
 
-       qm_info->qm_port_params = kcalloc(MAX_NUM_PORTS,
-                                         sizeof(struct init_qm_port_params),
-                                         b_sleepable ? GFP_KERNEL
-                                                     : GFP_ATOMIC);
-       if (!qm_info->qm_port_params)
-               goto alloc_err;
+u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn)
+{
+       u16 num_pf_rls, num_vfs = qed_init_qm_get_num_vfs(p_hwfn);
 
-       qm_info->wfq_data = kcalloc(num_vports, sizeof(struct qed_wfq_data),
-                                   b_sleepable ? GFP_KERNEL : GFP_ATOMIC);
-       if (!qm_info->wfq_data)
-               goto alloc_err;
+       /* the number of RLs can't exceed the available RL or vport resources */
+       num_pf_rls = (u16) min_t(u32, RESC_NUM(p_hwfn, QED_RL),
+                                RESC_NUM(p_hwfn, QED_VPORT));
 
-       vport_id = (u8)RESC_START(p_hwfn, QED_VPORT);
+       /* Make sure after we reserve there's something left */
+       if (num_pf_rls < num_vfs + NUM_DEFAULT_RLS)
+               return 0;
 
-       /* First init rate limited queues */
-       for (curr_queue = 0; curr_queue < num_pf_rls; curr_queue++) {
-               qm_info->qm_pq_params[curr_queue].vport_id = vport_id++;
-               qm_info->qm_pq_params[curr_queue].tc_id =
-                   p_hwfn->hw_info.non_offload_tc;
-               qm_info->qm_pq_params[curr_queue].wrr_group = 1;
-               qm_info->qm_pq_params[curr_queue].rl_valid = 1;
-       }
+       /* subtract rls necessary for VFs and one default one for the PF */
+       num_pf_rls -= num_vfs + NUM_DEFAULT_RLS;
 
-       /* First init per-TC PQs */
-       for (i = 0; i < multi_cos_tcs; i++) {
-               struct init_qm_pq_params *params =
-                   &qm_info->qm_pq_params[curr_queue++];
+       return num_pf_rls;
+}
 
-               if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE ||
-                   p_hwfn->hw_info.personality == QED_PCI_ETH) {
-                       params->vport_id = vport_id;
-                       params->tc_id = p_hwfn->hw_info.non_offload_tc;
-                       params->wrr_group = 1;
-               } else {
-                       params->vport_id = vport_id;
-                       params->tc_id = p_hwfn->hw_info.offload_tc;
-                       params->wrr_group = 1;
-               }
-       }
+u16 qed_init_qm_get_num_vports(struct qed_hwfn *p_hwfn)
+{
+       u32 pq_flags = qed_get_pq_flags(p_hwfn);
+
+       /* all pqs share the same vport, except for vfs and pf_rl pqs */
+       return (!!(PQ_FLAGS_RLS & pq_flags)) *
+              qed_init_qm_get_num_pf_rls(p_hwfn) +
+              (!!(PQ_FLAGS_VFS & pq_flags)) *
+              qed_init_qm_get_num_vfs(p_hwfn) + 1;
+}
+
+/* calc amount of PQs according to the requested flags */
+u16 qed_init_qm_get_num_pqs(struct qed_hwfn *p_hwfn)
+{
+       u32 pq_flags = qed_get_pq_flags(p_hwfn);
+
+       return (!!(PQ_FLAGS_RLS & pq_flags)) *
+              qed_init_qm_get_num_pf_rls(p_hwfn) +
+              (!!(PQ_FLAGS_MCOS & pq_flags)) *
+              qed_init_qm_get_num_tcs(p_hwfn) +
+              (!!(PQ_FLAGS_LB & pq_flags)) + (!!(PQ_FLAGS_OOO & pq_flags)) +
+              (!!(PQ_FLAGS_ACK & pq_flags)) + (!!(PQ_FLAGS_OFLD & pq_flags)) +
+              (!!(PQ_FLAGS_LLT & pq_flags)) +
+              (!!(PQ_FLAGS_VFS & pq_flags)) * qed_init_qm_get_num_vfs(p_hwfn);
+}
+
+/* initialize the top level QM params */
+static void qed_init_qm_params(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+       bool four_port;
+
+       /* pq and vport bases for this PF */
+       qm_info->start_pq = (u16) RESC_START(p_hwfn, QED_PQ);
+       qm_info->start_vport = (u8) RESC_START(p_hwfn, QED_VPORT);
+
+       /* rate limiting and weighted fair queueing are always enabled */
+       qm_info->vport_rl_en = 1;
+       qm_info->vport_wfq_en = 1;
 
-       /* Then init pure-LB PQ */
-       qm_info->pure_lb_pq = curr_queue;
-       qm_info->qm_pq_params[curr_queue].vport_id =
-           (u8) RESC_START(p_hwfn, QED_VPORT);
-       qm_info->qm_pq_params[curr_queue].tc_id = PURE_LB_TC;
-       qm_info->qm_pq_params[curr_queue].wrr_group = 1;
-       curr_queue++;
-
-       qm_info->offload_pq = 0;
-       if (init_rdma_offload_pq) {
-               qm_info->offload_pq = curr_queue;
-               qm_info->qm_pq_params[curr_queue].vport_id = vport_id;
-               qm_info->qm_pq_params[curr_queue].tc_id =
-                   p_hwfn->hw_info.offload_tc;
-               qm_info->qm_pq_params[curr_queue].wrr_group = 1;
-               curr_queue++;
-       }
-
-       if (init_pure_ack_pq) {
-               qm_info->pure_ack_pq = curr_queue;
-               qm_info->qm_pq_params[curr_queue].vport_id = vport_id;
-               qm_info->qm_pq_params[curr_queue].tc_id =
-                   p_hwfn->hw_info.offload_tc;
-               qm_info->qm_pq_params[curr_queue].wrr_group = 1;
-               curr_queue++;
-       }
-
-       if (init_ooo_pq) {
-               qm_info->ooo_pq = curr_queue;
-               qm_info->qm_pq_params[curr_queue].vport_id = vport_id;
-               qm_info->qm_pq_params[curr_queue].tc_id = DCBX_ISCSI_OOO_TC;
-               qm_info->qm_pq_params[curr_queue].wrr_group = 1;
-               curr_queue++;
-       }
-
-       /* Then init per-VF PQs */
-       vf_offset = curr_queue;
-       for (i = 0; i < num_vfs; i++) {
-               /* First vport is used by the PF */
-               qm_info->qm_pq_params[curr_queue].vport_id = vport_id + i + 1;
-               qm_info->qm_pq_params[curr_queue].tc_id =
-                   p_hwfn->hw_info.non_offload_tc;
-               qm_info->qm_pq_params[curr_queue].wrr_group = 1;
-               qm_info->qm_pq_params[curr_queue].rl_valid = 1;
-               curr_queue++;
-       }
-
-       qm_info->vf_queues_offset = vf_offset;
-       qm_info->num_pqs = num_pqs;
-       qm_info->num_vports = num_vports;
+       /* TC config is different for AH 4 port */
+       four_port = p_hwfn->cdev->num_ports_in_engines == MAX_NUM_PORTS_K2;
 
+       /* in AH 4 port we have fewer TCs per port */
+       qm_info->max_phys_tcs_per_port = four_port ? NUM_PHYS_TCS_4PORT_K2 :
+                                                    NUM_OF_PHYS_TCS;
+
+       /* unless MFW indicated otherwise, ooo_tc == 3 for
+        * AH 4-port and 4 otherwise.
+        */
+       if (!qm_info->ooo_tc)
+               qm_info->ooo_tc = four_port ? DCBX_TCP_OOO_K2_4PORT_TC :
+                                             DCBX_TCP_OOO_TC;
+}
+
+/* initialize qm vport params */
+static void qed_init_qm_vport_params(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+       u8 i;
+
+       /* all vports participate in weighted fair queueing */
+       for (i = 0; i < qed_init_qm_get_num_vports(p_hwfn); i++)
+               qm_info->qm_vport_params[i].vport_wfq = 1;
+}
+
+/* initialize qm port params */
+static void qed_init_qm_port_params(struct qed_hwfn *p_hwfn)
+{
        /* Initialize qm port parameters */
-       num_ports = p_hwfn->cdev->num_ports_in_engines;
+       u8 i, active_phys_tcs, num_ports = p_hwfn->cdev->num_ports_in_engines;
+
+       /* indicate how ooo and high priority traffic is handled */
+       active_phys_tcs = num_ports == MAX_NUM_PORTS_K2 ?
+                         ACTIVE_TCS_BMAP_4PORT_K2 :
+                         ACTIVE_TCS_BMAP;
+
        for (i = 0; i < num_ports; i++) {
-               p_qm_port = &qm_info->qm_port_params[i];
+               struct init_qm_port_params *p_qm_port =
+                   &p_hwfn->qm_info.qm_port_params[i];
+
                p_qm_port->active = 1;
-               if (num_ports == 4)
-                       p_qm_port->active_phys_tcs = 0x7;
-               else
-                       p_qm_port->active_phys_tcs = 0x9f;
+               p_qm_port->active_phys_tcs = active_phys_tcs;
                p_qm_port->num_pbf_cmd_lines = PBF_MAX_CMD_LINES / num_ports;
                p_qm_port->num_btb_blocks = BTB_MAX_BLOCKS / num_ports;
        }
+}
+
+/* Reset the params which must be reset for qm init. QM init may be called as
+ * a result of flows other than driver load (e.g. dcbx renegotiation). Other
+ * params may be affected by the init but would simply recalculate to the same
+ * values. The allocations made for QM init, ports, vports, pqs and vfqs are not
+ * affected as these amounts stay the same.
+ */
+static void qed_init_qm_reset_params(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       qm_info->num_pqs = 0;
+       qm_info->num_vports = 0;
+       qm_info->num_pf_rls = 0;
+       qm_info->num_vf_pqs = 0;
+       qm_info->first_vf_pq = 0;
+       qm_info->first_mcos_pq = 0;
+       qm_info->first_rl_pq = 0;
+}
+
+static void qed_init_qm_advance_vport(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       qm_info->num_vports++;
+
+       if (qm_info->num_vports > qed_init_qm_get_num_vports(p_hwfn))
+               DP_ERR(p_hwfn,
+                      "vport overflow! qm_info->num_vports %d, qm_init_get_num_vports() %d\n",
+                      qm_info->num_vports, qed_init_qm_get_num_vports(p_hwfn));
+}
+
+/* initialize a single pq and manage qm_info resource accounting.
+ * The pq_init_flags param determines whether the PQ is rate limited
+ * (for a VF or the PF) and whether a new vport is allocated for the pq
+ * (otherwise an existing vport is shared).
+ */
+
+/* flags for pq init */
+#define PQ_INIT_SHARE_VPORT     (1 << 0)
+#define PQ_INIT_PF_RL           (1 << 1)
+#define PQ_INIT_VF_RL           (1 << 2)
+
+/* defines for pq init */
+#define PQ_INIT_DEFAULT_WRR_GROUP       1
+#define PQ_INIT_DEFAULT_TC              0
+#define PQ_INIT_OFLD_TC                 (p_hwfn->hw_info.offload_tc)
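+/* note: PQ_INIT_OFLD_TC expands to a p_hwfn dereference, so it is only
+ * usable inside functions that have a p_hwfn pointer in scope.
+ */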
+
+static void qed_init_qm_pq(struct qed_hwfn *p_hwfn,
+                          struct qed_qm_info *qm_info,
+                          u8 tc, u32 pq_init_flags)
+{
+       u16 pq_idx = qm_info->num_pqs, max_pq = qed_init_qm_get_num_pqs(p_hwfn);
+
+       if (pq_idx >= max_pq)
+               DP_ERR(p_hwfn,
+                      "pq overflow! pq %d, max pq %d\n", pq_idx, max_pq);
+
+       /* init pq params */
+       qm_info->qm_pq_params[pq_idx].vport_id = qm_info->start_vport +
+           qm_info->num_vports;
+       qm_info->qm_pq_params[pq_idx].tc_id = tc;
+       qm_info->qm_pq_params[pq_idx].wrr_group = PQ_INIT_DEFAULT_WRR_GROUP;
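+       /* the pq is rate limited if either PF or VF rate limiting was
+        * requested for it
+        */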
+       qm_info->qm_pq_params[pq_idx].rl_valid =
+           (pq_init_flags & PQ_INIT_PF_RL || pq_init_flags & PQ_INIT_VF_RL);
+
+       /* qm params accounting */
+       qm_info->num_pqs++;
+       if (!(pq_init_flags & PQ_INIT_SHARE_VPORT))
+               qm_info->num_vports++;
+
+       if (pq_init_flags & PQ_INIT_PF_RL)
+               qm_info->num_pf_rls++;
+
+       if (qm_info->num_vports > qed_init_qm_get_num_vports(p_hwfn))
+               DP_ERR(p_hwfn,
+                      "vport overflow! qm_info->num_vports %d, qm_init_get_num_vports() %d\n",
+                      qm_info->num_vports, qed_init_qm_get_num_vports(p_hwfn));
+
+       if (qm_info->num_pf_rls > qed_init_qm_get_num_pf_rls(p_hwfn))
+               DP_ERR(p_hwfn,
+                      "rl overflow! qm_info->num_pf_rls %d, qm_init_get_num_pf_rls() %d\n",
+                      qm_info->num_pf_rls, qed_init_qm_get_num_pf_rls(p_hwfn));
+}
+
+/* get pq index according to PQ_FLAGS */
+static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
+                                          u32 pq_flags)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       /* Can't have multiple flags set here */
+       if (bitmap_weight((unsigned long *)&pq_flags,
+                         sizeof(pq_flags) * BITS_PER_BYTE) > 1)
+               goto err;
+
+       switch (pq_flags) {
+       case PQ_FLAGS_RLS:
+               return &qm_info->first_rl_pq;
+       case PQ_FLAGS_MCOS:
+               return &qm_info->first_mcos_pq;
+       case PQ_FLAGS_LB:
+               return &qm_info->pure_lb_pq;
+       case PQ_FLAGS_OOO:
+               return &qm_info->ooo_pq;
+       case PQ_FLAGS_ACK:
+               return &qm_info->pure_ack_pq;
+       case PQ_FLAGS_OFLD:
+               return &qm_info->offload_pq;
+       case PQ_FLAGS_LLT:
+               return &qm_info->low_latency_pq;
+       case PQ_FLAGS_VFS:
+               return &qm_info->first_vf_pq;
+       default:
+               goto err;
+       }
+
+err:
+       DP_ERR(p_hwfn, "BAD pq flags %d\n", pq_flags);
+       return NULL;
+}
+
+/* save pq index in qm info; the stored value is absolute, i.e. the PF's
+ * start_pq plus the pq's relative index
+ */
+static void qed_init_qm_set_idx(struct qed_hwfn *p_hwfn,
+                               u32 pq_flags, u16 pq_val)
+{
+       u16 *base_pq_idx = qed_init_qm_get_idx_from_flags(p_hwfn, pq_flags);
+
+       /* bad flags were already logged by the getter */
+       if (!base_pq_idx)
+               return;
+
+       *base_pq_idx = p_hwfn->qm_info.start_pq + pq_val;
+}
+
+/* get tx pq index, with the PQ TX base already set (ready for context init) */
+u16 qed_get_cm_pq_idx(struct qed_hwfn *p_hwfn, u32 pq_flags)
+{
+       u16 *base_pq_idx = qed_init_qm_get_idx_from_flags(p_hwfn, pq_flags);
+
+       return *base_pq_idx + CM_TX_PQ_BASE;
+}
+
+u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc)
+{
+       u8 max_tc = qed_init_qm_get_num_tcs(p_hwfn);
+
+       if (tc >= max_tc)
+               DP_ERR(p_hwfn, "tc %d must be smaller than %d\n", tc, max_tc);
+
+       return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + tc;
+}
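+/* e.g. a caller owning traffic class 2 would resolve its tx pq id via
+ * qed_get_cm_pq_idx_mcos(p_hwfn, 2), i.e. first_mcos_pq + 2 plus the
+ * CM tx pq base.
+ */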
+
+u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf)
+{
+       u16 max_vf = qed_init_qm_get_num_vfs(p_hwfn);
+
+       if (vf >= max_vf)
+               DP_ERR(p_hwfn, "vf %d must be smaller than %d\n", vf, max_vf);
+
+       return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf;
+}
+
+u16 qed_get_cm_pq_idx_rl(struct qed_hwfn *p_hwfn, u8 rl)
+{
+       u16 max_rl = qed_init_qm_get_num_pf_rls(p_hwfn);
+
+       if (rl >= max_rl)
+               DP_ERR(p_hwfn, "rl %d must be smaller than %d\n", rl, max_rl);
+
+       return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_RLS) + rl;
+}
+
+/* Functions for creating specific types of pqs */
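+/* each creator below returns early if its pq type is inactive, records the
+ * type's first pq index, and then creates its pq(s).
+ */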
+static void qed_init_qm_lb_pq(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_LB))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_LB, qm_info->num_pqs);
+       qed_init_qm_pq(p_hwfn, qm_info, PURE_LB_TC, PQ_INIT_SHARE_VPORT);
+}
 
-       qm_info->max_phys_tcs_per_port = NUM_OF_PHYS_TCS;
+static void qed_init_qm_ooo_pq(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_OOO))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_OOO, qm_info->num_pqs);
+       qed_init_qm_pq(p_hwfn, qm_info, qm_info->ooo_tc, PQ_INIT_SHARE_VPORT);
+}
 
-       qm_info->start_pq = (u16)RESC_START(p_hwfn, QED_PQ);
+static void qed_init_qm_pure_ack_pq(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_ACK))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_ACK, qm_info->num_pqs);
+       qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT);
+}
 
+static void qed_init_qm_offload_pq(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_OFLD))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_OFLD, qm_info->num_pqs);
+       qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT);
+}
+
+static void qed_init_qm_low_latency_pq(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_LLT))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_LLT, qm_info->num_pqs);
+       qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT);
+}
+
+static void qed_init_qm_mcos_pqs(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+       u8 tc_idx;
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_MCOS))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_MCOS, qm_info->num_pqs);
+       for (tc_idx = 0; tc_idx < qed_init_qm_get_num_tcs(p_hwfn); tc_idx++)
+               qed_init_qm_pq(p_hwfn, qm_info, tc_idx, PQ_INIT_SHARE_VPORT);
+}
+
+static void qed_init_qm_vf_pqs(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+       u16 vf_idx, num_vfs = qed_init_qm_get_num_vfs(p_hwfn);
+
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_VFS))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_VFS, qm_info->num_pqs);
        qm_info->num_vf_pqs = num_vfs;
-       qm_info->start_vport = (u8) RESC_START(p_hwfn, QED_VPORT);
+       for (vf_idx = 0; vf_idx < num_vfs; vf_idx++)
+               qed_init_qm_pq(p_hwfn,
+                              qm_info, PQ_INIT_DEFAULT_TC, PQ_INIT_VF_RL);
+}
 
-       for (i = 0; i < qm_info->num_vports; i++)
-               qm_info->qm_vport_params[i].vport_wfq = 1;
+static void qed_init_qm_rl_pqs(struct qed_hwfn *p_hwfn)
+{
+       u16 pf_rls_idx, num_pf_rls = qed_init_qm_get_num_pf_rls(p_hwfn);
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
 
-       qm_info->vport_rl_en = 1;
-       qm_info->vport_wfq_en = 1;
-       qm_info->pf_rl = pf_rl;
-       qm_info->pf_wfq = pf_wfq;
+       if (!(qed_get_pq_flags(p_hwfn) & PQ_FLAGS_RLS))
+               return;
+
+       qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_RLS, qm_info->num_pqs);
+       for (pf_rls_idx = 0; pf_rls_idx < num_pf_rls; pf_rls_idx++)
+               qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_PF_RL);
+}
+
+static void qed_init_qm_pq_params(struct qed_hwfn *p_hwfn)
+{
+       /* rate limited pqs, must come first (FW assumption) */
+       qed_init_qm_rl_pqs(p_hwfn);
+
+       /* pqs for multi cos */
+       qed_init_qm_mcos_pqs(p_hwfn);
+
+       /* pure loopback pq */
+       qed_init_qm_lb_pq(p_hwfn);
+
+       /* out of order pq */
+       qed_init_qm_ooo_pq(p_hwfn);
+
+       /* pure ack pq */
+       qed_init_qm_pure_ack_pq(p_hwfn);
+
+       /* pq for offloaded protocol */
+       qed_init_qm_offload_pq(p_hwfn);
+
+       /* low latency pq */
+       qed_init_qm_low_latency_pq(p_hwfn);
+
+       /* done sharing vports */
+       qed_init_qm_advance_vport(p_hwfn);
+
+       /* pqs for vfs */
+       qed_init_qm_vf_pqs(p_hwfn);
+}
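+/* the resulting pq array layout is thus: rl pqs first (per the FW
+ * assumption noted above), then mcos, lb, ooo, ack, ofld and llt pqs,
+ * and finally the per-vf pqs.
+ */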
+
+/* compare values of getters against resource amounts */
+static int qed_init_qm_sanity(struct qed_hwfn *p_hwfn)
+{
+       if (qed_init_qm_get_num_vports(p_hwfn) > RESC_NUM(p_hwfn, QED_VPORT)) {
+               DP_ERR(p_hwfn, "requested amount of vports exceeds resource\n");
+               return -EINVAL;
+       }
+
+       if (qed_init_qm_get_num_pqs(p_hwfn) > RESC_NUM(p_hwfn, QED_PQ)) {
+               DP_ERR(p_hwfn, "requested amount of pqs exceeds resource\n");
+               return -EINVAL;
+       }
 
        return 0;
+}
 
-alloc_err:
-       qed_qm_info_free(p_hwfn);
-       return -ENOMEM;
+static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+       struct init_qm_vport_params *vport;
+       struct init_qm_port_params *port;
+       struct init_qm_pq_params *pq;
+       int i, tc;
+
+       /* top level params */
+       DP_VERBOSE(p_hwfn,
+                  NETIF_MSG_HW,
+                  "qm init top level params: start_pq %d, start_vport %d, pure_lb_pq %d, offload_pq %d, pure_ack_pq %d\n",
+                  qm_info->start_pq,
+                  qm_info->start_vport,
+                  qm_info->pure_lb_pq,
+                  qm_info->offload_pq, qm_info->pure_ack_pq);
+       DP_VERBOSE(p_hwfn,
+                  NETIF_MSG_HW,
+                  "ooo_pq %d, first_vf_pq %d, num_pqs %d, num_vf_pqs %d, num_vports %d, max_phys_tcs_per_port %d\n",
+                  qm_info->ooo_pq,
+                  qm_info->first_vf_pq,
+                  qm_info->num_pqs,
+                  qm_info->num_vf_pqs,
+                  qm_info->num_vports, qm_info->max_phys_tcs_per_port);
+       DP_VERBOSE(p_hwfn,
+                  NETIF_MSG_HW,
+                  "pf_rl_en %d, pf_wfq_en %d, vport_rl_en %d, vport_wfq_en %d, pf_wfq %d, pf_rl %d, num_pf_rls %d, pq_flags %x\n",
+                  qm_info->pf_rl_en,
+                  qm_info->pf_wfq_en,
+                  qm_info->vport_rl_en,
+                  qm_info->vport_wfq_en,
+                  qm_info->pf_wfq,
+                  qm_info->pf_rl,
+                  qm_info->num_pf_rls, qed_get_pq_flags(p_hwfn));
+
+       /* port table */
+       for (i = 0; i < p_hwfn->cdev->num_ports_in_engines; i++) {
+               port = &(qm_info->qm_port_params[i]);
+               DP_VERBOSE(p_hwfn,
+                          NETIF_MSG_HW,
+                          "port idx %d, active %d, active_phys_tcs %d, num_pbf_cmd_lines %d, num_btb_blocks %d, reserved %d\n",
+                          i,
+                          port->active,
+                          port->active_phys_tcs,
+                          port->num_pbf_cmd_lines,
+                          port->num_btb_blocks, port->reserved);
+       }
+
+       /* vport table */
+       for (i = 0; i < qm_info->num_vports; i++) {
+               vport = &(qm_info->qm_vport_params[i]);
+               DP_VERBOSE(p_hwfn,
+                          NETIF_MSG_HW,
+                          "vport idx %d, vport_rl %d, wfq %d, first_tx_pq_id [ ",
+                          qm_info->start_vport + i,
+                          vport->vport_rl, vport->vport_wfq);
+               for (tc = 0; tc < NUM_OF_TCS; tc++)
+                       DP_VERBOSE(p_hwfn,
+                                  NETIF_MSG_HW,
+                                  "%d ", vport->first_tx_pq_id[tc]);
+               DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "]\n");
+       }
+
+       /* pq table */
+       for (i = 0; i < qm_info->num_pqs; i++) {
+               pq = &(qm_info->qm_pq_params[i]);
+               DP_VERBOSE(p_hwfn,
+                          NETIF_MSG_HW,
+                          "pq idx %d, vport_id %d, tc %d, wrr_grp %d, rl_valid %d\n",
+                          qm_info->start_pq + i,
+                          pq->vport_id,
+                          pq->tc_id, pq->wrr_group, pq->rl_valid);
+       }
+}
+
+static void qed_init_qm_info(struct qed_hwfn *p_hwfn)
+{
+       /* reset params required for init run */
+       qed_init_qm_reset_params(p_hwfn);
+
+       /* init QM top level params */
+       qed_init_qm_params(p_hwfn);
+
+       /* init QM port params */
+       qed_init_qm_port_params(p_hwfn);
+
+       /* init QM vport params */
+       qed_init_qm_vport_params(p_hwfn);
+
+       /* init QM physical queue params */
+       qed_init_qm_pq_params(p_hwfn);
+
+       /* display everything that was initialized above */
+       qed_dp_init_qm_params(p_hwfn);
 }
 
 /* This function reconfigures the QM pf on the fly.
@@ -391,17 +765,8 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        bool b_rc;
        int rc;
 
-       /* qm_info is allocated in qed_init_qm_info() which is already called
-        * from qed_resc_alloc() or previous call of qed_qm_reconf().
-        * The allocated size may change each init, so we free it before next
-        * allocation.
-        */
-       qed_qm_info_free(p_hwfn);
-
        /* initialize qed's qm data structure */
-       rc = qed_init_qm_info(p_hwfn, false);
-       if (rc)
-               return rc;
+       qed_init_qm_info(p_hwfn);
 
        /* stop PF's qm queues */
        spin_lock_bh(&qm_lock);
@@ -434,6 +799,47 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        return 0;
 }
 
+static int qed_alloc_qm_data(struct qed_hwfn *p_hwfn)
+{
+       struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+       int rc;
+
+       rc = qed_init_qm_sanity(p_hwfn);
+       if (rc)
+               goto alloc_err;
+
+       qm_info->qm_pq_params = kzalloc(sizeof(*qm_info->qm_pq_params) *
+                                       qed_init_qm_get_num_pqs(p_hwfn),
+                                       GFP_KERNEL);
+       if (!qm_info->qm_pq_params)
+               goto alloc_err;
+
+       qm_info->qm_vport_params = kzalloc(sizeof(*qm_info->qm_vport_params) *
+                                          qed_init_qm_get_num_vports(p_hwfn),
+                                          GFP_KERNEL);
+       if (!qm_info->qm_vport_params)
+               goto alloc_err;
+
+       qm_info->qm_port_params = kzalloc(sizeof(*qm_info->qm_port_params) *
+                                         p_hwfn->cdev->num_ports_in_engines,
+                                         GFP_KERNEL);
+       if (!qm_info->qm_port_params)
+               goto alloc_err;
+
+       qm_info->wfq_data = kzalloc(sizeof(*qm_info->wfq_data) *
+                                   qed_init_qm_get_num_vports(p_hwfn),
+                                   GFP_KERNEL);
+       if (!qm_info->wfq_data)
+               goto alloc_err;
+
+       return 0;
+
+alloc_err:
+       DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n");
+       qed_qm_info_free(p_hwfn);
+       return -ENOMEM;
+}
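+/* the arrays above are sized with the same getters that qm init uses for
+ * its accounting, so an overrun during pq init would be reported by the
+ * overflow checks in qed_init_qm_pq().
+ */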
+
 int qed_resc_alloc(struct qed_dev *cdev)
 {
        struct qed_iscsi_info *p_iscsi_info;
@@ -442,8 +848,10 @@ int qed_resc_alloc(struct qed_dev *cdev)
 #ifdef CONFIG_QED_LL2
        struct qed_ll2_info *p_ll2_info;
 #endif
+       u32 rdma_tasks, excess_tasks;
        struct qed_consq *p_consq;
        struct qed_eq *p_eq;
+       u32 line_count;
        int i, rc = 0;
 
        if (IS_VF(cdev))
@@ -465,19 +873,44 @@ int qed_resc_alloc(struct qed_dev *cdev)
                /* Set the HW cid/tid numbers (in the context manager)
                 * Must be done prior to any further computations.
                 */
-               rc = qed_cxt_set_pf_params(p_hwfn);
+               rc = qed_cxt_set_pf_params(p_hwfn, RDMA_MAX_TIDS);
                if (rc)
                        goto alloc_err;
 
-               /* Prepare and process QM requirements */
-               rc = qed_init_qm_info(p_hwfn, true);
+               rc = qed_alloc_qm_data(p_hwfn);
                if (rc)
                        goto alloc_err;
 
+               /* init qm info */
+               qed_init_qm_info(p_hwfn);
+
                /* Compute the ILT client partition */
-               rc = qed_cxt_cfg_ilt_compute(p_hwfn);
-               if (rc)
-                       goto alloc_err;
+               rc = qed_cxt_cfg_ilt_compute(p_hwfn, &line_count);
+               if (rc) {
+                       DP_NOTICE(p_hwfn,
+                                 "too many ILT lines; re-computing with less lines\n");
+                       /* In case there are not enough ILT lines we reduce the
+                        * number of RDMA tasks and re-compute.
+                        */
+                       excess_tasks =
+                           qed_cxt_cfg_ilt_compute_excess(p_hwfn, line_count);
+                       if (!excess_tasks)
+                               goto alloc_err;
+
+                       rdma_tasks = RDMA_MAX_TIDS - excess_tasks;
+                       rc = qed_cxt_set_pf_params(p_hwfn, rdma_tasks);
+                       if (rc)
+                               goto alloc_err;
+
+                       rc = qed_cxt_cfg_ilt_compute(p_hwfn, &line_count);
+                       if (rc) {
+                               DP_ERR(p_hwfn,
+                                      "failed ILT compute. Requested too many lines: %u\n",
+                                      line_count);
+
+                               goto alloc_err;
+                       }
+               }
 
                /* CID map / ILT shadow table / T2
                 * The tables' sizes are determined by the computations above
@@ -674,11 +1107,19 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
+static int qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
 {
        int hw_mode = 0;
 
-       hw_mode = (1 << MODE_BB_B0);
+       if (QED_IS_BB_B0(p_hwfn->cdev)) {
+               hw_mode |= 1 << MODE_BB;
+       } else if (QED_IS_AH(p_hwfn->cdev)) {
+               hw_mode |= 1 << MODE_K2;
+       } else {
+               DP_NOTICE(p_hwfn, "Unknown chip type %#x\n",
+                         p_hwfn->cdev->type);
+               return -EINVAL;
+       }
 
        switch (p_hwfn->cdev->num_ports_in_engines) {
        case 1:
@@ -693,7 +1134,7 @@ static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
        default:
                DP_NOTICE(p_hwfn, "num_ports_in_engine = %d not supported\n",
                          p_hwfn->cdev->num_ports_in_engines);
-               return;
+               return -EINVAL;
        }
 
        switch (p_hwfn->cdev->mf_mode) {
@@ -719,6 +1160,8 @@ static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
        DP_VERBOSE(p_hwfn, (NETIF_MSG_PROBE | NETIF_MSG_IFUP),
                   "Configuring function for hw_mode: 0x%08x\n",
                   p_hwfn->hw_info.hw_mode);
+
+       return 0;
 }
 
 /* Init run time data for all PFs on an engine. */
@@ -754,10 +1197,10 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
        struct qed_qm_info *qm_info = &p_hwfn->qm_info;
        struct qed_qm_common_rt_init_params params;
        struct qed_dev *cdev = p_hwfn->cdev;
+       u8 vf_id, max_num_vfs;
        u16 num_pfs, pf_id;
        u32 concrete_fid;
        int rc = 0;
-       u8 vf_id;
 
        qed_init_cau_rt_data(cdev);
 
@@ -814,7 +1257,8 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
                qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
        }
 
-       for (vf_id = 0; vf_id < MAX_NUM_VFS_BB; vf_id++) {
+       max_num_vfs = QED_IS_AH(cdev) ? MAX_NUM_VFS_K2 : MAX_NUM_VFS_BB;
+       for (vf_id = 0; vf_id < max_num_vfs; vf_id++) {
                concrete_fid = qed_vfid_to_concrete(p_hwfn, vf_id);
                qed_fid_pretend(p_hwfn, p_ptt, (u16) concrete_fid);
                qed_wr(p_hwfn, p_ptt, CCFC_REG_STRONG_ENABLE_VF, 0x1);
@@ -1095,25 +1539,34 @@ static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn,
               p_hwfn->mcp_info->mfw_mb_cur, p_hwfn->mcp_info->mfw_mb_length);
 }
 
-int qed_hw_init(struct qed_dev *cdev,
-               struct qed_tunn_start_params *p_tunn,
-               bool b_hw_start,
-               enum qed_int_mode int_mode,
-               bool allow_npar_tx_switch,
-               const u8 *bin_fw_data)
+static void
+qed_fill_load_req_params(struct qed_load_req_params *p_load_req,
+                        struct qed_drv_load_params *p_drv_load)
+{
+       memset(p_load_req, 0, sizeof(*p_load_req));
+
+       p_load_req->drv_role = p_drv_load->is_crash_kernel ?
+                              QED_DRV_ROLE_KDUMP : QED_DRV_ROLE_OS;
+       p_load_req->timeout_val = p_drv_load->mfw_timeout_val;
+       p_load_req->avoid_eng_reset = p_drv_load->avoid_eng_reset;
+       p_load_req->override_force_load = p_drv_load->override_force_load;
+}
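+/* e.g. when running as a crash (kdump) kernel, the load request carries
+ * the QED_DRV_ROLE_KDUMP role so the MFW can account for it when
+ * arbitrating the load.
+ */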
+
+int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 {
+       struct qed_load_req_params load_req_params;
        u32 load_code, param, drv_mb_param;
        bool b_default_mtu = true;
        struct qed_hwfn *p_hwfn;
        int rc = 0, mfw_rc, i;
 
-       if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
+       if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
                DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n");
                return -EINVAL;
        }
 
        if (IS_PF(cdev)) {
-               rc = qed_init_fw_data(cdev, bin_fw_data);
+               rc = qed_init_fw_data(cdev, p_params->bin_fw_data);
                if (rc)
                        return rc;
        }
@@ -1135,19 +1588,25 @@ int qed_hw_init(struct qed_dev *cdev,
                /* Enable DMAE in PXP */
                rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true);
 
-               qed_calc_hw_mode(p_hwfn);
+               rc = qed_calc_hw_mode(p_hwfn);
+               if (rc)
+                       return rc;
 
-               rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, &load_code);
+               qed_fill_load_req_params(&load_req_params,
+                                        p_params->p_drv_load_params);
+               rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt,
+                                     &load_req_params);
                if (rc) {
-                       DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n");
+                       DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n");
                        return rc;
                }
 
-               qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
-
+               load_code = load_req_params.load_code;
                DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                          "Load request was sent. Resp:0x%x, Load code: 0x%x\n",
-                          rc, load_code);
+                          "Load request was sent. Load code: 0x%x\n",
+                          load_code);
+
+               qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
 
                p_hwfn->first_on_engine = (load_code ==
                                           FW_MSG_CODE_DRV_LOAD_ENGINE);
@@ -1168,11 +1627,15 @@ int qed_hw_init(struct qed_dev *cdev,
                /* Fall into */
                case FW_MSG_CODE_DRV_LOAD_FUNCTION:
                        rc = qed_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt,
-                                           p_tunn, p_hwfn->hw_info.hw_mode,
-                                           b_hw_start, int_mode,
-                                           allow_npar_tx_switch);
+                                           p_params->p_tunn,
+                                           p_hwfn->hw_info.hw_mode,
+                                           p_params->b_hw_start,
+                                           p_params->int_mode,
+                                           p_params->allow_npar_tx_switch);
                        break;
                default:
+                       DP_NOTICE(p_hwfn,
+                                 "Unexpected load code [0x%08x]", load_code);
                        rc = -EINVAL;
                        break;
                }
@@ -1212,10 +1675,7 @@ int qed_hw_init(struct qed_dev *cdev,
 
        if (IS_PF(cdev)) {
                p_hwfn = QED_LEADING_HWFN(cdev);
-               drv_mb_param = (FW_MAJOR_VERSION << 24) |
-                              (FW_MINOR_VERSION << 16) |
-                              (FW_REVISION_VERSION << 8) |
-                              (FW_ENGINEERING_VERSION);
+               drv_mb_param = STORM_FW_VERSION;
                rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
                                 DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER,
                                 drv_mb_param, &load_code, &param);
@@ -1290,27 +1750,53 @@ void qed_hw_timers_stop_all(struct qed_dev *cdev)
 
 int qed_hw_stop(struct qed_dev *cdev)
 {
-       int rc = 0, t_rc;
+       struct qed_hwfn *p_hwfn;
+       struct qed_ptt *p_ptt;
+       int rc, rc2 = 0;
        int j;
 
        for_each_hwfn(cdev, j) {
-               struct qed_hwfn *p_hwfn = &cdev->hwfns[j];
-               struct qed_ptt *p_ptt = p_hwfn->p_main_ptt;
+               p_hwfn = &cdev->hwfns[j];
+               p_ptt = p_hwfn->p_main_ptt;
 
                DP_VERBOSE(p_hwfn, NETIF_MSG_IFDOWN, "Stopping hw/fw\n");
 
                if (IS_VF(cdev)) {
                        qed_vf_pf_int_cleanup(p_hwfn);
+                       rc = qed_vf_pf_reset(p_hwfn);
+                       if (rc) {
+                               DP_NOTICE(p_hwfn,
+                                         "qed_vf_pf_reset failed. rc = %d.\n",
+                                         rc);
+                               rc2 = -EINVAL;
+                       }
                        continue;
                }
 
                /* mark the hw as uninitialized... */
                p_hwfn->hw_init_done = false;
 
+               /* Send unload command to MCP */
+               rc = qed_mcp_unload_req(p_hwfn, p_ptt);
+               if (rc) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed sending a UNLOAD_REQ command. rc = %d.\n",
+                                 rc);
+                       rc2 = -EINVAL;
+               }
+
+               qed_slowpath_irq_sync(p_hwfn);
+
+               /* After this point no MFW attentions are expected, e.g. prevent
+                * race between pf stop and dcbx pf update.
+                */
                rc = qed_sp_pf_stop(p_hwfn);
-               if (rc)
+               if (rc) {
                        DP_NOTICE(p_hwfn,
-                                 "Failed to close PF against FW. Continue to stop HW to prevent illegal host access by the device\n");
+                                 "Failed to close PF against FW [rc = %d]. Continue to stop HW to prevent illegal host access by the device.\n",
+                                 rc);
+                       rc2 = -EINVAL;
+               }
 
                qed_wr(p_hwfn, p_ptt,
                       NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1);
@@ -1333,20 +1819,37 @@ int qed_hw_stop(struct qed_dev *cdev)
 
                /* Need to wait 1ms to guarantee SBs are cleared */
                usleep_range(1000, 2000);
+
+               /* Disable PF in HW blocks */
+               qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
+               qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
+
+               rc = qed_mcp_unload_done(p_hwfn, p_ptt);
+               if (rc) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed sending a UNLOAD_DONE command. rc = %d.\n",
+                                 rc);
+                       rc2 = -EINVAL;
+               }
        }
 
        if (IS_PF(cdev)) {
+               p_hwfn = QED_LEADING_HWFN(cdev);
+               p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;
+
                /* Disable DMAE in PXP - in CMT, this should only be done for
                 * first hw-function, and only after all transactions have
                 * stopped for all active hw-functions.
                 */
-               t_rc = qed_change_pci_hwfn(&cdev->hwfns[0],
-                                          cdev->hwfns[0].p_main_ptt, false);
-               if (t_rc != 0)
-                       rc = t_rc;
+               rc = qed_change_pci_hwfn(p_hwfn, p_ptt, false);
+               if (rc) {
+                       DP_NOTICE(p_hwfn,
+                                 "qed_change_pci_hwfn failed. rc = %d.\n", rc);
+                       rc2 = -EINVAL;
+               }
        }
 
-       return rc;
+       return rc2;
 }
 
 void qed_hw_stop_fastpath(struct qed_dev *cdev)
@@ -1374,104 +1877,21 @@ void qed_hw_stop_fastpath(struct qed_dev *cdev)
                qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_ROCE, 0x0);
                qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_OPENFLOW, 0x0);
 
-               qed_int_igu_init_pure_rt(p_hwfn, p_ptt, false, false);
-
-               /* Need to wait 1ms to guarantee SBs are cleared */
-               usleep_range(1000, 2000);
-       }
-}
-
-void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn)
-{
-       if (IS_VF(p_hwfn->cdev))
-               return;
-
-       /* Re-open incoming traffic */
-       qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-              NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
-}
-
-static int qed_reg_assert(struct qed_hwfn *p_hwfn,
-                         struct qed_ptt *p_ptt, u32 reg, bool expected)
-{
-       u32 assert_val = qed_rd(p_hwfn, p_ptt, reg);
-
-       if (assert_val != expected) {
-               DP_NOTICE(p_hwfn, "Value at address 0x%08x != 0x%08x\n",
-                         reg, expected);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-int qed_hw_reset(struct qed_dev *cdev)
-{
-       int rc = 0;
-       u32 unload_resp, unload_param;
-       u32 wol_param;
-       int i;
-
-       switch (cdev->wol_config) {
-       case QED_OV_WOL_DISABLED:
-               wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED;
-               break;
-       case QED_OV_WOL_ENABLED:
-               wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED;
-               break;
-       default:
-               DP_NOTICE(cdev,
-                         "Unknown WoL configuration %02x\n", cdev->wol_config);
-               /* Fallthrough */
-       case QED_OV_WOL_DEFAULT:
-               wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
-       }
-
-       for_each_hwfn(cdev, i) {
-               struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-
-               if (IS_VF(cdev)) {
-                       rc = qed_vf_pf_reset(p_hwfn);
-                       if (rc)
-                               return rc;
-                       continue;
-               }
-
-               DP_VERBOSE(p_hwfn, NETIF_MSG_IFDOWN, "Resetting hw/fw\n");
-
-               /* Check for incorrect states */
-               qed_reg_assert(p_hwfn, p_hwfn->p_main_ptt,
-                              QM_REG_USG_CNT_PF_TX, 0);
-               qed_reg_assert(p_hwfn, p_hwfn->p_main_ptt,
-                              QM_REG_USG_CNT_PF_OTHER, 0);
-
-               /* Disable PF in HW blocks */
-               qed_wr(p_hwfn, p_hwfn->p_main_ptt, DORQ_REG_PF_DB_ENABLE, 0);
-               qed_wr(p_hwfn, p_hwfn->p_main_ptt, QM_REG_PF_EN, 0);
-               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-                      TCFC_REG_STRONG_ENABLE_PF, 0);
-               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-                      CCFC_REG_STRONG_ENABLE_PF, 0);
-
-               /* Send unload command to MCP */
-               rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-                                DRV_MSG_CODE_UNLOAD_REQ, wol_param,
-                                &unload_resp, &unload_param);
-               if (rc) {
-                       DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n");
-                       unload_resp = FW_MSG_CODE_DRV_UNLOAD_ENGINE;
-               }
+               qed_int_igu_init_pure_rt(p_hwfn, p_ptt, false, false);
 
-               rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-                                DRV_MSG_CODE_UNLOAD_DONE,
-                                0, &unload_resp, &unload_param);
-               if (rc) {
-                       DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_DONE failed\n");
-                       return rc;
-               }
+               /* Need to wait 1ms to guarantee SBs are cleared */
+               usleep_range(1000, 2000);
        }
+}
 
-       return rc;
+void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn)
+{
+       if (IS_VF(p_hwfn->cdev))
+               return;
+
+       /* Re-open incoming traffic */
+       qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+              NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
 }
 
 /* Free hwfn memory and resources acquired in hw_hwfn_prepare */
@@ -1485,10 +1905,25 @@ static void qed_hw_hwfn_free(struct qed_hwfn *p_hwfn)
 static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn)
 {
        /* clear indirect access */
-       qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_88_F0, 0);
-       qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_8C_F0, 0);
-       qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_90_F0, 0);
-       qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_94_F0, 0);
+       if (QED_IS_AH(p_hwfn->cdev)) {
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_E8_F0_K2, 0);
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_EC_F0_K2, 0);
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_F0_F0_K2, 0);
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_F4_F0_K2, 0);
+       } else {
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_88_F0_BB, 0);
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_8C_F0_BB, 0);
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_90_F0_BB, 0);
+               qed_wr(p_hwfn, p_hwfn->p_main_ptt,
+                      PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0);
+       }
 
        /* Clean Previous errors if such exist */
        qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@@ -1522,7 +1957,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 {
        u32 *feat_num = p_hwfn->hw_info.feat_num;
        struct qed_sb_cnt_info sb_cnt_info;
-       int num_features = 1;
+       u32 non_l2_sbs = 0;
 
        if (IS_ENABLED(CONFIG_QED_RDMA) &&
            p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
@@ -1530,204 +1965,255 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
                 * the status blocks equally between L2 / RoCE but with
                 * consideration as to how many l2 queues / cnqs we have.
                 */
-               num_features++;
-
                feat_num[QED_RDMA_CNQ] =
-                       min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features,
+                       min_t(u32, RESC_NUM(p_hwfn, QED_SB) / 2,
                              RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM));
-       }
 
-       feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) /
-                                               num_features,
-                                       RESC_NUM(p_hwfn, QED_L2_QUEUE));
+               non_l2_sbs = feat_num[QED_RDMA_CNQ];
+       }
 
-       memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
-       qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
-       feat_num[QED_VF_L2_QUE] =
-           min_t(u32,
-                 RESC_NUM(p_hwfn, QED_L2_QUEUE) -
-                 FEAT_NUM(p_hwfn, QED_PF_L2_QUE), sb_cnt_info.sb_iov_cnt);
+       if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE ||
+           p_hwfn->hw_info.personality == QED_PCI_ETH) {
+               /* Start by allocating VF queues, then PF's */
+               memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+               qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+               feat_num[QED_VF_L2_QUE] = min_t(u32,
+                                               RESC_NUM(p_hwfn, QED_L2_QUEUE),
+                                               sb_cnt_info.sb_iov_cnt);
+               feat_num[QED_PF_L2_QUE] = min_t(u32,
+                                               RESC_NUM(p_hwfn, QED_SB) -
+                                               non_l2_sbs,
+                                               RESC_NUM(p_hwfn,
+                                                        QED_L2_QUEUE) -
+                                               FEAT_NUM(p_hwfn,
+                                                        QED_VF_L2_QUE));
+       }
 
        DP_VERBOSE(p_hwfn,
                   NETIF_MSG_PROBE,
-                  "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #SBS=%d num_features=%d\n",
+                  "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #SBS=%d\n",
                   (int)FEAT_NUM(p_hwfn, QED_PF_L2_QUE),
                   (int)FEAT_NUM(p_hwfn, QED_VF_L2_QUE),
                   (int)FEAT_NUM(p_hwfn, QED_RDMA_CNQ),
-                  RESC_NUM(p_hwfn, QED_SB), num_features);
+                  RESC_NUM(p_hwfn, QED_SB));
 }
 
-static enum resource_id_enum qed_hw_get_mfw_res_id(enum qed_resources res_id)
+const char *qed_hw_get_resc_name(enum qed_resources res_id)
 {
-       enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID;
-
        switch (res_id) {
-       case QED_SB:
-               mfw_res_id = RESOURCE_NUM_SB_E;
-               break;
        case QED_L2_QUEUE:
-               mfw_res_id = RESOURCE_NUM_L2_QUEUE_E;
-               break;
+               return "L2_QUEUE";
        case QED_VPORT:
-               mfw_res_id = RESOURCE_NUM_VPORT_E;
-               break;
+               return "VPORT";
        case QED_RSS_ENG:
-               mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E;
-               break;
+               return "RSS_ENG";
        case QED_PQ:
-               mfw_res_id = RESOURCE_NUM_PQ_E;
-               break;
+               return "PQ";
        case QED_RL:
-               mfw_res_id = RESOURCE_NUM_RL_E;
-               break;
+               return "RL";
        case QED_MAC:
+               return "MAC";
        case QED_VLAN:
-               /* Each VFC resource can accommodate both a MAC and a VLAN */
-               mfw_res_id = RESOURCE_VFC_FILTER_E;
-               break;
+               return "VLAN";
+       case QED_RDMA_CNQ_RAM:
+               return "RDMA_CNQ_RAM";
        case QED_ILT:
-               mfw_res_id = RESOURCE_ILT_E;
-               break;
+               return "ILT";
        case QED_LL2_QUEUE:
-               mfw_res_id = RESOURCE_LL2_QUEUE_E;
-               break;
-       case QED_RDMA_CNQ_RAM:
+               return "LL2_QUEUE";
        case QED_CMDQS_CQS:
-               /* CNQ/CMDQS are the same resource */
-               mfw_res_id = RESOURCE_CQS_E;
-               break;
+               return "CMDQS_CQS";
        case QED_RDMA_STATS_QUEUE:
-               mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E;
-               break;
+               return "RDMA_STATS_QUEUE";
+       case QED_BDQ:
+               return "BDQ";
+       case QED_SB:
+               return "SB";
        default:
-               break;
+               return "UNKNOWN_RESOURCE";
+       }
+}
+
+static int
+__qed_hw_set_soft_resc_size(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt,
+                           enum qed_resources res_id,
+                           u32 resc_max_val, u32 *p_mcp_resp)
+{
+       int rc;
+
+       rc = qed_mcp_set_resc_max_val(p_hwfn, p_ptt, res_id,
+                                     resc_max_val, p_mcp_resp);
+       if (rc) {
+               DP_NOTICE(p_hwfn,
+                         "MFW response failure for a max value setting of resource %d [%s]\n",
+                         res_id, qed_hw_get_resc_name(res_id));
+               return rc;
+       }
+
+       if (*p_mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK)
+               DP_INFO(p_hwfn,
+                       "Failed to set the max value of resource %d [%s]. mcp_resp = 0x%08x.\n",
+                       res_id, qed_hw_get_resc_name(res_id), *p_mcp_resp);
+
+       return 0;
+}
+
+static int
+qed_hw_set_soft_resc_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       bool b_ah = QED_IS_AH(p_hwfn->cdev);
+       u32 resc_max_val, mcp_resp;
+       u8 res_id;
+       int rc;
+
+       for (res_id = 0; res_id < QED_MAX_RESC; res_id++) {
+               switch (res_id) {
+               case QED_LL2_QUEUE:
+                       resc_max_val = MAX_NUM_LL2_RX_QUEUES;
+                       break;
+               case QED_RDMA_CNQ_RAM:
+                       /* No need for a case for QED_CMDQS_CQS since
+                        * CNQ/CMDQS are the same resource.
+                        */
+                       resc_max_val = NUM_OF_CMDQS_CQS;
+                       break;
+               case QED_RDMA_STATS_QUEUE:
+                       resc_max_val = b_ah ? RDMA_NUM_STATISTIC_COUNTERS_K2
+                           : RDMA_NUM_STATISTIC_COUNTERS_BB;
+                       break;
+               case QED_BDQ:
+                       resc_max_val = BDQ_NUM_RESOURCES;
+                       break;
+               default:
+                       continue;
+               }
+
+               rc = __qed_hw_set_soft_resc_size(p_hwfn, p_ptt, res_id,
+                                                resc_max_val, &mcp_resp);
+               if (rc)
+                       return rc;
+
+               /* There's no point in continuing to the next resource if the
+                * command is not supported by the MFW.
+                * We do continue if the command is supported but the resource
+                * is unknown to the MFW. Such a resource will be later
+                * configured with the default allocation values.
+                */
+               if (mcp_resp == FW_MSG_CODE_UNSUPPORTED)
+                       return -EINVAL;
        }
 
-       return mfw_res_id;
+       return 0;
 }
 
-static u32 qed_hw_get_dflt_resc_num(struct qed_hwfn *p_hwfn,
-                                   enum qed_resources res_id)
+static
+int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn,
+                        enum qed_resources res_id,
+                        u32 *p_resc_num, u32 *p_resc_start)
 {
        u8 num_funcs = p_hwfn->num_funcs_on_engine;
+       bool b_ah = QED_IS_AH(p_hwfn->cdev);
        struct qed_sb_cnt_info sb_cnt_info;
-       u32 dflt_resc_num = 0;
 
        switch (res_id) {
-       case QED_SB:
-               memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
-               qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
-               dflt_resc_num = sb_cnt_info.sb_cnt;
-               break;
        case QED_L2_QUEUE:
-               dflt_resc_num = MAX_NUM_L2_QUEUES_BB / num_funcs;
+               *p_resc_num = (b_ah ? MAX_NUM_L2_QUEUES_K2 :
+                              MAX_NUM_L2_QUEUES_BB) / num_funcs;
                break;
        case QED_VPORT:
-               dflt_resc_num = MAX_NUM_VPORTS_BB / num_funcs;
+               *p_resc_num = (b_ah ? MAX_NUM_VPORTS_K2 :
+                              MAX_NUM_VPORTS_BB) / num_funcs;
                break;
        case QED_RSS_ENG:
-               dflt_resc_num = ETH_RSS_ENGINE_NUM_BB / num_funcs;
+               *p_resc_num = (b_ah ? ETH_RSS_ENGINE_NUM_K2 :
+                              ETH_RSS_ENGINE_NUM_BB) / num_funcs;
                break;
        case QED_PQ:
-               /* The granularity of the PQs is 8 */
-               dflt_resc_num = MAX_QM_TX_QUEUES_BB / num_funcs;
-               dflt_resc_num &= ~0x7;
+               *p_resc_num = (b_ah ? MAX_QM_TX_QUEUES_K2 :
+                              MAX_QM_TX_QUEUES_BB) / num_funcs;
+               *p_resc_num &= ~0x7;    /* The granularity of the PQs is 8 */
                break;
        case QED_RL:
-               dflt_resc_num = MAX_QM_GLOBAL_RLS / num_funcs;
+               *p_resc_num = MAX_QM_GLOBAL_RLS / num_funcs;
                break;
        case QED_MAC:
        case QED_VLAN:
                /* Each VFC resource can accommodate both a MAC and a VLAN */
-               dflt_resc_num = ETH_NUM_MAC_FILTERS / num_funcs;
+               *p_resc_num = ETH_NUM_MAC_FILTERS / num_funcs;
                break;
        case QED_ILT:
-               dflt_resc_num = PXP_NUM_ILT_RECORDS_BB / num_funcs;
+               *p_resc_num = (b_ah ? PXP_NUM_ILT_RECORDS_K2 :
+                              PXP_NUM_ILT_RECORDS_BB) / num_funcs;
                break;
        case QED_LL2_QUEUE:
-               dflt_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs;
+               *p_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs;
                break;
        case QED_RDMA_CNQ_RAM:
        case QED_CMDQS_CQS:
                /* CNQ/CMDQS are the same resource */
-               dflt_resc_num = NUM_OF_CMDQS_CQS / num_funcs;
+               *p_resc_num = NUM_OF_CMDQS_CQS / num_funcs;
                break;
        case QED_RDMA_STATS_QUEUE:
-               dflt_resc_num = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs;
+               *p_resc_num = (b_ah ? RDMA_NUM_STATISTIC_COUNTERS_K2 :
+                              RDMA_NUM_STATISTIC_COUNTERS_BB) / num_funcs;
                break;
-       default:
+       case QED_BDQ:
+               if (p_hwfn->hw_info.personality != QED_PCI_ISCSI &&
+                   p_hwfn->hw_info.personality != QED_PCI_FCOE)
+                       *p_resc_num = 0;
+               else
+                       *p_resc_num = 1;
+               break;
+       case QED_SB:
+               memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+               qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+               *p_resc_num = sb_cnt_info.sb_cnt;
                break;
+       default:
+               return -EINVAL;
        }
 
-       return dflt_resc_num;
-}
-
-static const char *qed_hw_get_resc_name(enum qed_resources res_id)
-{
        switch (res_id) {
-       case QED_SB:
-               return "SB";
-       case QED_L2_QUEUE:
-               return "L2_QUEUE";
-       case QED_VPORT:
-               return "VPORT";
-       case QED_RSS_ENG:
-               return "RSS_ENG";
-       case QED_PQ:
-               return "PQ";
-       case QED_RL:
-               return "RL";
-       case QED_MAC:
-               return "MAC";
-       case QED_VLAN:
-               return "VLAN";
-       case QED_RDMA_CNQ_RAM:
-               return "RDMA_CNQ_RAM";
-       case QED_ILT:
-               return "ILT";
-       case QED_LL2_QUEUE:
-               return "LL2_QUEUE";
-       case QED_CMDQS_CQS:
-               return "CMDQS_CQS";
-       case QED_RDMA_STATS_QUEUE:
-               return "RDMA_STATS_QUEUE";
+       case QED_BDQ:
+               if (!*p_resc_num)
+                       *p_resc_start = 0;
+               else if (p_hwfn->cdev->num_ports_in_engines == 4)
+                       *p_resc_start = p_hwfn->port_id;
+               else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+                       *p_resc_start = p_hwfn->port_id;
+               else if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
+                       *p_resc_start = p_hwfn->port_id + 2;
+               break;
        default:
-               return "UNKNOWN_RESOURCE";
+               *p_resc_start = *p_resc_num * p_hwfn->enabled_func_idx;
+               break;
        }
+
+       return 0;
 }
 
-static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
-                               enum qed_resources res_id)
+static int __qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
+                                 enum qed_resources res_id)
 {
-       u32 dflt_resc_num = 0, dflt_resc_start = 0, mcp_resp, mcp_param;
-       u32 *p_resc_num, *p_resc_start;
-       struct resource_info resc_info;
+       u32 dflt_resc_num = 0, dflt_resc_start = 0;
+       u32 mcp_resp, *p_resc_num, *p_resc_start;
        int rc;
 
        p_resc_num = &RESC_NUM(p_hwfn, res_id);
        p_resc_start = &RESC_START(p_hwfn, res_id);
 
-       /* Default values assumes that each function received equal share */
-       dflt_resc_num = qed_hw_get_dflt_resc_num(p_hwfn, res_id);
-       if (!dflt_resc_num) {
+       rc = qed_hw_get_dflt_resc(p_hwfn, res_id, &dflt_resc_num,
+                                 &dflt_resc_start);
+       if (rc) {
                DP_ERR(p_hwfn,
                       "Failed to get default amount for resource %d [%s]\n",
                       res_id, qed_hw_get_resc_name(res_id));
-               return -EINVAL;
-       }
-       dflt_resc_start = dflt_resc_num * p_hwfn->enabled_func_idx;
-
-       memset(&resc_info, 0, sizeof(resc_info));
-       resc_info.res_id = qed_hw_get_mfw_res_id(res_id);
-       if (resc_info.res_id == RESOURCE_NUM_INVALID) {
-               DP_ERR(p_hwfn,
-                      "Failed to match resource %d [%s] with the MFW resources\n",
-                      res_id, qed_hw_get_resc_name(res_id));
-               return -EINVAL;
+               return rc;
        }
 
-       rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, &resc_info,
-                                  &mcp_resp, &mcp_param);
+       rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, res_id,
+                                  &mcp_resp, p_resc_num, p_resc_start);
        if (rc) {
                DP_NOTICE(p_hwfn,
                          "MFW response failure for an allocation request for resource %d [%s]\n",
@@ -1740,13 +2226,12 @@ static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
         * - There is an internal error in the MFW while processing the request
         * - The resource ID is unknown to the MFW
         */
-       if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK &&
-           mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED) {
-               DP_NOTICE(p_hwfn,
-                         "Resource %d [%s]: No allocation info was received [mcp_resp 0x%x]. Applying default values [num %d, start %d].\n",
-                         res_id,
-                         qed_hw_get_resc_name(res_id),
-                         mcp_resp, dflt_resc_num, dflt_resc_start);
+       if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK) {
+               DP_INFO(p_hwfn,
+                       "Failed to receive allocation info for resource %d [%s]. mcp_resp = 0x%x. Applying default values [%d,%d].\n",
+                       res_id,
+                       qed_hw_get_resc_name(res_id),
+                       mcp_resp, dflt_resc_num, dflt_resc_start);
                *p_resc_num = dflt_resc_num;
                *p_resc_start = dflt_resc_start;
                goto out;
@@ -1754,13 +2239,9 @@ static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
 
        /* Special handling for status blocks; Would be revised in future */
        if (res_id == QED_SB) {
-               resc_info.size -= 1;
-               resc_info.offset -= p_hwfn->enabled_func_idx;
+               *p_resc_num -= 1;
+               *p_resc_start -= p_hwfn->enabled_func_idx;
        }
-
-       *p_resc_num = resc_info.size;
-       *p_resc_start = resc_info.offset;
-
 out:
        /* PQs have to divide by 8 [that's the HW granularity].
         * Reduce number so it would fit.
@@ -1778,19 +2259,88 @@ out:
        return 0;
 }
 
-static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn)
 {
-       u8 res_id;
        int rc;
+       u8 res_id;
 
        for (res_id = 0; res_id < QED_MAX_RESC; res_id++) {
-               rc = qed_hw_set_resc_info(p_hwfn, res_id);
+               rc = __qed_hw_set_resc_info(p_hwfn, res_id);
                if (rc)
                        return rc;
        }
 
+       return 0;
+}
+
+#define QED_RESC_ALLOC_LOCK_RETRY_CNT           10
+#define QED_RESC_ALLOC_LOCK_RETRY_INTVL_US      10000  /* 10 msec */
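+/* worst case, acquiring the resource lock is retried 10 times with a
+ * 10 msec sleep between attempts, i.e. roughly 100 msec in total.
+ */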
+
+static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       struct qed_resc_unlock_params resc_unlock_params;
+       struct qed_resc_lock_params resc_lock_params;
+       bool b_ah = QED_IS_AH(p_hwfn->cdev);
+       u8 res_id;
+       int rc;
+
+       /* Setting the max values of the soft resources and the following
+        * resources allocation queries should be atomic. Since several PFs can
+        * run in parallel - a resource lock is needed.
+        * If either the resource lock or resource set value commands are not
+        * supported - skip the max values setting, release the lock if
+        * needed, and proceed to the queries. Other failures, including a
+        * failure to acquire the lock, will cause this function to fail.
+        */
+       memset(&resc_lock_params, 0, sizeof(resc_lock_params));
+       resc_lock_params.resource = QED_RESC_LOCK_RESC_ALLOC;
+       resc_lock_params.retry_num = QED_RESC_ALLOC_LOCK_RETRY_CNT;
+       resc_lock_params.retry_interval = QED_RESC_ALLOC_LOCK_RETRY_INTVL_US;
+       resc_lock_params.sleep_b4_retry = true;
+       memset(&resc_unlock_params, 0, sizeof(resc_unlock_params));
+       resc_unlock_params.resource = QED_RESC_LOCK_RESC_ALLOC;
+
+       rc = qed_mcp_resc_lock(p_hwfn, p_ptt, &resc_lock_params);
+       if (rc && rc != -EINVAL) {
+               return rc;
+       } else if (rc == -EINVAL) {
+               DP_INFO(p_hwfn,
+                       "Skip the max values setting of the soft resources since the resource lock is not supported by the MFW\n");
+       } else if (!rc && !resc_lock_params.b_granted) {
+               DP_NOTICE(p_hwfn,
+                         "Failed to acquire the resource lock for the resource allocation commands\n");
+               return -EBUSY;
+       } else {
+               rc = qed_hw_set_soft_resc_size(p_hwfn, p_ptt);
+               if (rc && rc != -EINVAL) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed to set the max values of the soft resources\n");
+                       goto unlock_and_exit;
+               } else if (rc == -EINVAL) {
+                       DP_INFO(p_hwfn,
+                               "Skip the max values setting of the soft resources since it is not supported by the MFW\n");
+                       rc = qed_mcp_resc_unlock(p_hwfn, p_ptt,
+                                                &resc_unlock_params);
+                       if (rc)
+                               DP_INFO(p_hwfn,
+                                       "Failed to release the resource lock for the resource allocation commands\n");
+               }
+       }
+
+       rc = qed_hw_set_resc_info(p_hwfn);
+       if (rc)
+               goto unlock_and_exit;
+
+       if (resc_lock_params.b_granted && !resc_unlock_params.b_released) {
+               rc = qed_mcp_resc_unlock(p_hwfn, p_ptt, &resc_unlock_params);
+               if (rc)
+                       DP_INFO(p_hwfn,
+                               "Failed to release the resource lock for the resource allocation commands\n");
+       }
+
        /* Sanity for ILT */
-       if ((RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB)) {
+       if ((b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_K2)) ||
+           (!b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB))) {
                DP_NOTICE(p_hwfn, "Can't assign ILT pages [%08x,...,%08x]\n",
                          RESC_START(p_hwfn, QED_ILT),
                          RESC_END(p_hwfn, QED_ILT) - 1);
@@ -1799,8 +2349,6 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
 
        qed_hw_set_feat(p_hwfn);
 
-       DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
-                  "The numbers for each resource are:\n");
        for (res_id = 0; res_id < QED_MAX_RESC; res_id++)
                DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "%s = %d start = %d\n",
                           qed_hw_get_resc_name(res_id),
@@ -1808,6 +2356,11 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
                           RESC_START(p_hwfn, res_id));
 
        return 0;
+
+unlock_and_exit:
+       if (resc_lock_params.b_granted && !resc_unlock_params.b_released)
+               qed_mcp_resc_unlock(p_hwfn, p_ptt, &resc_unlock_params);
+       return rc;
 }
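
A minimal user-space sketch (not part of the patch) of the acquire/retry/release discipline the comment in qed_hw_get_resc() describes: retry the lock a bounded number of times with a sleep between attempts, do the protected work, then release. try_lock() and the param struct are hypothetical stand-ins for qed_mcp_resc_lock() and struct qed_resc_lock_params.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct lock_params {
	unsigned retry_num;         /* mirrors QED_RESC_ALLOC_LOCK_RETRY_CNT (10) */
	unsigned retry_interval_us; /* mirrors ..._RETRY_INTVL_US (10000 us = 10 ms) */
	bool granted;
};

/* hypothetical stand-in for the MFW lock command */
static bool try_lock(void) { return true; }

static int acquire_resc_lock(struct lock_params *p)
{
	for (unsigned i = 0; i < p->retry_num; i++) {
		if (try_lock()) {
			p->granted = true;
			return 0;
		}
		usleep(p->retry_interval_us); /* sleep_b4_retry == true */
	}
	return -1; /* like !b_granted above, which maps to -EBUSY */
}

int main(void)
{
	struct lock_params p = { .retry_num = 10, .retry_interval_us = 10000 };

	if (acquire_resc_lock(&p) == 0) {
		/* ... set soft resource sizes and query allocations ... */
		p.granted = false; /* analogue of qed_mcp_resc_unlock() */
		puts("lock acquired and released");
	}
	return 0;
}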
 
 static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -1860,9 +2413,15 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        case NVM_CFG1_GLOB_NETWORK_PORT_MODE_2X25G:
                p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_2X25G;
                break;
+       case NVM_CFG1_GLOB_NETWORK_PORT_MODE_2X10G:
+               p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_2X10G;
+               break;
        case NVM_CFG1_GLOB_NETWORK_PORT_MODE_1X25G:
                p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G;
                break;
+       case NVM_CFG1_GLOB_NETWORK_PORT_MODE_4X25G:
+               p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_4X25G;
+               break;
        default:
                DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", core_cfg);
                break;
@@ -1976,8 +2535,9 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u8 num_funcs, enabled_func_idx = p_hwfn->rel_pf_id;
        u32 reg_function_hide, tmp, eng_mask, low_pfs_mask;
+       struct qed_dev *cdev = p_hwfn->cdev;
 
-       num_funcs = MAX_NUM_PFS_BB;
+       num_funcs = QED_IS_AH(cdev) ? MAX_NUM_PFS_K2 : MAX_NUM_PFS_BB;
 
        /* Bit 0 of MISCS_REG_FUNCTION_HIDE indicates whether the bypass values
         * in the other bits are selected.
@@ -1990,12 +2550,17 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        reg_function_hide = qed_rd(p_hwfn, p_ptt, MISCS_REG_FUNCTION_HIDE);
 
        if (reg_function_hide & 0x1) {
-               if (QED_PATH_ID(p_hwfn) && p_hwfn->cdev->num_hwfns == 1) {
-                       num_funcs = 0;
-                       eng_mask = 0xaaaa;
+               if (QED_IS_BB(cdev)) {
+                       if (QED_PATH_ID(p_hwfn) && cdev->num_hwfns == 1) {
+                               num_funcs = 0;
+                               eng_mask = 0xaaaa;
+                       } else {
+                               num_funcs = 1;
+                               eng_mask = 0x5554;
+                       }
                } else {
                        num_funcs = 1;
-                       eng_mask = 0x5554;
+                       eng_mask = 0xfffe;
                }
 
                /* Get the number of the enabled functions on the engine */
@@ -2027,24 +2592,12 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                   p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine);
 }
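
The eng_mask values above (0xaaaa, 0x5554, 0xfffe) select which PF bits of MISCS_REG_FUNCTION_HIDE belong to the current engine; counting the engine's functions then reduces to a masked popcount. A small sketch under that assumption (the bit polarity of the register is not modeled here):

#include <stdio.h>

static int count_engine_funcs(unsigned reg_function_hide, unsigned eng_mask)
{
	/* keep only this engine's PF bits, then count them; whether a set
	 * bit means hidden or enabled is a register detail left to the
	 * hardware documentation
	 */
	return __builtin_popcount(reg_function_hide & eng_mask);
}

int main(void)
{
	/* BB with two engines: mask 0xaaaa picks the odd-numbered PFs */
	printf("%d PF bits set for this engine\n",
	       count_engine_funcs(0xff00, 0xaaaa));
	return 0;
}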
 
-static int
-qed_get_hw_info(struct qed_hwfn *p_hwfn,
-               struct qed_ptt *p_ptt,
-               enum qed_pci_personality personality)
+static void qed_hw_info_port_num_bb(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt)
 {
        u32 port_mode;
-       int rc;
-
-       /* Since all information is common, only first hwfns should do this */
-       if (IS_LEAD_HWFN(p_hwfn)) {
-               rc = qed_iov_hw_info(p_hwfn);
-               if (rc)
-                       return rc;
-       }
 
-       /* Read the port mode */
-       port_mode = qed_rd(p_hwfn, p_ptt,
-                          CNIG_REG_NW_PORT_MODE_BB_B0);
+       port_mode = qed_rd(p_hwfn, p_ptt, CNIG_REG_NW_PORT_MODE_BB_B0);
 
        if (port_mode < 3) {
                p_hwfn->cdev->num_ports_in_engines = 1;
@@ -2057,6 +2610,54 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
                /* Default num_ports_in_engines to something */
                p_hwfn->cdev->num_ports_in_engines = 1;
        }
+}
+
+static void qed_hw_info_port_num_ah(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt)
+{
+       u32 port;
+       int i;
+
+       p_hwfn->cdev->num_ports_in_engines = 0;
+
+       for (i = 0; i < MAX_NUM_PORTS_K2; i++) {
+               port = qed_rd(p_hwfn, p_ptt,
+                             CNIG_REG_NIG_PORT0_CONF_K2 + (i * 4));
+               if (port & 1)
+                       p_hwfn->cdev->num_ports_in_engines++;
+       }
+
+       if (!p_hwfn->cdev->num_ports_in_engines) {
+               DP_NOTICE(p_hwfn, "All NIG ports are inactive\n");
+
+               /* Default num_ports_in_engines to something */
+               p_hwfn->cdev->num_ports_in_engines = 1;
+       }
+}
+
+static void qed_hw_info_port_num(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       if (QED_IS_BB(p_hwfn->cdev))
+               qed_hw_info_port_num_bb(p_hwfn, p_ptt);
+       else
+               qed_hw_info_port_num_ah(p_hwfn, p_ptt);
+}
+
+static int
+qed_get_hw_info(struct qed_hwfn *p_hwfn,
+               struct qed_ptt *p_ptt,
+               enum qed_pci_personality personality)
+{
+       int rc;
+
+       /* Since all information is common, only the first hwfn should do this */
+       if (IS_LEAD_HWFN(p_hwfn)) {
+               rc = qed_iov_hw_info(p_hwfn);
+               if (rc)
+                       return rc;
+       }
+
+       qed_hw_info_port_num(p_hwfn, p_ptt);
 
        qed_hw_get_nvm_info(p_hwfn, p_ptt);
 
@@ -2085,30 +2686,47 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
                p_hwfn->hw_info.personality = protocol;
        }
 
+       p_hwfn->hw_info.num_hw_tc = NUM_PHYS_TCS_4PORT_K2;
+       p_hwfn->hw_info.num_active_tc = 1;
+
        qed_get_num_funcs(p_hwfn, p_ptt);
 
        if (qed_mcp_is_init(p_hwfn))
                p_hwfn->hw_info.mtu = p_hwfn->mcp_info->func_info.mtu;
 
-       return qed_hw_get_resc(p_hwfn);
+       return qed_hw_get_resc(p_hwfn, p_ptt);
 }
 
 static int qed_get_dev_info(struct qed_dev *cdev)
 {
        struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       u16 device_id_mask;
        u32 tmp;
 
        /* Read Vendor Id / Device Id */
        pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, &cdev->vendor_id);
        pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, &cdev->device_id);
 
+       /* Determine type */
+       device_id_mask = cdev->device_id & QED_DEV_ID_MASK;
+       switch (device_id_mask) {
+       case QED_DEV_ID_MASK_BB:
+               cdev->type = QED_DEV_TYPE_BB;
+               break;
+       case QED_DEV_ID_MASK_AH:
+               cdev->type = QED_DEV_TYPE_AH;
+               break;
+       default:
+               DP_NOTICE(p_hwfn, "Unknown device id 0x%x\n", cdev->device_id);
+               return -EBUSY;
+       }
+
        cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
                                     MISCS_REG_CHIP_NUM);
        cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
                                     MISCS_REG_CHIP_REV);
        MASK_FIELD(CHIP_REV, cdev->chip_rev);
 
-       cdev->type = QED_DEV_TYPE_BB;
        /* Learn number of HW-functions */
        tmp = qed_rd(p_hwfn, p_hwfn->p_main_ptt,
                     MISCS_REG_CMT_ENABLED_FOR_PAIR);
@@ -2128,7 +2746,10 @@ static int qed_get_dev_info(struct qed_dev *cdev)
        MASK_FIELD(CHIP_METAL, cdev->chip_metal);
 
        DP_INFO(cdev->hwfns,
-               "Chip details - Num: %04x Rev: %04x Bond id: %04x Metal: %04x\n",
+               "Chip details - %s %c%d, Num: %04x Rev: %04x Bond id: %04x Metal: %04x\n",
+               QED_IS_BB(cdev) ? "BB" : "AH",
+               'A' + cdev->chip_rev,
+               (int)cdev->chip_metal,
                cdev->chip_num, cdev->chip_rev,
                cdev->chip_bond_id, cdev->chip_metal);
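
A standalone sketch of the two additions above: classifying the chip family from the PCI device id and printing the revision as a letter ('A' + chip_rev). The mask and id values are illustrative assumptions, not quoted from qed.h:

#include <stdint.h>
#include <stdio.h>

/* assumed values for illustration; the real QED_DEV_ID_MASK* constants
 * are defined by the driver */
#define DEV_ID_MASK    0xff00
#define DEV_ID_MASK_BB 0x1600
#define DEV_ID_MASK_AH 0x8000

int main(void)
{
	uint16_t device_id = 0x1634; /* hypothetical BB-family id */
	uint16_t chip_rev = 1, chip_metal = 0;
	const char *family;

	switch (device_id & DEV_ID_MASK) {
	case DEV_ID_MASK_BB: family = "BB"; break;
	case DEV_ID_MASK_AH: family = "AH"; break;
	default:             family = "??"; break;
	}
	/* rev 0 prints as 'A', rev 1 as 'B', ... - as in the DP_INFO above */
	printf("%s %c%d\n", family, 'A' + chip_rev, (int)chip_metal);
	return 0;
}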
 
@@ -2195,6 +2816,15 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
                goto err2;
        }
 
+       /* Sending a mailbox to the MFW should be done only after
+        * qed_get_hw_info() is called, since it sets the number of ports
+        * in the engine.
+        */
+       if (IS_LEAD_HWFN(p_hwfn)) {
+               rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
+               if (rc)
+                       DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
+       }
+
        /* Allocate the init RT array and initialize the init-ops engine */
        rc = qed_init_alloc(p_hwfn);
        if (rc)
@@ -3363,3 +3993,8 @@ void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        memset(p_hwfn->qm_info.wfq_data, 0,
               sizeof(*p_hwfn->qm_info.wfq_data) * p_hwfn->qm_info.num_vports);
 }
+
+int qed_device_num_engines(struct qed_dev *cdev)
+{
+       return QED_IS_BB(cdev) ? 2 : 1;
+}
index 6812003411cdc9870a6508de198a962e4bd1e68e..2c6637fd7ef6c64ce2d691310b5a19311c18080a 100644 (file)
@@ -82,26 +82,63 @@ int qed_resc_alloc(struct qed_dev *cdev);
  */
 void qed_resc_setup(struct qed_dev *cdev);
 
+enum qed_override_force_load {
+       QED_OVERRIDE_FORCE_LOAD_NONE,
+       QED_OVERRIDE_FORCE_LOAD_ALWAYS,
+       QED_OVERRIDE_FORCE_LOAD_NEVER,
+};
+
+struct qed_drv_load_params {
+       /* Indicates whether the driver is running over a crash kernel.
+        * As part of the load request, this will be used for providing the
+        * driver role to the MFW.
+        * In case of a crash kernel over PDA - this should be set to false.
+        */
+       bool is_crash_kernel;
+
+       /* The timeout value that the MFW should use when locking the engine for
+        * the driver load process.
+        * A value of '0' means the default value, and '255' means no timeout.
+        */
+       u8 mfw_timeout_val;
+#define QED_LOAD_REQ_LOCK_TO_DEFAULT    0
+#define QED_LOAD_REQ_LOCK_TO_NONE       255
+
+       /* Avoid engine reset when first PF loads on it */
+       bool avoid_eng_reset;
+
+       /* Allow overriding the default force load behavior */
+       enum qed_override_force_load override_force_load;
+};
+
+struct qed_hw_init_params {
+       /* Tunneling parameters */
+       struct qed_tunn_start_params *p_tunn;
+
+       bool b_hw_start;
+
+       /* Interrupt mode [msix, inta, etc.] to use */
+       enum qed_int_mode int_mode;
+
+       /* Allow NPAR tx-switching for vports configured for tx-switching */
+       bool allow_npar_tx_switch;
+
+       /* Binary fw data pointer in binary fw file */
+       const u8 *bin_fw_data;
+
+       /* Driver load parameters */
+       struct qed_drv_load_params *p_drv_load_params;
+};
+
 /**
  * @brief qed_hw_init -
  *
  * @param cdev
- * @param p_tunn
- * @param b_hw_start
- * @param int_mode - interrupt mode [msix, inta, etc.] to use.
- * @param allow_npar_tx_switch - npar tx switching to be used
- *       for vports configured for tx-switching.
- * @param bin_fw_data - binary fw data pointer in binary fw file.
- *                     Pass NULL if not using binary fw file.
+ * @param p_params
  *
  * @return int
  */
-int qed_hw_init(struct qed_dev *cdev,
-               struct qed_tunn_start_params *p_tunn,
-               bool b_hw_start,
-               enum qed_int_mode int_mode,
-               bool allow_npar_tx_switch,
-               const u8 *bin_fw_data);
+int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params);
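
A hedged caller-side sketch (kernel context assumed, not a standalone program) of the pattern this refactor enables: the six positional arguments collapse into a params struct that callers fill by name. Field values here are illustrative, and QED_INT_MODE_MSIX is assumed to be one of the enum qed_int_mode values:

struct qed_drv_load_params load_params = {
	.is_crash_kernel     = false,
	.mfw_timeout_val     = QED_LOAD_REQ_LOCK_TO_DEFAULT,
	.avoid_eng_reset     = false,
	.override_force_load = QED_OVERRIDE_FORCE_LOAD_NONE,
};
struct qed_hw_init_params init_params = {
	.p_tunn               = NULL,
	.b_hw_start           = true,
	.int_mode             = QED_INT_MODE_MSIX,
	.allow_npar_tx_switch = true,
	.bin_fw_data          = NULL, /* NULL when no binary fw file is used */
	.p_drv_load_params    = &load_params,
};
int rc = qed_hw_init(cdev, &init_params);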
 
 /**
  * @brief qed_hw_timers_stop_all - stop the timers HW block
@@ -140,14 +177,6 @@ void qed_hw_stop_fastpath(struct qed_dev *cdev);
  */
 void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn);
 
-/**
- * @brief qed_hw_reset -
- *
- * @param cdev
- *
- * @return int
- */
-int qed_hw_reset(struct qed_dev *cdev);
 
 /**
  * @brief qed_hw_prepare -
@@ -441,4 +470,6 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
  */
 int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
                         u16 coalesce, u8 qid, u16 sb_id);
+
+const char *qed_hw_get_resc_name(enum qed_resources res_id);
 #endif
index cbc81412174f9e1ea07a763a90072348e883699f..f4b95345d1a583602fdb235e8486a765cb162e0d 100644 (file)
@@ -191,7 +191,7 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
        p_data->q_params.cq_sb_pi = fcoe_pf_params->gl_rq_pi;
        p_data->q_params.cmdq_sb_pi = fcoe_pf_params->gl_cmd_pi;
 
-       p_data->q_params.bdq_resource_id = FCOE_BDQ_ID(p_hwfn->port_id);
+       p_data->q_params.bdq_resource_id = (u8)RESC_START(p_hwfn, QED_BDQ);
 
        DMA_REGPAIR_LE(p_data->q_params.bdq_pbl_base_address[BDQ_ID_RQ],
                       fcoe_pf_params->bdq_pbl_base_addr[BDQ_ID_RQ]);
@@ -241,7 +241,7 @@ qed_sp_fcoe_conn_offload(struct qed_hwfn *p_hwfn,
        struct fcoe_conn_offload_ramrod_data *p_data;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       u16 pq_id = 0, tmp;
+       u16 physical_q0, tmp;
        int rc;
 
        /* Get SPQ entry */
@@ -261,9 +261,9 @@ qed_sp_fcoe_conn_offload(struct qed_hwfn *p_hwfn,
        p_data = &p_ramrod->offload_ramrod_data;
 
        /* Transmission PQ is the first of the PF */
-       pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_FCOE, NULL);
-       p_conn->physical_q0 = cpu_to_le16(pq_id);
-       p_data->physical_q0 = cpu_to_le16(pq_id);
+       physical_q0 = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+       p_conn->physical_q0 = cpu_to_le16(physical_q0);
+       p_data->physical_q0 = cpu_to_le16(physical_q0);
 
        p_data->conn_id = cpu_to_le16(p_conn->conn_id);
        DMA_REGPAIR_LE(p_data->sq_pbl_addr, p_conn->sq_pbl_addr);
@@ -512,19 +512,31 @@ static void __iomem *qed_fcoe_get_db_addr(struct qed_hwfn *p_hwfn, u32 cid)
 static void __iomem *qed_fcoe_get_primary_bdq_prod(struct qed_hwfn *p_hwfn,
                                                   u8 bdq_id)
 {
-       u8 bdq_function_id = FCOE_BDQ_ID(p_hwfn->port_id);
-
-       return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_MSDM_RAM +
-              MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id, bdq_id);
+       if (RESC_NUM(p_hwfn, QED_BDQ)) {
+               return (u8 __iomem *)p_hwfn->regview +
+                      GTT_BAR0_MAP_REG_MSDM_RAM +
+                      MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(RESC_START(p_hwfn,
+                                                                 QED_BDQ),
+                                                      bdq_id);
+       } else {
+               DP_NOTICE(p_hwfn, "BDQ is not allocated!\n");
+               return NULL;
+       }
 }
 
 static void __iomem *qed_fcoe_get_secondary_bdq_prod(struct qed_hwfn *p_hwfn,
                                                     u8 bdq_id)
 {
-       u8 bdq_function_id = FCOE_BDQ_ID(p_hwfn->port_id);
-
-       return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_TSDM_RAM +
-              TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id, bdq_id);
+       if (RESC_NUM(p_hwfn, QED_BDQ)) {
+               return (u8 __iomem *)p_hwfn->regview +
+                      GTT_BAR0_MAP_REG_TSDM_RAM +
+                      TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(RESC_START(p_hwfn,
+                                                                 QED_BDQ),
+                                                      bdq_id);
+       } else {
+               DP_NOTICE(p_hwfn, "BDQ is not allocated!\n");
+               return NULL;
+       }
 }
 
 struct qed_fcoe_info *qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
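
The change above replaces the per-port FCOE_BDQ_ID() with the BDQ resource range obtained from the MFW: the producer register address is a per-storm RAM base plus an offset keyed by RESC_START(p_hwfn, QED_BDQ) and the bdq_id. A sketch of that address arithmetic with an illustrative layout (the real offset formula is the MSTORM/TSTORM_SCSI_BDQ_EXT_PROD_OFFSET() macro):

#include <stdint.h>
#include <stdio.h>

static uintptr_t bdq_prod_addr(uintptr_t regview, uintptr_t ram_base,
			       uint32_t bdq_resc_start, uint8_t bdq_id)
{
	/* illustrative layout only: one 2-byte producer per (function, bdq) */
	return regview + ram_base + (bdq_resc_start * 2 + bdq_id) * 2;
}

int main(void)
{
	/* bdq_resc_start = 4 plays the role of RESC_START(p_hwfn, QED_BDQ) */
	printf("prod addr = 0x%lx\n",
	       (unsigned long)bdq_prod_addr(0, 0x10000, 4, 1));
	return 0;
}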
index 37c2bfb663bb481c3d7241e8ccfc614d389d7535..815c4ec5b458c1f03e29f4cd68b0d55f29502887 100644 (file)
@@ -574,6 +574,7 @@ enum core_event_opcode {
        CORE_EVENT_TX_QUEUE_STOP,
        CORE_EVENT_RX_QUEUE_START,
        CORE_EVENT_RX_QUEUE_STOP,
+       CORE_EVENT_RX_QUEUE_FLUSH,
        MAX_CORE_EVENT_OPCODE
 };
 
@@ -625,6 +626,7 @@ enum core_ramrod_cmd_id {
        CORE_RAMROD_TX_QUEUE_START,
        CORE_RAMROD_RX_QUEUE_STOP,
        CORE_RAMROD_TX_QUEUE_STOP,
+       CORE_RAMROD_RX_QUEUE_FLUSH,
        MAX_CORE_RAMROD_CMD_ID
 };
 
@@ -698,7 +700,8 @@ struct core_rx_slow_path_cqe {
        u8 type;
        u8 ramrod_cmd_id;
        __le16 echo;
-       __le32 reserved1[7];
+       struct core_rx_cqe_opaque_data opaque_data;
+       __le32 reserved1[5];
 };
 
 union core_rx_cqe_union {
@@ -735,45 +738,46 @@ struct core_rx_stop_ramrod_data {
        __le16 reserved2[2];
 };
 
-struct core_tx_bd_flags {
-       u8 as_bitfield;
-#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_MASK  0x1
-#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0
-#define CORE_TX_BD_FLAGS_VLAN_INSERTION_MASK   0x1
-#define CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT  1
-#define CORE_TX_BD_FLAGS_START_BD_MASK 0x1
-#define CORE_TX_BD_FLAGS_START_BD_SHIFT        2
-#define CORE_TX_BD_FLAGS_IP_CSUM_MASK  0x1
-#define CORE_TX_BD_FLAGS_IP_CSUM_SHIFT 3
-#define CORE_TX_BD_FLAGS_L4_CSUM_MASK  0x1
-#define CORE_TX_BD_FLAGS_L4_CSUM_SHIFT 4
-#define CORE_TX_BD_FLAGS_IPV6_EXT_MASK 0x1
-#define CORE_TX_BD_FLAGS_IPV6_EXT_SHIFT        5
-#define CORE_TX_BD_FLAGS_L4_PROTOCOL_MASK      0x1
-#define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT     6
-#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK      0x1
-#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7
+struct core_tx_bd_data {
+       __le16 as_bitfield;
+#define CORE_TX_BD_DATA_FORCE_VLAN_MODE_MASK   0x1
+#define CORE_TX_BD_DATA_FORCE_VLAN_MODE_SHIFT     0
+#define CORE_TX_BD_DATA_VLAN_INSERTION_MASK    0x1
+#define CORE_TX_BD_DATA_VLAN_INSERTION_SHIFT      1
+#define CORE_TX_BD_DATA_START_BD_MASK  0x1
+#define CORE_TX_BD_DATA_START_BD_SHIFT            2
+#define CORE_TX_BD_DATA_IP_CSUM_MASK   0x1
+#define CORE_TX_BD_DATA_IP_CSUM_SHIFT             3
+#define CORE_TX_BD_DATA_L4_CSUM_MASK   0x1
+#define CORE_TX_BD_DATA_L4_CSUM_SHIFT             4
+#define CORE_TX_BD_DATA_IPV6_EXT_MASK  0x1
+#define CORE_TX_BD_DATA_IPV6_EXT_SHIFT            5
+#define CORE_TX_BD_DATA_L4_PROTOCOL_MASK       0x1
+#define CORE_TX_BD_DATA_L4_PROTOCOL_SHIFT         6
+#define CORE_TX_BD_DATA_L4_PSEUDO_CSUM_MODE_MASK       0x1
+#define CORE_TX_BD_DATA_L4_PSEUDO_CSUM_MODE_SHIFT 7
+#define CORE_TX_BD_DATA_NBDS_MASK      0xF
+#define CORE_TX_BD_DATA_NBDS_SHIFT                8
+#define CORE_TX_BD_DATA_ROCE_FLAV_MASK 0x1
+#define CORE_TX_BD_DATA_ROCE_FLAV_SHIFT           12
+#define CORE_TX_BD_DATA_IP_LEN_MASK    0x1
+#define CORE_TX_BD_DATA_IP_LEN_SHIFT              13
+#define CORE_TX_BD_DATA_RESERVED0_MASK            0x3
+#define CORE_TX_BD_DATA_RESERVED0_SHIFT           14
 };
 
 struct core_tx_bd {
        struct regpair addr;
        __le16 nbytes;
        __le16 nw_vlan_or_lb_echo;
-       u8 bitfield0;
-#define CORE_TX_BD_NBDS_MASK   0xF
-#define CORE_TX_BD_NBDS_SHIFT  0
-#define CORE_TX_BD_ROCE_FLAV_MASK      0x1
-#define CORE_TX_BD_ROCE_FLAV_SHIFT     4
-#define CORE_TX_BD_RESERVED0_MASK      0x7
-#define CORE_TX_BD_RESERVED0_SHIFT     5
-       struct core_tx_bd_flags bd_flags;
+       struct core_tx_bd_data bd_data;
        __le16 bitfield1;
 #define CORE_TX_BD_L4_HDR_OFFSET_W_MASK        0x3FFF
 #define CORE_TX_BD_L4_HDR_OFFSET_W_SHIFT 0
 #define CORE_TX_BD_TX_DST_MASK 0x1
 #define CORE_TX_BD_TX_DST_SHIFT        14
-#define CORE_TX_BD_RESERVED1_MASK      0x1
-#define CORE_TX_BD_RESERVED1_SHIFT     15
+#define CORE_TX_BD_RESERVED_MASK         0x1
+#define CORE_TX_BD_RESERVED_SHIFT        15
 };
 
 enum core_tx_dest {
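
The _MASK/_SHIFT pairs above pack several fields into the 16-bit bd_data word (the old 8-bit bd_flags plus the NBDS and ROCE_FLAV fields that moved in from bitfield0). A standalone sketch of the set-field idiom; SET_FIELD mirrors the driver's helper in spirit but is written out here rather than quoted:

#include <stdint.h>
#include <stdio.h>

#define CORE_TX_BD_DATA_START_BD_MASK  0x1
#define CORE_TX_BD_DATA_START_BD_SHIFT 2
#define CORE_TX_BD_DATA_NBDS_MASK      0xF
#define CORE_TX_BD_DATA_NBDS_SHIFT     8

#define SET_FIELD(value, name, val) \
	((value) = ((value) & ~((name##_MASK) << (name##_SHIFT))) | \
		   (((val) & (name##_MASK)) << (name##_SHIFT)))

int main(void)
{
	uint16_t bd_data = 0;

	SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 1); /* first BD of packet */
	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, 3);     /* packet spans 3 BDs */
	printf("bd_data = 0x%04x\n", bd_data);           /* prints 0x0304 */
	return 0;
}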
@@ -800,6 +804,14 @@ struct core_tx_stop_ramrod_data {
        __le32 reserved0[2];
 };
 
+enum dcb_dhcp_update_flag {
+       DONT_UPDATE_DCB_DHCP,
+       UPDATE_DCB,
+       UPDATE_DSCP,
+       UPDATE_DCB_DSCP,
+       MAX_DCB_DHCP_UPDATE_FLAG
+};
+
 struct eth_mstorm_per_pf_stat {
        struct regpair gre_discard_pkts;
        struct regpair vxlan_discard_pkts;
@@ -893,6 +905,12 @@ union event_ring_element {
        struct event_ring_next_addr next_addr;
 };
 
+enum fw_flow_ctrl_mode {
+       flow_ctrl_pause,
+       flow_ctrl_pfc,
+       MAX_FW_FLOW_CTRL_MODE
+};
+
 /* Major and Minor hsi Versions */
 struct hsi_fp_ver_struct {
        u8 minor_ver_arr[2];
@@ -921,6 +939,7 @@ enum malicious_vf_error_id {
        ETH_EDPM_OUT_OF_SYNC,
        ETH_TUNN_IPV6_EXT_NBD_ERR,
        ETH_CONTROL_PACKET_VIOLATION,
+       ETH_ANTI_SPOOFING_ERR,
        MAX_MALICIOUS_VF_ERROR_ID
 };
 
@@ -1106,8 +1125,9 @@ struct tstorm_per_port_stat {
        struct regpair ll2_mac_filter_discard;
        struct regpair ll2_conn_disabled_discard;
        struct regpair iscsi_irregular_pkt;
-       struct regpair reserved;
+       struct regpair fcoe_irregular_pkt;
        struct regpair roce_irregular_pkt;
+       struct regpair reserved;
        struct regpair eth_irregular_pkt;
        struct regpair reserved1;
        struct regpair preroce_irregular_pkt;
@@ -1648,6 +1668,11 @@ enum block_addr {
        GRCBASE_MS = 0x6a0000,
        GRCBASE_PHY_PCIE = 0x620000,
        GRCBASE_LED = 0x6b8000,
+       GRCBASE_AVS_WRAP = 0x6b0000,
+       GRCBASE_RGFS = 0x19d0000,
+       GRCBASE_TGFS = 0x19e0000,
+       GRCBASE_PTLD = 0x19f0000,
+       GRCBASE_YPLD = 0x1a10000,
        GRCBASE_MISC_AEU = 0x8000,
        GRCBASE_BAR0_MAP = 0x1c00000,
        MAX_BLOCK_ADDR
@@ -1732,6 +1757,11 @@ enum block_id {
        BLOCK_MS,
        BLOCK_PHY_PCIE,
        BLOCK_LED,
+       BLOCK_AVS_WRAP,
+       BLOCK_RGFS,
+       BLOCK_TGFS,
+       BLOCK_PTLD,
+       BLOCK_YPLD,
        BLOCK_MISC_AEU,
        BLOCK_BAR0_MAP,
        MAX_BLOCK_ID
@@ -1783,9 +1813,9 @@ struct dbg_attn_reg_result {
        __le32 data;
 #define DBG_ATTN_REG_RESULT_STS_ADDRESS_MASK   0xFFFFFF
 #define DBG_ATTN_REG_RESULT_STS_ADDRESS_SHIFT  0
-#define DBG_ATTN_REG_RESULT_NUM_ATTN_IDX_MASK  0xFF
-#define DBG_ATTN_REG_RESULT_NUM_ATTN_IDX_SHIFT 24
-       __le16 attn_idx_offset;
+#define DBG_ATTN_REG_RESULT_NUM_REG_ATTN_MASK  0xFF
+#define DBG_ATTN_REG_RESULT_NUM_REG_ATTN_SHIFT 24
+       __le16 block_attn_offset;
        __le16 reserved;
        __le32 sts_val;
        __le32 mask_val;
@@ -1815,12 +1845,12 @@ struct dbg_mode_hdr {
 /* Attention register */
 struct dbg_attn_reg {
        struct dbg_mode_hdr mode;
-       __le16 attn_idx_offset;
+       __le16 block_attn_offset;
        __le32 data;
 #define DBG_ATTN_REG_STS_ADDRESS_MASK  0xFFFFFF
 #define DBG_ATTN_REG_STS_ADDRESS_SHIFT 0
-#define DBG_ATTN_REG_NUM_ATTN_IDX_MASK 0xFF
-#define DBG_ATTN_REG_NUM_ATTN_IDX_SHIFT        24
+#define DBG_ATTN_REG_NUM_REG_ATTN_MASK 0xFF
+#define DBG_ATTN_REG_NUM_REG_ATTN_SHIFT 24
        __le32 sts_clr_address;
        __le32 mask_address;
 };
@@ -2001,6 +2031,20 @@ enum dbg_bus_clients {
        MAX_DBG_BUS_CLIENTS
 };
 
+enum dbg_bus_constraint_ops {
+       DBG_BUS_CONSTRAINT_OP_EQ,
+       DBG_BUS_CONSTRAINT_OP_NE,
+       DBG_BUS_CONSTRAINT_OP_LT,
+       DBG_BUS_CONSTRAINT_OP_LTC,
+       DBG_BUS_CONSTRAINT_OP_LE,
+       DBG_BUS_CONSTRAINT_OP_LEC,
+       DBG_BUS_CONSTRAINT_OP_GT,
+       DBG_BUS_CONSTRAINT_OP_GTC,
+       DBG_BUS_CONSTRAINT_OP_GE,
+       DBG_BUS_CONSTRAINT_OP_GEC,
+       MAX_DBG_BUS_CONSTRAINT_OPS
+};
+
 /* Debug Bus memory address */
 struct dbg_bus_mem_addr {
        __le32 lo;
@@ -2092,10 +2136,18 @@ struct dbg_bus_data {
                                              * DBG_BUS_TARGET_ID_PCI.
                                              */
        __le16 reserved;
-       struct dbg_bus_block_data blocks[80];/* Debug Bus data for each block */
+       struct dbg_bus_block_data blocks[88];/* Debug Bus data for each block */
        struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each Storm */
 };
 
+enum dbg_bus_filter_types {
+       DBG_BUS_FILTER_TYPE_OFF,
+       DBG_BUS_FILTER_TYPE_PRE,
+       DBG_BUS_FILTER_TYPE_POST,
+       DBG_BUS_FILTER_TYPE_ON,
+       MAX_DBG_BUS_FILTER_TYPES
+};
+
 /* Debug bus frame modes */
 enum dbg_bus_frame_modes {
        DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */
@@ -2104,6 +2156,40 @@ enum dbg_bus_frame_modes {
        MAX_DBG_BUS_FRAME_MODES
 };
 
+enum dbg_bus_input_types {
+       DBG_BUS_INPUT_TYPE_STORM,
+       DBG_BUS_INPUT_TYPE_BLOCK,
+       MAX_DBG_BUS_INPUT_TYPES
+};
+
+enum dbg_bus_other_engine_modes {
+       DBG_BUS_OTHER_ENGINE_MODE_NONE,
+       DBG_BUS_OTHER_ENGINE_MODE_DOUBLE_BW_TX,
+       DBG_BUS_OTHER_ENGINE_MODE_DOUBLE_BW_RX,
+       DBG_BUS_OTHER_ENGINE_MODE_CROSS_ENGINE_TX,
+       DBG_BUS_OTHER_ENGINE_MODE_CROSS_ENGINE_RX,
+       MAX_DBG_BUS_OTHER_ENGINE_MODES
+};
+
+enum dbg_bus_post_trigger_types {
+       DBG_BUS_POST_TRIGGER_RECORD,
+       DBG_BUS_POST_TRIGGER_DROP,
+       MAX_DBG_BUS_POST_TRIGGER_TYPES
+};
+
+enum dbg_bus_pre_trigger_types {
+       DBG_BUS_PRE_TRIGGER_START_FROM_ZERO,
+       DBG_BUS_PRE_TRIGGER_NUM_CHUNKS,
+       DBG_BUS_PRE_TRIGGER_DROP,
+       MAX_DBG_BUS_PRE_TRIGGER_TYPES
+};
+
+enum dbg_bus_semi_frame_modes {
+       DBG_BUS_SEMI_FRAME_MODE_0SLOW_4FAST = 0,
+       DBG_BUS_SEMI_FRAME_MODE_4SLOW_0FAST = 3,
+       MAX_DBG_BUS_SEMI_FRAME_MODES
+};
+
 /* Debug bus states */
 enum dbg_bus_states {
        DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */
@@ -2115,6 +2201,19 @@ enum dbg_bus_states {
        MAX_DBG_BUS_STATES
 };
 
+enum dbg_bus_storm_modes {
+       DBG_BUS_STORM_MODE_PRINTF,
+       DBG_BUS_STORM_MODE_PRAM_ADDR,
+       DBG_BUS_STORM_MODE_DRA_RW,
+       DBG_BUS_STORM_MODE_DRA_W,
+       DBG_BUS_STORM_MODE_LD_ST_ADDR,
+       DBG_BUS_STORM_MODE_DRA_FSM,
+       DBG_BUS_STORM_MODE_RH,
+       DBG_BUS_STORM_MODE_FOC,
+       DBG_BUS_STORM_MODE_EXT_STORE,
+       MAX_DBG_BUS_STORM_MODES
+};
+
 /* Debug bus target IDs */
 enum dbg_bus_targets {
        /* records debug bus to DBG block internal buffer */
@@ -2128,13 +2227,10 @@ enum dbg_bus_targets {
 
 /* GRC Dump data */
 struct dbg_grc_data {
-       __le32 param_val[40]; /* Value of each GRC parameter. Array size must
-                              * match the enum dbg_grc_params.
-                              */
-       u8 param_set_by_user[40]; /* Indicates for each GRC parameter if it was
-                                  * set by the user (0/1). Array size must
-                                  * match the enum dbg_grc_params.
-                                  */
+       u8 params_initialized;
+       u8 reserved1;
+       __le16 reserved2;
+       __le32 param_val[48];
 };
 
 /* Debug GRC params */
@@ -2181,6 +2277,8 @@ enum dbg_grc_params {
        DBG_GRC_PARAM_PARITY_SAFE,
        DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */
        DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */
+       DBG_GRC_PARAM_NO_MCP,
+       DBG_GRC_PARAM_NO_FW_VER,
        MAX_DBG_GRC_PARAMS
 };
 
@@ -2280,7 +2378,7 @@ struct dbg_tools_data {
        struct dbg_bus_data bus; /* Debug Bus data */
        struct idle_chk_data idle_chk; /* Idle Check data */
        u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */
-       u8 block_in_reset[80]; /* Indicates if a block is in reset state (0/1).
+       u8 block_in_reset[88]; /* Indicates if a block is in reset state (0/1).
                                */
        u8 chip_id; /* Chip ID (from enum chip_ids) */
        u8 platform_id; /* Platform ID (from enum platform_ids) */
@@ -2404,7 +2502,7 @@ struct fw_info_location {
 
 enum init_modes {
        MODE_RESERVED,
-       MODE_BB_B0,
+       MODE_BB,
        MODE_K2,
        MODE_ASIC,
        MODE_RESERVED2,
@@ -2418,7 +2516,6 @@ enum init_modes {
        MODE_PORTS_PER_ENG_2,
        MODE_PORTS_PER_ENG_4,
        MODE_100G,
-       MODE_40G,
        MODE_RESERVED6,
        MAX_INIT_MODES
 };
@@ -2685,6 +2782,13 @@ struct iro {
  * @param bin_ptr - a pointer to the binary data with debug arrays.
  */
 enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr);
+/**
+ * @brief qed_dbg_grc_set_params_default - Reverts all GRC parameters to their
+ *     default value.
+ *
+ * @param p_hwfn               - HW device data
+ */
+void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn);
 /**
  * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for
  *     GRC Dump.
@@ -3418,7 +3522,7 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 #define        MSTORM_TPA_TIMEOUT_US_SIZE                      (IRO[21].size)
 #define        MSTORM_ETH_PF_STAT_OFFSET(pf_id) \
        (IRO[22].base + ((pf_id) * IRO[22].m1))
-#define        MSTORM_ETH_PF_STAT_SIZE                         (IRO[21].size)
+#define        MSTORM_ETH_PF_STAT_SIZE                         (IRO[22].size)
 #define        USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
        (IRO[23].base + ((stat_counter_id) * IRO[23].m1))
 #define        USTORM_QUEUE_STAT_SIZE                          (IRO[23].size)
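
The IRO[21] -> IRO[22] change above fixes a copy-paste bug: MSTORM_ETH_PF_STAT_SIZE must come from the same iro_arr row as its OFFSET macro. Each row supplies (base, m1, m2, m3, size), and the per-PF offset is base + pf_id * m1. A sketch using one row's values from the table further below (which symbolic region that row backs is not identified here):

#include <stdio.h>

struct iro { unsigned base, m1, m2, m3, size; };

/* values borrowed from one iro_arr row below, for illustration */
static const struct iro pf_stat = { 0xa8c0, 0x38, 0, 0, 0x10 };

int main(void)
{
	unsigned pf_id = 3;

	printf("offset = 0x%x, size = 0x%x\n",
	       pf_stat.base + pf_id * pf_stat.m1, pf_stat.size);
	return 0;
}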
@@ -3482,7 +3586,7 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 
 static const struct iro iro_arr[47] = {
        {0x0, 0x0, 0x0, 0x0, 0x8},
-       {0x4cb0, 0x78, 0x0, 0x0, 0x78},
+       {0x4cb0, 0x80, 0x0, 0x0, 0x80},
        {0x6318, 0x20, 0x0, 0x0, 0x20},
        {0xb00, 0x8, 0x0, 0x0, 0x4},
        {0xa80, 0x8, 0x0, 0x0, 0x4},
@@ -3521,13 +3625,13 @@ static const struct iro iro_arr[47] = {
        {0xd888, 0x38, 0x0, 0x0, 0x24},
        {0x12c38, 0x10, 0x0, 0x0, 0x8},
        {0x11aa0, 0x38, 0x0, 0x0, 0x18},
-       {0xa8c0, 0x30, 0x0, 0x0, 0x10},
-       {0x86f8, 0x28, 0x0, 0x0, 0x18},
+       {0xa8c0, 0x38, 0x0, 0x0, 0x10},
+       {0x86f8, 0x30, 0x0, 0x0, 0x18},
        {0x101f8, 0x10, 0x0, 0x0, 0x10},
        {0xdd08, 0x48, 0x0, 0x0, 0x38},
        {0x10660, 0x20, 0x0, 0x0, 0x20},
        {0x2b80, 0x80, 0x0, 0x0, 0x10},
-       {0x5000, 0x10, 0x0, 0x0, 0x10},
+       {0x5020, 0x10, 0x0, 0x0, 0x10},
 };
 
 /* Runtime array offsets */
@@ -4595,6 +4699,12 @@ enum eth_ipv4_frag_type {
        MAX_ETH_IPV4_FRAG_TYPE
 };
 
+enum eth_ip_type {
+       ETH_IPV4,
+       ETH_IPV6,
+       MAX_ETH_IP_TYPE
+};
+
 enum eth_ramrod_cmd_id {
        ETH_RAMROD_UNUSED,
        ETH_RAMROD_VPORT_START,
@@ -4944,7 +5054,10 @@ struct vport_update_ramrod_data_cmn {
        u8 update_mtu_flg;
 
        __le16 mtu;
-       u8 reserved[2];
+       u8 update_ctl_frame_checks_en_flg;
+       u8 ctl_frame_mac_check_en;
+       u8 ctl_frame_ethtype_check_en;
+       u8 reserved[15];
 };
 
 struct vport_update_ramrod_mcast {
@@ -4962,6 +5075,492 @@ struct vport_update_ramrod_data {
        struct eth_vport_rss_config rss_config;
 };
 
+struct mstorm_eth_conn_ag_ctx {
+       u8 byte0;
+       u8 byte1;
+       u8 flags0;
+#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK       0x1
+#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0
+#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK       0x1
+#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT         1
+#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK        0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT          2
+#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK        0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT          4
+#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK        0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT          6
+       u8 flags1;
+#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK      0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT        0
+#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK      0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT        1
+#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK      0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT        2
+#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK    0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT      3
+#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK    0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT      4
+#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK    0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT      5
+#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK    0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT      6
+#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK    0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT      7
+       __le16 word0;
+       __le16 word1;
+       __le32 reg0;
+       __le32 reg1;
+};
+
+struct xstorm_eth_conn_agctxdq_ext_ldpart {
+       u8 reserved0;
+       u8 eth_state;
+       u8 flags0;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EXIST_IN_QM0_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EXIST_IN_QM0_SHIFT           0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED1_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED1_SHIFT              1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED2_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED2_SHIFT              2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EXIST_IN_QM3_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EXIST_IN_QM3_SHIFT           3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED3_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED3_SHIFT              4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED4_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED4_SHIFT              5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED5_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED5_SHIFT              6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED6_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED6_SHIFT              7
+       u8 flags1;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED7_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED7_SHIFT              0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED8_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED8_SHIFT              1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED9_MASK   0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED9_SHIFT              2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_BIT11_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_BIT11_SHIFT                  3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_BIT12_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_BIT12_SHIFT                  4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_BIT13_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_BIT13_SHIFT                  5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TX_RULE_ACTIVE_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TX_RULE_ACTIVE_SHIFT         6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_DQ_CF_ACTIVE_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_DQ_CF_ACTIVE_SHIFT           7
+       u8 flags2;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF0_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF0_SHIFT                    0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF1_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF1_SHIFT                    2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF2_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF2_SHIFT                    4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF3_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF3_SHIFT                    6
+       u8 flags3;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF4_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF4_SHIFT                    0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF5_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF5_SHIFT                    2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF6_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF6_SHIFT                    4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF7_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF7_SHIFT                    6
+       u8 flags4;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF8_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF8_SHIFT                    0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF9_MASK 0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF9_SHIFT                    2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF10_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF10_SHIFT                   4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF11_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF11_SHIFT                   6
+       u8 flags5;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF12_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF12_SHIFT                   0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF13_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF13_SHIFT                   2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF14_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF14_SHIFT                   4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF15_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF15_SHIFT                   6
+       u8 flags6;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_GO_TO_BD_CONS_CF_MASK    0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_GO_TO_BD_CONS_CF_SHIFT       0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_MULTI_UNICAST_CF_MASK    0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_MULTI_UNICAST_CF_SHIFT       2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_DQ_CF_MASK       0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_DQ_CF_SHIFT                  4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TERMINATE_CF_MASK        0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TERMINATE_CF_SHIFT           6
+       u8 flags7;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_FLUSH_Q0_MASK    0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_FLUSH_Q0_SHIFT               0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED10_MASK  0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED10_SHIFT             2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_SLOW_PATH_MASK   0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_SLOW_PATH_SHIFT              4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF0EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF0EN_SHIFT                  6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF1EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF1EN_SHIFT                  7
+       u8 flags8;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF2EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF2EN_SHIFT                  0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF3EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF3EN_SHIFT                  1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF4EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF4EN_SHIFT                  2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF5EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF5EN_SHIFT                  3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF6EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF6EN_SHIFT                  4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF7EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF7EN_SHIFT                  5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF8EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF8EN_SHIFT                  6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF9EN_MASK       0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF9EN_SHIFT                  7
+       u8 flags9;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF10EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF10EN_SHIFT                 0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF11EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF11EN_SHIFT                 1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF12EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF12EN_SHIFT                 2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF13EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF13EN_SHIFT                 3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF14EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF14EN_SHIFT                 4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF15EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_CF15EN_SHIFT                 5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_GO_TO_BD_CONS_CF_EN_MASK 0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_GO_TO_BD_CONS_CF_EN_SHIFT    6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_MULTI_UNICAST_CF_EN_MASK 0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_MULTI_UNICAST_CF_EN_SHIFT    7
+       u8 flags10;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_DQ_CF_EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_DQ_CF_EN_SHIFT               0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TERMINATE_CF_EN_MASK     0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TERMINATE_CF_EN_SHIFT        1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_FLUSH_Q0_EN_MASK 0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_FLUSH_Q0_EN_SHIFT            2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED11_MASK  0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED11_SHIFT             3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_SLOW_PATH_EN_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_SLOW_PATH_EN_SHIFT           4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TPH_ENABLE_EN_RESERVED_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TPH_ENABLE_EN_RESERVED_SHIFT 5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED12_MASK  0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED12_SHIFT             6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED13_MASK  0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED13_SHIFT             7
+       u8 flags11;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED14_MASK  0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED14_SHIFT             0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED15_MASK  0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RESERVED15_SHIFT             1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TX_DEC_RULE_EN_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TX_DEC_RULE_EN_SHIFT         2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE5EN_MASK     0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE5EN_SHIFT                3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE6EN_MASK     0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE6EN_SHIFT                4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE7EN_MASK     0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE7EN_SHIFT                5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED1_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED1_SHIFT           6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE9EN_MASK     0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE9EN_SHIFT                7
+       u8 flags12;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE10EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE10EN_SHIFT               0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE11EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE11EN_SHIFT               1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED2_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED2_SHIFT           2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED3_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED3_SHIFT           3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE14EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE14EN_SHIFT               4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE15EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE15EN_SHIFT               5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE16EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE16EN_SHIFT               6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE17EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE17EN_SHIFT               7
+       u8 flags13;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE18EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE18EN_SHIFT               0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE19EN_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_RULE19EN_SHIFT               1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED4_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED4_SHIFT           2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED5_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED5_SHIFT           3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED6_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED6_SHIFT           4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED7_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED7_SHIFT           5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED8_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED8_SHIFT           6
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED9_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_A0_RESERVED9_SHIFT           7
+       u8 flags14;
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_USE_EXT_HDR_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_USE_EXT_HDR_SHIFT       0
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_SEND_RAW_L3L4_MASK  0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_SEND_RAW_L3L4_SHIFT     1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_INBAND_PROP_HDR_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_INBAND_PROP_HDR_SHIFT   2
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_SEND_EXT_TUNNEL_MASK        0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_EDPM_SEND_EXT_TUNNEL_SHIFT   3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_L2_EDPM_ENABLE_MASK      0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_L2_EDPM_ENABLE_SHIFT         4
+#define XSTORMETHCONNAGCTXDQEXTLDPART_ROCE_EDPM_ENABLE_MASK    0x1
+#define XSTORMETHCONNAGCTXDQEXTLDPART_ROCE_EDPM_ENABLE_SHIFT       5
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TPH_ENABLE_MASK  0x3
+#define XSTORMETHCONNAGCTXDQEXTLDPART_TPH_ENABLE_SHIFT             6
+       u8 edpm_event_id;
+       __le16 physical_q0;
+       __le16 quota;
+       __le16 edpm_num_bds;
+       __le16 tx_bd_cons;
+       __le16 tx_bd_prod;
+       __le16 tx_class;
+       __le16 conn_dpi;
+       u8 byte3;
+       u8 byte4;
+       u8 byte5;
+       u8 byte6;
+       __le32 reg0;
+       __le32 reg1;
+       __le32 reg2;
+       __le32 reg3;
+       __le32 reg4;
+};
+
+struct xstorm_eth_hw_conn_ag_ctx {
+       u8 reserved0;
+       u8 eth_state;
+       u8 flags0;
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_SHIFT           0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_SHIFT              1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_SHIFT              2
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_SHIFT           3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_SHIFT              4
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_SHIFT              5
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_SHIFT              6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_SHIFT              7
+       u8 flags1;
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_SHIFT              0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_SHIFT              1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_MASK       0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_SHIFT              2
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_SHIFT                  3
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_SHIFT                  4
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_SHIFT                  5
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT         6
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT           7
+       u8 flags2;
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_SHIFT                    0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_SHIFT                    2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_SHIFT                    4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_SHIFT                    6
+       u8 flags3;
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_SHIFT                    0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_SHIFT                    2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_SHIFT                    4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_SHIFT                    6
+       u8 flags4;
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_SHIFT                    0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_MASK     0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_SHIFT                    2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_SHIFT                   4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_SHIFT                   6
+       u8 flags5;
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_SHIFT                   0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_SHIFT                   2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_SHIFT                   4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_SHIFT                   6
+       u8 flags6;
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK        0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT       0
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_MASK        0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT       2
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_MASK   0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_SHIFT                  4
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_MASK    0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_SHIFT           6
+       u8 flags7;
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_MASK        0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_SHIFT               0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_MASK      0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_SHIFT             2
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_MASK       0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_SHIFT              4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_SHIFT                  6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_SHIFT                  7
+       u8 flags8;
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_SHIFT                  0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_SHIFT                  1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_SHIFT                  2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_SHIFT                  3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_SHIFT                  4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_SHIFT                  5
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_SHIFT                  6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_MASK   0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_SHIFT                  7
+       u8 flags9;
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_SHIFT                 0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_SHIFT                 1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_SHIFT                 2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_SHIFT                 3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_SHIFT                 4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_SHIFT                 5
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK     0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT    6
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK     0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT    7
+       u8 flags10;
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_SHIFT               0
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT        1
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_MASK     0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT            2
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_MASK      0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_SHIFT             3
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_SHIFT           4
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT 5
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_MASK      0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_SHIFT             6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_MASK      0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_SHIFT             7
+       u8 flags11;
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_MASK      0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_SHIFT             0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_MASK      0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_SHIFT             1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT         2
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_SHIFT                3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_SHIFT                4
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_SHIFT                5
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_SHIFT           6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_SHIFT                7
+       u8 flags12;
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_SHIFT               0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_SHIFT               1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_SHIFT           2
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_SHIFT           3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_SHIFT               4
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_SHIFT               5
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_SHIFT               6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_SHIFT               7
+       u8 flags13;
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_SHIFT               0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_SHIFT               1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_SHIFT           2
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_SHIFT           3
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_SHIFT           4
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_SHIFT           5
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_SHIFT           6
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_SHIFT           7
+       u8 flags14;
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT       0
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK      0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT     1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT   2
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK    0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT   3
+#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_MASK  0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT         4
+#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK        0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT       5
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_MASK      0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT             6
+       u8 edpm_event_id;
+       __le16 physical_q0;
+       __le16 quota;
+       __le16 edpm_num_bds;
+       __le16 tx_bd_cons;
+       __le16 tx_bd_prod;
+       __le16 tx_class;
+       __le16 conn_dpi;
+};
+
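For reference, these unshifted MASK/SHIFT pairs are consumed through the driver's SET_FIELD()/GET_FIELD() helpers, as the qed_tx_pq_map_rt_init() hunk further down does for the QM map. A minimal sketch (values purely illustrative; the context struct is normally written by firmware):

	u8 flags10 = 0;

	/* set the DQ credit-flag enable and the Q0 flush enable bits */
	SET_FIELD(flags10, XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN, 1);
	SET_FIELD(flags10, XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN, 1);

	/* read one of the fields back out of the same byte */
	if (GET_FIELD(flags10, XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN))
		; /* enabled */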
 struct mstorm_rdma_task_st_ctx {
        struct regpair temp[4];
 };
@@ -6165,7 +6764,7 @@ struct ystorm_roce_conn_st_ctx {
 };
 
 struct xstorm_roce_conn_st_ctx {
-       struct regpair temp[22];
+       struct regpair temp[24];
 };
 
 struct tstorm_roce_conn_st_ctx {
@@ -6220,7 +6819,7 @@ struct roce_create_qp_req_ramrod_data {
        __le16 mtu;
        __le16 pd;
        __le16 sq_num_pages;
-       __le16 reseved2;
+       __le16 low_latency_phy_queue;
        struct regpair sq_pbl_addr;
        struct regpair orq_pbl_addr;
        __le16 local_mac_addr[3];
@@ -6234,7 +6833,7 @@ struct roce_create_qp_req_ramrod_data {
        u8 stats_counter_id;
        u8 reserved3[7];
        __le32 cq_cid;
-       __le16 physical_queue0;
+       __le16 regular_latency_phy_queue;
        __le16 dpi;
 };
 
@@ -6282,15 +6881,16 @@ struct roce_create_qp_resp_ramrod_data {
        __le32 dst_gid[4];
        struct regpair qp_handle_for_cqe;
        struct regpair qp_handle_for_async;
-       __le32 reserved2[2];
+       __le16 low_latency_phy_queue;
+       u8 reserved2[6];
        __le32 cq_cid;
-       __le16 physical_queue0;
+       __le16 regular_latency_phy_queue;
        __le16 dpi;
 };
 
 struct roce_destroy_qp_req_output_params {
        __le32 num_bound_mw;
-       __le32 reserved;
+       __le32 cq_prod;
 };
 
 struct roce_destroy_qp_req_ramrod_data {
@@ -6299,7 +6899,7 @@ struct roce_destroy_qp_req_ramrod_data {
 
 struct roce_destroy_qp_resp_output_params {
        __le32 num_invalidated_mw;
-       __le32 reserved;
+       __le32 cq_prod;
 };
 
 struct roce_destroy_qp_resp_ramrod_data {
@@ -7426,6 +8026,7 @@ struct ystorm_fcoe_conn_st_ctx {
        u8 fcp_rsp_size;
        __le16 mss;
        struct regpair reserved;
+       __le16 min_frame_size;
        u8 protection_info_flags;
 #define YSTORM_FCOE_CONN_ST_CTX_SUPPORT_PROTECTION_MASK  0x1
 #define YSTORM_FCOE_CONN_ST_CTX_SUPPORT_PROTECTION_SHIFT 0
@@ -7444,7 +8045,6 @@ struct ystorm_fcoe_conn_st_ctx {
 #define YSTORM_FCOE_CONN_ST_CTX_RSRV_MASK                0x3F
 #define YSTORM_FCOE_CONN_ST_CTX_RSRV_SHIFT               2
        u8 fcp_xfer_size;
-       u8 reserved3[2];
 };
 
 struct fcoe_vlan_fields {
@@ -8273,10 +8873,10 @@ struct xstorm_iscsi_conn_ag_ctx {
 #define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_MASK                    0x3
 #define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_SHIFT                   6
        u8 flags7;
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_MASK                    0x3
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_SHIFT                   0
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q1_MASK                    0x3
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q1_SHIFT                   2
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_MASK      0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_SHIFT        0
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_MASK      0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_SHIFT        2
 #define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_MASK                   0x3
 #define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_SHIFT                  4
 #define XSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK                       0x1
@@ -8322,10 +8922,10 @@ struct xstorm_iscsi_conn_ag_ctx {
 #define XSTORM_ISCSI_CONN_AG_CTX_CF18EN_SHIFT                     0
 #define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_EN_MASK                 0x1
 #define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT                1
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_MASK                 0x1
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT                2
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q1_EN_MASK                 0x1
-#define XSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q1_EN_SHIFT                3
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_EN_MASK   0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_EN_SHIFT     2
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_EN_MASK   0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_EN_SHIFT     3
 #define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_EN_MASK                0x1
 #define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_EN_SHIFT               4
 #define XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_EN_MASK        0x1
@@ -8335,8 +8935,8 @@ struct xstorm_iscsi_conn_ag_ctx {
 #define XSTORM_ISCSI_CONN_AG_CTX_MORE_TO_SEND_DEC_RULE_EN_MASK    0x1
 #define XSTORM_ISCSI_CONN_AG_CTX_MORE_TO_SEND_DEC_RULE_EN_SHIFT   7
        u8 flags11;
-#define XSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK                     0x1
-#define XSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT                    0
+#define XSTORM_ISCSI_CONN_AG_CTX_TX_BLOCKED_EN_MASK    0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT              0
 #define XSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK                     0x1
 #define XSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT                    1
 #define XSTORM_ISCSI_CONN_AG_CTX_RESERVED3_MASK                   0x1
@@ -8440,7 +9040,7 @@ struct xstorm_iscsi_conn_ag_ctx {
        __le32 reg10;
        __le32 reg11;
        __le32 exp_stat_sn;
-       __le32 reg13;
+       __le32 ongoing_fast_rxmit_seq;
        __le32 reg14;
        __le32 reg15;
        __le32 reg16;
@@ -8466,10 +9066,10 @@ struct tstorm_iscsi_conn_ag_ctx {
 #define TSTORM_ISCSI_CONN_AG_CTX_CF0_MASK                0x3
 #define TSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT               6
        u8 flags1;
-#define TSTORM_ISCSI_CONN_AG_CTX_CF1_MASK                0x3
-#define TSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT               0
-#define TSTORM_ISCSI_CONN_AG_CTX_CF2_MASK                0x3
-#define TSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT               2
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_MASK     0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_SHIFT      0
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_MASK     0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_SHIFT      2
 #define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_MASK     0x3
 #define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT    4
 #define TSTORM_ISCSI_CONN_AG_CTX_CF4_MASK                0x3
@@ -8490,10 +9090,10 @@ struct tstorm_iscsi_conn_ag_ctx {
 #define TSTORM_ISCSI_CONN_AG_CTX_CF10_SHIFT              2
 #define TSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK              0x1
 #define TSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT             4
-#define TSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK              0x1
-#define TSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT             5
-#define TSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK              0x1
-#define TSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT             6
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_MASK  0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_SHIFT   5
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_EN_MASK  0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_EN_SHIFT   6
 #define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK  0x1
 #define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT 7
        u8 flags4;
@@ -8539,7 +9139,7 @@ struct tstorm_iscsi_conn_ag_ctx {
        __le32 reg6;
        __le32 reg7;
        __le32 reg8;
-       u8 byte2;
+       u8 cid_offload_cnt;
        u8 byte3;
        __le16 word0;
 };
@@ -8831,11 +9431,24 @@ struct eth_stats {
        u64 r511;
        u64 r1023;
        u64 r1518;
-       u64 r1522;
-       u64 r2047;
-       u64 r4095;
-       u64 r9216;
-       u64 r16383;
+
+       union {
+               struct {
+                       u64 r1522;
+                       u64 r2047;
+                       u64 r4095;
+                       u64 r9216;
+                       u64 r16383;
+               } bb0;
+               struct {
+                       u64 unused1;
+                       u64 r1519_to_max;
+                       u64 unused2;
+                       u64 unused3;
+                       u64 unused4;
+               } ah0;
+       } u0;
+
        u64 rfcs;
        u64 rxcf;
        u64 rxpf;
@@ -8852,14 +9465,36 @@ struct eth_stats {
        u64 t511;
        u64 t1023;
        u64 t1518;
-       u64 t2047;
-       u64 t4095;
-       u64 t9216;
-       u64 t16383;
+
+       union {
+               struct {
+                       u64 t2047;
+                       u64 t4095;
+                       u64 t9216;
+                       u64 t16383;
+               } bb1;
+               struct {
+                       u64 t1519_to_max;
+                       u64 unused6;
+                       u64 unused7;
+                       u64 unused8;
+               } ah1;
+       } u1;
+
        u64 txpf;
        u64 txpp;
-       u64 tlpiec;
-       u64 tncl;
+
+       union {
+               struct {
+                       u64 tlpiec;
+                       u64 tncl;
+               } bb2;
+               struct {
+                       u64 unused9;
+                       u64 unused10;
+               } ah2;
+       } u2;
+
        u64 rbyte;
        u64 rxuca;
        u64 rxmca;
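The new u0/u1/u2 unions overlay the BB-only frame-size and LPI counters with their AH counterparts (a single r1519_to_max/t1519_to_max bucket), so readers must pick the union arm by chip family, as __qed_get_vport_port_stats() does below. The pattern, sketched:

	/* illustrative only: select the union arm by device family */
	if (QED_IS_BB(p_hwfn->cdev))
		packets += port_stats.eth.u0.bb0.r2047;
	else
		packets += port_stats.eth.u0.ah0.r1519_to_max;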
@@ -8943,12 +9578,12 @@ struct dcbx_ets_feature {
 #define DCBX_ETS_CBS_SHIFT     3
 #define DCBX_ETS_MAX_TCS_MASK  0x000000f0
 #define DCBX_ETS_MAX_TCS_SHIFT 4
-#define DCBX_ISCSI_OOO_TC_MASK 0x00000f00
-#define DCBX_ISCSI_OOO_TC_SHIFT        8
+#define DCBX_OOO_TC_MASK       0x00000f00
+#define DCBX_OOO_TC_SHIFT      8
        u32 pri_tc_tbl[1];
-#define DCBX_ISCSI_OOO_TC      (4)
+#define DCBX_TCP_OOO_TC                (4)
 
-#define NIG_ETS_ISCSI_OOO_CLIENT_OFFSET        (DCBX_ISCSI_OOO_TC + 1)
+#define NIG_ETS_ISCSI_OOO_CLIENT_OFFSET        (DCBX_TCP_OOO_TC + 1)
 #define DCBX_CEE_STRICT_PRIORITY       0xf
        u32 tc_bw_tbl[2];
        u32 tc_tsa_tbl[2];
@@ -8957,6 +9592,9 @@ struct dcbx_ets_feature {
 #define DCBX_ETS_TSA_ETS       2
 };
 
+#define DCBX_TCP_OOO_TC                        (4)
+#define DCBX_TCP_OOO_K2_4PORT_TC       (3)
+
 struct dcbx_app_priority_entry {
        u32 entry;
 #define DCBX_APP_PRI_MAP_MASK          0x000000ff
@@ -9067,6 +9705,10 @@ struct dcb_dscp_map {
 struct public_global {
        u32 max_path;
        u32 max_ports;
+#define MODE_1P 1
+#define MODE_2P 2
+#define MODE_3P 3
+#define MODE_4P 4
        u32 debug_mb_offset;
        u32 phymod_dbg_mb_offset;
        struct couple_mode_teaming cmt;
@@ -9248,9 +9890,11 @@ struct public_func {
 #define DRV_ID_PDA_COMP_VER_MASK       0x0000ffff
 #define DRV_ID_PDA_COMP_VER_SHIFT      0
 
+#define LOAD_REQ_HSI_VERSION           2
 #define DRV_ID_MCP_HSI_VER_MASK                0x00ff0000
 #define DRV_ID_MCP_HSI_VER_SHIFT       16
-#define DRV_ID_MCP_HSI_VER_CURRENT     (1 << DRV_ID_MCP_HSI_VER_SHIFT)
+#define DRV_ID_MCP_HSI_VER_CURRENT     (LOAD_REQ_HSI_VERSION << \
+                                        DRV_ID_MCP_HSI_VER_SHIFT)
 
 #define DRV_ID_DRV_TYPE_MASK           0x7f000000
 #define DRV_ID_DRV_TYPE_SHIFT          24
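With LOAD_REQ_HSI_VERSION set to 2, DRV_ID_MCP_HSI_VER_CURRENT now evaluates to 2 << 16 = 0x00020000 (previously 1 << 16 = 0x00010000), so the driver advertises the revised load-request HSI in its drv_id word.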
@@ -9345,6 +9989,7 @@ enum resource_id_enum {
        RESOURCE_NUM_RSS_ENGINES_E = 14,
        RESOURCE_LL2_QUEUE_E = 15,
        RESOURCE_RDMA_STATS_QUEUE_E = 16,
+       RESOURCE_BDQ_E = 17,
        RESOURCE_MAX_NUM,
        RESOURCE_NUM_INVALID = 0xFFFFFFFF
 };
@@ -9362,6 +10007,46 @@ struct resource_info {
 #define RESOURCE_ELEMENT_STRICT (1 << 0)
 };
 
+#define DRV_ROLE_NONE           0
+#define DRV_ROLE_PREBOOT        1
+#define DRV_ROLE_OS             2
+#define DRV_ROLE_KDUMP          3
+
+struct load_req_stc {
+       u32 drv_ver_0;
+       u32 drv_ver_1;
+       u32 fw_ver;
+       u32 misc0;
+#define LOAD_REQ_ROLE_MASK              0x000000FF
+#define LOAD_REQ_ROLE_SHIFT             0
+#define LOAD_REQ_LOCK_TO_MASK           0x0000FF00
+#define LOAD_REQ_LOCK_TO_SHIFT          8
+#define LOAD_REQ_LOCK_TO_DEFAULT        0
+#define LOAD_REQ_LOCK_TO_NONE           255
+#define LOAD_REQ_FORCE_MASK             0x000F0000
+#define LOAD_REQ_FORCE_SHIFT            16
+#define LOAD_REQ_FORCE_NONE             0
+#define LOAD_REQ_FORCE_PF               1
+#define LOAD_REQ_FORCE_ALL              2
+#define LOAD_REQ_FLAGS0_MASK            0x00F00000
+#define LOAD_REQ_FLAGS0_SHIFT           20
+#define LOAD_REQ_FLAGS0_AVOID_RESET     (0x1 << 0)
+};
+
+struct load_rsp_stc {
+       u32 drv_ver_0;
+       u32 drv_ver_1;
+       u32 fw_ver;
+       u32 misc0;
+#define LOAD_RSP_ROLE_MASK              0x000000FF
+#define LOAD_RSP_ROLE_SHIFT             0
+#define LOAD_RSP_HSI_MASK               0x0000FF00
+#define LOAD_RSP_HSI_SHIFT              8
+#define LOAD_RSP_FLAGS0_MASK            0x000F0000
+#define LOAD_RSP_FLAGS0_SHIFT           16
+#define LOAD_RSP_FLAGS0_DRV_EXISTS      (0x1 << 0)
+};
+
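A minimal sketch of packing load_req_stc.misc0 from the positioned masks above, using plain shift-and-mask so nothing beyond these defines is assumed (DRV_ROLE_OS is just an example role):

	u32 misc0 = 0;

	misc0 |= (DRV_ROLE_OS << LOAD_REQ_ROLE_SHIFT) & LOAD_REQ_ROLE_MASK;
	misc0 |= (LOAD_REQ_LOCK_TO_DEFAULT << LOAD_REQ_LOCK_TO_SHIFT) &
		 LOAD_REQ_LOCK_TO_MASK;
	misc0 |= (LOAD_REQ_FORCE_NONE << LOAD_REQ_FORCE_SHIFT) &
		 LOAD_REQ_FORCE_MASK;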
 union drv_union_data {
        u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
        struct mcp_mac wol_mac;
@@ -9393,6 +10078,7 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_LOAD_REQ                  0x10000000
 #define DRV_MSG_CODE_LOAD_DONE                 0x11000000
 #define DRV_MSG_CODE_INIT_HW                   0x12000000
+#define DRV_MSG_CODE_CANCEL_LOAD_REQ            0x13000000
 #define DRV_MSG_CODE_UNLOAD_REQ                        0x20000000
 #define DRV_MSG_CODE_UNLOAD_DONE               0x21000000
 #define DRV_MSG_CODE_INIT_PHY                  0x22000000
@@ -9405,12 +10091,14 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE     0x31000000
 #define DRV_MSG_CODE_BW_UPDATE_ACK              0x32000000
 #define DRV_MSG_CODE_OV_UPDATE_MTU              0x33000000
+#define DRV_MSG_GET_RESOURCE_ALLOC_MSG         0x34000000
+#define DRV_MSG_SET_RESOURCE_VALUE_MSG         0x35000000
 #define DRV_MSG_CODE_OV_UPDATE_WOL              0x38000000
 #define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE     0x39000000
 
 #define DRV_MSG_CODE_BW_UPDATE_ACK             0x32000000
 #define DRV_MSG_CODE_NIG_DRAIN                 0x30000000
-#define DRV_MSG_GET_RESOURCE_ALLOC_MSG          0x34000000
+#define DRV_MSG_CODE_INITIATE_PF_FLR            0x02010000
 #define DRV_MSG_CODE_VF_DISABLED_DONE          0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX               0xc0010000
 #define DRV_MSG_CODE_NVM_GET_FILE_ATT          0x00030000
@@ -9436,6 +10124,33 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_BIST_TEST                 0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE              0x00200000
+#define DRV_MSG_CODE_RESOURCE_CMD      0x00230000
+
+#define RESOURCE_CMD_REQ_RESC_MASK             0x0000001F
+#define RESOURCE_CMD_REQ_RESC_SHIFT            0
+#define RESOURCE_CMD_REQ_OPCODE_MASK           0x000000E0
+#define RESOURCE_CMD_REQ_OPCODE_SHIFT          5
+#define RESOURCE_OPCODE_REQ                    1
+#define RESOURCE_OPCODE_REQ_WO_AGING           2
+#define RESOURCE_OPCODE_REQ_W_AGING            3
+#define RESOURCE_OPCODE_RELEASE                        4
+#define RESOURCE_OPCODE_FORCE_RELEASE          5
+#define RESOURCE_CMD_REQ_AGE_MASK              0x0000FF00
+#define RESOURCE_CMD_REQ_AGE_SHIFT             8
+
+#define RESOURCE_CMD_RSP_OWNER_MASK            0x000000FF
+#define RESOURCE_CMD_RSP_OWNER_SHIFT           0
+#define RESOURCE_CMD_RSP_OPCODE_MASK           0x00000700
+#define RESOURCE_CMD_RSP_OPCODE_SHIFT          8
+#define RESOURCE_OPCODE_GNT                    1
+#define RESOURCE_OPCODE_BUSY                   2
+#define RESOURCE_OPCODE_RELEASED               3
+#define RESOURCE_OPCODE_RELEASED_PREVIOUS      4
+#define RESOURCE_OPCODE_WRONG_OWNER            5
+#define RESOURCE_OPCODE_UNKNOWN_CMD            255
+
+#define RESOURCE_DUMP                          0
+
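The resource-lock command packs a resource id, an opcode and an aging period into the mailbox parameter word; a sketch of the request side (resc_id and age_sec are hypothetical caller-supplied values):

	u32 param = 0;

	param |= (resc_id << RESOURCE_CMD_REQ_RESC_SHIFT) &
		 RESOURCE_CMD_REQ_RESC_MASK;
	param |= (RESOURCE_OPCODE_REQ_W_AGING << RESOURCE_CMD_REQ_OPCODE_SHIFT) &
		 RESOURCE_CMD_REQ_OPCODE_MASK;
	param |= (age_sec << RESOURCE_CMD_REQ_AGE_SHIFT) &
		 RESOURCE_CMD_REQ_AGE_MASK;
	/* issued with the DRV_MSG_CODE_RESOURCE_CMD opcode defined above */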
 #define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL      0x002b0000
 #define DRV_MSG_CODE_OS_WOL                    0x002e0000
 
@@ -9524,12 +10239,16 @@ struct public_drv_mb {
 
        u32 fw_mb_header;
 #define FW_MSG_CODE_MASK                       0xffff0000
+#define FW_MSG_CODE_UNSUPPORTED                 0x00000000
 #define FW_MSG_CODE_DRV_LOAD_ENGINE            0x10100000
 #define FW_MSG_CODE_DRV_LOAD_PORT              0x10110000
 #define FW_MSG_CODE_DRV_LOAD_FUNCTION          0x10120000
 #define FW_MSG_CODE_DRV_LOAD_REFUSED_PDA       0x10200000
-#define FW_MSG_CODE_DRV_LOAD_REFUSED_HSI       0x10210000
+#define FW_MSG_CODE_DRV_LOAD_REFUSED_HSI_1     0x10210000
 #define FW_MSG_CODE_DRV_LOAD_REFUSED_DIAG      0x10220000
+#define FW_MSG_CODE_DRV_LOAD_REFUSED_HSI        0x10230000
+#define FW_MSG_CODE_DRV_LOAD_REFUSED_REQUIRES_FORCE 0x10300000
+#define FW_MSG_CODE_DRV_LOAD_REFUSED_REJECT     0x10310000
 #define FW_MSG_CODE_DRV_LOAD_DONE              0x11100000
 #define FW_MSG_CODE_DRV_UNLOAD_ENGINE          0x20110000
 #define FW_MSG_CODE_DRV_UNLOAD_PORT            0x20120000
@@ -9549,6 +10268,10 @@ struct public_drv_mb {
 #define FW_MSG_SEQ_NUMBER_MASK                 0x0000ffff
 
        u32 fw_mb_param;
+#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK  0xFFFF0000
+#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
+#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK  0x0000FFFF
+#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT 0
 
        /* get pf rdma protocol command response */
 #define FW_MB_PARAM_GET_PF_RDMA_NONE           0x0
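The two new fw_mb_param fields carry the MFW's resource-allocation revision as a major.minor pair; extracting them is a plain mask-and-shift (sketch, with fw_mb_param being the mailbox word above):

	u16 major = (fw_mb_param & FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK) >>
		    FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT;
	u16 minor = (fw_mb_param & FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK) >>
		    FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT;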
@@ -9659,6 +10382,8 @@ struct nvm_cfg1_glob {
 #define NVM_CFG1_GLOB_NETWORK_PORT_MODE_2X25G          0xC
 #define NVM_CFG1_GLOB_NETWORK_PORT_MODE_1X25G          0xD
 #define NVM_CFG1_GLOB_NETWORK_PORT_MODE_4X25G          0xE
+#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_2X10G          0xF
+
        u32 e_lane_cfg1;
        u32 e_lane_cfg2;
        u32 f_lane_cfg1;
index 899cad7f97ea41a5b8dc5e1aaccd68b068593cae..79e584a57d26a1ce0864985c4d47f06b77ceb1a0 100644
@@ -800,55 +800,3 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-u16 qed_get_qm_pq(struct qed_hwfn *p_hwfn,
-                 enum protocol_type proto, union qed_qm_pq_params *p_params)
-{
-       u16 pq_id = 0;
-
-       if ((proto == PROTOCOLID_CORE ||
-            proto == PROTOCOLID_ETH ||
-            proto == PROTOCOLID_ISCSI ||
-            proto == PROTOCOLID_ROCE) && !p_params) {
-               DP_NOTICE(p_hwfn,
-                         "Protocol %d received NULL PQ params\n", proto);
-               return 0;
-       }
-
-       switch (proto) {
-       case PROTOCOLID_CORE:
-               if (p_params->core.tc == LB_TC)
-                       pq_id = p_hwfn->qm_info.pure_lb_pq;
-               else if (p_params->core.tc == OOO_LB_TC)
-                       pq_id = p_hwfn->qm_info.ooo_pq;
-               else
-                       pq_id = p_hwfn->qm_info.offload_pq;
-               break;
-       case PROTOCOLID_ETH:
-               pq_id = p_params->eth.tc;
-               if (p_params->eth.is_vf)
-                       pq_id += p_hwfn->qm_info.vf_queues_offset +
-                                p_params->eth.vf_id;
-               break;
-       case PROTOCOLID_ISCSI:
-               if (p_params->iscsi.q_idx == 1)
-                       pq_id = p_hwfn->qm_info.pure_ack_pq;
-               break;
-       case PROTOCOLID_ROCE:
-               if (p_params->roce.dcqcn)
-                       pq_id = p_params->roce.qpid;
-               else
-                       pq_id = p_hwfn->qm_info.offload_pq;
-               if (pq_id > p_hwfn->qm_info.num_pf_rls)
-                       pq_id = p_hwfn->qm_info.offload_pq;
-               break;
-       case PROTOCOLID_FCOE:
-               pq_id = p_hwfn->qm_info.offload_pq;
-               break;
-       default:
-               pq_id = 0;
-       }
-
-       pq_id = CM_TX_PQ_BASE + pq_id + RESC_START(p_hwfn, QED_PQ);
-
-       return pq_id;
-}
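Former callers of the removed qed_get_qm_pq() now ask for a PQ by PF-level flag rather than by per-protocol parameters, as the iSCSI and LL2 hunks below show; e.g. the offload PQ is obtained with:

	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);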
index 9277264d2e6552a92a9ca88853501b80763a5dbb..f2505c691c264198e73ce946cfa5933b63292e86 100644
@@ -297,9 +297,6 @@ union qed_qm_pq_params {
        } roce;
 };
 
-u16 qed_get_qm_pq(struct qed_hwfn *p_hwfn,
-                 enum protocol_type proto, union qed_qm_pq_params *params);
-
 int qed_init_fw_data(struct qed_dev *cdev,
                     const u8 *fw_data);
 #endif
index d891a68526950609f9efbe75ecacfb40ce49b97a..2a50e2b7568f5aff16f3d6afa70c630a6166201a 100644
@@ -215,13 +215,6 @@ static void qed_cmdq_lines_voq_rt_init(struct qed_hwfn *p_hwfn,
 {
        u32 qm_line_crd;
 
-       /* In A0 - Limit the size of pbf queue so that only 511 commands with
-        * the minimum size of 4 (FCoE minimum size)
-        */
-       bool is_bb_a0 = QED_IS_BB_A0(p_hwfn->cdev);
-
-       if (is_bb_a0)
-               cmdq_lines = min_t(u32, cmdq_lines, 1022);
        qm_line_crd = QM_VOQ_LINE_CRD(cmdq_lines);
        OVERWRITE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(voq),
                         (u32)cmdq_lines);
@@ -343,13 +336,11 @@ static void qed_tx_pq_map_rt_init(
        u16 first_pq_group = p_params->start_pq / QM_PF_QUEUE_GROUP_SIZE;
        u16 last_pq_group = (p_params->start_pq + num_pqs - 1) /
                            QM_PF_QUEUE_GROUP_SIZE;
-       bool is_bb_a0 = QED_IS_BB_A0(p_hwfn->cdev);
        u16 i, pq_id, pq_group;
 
        /* a bit per Tx PQ indicating if the PQ is associated with a VF */
        u32 tx_pq_vf_mask[MAX_QM_TX_QUEUES / QM_PF_QUEUE_GROUP_SIZE] = { 0 };
-       u32 tx_pq_vf_mask_width = is_bb_a0 ? 32 : QM_PF_QUEUE_GROUP_SIZE;
-       u32 num_tx_pq_vf_masks = MAX_QM_TX_QUEUES / tx_pq_vf_mask_width;
+       u32 num_tx_pq_vf_masks = MAX_QM_TX_QUEUES / QM_PF_QUEUE_GROUP_SIZE;
        u32 pq_mem_4kb = QM_PQ_MEM_4KB(p_params->num_pf_cids);
        u32 vport_pq_mem_4kb = QM_PQ_MEM_4KB(p_params->num_vf_cids);
        u32 mem_addr_4kb = base_mem_addr_4kb;
@@ -371,6 +362,10 @@ static void qed_tx_pq_map_rt_init(
                bool is_vf_pq = (i >= p_params->num_pf_pqs);
                struct qm_rf_pq_map tx_pq_map;
 
+               bool rl_valid = p_params->pq_params[i].rl_valid &&
+                               (p_params->pq_params[i].vport_id <
+                                MAX_QM_GLOBAL_RLS);
+
                /* update first Tx PQ of VPORT/TC */
                u8 vport_id_in_pf = p_params->pq_params[i].vport_id -
                                    p_params->start_vport;
@@ -389,14 +384,18 @@ static void qed_tx_pq_map_rt_init(
                                     (p_params->pf_id <<
                                      QM_WFQ_VP_PQ_PF_SHIFT));
                }
+
+               if (p_params->pq_params[i].rl_valid && !rl_valid)
+                       DP_NOTICE(p_hwfn,
+                                 "Invalid VPORT ID for rate limiter configuration");
                /* fill PQ map entry */
                memset(&tx_pq_map, 0, sizeof(tx_pq_map));
                SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_PQ_VALID, 1);
-               SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_RL_VALID,
-                         p_params->pq_params[i].rl_valid ? 1 : 0);
+               SET_FIELD(tx_pq_map.reg,
+                         QM_RF_PQ_MAP_RL_VALID, rl_valid ? 1 : 0);
                SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VP_PQ_ID, first_tx_pq_id);
                SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_RL_ID,
-                         p_params->pq_params[i].rl_valid ?
+                         rl_valid ?
                          p_params->pq_params[i].vport_id : 0);
                SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VOQ, voq);
                SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_WRR_WEIGHT_GROUP,
@@ -413,8 +412,9 @@ static void qed_tx_pq_map_rt_init(
                        /* if PQ is associated with a VF, add indication
                         * to PQ VF mask
                         */
-                       tx_pq_vf_mask[pq_id / tx_pq_vf_mask_width] |=
-                               (1 << (pq_id % tx_pq_vf_mask_width));
+                       tx_pq_vf_mask[pq_id /
+                                     QM_PF_QUEUE_GROUP_SIZE] |=
+                           BIT((pq_id % QM_PF_QUEUE_GROUP_SIZE));
                        mem_addr_4kb += vport_pq_mem_4kb;
                } else {
                        mem_addr_4kb += pq_mem_4kb;
@@ -480,8 +480,8 @@ static int qed_pf_wfq_rt_init(struct qed_hwfn *p_hwfn,
        if (p_params->pf_id < MAX_NUM_PFS_BB)
                crd_reg_offset = QM_REG_WFQPFCRD_RT_OFFSET;
        else
-               crd_reg_offset = QM_REG_WFQPFCRD_MSB_RT_OFFSET +
-                                (p_params->pf_id % MAX_NUM_PFS_BB);
+               crd_reg_offset = QM_REG_WFQPFCRD_MSB_RT_OFFSET;
+       crd_reg_offset += p_params->pf_id % MAX_NUM_PFS_BB;
 
        inc_val = QM_WFQ_INC_VAL(p_params->pf_wfq);
        if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) {
@@ -498,11 +498,11 @@ static int qed_pf_wfq_rt_init(struct qed_hwfn *p_hwfn,
                                 QM_WFQ_CRD_REG_SIGN_BIT);
        }
 
-       STORE_RT_REG(p_hwfn, QM_REG_WFQPFWEIGHT_RT_OFFSET + p_params->pf_id,
-                    inc_val);
        STORE_RT_REG(p_hwfn,
                     QM_REG_WFQPFUPPERBOUND_RT_OFFSET + p_params->pf_id,
                     QM_WFQ_UPPER_BOUND | QM_WFQ_CRD_REG_SIGN_BIT);
+       STORE_RT_REG(p_hwfn, QM_REG_WFQPFWEIGHT_RT_OFFSET + p_params->pf_id,
+                    inc_val);
        return 0;
 }
 
@@ -576,6 +576,12 @@ static int qed_vport_rl_rt_init(struct qed_hwfn *p_hwfn,
 {
        u8 i, vport_id;
 
+       if (start_vport + num_vports >= MAX_QM_GLOBAL_RLS) {
+               DP_NOTICE(p_hwfn,
+                         "Invalid VPORT ID for rate limiter configuration");
+               return -1;
+       }
+
        /* go over all PF VPORTs */
        for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) {
                u32 inc_val = QM_RL_INC_VAL(vport_params[i].vport_rl);
@@ -785,6 +791,12 @@ int qed_init_vport_rl(struct qed_hwfn *p_hwfn,
 {
        u32 inc_val = QM_RL_INC_VAL(vport_rl);
 
+       if (vport_id >= MAX_QM_GLOBAL_RLS) {
+               DP_NOTICE(p_hwfn,
+                         "Invalid VPORT ID for rate limiter configuration");
+               return -1;
+       }
+
        if (inc_val > QM_RL_MAX_INC_VAL) {
                DP_NOTICE(p_hwfn, "Invalid VPORT rate-limit configuration");
                return -1;
@@ -940,12 +952,6 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
               eth_geneve_enable ? 1 : 0);
        qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_IP_ENABLE, ip_geneve_enable ? 1 : 0);
 
-       /* comp ver */
-       reg_val = (ip_geneve_enable || eth_geneve_enable) ? 1 : 0;
-       qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_COMP_VER, reg_val);
-       qed_wr(p_hwfn, p_ptt, PBF_REG_NGE_COMP_VER, reg_val);
-       qed_wr(p_hwfn, p_ptt, PRS_REG_NGE_COMP_VER, reg_val);
-
        /* EDPM with geneve tunnel not supported in BB_B0 */
        if (QED_IS_BB_B0(p_hwfn->cdev))
                return;
index 243b64e0d4dc3ed36f92e570022aa68c7af58901..4a2e7be5bf7210acc93f3ded8d20e1240e3aa6ef 100644
@@ -554,7 +554,7 @@ int qed_init_fw_data(struct qed_dev *cdev, const u8 *data)
        }
 
        /* First Dword contains metadata and should be skipped */
-       buf_hdr = (struct bin_buffer_hdr *)(data + sizeof(u32));
+       buf_hdr = (struct bin_buffer_hdr *)data;
 
        offset = buf_hdr[BIN_BUF_INIT_FW_VER_INFO].offset;
        fw->fw_ver_info = (struct fw_ver_info *)(data + offset);
index 84310b60849b4881557cfd62761549a1a182f2d9..0ed24d6e6c6520450ed05b50e8486e54b712a354 100644
@@ -2500,8 +2500,9 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 
        /* Configure pi coalescing if set */
        if (p_hwfn->cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) {
+               u8 num_tc = p_hwfn->hw_info.num_hw_tc;
                u8 timeset, timer_res;
-               u8 num_tc = 1, i;
+               u8 i;
 
                /* timeset = (coalesce >> timer-res), timeset is 7bit wide */
                if (p_hwfn->cdev->rx_coalesce_usecs <= 0x7F)
index 098766f7fe88a6e0a131712330cfa3b144c32738..112b96fba4333184a623c660363f833ee91eb550 100644
@@ -216,7 +216,7 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
                p_queue->cq_cmdq_sb_num_arr[i] = cpu_to_le16(val);
        }
 
-       p_queue->bdq_resource_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+       p_queue->bdq_resource_id = (u8)RESC_START(p_hwfn, QED_BDQ);
 
        DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_RQ],
                       p_params->bdq_pbl_base_addr[BDQ_ID_RQ]);
@@ -270,11 +270,10 @@ static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
        struct tcp_offload_params *p_tcp = NULL;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       union qed_qm_pq_params pq_params;
-       u16 pq0_id = 0, pq1_id = 0;
        dma_addr_t r2tq_pbl_addr;
        dma_addr_t xhq_pbl_addr;
        dma_addr_t uhq_pbl_addr;
+       u16 physical_q;
        int rc = 0;
        u32 dval;
        u16 wval;
@@ -297,16 +296,14 @@ static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
        p_ramrod = &p_ent->ramrod.iscsi_conn_offload;
 
        /* Transmission PQ is the first of the PF */
-       memset(&pq_params, 0, sizeof(pq_params));
-       pq0_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
-       p_conn->physical_q0 = cpu_to_le16(pq0_id);
-       p_ramrod->iscsi.physical_q0 = cpu_to_le16(pq0_id);
+       physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+       p_conn->physical_q0 = cpu_to_le16(physical_q);
+       p_ramrod->iscsi.physical_q0 = cpu_to_le16(physical_q);
 
        /* iSCSI Pure-ACK PQ */
-       pq_params.iscsi.q_idx = 1;
-       pq1_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
-       p_conn->physical_q1 = cpu_to_le16(pq1_id);
-       p_ramrod->iscsi.physical_q1 = cpu_to_le16(pq1_id);
+       physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+       p_conn->physical_q1 = cpu_to_le16(physical_q);
+       p_ramrod->iscsi.physical_q1 = cpu_to_le16(physical_q);
 
        p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN;
        SET_FIELD(p_ramrod->hdr.flags, ISCSI_SLOW_PATH_HDR_LAYER_CODE,
@@ -593,21 +590,31 @@ static void __iomem *qed_iscsi_get_db_addr(struct qed_hwfn *p_hwfn, u32 cid)
 static void __iomem *qed_iscsi_get_primary_bdq_prod(struct qed_hwfn *p_hwfn,
                                                    u8 bdq_id)
 {
-       u8 bdq_function_id = ISCSI_BDQ_ID(p_hwfn->port_id);
-
-       return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_MSDM_RAM +
-                            MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id,
-                                                            bdq_id);
+       if (RESC_NUM(p_hwfn, QED_BDQ)) {
+               return (u8 __iomem *)p_hwfn->regview +
+                      GTT_BAR0_MAP_REG_MSDM_RAM +
+                      MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(RESC_START(p_hwfn,
+                                                                 QED_BDQ),
+                                                      bdq_id);
+       } else {
+               DP_NOTICE(p_hwfn, "BDQ is not allocated!\n");
+               return NULL;
+       }
 }
 
 static void __iomem *qed_iscsi_get_secondary_bdq_prod(struct qed_hwfn *p_hwfn,
                                                      u8 bdq_id)
 {
-       u8 bdq_function_id = ISCSI_BDQ_ID(p_hwfn->port_id);
-
-       return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_TSDM_RAM +
-                            TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id,
-                                                            bdq_id);
+       if (RESC_NUM(p_hwfn, QED_BDQ)) {
+               return (u8 __iomem *)p_hwfn->regview +
+                      GTT_BAR0_MAP_REG_TSDM_RAM +
+                      TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(RESC_START(p_hwfn,
+                                                                 QED_BDQ),
+                                                      bdq_id);
+       } else {
+               DP_NOTICE(p_hwfn, "BDQ is not allocated!\n");
+               return NULL;
+       }
 }
 
 static int qed_iscsi_setup_connection(struct qed_hwfn *p_hwfn,
index df932be5a4e5aa1e47b16530d51a5dcf78f15706..9900f7a1f9f1c4b021251569414b2ebe4d0a0395 100644
@@ -938,15 +938,12 @@ qed_eth_pf_tx_queue_start(struct qed_hwfn *p_hwfn,
                          dma_addr_t pbl_addr,
                          u16 pbl_size, void __iomem **pp_doorbell)
 {
-       union qed_qm_pq_params pq_params;
        int rc;
 
-       memset(&pq_params, 0, sizeof(pq_params));
 
        rc = qed_eth_txq_start_ramrod(p_hwfn, p_cid,
                                      pbl_addr, pbl_size,
-                                     qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH,
-                                                   &pq_params));
+                                     qed_get_cm_pq_idx_mcos(p_hwfn, tc));
        if (rc)
                return rc;
 
@@ -1470,13 +1467,20 @@ static void __qed_get_vport_pstats(struct qed_hwfn *p_hwfn,
        memset(&pstats, 0, sizeof(pstats));
        qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, pstats_len);
 
-       p_stats->tx_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes);
-       p_stats->tx_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes);
-       p_stats->tx_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes);
-       p_stats->tx_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts);
-       p_stats->tx_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts);
-       p_stats->tx_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts);
-       p_stats->tx_err_drop_pkts += HILO_64_REGPAIR(pstats.error_drop_pkts);
+       p_stats->common.tx_ucast_bytes +=
+           HILO_64_REGPAIR(pstats.sent_ucast_bytes);
+       p_stats->common.tx_mcast_bytes +=
+           HILO_64_REGPAIR(pstats.sent_mcast_bytes);
+       p_stats->common.tx_bcast_bytes +=
+           HILO_64_REGPAIR(pstats.sent_bcast_bytes);
+       p_stats->common.tx_ucast_pkts +=
+           HILO_64_REGPAIR(pstats.sent_ucast_pkts);
+       p_stats->common.tx_mcast_pkts +=
+           HILO_64_REGPAIR(pstats.sent_mcast_pkts);
+       p_stats->common.tx_bcast_pkts +=
+           HILO_64_REGPAIR(pstats.sent_bcast_pkts);
+       p_stats->common.tx_err_drop_pkts +=
+           HILO_64_REGPAIR(pstats.error_drop_pkts);
 }
 
 static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn,
@@ -1502,10 +1506,10 @@ static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn,
        memset(&tstats, 0, sizeof(tstats));
        qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, tstats_len);
 
-       p_stats->mftag_filter_discards +=
-               HILO_64_REGPAIR(tstats.mftag_filter_discard);
-       p_stats->mac_filter_discards +=
-               HILO_64_REGPAIR(tstats.eth_mac_filter_discard);
+       p_stats->common.mftag_filter_discards +=
+           HILO_64_REGPAIR(tstats.mftag_filter_discard);
+       p_stats->common.mac_filter_discards +=
+           HILO_64_REGPAIR(tstats.eth_mac_filter_discard);
 }
 
 static void __qed_get_vport_ustats_addrlen(struct qed_hwfn *p_hwfn,
@@ -1539,12 +1543,15 @@ static void __qed_get_vport_ustats(struct qed_hwfn *p_hwfn,
        memset(&ustats, 0, sizeof(ustats));
        qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, ustats_len);
 
-       p_stats->rx_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
-       p_stats->rx_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
-       p_stats->rx_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
-       p_stats->rx_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
-       p_stats->rx_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
-       p_stats->rx_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
+       p_stats->common.rx_ucast_bytes +=
+           HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
+       p_stats->common.rx_mcast_bytes +=
+           HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
+       p_stats->common.rx_bcast_bytes +=
+           HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
+       p_stats->common.rx_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
+       p_stats->common.rx_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
+       p_stats->common.rx_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
 }
 
 static void __qed_get_vport_mstats_addrlen(struct qed_hwfn *p_hwfn,
@@ -1578,23 +1585,26 @@ static void __qed_get_vport_mstats(struct qed_hwfn *p_hwfn,
        memset(&mstats, 0, sizeof(mstats));
        qed_memcpy_from(p_hwfn, p_ptt, &mstats, mstats_addr, mstats_len);
 
-       p_stats->no_buff_discards += HILO_64_REGPAIR(mstats.no_buff_discard);
-       p_stats->packet_too_big_discard +=
-               HILO_64_REGPAIR(mstats.packet_too_big_discard);
-       p_stats->ttl0_discard += HILO_64_REGPAIR(mstats.ttl0_discard);
-       p_stats->tpa_coalesced_pkts +=
-               HILO_64_REGPAIR(mstats.tpa_coalesced_pkts);
-       p_stats->tpa_coalesced_events +=
-               HILO_64_REGPAIR(mstats.tpa_coalesced_events);
-       p_stats->tpa_aborts_num += HILO_64_REGPAIR(mstats.tpa_aborts_num);
-       p_stats->tpa_coalesced_bytes +=
-               HILO_64_REGPAIR(mstats.tpa_coalesced_bytes);
+       p_stats->common.no_buff_discards +=
+           HILO_64_REGPAIR(mstats.no_buff_discard);
+       p_stats->common.packet_too_big_discard +=
+           HILO_64_REGPAIR(mstats.packet_too_big_discard);
+       p_stats->common.ttl0_discard += HILO_64_REGPAIR(mstats.ttl0_discard);
+       p_stats->common.tpa_coalesced_pkts +=
+           HILO_64_REGPAIR(mstats.tpa_coalesced_pkts);
+       p_stats->common.tpa_coalesced_events +=
+           HILO_64_REGPAIR(mstats.tpa_coalesced_events);
+       p_stats->common.tpa_aborts_num +=
+           HILO_64_REGPAIR(mstats.tpa_aborts_num);
+       p_stats->common.tpa_coalesced_bytes +=
+           HILO_64_REGPAIR(mstats.tpa_coalesced_bytes);
 }
 
 static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn,
                                       struct qed_ptt *p_ptt,
                                       struct qed_eth_stats *p_stats)
 {
+       struct qed_eth_stats_common *p_common = &p_stats->common;
        struct port_stats port_stats;
        int j;
 
@@ -1605,54 +1615,75 @@ static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn,
                        offsetof(struct public_port, stats),
                        sizeof(port_stats));
 
-       p_stats->rx_64_byte_packets             += port_stats.eth.r64;
-       p_stats->rx_65_to_127_byte_packets      += port_stats.eth.r127;
-       p_stats->rx_128_to_255_byte_packets     += port_stats.eth.r255;
-       p_stats->rx_256_to_511_byte_packets     += port_stats.eth.r511;
-       p_stats->rx_512_to_1023_byte_packets    += port_stats.eth.r1023;
-       p_stats->rx_1024_to_1518_byte_packets   += port_stats.eth.r1518;
-       p_stats->rx_1519_to_1522_byte_packets   += port_stats.eth.r1522;
-       p_stats->rx_1519_to_2047_byte_packets   += port_stats.eth.r2047;
-       p_stats->rx_2048_to_4095_byte_packets   += port_stats.eth.r4095;
-       p_stats->rx_4096_to_9216_byte_packets   += port_stats.eth.r9216;
-       p_stats->rx_9217_to_16383_byte_packets  += port_stats.eth.r16383;
-       p_stats->rx_crc_errors                  += port_stats.eth.rfcs;
-       p_stats->rx_mac_crtl_frames             += port_stats.eth.rxcf;
-       p_stats->rx_pause_frames                += port_stats.eth.rxpf;
-       p_stats->rx_pfc_frames                  += port_stats.eth.rxpp;
-       p_stats->rx_align_errors                += port_stats.eth.raln;
-       p_stats->rx_carrier_errors              += port_stats.eth.rfcr;
-       p_stats->rx_oversize_packets            += port_stats.eth.rovr;
-       p_stats->rx_jabbers                     += port_stats.eth.rjbr;
-       p_stats->rx_undersize_packets           += port_stats.eth.rund;
-       p_stats->rx_fragments                   += port_stats.eth.rfrg;
-       p_stats->tx_64_byte_packets             += port_stats.eth.t64;
-       p_stats->tx_65_to_127_byte_packets      += port_stats.eth.t127;
-       p_stats->tx_128_to_255_byte_packets     += port_stats.eth.t255;
-       p_stats->tx_256_to_511_byte_packets     += port_stats.eth.t511;
-       p_stats->tx_512_to_1023_byte_packets    += port_stats.eth.t1023;
-       p_stats->tx_1024_to_1518_byte_packets   += port_stats.eth.t1518;
-       p_stats->tx_1519_to_2047_byte_packets   += port_stats.eth.t2047;
-       p_stats->tx_2048_to_4095_byte_packets   += port_stats.eth.t4095;
-       p_stats->tx_4096_to_9216_byte_packets   += port_stats.eth.t9216;
-       p_stats->tx_9217_to_16383_byte_packets  += port_stats.eth.t16383;
-       p_stats->tx_pause_frames                += port_stats.eth.txpf;
-       p_stats->tx_pfc_frames                  += port_stats.eth.txpp;
-       p_stats->tx_lpi_entry_count             += port_stats.eth.tlpiec;
-       p_stats->tx_total_collisions            += port_stats.eth.tncl;
-       p_stats->rx_mac_bytes                   += port_stats.eth.rbyte;
-       p_stats->rx_mac_uc_packets              += port_stats.eth.rxuca;
-       p_stats->rx_mac_mc_packets              += port_stats.eth.rxmca;
-       p_stats->rx_mac_bc_packets              += port_stats.eth.rxbca;
-       p_stats->rx_mac_frames_ok               += port_stats.eth.rxpok;
-       p_stats->tx_mac_bytes                   += port_stats.eth.tbyte;
-       p_stats->tx_mac_uc_packets              += port_stats.eth.txuca;
-       p_stats->tx_mac_mc_packets              += port_stats.eth.txmca;
-       p_stats->tx_mac_bc_packets              += port_stats.eth.txbca;
-       p_stats->tx_mac_ctrl_frames             += port_stats.eth.txcf;
+       p_common->rx_64_byte_packets += port_stats.eth.r64;
+       p_common->rx_65_to_127_byte_packets += port_stats.eth.r127;
+       p_common->rx_128_to_255_byte_packets += port_stats.eth.r255;
+       p_common->rx_256_to_511_byte_packets += port_stats.eth.r511;
+       p_common->rx_512_to_1023_byte_packets += port_stats.eth.r1023;
+       p_common->rx_1024_to_1518_byte_packets += port_stats.eth.r1518;
+       p_common->rx_crc_errors += port_stats.eth.rfcs;
+       p_common->rx_mac_crtl_frames += port_stats.eth.rxcf;
+       p_common->rx_pause_frames += port_stats.eth.rxpf;
+       p_common->rx_pfc_frames += port_stats.eth.rxpp;
+       p_common->rx_align_errors += port_stats.eth.raln;
+       p_common->rx_carrier_errors += port_stats.eth.rfcr;
+       p_common->rx_oversize_packets += port_stats.eth.rovr;
+       p_common->rx_jabbers += port_stats.eth.rjbr;
+       p_common->rx_undersize_packets += port_stats.eth.rund;
+       p_common->rx_fragments += port_stats.eth.rfrg;
+       p_common->tx_64_byte_packets += port_stats.eth.t64;
+       p_common->tx_65_to_127_byte_packets += port_stats.eth.t127;
+       p_common->tx_128_to_255_byte_packets += port_stats.eth.t255;
+       p_common->tx_256_to_511_byte_packets += port_stats.eth.t511;
+       p_common->tx_512_to_1023_byte_packets += port_stats.eth.t1023;
+       p_common->tx_1024_to_1518_byte_packets += port_stats.eth.t1518;
+       p_common->tx_pause_frames += port_stats.eth.txpf;
+       p_common->tx_pfc_frames += port_stats.eth.txpp;
+       p_common->rx_mac_bytes += port_stats.eth.rbyte;
+       p_common->rx_mac_uc_packets += port_stats.eth.rxuca;
+       p_common->rx_mac_mc_packets += port_stats.eth.rxmca;
+       p_common->rx_mac_bc_packets += port_stats.eth.rxbca;
+       p_common->rx_mac_frames_ok += port_stats.eth.rxpok;
+       p_common->tx_mac_bytes += port_stats.eth.tbyte;
+       p_common->tx_mac_uc_packets += port_stats.eth.txuca;
+       p_common->tx_mac_mc_packets += port_stats.eth.txmca;
+       p_common->tx_mac_bc_packets += port_stats.eth.txbca;
+       p_common->tx_mac_ctrl_frames += port_stats.eth.txcf;
        for (j = 0; j < 8; j++) {
-               p_stats->brb_truncates  += port_stats.brb.brb_truncate[j];
-               p_stats->brb_discards   += port_stats.brb.brb_discard[j];
+               p_common->brb_truncates += port_stats.brb.brb_truncate[j];
+               p_common->brb_discards += port_stats.brb.brb_discard[j];
+       }
+
+       if (QED_IS_BB(p_hwfn->cdev)) {
+               struct qed_eth_stats_bb *p_bb = &p_stats->bb;
+
+               p_bb->rx_1519_to_1522_byte_packets +=
+                   port_stats.eth.u0.bb0.r1522;
+               p_bb->rx_1519_to_2047_byte_packets +=
+                   port_stats.eth.u0.bb0.r2047;
+               p_bb->rx_2048_to_4095_byte_packets +=
+                   port_stats.eth.u0.bb0.r4095;
+               p_bb->rx_4096_to_9216_byte_packets +=
+                   port_stats.eth.u0.bb0.r9216;
+               p_bb->rx_9217_to_16383_byte_packets +=
+                   port_stats.eth.u0.bb0.r16383;
+               p_bb->tx_1519_to_2047_byte_packets +=
+                   port_stats.eth.u1.bb1.t2047;
+               p_bb->tx_2048_to_4095_byte_packets +=
+                   port_stats.eth.u1.bb1.t4095;
+               p_bb->tx_4096_to_9216_byte_packets +=
+                   port_stats.eth.u1.bb1.t9216;
+               p_bb->tx_9217_to_16383_byte_packets +=
+                   port_stats.eth.u1.bb1.t16383;
+               p_bb->tx_lpi_entry_count += port_stats.eth.u2.bb2.tlpiec;
+               p_bb->tx_total_collisions += port_stats.eth.u2.bb2.tncl;
+       } else {
+               struct qed_eth_stats_ah *p_ah = &p_stats->ah;
+
+               p_ah->rx_1519_to_max_byte_packets +=
+                   port_stats.eth.u0.ah0.r1519_to_max;
+               p_ah->tx_1519_to_max_byte_packets +=
+                   port_stats.eth.u1.ah1.t1519_to_max;
        }
 }
 
index 0d3cef409c96d0849c7860e8f03a920b3b8966f1..708c601e8ccf8674084979f879a0fa4f8c76cc23 100644
@@ -597,7 +597,7 @@ static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags)
        u8 bd_flags = 0;
 
        if (GET_FIELD(parse_flags, PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
-               SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_VLAN_INSERTION, 1);
+               SET_FIELD(bd_flags, CORE_TX_BD_DATA_VLAN_INSERTION, 1);
 
        return bd_flags;
 }
@@ -758,8 +758,8 @@ qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
                             p_buffer->placement_offset;
                parse_flags = p_buffer->parse_flags;
                bd_flags = qed_ll2_convert_rx_parse_to_tx_flags(parse_flags);
-               SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_FORCE_VLAN_MODE, 1);
-               SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_L4_PROTOCOL, 1);
+               SET_FIELD(bd_flags, CORE_TX_BD_DATA_FORCE_VLAN_MODE, 1);
+               SET_FIELD(bd_flags, CORE_TX_BD_DATA_L4_PROTOCOL, 1);
 
                rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1,
                                               p_buffer->vlan, bd_flags,
@@ -1090,7 +1090,6 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
        struct core_tx_start_ramrod_data *p_ramrod = NULL;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       union qed_qm_pq_params pq_params;
        u16 pq_id = 0, pbl_size;
        int rc = -EINVAL;
 
@@ -1127,9 +1126,18 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
        pbl_size = qed_chain_get_page_cnt(&p_tx->txq_chain);
        p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 
-       memset(&pq_params, 0, sizeof(pq_params));
-       pq_params.core.tc = p_ll2_conn->conn.tx_tc;
-       pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params);
+       switch (p_ll2_conn->conn.tx_tc) {
+       case LB_TC:
+               pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
+               break;
+       case OOO_LB_TC:
+               pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO);
+               break;
+       default:
+               pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+               break;
+       }
+
        p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
        switch (conn_type) {
@@ -1591,33 +1598,34 @@ static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn,
        p_tx->cur_send_frag_num++;
 }
 
-static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
-                                            struct qed_ll2_info *p_ll2,
-                                            struct qed_ll2_tx_packet *p_curp,
-                                            u8 num_of_bds,
-                                            enum core_tx_dest tx_dest,
-                                            u16 vlan,
-                                            u8 bd_flags,
-                                            u16 l4_hdr_offset_w,
-                                            enum core_roce_flavor_type type,
-                                            dma_addr_t first_frag,
-                                            u16 first_frag_len)
+static void
+qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
+                                struct qed_ll2_info *p_ll2,
+                                struct qed_ll2_tx_packet *p_curp,
+                                u8 num_of_bds,
+                                enum core_tx_dest tx_dest,
+                                u16 vlan,
+                                u8 bd_flags,
+                                u16 l4_hdr_offset_w,
+                                enum core_roce_flavor_type roce_flavor,
+                                dma_addr_t first_frag,
+                                u16 first_frag_len)
 {
        struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain;
        u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain);
        struct core_tx_bd *start_bd = NULL;
-       u16 frag_idx;
+       u16 bd_data = 0, frag_idx;
 
        start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
        start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan);
        SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
                  cpu_to_le16(l4_hdr_offset_w));
        SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
-       start_bd->bd_flags.as_bitfield = bd_flags;
-       start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK <<
-           CORE_TX_BD_FLAGS_START_BD_SHIFT;
-       SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds);
-       SET_FIELD(start_bd->bitfield0, CORE_TX_BD_ROCE_FLAV, type);
+       bd_data |= bd_flags;
+       SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1);
+       SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, num_of_bds);
+       SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor);
+       start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
        DMA_REGPAIR_LE(start_bd->addr, first_frag);
        start_bd->nbytes = cpu_to_le16(first_frag_len);
 
@@ -1642,9 +1650,8 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
                struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd;
 
                *p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
-               (*p_bd)->bd_flags.as_bitfield = 0;
+               (*p_bd)->bd_data.as_bitfield = 0;
                (*p_bd)->bitfield1 = 0;
-               (*p_bd)->bitfield0 = 0;
                p_curp->bds_set[frag_idx].tx_frag = 0;
                p_curp->bds_set[frag_idx].frag_len = 0;
        }
@@ -2241,11 +2248,11 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
        /* Request HW to calculate IP csum */
        if (!((vlan_get_protocol(skb) == htons(ETH_P_IPV6)) &&
              ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
-               flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT);
+               flags |= BIT(CORE_TX_BD_DATA_IP_CSUM_SHIFT);
 
        if (skb_vlan_tag_present(skb)) {
                vlan = skb_vlan_tag_get(skb);
-               flags |= BIT(CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT);
+               flags |= BIT(CORE_TX_BD_DATA_VLAN_INSERTION_SHIFT);
        }
 
        rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
index eef30a598b408e5ded4109860a0d4a37362fbaee..634e7a2433a90331abfa41ec33ab9040927cb664 100644
@@ -45,6 +45,7 @@
 #include <linux/ethtool.h>
 #include <linux/etherdevice.h>
 #include <linux/vmalloc.h>
+#include <linux/crash_dump.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
 
@@ -238,6 +239,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
        dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
                                    QED_PCI_ETH_ROCE);
        dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
+       dev_info->dev_type = cdev->type;
        ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
 
        if (IS_PF(cdev)) {
@@ -588,6 +590,19 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
        return rc;
 }
 
+void qed_slowpath_irq_sync(struct qed_hwfn *p_hwfn)
+{
+       struct qed_dev *cdev = p_hwfn->cdev;
+       u8 id = p_hwfn->my_id;
+       u32 int_mode;
+
+       int_mode = cdev->int_params.out.int_mode;
+       if (int_mode == QED_INT_MODE_MSIX)
+               synchronize_irq(cdev->int_params.msix_table[id].vector);
+       else
+               synchronize_irq(cdev->pdev->irq);
+}
+
 static void qed_slowpath_irq_free(struct qed_dev *cdev)
 {
        int i;
@@ -630,19 +645,6 @@ static int qed_nic_stop(struct qed_dev *cdev)
        return rc;
 }
 
-static int qed_nic_reset(struct qed_dev *cdev)
-{
-       int rc;
-
-       rc = qed_hw_reset(cdev);
-       if (rc)
-               return rc;
-
-       qed_resc_free(cdev);
-
-       return 0;
-}
-
 static int qed_nic_setup(struct qed_dev *cdev)
 {
        int rc, i;
@@ -875,7 +877,6 @@ static void qed_update_pf_params(struct qed_dev *cdev,
                params->rdma_pf_params.num_qps = QED_ROCE_QPS;
                params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
                /* divide by 3 the MRs to avoid MF ILT overflow */
-               params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
                params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
        }
 
@@ -900,6 +901,8 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 static int qed_slowpath_start(struct qed_dev *cdev,
                              struct qed_slowpath_params *params)
 {
+       struct qed_drv_load_params drv_load_params;
+       struct qed_hw_init_params hw_init_params;
        struct qed_tunn_start_params tunn_info;
        struct qed_mcp_drv_version drv_version;
        const u8 *data = NULL;
@@ -965,9 +968,21 @@ static int qed_slowpath_start(struct qed_dev *cdev,
        tunn_info.tunn_clss_ipgre = QED_TUNN_CLSS_MAC_VLAN;
 
        /* Start the slowpath */
-       rc = qed_hw_init(cdev, &tunn_info, true,
-                        cdev->int_params.out.int_mode,
-                        true, data);
+       memset(&hw_init_params, 0, sizeof(hw_init_params));
+       hw_init_params.p_tunn = &tunn_info;
+       hw_init_params.b_hw_start = true;
+       hw_init_params.int_mode = cdev->int_params.out.int_mode;
+       hw_init_params.allow_npar_tx_switch = true;
+       hw_init_params.bin_fw_data = data;
+
+       memset(&drv_load_params, 0, sizeof(drv_load_params));
+       drv_load_params.is_crash_kernel = is_kdump_kernel();
+       drv_load_params.mfw_timeout_val = QED_LOAD_REQ_LOCK_TO_DEFAULT;
+       drv_load_params.avoid_eng_reset = false;
+       drv_load_params.override_force_load = QED_OVERRIDE_FORCE_LOAD_NONE;
+       hw_init_params.p_drv_load_params = &drv_load_params;
+
+       rc = qed_hw_init(cdev, &hw_init_params);
        if (rc)
                goto err2;
 
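qed_hw_init()'s growing positional argument list is replaced by a qed_hw_init_params block, so new knobs such as the kdump flag and the MFW load policy ride along without touching every caller. A compressed illustration of the pattern, with stand-in struct names and fields:

        #include <stdbool.h>
        #include <stdio.h>
        #include <string.h>

        /* Stand-ins for the driver's parameter blocks */
        struct drv_load_params {
                bool is_crash_kernel;
                unsigned int mfw_timeout_val;
        };

        struct hw_init_params {
                bool b_hw_start;
                int int_mode;
                const struct drv_load_params *p_drv_load_params;
        };

        static int hw_init(const struct hw_init_params *p)
        {
                printf("start=%d int_mode=%d kdump=%d\n",
                       p->b_hw_start, p->int_mode,
                       p->p_drv_load_params->is_crash_kernel);
                return 0;
        }

        int main(void)
        {
                struct drv_load_params load_params;
                struct hw_init_params init_params;

                /* memset keeps any future fields zeroed by default,
                 * exactly as the patch does before filling them in. */
                memset(&load_params, 0, sizeof(load_params));
                memset(&init_params, 0, sizeof(init_params));
                init_params.b_hw_start = true;
                init_params.int_mode = 1;
                init_params.p_drv_load_params = &load_params;

                return hw_init(&init_params);
        }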
@@ -1042,7 +1057,8 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
        }
 
        qed_disable_msix(cdev);
-       qed_nic_reset(cdev);
+
+       qed_resc_free(cdev);
 
        qed_iov_wq_stop(cdev, true);
 
@@ -1653,8 +1669,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
        switch (type) {
        case QED_MCP_LAN_STATS:
                qed_get_vport_stats(cdev, &eth_stats);
-               stats->lan_stats.ucast_rx_pkts = eth_stats.rx_ucast_pkts;
-               stats->lan_stats.ucast_tx_pkts = eth_stats.tx_ucast_pkts;
+               stats->lan_stats.ucast_rx_pkts =
+                                       eth_stats.common.rx_ucast_pkts;
+               stats->lan_stats.ucast_tx_pkts =
+                                       eth_stats.common.tx_ucast_pkts;
                stats->lan_stats.fcs_err = -1;
                break;
        case QED_MCP_FCOE_STATS:
index 87fde205149fdbf3181befd79ca62508b2daa388..619eac845028d99db205acc51cbf09da3c3a679d 100644 (file)
@@ -111,12 +111,71 @@ void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        }
 }
 
+struct qed_mcp_cmd_elem {
+       struct list_head list;
+       struct qed_mcp_mb_params *p_mb_params;
+       u16 expected_seq_num;
+       bool b_is_completed;
+};
+
+/* Must be called while cmd_lock is acquired */
+static struct qed_mcp_cmd_elem *
+qed_mcp_cmd_add_elem(struct qed_hwfn *p_hwfn,
+                    struct qed_mcp_mb_params *p_mb_params,
+                    u16 expected_seq_num)
+{
+       struct qed_mcp_cmd_elem *p_cmd_elem = NULL;
+
+       p_cmd_elem = kzalloc(sizeof(*p_cmd_elem), GFP_ATOMIC);
+       if (!p_cmd_elem)
+               goto out;
+
+       p_cmd_elem->p_mb_params = p_mb_params;
+       p_cmd_elem->expected_seq_num = expected_seq_num;
+       list_add(&p_cmd_elem->list, &p_hwfn->mcp_info->cmd_list);
+out:
+       return p_cmd_elem;
+}
+
+/* Must be called while cmd_lock is acquired */
+static void qed_mcp_cmd_del_elem(struct qed_hwfn *p_hwfn,
+                                struct qed_mcp_cmd_elem *p_cmd_elem)
+{
+       list_del(&p_cmd_elem->list);
+       kfree(p_cmd_elem);
+}
+
+/* Must be called while cmd_lock is acquired */
+static struct qed_mcp_cmd_elem *qed_mcp_cmd_get_elem(struct qed_hwfn *p_hwfn,
+                                                    u16 seq_num)
+{
+       struct qed_mcp_cmd_elem *p_cmd_elem = NULL;
+
+       list_for_each_entry(p_cmd_elem, &p_hwfn->mcp_info->cmd_list, list) {
+               if (p_cmd_elem->expected_seq_num == seq_num)
+                       return p_cmd_elem;
+       }
+
+       return NULL;
+}
+
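The helpers above keep one bookkeeping node per outstanding mailbox command, added under cmd_lock and looked up later by sequence number. The same bookkeeping modeled with a plain singly linked list (the driver uses list_head and GFP_ATOMIC allocations):

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <stdlib.h>

        struct cmd_elem {
                struct cmd_elem *next;
                uint16_t expected_seq_num;
                bool b_is_completed;
        };

        static struct cmd_elem *cmd_list;

        static struct cmd_elem *cmd_add_elem(uint16_t seq)
        {
                struct cmd_elem *e = calloc(1, sizeof(*e));

                if (!e)
                        return NULL;
                e->expected_seq_num = seq;
                e->next = cmd_list;     /* list_add(): newest at the head */
                cmd_list = e;
                return e;
        }

        static struct cmd_elem *cmd_get_elem(uint16_t seq)
        {
                struct cmd_elem *e;

                for (e = cmd_list; e; e = e->next)
                        if (e->expected_seq_num == seq)
                                return e;
                return NULL;
        }

        int main(void)
        {
                cmd_add_elem(41);
                cmd_add_elem(42);
                printf("seq 42 pending: %s\n", cmd_get_elem(42) ? "yes" : "no");
                return 0;
        }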
 int qed_mcp_free(struct qed_hwfn *p_hwfn)
 {
        if (p_hwfn->mcp_info) {
+               struct qed_mcp_cmd_elem *p_cmd_elem, *p_tmp;
+
                kfree(p_hwfn->mcp_info->mfw_mb_cur);
                kfree(p_hwfn->mcp_info->mfw_mb_shadow);
+
+               spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
+               list_for_each_entry_safe(p_cmd_elem,
+                                        p_tmp,
+                                        &p_hwfn->mcp_info->cmd_list, list) {
+                       qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
+               }
+               spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
        }
+
        kfree(p_hwfn->mcp_info);
 
        return 0;
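qed_mcp_free() drains any leftover nodes with list_for_each_entry_safe(), which caches the next pointer before the current node is freed. The same idea in portable C:

        #include <stdio.h>
        #include <stdlib.h>

        struct node {
                struct node *next;
        };

        int main(void)
        {
                struct node *head = NULL, *n, *tmp;
                int i;

                for (i = 0; i < 3; i++) {
                        n = malloc(sizeof(*n));
                        if (!n)
                                return 1;
                        n->next = head;
                        head = n;
                }

                /* The "safe" iteration: grab n->next before free(n), so
                 * freeing never invalidates the cursor. */
                for (n = head; n; n = tmp) {
                        tmp = n->next;
                        free(n);
                }
                puts("list drained");
                return 0;
        }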
@@ -160,7 +219,7 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        p_info->drv_pulse_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_pulse_mb) &
                                DRV_PULSE_SEQ_MASK;
 
-       p_info->mcp_hist = (u16)qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
+       p_info->mcp_hist = qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
 
        return 0;
 }
@@ -176,6 +235,12 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                goto err;
        p_info = p_hwfn->mcp_info;
 
+       /* Initialize the MFW spinlocks and command list */
+       spin_lock_init(&p_info->cmd_lock);
+       spin_lock_init(&p_info->link_lock);
+
+       INIT_LIST_HEAD(&p_info->cmd_list);
+
        if (qed_load_mcp_offsets(p_hwfn, p_ptt) != 0) {
                DP_NOTICE(p_hwfn, "MCP is not initialized\n");
                /* Do not free mcp_info here, since public_base indicate that
@@ -190,10 +255,6 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
                goto err;
 
-       /* Initialize the MFW spinlock */
-       spin_lock_init(&p_info->lock);
-       spin_lock_init(&p_info->link_lock);
-
        return 0;
 
 err:
@@ -201,68 +262,39 @@ err:
        return -ENOMEM;
 }
 
-/* Locks the MFW mailbox of a PF to ensure a single access.
- * The lock is achieved in most cases by holding a spinlock, causing other
- * threads to wait till a previous access is done.
- * In some cases (currently when a [UN]LOAD_REQ commands are sent), the single
- * access is achieved by setting a blocking flag, which will fail other
- * competing contexts to send their mailboxes.
- */
-static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd)
+static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt)
 {
-       spin_lock_bh(&p_hwfn->mcp_info->lock);
+       u32 generic_por_0 = qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
 
-       /* The spinlock shouldn't be acquired when the mailbox command is
-        * [UN]LOAD_REQ, since the engine is locked by the MFW, and a parallel
-        * pending [UN]LOAD_REQ command of another PF together with a spinlock
-        * (i.e. interrupts are disabled) - can lead to a deadlock.
-        * It is assumed that for a single PF, no other mailbox commands can be
-        * sent from another context while sending LOAD_REQ, and that any
-        * parallel commands to UNLOAD_REQ can be cancelled.
+       /* Use MCP history register to check if MCP reset occurred between init
+        * time and now.
         */
-       if (cmd == DRV_MSG_CODE_LOAD_DONE || cmd == DRV_MSG_CODE_UNLOAD_DONE)
-               p_hwfn->mcp_info->block_mb_sending = false;
-
-       if (p_hwfn->mcp_info->block_mb_sending) {
-               DP_NOTICE(p_hwfn,
-                         "Trying to send a MFW mailbox command [0x%x] in parallel to [UN]LOAD_REQ. Aborting.\n",
-                         cmd);
-               spin_unlock_bh(&p_hwfn->mcp_info->lock);
-               return -EBUSY;
-       }
+       if (p_hwfn->mcp_info->mcp_hist != generic_por_0) {
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_SP,
+                          "Rereading MCP offsets [mcp_hist 0x%08x, generic_por_0 0x%08x]\n",
+                          p_hwfn->mcp_info->mcp_hist, generic_por_0);
 
-       if (cmd == DRV_MSG_CODE_LOAD_REQ || cmd == DRV_MSG_CODE_UNLOAD_REQ) {
-               p_hwfn->mcp_info->block_mb_sending = true;
-               spin_unlock_bh(&p_hwfn->mcp_info->lock);
+               qed_load_mcp_offsets(p_hwfn, p_ptt);
+               qed_mcp_cmd_port_init(p_hwfn, p_ptt);
        }
-
-       return 0;
-}
-
-static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd)
-{
-       if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
-               spin_unlock_bh(&p_hwfn->mcp_info->lock);
 }
 
 int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-       u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
-       u8 delay = CHIP_MCP_RESP_ITER_US;
-       u32 org_mcp_reset_seq, cnt = 0;
+       u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0;
        int rc = 0;
 
-       /* Ensure that only a single thread is accessing the mailbox at a
-        * certain time.
-        */
-       rc = qed_mcp_mb_lock(p_hwfn, DRV_MSG_CODE_MCP_RESET);
-       if (rc != 0)
-               return rc;
+       /* Ensure that only a single thread is accessing the mailbox */
+       spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
 
-       /* Set drv command along with the updated sequence */
        org_mcp_reset_seq = qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
-       DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header,
-                 (DRV_MSG_CODE_MCP_RESET | seq));
+
+       /* Set drv command along with the updated sequence */
+       qed_mcp_reread_offsets(p_hwfn, p_ptt);
+       seq = ++p_hwfn->mcp_info->drv_mb_seq;
+       DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (DRV_MSG_CODE_MCP_RESET | seq));
 
        do {
                /* Wait for MFW response */
@@ -281,72 +313,205 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                rc = -EAGAIN;
        }
 
-       qed_mcp_mb_unlock(p_hwfn, DRV_MSG_CODE_MCP_RESET);
+       spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
 
        return rc;
 }
 
-static int qed_do_mcp_cmd(struct qed_hwfn *p_hwfn,
-                         struct qed_ptt *p_ptt,
-                         u32 cmd,
-                         u32 param,
-                         u32 *o_mcp_resp,
-                         u32 *o_mcp_param)
+/* Must be called while cmd_lock is acquired */
+static bool qed_mcp_has_pending_cmd(struct qed_hwfn *p_hwfn)
 {
-       u8 delay = CHIP_MCP_RESP_ITER_US;
-       u32 seq, cnt = 1, actual_mb_seq;
-       int rc = 0;
-
-       /* Get actual driver mailbox sequence */
-       actual_mb_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_mb_header) &
-                       DRV_MSG_SEQ_NUMBER_MASK;
+       struct qed_mcp_cmd_elem *p_cmd_elem;
 
-       /* Use MCP history register to check if MCP reset occurred between
-        * init time and now.
+       /* There is at most one pending command at any given time, and if it
+        * exists, it is placed at the head of the list.
         */
-       if (p_hwfn->mcp_info->mcp_hist !=
-           qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0)) {
-               DP_VERBOSE(p_hwfn, QED_MSG_SP, "Rereading MCP offsets\n");
-               qed_load_mcp_offsets(p_hwfn, p_ptt);
-               qed_mcp_cmd_port_init(p_hwfn, p_ptt);
+       if (!list_empty(&p_hwfn->mcp_info->cmd_list)) {
+               p_cmd_elem = list_first_entry(&p_hwfn->mcp_info->cmd_list,
+                                             struct qed_mcp_cmd_elem, list);
+               return !p_cmd_elem->b_is_completed;
        }
-       seq = ++p_hwfn->mcp_info->drv_mb_seq;
 
-       /* Set drv param */
-       DRV_MB_WR(p_hwfn, p_ptt, drv_mb_param, param);
+       return false;
+}
 
-       /* Set drv command along with the updated sequence */
-       DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (cmd | seq));
+/* Must be called while cmd_lock is acquired */
+static int
+qed_mcp_update_pending_cmd(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       struct qed_mcp_mb_params *p_mb_params;
+       struct qed_mcp_cmd_elem *p_cmd_elem;
+       u32 mcp_resp;
+       u16 seq_num;
+
+       mcp_resp = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_header);
+       seq_num = (u16)(mcp_resp & FW_MSG_SEQ_NUMBER_MASK);
+
+       /* Return if no new unhandled response has been received */
+       if (seq_num != p_hwfn->mcp_info->drv_mb_seq)
+               return -EAGAIN;
+
+       p_cmd_elem = qed_mcp_cmd_get_elem(p_hwfn, seq_num);
+       if (!p_cmd_elem) {
+               DP_ERR(p_hwfn,
+                      "Failed to find a pending mailbox cmd that expects sequence number %d\n",
+                      seq_num);
+               return -EINVAL;
+       }
+
+       p_mb_params = p_cmd_elem->p_mb_params;
+
+       /* Get the MFW response along with the sequence number */
+       p_mb_params->mcp_resp = mcp_resp;
+
+       /* Get the MFW param */
+       p_mb_params->mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
+
+       /* Get the union data */
+       if (p_mb_params->p_data_dst != NULL && p_mb_params->data_dst_size) {
+               u32 union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
+                                     offsetof(struct public_drv_mb,
+                                              union_data);
+               qed_memcpy_from(p_hwfn, p_ptt, p_mb_params->p_data_dst,
+                               union_data_addr, p_mb_params->data_dst_size);
+       }
+
+       p_cmd_elem->b_is_completed = true;
+
+       return 0;
+}
+
+/* Must be called while cmd_lock is acquired */
+static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt,
+                                   struct qed_mcp_mb_params *p_mb_params,
+                                   u16 seq_num)
+{
+       union drv_union_data union_data;
+       u32 union_data_addr;
+
+       /* Set the union data */
+       union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
+                         offsetof(struct public_drv_mb, union_data);
+       memset(&union_data, 0, sizeof(union_data));
+       if (p_mb_params->p_data_src != NULL && p_mb_params->data_src_size)
+               memcpy(&union_data, p_mb_params->p_data_src,
+                      p_mb_params->data_src_size);
+       qed_memcpy_to(p_hwfn, p_ptt, union_data_addr, &union_data,
+                     sizeof(union_data));
+
+       /* Set the drv param */
+       DRV_MB_WR(p_hwfn, p_ptt, drv_mb_param, p_mb_params->param);
+
+       /* Set the drv command along with the sequence number */
+       DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (p_mb_params->cmd | seq_num));
 
        DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                  "wrote command (%x) to MFW MB param 0x%08x\n",
-                  (cmd | seq), param);
+                  "MFW mailbox: command 0x%08x param 0x%08x\n",
+                  (p_mb_params->cmd | seq_num), p_mb_params->param);
+}
+
+static int
+_qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
+                      struct qed_ptt *p_ptt,
+                      struct qed_mcp_mb_params *p_mb_params,
+                      u32 max_retries, u32 delay)
+{
+       struct qed_mcp_cmd_elem *p_cmd_elem;
+       u32 cnt = 0;
+       u16 seq_num;
+       int rc = 0;
 
+       /* Wait until the mailbox is unoccupied */
        do {
-               /* Wait for MFW response */
+               /* Exit the loop if there is no pending command, or if the
+                * pending command is completed during this iteration.
+                * The spinlock stays locked until the command is sent.
+                */
+
+               spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
+
+               if (!qed_mcp_has_pending_cmd(p_hwfn))
+                       break;
+
+               rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt);
+               if (!rc)
+                       break;
+               else if (rc != -EAGAIN)
+                       goto err;
+
+               spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
+               udelay(delay);
+       } while (++cnt < max_retries);
+
+       if (cnt >= max_retries) {
+               DP_NOTICE(p_hwfn,
+                         "The MFW mailbox is occupied by an uncompleted command. Failed to send command 0x%08x [param 0x%08x].\n",
+                         p_mb_params->cmd, p_mb_params->param);
+               return -EAGAIN;
+       }
+
+       /* Send the mailbox command */
+       qed_mcp_reread_offsets(p_hwfn, p_ptt);
+       seq_num = ++p_hwfn->mcp_info->drv_mb_seq;
+       p_cmd_elem = qed_mcp_cmd_add_elem(p_hwfn, p_mb_params, seq_num);
+       if (!p_cmd_elem)
+               goto err;
+
+       __qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, seq_num);
+       spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
+
+       /* Wait for the MFW response */
+       do {
+               /* Exit the loop if the command is already completed, or if the
+                * command is completed during this iteration.
+                * The spinlock stays locked until the list element is removed.
+                */
+
                udelay(delay);
-               *o_mcp_resp = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_header);
+               spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
 
-               /* Give the FW up to 5 second (500*10ms) */
-       } while ((seq != (*o_mcp_resp & FW_MSG_SEQ_NUMBER_MASK)) &&
-                (cnt++ < QED_DRV_MB_MAX_RETRIES));
+               if (p_cmd_elem->b_is_completed)
+                       break;
 
-       DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                  "[after %d ms] read (%x) seq is (%x) from FW MB\n",
-                  cnt * delay, *o_mcp_resp, seq);
-
-       /* Is this a reply to our command? */
-       if (seq == (*o_mcp_resp & FW_MSG_SEQ_NUMBER_MASK)) {
-               *o_mcp_resp &= FW_MSG_CODE_MASK;
-               /* Get the MCP param */
-               *o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
-       } else {
-               /* FW BUG! */
-               DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
-                      cmd, param);
-               *o_mcp_resp = 0;
-               rc = -EAGAIN;
+               rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt);
+               if (!rc)
+                       break;
+               else if (rc != -EAGAIN)
+                       goto err;
+
+               spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
+       } while (++cnt < max_retries);
+
+       if (cnt >= max_retries) {
+               DP_NOTICE(p_hwfn,
+                         "The MFW failed to respond to command 0x%08x [param 0x%08x].\n",
+                         p_mb_params->cmd, p_mb_params->param);
+
+               spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
+               qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
+               spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
+
+               return -EAGAIN;
        }
+
+       qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
+       spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_SP,
+                  "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n",
+                  p_mb_params->mcp_resp,
+                  p_mb_params->mcp_param,
+                  (cnt * delay) / 1000, (cnt * delay) % 1000);
+
+       /* Clear the sequence number from the MFW response */
+       p_mb_params->mcp_resp &= FW_MSG_CODE_MASK;
+
+       return 0;
+
+err:
+       spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
        return rc;
 }
 
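Both wait loops in _qed_mcp_cmd_and_union() release cmd_lock around every udelay(), holding it only across the check itself, and a successful check exits the loop with the lock still held so the command can be written atomically. A userspace model of that lock/check/unlock/delay shape (a mutex stands in for the spinlock; names are illustrative):

        #include <pthread.h>
        #include <stdbool.h>
        #include <stdio.h>
        #include <unistd.h>

        static pthread_mutex_t cmd_lock = PTHREAD_MUTEX_INITIALIZER;
        static bool mailbox_busy = true;

        static void *completer(void *arg)
        {
                (void)arg;
                usleep(5000);           /* the pending command completes */
                pthread_mutex_lock(&cmd_lock);
                mailbox_busy = false;
                pthread_mutex_unlock(&cmd_lock);
                return NULL;
        }

        static int send_cmd(unsigned int max_retries, unsigned int delay_us)
        {
                unsigned int cnt = 0;

                do {
                        pthread_mutex_lock(&cmd_lock);
                        if (!mailbox_busy)
                                break;          /* exit with the lock held */
                        pthread_mutex_unlock(&cmd_lock);
                        usleep(delay_us);       /* never sleep holding the lock */
                } while (++cnt < max_retries);

                if (cnt >= max_retries)
                        return -1;              /* timed out; lock not held */

                /* ... write the command while still holding the lock ... */
                pthread_mutex_unlock(&cmd_lock);
                return 0;
        }

        int main(void)
        {
                pthread_t t;

                pthread_create(&t, NULL, completer, NULL);
                printf("send_cmd: %d\n", send_cmd(100, 1000));
                pthread_join(t, NULL);
                return 0;
        }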
@@ -354,9 +519,9 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
                                 struct qed_ptt *p_ptt,
                                 struct qed_mcp_mb_params *p_mb_params)
 {
-       u32 union_data_addr;
-
-       int rc;
+       size_t union_data_size = sizeof(union drv_union_data);
+       u32 max_retries = QED_DRV_MB_MAX_RETRIES;
+       u32 delay = CHIP_MCP_RESP_ITER_US;
 
        /* MCP not initialized */
        if (!qed_mcp_is_init(p_hwfn)) {
@@ -364,33 +529,17 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
                return -EBUSY;
        }
 
-       union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
-                         offsetof(struct public_drv_mb, union_data);
-
-       /* Ensure that only a single thread is accessing the mailbox at a
-        * certain time.
-        */
-       rc = qed_mcp_mb_lock(p_hwfn, p_mb_params->cmd);
-       if (rc)
-               return rc;
-
-       if (p_mb_params->p_data_src != NULL)
-               qed_memcpy_to(p_hwfn, p_ptt, union_data_addr,
-                             p_mb_params->p_data_src,
-                             sizeof(*p_mb_params->p_data_src));
-
-       rc = qed_do_mcp_cmd(p_hwfn, p_ptt, p_mb_params->cmd,
-                           p_mb_params->param, &p_mb_params->mcp_resp,
-                           &p_mb_params->mcp_param);
-
-       if (p_mb_params->p_data_dst != NULL)
-               qed_memcpy_from(p_hwfn, p_ptt, p_mb_params->p_data_dst,
-                               union_data_addr,
-                               sizeof(*p_mb_params->p_data_dst));
-
-       qed_mcp_mb_unlock(p_hwfn, p_mb_params->cmd);
+       if (p_mb_params->data_src_size > union_data_size ||
+           p_mb_params->data_dst_size > union_data_size) {
+               DP_ERR(p_hwfn,
+                      "The provided size is larger than the union data size [src_size %u, dst_size %u, union_data_size %zu]\n",
+                      p_mb_params->data_src_size,
+                      p_mb_params->data_dst_size, union_data_size);
+               return -EINVAL;
+       }
 
-       return rc;
+       return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries,
+                                     delay);
 }
 
 int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
@@ -401,32 +550,12 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
                u32 *o_mcp_param)
 {
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data data_src;
        int rc;
 
        memset(&mb_params, 0, sizeof(mb_params));
-       memset(&data_src, 0, sizeof(data_src));
        mb_params.cmd = cmd;
        mb_params.param = param;
 
-       /* In case of UNLOAD_DONE, set the primary MAC */
-       if ((cmd == DRV_MSG_CODE_UNLOAD_DONE) &&
-           (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED)) {
-               u8 *p_mac = p_hwfn->cdev->wol_mac;
-
-               data_src.wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1];
-               data_src.wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 |
-                                            p_mac[4] << 8 | p_mac[5];
-
-               DP_VERBOSE(p_hwfn,
-                          (QED_MSG_SP | NETIF_MSG_IFDOWN),
-                          "Setting WoL MAC: %pM --> [%08x,%08x]\n",
-                          p_mac, data_src.wol_mac.mac_upper,
-                          data_src.wol_mac.mac_lower);
-
-               mb_params.p_data_src = &data_src;
-       }
-
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
                return rc;
@@ -445,13 +574,17 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
                       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf)
 {
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
+       u8 raw_data[MCP_DRV_NVM_BUF_LEN];
        int rc;
 
        memset(&mb_params, 0, sizeof(mb_params));
        mb_params.cmd = cmd;
        mb_params.param = param;
-       mb_params.p_data_dst = &union_data;
+       mb_params.p_data_dst = raw_data;
+
+       /* Use the maximal value since the actual one is part of the response */
+       mb_params.data_dst_size = MCP_DRV_NVM_BUF_LEN;
+
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
                return rc;
@@ -460,55 +593,413 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
        *o_mcp_param = mb_params.mcp_param;
 
        *o_txn_size = *o_mcp_param;
-       memcpy(o_buf, &union_data.raw_data, *o_txn_size);
+       memcpy(o_buf, raw_data, *o_txn_size);
 
        return 0;
 }
 
-int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
-                    struct qed_ptt *p_ptt, u32 *p_load_code)
+static bool
+qed_mcp_can_force_load(u8 drv_role,
+                      u8 exist_drv_role,
+                      enum qed_override_force_load override_force_load)
+{
+       bool can_force_load = false;
+
+       switch (override_force_load) {
+       case QED_OVERRIDE_FORCE_LOAD_ALWAYS:
+               can_force_load = true;
+               break;
+       case QED_OVERRIDE_FORCE_LOAD_NEVER:
+               can_force_load = false;
+               break;
+       default:
+               can_force_load = (drv_role == DRV_ROLE_OS &&
+                                 exist_drv_role == DRV_ROLE_PREBOOT) ||
+                                (drv_role == DRV_ROLE_KDUMP &&
+                                 exist_drv_role == DRV_ROLE_OS);
+               break;
+       }
+
+       return can_force_load;
+}
+
+static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt)
+{
+       u32 resp = 0, param = 0;
+       int rc;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_CANCEL_LOAD_REQ, 0,
+                        &resp, &param);
+       if (rc)
+               DP_NOTICE(p_hwfn,
+                         "Failed to send cancel load request, rc = %d\n", rc);
+
+       return rc;
+}
+
+#define CONFIG_QEDE_BITMAP_IDX         BIT(0)
+#define CONFIG_QED_SRIOV_BITMAP_IDX    BIT(1)
+#define CONFIG_QEDR_BITMAP_IDX         BIT(2)
+#define CONFIG_QEDF_BITMAP_IDX         BIT(4)
+#define CONFIG_QEDI_BITMAP_IDX         BIT(5)
+#define CONFIG_QED_LL2_BITMAP_IDX      BIT(6)
+
+static u32 qed_get_config_bitmap(void)
+{
+       u32 config_bitmap = 0x0;
+
+       if (IS_ENABLED(CONFIG_QEDE))
+               config_bitmap |= CONFIG_QEDE_BITMAP_IDX;
+
+       if (IS_ENABLED(CONFIG_QED_SRIOV))
+               config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX;
+
+       if (IS_ENABLED(CONFIG_QED_RDMA))
+               config_bitmap |= CONFIG_QEDR_BITMAP_IDX;
+
+       if (IS_ENABLED(CONFIG_QED_FCOE))
+               config_bitmap |= CONFIG_QEDF_BITMAP_IDX;
+
+       if (IS_ENABLED(CONFIG_QED_ISCSI))
+               config_bitmap |= CONFIG_QEDI_BITMAP_IDX;
+
+       if (IS_ENABLED(CONFIG_QED_LL2))
+               config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX;
+
+       return config_bitmap;
+}
+
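qed_get_config_bitmap() reports the driver's compile-time feature set to the MFW through drv_ver_1. A stand-alone model using plain preprocessor conditionals (the kernel's IS_ENABLED() also resolves =m module options, which this sketch ignores):

        #include <stdint.h>
        #include <stdio.h>

        #define FEATURE_A_BIT   (1u << 0)
        #define FEATURE_B_BIT   (1u << 1)

        /* Build with -DENABLE_FEATURE_A and/or -DENABLE_FEATURE_B
         * to flip the corresponding bits. */
        static uint32_t get_config_bitmap(void)
        {
                uint32_t bitmap = 0;

        #ifdef ENABLE_FEATURE_A
                bitmap |= FEATURE_A_BIT;
        #endif
        #ifdef ENABLE_FEATURE_B
                bitmap |= FEATURE_B_BIT;
        #endif
                return bitmap;
        }

        int main(void)
        {
                printf("config bitmap: 0x%x\n", get_config_bitmap());
                return 0;
        }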
+struct qed_load_req_in_params {
+       u8 hsi_ver;
+#define QED_LOAD_REQ_HSI_VER_DEFAULT   0
+#define QED_LOAD_REQ_HSI_VER_1         1
+       u32 drv_ver_0;
+       u32 drv_ver_1;
+       u32 fw_ver;
+       u8 drv_role;
+       u8 timeout_val;
+       u8 force_cmd;
+       bool avoid_eng_reset;
+};
+
+struct qed_load_req_out_params {
+       u32 load_code;
+       u32 exist_drv_ver_0;
+       u32 exist_drv_ver_1;
+       u32 exist_fw_ver;
+       u8 exist_drv_role;
+       u8 mfw_hsi_ver;
+       bool drv_exists;
+};
+
+static int
+__qed_mcp_load_req(struct qed_hwfn *p_hwfn,
+                  struct qed_ptt *p_ptt,
+                  struct qed_load_req_in_params *p_in_params,
+                  struct qed_load_req_out_params *p_out_params)
 {
-       struct qed_dev *cdev = p_hwfn->cdev;
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
+       struct load_req_stc load_req;
+       struct load_rsp_stc load_rsp;
+       u32 hsi_ver;
        int rc;
 
+       memset(&load_req, 0, sizeof(load_req));
+       load_req.drv_ver_0 = p_in_params->drv_ver_0;
+       load_req.drv_ver_1 = p_in_params->drv_ver_1;
+       load_req.fw_ver = p_in_params->fw_ver;
+       QED_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_ROLE, p_in_params->drv_role);
+       QED_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_LOCK_TO,
+                         p_in_params->timeout_val);
+       QED_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_FORCE,
+                         p_in_params->force_cmd);
+       QED_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_FLAGS0,
+                         p_in_params->avoid_eng_reset);
+
+       hsi_ver = (p_in_params->hsi_ver == QED_LOAD_REQ_HSI_VER_DEFAULT) ?
+                 DRV_ID_MCP_HSI_VER_CURRENT :
+                 (p_in_params->hsi_ver << DRV_ID_MCP_HSI_VER_SHIFT);
+
        memset(&mb_params, 0, sizeof(mb_params));
-       /* Load Request */
        mb_params.cmd = DRV_MSG_CODE_LOAD_REQ;
-       mb_params.param = PDA_COMP | DRV_ID_MCP_HSI_VER_CURRENT |
-                         cdev->drv_type;
-       memcpy(&union_data.ver_str, cdev->ver_str, MCP_DRV_VER_STR_SIZE);
-       mb_params.p_data_src = &union_data;
-       rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       mb_params.param = PDA_COMP | hsi_ver | p_hwfn->cdev->drv_type;
+       mb_params.p_data_src = &load_req;
+       mb_params.data_src_size = sizeof(load_req);
+       mb_params.p_data_dst = &load_rsp;
+       mb_params.data_dst_size = sizeof(load_rsp);
 
-       /* if mcp fails to respond we must abort */
+       DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                  "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n",
+                  mb_params.param,
+                  QED_MFW_GET_FIELD(mb_params.param, DRV_ID_DRV_INIT_HW),
+                  QED_MFW_GET_FIELD(mb_params.param, DRV_ID_DRV_TYPE),
+                  QED_MFW_GET_FIELD(mb_params.param, DRV_ID_MCP_HSI_VER),
+                  QED_MFW_GET_FIELD(mb_params.param, DRV_ID_PDA_COMP_VER));
+
+       if (p_in_params->hsi_ver != QED_LOAD_REQ_HSI_VER_1) {
+               DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                          "Load Request: drv_ver 0x%08x_0x%08x, fw_ver 0x%08x, misc0 0x%08x [role %d, timeout %d, force %d, flags0 0x%x]\n",
+                          load_req.drv_ver_0,
+                          load_req.drv_ver_1,
+                          load_req.fw_ver,
+                          load_req.misc0,
+                          QED_MFW_GET_FIELD(load_req.misc0, LOAD_REQ_ROLE),
+                          QED_MFW_GET_FIELD(load_req.misc0,
+                                            LOAD_REQ_LOCK_TO),
+                          QED_MFW_GET_FIELD(load_req.misc0, LOAD_REQ_FORCE),
+                          QED_MFW_GET_FIELD(load_req.misc0, LOAD_REQ_FLAGS0));
+       }
+
+       rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc) {
-               DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+               DP_NOTICE(p_hwfn, "Failed to send load request, rc = %d\n", rc);
                return rc;
        }
 
-       *p_load_code = mb_params.mcp_resp;
+       DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                  "Load Response: resp 0x%08x\n", mb_params.mcp_resp);
+       p_out_params->load_code = mb_params.mcp_resp;
+
+       if (p_in_params->hsi_ver != QED_LOAD_REQ_HSI_VER_1 &&
+           p_out_params->load_code != FW_MSG_CODE_DRV_LOAD_REFUSED_HSI_1) {
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_SP,
+                          "Load Response: exist_drv_ver 0x%08x_0x%08x, exist_fw_ver 0x%08x, misc0 0x%08x [exist_role %d, mfw_hsi %d, flags0 0x%x]\n",
+                          load_rsp.drv_ver_0,
+                          load_rsp.drv_ver_1,
+                          load_rsp.fw_ver,
+                          load_rsp.misc0,
+                          QED_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_ROLE),
+                          QED_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_HSI),
+                          QED_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_FLAGS0));
+
+               p_out_params->exist_drv_ver_0 = load_rsp.drv_ver_0;
+               p_out_params->exist_drv_ver_1 = load_rsp.drv_ver_1;
+               p_out_params->exist_fw_ver = load_rsp.fw_ver;
+               p_out_params->exist_drv_role =
+                   QED_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_ROLE);
+               p_out_params->mfw_hsi_ver =
+                   QED_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_HSI);
+               p_out_params->drv_exists =
+                   QED_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_FLAGS0) &
+                   LOAD_RSP_FLAGS0_DRV_EXISTS;
+       }
+
+       return 0;
+}
+
+static int qed_get_mfw_drv_role(struct qed_hwfn *p_hwfn,
+                               enum qed_drv_role drv_role,
+                               u8 *p_mfw_drv_role)
+{
+       switch (drv_role) {
+       case QED_DRV_ROLE_OS:
+               *p_mfw_drv_role = DRV_ROLE_OS;
+               break;
+       case QED_DRV_ROLE_KDUMP:
+               *p_mfw_drv_role = DRV_ROLE_KDUMP;
+               break;
+       default:
+               DP_ERR(p_hwfn, "Unexpected driver role %d\n", drv_role);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+enum qed_load_req_force {
+       QED_LOAD_REQ_FORCE_NONE,
+       QED_LOAD_REQ_FORCE_PF,
+       QED_LOAD_REQ_FORCE_ALL,
+};
+
+static void qed_get_mfw_force_cmd(struct qed_hwfn *p_hwfn,
+                                 enum qed_load_req_force force_cmd,
+                                 u8 *p_mfw_force_cmd)
+{
+       switch (force_cmd) {
+       case QED_LOAD_REQ_FORCE_NONE:
+               *p_mfw_force_cmd = LOAD_REQ_FORCE_NONE;
+               break;
+       case QED_LOAD_REQ_FORCE_PF:
+               *p_mfw_force_cmd = LOAD_REQ_FORCE_PF;
+               break;
+       case QED_LOAD_REQ_FORCE_ALL:
+               *p_mfw_force_cmd = LOAD_REQ_FORCE_ALL;
+               break;
+       }
+}
+
+int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
+                    struct qed_ptt *p_ptt,
+                    struct qed_load_req_params *p_params)
+{
+       struct qed_load_req_out_params out_params;
+       struct qed_load_req_in_params in_params;
+       u8 mfw_drv_role, mfw_force_cmd;
+       int rc;
+
+       memset(&in_params, 0, sizeof(in_params));
+       in_params.hsi_ver = QED_LOAD_REQ_HSI_VER_DEFAULT;
+       in_params.drv_ver_0 = QED_VERSION;
+       in_params.drv_ver_1 = qed_get_config_bitmap();
+       in_params.fw_ver = STORM_FW_VERSION;
+       rc = qed_get_mfw_drv_role(p_hwfn, p_params->drv_role, &mfw_drv_role);
+       if (rc)
+               return rc;
+
+       in_params.drv_role = mfw_drv_role;
+       in_params.timeout_val = p_params->timeout_val;
+       qed_get_mfw_force_cmd(p_hwfn,
+                             QED_LOAD_REQ_FORCE_NONE, &mfw_force_cmd);
+
+       in_params.force_cmd = mfw_force_cmd;
+       in_params.avoid_eng_reset = p_params->avoid_eng_reset;
+
+       memset(&out_params, 0, sizeof(out_params));
+       rc = __qed_mcp_load_req(p_hwfn, p_ptt, &in_params, &out_params);
+       if (rc)
+               return rc;
+
+       /* First handle cases where another load request should/might be sent:
+        * - MFW expects the old interface [HSI version = 1]
+        * - MFW responds that a force load request is required
+        */
+       if (out_params.load_code == FW_MSG_CODE_DRV_LOAD_REFUSED_HSI_1) {
+               DP_INFO(p_hwfn,
+                       "MFW refused a load request due to HSI > 1. Resending with HSI = 1\n");
+
+               in_params.hsi_ver = QED_LOAD_REQ_HSI_VER_1;
+               memset(&out_params, 0, sizeof(out_params));
+               rc = __qed_mcp_load_req(p_hwfn, p_ptt, &in_params, &out_params);
+               if (rc)
+                       return rc;
+       } else if (out_params.load_code ==
+                  FW_MSG_CODE_DRV_LOAD_REFUSED_REQUIRES_FORCE) {
+               if (qed_mcp_can_force_load(in_params.drv_role,
+                                          out_params.exist_drv_role,
+                                          p_params->override_force_load)) {
+                       DP_INFO(p_hwfn,
+                               "A force load is required [{role, fw_ver, drv_ver}: loading={%d, 0x%08x, 0x%08x_0x%08x}, existing={%d, 0x%08x, 0x%08x_0x%08x}]\n",
+                               in_params.drv_role, in_params.fw_ver,
+                               in_params.drv_ver_0, in_params.drv_ver_1,
+                               out_params.exist_drv_role,
+                               out_params.exist_fw_ver,
+                               out_params.exist_drv_ver_0,
+                               out_params.exist_drv_ver_1);
+
+                       qed_get_mfw_force_cmd(p_hwfn,
+                                             QED_LOAD_REQ_FORCE_ALL,
+                                             &mfw_force_cmd);
+
+                       in_params.force_cmd = mfw_force_cmd;
+                       memset(&out_params, 0, sizeof(out_params));
+                       rc = __qed_mcp_load_req(p_hwfn, p_ptt, &in_params,
+                                               &out_params);
+                       if (rc)
+                               return rc;
+               } else {
+                       DP_NOTICE(p_hwfn,
+                                 "A force load is required [{role, fw_ver, drv_ver}: loading={%d, 0x%08x, 0x%08x_0x%08x}, existing={%d, 0x%08x, 0x%08x_0x%08x}] - Avoid\n",
+                                 in_params.drv_role, in_params.fw_ver,
+                                 in_params.drv_ver_0, in_params.drv_ver_1,
+                                 out_params.exist_drv_role,
+                                 out_params.exist_fw_ver,
+                                 out_params.exist_drv_ver_0,
+                                 out_params.exist_drv_ver_1);
+                       DP_NOTICE(p_hwfn,
+                                 "Avoid sending a force load request to prevent disruption of active PFs\n");
+
+                       qed_mcp_cancel_load_req(p_hwfn, p_ptt);
+                       return -EBUSY;
+               }
+       }
 
-       /* If MFW refused (e.g. other port is in diagnostic mode) we
-        * must abort. This can happen in the following cases:
-        * - Other port is in diagnostic mode
-        * - Previously loaded function on the engine is not compliant with
-        *   the requester.
-        * - MFW cannot cope with the requester's DRV_MFW_HSI_VERSION.
-        *      -
+       /* Now handle the other types of responses.
+        * The "REFUSED_HSI_1" and "REFUSED_REQUIRES_FORCE" responses are not
+        * expected here, since the revised load requests above have been sent.
         */
-       if (!(*p_load_code) ||
-           ((*p_load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED_HSI) ||
-           ((*p_load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED_PDA) ||
-           ((*p_load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED_DIAG)) {
-               DP_ERR(p_hwfn, "MCP refused load request, aborting\n");
+       switch (out_params.load_code) {
+       case FW_MSG_CODE_DRV_LOAD_ENGINE:
+       case FW_MSG_CODE_DRV_LOAD_PORT:
+       case FW_MSG_CODE_DRV_LOAD_FUNCTION:
+               if (out_params.mfw_hsi_ver != QED_LOAD_REQ_HSI_VER_1 &&
+                   out_params.drv_exists) {
+                       /* The role and fw/driver version match, but the PF is
+                        * already loaded and has not been unloaded gracefully.
+                        */
+                       DP_NOTICE(p_hwfn, "PF is already loaded\n");
+                       return -EINVAL;
+               }
+               break;
+       default:
+               DP_NOTICE(p_hwfn,
+                         "Unexpected refusal to load request [resp 0x%08x]. Aborting.\n",
+                         out_params.load_code);
                return -EBUSY;
        }
 
+       p_params->load_code = out_params.load_code;
+
        return 0;
 }
 
+int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       u32 wol_param, mcp_resp, mcp_param;
+
+       switch (p_hwfn->cdev->wol_config) {
+       case QED_OV_WOL_DISABLED:
+               wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED;
+               break;
+       case QED_OV_WOL_ENABLED:
+               wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED;
+               break;
+       default:
+               DP_NOTICE(p_hwfn,
+                         "Unknown WoL configuration %02x\n",
+                         p_hwfn->cdev->wol_config);
+               /* Fallthrough */
+       case QED_OV_WOL_DEFAULT:
+               wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
+       }
+
+       return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param,
+                          &mcp_resp, &mcp_param);
+}
+
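The WoL switch above uses a legal but uncommon C shape: the default label warns about an unknown value and then falls through into the QED_OV_WOL_DEFAULT case. The construct in isolation:

        #include <stdio.h>

        enum wol_config { WOL_DISABLED, WOL_ENABLED, WOL_DEFAULT };

        /* default: may precede a case label; falling through it picks the
         * default parameter after warning about the unknown value. */
        static const char *wol_param(int cfg)
        {
                switch (cfg) {
                case WOL_DISABLED:
                        return "UNLOAD_WOL_DISABLED";
                case WOL_ENABLED:
                        return "UNLOAD_WOL_ENABLED";
                default:
                        fprintf(stderr, "unknown WoL configuration %d\n", cfg);
                        /* fall through */
                case WOL_DEFAULT:
                        return "UNLOAD_WOL_MCP";
                }
        }

        int main(void)
        {
                printf("%s\n", wol_param(7));
                return 0;
        }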
+int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       struct qed_mcp_mb_params mb_params;
+       struct mcp_mac wol_mac;
+
+       memset(&mb_params, 0, sizeof(mb_params));
+       mb_params.cmd = DRV_MSG_CODE_UNLOAD_DONE;
+
+       /* Set the primary MAC if WoL is enabled */
+       if (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED) {
+               u8 *p_mac = p_hwfn->cdev->wol_mac;
+
+               memset(&wol_mac, 0, sizeof(wol_mac));
+               wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1];
+               wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 |
+                                   p_mac[4] << 8 | p_mac[5];
+
+               DP_VERBOSE(p_hwfn,
+                          (QED_MSG_SP | NETIF_MSG_IFDOWN),
+                          "Setting WoL MAC: %pM --> [%08x,%08x]\n",
+                          p_mac, wol_mac.mac_upper, wol_mac.mac_lower);
+
+               mb_params.p_data_src = &wol_mac;
+               mb_params.data_src_size = sizeof(wol_mac);
+       }
+
+       return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+}
+
 static void qed_mcp_handle_vf_flr(struct qed_hwfn *p_hwfn,
                                  struct qed_ptt *p_ptt)
 {
@@ -549,7 +1040,6 @@ int qed_mcp_ack_vf_flr(struct qed_hwfn *p_hwfn,
        u32 func_addr = SECTION_ADDR(mfw_func_offsize,
                                     MCP_PF_ID(p_hwfn));
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
        int rc;
        int i;
 
@@ -560,8 +1050,8 @@ int qed_mcp_ack_vf_flr(struct qed_hwfn *p_hwfn,
 
        memset(&mb_params, 0, sizeof(mb_params));
        mb_params.cmd = DRV_MSG_CODE_VF_DISABLED_DONE;
-       memcpy(&union_data.ack_vf_disabled, vfs_to_ack, VF_MAX_STATIC / 8);
-       mb_params.p_data_src = &union_data;
+       mb_params.p_data_src = vfs_to_ack;
+       mb_params.data_src_size = VF_MAX_STATIC / 8;
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc) {
                DP_NOTICE(p_hwfn, "Failed to pass ACK for VF flr to MFW\n");
@@ -744,33 +1234,31 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
 {
        struct qed_mcp_link_params *params = &p_hwfn->mcp_info->link_input;
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
-       struct eth_phy_cfg *phy_cfg;
+       struct eth_phy_cfg phy_cfg;
        int rc = 0;
        u32 cmd;
 
        /* Set the shmem configuration according to params */
-       phy_cfg = &union_data.drv_phy_cfg;
-       memset(phy_cfg, 0, sizeof(*phy_cfg));
+       memset(&phy_cfg, 0, sizeof(phy_cfg));
        cmd = b_up ? DRV_MSG_CODE_INIT_PHY : DRV_MSG_CODE_LINK_RESET;
        if (!params->speed.autoneg)
-               phy_cfg->speed = params->speed.forced_speed;
-       phy_cfg->pause |= (params->pause.autoneg) ? ETH_PAUSE_AUTONEG : 0;
-       phy_cfg->pause |= (params->pause.forced_rx) ? ETH_PAUSE_RX : 0;
-       phy_cfg->pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0;
-       phy_cfg->adv_speed = params->speed.advertised_speeds;
-       phy_cfg->loopback_mode = params->loopback_mode;
+               phy_cfg.speed = params->speed.forced_speed;
+       phy_cfg.pause |= (params->pause.autoneg) ? ETH_PAUSE_AUTONEG : 0;
+       phy_cfg.pause |= (params->pause.forced_rx) ? ETH_PAUSE_RX : 0;
+       phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0;
+       phy_cfg.adv_speed = params->speed.advertised_speeds;
+       phy_cfg.loopback_mode = params->loopback_mode;
 
        p_hwfn->b_drv_link_init = b_up;
 
        if (b_up) {
                DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
                           "Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x, features 0x%08x\n",
-                          phy_cfg->speed,
-                          phy_cfg->pause,
-                          phy_cfg->adv_speed,
-                          phy_cfg->loopback_mode,
-                          phy_cfg->feature_config_flags);
+                          phy_cfg.speed,
+                          phy_cfg.pause,
+                          phy_cfg.adv_speed,
+                          phy_cfg.loopback_mode,
+                          phy_cfg.feature_config_flags);
        } else {
                DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
                           "Resetting link\n");
@@ -778,7 +1266,8 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
 
        memset(&mb_params, 0, sizeof(mb_params));
        mb_params.cmd = cmd;
-       mb_params.p_data_src = &union_data;
+       mb_params.p_data_src = &phy_cfg;
+       mb_params.data_src_size = sizeof(phy_cfg);
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
 
        /* if mcp fails to respond we must abort */
@@ -805,7 +1294,6 @@ static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
        enum qed_mcp_protocol_type stats_type;
        union qed_mcp_protocol_stats stats;
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
        u32 hsi_param;
 
        switch (type) {
@@ -835,8 +1323,8 @@ static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
        memset(&mb_params, 0, sizeof(mb_params));
        mb_params.cmd = DRV_MSG_CODE_GET_STATS;
        mb_params.param = hsi_param;
-       memcpy(&union_data, &stats, sizeof(stats));
-       mb_params.p_data_src = &union_data;
+       mb_params.p_data_src = &stats;
+       mb_params.data_src_size = sizeof(stats);
        qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
 }
 
@@ -963,7 +1451,7 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                        qed_mcp_update_bw(p_hwfn, p_ptt);
                        break;
                default:
-                       DP_NOTICE(p_hwfn, "Unimplemented MFW message %d\n", i);
+                       DP_INFO(p_hwfn, "Unimplemented MFW message %d\n", i);
                        rc = -EINVAL;
                }
        }
@@ -1316,24 +1804,23 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
                         struct qed_ptt *p_ptt,
                         struct qed_mcp_drv_version *p_ver)
 {
-       struct drv_version_stc *p_drv_version;
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
+       struct drv_version_stc drv_version;
        __be32 val;
        u32 i;
        int rc;
 
-       p_drv_version = &union_data.drv_version;
-       p_drv_version->version = p_ver->version;
-
+       memset(&drv_version, 0, sizeof(drv_version));
+       drv_version.version = p_ver->version;
        for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) {
                val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)]));
-               *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val;
+               *(__be32 *)&drv_version.name[i * sizeof(u32)] = val;
        }
 
        memset(&mb_params, 0, sizeof(mb_params));
        mb_params.cmd = DRV_MSG_CODE_SET_VERSION;
-       mb_params.p_data_src = &union_data;
+       mb_params.p_data_src = &drv_version;
+       mb_params.data_src_size = sizeof(drv_version);
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
                DP_ERR(p_hwfn, "MCP response failure, aborting\n");
@@ -1450,7 +1937,7 @@ int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt, u8 *mac)
 {
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
+       u32 mfw_mac[2];
        int rc;
 
        memset(&mb_params, 0, sizeof(mb_params));
@@ -1458,8 +1945,17 @@ int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
        mb_params.param = DRV_MSG_CODE_VMAC_TYPE_MAC <<
                          DRV_MSG_CODE_VMAC_TYPE_SHIFT;
        mb_params.param |= MCP_PF_ID(p_hwfn);
-       ether_addr_copy(&union_data.raw_data[0], mac);
-       mb_params.p_data_src = &union_data;
+
+       /* MCP is BE, and on LE platforms PCI would swap access to SHMEM
+        * in 32-bit granularity.
+        * So the MAC has to be set in native order [and not byte order],
+        * otherwise it would be read incorrectly by MFW after swap.
+        */
+       mfw_mac[0] = mac[0] << 24 | mac[1] << 16 | mac[2] << 8 | mac[3];
+       mfw_mac[1] = mac[4] << 24 | mac[5] << 16;
+
+       mb_params.p_data_src = (u8 *)mfw_mac;
+       mb_params.data_src_size = 8;
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
                DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc);
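Because the MFW accesses SHMEM in 32-bit units that are byte-swapped on little-endian hosts, the MAC is packed into two native-order words instead of being copied byte-wise. The packing on its own, with explicit casts to avoid signed-shift promotion (pure arithmetic, no kernel dependencies):

        #include <stdio.h>
        #include <stdint.h>

        /* Pack a 6-byte MAC into two 32-bit words, most significant byte
         * first, matching the patch's mfw_mac[] layout. */
        static void pack_mac(const uint8_t mac[6], uint32_t out[2])
        {
                out[0] = (uint32_t)mac[0] << 24 | (uint32_t)mac[1] << 16 |
                         (uint32_t)mac[2] << 8  | mac[3];
                out[1] = (uint32_t)mac[4] << 24 | (uint32_t)mac[5] << 16;
        }

        int main(void)
        {
                const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
                uint32_t w[2];

                pack_mac(mac, w);
                printf("mfw_mac = [0x%08x, 0x%08x]\n", w[0], w[1]);
                return 0;
        }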
@@ -1724,52 +2220,396 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-#define QED_RESC_ALLOC_VERSION_MAJOR    1
+static enum resource_id_enum qed_mcp_get_mfw_res_id(enum qed_resources res_id)
+{
+       enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID;
+
+       switch (res_id) {
+       case QED_SB:
+               mfw_res_id = RESOURCE_NUM_SB_E;
+               break;
+       case QED_L2_QUEUE:
+               mfw_res_id = RESOURCE_NUM_L2_QUEUE_E;
+               break;
+       case QED_VPORT:
+               mfw_res_id = RESOURCE_NUM_VPORT_E;
+               break;
+       case QED_RSS_ENG:
+               mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E;
+               break;
+       case QED_PQ:
+               mfw_res_id = RESOURCE_NUM_PQ_E;
+               break;
+       case QED_RL:
+               mfw_res_id = RESOURCE_NUM_RL_E;
+               break;
+       case QED_MAC:
+       case QED_VLAN:
+               /* Each VFC resource can accommodate both a MAC and a VLAN */
+               mfw_res_id = RESOURCE_VFC_FILTER_E;
+               break;
+       case QED_ILT:
+               mfw_res_id = RESOURCE_ILT_E;
+               break;
+       case QED_LL2_QUEUE:
+               mfw_res_id = RESOURCE_LL2_QUEUE_E;
+               break;
+       case QED_RDMA_CNQ_RAM:
+       case QED_CMDQS_CQS:
+               /* CNQ/CMDQS are the same resource */
+               mfw_res_id = RESOURCE_CQS_E;
+               break;
+       case QED_RDMA_STATS_QUEUE:
+               mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E;
+               break;
+       case QED_BDQ:
+               mfw_res_id = RESOURCE_BDQ_E;
+               break;
+       default:
+               break;
+       }
+
+       return mfw_res_id;
+}
+
+#define QED_RESC_ALLOC_VERSION_MAJOR    2
 #define QED_RESC_ALLOC_VERSION_MINOR    0
 #define QED_RESC_ALLOC_VERSION                              \
        ((QED_RESC_ALLOC_VERSION_MAJOR <<                    \
          DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \
         (QED_RESC_ALLOC_VERSION_MINOR <<                    \
          DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT))
-int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
-                         struct qed_ptt *p_ptt,
-                         struct resource_info *p_resc_info,
-                         u32 *p_mcp_resp, u32 *p_mcp_param)
+
+struct qed_resc_alloc_in_params {
+       u32 cmd;
+       enum qed_resources res_id;
+       u32 resc_max_val;
+};
+
+struct qed_resc_alloc_out_params {
+       u32 mcp_resp;
+       u32 mcp_param;
+       u32 resc_num;
+       u32 resc_start;
+       u32 vf_resc_num;
+       u32 vf_resc_start;
+       u32 flags;
+};
+
+static int
+qed_mcp_resc_allocation_msg(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt,
+                           struct qed_resc_alloc_in_params *p_in_params,
+                           struct qed_resc_alloc_out_params *p_out_params)
 {
        struct qed_mcp_mb_params mb_params;
-       union drv_union_data union_data;
+       struct resource_info mfw_resc_info;
        int rc;
 
+       memset(&mfw_resc_info, 0, sizeof(mfw_resc_info));
+
+       mfw_resc_info.res_id = qed_mcp_get_mfw_res_id(p_in_params->res_id);
+       if (mfw_resc_info.res_id == RESOURCE_NUM_INVALID) {
+               DP_ERR(p_hwfn,
+                      "Failed to match resource %d [%s] with the MFW resources\n",
+                      p_in_params->res_id,
+                      qed_hw_get_resc_name(p_in_params->res_id));
+               return -EINVAL;
+       }
+
+       switch (p_in_params->cmd) {
+       case DRV_MSG_SET_RESOURCE_VALUE_MSG:
+               mfw_resc_info.size = p_in_params->resc_max_val;
+               /* Fallthrough */
+       case DRV_MSG_GET_RESOURCE_ALLOC_MSG:
+               break;
+       default:
+               DP_ERR(p_hwfn, "Unexpected resource alloc command [0x%08x]\n",
+                      p_in_params->cmd);
+               return -EINVAL;
+       }
+
        memset(&mb_params, 0, sizeof(mb_params));
-       memset(&union_data, 0, sizeof(union_data));
-       mb_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG;
+       mb_params.cmd = p_in_params->cmd;
        mb_params.param = QED_RESC_ALLOC_VERSION;
+       mb_params.p_data_src = &mfw_resc_info;
+       mb_params.data_src_size = sizeof(mfw_resc_info);
+       mb_params.p_data_dst = mb_params.p_data_src;
+       mb_params.data_dst_size = mb_params.data_src_size;
 
-       /* Need to have a sufficient large struct, as the cmd_and_union
-        * is going to do memcpy from and to it.
-        */
-       memcpy(&union_data.resource, p_resc_info, sizeof(*p_resc_info));
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_SP,
+                  "Resource message request: cmd 0x%08x, res_id %d [%s], hsi_version %d.%d, val 0x%x\n",
+                  p_in_params->cmd,
+                  p_in_params->res_id,
+                  qed_hw_get_resc_name(p_in_params->res_id),
+                  QED_MFW_GET_FIELD(mb_params.param,
+                                    DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR),
+                  QED_MFW_GET_FIELD(mb_params.param,
+                                    DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR),
+                  p_in_params->resc_max_val);
 
-       mb_params.p_data_src = &union_data;
-       mb_params.p_data_dst = &union_data;
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
                return rc;
 
-       /* Copy the data back */
-       memcpy(p_resc_info, &union_data.resource, sizeof(*p_resc_info));
-       *p_mcp_resp = mb_params.mcp_resp;
-       *p_mcp_param = mb_params.mcp_param;
+       p_out_params->mcp_resp = mb_params.mcp_resp;
+       p_out_params->mcp_param = mb_params.mcp_param;
+       p_out_params->resc_num = mfw_resc_info.size;
+       p_out_params->resc_start = mfw_resc_info.offset;
+       p_out_params->vf_resc_num = mfw_resc_info.vf_size;
+       p_out_params->vf_resc_start = mfw_resc_info.vf_offset;
+       p_out_params->flags = mfw_resc_info.flags;
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_SP,
+                  "Resource message response: mfw_hsi_version %d.%d, num 0x%x, start 0x%x, vf_num 0x%x, vf_start 0x%x, flags 0x%08x\n",
+                  QED_MFW_GET_FIELD(p_out_params->mcp_param,
+                                    FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR),
+                  QED_MFW_GET_FIELD(p_out_params->mcp_param,
+                                    FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR),
+                  p_out_params->resc_num,
+                  p_out_params->resc_start,
+                  p_out_params->vf_resc_num,
+                  p_out_params->vf_resc_start, p_out_params->flags);
+
+       return 0;
+}
+
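qed_mcp_resc_allocation_msg() points p_data_dst at the same buffer as p_data_src, so the MFW's reply overwrites the request in place before being unpacked into the out-params. A model of that in-place exchange with no hardware involved (all names illustrative):

        #include <stdio.h>
        #include <string.h>

        struct resource_info {
                unsigned int res_id;
                unsigned int size;
                unsigned int offset;
        };

        /* Model of the request/response sharing one buffer: the callee
         * reads the struct, then writes its answer over the same memory. */
        static void mailbox_exchange(void *buf, size_t len)
        {
                struct resource_info *ri = buf;

                (void)len;
                ri->size = 64;          /* the "MFW" fills in the allocation */
                ri->offset = 128;
        }

        int main(void)
        {
                struct resource_info ri;

                memset(&ri, 0, sizeof(ri));
                ri.res_id = 3;          /* request: which resource */
                mailbox_exchange(&ri, sizeof(ri));
                printf("res %u: num %u start %u\n", ri.res_id, ri.size, ri.offset);
                return 0;
        }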
+int
+qed_mcp_set_resc_max_val(struct qed_hwfn *p_hwfn,
+                        struct qed_ptt *p_ptt,
+                        enum qed_resources res_id,
+                        u32 resc_max_val, u32 *p_mcp_resp)
+{
+       struct qed_resc_alloc_out_params out_params;
+       struct qed_resc_alloc_in_params in_params;
+       int rc;
+
+       memset(&in_params, 0, sizeof(in_params));
+       in_params.cmd = DRV_MSG_SET_RESOURCE_VALUE_MSG;
+       in_params.res_id = res_id;
+       in_params.resc_max_val = resc_max_val;
+       memset(&out_params, 0, sizeof(out_params));
+       rc = qed_mcp_resc_allocation_msg(p_hwfn, p_ptt, &in_params,
+                                        &out_params);
+       if (rc)
+               return rc;
+
+       *p_mcp_resp = out_params.mcp_resp;
+
+       return 0;
+}
+
+int
+qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+                     struct qed_ptt *p_ptt,
+                     enum qed_resources res_id,
+                     u32 *p_mcp_resp, u32 *p_resc_num, u32 *p_resc_start)
+{
+       struct qed_resc_alloc_out_params out_params;
+       struct qed_resc_alloc_in_params in_params;
+       int rc;
+
+       memset(&in_params, 0, sizeof(in_params));
+       in_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG;
+       in_params.res_id = res_id;
+       memset(&out_params, 0, sizeof(out_params));
+       rc = qed_mcp_resc_allocation_msg(p_hwfn, p_ptt, &in_params,
+                                        &out_params);
+       if (rc)
+               return rc;
+
+       *p_mcp_resp = out_params.mcp_resp;
+
+       if (*p_mcp_resp == FW_MSG_CODE_RESOURCE_ALLOC_OK) {
+               *p_resc_num = out_params.resc_num;
+               *p_resc_start = out_params.resc_start;
+       }
+
+       return 0;
+}
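
As a usage illustration of the two wrappers above, here is a minimal, hypothetical caller sketch. It assumes a valid p_hwfn/p_ptt pair and that QED_L2_QUEUE is a member of enum qed_resources (as used elsewhere in this series); the proposed maximum of 64 is made up:

static int example_negotiate_l2_queues(struct qed_hwfn *p_hwfn,
				       struct qed_ptt *p_ptt)
{
	u32 mcp_resp, resc_num, resc_start;
	int rc;

	rc = qed_mcp_get_resc_info(p_hwfn, p_ptt, QED_L2_QUEUE,
				   &mcp_resp, &resc_num, &resc_start);
	if (rc)
		return rc;

	/* resc_num/resc_start are only valid on RESOURCE_ALLOC_OK */
	if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK)
		return -EINVAL;

	/* Hypothetical: propose capping this PF's share at 64 entries */
	return qed_mcp_set_resc_max_val(p_hwfn, p_ptt, QED_L2_QUEUE,
					64, &mcp_resp);
}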
+
+int qed_mcp_initiate_pf_flr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       u32 mcp_resp, mcp_param;
+
+       return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_INITIATE_PF_FLR, 0,
+                          &mcp_resp, &mcp_param);
+}
+
+static int qed_mcp_resource_cmd(struct qed_hwfn *p_hwfn,
+                               struct qed_ptt *p_ptt,
+                               u32 param, u32 *p_mcp_resp, u32 *p_mcp_param)
+{
+       int rc;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, param,
+                        p_mcp_resp, p_mcp_param);
+       if (rc)
+               return rc;
+
+       if (*p_mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+               DP_INFO(p_hwfn,
+                       "The resource command is unsupported by the MFW\n");
+               return -EINVAL;
+       }
+
+       if (*p_mcp_param == RESOURCE_OPCODE_UNKNOWN_CMD) {
+               u8 opcode = QED_MFW_GET_FIELD(param, RESOURCE_CMD_REQ_OPCODE);
+
+               DP_NOTICE(p_hwfn,
+                         "The resource command is unknown to the MFW [param 0x%08x, opcode %d]\n",
+                         param, opcode);
+               return -EINVAL;
+       }
+
+       return rc;
+}
+
+int
+__qed_mcp_resc_lock(struct qed_hwfn *p_hwfn,
+                   struct qed_ptt *p_ptt,
+                   struct qed_resc_lock_params *p_params)
+{
+       u32 param = 0, mcp_resp, mcp_param;
+       u8 opcode;
+       int rc;
+
+       switch (p_params->timeout) {
+       case QED_MCP_RESC_LOCK_TO_DEFAULT:
+               opcode = RESOURCE_OPCODE_REQ;
+               p_params->timeout = 0;
+               break;
+       case QED_MCP_RESC_LOCK_TO_NONE:
+               opcode = RESOURCE_OPCODE_REQ_WO_AGING;
+               p_params->timeout = 0;
+               break;
+       default:
+               opcode = RESOURCE_OPCODE_REQ_W_AGING;
+               break;
+       }
+
+       QED_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_RESC, p_params->resource);
+       QED_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_OPCODE, opcode);
+       QED_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_AGE, p_params->timeout);
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_SP,
+                  "Resource lock request: param 0x%08x [age %d, opcode %d, resource %d]\n",
+                  param, p_params->timeout, opcode, p_params->resource);
+
+       /* Attempt to acquire the resource */
+       rc = qed_mcp_resource_cmd(p_hwfn, p_ptt, param, &mcp_resp, &mcp_param);
+       if (rc)
+               return rc;
+
+       /* Analyze the response */
+       p_params->owner = QED_MFW_GET_FIELD(mcp_param, RESOURCE_CMD_RSP_OWNER);
+       opcode = QED_MFW_GET_FIELD(mcp_param, RESOURCE_CMD_RSP_OPCODE);
 
        DP_VERBOSE(p_hwfn,
                   QED_MSG_SP,
-                  "MFW resource_info: version 0x%x, res_id 0x%x, size 0x%x, offset 0x%x, vf_size 0x%x, vf_offset 0x%x, flags 0x%x\n",
-                  *p_mcp_param,
-                  p_resc_info->res_id,
-                  p_resc_info->size,
-                  p_resc_info->offset,
-                  p_resc_info->vf_size,
-                  p_resc_info->vf_offset, p_resc_info->flags);
+                  "Resource lock response: mcp_param 0x%08x [opcode %d, owner %d]\n",
+                  mcp_param, opcode, p_params->owner);
+
+       switch (opcode) {
+       case RESOURCE_OPCODE_GNT:
+               p_params->b_granted = true;
+               break;
+       case RESOURCE_OPCODE_BUSY:
+               p_params->b_granted = false;
+               break;
+       default:
+               DP_NOTICE(p_hwfn,
+                         "Unexpected opcode in resource lock response [mcp_param 0x%08x, opcode %d]\n",
+                         mcp_param, opcode);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+int
+qed_mcp_resc_lock(struct qed_hwfn *p_hwfn,
+                 struct qed_ptt *p_ptt, struct qed_resc_lock_params *p_params)
+{
+       u32 retry_cnt = 0;
+       int rc;
+
+       do {
+               /* No need for an interval before the first iteration */
+               if (retry_cnt) {
+                       if (p_params->sleep_b4_retry) {
+                               u16 retry_interval_in_ms =
+                                   DIV_ROUND_UP(p_params->retry_interval,
+                                                1000);
+
+                               msleep(retry_interval_in_ms);
+                       } else {
+                               udelay(p_params->retry_interval);
+                       }
+               }
+
+               rc = __qed_mcp_resc_lock(p_hwfn, p_ptt, p_params);
+               if (rc)
+                       return rc;
+
+               if (p_params->b_granted)
+                       break;
+       } while (retry_cnt++ < p_params->retry_num);
+
+       return 0;
+}
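
Note the unit handling in the retry loop above: retry_interval is given in microseconds, and when sleeping is permitted it is rounded up to whole milliseconds for msleep(). For example, retry_interval = 2500 yields DIV_ROUND_UP(2500, 1000) = 3, i.e. msleep(3), while the busy-wait path passes the raw microsecond value straight to udelay().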
+
+int
+qed_mcp_resc_unlock(struct qed_hwfn *p_hwfn,
+                   struct qed_ptt *p_ptt,
+                   struct qed_resc_unlock_params *p_params)
+{
+       u32 param = 0, mcp_resp, mcp_param;
+       u8 opcode;
+       int rc;
+
+       opcode = p_params->b_force ? RESOURCE_OPCODE_FORCE_RELEASE
+                                  : RESOURCE_OPCODE_RELEASE;
+       QED_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_RESC, p_params->resource);
+       QED_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_OPCODE, opcode);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                  "Resource unlock request: param 0x%08x [opcode %d, resource %d]\n",
+                  param, opcode, p_params->resource);
+
+       /* Attempt to release the resource */
+       rc = qed_mcp_resource_cmd(p_hwfn, p_ptt, param, &mcp_resp, &mcp_param);
+       if (rc)
+               return rc;
+
+       /* Analyze the response */
+       opcode = QED_MFW_GET_FIELD(mcp_param, RESOURCE_CMD_RSP_OPCODE);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                  "Resource unlock response: mcp_param 0x%08x [opcode %d]\n",
+                  mcp_param, opcode);
+
+       switch (opcode) {
+       case RESOURCE_OPCODE_RELEASED_PREVIOUS:
+               DP_INFO(p_hwfn,
+                       "Resource unlock request for an already released resource [%d]\n",
+                       p_params->resource);
+               /* Fallthrough */
+       case RESOURCE_OPCODE_RELEASED:
+               p_params->b_released = true;
+               break;
+       case RESOURCE_OPCODE_WRONG_OWNER:
+               p_params->b_released = false;
+               break;
+       default:
+               DP_NOTICE(p_hwfn,
+                         "Unexpected opcode in resource unlock response [mcp_param 0x%08x, opcode %d]\n",
+                         mcp_param, opcode);
+               return -EINVAL;
+       }
 
        return 0;
 }
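
Taken together, the lock and unlock helpers are used symmetrically around a cross-PF critical section. A minimal sketch under stated assumptions -- the resource id and retry values are illustrative, and the parameter structures are the ones declared in qed_mcp.h below:

static int example_under_resc_lock(struct qed_hwfn *p_hwfn,
				   struct qed_ptt *p_ptt)
{
	struct qed_resc_unlock_params unlock_params;
	struct qed_resc_lock_params lock_params;
	int rc;

	memset(&lock_params, 0, sizeof(lock_params));
	lock_params.resource = QED_RESC_LOCK_DBG_DUMP;
	lock_params.timeout = QED_MCP_RESC_LOCK_TO_DEFAULT;
	lock_params.retry_num = 10;		/* illustrative */
	lock_params.retry_interval = 10000;	/* usec, illustrative */
	lock_params.sleep_b4_retry = true;

	rc = qed_mcp_resc_lock(p_hwfn, p_ptt, &lock_params);
	if (rc)
		return rc;
	if (!lock_params.b_granted)
		return -EBUSY;

	/* ... critical section serialized across PFs by the MFW ... */

	memset(&unlock_params, 0, sizeof(unlock_params));
	unlock_params.resource = QED_RESC_LOCK_DBG_DUMP;
	return qed_mcp_resc_unlock(p_hwfn, p_ptt, &unlock_params);
}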
index 368e88de146cdbc7a7a66210c6ff9b1302867a8f..ac7d406be1edeba4c2206f654739d25e62dec18f 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/spinlock.h>
 #include <linux/qed/qed_fcoe_if.h>
 #include "qed_hsi.h"
+#include "qed_dev_api.h"
 
 struct qed_mcp_link_speed_params {
        bool    autoneg;
@@ -479,14 +480,18 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
                                            rel_pfid)
 #define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
 
-/* TODO - this is only correct as long as only BB is supported, and
- * no port-swapping is implemented; Afterwards we'll need to fix it.
- */
-#define MFW_PORT(_p_hwfn)       ((_p_hwfn)->abs_pf_id %        \
-                                ((_p_hwfn)->cdev->num_ports_in_engines * 2))
+#define MFW_PORT(_p_hwfn)       ((_p_hwfn)->abs_pf_id %                          \
+                                ((_p_hwfn)->cdev->num_ports_in_engines * \
+                                 qed_device_num_engines((_p_hwfn)->cdev)))
+
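As a worked example of the revised macro (numbers are illustrative): with num_ports_in_engines = 2 and qed_device_num_engines() returning 2, a function with abs_pf_id = 5 maps to MFW port 5 % (2 * 2) = 1.
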
 struct qed_mcp_info {
-       /* Spinlock used for protecting the access to the MFW mailbox */
-       spinlock_t                              lock;
+       /* List for mailbox commands which were sent and wait for a response */
+       struct list_head                        cmd_list;
+
+       /* Spinlock used for protecting the access to the mailbox commands list
+        * and the sending of the commands.
+        */
+       spinlock_t                              cmd_lock;
 
        /* Spinlock used for syncing SW link-changes and link-changes
         * originating from attention context.
@@ -506,14 +511,16 @@ struct qed_mcp_info {
        u8                                      *mfw_mb_cur;
        u8                                      *mfw_mb_shadow;
        u16                                     mfw_mb_length;
-       u16                                     mcp_hist;
+       u32                                     mcp_hist;
 };
 
 struct qed_mcp_mb_params {
        u32                     cmd;
        u32                     param;
-       union drv_union_data    *p_data_src;
-       union drv_union_data    *p_data_dst;
+       void                    *p_data_src;
+       u8                      data_src_size;
+       void                    *p_data_dst;
+       u8                      data_dst_size;
        u32                     mcp_resp;
        u32                     mcp_param;
 };
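
With the drv_union_data pointers replaced by raw pointer-plus-size pairs, any payload small enough for the mailbox can be passed through directly. A fragment sketching the pattern, mirroring what qed_mcp_resc_allocation_msg does above (p_hwfn and p_ptt are assumed to be in scope):

	struct qed_mcp_mb_params mb_params;
	struct resource_info info;
	int rc;

	memset(&info, 0, sizeof(info));
	memset(&mb_params, 0, sizeof(mb_params));
	mb_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG;
	mb_params.p_data_src = &info;		/* request payload */
	mb_params.data_src_size = sizeof(info);
	mb_params.p_data_dst = &info;		/* response written in place */
	mb_params.data_dst_size = sizeof(info);
	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);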
@@ -564,27 +571,55 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn);
 int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt);
 
+enum qed_drv_role {
+       QED_DRV_ROLE_OS,
+       QED_DRV_ROLE_KDUMP,
+};
+
+struct qed_load_req_params {
+       /* Input params */
+       enum qed_drv_role drv_role;
+       u8 timeout_val;
+       bool avoid_eng_reset;
+       enum qed_override_force_load override_force_load;
+
+       /* Output params */
+       u32 load_code;
+};
+
 /**
- * @brief Sends a LOAD_REQ to the MFW, and in case operation
- *        succeed, returns whether this PF is the first on the
- *        chip/engine/port or function. This function should be
- *        called when driver is ready to accept MFW events after
- *        Storms initializations are done.
+ * @brief Sends a LOAD_REQ to the MFW, and in case the operation succeeds,
+ *        returns whether this PF is the first on the engine, port, or function.
  *
- * @param p_hwfn       - hw function
- * @param p_ptt        - PTT required for register access
- * @param p_load_code  - The MCP response param containing one
- *      of the following:
- *      FW_MSG_CODE_DRV_LOAD_ENGINE
- *      FW_MSG_CODE_DRV_LOAD_PORT
- *      FW_MSG_CODE_DRV_LOAD_FUNCTION
- * @return int -
- *      0 - Operation was successul.
- *      -EBUSY - Operation failed
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_params
+ *
+ * @return int - 0 - Operation was successful.
  */
 int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
                     struct qed_ptt *p_ptt,
-                    u32 *p_load_code);
+                    struct qed_load_req_params *p_params);
+
+/**
+ * @brief Sends a UNLOAD_REQ message to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return int - 0 - Operation was successful.
+ */
+int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Sends a UNLOAD_DONE message to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return int - 0 - Operation was successful.
+ */
+int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
  * @brief Read the MFW mailbox into Current buffer.
@@ -707,6 +742,41 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
 int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt, u32 mask_parities);
 
+/**
+ * @brief - Sets the MFW's max value for the given resource
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param res_id
+ *  @param resc_max_val
+ *  @param p_mcp_resp
+ *
+ * @return int - 0 - operation was successful.
+ */
+int
+qed_mcp_set_resc_max_val(struct qed_hwfn *p_hwfn,
+                        struct qed_ptt *p_ptt,
+                        enum qed_resources res_id,
+                        u32 resc_max_val, u32 *p_mcp_resp);
+
+/**
+ * @brief - Gets the MFW allocation info for the given resource
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param res_id
+ *  @param p_mcp_resp
+ *  @param p_resc_num
+ *  @param p_resc_start
+ *
+ * @return int - 0 - operation was successful.
+ */
+int
+qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+                     struct qed_ptt *p_ptt,
+                     enum qed_resources res_id,
+                     u32 *p_mcp_resp, u32 *p_resc_num, u32 *p_resc_start);
+
 /**
  * @brief Send eswitch mode to MFW
  *
@@ -720,19 +790,86 @@ int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
                              struct qed_ptt *p_ptt,
                              enum qed_ov_eswitch eswitch);
 
+#define QED_MCP_RESC_LOCK_MIN_VAL       RESOURCE_DUMP
+#define QED_MCP_RESC_LOCK_MAX_VAL       31
+
+enum qed_resc_lock {
+       QED_RESC_LOCK_DBG_DUMP = QED_MCP_RESC_LOCK_MIN_VAL,
+       QED_RESC_LOCK_RESC_ALLOC = QED_MCP_RESC_LOCK_MAX_VAL
+};
+
 /**
- * @brief - Gets the MFW allocation info for the given resource
+ * @brief - Initiates PF FLR
  *
  *  @param p_hwfn
  *  @param p_ptt
- *  @param p_resc_info - descriptor of requested resource
- *  @param p_mcp_resp
- *  @param p_mcp_param
  *
  * @return int - 0 - operation was successful.
  */
-int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
-                         struct qed_ptt *p_ptt,
-                         struct resource_info *p_resc_info,
-                         u32 *p_mcp_resp, u32 *p_mcp_param);
+int qed_mcp_initiate_pf_flr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+struct qed_resc_lock_params {
+       /* Resource number [valid values are 0..31] */
+       u8 resource;
+
+       /* Lock timeout value in seconds [default, none or 1..254] */
+       u8 timeout;
+#define QED_MCP_RESC_LOCK_TO_DEFAULT    0
+#define QED_MCP_RESC_LOCK_TO_NONE       255
+
+       /* Number of times to retry locking */
+       u8 retry_num;
+
+       /* The interval in usec between retries */
+       u16 retry_interval;
+
+       /* Use sleep or delay between retries */
+       bool sleep_b4_retry;
+
+       /* Will be set to true if the resource is free and granted */
+       bool b_granted;
+
+       /* Will be filled with the resource owner.
+        * [0..15 = PF0-15, 16 = MFW]
+        */
+       u8 owner;
+};
+
+/**
+ * @brief Acquires MFW generic resource lock
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param p_params
+ *
+ * @return int - 0 - operation was successful.
+ */
+int
+qed_mcp_resc_lock(struct qed_hwfn *p_hwfn,
+                 struct qed_ptt *p_ptt, struct qed_resc_lock_params *p_params);
+
+struct qed_resc_unlock_params {
+       /* Resource number [valid values are 0..31] */
+       u8 resource;
+
+       /* Allow releasing a resource even if it belongs to another PF */
+       bool b_force;
+
+       /* Will be set to true if the resource is released */
+       bool b_released;
+};
+
+/**
+ * @brief Releases MFW generic resource lock
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param p_params
+ *
+ * @return int - 0 - operation was successful.
+ */
+int
+qed_mcp_resc_unlock(struct qed_hwfn *p_hwfn,
+                   struct qed_ptt *p_ptt,
+                   struct qed_resc_unlock_params *p_params);
+
 #endif
index d27aa85da23cb526ca9319a152107a64c6df0e4a..80c9c0b172dd0d7b7f59a58349699178ec36d438 100644 (file)
@@ -262,12 +262,20 @@ static int qed_ptp_hw_enable(struct qed_dev *cdev)
        qed_wr(p_hwfn, p_ptt, NIG_REG_TS_OUTPUT_ENABLE_PDA, 0x1);
 
        /* Pause free running counter */
-       qed_wr(p_hwfn, p_ptt, NIG_REG_TIMESYNC_GEN_REG_BB, 2);
+       if (QED_IS_BB_B0(p_hwfn->cdev))
+               qed_wr(p_hwfn, p_ptt, NIG_REG_TIMESYNC_GEN_REG_BB, 2);
+       if (QED_IS_AH(p_hwfn->cdev))
+               qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_FREECNT_UPDATE_K2, 2);
 
        qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_FREE_CNT_VALUE_LSB, 0);
        qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_FREE_CNT_VALUE_MSB, 0);
        /* Resume free running counter */
-       qed_wr(p_hwfn, p_ptt, NIG_REG_TIMESYNC_GEN_REG_BB, 4);
+       if (QED_IS_BB_B0(p_hwfn->cdev))
+               qed_wr(p_hwfn, p_ptt, NIG_REG_TIMESYNC_GEN_REG_BB, 4);
+       if (QED_IS_AH(p_hwfn->cdev)) {
+               qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_FREECNT_UPDATE_K2, 4);
+               qed_wr(p_hwfn, p_ptt, NIG_REG_PTP_LATCH_OSTS_PKT_TIME, 1);
+       }
 
        /* Disable drift register */
        qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_DRIFT_CNTR_CONF, 0x0);
index d59d9df60cd24c20f031e66e7f034975d5244cba..6d4ac7e2ee83eb50ffb2ec8517a2f2f0992dc2f5 100644 (file)
        0x2e0704UL
 #define  CCFC_REG_STRONG_ENABLE_PF \
        0x2e0708UL
-#define  PGLUE_B_REG_PGL_ADDR_88_F0 \
+#define  PGLUE_B_REG_PGL_ADDR_88_F0_BB \
        0x2aa404UL
-#define  PGLUE_B_REG_PGL_ADDR_8C_F0 \
+#define  PGLUE_B_REG_PGL_ADDR_8C_F0_BB \
        0x2aa408UL
-#define  PGLUE_B_REG_PGL_ADDR_90_F0 \
+#define  PGLUE_B_REG_PGL_ADDR_90_F0_BB \
        0x2aa40cUL
-#define  PGLUE_B_REG_PGL_ADDR_94_F0 \
+#define  PGLUE_B_REG_PGL_ADDR_94_F0_BB \
        0x2aa410UL
 #define  PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR \
        0x2aa138UL
        0x238804UL
 #define  RDIF_REG_STOP_ON_ERROR \
        0x300040UL
+#define RDIF_REG_DEBUG_ERROR_INFO \
+       0x300400UL
+#define RDIF_REG_DEBUG_ERROR_INFO_SIZE \
+       64
 #define  SRC_REG_SOFT_RST \
        0x23874cUL
 #define  TCFC_REG_ACTIVITY_COUNTER \
        0x1700004UL
 #define  TDIF_REG_STOP_ON_ERROR \
        0x310040UL
+#define TDIF_REG_DEBUG_ERROR_INFO \
+       0x310400UL
+#define TDIF_REG_DEBUG_ERROR_INFO_SIZE \
+       64
 #define  UCM_REG_INIT \
        0x1280000UL
 #define  UMAC_REG_IPG_HD_BKP_CNTL_BB_B0 \
        0x1901534UL
 #define USEM_REG_DBG_FORCE_FRAME \
        0x1901538UL
+#define NWS_REG_DBG_SELECT \
+       0x700128UL
+#define NWS_REG_DBG_DWORD_ENABLE \
+       0x70012cUL
+#define NWS_REG_DBG_SHIFT \
+       0x700130UL
+#define NWS_REG_DBG_FORCE_VALID        \
+       0x700134UL
+#define NWS_REG_DBG_FORCE_FRAME        \
+       0x700138UL
+#define MS_REG_DBG_SELECT \
+       0x6a0228UL
+#define MS_REG_DBG_DWORD_ENABLE \
+       0x6a022cUL
+#define MS_REG_DBG_SHIFT \
+       0x6a0230UL
+#define MS_REG_DBG_FORCE_VALID \
+       0x6a0234UL
+#define MS_REG_DBG_FORCE_FRAME \
+       0x6a0238UL
 #define PCIE_REG_DBG_COMMON_SELECT \
        0x054398UL
 #define PCIE_REG_DBG_COMMON_DWORD_ENABLE \
        0x000b48UL
 #define RSS_REG_RSS_RAM_DATA \
        0x238c20UL
+#define RSS_REG_RSS_RAM_DATA_SIZE \
+       4
 #define MISC_REG_BLOCK_256B_EN \
        0x008c14UL
 #define NWS_REG_NWS_CMU        \
 #define NIG_REG_TIMESYNC_GEN_REG_BB 0x500d00UL
 #define NIG_REG_TSGEN_FREE_CNT_VALUE_LSB 0x5088a8UL
 #define NIG_REG_TSGEN_FREE_CNT_VALUE_MSB 0x5088acUL
+#define NIG_REG_PTP_LATCH_OSTS_PKT_TIME 0x509040UL
+
+#define PGLUE_B_REG_PGL_ADDR_E8_F0_K2 0x2aaf98UL
+#define PGLUE_B_REG_PGL_ADDR_EC_F0_K2 0x2aaf9cUL
+#define PGLUE_B_REG_PGL_ADDR_F0_F0_K2 0x2aafa0UL
+#define PGLUE_B_REG_PGL_ADDR_F4_F0_K2 0x2aafa4UL
+#define NIG_REG_TSGEN_FREECNT_UPDATE_K2 0x509008UL
+#define CNIG_REG_NIG_PORT0_CONF_K2 0x218200UL
+
 #endif
index d9ff6b28591c19faf288a130e28c19445fd3ba9b..b8c811f9520541318e75dd2627a2afbd68ccf7ae 100644 (file)
 #include "qed_roce.h"
 #include "qed_ll2.h"
 
-void qed_async_roce_event(struct qed_hwfn *p_hwfn,
-                         struct event_ring_entry *p_eqe)
+static void qed_roce_free_real_icid(struct qed_hwfn *p_hwfn, u16 icid);
+
+void qed_roce_async_event(struct qed_hwfn *p_hwfn,
+                         u8 fw_event_code, union rdma_eqe_data *rdma_data)
 {
-       struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+       if (fw_event_code == ROCE_ASYNC_EVENT_DESTROY_QP_DONE) {
+               u16 icid =
+                   (u16)le32_to_cpu(rdma_data->rdma_destroy_qp_data.cid);
+
+               /* icid release in this async event can occur only if the icid
+                * was offloaded to the FW. If it wasn't offloaded, this is
+                * handled in qed_roce_sp_destroy_qp.
+                */
+               qed_roce_free_real_icid(p_hwfn, icid);
+       } else {
+               struct qed_rdma_events *events = &p_hwfn->p_rdma_info->events;
 
-       p_rdma_info->events.affiliated_event(p_rdma_info->events.context,
-                                            p_eqe->opcode, &p_eqe->data);
+               events->affiliated_event(p_hwfn->p_rdma_info->events.context,
+                                        fw_event_code,
+                                        &rdma_data->async_handle);
+       }
 }
 
 static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn,
@@ -113,6 +127,15 @@ static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
+static void qed_bmap_set_id(struct qed_hwfn *p_hwfn,
+                           struct qed_bmap *bmap, u32 id_num)
+{
+       if (id_num >= bmap->max_count)
+               return;
+
+       __set_bit(id_num, bmap->bitmap);
+}
+
 static void qed_bmap_release_id(struct qed_hwfn *p_hwfn,
                                struct qed_bmap *bmap, u32 id_num)
 {
@@ -129,6 +152,15 @@ static void qed_bmap_release_id(struct qed_hwfn *p_hwfn,
        }
 }
 
+static int qed_bmap_test_id(struct qed_hwfn *p_hwfn,
+                           struct qed_bmap *bmap, u32 id_num)
+{
+       if (id_num >= bmap->max_count)
+               return -1;
+
+       return test_bit(id_num, bmap->bitmap);
+}
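
These two helpers complement qed_bmap_release_id: qed_bmap_set_id marks an id in-use (silently ignoring out-of-range ids) and qed_bmap_test_id reports its state, returning -1 for out-of-range ids. A tiny sketch, local to qed_roce.c since both are static (the id value is illustrative):

	bool in_use;

	qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->real_cid_map, 5);
	in_use = qed_bmap_test_id(p_hwfn,
				  &p_hwfn->p_rdma_info->real_cid_map, 5) > 0;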
+
 static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
 {
        /* First sb id for RoCE is after all the l2 sb */
@@ -170,7 +202,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
        /* Queue zone lines are shared between RoCE and L2 in such a way that
         * they can be used by each without obstructing the other.
         */
-       p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE);
+       p_rdma_info->queue_zone_base = (u16)RESC_START(p_hwfn, QED_L2_QUEUE);
+       p_rdma_info->max_queue_zones = (u16)RESC_NUM(p_hwfn, QED_L2_QUEUE);
 
        /* Allocate a struct with device params and fill it */
        p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL);
@@ -248,9 +281,18 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
                goto free_tid_map;
        }
 
+       /* Allocate bitmap for cids used for responders/requesters. */
+       rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->real_cid_map, num_cons);
+       if (rc) {
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "Failed to allocate real cid bitmap, rc = %d\n", rc);
+               goto free_cid_map;
+       }
        DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
        return 0;
 
+free_cid_map:
+       kfree(p_rdma_info->cid_map.bitmap);
 free_tid_map:
        kfree(p_rdma_info->tid_map.bitmap);
 free_toggle_map:
@@ -273,7 +315,22 @@ free_rdma_info:
 
 static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
 {
+       struct qed_bmap *rcid_map = &p_hwfn->p_rdma_info->real_cid_map;
        struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+       int wait_count = 0;
+
+       /* When destroying a RoCE QP, control is returned to the user after
+        * the synchronous part. The asynchronous part may take a little
+        * longer. We delay for a short while if an async destroy QP is still
+        * expected; beyond the added delay we clear the bitmap anyway.
+        */
+       while (bitmap_weight(rcid_map->bitmap, rcid_map->max_count)) {
+               msleep(100);
+               if (wait_count++ > 20) {
+                       DP_NOTICE(p_hwfn, "cid bitmap wait timed out\n");
+                       break;
+               }
+       }
 
        kfree(p_rdma_info->cid_map.bitmap);
        kfree(p_rdma_info->tid_map.bitmap);
@@ -724,6 +781,14 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
        u32 addr;
 
        p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+       if (qz_offset > p_hwfn->p_rdma_info->max_queue_zones) {
+               DP_NOTICE(p_hwfn,
+                         "queue zone offset %d is too large (max is %d)\n",
+                         qz_offset, p_hwfn->p_rdma_info->max_queue_zones);
+               return;
+       }
+
        qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset;
        addr = GTT_BAR0_MAP_REG_USDM_RAM +
               USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num);
@@ -1080,6 +1145,14 @@ static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode)
        return flavor;
 }
 
+void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid)
+{
+       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+       qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+       qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid + 1);
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
 static int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid)
 {
        struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
@@ -1139,15 +1212,22 @@ err:
        return rc;
 }
 
+static void qed_roce_set_real_cid(struct qed_hwfn *p_hwfn, u32 cid)
+{
+       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+       qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->real_cid_map, cid);
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
 static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
                                        struct qed_rdma_qp *qp)
 {
        struct roce_create_qp_resp_ramrod_data *p_ramrod;
        struct qed_sp_init_data init_data;
-       union qed_qm_pq_params qm_params;
        enum roce_flavor roce_flavor;
        struct qed_spq_entry *p_ent;
-       u16 physical_queue0 = 0;
+       u16 regular_latency_queue;
+       enum protocol_type proto;
        int rc;
 
        DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
@@ -1229,15 +1309,16 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
        p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo);
        p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
        p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
-       p_ramrod->stats_counter_id = p_hwfn->rel_pf_id;
        p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
                                       qp->rq_cq_id);
 
-       memset(&qm_params, 0, sizeof(qm_params));
-       qm_params.roce.qpid = qp->icid >> 1;
-       physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params);
+       regular_latency_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+
+       p_ramrod->regular_latency_phy_queue =
+           cpu_to_le16(regular_latency_queue);
+       p_ramrod->low_latency_phy_queue =
+           cpu_to_le16(regular_latency_queue);
 
-       p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0);
        p_ramrod->dpi = cpu_to_le16(qp->dpi);
 
        qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr);
@@ -1253,13 +1334,19 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
 
        rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
-       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d physical_queue0 = 0x%x\n",
-                  rc, physical_queue0);
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "rc = %d regular physical queue = 0x%x\n", rc,
+                  regular_latency_queue);
 
        if (rc)
                goto err;
 
        qp->resp_offloaded = true;
+       qp->cq_prod = 0;
+
+       proto = p_hwfn->p_rdma_info->proto;
+       qed_roce_set_real_cid(p_hwfn, qp->icid -
+                             qed_cxt_get_proto_cid_start(p_hwfn, proto));
 
        return rc;
 
@@ -1277,10 +1364,10 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
 {
        struct roce_create_qp_req_ramrod_data *p_ramrod;
        struct qed_sp_init_data init_data;
-       union qed_qm_pq_params qm_params;
        enum roce_flavor roce_flavor;
        struct qed_spq_entry *p_ent;
-       u16 physical_queue0 = 0;
+       u16 regular_latency_queue;
+       enum protocol_type proto;
        int rc;
 
        DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
@@ -1351,15 +1438,16 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
        p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo);
        p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
        p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
-       p_ramrod->stats_counter_id = p_hwfn->rel_pf_id;
-       p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
-                                      qp->sq_cq_id);
+       p_ramrod->cq_cid =
+           cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id);
+
+       regular_latency_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
 
-       memset(&qm_params, 0, sizeof(qm_params));
-       qm_params.roce.qpid = qp->icid >> 1;
-       physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params);
+       p_ramrod->regular_latency_phy_queue =
+           cpu_to_le16(regular_latency_queue);
+       p_ramrod->low_latency_phy_queue =
+           cpu_to_le16(regular_latency_queue);
 
-       p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0);
        p_ramrod->dpi = cpu_to_le16(qp->dpi);
 
        qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr);
@@ -1378,6 +1466,10 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
                goto err;
 
        qp->req_offloaded = true;
+       proto = p_hwfn->p_rdma_info->proto;
+       qed_roce_set_real_cid(p_hwfn,
+                             qp->icid + 1 -
+                             qed_cxt_get_proto_cid_start(p_hwfn, proto));
 
        return rc;
 
@@ -1577,7 +1669,8 @@ static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn,
 
 static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
                                            struct qed_rdma_qp *qp,
-                                           u32 *num_invalidated_mw)
+                                           u32 *num_invalidated_mw,
+                                           u32 *cq_prod)
 {
        struct roce_destroy_qp_resp_output_params *p_ramrod_res;
        struct roce_destroy_qp_resp_ramrod_data *p_ramrod;
@@ -1588,8 +1681,22 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
 
        DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
-       if (!qp->resp_offloaded)
+       *num_invalidated_mw = 0;
+       *cq_prod = qp->cq_prod;
+
+       if (!qp->resp_offloaded) {
+               /* If the responder was never offloaded, we need to free the
+                * cids allocated in create_qp, as a FW async event will
+                * never arrive.
+                */
+               u32 cid;
+
+               cid = qp->icid -
+                     qed_cxt_get_proto_cid_start(p_hwfn,
+                                                 p_hwfn->p_rdma_info->proto);
+               qed_roce_free_cid_pair(p_hwfn, (u16)cid);
+
                return 0;
+       }
 
        /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
@@ -1624,6 +1731,8 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
                goto err;
 
        *num_invalidated_mw = le32_to_cpu(p_ramrod_res->num_invalidated_mw);
+       *cq_prod = le32_to_cpu(p_ramrod_res->cq_prod);
+       qp->cq_prod = *cq_prod;
 
        /* Free IRQ - only if ramrod succeeded, in case FW is still using it */
        dma_free_coherent(&p_hwfn->cdev->pdev->dev,
@@ -1827,10 +1936,8 @@ static int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
 
        out_params->draining = false;
 
-       if (rq_err_state)
+       if (rq_err_state || sq_err_state)
                qp->cur_state = QED_ROCE_QP_STATE_ERR;
-       else if (sq_err_state)
-               qp->cur_state = QED_ROCE_QP_STATE_SQE;
        else if (sq_draining)
                out_params->draining = true;
        out_params->state = qp->cur_state;
@@ -1849,10 +1956,9 @@ err_resp:
 
 static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 {
-       struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
        u32 num_invalidated_mw = 0;
        u32 num_bound_mw = 0;
-       u32 start_cid;
+       u32 cq_prod;
        int rc;
 
        /* Destroys the specified QP */
@@ -1866,7 +1972,8 @@ static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 
        if (qp->cur_state != QED_ROCE_QP_STATE_RESET) {
                rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp,
-                                                     &num_invalidated_mw);
+                                                     &num_invalidated_mw,
+                                                     &cq_prod);
                if (rc)
                        return rc;
 
@@ -1881,21 +1988,6 @@ static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
                                  "number of invalidate memory windows is different from bounded ones\n");
                        return -EINVAL;
                }
-
-               spin_lock_bh(&p_rdma_info->lock);
-
-               start_cid = qed_cxt_get_proto_cid_start(p_hwfn,
-                                                       p_rdma_info->proto);
-
-               /* Release responder's icid */
-               qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map,
-                                   qp->icid - start_cid);
-
-               /* Release requester's icid */
-               qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map,
-                                   qp->icid + 1 - start_cid);
-
-               spin_unlock_bh(&p_rdma_info->lock);
        }
 
        return 0;
@@ -2110,12 +2202,19 @@ static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn,
                return rc;
        } else if (qp->cur_state == QED_ROCE_QP_STATE_RESET) {
                /* Any state -> RESET */
+               u32 cq_prod;
+
+               /* Send destroy responder ramrod */
+               rc = qed_roce_sp_destroy_qp_responder(p_hwfn,
+                                                     qp,
+                                                     &num_invalidated_mw,
+                                                     &cq_prod);
 
-               rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp,
-                                                     &num_invalidated_mw);
                if (rc)
                        return rc;
 
+               qp->cq_prod = cq_prod;
+
                rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp,
                                                      &num_bound_mw);
 
@@ -2454,6 +2553,31 @@ static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
        return rc;
 }
 
+static void qed_roce_free_real_icid(struct qed_hwfn *p_hwfn, u16 icid)
+{
+       struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+       u32 start_cid, cid, xcid;
+
+       /* An even icid belongs to a responder while an odd icid belongs to a
+        * requester. The 'cid' received as input can be either. We calculate
+        * the "partner" icid and call it xcid. Only when both are free can
+        * the "cid" map be cleared.
+        */
+       start_cid = qed_cxt_get_proto_cid_start(p_hwfn, p_rdma_info->proto);
+       cid = icid - start_cid;
+       xcid = cid ^ 1;
+
+       spin_lock_bh(&p_rdma_info->lock);
+
+       qed_bmap_release_id(p_hwfn, &p_rdma_info->real_cid_map, cid);
+       if (qed_bmap_test_id(p_hwfn, &p_rdma_info->real_cid_map, xcid) == 0) {
+               qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, cid);
+               qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, xcid);
+       }
+
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
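
A worked example of the pairing above (values illustrative): with start_cid = 0x1000, a destroy event for icid 0x1005 gives cid = 5 (odd, a requester) and xcid = 5 ^ 1 = 4 (its responder partner); the two cid_map entries are released only once bit 4 has also been cleared in real_cid_map.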
+
 static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
 {
        return QED_LEADING_HWFN(cdev);
@@ -2773,7 +2897,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev,
                                                      : QED_LL2_RROCE;
 
        if (pkt->roce_mode == ROCE_V2_IPV4)
-               flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT);
+               flags |= BIT(CORE_TX_BD_DATA_IP_CSUM_SHIFT);
 
        /* Tx header */
        rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle,
index 36cf4b2ab7faf0afcd17ec3347bf0d9d05567901..3ccc08a7c9959108382cbbb9f8fd8c592441bd28 100644 (file)
@@ -82,6 +82,7 @@ struct qed_rdma_info {
        struct qed_bmap qp_map;
        struct qed_bmap srq_map;
        struct qed_bmap cid_map;
+       struct qed_bmap real_cid_map;
        struct qed_bmap dpi_map;
        struct qed_bmap toggle_bits;
        struct qed_rdma_events events;
@@ -92,6 +93,7 @@ struct qed_rdma_info {
        u32 num_qps;
        u32 num_mrs;
        u16 queue_zone_base;
+       u16 max_queue_zones;
        enum protocol_type proto;
 };
 
@@ -153,6 +155,7 @@ struct qed_rdma_qp {
        dma_addr_t irq_phys_addr;
        u8 irq_num_pages;
        bool resp_offloaded;
+       u32 cq_prod;
 
        u8 remote_mac_addr[6];
        u8 local_mac_addr[6];
@@ -163,8 +166,8 @@ struct qed_rdma_qp {
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
 void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
-void qed_async_roce_event(struct qed_hwfn *p_hwfn,
-                         struct event_ring_entry *p_eqe);
+void qed_roce_async_event(struct qed_hwfn *p_hwfn,
+                         u8 fw_event_code, union rdma_eqe_data *rdma_data);
 void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
                                     u8 connection_handle,
                                     void *cookie,
@@ -187,7 +190,9 @@ void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
                                     u16 src_mac_addr_lo, bool b_last_packet);
 #else
 static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {}
-static inline void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) {}
+static inline void qed_roce_async_event(struct qed_hwfn *p_hwfn,
+                                       u8 fw_event_code,
+                                       union rdma_eqe_data *rdma_data) {}
 static inline void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
                                                   u8 connection_handle,
                                                   void *cookie,
index 645328a9f0cfb6b4040c8d6402ad5684d79adab9..13f715569253e6aa7e1b0a8bbdc6dfc77bd774bf 100644 (file)
@@ -205,11 +205,10 @@ static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
 static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
                                  struct qed_spq *p_spq)
 {
-       u16                             pq;
-       struct qed_cxt_info             cxt_info;
-       struct core_conn_context        *p_cxt;
-       union qed_qm_pq_params          pq_params;
-       int                             rc;
+       struct core_conn_context *p_cxt;
+       struct qed_cxt_info cxt_info;
+       u16 physical_q;
+       int rc;
 
        cxt_info.iid = p_spq->cid;
 
@@ -231,10 +230,8 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
                  XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN, 1);
 
        /* QM physical queue */
-       memset(&pq_params, 0, sizeof(pq_params));
-       pq_params.core.tc = LB_TC;
-       pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params);
-       p_cxt->xstorm_ag_context.physical_q0 = cpu_to_le16(pq);
+       physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
+       p_cxt->xstorm_ag_context.physical_q0 = cpu_to_le16(physical_q);
 
        p_cxt->xstorm_st_context.spq_base_lo =
                DMA_LO_LE(p_spq->chain.p_phys_addr);
@@ -296,9 +293,12 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn,
                           struct event_ring_entry *p_eqe)
 {
        switch (p_eqe->protocol_id) {
+#if IS_ENABLED(CONFIG_QED_RDMA)
        case PROTOCOLID_ROCE:
-               qed_async_roce_event(p_hwfn, p_eqe);
+               qed_roce_async_event(p_hwfn, p_eqe->opcode,
+                                    &p_eqe->data.rdma_data);
                return 0;
+#endif
        case PROTOCOLID_COMMON:
                return qed_sriov_eqe_event(p_hwfn,
                                           p_eqe->opcode,
@@ -306,14 +306,6 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn,
        case PROTOCOLID_ISCSI:
                if (!IS_ENABLED(CONFIG_QED_ISCSI))
                        return -EINVAL;
-               if (p_eqe->opcode == ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES) {
-                       u32 cid = le32_to_cpu(p_eqe->data.iscsi_info.cid);
-
-                       qed_ooo_release_connection_isles(p_hwfn,
-                                                        p_hwfn->p_ooo_info,
-                                                        cid);
-                       return 0;
-               }
 
                if (p_hwfn->p_iscsi_info->event_cb) {
                        struct qed_iscsi_info *p_iscsi = p_hwfn->p_iscsi_info;
index 253c2bbe1e4e1a705e52054b4d3faa199fd2ca93..92a3ee1715d9b9f0ccf62467f1212938c7a2627d 100644 (file)
@@ -178,26 +178,59 @@ static struct qed_vf_info *qed_iov_get_vf_info(struct qed_hwfn *p_hwfn,
        return vf;
 }
 
+enum qed_iov_validate_q_mode {
+       QED_IOV_VALIDATE_Q_NA,
+       QED_IOV_VALIDATE_Q_ENABLE,
+       QED_IOV_VALIDATE_Q_DISABLE,
+};
+
+static bool qed_iov_validate_queue_mode(struct qed_hwfn *p_hwfn,
+                                       struct qed_vf_info *p_vf,
+                                       u16 qid,
+                                       enum qed_iov_validate_q_mode mode,
+                                       bool b_is_tx)
+{
+       if (mode == QED_IOV_VALIDATE_Q_NA)
+               return true;
+
+       if ((b_is_tx && p_vf->vf_queues[qid].p_tx_cid) ||
+           (!b_is_tx && p_vf->vf_queues[qid].p_rx_cid))
+               return mode == QED_IOV_VALIDATE_Q_ENABLE;
+
+       /* If no valid cid was found, the queue is considered disabled */
+       return mode == QED_IOV_VALIDATE_Q_DISABLE;
+}
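
The tri-state mode lets each caller state what it requires of the queue: the start paths below pass QED_IOV_VALIDATE_Q_DISABLE so that starting an already-live queue is rejected, the stop paths pass QED_IOV_VALIDATE_Q_ENABLE so that only active queues can be closed, and QED_IOV_VALIDATE_Q_NA skips the state check entirely.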
+
 static bool qed_iov_validate_rxq(struct qed_hwfn *p_hwfn,
-                                struct qed_vf_info *p_vf, u16 rx_qid)
+                                struct qed_vf_info *p_vf,
+                                u16 rx_qid,
+                                enum qed_iov_validate_q_mode mode)
 {
-       if (rx_qid >= p_vf->num_rxqs)
+       if (rx_qid >= p_vf->num_rxqs) {
                DP_VERBOSE(p_hwfn,
                           QED_MSG_IOV,
                           "VF[0x%02x] - can't touch Rx queue[%04x]; Only 0x%04x are allocated\n",
                           p_vf->abs_vf_id, rx_qid, p_vf->num_rxqs);
-       return rx_qid < p_vf->num_rxqs;
+               return false;
+       }
+
+       return qed_iov_validate_queue_mode(p_hwfn, p_vf, rx_qid, mode, false);
 }
 
 static bool qed_iov_validate_txq(struct qed_hwfn *p_hwfn,
-                                struct qed_vf_info *p_vf, u16 tx_qid)
+                                struct qed_vf_info *p_vf,
+                                u16 tx_qid,
+                                enum qed_iov_validate_q_mode mode)
 {
-       if (tx_qid >= p_vf->num_txqs)
+       if (tx_qid >= p_vf->num_txqs) {
                DP_VERBOSE(p_hwfn,
                           QED_MSG_IOV,
                           "VF[0x%02x] - can't touch Tx queue[%04x]; Only 0x%04x are allocated\n",
                           p_vf->abs_vf_id, tx_qid, p_vf->num_txqs);
-       return tx_qid < p_vf->num_txqs;
+               return false;
+       }
+
+       return qed_iov_validate_queue_mode(p_hwfn, p_vf, tx_qid, mode, true);
 }
 
 static bool qed_iov_validate_sb(struct qed_hwfn *p_hwfn,
@@ -217,6 +250,34 @@ static bool qed_iov_validate_sb(struct qed_hwfn *p_hwfn,
        return false;
 }
 
+static bool qed_iov_validate_active_rxq(struct qed_hwfn *p_hwfn,
+                                       struct qed_vf_info *p_vf)
+{
+       u8 i;
+
+       for (i = 0; i < p_vf->num_rxqs; i++)
+               if (qed_iov_validate_queue_mode(p_hwfn, p_vf, i,
+                                               QED_IOV_VALIDATE_Q_ENABLE,
+                                               false))
+                       return true;
+
+       return false;
+}
+
+static bool qed_iov_validate_active_txq(struct qed_hwfn *p_hwfn,
+                                       struct qed_vf_info *p_vf)
+{
+       u8 i;
+
+       for (i = 0; i < p_vf->num_txqs; i++)
+               if (qed_iov_validate_queue_mode(p_hwfn, p_vf, i,
+                                               QED_IOV_VALIDATE_Q_ENABLE,
+                                               true))
+                       return true;
+
+       return false;
+}
+
 static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn,
                                    int vfid, struct qed_ptt *p_ptt)
 {
@@ -557,14 +618,30 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
                return 0;
        }
 
-       /* Calculate the first VF index - this is a bit tricky; Basically,
-        * VFs start at offset 16 relative to PF0, and 2nd engine VFs begin
-        * after the first engine's VFs.
+       /* Deriving the first VF index from the offset is tricky:
+        *  - If ARI is supported [likely], offset - (16 - pf_id) gives
+        *    the index for engine 0; the 2nd engine's VFs begin after
+        *    the first engine's VFs.
+        *  - If !ARI, VFs would start on the next device, so
+        *    offset - (256 - pf_id) gives the index.
+        * Utilize the fact that (256 - pf_id) is reached only in the
+        * latter case to differentiate between the two.
         */
-       cdev->p_iov_info->first_vf_in_pf = p_hwfn->cdev->p_iov_info->offset +
-                                          p_hwfn->abs_pf_id - 16;
-       if (QED_PATH_ID(p_hwfn))
-               cdev->p_iov_info->first_vf_in_pf -= MAX_NUM_VFS_BB;
+
+       if (p_hwfn->cdev->p_iov_info->offset < (256 - p_hwfn->abs_pf_id)) {
+               u32 first = p_hwfn->cdev->p_iov_info->offset +
+                           p_hwfn->abs_pf_id - 16;
+
+               cdev->p_iov_info->first_vf_in_pf = first;
+
+               if (QED_PATH_ID(p_hwfn))
+                       cdev->p_iov_info->first_vf_in_pf -= MAX_NUM_VFS_BB;
+       } else {
+               u32 first = p_hwfn->cdev->p_iov_info->offset +
+                           p_hwfn->abs_pf_id - 256;
+
+               cdev->p_iov_info->first_vf_in_pf = first;
+       }
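
A worked example of the two branches (numbers illustrative): with ARI, offset = 16 and abs_pf_id = 2 satisfy offset < 256 - pf_id, giving first_vf_in_pf = 16 + 2 - 16 = 2 on engine 0; without ARI, offset = 254 and abs_pf_id = 2 fail that test, giving first_vf_in_pf = 254 + 2 - 256 = 0.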
 
        DP_VERBOSE(p_hwfn, QED_MSG_IOV,
                   "First VF in hwfn 0x%08x\n",
@@ -677,6 +754,11 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn,
        u32 igu_vf_conf = IGU_VF_CONF_FUNC_EN;
        int rc;
 
+       /* It's possible VF was previously considered malicious -
+        * clear the indication even if we're only going to disable VF.
+        */
+       vf->b_malicious = false;
+
        if (vf->to_disable)
                return 0;
 
@@ -689,9 +771,6 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn,
 
        qed_iov_vf_igu_reset(p_hwfn, p_ptt, vf);
 
-       /* It's possible VF was previously considered malicious */
-       vf->b_malicious = false;
-
        rc = qed_mcp_config_vf_msix(p_hwfn, p_ptt, vf->abs_vf_id, vf->num_sbs);
        if (rc)
                return rc;
@@ -1118,13 +1197,17 @@ static void qed_iov_send_response(struct qed_hwfn *p_hwfn,
                           (sizeof(union pfvf_tlvs) - sizeof(u64)) / 4,
                           &params);
 
-       qed_dmae_host2host(p_hwfn, p_ptt, mbx->reply_phys,
-                          mbx->req_virt->first_tlv.reply_address,
-                          sizeof(u64) / 4, &params);
-
+       /* Once PF copies the rc to the VF, the latter can continue
+        * and send an additional message. So we have to make sure the
+        * channel is set back to ready before that happens.
+        */
        REG_WR(p_hwfn,
               GTT_BAR0_MAP_REG_USDM_RAM +
               USTORM_VF_PF_CHANNEL_READY_OFFSET(eng_vf_id), 1);
+
+       qed_dmae_host2host(p_hwfn, p_ptt, mbx->reply_phys,
+                          mbx->req_virt->first_tlv.reply_address,
+                          sizeof(u64) / 4, &params);
 }
 
 static u16 qed_iov_vport_to_tlv(struct qed_hwfn *p_hwfn,
@@ -1733,6 +1816,8 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
        vf->state = VF_ENABLED;
        start = &mbx->req_virt->start_vport;
 
+       qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf);
+
        /* Initialize Status block in CAU */
        for (sb_id = 0; sb_id < vf->num_sbs; sb_id++) {
                if (!start->sb_addr[sb_id]) {
@@ -1746,7 +1831,6 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
                                    start->sb_addr[sb_id],
                                    vf->igu_sbs[sb_id], vf->abs_vf_id, 1);
        }
-       qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf);
 
        vf->mtu = start->mtu;
        vf->shadow_config.inner_vlan_removal = start->inner_vlan_removal;
@@ -1803,6 +1887,16 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
        vf->vport_instance--;
        vf->spoof_chk = false;
 
+       if ((qed_iov_validate_active_rxq(p_hwfn, vf)) ||
+           (qed_iov_validate_active_txq(p_hwfn, vf))) {
+               vf->b_malicious = true;
+               DP_NOTICE(p_hwfn,
+                         "VF [%02x] - considered malicious; Unable to stop RX/TX queuess\n",
+                         vf->abs_vf_id);
+               status = PFVF_STATUS_MALICIOUS;
+               goto out;
+       }
+
        rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id);
        if (rc) {
                DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n",
@@ -1814,6 +1908,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
        vf->configured_features = 0;
        memset(&vf->shadow_config, 0, sizeof(vf->shadow_config));
 
+out:
        qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_VPORT_TEARDOWN,
                             sizeof(struct pfvf_def_resp_tlv), status);
 }
@@ -1870,7 +1965,8 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 
        req = &mbx->req_virt->start_rxq;
 
-       if (!qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid) ||
+       if (!qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid,
+                                 QED_IOV_VALIDATE_Q_DISABLE) ||
            !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
                goto out;
 
@@ -1970,21 +2066,16 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
        struct qed_queue_start_common_params params;
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
        u8 status = PFVF_STATUS_NO_RESOURCE;
-       union qed_qm_pq_params pq_params;
        struct vfpf_start_txq_tlv *req;
        struct qed_vf_q_info *p_queue;
        int rc;
        u16 pq;
 
-       /* Prepare the parameters which would choose the right PQ */
-       memset(&pq_params, 0, sizeof(pq_params));
-       pq_params.eth.is_vf = 1;
-       pq_params.eth.vf_id = vf->relative_vf_id;
-
        memset(&params, 0, sizeof(params));
        req = &mbx->req_virt->start_txq;
 
-       if (!qed_iov_validate_txq(p_hwfn, vf, req->tx_qid) ||
+       if (!qed_iov_validate_txq(p_hwfn, vf, req->tx_qid,
+                                 QED_IOV_VALIDATE_Q_DISABLE) ||
            !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
                goto out;
 
@@ -2004,7 +2095,7 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
        if (!p_queue->p_tx_cid)
                goto out;
 
-       pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, &pq_params);
+       pq = qed_get_cm_pq_idx_vf(p_hwfn, vf->relative_vf_id);
        rc = qed_eth_txq_start_ramrod(p_hwfn, p_queue->p_tx_cid,
                                      req->pbl_addr, req->pbl_size, pq);
        if (rc) {
@@ -2021,57 +2112,53 @@ out:
 
 static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
                                struct qed_vf_info *vf,
-                               u16 rxq_id, u8 num_rxqs, bool cqe_completion)
+                               u16 rxq_id, bool cqe_completion)
 {
        struct qed_vf_q_info *p_queue;
        int rc = 0;
-       int qid;
 
-       if (rxq_id + num_rxqs > ARRAY_SIZE(vf->vf_queues))
+       if (!qed_iov_validate_rxq(p_hwfn, vf, rxq_id,
+                                 QED_IOV_VALIDATE_Q_ENABLE)) {
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_IOV,
+                          "VF[%d] Tried Closing Rx 0x%04x which is inactive\n",
+                          vf->relative_vf_id, rxq_id);
                return -EINVAL;
+       }
 
-       for (qid = rxq_id; qid < rxq_id + num_rxqs; qid++) {
-               p_queue = &vf->vf_queues[qid];
-
-               if (!p_queue->p_rx_cid)
-                       continue;
+       p_queue = &vf->vf_queues[rxq_id];
 
-               rc = qed_eth_rx_queue_stop(p_hwfn,
-                                          p_queue->p_rx_cid,
-                                          false, cqe_completion);
-               if (rc)
-                       return rc;
+       rc = qed_eth_rx_queue_stop(p_hwfn,
+                                  p_queue->p_rx_cid,
+                                  false, cqe_completion);
+       if (rc)
+               return rc;
 
-               vf->vf_queues[qid].p_rx_cid = NULL;
-               vf->num_active_rxqs--;
-       }
+       p_queue->p_rx_cid = NULL;
+       vf->num_active_rxqs--;
 
-       return rc;
+       return 0;
 }
 
 static int qed_iov_vf_stop_txqs(struct qed_hwfn *p_hwfn,
-                               struct qed_vf_info *vf, u16 txq_id, u8 num_txqs)
+                               struct qed_vf_info *vf, u16 txq_id)
 {
-       int rc = 0;
        struct qed_vf_q_info *p_queue;
-       int qid;
+       int rc = 0;
 
-       if (txq_id + num_txqs > ARRAY_SIZE(vf->vf_queues))
+       if (!qed_iov_validate_txq(p_hwfn, vf, txq_id,
+                                 QED_IOV_VALIDATE_Q_ENABLE))
                return -EINVAL;
 
-       for (qid = txq_id; qid < txq_id + num_txqs; qid++) {
-               p_queue = &vf->vf_queues[qid];
-               if (!p_queue->p_tx_cid)
-                       continue;
+       p_queue = &vf->vf_queues[txq_id];
 
-               rc = qed_eth_tx_queue_stop(p_hwfn, p_queue->p_tx_cid);
-               if (rc)
-                       return rc;
+       rc = qed_eth_tx_queue_stop(p_hwfn, p_queue->p_tx_cid);
+       if (rc)
+               return rc;
 
-               p_queue->p_tx_cid = NULL;
-       }
+       p_queue->p_tx_cid = NULL;
 
-       return rc;
+       return 0;
 }
 
 static void qed_iov_vf_mbx_stop_rxqs(struct qed_hwfn *p_hwfn,
@@ -2080,20 +2167,28 @@ static void qed_iov_vf_mbx_stop_rxqs(struct qed_hwfn *p_hwfn,
 {
        u16 length = sizeof(struct pfvf_def_resp_tlv);
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
-       u8 status = PFVF_STATUS_SUCCESS;
+       u8 status = PFVF_STATUS_FAILURE;
        struct vfpf_stop_rxqs_tlv *req;
        int rc;
 
-       /* We give the option of starting from qid != 0, in this case we
-        * need to make sure that qid + num_qs doesn't exceed the actual
-        * amount of queues that exist.
+       /* There has never been an official driver that used this interface
+        * for stopping multiple queues, and it is now considered deprecated.
+        * Reject any attempt to do so here.
         */
        req = &mbx->req_virt->stop_rxqs;
-       rc = qed_iov_vf_stop_rxqs(p_hwfn, vf, req->rx_qid,
-                                 req->num_rxqs, req->cqe_completion);
-       if (rc)
-               status = PFVF_STATUS_FAILURE;
+       if (req->num_rxqs != 1) {
+               DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                          "Odd; VF[%d] tried stopping multiple Rx queues\n",
+                          vf->relative_vf_id);
+               status = PFVF_STATUS_NOT_SUPPORTED;
+               goto out;
+       }
 
+       rc = qed_iov_vf_stop_rxqs(p_hwfn, vf, req->rx_qid,
+                                 req->cqe_completion);
+       if (!rc)
+               status = PFVF_STATUS_SUCCESS;
+out:
        qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_STOP_RXQS,
                             length, status);
 }
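
With num_rxqs pinned to 1, a VF that wants to tear down several queues must now send one request per queue. A minimal sketch of the resulting caller-side loop; qed_vf_pf_stop_one_rxq() is a hypothetical name standing in for the VF-channel call:

	/* Hypothetical VF-side loop: one mailbox request per Rx queue. */
	static int vf_stop_all_rxqs(struct qed_hwfn *p_hwfn, u8 num_rxqs,
				    bool cqe_completion)
	{
		u16 qid;
		int rc;

		for (qid = 0; qid < num_rxqs; qid++) {
			rc = qed_vf_pf_stop_one_rxq(p_hwfn, qid,
						    cqe_completion);
			if (rc)
				return rc;
		}

		return 0;
	}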
@@ -2104,19 +2199,27 @@ static void qed_iov_vf_mbx_stop_txqs(struct qed_hwfn *p_hwfn,
 {
        u16 length = sizeof(struct pfvf_def_resp_tlv);
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
-       u8 status = PFVF_STATUS_SUCCESS;
+       u8 status = PFVF_STATUS_FAILURE;
        struct vfpf_stop_txqs_tlv *req;
        int rc;
 
-       /* We give the option of starting from qid != 0, in this case we
-        * need to make sure that qid + num_qs doesn't exceed the actual
-        * amount of queues that exist.
+       /* No official driver has ever used this interface to stop multiple
+        * queues at once, and doing so is now deprecated; reject any
+        * request that still attempts it.
         */
        req = &mbx->req_virt->stop_txqs;
-       rc = qed_iov_vf_stop_txqs(p_hwfn, vf, req->tx_qid, req->num_txqs);
-       if (rc)
-               status = PFVF_STATUS_FAILURE;
+       if (req->num_txqs != 1) {
+               DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                          "Odd; VF[%d] tried stopping multiple Tx queues\n",
+                          vf->relative_vf_id);
+               status = PFVF_STATUS_NOT_SUPPORTED;
+               goto out;
+       }
+       rc = qed_iov_vf_stop_txqs(p_hwfn, vf, req->tx_qid);
+       if (!rc)
+               status = PFVF_STATUS_SUCCESS;
 
+out:
        qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_STOP_TXQS,
                             length, status);
 }
@@ -2141,22 +2244,17 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
        complete_event_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG);
 
        /* Validate inputs */
-       if (req->num_rxqs + req->rx_qid > QED_MAX_VF_CHAINS_PER_PF ||
-           !qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid)) {
-               DP_INFO(p_hwfn, "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
-                       vf->relative_vf_id, req->rx_qid, req->num_rxqs);
-               goto out;
-       }
-
-       for (i = 0; i < req->num_rxqs; i++) {
-               qid = req->rx_qid + i;
-               if (!vf->vf_queues[qid].p_rx_cid) {
-                       DP_INFO(p_hwfn,
-                               "VF[%d] rx_qid = %d isn`t active!\n",
-                               vf->relative_vf_id, qid);
+       for (i = req->rx_qid; i < req->rx_qid + req->num_rxqs; i++)
+               if (!qed_iov_validate_rxq(p_hwfn, vf, i,
+                                         QED_IOV_VALIDATE_Q_ENABLE)) {
+                       DP_INFO(p_hwfn, "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
+                               vf->relative_vf_id, req->rx_qid, req->num_rxqs);
                        goto out;
                }
 
+       /* Prepare the handlers */
+       for (i = 0; i < req->num_rxqs; i++) {
+               qid = req->rx_qid + i;
                handlers[i] = vf->vf_queues[qid].p_rx_cid;
        }
 
@@ -2372,7 +2470,8 @@ qed_iov_vp_update_rss_param(struct qed_hwfn *p_hwfn,
 
        for (i = 0; i < table_size; i++) {
                q_idx = p_rss_tlv->rss_ind_table[i];
-               if (!qed_iov_validate_rxq(p_hwfn, vf, q_idx)) {
+               if (!qed_iov_validate_rxq(p_hwfn, vf, q_idx,
+                                         QED_IOV_VALIDATE_Q_ENABLE)) {
                        DP_VERBOSE(p_hwfn,
                                   QED_MSG_IOV,
                                   "VF[%d]: Omitting RSS due to wrong queue %04x\n",
@@ -2381,15 +2480,6 @@ qed_iov_vp_update_rss_param(struct qed_hwfn *p_hwfn,
                        goto out;
                }
 
-               if (!vf->vf_queues[q_idx].p_rx_cid) {
-                       DP_VERBOSE(p_hwfn,
-                                  QED_MSG_IOV,
-                                  "VF[%d]: Omitting RSS due to inactive queue %08x\n",
-                                  vf->relative_vf_id, q_idx);
-                       b_reject = true;
-                       goto out;
-               }
-
                p_rss->rss_ind_table[i] = vf->vf_queues[q_idx].p_rx_cid;
        }
 
@@ -3042,9 +3132,10 @@ qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        return rc;
 }
 
-int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
+bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
 {
-       u16 i, found = 0;
+       bool found = false;
+       u16 i;
 
        DP_VERBOSE(p_hwfn, QED_MSG_IOV, "Marking FLR-ed VFs\n");
        for (i = 0; i < (VF_MAX_STATIC / 32); i++)
@@ -3054,7 +3145,7 @@ int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
 
        if (!p_hwfn->cdev->p_iov_info) {
                DP_NOTICE(p_hwfn, "VF flr but no IOV\n");
-               return 0;
+               return false;
        }
 
        /* Mark VFs */
@@ -3083,7 +3174,7 @@ int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
                         * VF flr until ACKs, we're safe.
                         */
                        p_flr[rel_vf_id / 64] |= 1ULL << (rel_vf_id % 64);
-                       found = 1;
+                       found = true;
                }
        }
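
The int-to-bool conversion (completed in the header hunks below) lets the FLR path read as a predicate. A sketch of the assumed caller shape; the scheduling helper and flag name are taken on trust from the driver, not from this diff:

	/* Assumed caller shape -- not part of this diff. */
	if (qed_iov_mark_vf_flr(p_hwfn, disabled_vfs))
		qed_schedule_iov(p_hwfn, QED_IOV_WQ_FLR_FLAG);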
 
@@ -3289,11 +3380,17 @@ static void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn,
        if (!p_vf)
                return;
 
-       DP_INFO(p_hwfn,
-               "VF [%d] - Malicious behavior [%02x]\n",
-               p_vf->abs_vf_id, p_data->err_id);
+       if (!p_vf->b_malicious) {
+               DP_NOTICE(p_hwfn,
+                         "VF [%d] - Malicious behavior [%02x]\n",
+                         p_vf->abs_vf_id, p_data->err_id);
 
-       p_vf->b_malicious = true;
+               p_vf->b_malicious = true;
+       } else {
+               DP_INFO(p_hwfn,
+                       "VF [%d] - Malicious behavior [%02x]\n",
+                       p_vf->abs_vf_id, p_data->err_id);
+       }
 }
 
 int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
@@ -3842,6 +3939,7 @@ static int qed_get_vf_config(struct qed_dev *cdev,
 
 void qed_inform_vf_link_state(struct qed_hwfn *hwfn)
 {
+       struct qed_hwfn *lead_hwfn = QED_LEADING_HWFN(hwfn->cdev);
        struct qed_mcp_link_capabilities caps;
        struct qed_mcp_link_params params;
        struct qed_mcp_link_state link;
@@ -3858,9 +3956,15 @@ void qed_inform_vf_link_state(struct qed_hwfn *hwfn)
                if (!vf_info)
                        continue;
 
-               memcpy(&params, qed_mcp_get_link_params(hwfn), sizeof(params));
-               memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link));
-               memcpy(&caps, qed_mcp_get_link_capabilities(hwfn),
+               /* Only the leading hwfn receives MFW link indications, so
+                * the link configuration must be taken from it; otherwise
+                * features that depend on it, such as rate limiting for
+                * VFs on hwfn1, would not work.
+                */
+               memcpy(&params, qed_mcp_get_link_params(lead_hwfn),
+                      sizeof(params));
+               memcpy(&link, qed_mcp_get_link_state(lead_hwfn), sizeof(link));
+               memcpy(&caps, qed_mcp_get_link_capabilities(lead_hwfn),
                       sizeof(caps));
 
                /* Modify link according to the VF's configured link state */
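
QED_LEADING_HWFN is defined elsewhere in the driver; for context, it is expected to resolve to the device's first hwfn, roughly:

	/* Presumed definition (from qed.h, not this diff): the first
	 * engine's hwfn, i.e. the one that receives MFW link notifications.
	 */
	#define QED_LEADING_HWFN(dev)	(&(dev)->hwfns[0])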
index a89605821522d528411f711bbb0755c0ae003e5a..8e96b1d1930847fc3b03e1518da1a205f09b6291 100644 (file)
@@ -348,9 +348,9 @@ int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
  * @param p_hwfn
  * @param disabled_vfs - bitmask of all VFs on path that were FLRed
  *
- * @return 1 iff one of the PF's vfs got FLRed. 0 otherwise.
+ * @return true if at least one of the PF's VFs was FLRed, false otherwise.
  */
-int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *disabled_vfs);
+bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *disabled_vfs);
 
 /**
  * @brief Search extended TLVs in request/reply buffer.
@@ -407,10 +407,10 @@ static inline int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
        return -EINVAL;
 }
 
-static inline int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn,
-                                     u32 *disabled_vfs)
+static inline bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn,
+                                      u32 *disabled_vfs)
 {
-       return 0;
+       return false;
 }
 
 static inline void qed_iov_wq_stop(struct qed_dev *cdev, bool schedule_first)
index 15d2855ec56352f861b0915823992c505b9c1b9d..798786562b1bbc266f9da05132cd37f39a5b2cdf 100644 (file)
@@ -134,14 +134,20 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size)
        }
 
        if (!*done) {
-               DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-                          "VF <-- PF Timeout [Type %d]\n",
-                          p_req->first_tlv.tl.type);
+               DP_NOTICE(p_hwfn,
+                         "VF <-- PF Timeout [Type %d]\n",
+                         p_req->first_tlv.tl.type);
                rc = -EBUSY;
        } else {
-               DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-                          "PF response: %d [Type %d]\n",
-                          *done, p_req->first_tlv.tl.type);
+               if ((*done != PFVF_STATUS_SUCCESS) &&
+                   (*done != PFVF_STATUS_NO_RESOURCE))
+                       DP_NOTICE(p_hwfn,
+                                 "PF response: %d [Type %d]\n",
+                                 *done, p_req->first_tlv.tl.type);
+               else
+                       DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                                  "PF response: %d [Type %d]\n",
+                                  *done, p_req->first_tlv.tl.type);
        }
 
        return rc;
index 7da0b165d8bc2718d28fbbd108040cc4d948a03a..105c0edd2a01eec6a2f6af0547551265838bd53f 100644 (file)
@@ -275,6 +275,8 @@ struct vfpf_stop_rxqs_tlv {
        struct vfpf_first_tlv first_tlv;
 
        u16 rx_qid;
+
+       /* this field is deprecated and should *always* be set to '1' */
        u8 num_rxqs;
        u8 cqe_completion;
        u8 padding[4];
@@ -285,6 +287,8 @@ struct vfpf_stop_txqs_tlv {
        struct vfpf_first_tlv first_tlv;
 
        u16 tx_qid;
+
+       /* this field is deprecated and should *always* be set to '1' */
        u8 num_txqs;
        u8 padding[5];
 };
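
With the num_rxqs/num_txqs fields frozen at 1, a compliant VF fills the stop-queue TLVs as below. This is a sketch of the request side; the qed_vf_pf_prep() usage mimics the VF-channel helper but is not exact driver code:

	/* Illustrative VF request under the new rule (sketch). */
	static void example_prep_stop_rxqs(struct qed_hwfn *p_hwfn,
					   u16 rx_qid, u8 cqe_completion)
	{
		struct vfpf_stop_rxqs_tlv *req;

		req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_RXQS,
				     sizeof(*req));
		req->rx_qid = rx_qid;
		req->num_rxqs = 1;	/* deprecated; must always be 1 */
		req->cqe_completion = cqe_completion;
	}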
index f2aaef2cfb86d7a31c5fdc6f5d5940e1c9459a70..e73a4a5165ee7a00193f249a33dd3297898e04e0 100644 (file)
@@ -50,7 +50,7 @@
 #define QEDE_MAJOR_VERSION             8
 #define QEDE_MINOR_VERSION             10
 #define QEDE_REVISION_VERSION          10
-#define QEDE_ENGINEERING_VERSION       20
+#define QEDE_ENGINEERING_VERSION       21
 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \
                __stringify(QEDE_MINOR_VERSION) "."             \
                __stringify(QEDE_REVISION_VERSION) "."          \
@@ -58,7 +58,7 @@
 
 #define DRV_MODULE_SYM         qede
 
-struct qede_stats {
+struct qede_stats_common {
        u64 no_buff_discards;
        u64 packet_too_big_discard;
        u64 ttl0_discard;
@@ -90,11 +90,6 @@ struct qede_stats {
        u64 rx_256_to_511_byte_packets;
        u64 rx_512_to_1023_byte_packets;
        u64 rx_1024_to_1518_byte_packets;
-       u64 rx_1519_to_1522_byte_packets;
-       u64 rx_1519_to_2047_byte_packets;
-       u64 rx_2048_to_4095_byte_packets;
-       u64 rx_4096_to_9216_byte_packets;
-       u64 rx_9217_to_16383_byte_packets;
        u64 rx_crc_errors;
        u64 rx_mac_crtl_frames;
        u64 rx_pause_frames;
@@ -111,17 +106,39 @@ struct qede_stats {
        u64 tx_256_to_511_byte_packets;
        u64 tx_512_to_1023_byte_packets;
        u64 tx_1024_to_1518_byte_packets;
+       u64 tx_pause_frames;
+       u64 tx_pfc_frames;
+       u64 brb_truncates;
+       u64 brb_discards;
+       u64 tx_mac_ctrl_frames;
+};
+
+struct qede_stats_bb {
+       u64 rx_1519_to_1522_byte_packets;
+       u64 rx_1519_to_2047_byte_packets;
+       u64 rx_2048_to_4095_byte_packets;
+       u64 rx_4096_to_9216_byte_packets;
+       u64 rx_9217_to_16383_byte_packets;
        u64 tx_1519_to_2047_byte_packets;
        u64 tx_2048_to_4095_byte_packets;
        u64 tx_4096_to_9216_byte_packets;
        u64 tx_9217_to_16383_byte_packets;
-       u64 tx_pause_frames;
-       u64 tx_pfc_frames;
        u64 tx_lpi_entry_count;
        u64 tx_total_collisions;
-       u64 brb_truncates;
-       u64 brb_discards;
-       u64 tx_mac_ctrl_frames;
+};
+
+struct qede_stats_ah {
+       u64 rx_1519_to_max_byte_packets;
+       u64 tx_1519_to_max_byte_packets;
+};
+
+struct qede_stats {
+       struct qede_stats_common common;
+
+       union {
+               struct qede_stats_bb bb;
+               struct qede_stats_ah ah;
+       };
 };
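
Because bb and ah overlay each other in the union, consumers must key off the device family before touching either, while common members stay valid on both. A minimal sketch of the access pattern (QEDE_IS_BB is introduced a few hunks below):

	/* Sketch: collisions exist only as a BB counter, so gate on the
	 * device type before reading the union member.
	 */
	static u64 qede_collisions(struct qede_dev *edev)
	{
		return QEDE_IS_BB(edev) ?
		       edev->stats.bb.tx_total_collisions : 0;
	}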
 
 struct qede_vlan {
@@ -158,6 +175,10 @@ struct qede_dev {
        struct qed_dev_eth_info dev_info;
 #define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
 #define QEDE_MAX_TSS_CNT(edev) ((edev)->dev_info.num_queues)
+#define QEDE_IS_BB(edev) \
+       ((edev)->dev_info.common.dev_type == QED_DEV_TYPE_BB)
+#define QEDE_IS_AH(edev) \
+       ((edev)->dev_info.common.dev_type == QED_DEV_TYPE_AH)
 
        struct qede_fastpath            *fp_array;
        u8                              req_num_tx;
index 8979531332455453f4fcb54b4a1a16017005c61c..4dcfe9614731db70d673cc15aec6da26ff673743 100644 (file)
@@ -75,16 +75,33 @@ static const struct {
        QEDE_TQSTAT(stopped_cnt),
 };
 
-#define QEDE_STAT_OFFSET(stat_name) (offsetof(struct qede_stats, stat_name))
-#define QEDE_STAT_STRING(stat_name) (#stat_name)
-#define _QEDE_STAT(stat_name, pf_only) \
-        {QEDE_STAT_OFFSET(stat_name), QEDE_STAT_STRING(stat_name), pf_only}
-#define QEDE_PF_STAT(stat_name)        _QEDE_STAT(stat_name, true)
-#define QEDE_STAT(stat_name)   _QEDE_STAT(stat_name, false)
+#define QEDE_STAT_OFFSET(stat_name, type, base) \
+       (offsetof(type, stat_name) + (base))
+#define QEDE_STAT_STRING(stat_name)    (#stat_name)
+#define _QEDE_STAT(stat_name, type, base, attr) \
+       {QEDE_STAT_OFFSET(stat_name, type, base), \
+        QEDE_STAT_STRING(stat_name), \
+        attr}
+#define QEDE_STAT(stat_name) \
+       _QEDE_STAT(stat_name, struct qede_stats_common, 0, 0x0)
+#define QEDE_PF_STAT(stat_name) \
+       _QEDE_STAT(stat_name, struct qede_stats_common, 0, \
+                  BIT(QEDE_STAT_PF_ONLY))
+#define QEDE_PF_BB_STAT(stat_name) \
+       _QEDE_STAT(stat_name, struct qede_stats_bb, \
+                  offsetof(struct qede_stats, bb), \
+                  BIT(QEDE_STAT_PF_ONLY) | BIT(QEDE_STAT_BB_ONLY))
+#define QEDE_PF_AH_STAT(stat_name) \
+       _QEDE_STAT(stat_name, struct qede_stats_ah, \
+                  offsetof(struct qede_stats, ah), \
+                  BIT(QEDE_STAT_PF_ONLY) | BIT(QEDE_STAT_AH_ONLY))
 static const struct {
        u64 offset;
        char string[ETH_GSTRING_LEN];
-       bool pf_only;
+       unsigned long attr;
+#define QEDE_STAT_PF_ONLY      0
+#define QEDE_STAT_BB_ONLY      1
+#define QEDE_STAT_AH_ONLY      2
 } qede_stats_arr[] = {
        QEDE_STAT(rx_ucast_bytes),
        QEDE_STAT(rx_mcast_bytes),
@@ -106,22 +123,23 @@ static const struct {
        QEDE_PF_STAT(rx_256_to_511_byte_packets),
        QEDE_PF_STAT(rx_512_to_1023_byte_packets),
        QEDE_PF_STAT(rx_1024_to_1518_byte_packets),
-       QEDE_PF_STAT(rx_1519_to_1522_byte_packets),
-       QEDE_PF_STAT(rx_1519_to_2047_byte_packets),
-       QEDE_PF_STAT(rx_2048_to_4095_byte_packets),
-       QEDE_PF_STAT(rx_4096_to_9216_byte_packets),
-       QEDE_PF_STAT(rx_9217_to_16383_byte_packets),
+       QEDE_PF_BB_STAT(rx_1519_to_1522_byte_packets),
+       QEDE_PF_BB_STAT(rx_1519_to_2047_byte_packets),
+       QEDE_PF_BB_STAT(rx_2048_to_4095_byte_packets),
+       QEDE_PF_BB_STAT(rx_4096_to_9216_byte_packets),
+       QEDE_PF_BB_STAT(rx_9217_to_16383_byte_packets),
+       QEDE_PF_AH_STAT(rx_1519_to_max_byte_packets),
        QEDE_PF_STAT(tx_64_byte_packets),
        QEDE_PF_STAT(tx_65_to_127_byte_packets),
        QEDE_PF_STAT(tx_128_to_255_byte_packets),
        QEDE_PF_STAT(tx_256_to_511_byte_packets),
        QEDE_PF_STAT(tx_512_to_1023_byte_packets),
        QEDE_PF_STAT(tx_1024_to_1518_byte_packets),
-       QEDE_PF_STAT(tx_1519_to_2047_byte_packets),
-       QEDE_PF_STAT(tx_2048_to_4095_byte_packets),
-       QEDE_PF_STAT(tx_4096_to_9216_byte_packets),
-       QEDE_PF_STAT(tx_9217_to_16383_byte_packets),
-
+       QEDE_PF_BB_STAT(tx_1519_to_2047_byte_packets),
+       QEDE_PF_BB_STAT(tx_2048_to_4095_byte_packets),
+       QEDE_PF_BB_STAT(tx_4096_to_9216_byte_packets),
+       QEDE_PF_BB_STAT(tx_9217_to_16383_byte_packets),
+       QEDE_PF_AH_STAT(tx_1519_to_max_byte_packets),
        QEDE_PF_STAT(rx_mac_crtl_frames),
        QEDE_PF_STAT(tx_mac_ctrl_frames),
        QEDE_PF_STAT(rx_pause_frames),
@@ -136,8 +154,8 @@ static const struct {
        QEDE_PF_STAT(rx_jabbers),
        QEDE_PF_STAT(rx_undersize_packets),
        QEDE_PF_STAT(rx_fragments),
-       QEDE_PF_STAT(tx_lpi_entry_count),
-       QEDE_PF_STAT(tx_total_collisions),
+       QEDE_PF_BB_STAT(tx_lpi_entry_count),
+       QEDE_PF_BB_STAT(tx_total_collisions),
        QEDE_PF_STAT(brb_truncates),
        QEDE_PF_STAT(brb_discards),
        QEDE_STAT(no_buff_discards),
@@ -155,6 +173,12 @@ static const struct {
 };
 
 #define QEDE_NUM_STATS ARRAY_SIZE(qede_stats_arr)
+#define QEDE_STAT_IS_PF_ONLY(i) \
+       test_bit(QEDE_STAT_PF_ONLY, &qede_stats_arr[i].attr)
+#define QEDE_STAT_IS_BB_ONLY(i) \
+       test_bit(QEDE_STAT_BB_ONLY, &qede_stats_arr[i].attr)
+#define QEDE_STAT_IS_AH_ONLY(i) \
+       test_bit(QEDE_STAT_AH_ONLY, &qede_stats_arr[i].attr)
 
 enum {
        QEDE_PRI_FLAG_CMT,
@@ -213,6 +237,13 @@ static void qede_get_strings_stats_rxq(struct qede_dev *edev,
        }
 }
 
+static bool qede_is_irrelevant_stat(struct qede_dev *edev, int stat_index)
+{
+       return (IS_VF(edev) && QEDE_STAT_IS_PF_ONLY(stat_index)) ||
+              (QEDE_IS_BB(edev) && QEDE_STAT_IS_AH_ONLY(stat_index)) ||
+              (QEDE_IS_AH(edev) && QEDE_STAT_IS_BB_ONLY(stat_index));
+}
+
 static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 {
        struct qede_fastpath *fp;
@@ -234,7 +265,7 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 
        /* Account for non-queue statistics */
        for (i = 0; i < QEDE_NUM_STATS; i++) {
-               if (IS_VF(edev) && qede_stats_arr[i].pf_only)
+               if (qede_is_irrelevant_stat(edev, i))
                        continue;
                strcpy(buf, qede_stats_arr[i].string);
                buf += ETH_GSTRING_LEN;
@@ -309,7 +340,7 @@ static void qede_get_ethtool_stats(struct net_device *dev,
        }
 
        for (i = 0; i < QEDE_NUM_STATS; i++) {
-               if (IS_VF(edev) && qede_stats_arr[i].pf_only)
+               if (qede_is_irrelevant_stat(edev, i))
                        continue;
                *buf = *((u64 *)(((void *)&edev->stats) +
                                 qede_stats_arr[i].offset));
@@ -323,17 +354,13 @@ static void qede_get_ethtool_stats(struct net_device *dev,
 static int qede_get_sset_count(struct net_device *dev, int stringset)
 {
        struct qede_dev *edev = netdev_priv(dev);
-       int num_stats = QEDE_NUM_STATS;
+       int num_stats = QEDE_NUM_STATS, i;
 
        switch (stringset) {
        case ETH_SS_STATS:
-               if (IS_VF(edev)) {
-                       int i;
-
-                       for (i = 0; i < QEDE_NUM_STATS; i++)
-                               if (qede_stats_arr[i].pf_only)
-                                       num_stats--;
-               }
+               for (i = 0; i < QEDE_NUM_STATS; i++)
+                       if (qede_is_irrelevant_stat(edev, i))
+                               num_stats--;
 
                /* Account for the Regular Tx statistics */
                num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS;
index 3a78c3f2515748ca882f89d7da2c3b4e5cfad79e..abd99109e5328229bed46d1c954c8b8b4bc9200e 100644 (file)
@@ -84,6 +84,8 @@ static const struct qed_eth_ops *qed_ops;
 #define CHIP_NUM_57980S_50             0x1654
 #define CHIP_NUM_57980S_25             0x1656
 #define CHIP_NUM_57980S_IOV            0x1664
+#define CHIP_NUM_AH                    0x8070
+#define CHIP_NUM_AH_IOV                        0x8090
 
 #ifndef PCI_DEVICE_ID_NX2_57980E
 #define PCI_DEVICE_ID_57980S_40                CHIP_NUM_57980S_40
@@ -93,6 +95,9 @@ static const struct qed_eth_ops *qed_ops;
 #define PCI_DEVICE_ID_57980S_50                CHIP_NUM_57980S_50
 #define PCI_DEVICE_ID_57980S_25                CHIP_NUM_57980S_25
 #define PCI_DEVICE_ID_57980S_IOV       CHIP_NUM_57980S_IOV
+#define PCI_DEVICE_ID_AH               CHIP_NUM_AH
+#define PCI_DEVICE_ID_AH_IOV           CHIP_NUM_AH_IOV
+
 #endif
 
 enum qede_pci_private {
@@ -109,6 +114,10 @@ static const struct pci_device_id qede_pci_tbl[] = {
        {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF},
 #ifdef CONFIG_QED_SRIOV
        {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF},
+#endif
+       {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_AH), QEDE_PRIVATE_PF},
+#ifdef CONFIG_QED_SRIOV
+       {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_AH_IOV), QEDE_PRIVATE_VF},
 #endif
        { 0 }
 };
@@ -314,122 +323,135 @@ static int qede_close(struct net_device *ndev);
 
 void qede_fill_by_demand_stats(struct qede_dev *edev)
 {
+       struct qede_stats_common *p_common = &edev->stats.common;
        struct qed_eth_stats stats;
 
        edev->ops->get_vport_stats(edev->cdev, &stats);
-       edev->stats.no_buff_discards = stats.no_buff_discards;
-       edev->stats.packet_too_big_discard = stats.packet_too_big_discard;
-       edev->stats.ttl0_discard = stats.ttl0_discard;
-       edev->stats.rx_ucast_bytes = stats.rx_ucast_bytes;
-       edev->stats.rx_mcast_bytes = stats.rx_mcast_bytes;
-       edev->stats.rx_bcast_bytes = stats.rx_bcast_bytes;
-       edev->stats.rx_ucast_pkts = stats.rx_ucast_pkts;
-       edev->stats.rx_mcast_pkts = stats.rx_mcast_pkts;
-       edev->stats.rx_bcast_pkts = stats.rx_bcast_pkts;
-       edev->stats.mftag_filter_discards = stats.mftag_filter_discards;
-       edev->stats.mac_filter_discards = stats.mac_filter_discards;
-
-       edev->stats.tx_ucast_bytes = stats.tx_ucast_bytes;
-       edev->stats.tx_mcast_bytes = stats.tx_mcast_bytes;
-       edev->stats.tx_bcast_bytes = stats.tx_bcast_bytes;
-       edev->stats.tx_ucast_pkts = stats.tx_ucast_pkts;
-       edev->stats.tx_mcast_pkts = stats.tx_mcast_pkts;
-       edev->stats.tx_bcast_pkts = stats.tx_bcast_pkts;
-       edev->stats.tx_err_drop_pkts = stats.tx_err_drop_pkts;
-       edev->stats.coalesced_pkts = stats.tpa_coalesced_pkts;
-       edev->stats.coalesced_events = stats.tpa_coalesced_events;
-       edev->stats.coalesced_aborts_num = stats.tpa_aborts_num;
-       edev->stats.non_coalesced_pkts = stats.tpa_not_coalesced_pkts;
-       edev->stats.coalesced_bytes = stats.tpa_coalesced_bytes;
-
-       edev->stats.rx_64_byte_packets = stats.rx_64_byte_packets;
-       edev->stats.rx_65_to_127_byte_packets = stats.rx_65_to_127_byte_packets;
-       edev->stats.rx_128_to_255_byte_packets =
-                               stats.rx_128_to_255_byte_packets;
-       edev->stats.rx_256_to_511_byte_packets =
-                               stats.rx_256_to_511_byte_packets;
-       edev->stats.rx_512_to_1023_byte_packets =
-                               stats.rx_512_to_1023_byte_packets;
-       edev->stats.rx_1024_to_1518_byte_packets =
-                               stats.rx_1024_to_1518_byte_packets;
-       edev->stats.rx_1519_to_1522_byte_packets =
-                               stats.rx_1519_to_1522_byte_packets;
-       edev->stats.rx_1519_to_2047_byte_packets =
-                               stats.rx_1519_to_2047_byte_packets;
-       edev->stats.rx_2048_to_4095_byte_packets =
-                               stats.rx_2048_to_4095_byte_packets;
-       edev->stats.rx_4096_to_9216_byte_packets =
-                               stats.rx_4096_to_9216_byte_packets;
-       edev->stats.rx_9217_to_16383_byte_packets =
-                               stats.rx_9217_to_16383_byte_packets;
-       edev->stats.rx_crc_errors = stats.rx_crc_errors;
-       edev->stats.rx_mac_crtl_frames = stats.rx_mac_crtl_frames;
-       edev->stats.rx_pause_frames = stats.rx_pause_frames;
-       edev->stats.rx_pfc_frames = stats.rx_pfc_frames;
-       edev->stats.rx_align_errors = stats.rx_align_errors;
-       edev->stats.rx_carrier_errors = stats.rx_carrier_errors;
-       edev->stats.rx_oversize_packets = stats.rx_oversize_packets;
-       edev->stats.rx_jabbers = stats.rx_jabbers;
-       edev->stats.rx_undersize_packets = stats.rx_undersize_packets;
-       edev->stats.rx_fragments = stats.rx_fragments;
-       edev->stats.tx_64_byte_packets = stats.tx_64_byte_packets;
-       edev->stats.tx_65_to_127_byte_packets = stats.tx_65_to_127_byte_packets;
-       edev->stats.tx_128_to_255_byte_packets =
-                               stats.tx_128_to_255_byte_packets;
-       edev->stats.tx_256_to_511_byte_packets =
-                               stats.tx_256_to_511_byte_packets;
-       edev->stats.tx_512_to_1023_byte_packets =
-                               stats.tx_512_to_1023_byte_packets;
-       edev->stats.tx_1024_to_1518_byte_packets =
-                               stats.tx_1024_to_1518_byte_packets;
-       edev->stats.tx_1519_to_2047_byte_packets =
-                               stats.tx_1519_to_2047_byte_packets;
-       edev->stats.tx_2048_to_4095_byte_packets =
-                               stats.tx_2048_to_4095_byte_packets;
-       edev->stats.tx_4096_to_9216_byte_packets =
-                               stats.tx_4096_to_9216_byte_packets;
-       edev->stats.tx_9217_to_16383_byte_packets =
-                               stats.tx_9217_to_16383_byte_packets;
-       edev->stats.tx_pause_frames = stats.tx_pause_frames;
-       edev->stats.tx_pfc_frames = stats.tx_pfc_frames;
-       edev->stats.tx_lpi_entry_count = stats.tx_lpi_entry_count;
-       edev->stats.tx_total_collisions = stats.tx_total_collisions;
-       edev->stats.brb_truncates = stats.brb_truncates;
-       edev->stats.brb_discards = stats.brb_discards;
-       edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames;
+
+       p_common->no_buff_discards = stats.common.no_buff_discards;
+       p_common->packet_too_big_discard = stats.common.packet_too_big_discard;
+       p_common->ttl0_discard = stats.common.ttl0_discard;
+       p_common->rx_ucast_bytes = stats.common.rx_ucast_bytes;
+       p_common->rx_mcast_bytes = stats.common.rx_mcast_bytes;
+       p_common->rx_bcast_bytes = stats.common.rx_bcast_bytes;
+       p_common->rx_ucast_pkts = stats.common.rx_ucast_pkts;
+       p_common->rx_mcast_pkts = stats.common.rx_mcast_pkts;
+       p_common->rx_bcast_pkts = stats.common.rx_bcast_pkts;
+       p_common->mftag_filter_discards = stats.common.mftag_filter_discards;
+       p_common->mac_filter_discards = stats.common.mac_filter_discards;
+
+       p_common->tx_ucast_bytes = stats.common.tx_ucast_bytes;
+       p_common->tx_mcast_bytes = stats.common.tx_mcast_bytes;
+       p_common->tx_bcast_bytes = stats.common.tx_bcast_bytes;
+       p_common->tx_ucast_pkts = stats.common.tx_ucast_pkts;
+       p_common->tx_mcast_pkts = stats.common.tx_mcast_pkts;
+       p_common->tx_bcast_pkts = stats.common.tx_bcast_pkts;
+       p_common->tx_err_drop_pkts = stats.common.tx_err_drop_pkts;
+       p_common->coalesced_pkts = stats.common.tpa_coalesced_pkts;
+       p_common->coalesced_events = stats.common.tpa_coalesced_events;
+       p_common->coalesced_aborts_num = stats.common.tpa_aborts_num;
+       p_common->non_coalesced_pkts = stats.common.tpa_not_coalesced_pkts;
+       p_common->coalesced_bytes = stats.common.tpa_coalesced_bytes;
+
+       p_common->rx_64_byte_packets = stats.common.rx_64_byte_packets;
+       p_common->rx_65_to_127_byte_packets =
+           stats.common.rx_65_to_127_byte_packets;
+       p_common->rx_128_to_255_byte_packets =
+           stats.common.rx_128_to_255_byte_packets;
+       p_common->rx_256_to_511_byte_packets =
+           stats.common.rx_256_to_511_byte_packets;
+       p_common->rx_512_to_1023_byte_packets =
+           stats.common.rx_512_to_1023_byte_packets;
+       p_common->rx_1024_to_1518_byte_packets =
+           stats.common.rx_1024_to_1518_byte_packets;
+       p_common->rx_crc_errors = stats.common.rx_crc_errors;
+       p_common->rx_mac_crtl_frames = stats.common.rx_mac_crtl_frames;
+       p_common->rx_pause_frames = stats.common.rx_pause_frames;
+       p_common->rx_pfc_frames = stats.common.rx_pfc_frames;
+       p_common->rx_align_errors = stats.common.rx_align_errors;
+       p_common->rx_carrier_errors = stats.common.rx_carrier_errors;
+       p_common->rx_oversize_packets = stats.common.rx_oversize_packets;
+       p_common->rx_jabbers = stats.common.rx_jabbers;
+       p_common->rx_undersize_packets = stats.common.rx_undersize_packets;
+       p_common->rx_fragments = stats.common.rx_fragments;
+       p_common->tx_64_byte_packets = stats.common.tx_64_byte_packets;
+       p_common->tx_65_to_127_byte_packets =
+           stats.common.tx_65_to_127_byte_packets;
+       p_common->tx_128_to_255_byte_packets =
+           stats.common.tx_128_to_255_byte_packets;
+       p_common->tx_256_to_511_byte_packets =
+           stats.common.tx_256_to_511_byte_packets;
+       p_common->tx_512_to_1023_byte_packets =
+           stats.common.tx_512_to_1023_byte_packets;
+       p_common->tx_1024_to_1518_byte_packets =
+           stats.common.tx_1024_to_1518_byte_packets;
+       p_common->tx_pause_frames = stats.common.tx_pause_frames;
+       p_common->tx_pfc_frames = stats.common.tx_pfc_frames;
+       p_common->brb_truncates = stats.common.brb_truncates;
+       p_common->brb_discards = stats.common.brb_discards;
+       p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames;
+
+       if (QEDE_IS_BB(edev)) {
+               struct qede_stats_bb *p_bb = &edev->stats.bb;
+
+               p_bb->rx_1519_to_1522_byte_packets =
+                   stats.bb.rx_1519_to_1522_byte_packets;
+               p_bb->rx_1519_to_2047_byte_packets =
+                   stats.bb.rx_1519_to_2047_byte_packets;
+               p_bb->rx_2048_to_4095_byte_packets =
+                   stats.bb.rx_2048_to_4095_byte_packets;
+               p_bb->rx_4096_to_9216_byte_packets =
+                   stats.bb.rx_4096_to_9216_byte_packets;
+               p_bb->rx_9217_to_16383_byte_packets =
+                   stats.bb.rx_9217_to_16383_byte_packets;
+               p_bb->tx_1519_to_2047_byte_packets =
+                   stats.bb.tx_1519_to_2047_byte_packets;
+               p_bb->tx_2048_to_4095_byte_packets =
+                   stats.bb.tx_2048_to_4095_byte_packets;
+               p_bb->tx_4096_to_9216_byte_packets =
+                   stats.bb.tx_4096_to_9216_byte_packets;
+               p_bb->tx_9217_to_16383_byte_packets =
+                   stats.bb.tx_9217_to_16383_byte_packets;
+               p_bb->tx_lpi_entry_count = stats.bb.tx_lpi_entry_count;
+               p_bb->tx_total_collisions = stats.bb.tx_total_collisions;
+       } else {
+               struct qede_stats_ah *p_ah = &edev->stats.ah;
+
+               p_ah->rx_1519_to_max_byte_packets =
+                   stats.ah.rx_1519_to_max_byte_packets;
+               p_ah->tx_1519_to_max_byte_packets =
+                   stats.ah.tx_1519_to_max_byte_packets;
+       }
 }
 
 static void qede_get_stats64(struct net_device *dev,
                             struct rtnl_link_stats64 *stats)
 {
        struct qede_dev *edev = netdev_priv(dev);
+       struct qede_stats_common *p_common;
 
        qede_fill_by_demand_stats(edev);
+       p_common = &edev->stats.common;
 
-       stats->rx_packets = edev->stats.rx_ucast_pkts +
-                           edev->stats.rx_mcast_pkts +
-                           edev->stats.rx_bcast_pkts;
-       stats->tx_packets = edev->stats.tx_ucast_pkts +
-                           edev->stats.tx_mcast_pkts +
-                           edev->stats.tx_bcast_pkts;
-
-       stats->rx_bytes = edev->stats.rx_ucast_bytes +
-                         edev->stats.rx_mcast_bytes +
-                         edev->stats.rx_bcast_bytes;
+       stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts +
+                           p_common->rx_bcast_pkts;
+       stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts +
+                           p_common->tx_bcast_pkts;
 
-       stats->tx_bytes = edev->stats.tx_ucast_bytes +
-                         edev->stats.tx_mcast_bytes +
-                         edev->stats.tx_bcast_bytes;
+       stats->rx_bytes = p_common->rx_ucast_bytes + p_common->rx_mcast_bytes +
+                         p_common->rx_bcast_bytes;
+       stats->tx_bytes = p_common->tx_ucast_bytes + p_common->tx_mcast_bytes +
+                         p_common->tx_bcast_bytes;
 
-       stats->tx_errors = edev->stats.tx_err_drop_pkts;
-       stats->multicast = edev->stats.rx_mcast_pkts +
-                          edev->stats.rx_bcast_pkts;
+       stats->tx_errors = p_common->tx_err_drop_pkts;
+       stats->multicast = p_common->rx_mcast_pkts + p_common->rx_bcast_pkts;
 
-       stats->rx_fifo_errors = edev->stats.no_buff_discards;
+       stats->rx_fifo_errors = p_common->no_buff_discards;
 
-       stats->collisions = edev->stats.tx_total_collisions;
-       stats->rx_crc_errors = edev->stats.rx_crc_errors;
-       stats->rx_frame_errors = edev->stats.rx_align_errors;
+       if (QEDE_IS_BB(edev))
+               stats->collisions = edev->stats.bb.tx_total_collisions;
+       stats->rx_crc_errors = p_common->rx_crc_errors;
+       stats->rx_frame_errors = p_common->rx_align_errors;
 }
 
 #ifdef CONFIG_QED_SRIOV
index f62c215be779853ad76cf71f02b09dc4f7d62a1a..7116be485e6129090b2a60f6a8397a37be3b1897 100644 (file)
@@ -26,6 +26,7 @@
 
 /* SGMII digital lane registers */
 #define EMAC_SGMII_LN_DRVR_CTRL0               0x000C
+#define EMAC_SGMII_LN_DRVR_CTRL1               0x0010
 #define EMAC_SGMII_LN_DRVR_TAP_EN              0x0018
 #define EMAC_SGMII_LN_TX_MARGINING             0x001C
 #define EMAC_SGMII_LN_TX_PRE                   0x0020
@@ -48,6 +49,7 @@
 #define EMAC_SGMII_LN_RX_EN_SIGNAL             0x02AC
 #define EMAC_SGMII_LN_RX_MISC_CNTRL0           0x02B8
 #define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV                0x02C8
+#define EMAC_SGMII_LN_RX_RESECODE_OFFSET       0x02CC
 
 /* SGMII digital lane register values */
 #define UCDR_STEP_BY_TWO_MODE0                 BIT(7)
@@ -73,6 +75,8 @@
 #define CML_GEAR_MODE(x)                       (((x) & 7) << 3)
 #define CML2CMOS_IBOOST_MODE(x)                        ((x) & 7)
 
+#define RESCODE_OFFSET(x)                      ((x) & 0x1f)
+
 #define MIXER_LOADB_MODE(x)                    (((x) & 0xf) << 2)
 #define MIXER_DATARATE_MODE(x)                 ((x) & 3)
 
@@ -159,6 +163,8 @@ static const struct emac_reg_write sgmii_laned[] = {
        {EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
        {EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(1)},
        {EMAC_SGMII_LN_RX_BAND, BAND_MODE0(2)},
+       {EMAC_SGMII_LN_DRVR_CTRL1, RESCODE_OFFSET(7)},
+       {EMAC_SGMII_LN_RX_RESECODE_OFFSET, RESCODE_OFFSET(9)},
        {EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
        {EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(2) |
                EN_DLL_MODE0 | EN_IQ_DCC_MODE0 | EN_IQCAL_MODE0},
index 040b28977ee74c8cbd9a3f83c73a14597677ddbd..18c184ee1f3c0ee9e7ec66d2fafba4da22b9d676 100644 (file)
@@ -13,6 +13,7 @@
 /* Qualcomm Technologies, Inc. EMAC SGMII Controller driver.
  */
 
+#include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/acpi.h>
 #include <linux/of_device.h>
index 672f6b696069ad8b47989ecb1dd03d3a81fde565..72233ab9474b1263271c772691c5c35cc245a63e 100644 (file)
@@ -1406,27 +1406,29 @@ static int cp_get_sset_count (struct net_device *dev, int sset)
        }
 }
 
-static int cp_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int cp_get_link_ksettings(struct net_device *dev,
+                                struct ethtool_link_ksettings *cmd)
 {
        struct cp_private *cp = netdev_priv(dev);
        int rc;
        unsigned long flags;
 
        spin_lock_irqsave(&cp->lock, flags);
-       rc = mii_ethtool_gset(&cp->mii_if, cmd);
+       rc = mii_ethtool_get_link_ksettings(&cp->mii_if, cmd);
        spin_unlock_irqrestore(&cp->lock, flags);
 
        return rc;
 }
 
-static int cp_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int cp_set_link_ksettings(struct net_device *dev,
+                                const struct ethtool_link_ksettings *cmd)
 {
        struct cp_private *cp = netdev_priv(dev);
        int rc;
        unsigned long flags;
 
        spin_lock_irqsave(&cp->lock, flags);
-       rc = mii_ethtool_sset(&cp->mii_if, cmd);
+       rc = mii_ethtool_set_link_ksettings(&cp->mii_if, cmd);
        spin_unlock_irqrestore(&cp->lock, flags);
 
        return rc;
@@ -1578,8 +1580,6 @@ static const struct ethtool_ops cp_ethtool_ops = {
        .get_drvinfo            = cp_get_drvinfo,
        .get_regs_len           = cp_get_regs_len,
        .get_sset_count         = cp_get_sset_count,
-       .get_settings           = cp_get_settings,
-       .set_settings           = cp_set_settings,
        .nway_reset             = cp_nway_reset,
        .get_link               = ethtool_op_get_link,
        .get_msglevel           = cp_get_msglevel,
@@ -1593,6 +1593,8 @@ static const struct ethtool_ops cp_ethtool_ops = {
        .get_eeprom             = cp_get_eeprom,
        .set_eeprom             = cp_set_eeprom,
        .get_ringparam          = cp_get_ringparam,
+       .get_link_ksettings     = cp_get_link_ksettings,
+       .set_link_ksettings     = cp_set_link_ksettings,
 };
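
The same mechanical conversion repeats across the drivers below (8139too, r8169, rocker, ioc3, sc92031): drop get_settings/set_settings, expose get_link_ksettings/set_link_ksettings, and translate any legacy u32 capability masks with ethtool_convert_legacy_u32_to_link_mode(). A condensed sketch of the get side for a fictional driver:

	/* Fictional driver, for shape only: legacy u32 masks become the new
	 * link-mode bitmaps; scalar fields move under cmd->base.
	 */
	static int foo_get_link_ksettings(struct net_device *dev,
					  struct ethtool_link_ksettings *cmd)
	{
		u32 supported = SUPPORTED_100baseT_Full | SUPPORTED_TP |
				SUPPORTED_Autoneg;
		u32 advertising = ADVERTISED_100baseT_Full | ADVERTISED_TP;

		cmd->base.speed = SPEED_100;
		cmd->base.duplex = DUPLEX_FULL;
		cmd->base.port = PORT_TP;
		cmd->base.autoneg = AUTONEG_ENABLE;

		ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
							supported);
		ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
							advertising);
		return 0;
	}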
 
 static int cp_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
index 89631753e79962d91456d93b71929af768917da1..ca22f2898664617656f7fa6e4e98df1e7aa3bc26 100644 (file)
@@ -2384,21 +2384,23 @@ static void rtl8139_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *
        strlcpy(info->bus_info, pci_name(tp->pci_dev), sizeof(info->bus_info));
 }
 
-static int rtl8139_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8139_get_link_ksettings(struct net_device *dev,
+                                     struct ethtool_link_ksettings *cmd)
 {
        struct rtl8139_private *tp = netdev_priv(dev);
        spin_lock_irq(&tp->lock);
-       mii_ethtool_gset(&tp->mii, cmd);
+       mii_ethtool_get_link_ksettings(&tp->mii, cmd);
        spin_unlock_irq(&tp->lock);
        return 0;
 }
 
-static int rtl8139_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8139_set_link_ksettings(struct net_device *dev,
+                                     const struct ethtool_link_ksettings *cmd)
 {
        struct rtl8139_private *tp = netdev_priv(dev);
        int rc;
        spin_lock_irq(&tp->lock);
-       rc = mii_ethtool_sset(&tp->mii, cmd);
+       rc = mii_ethtool_set_link_ksettings(&tp->mii, cmd);
        spin_unlock_irq(&tp->lock);
        return rc;
 }
@@ -2480,8 +2482,6 @@ static void rtl8139_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
 static const struct ethtool_ops rtl8139_ethtool_ops = {
        .get_drvinfo            = rtl8139_get_drvinfo,
-       .get_settings           = rtl8139_get_settings,
-       .set_settings           = rtl8139_set_settings,
        .get_regs_len           = rtl8139_get_regs_len,
        .get_regs               = rtl8139_get_regs,
        .nway_reset             = rtl8139_nway_reset,
@@ -2493,6 +2493,8 @@ static const struct ethtool_ops rtl8139_ethtool_ops = {
        .get_strings            = rtl8139_get_strings,
        .get_sset_count         = rtl8139_get_sset_count,
        .get_ethtool_stats      = rtl8139_get_ethtool_stats,
+       .get_link_ksettings     = rtl8139_get_link_ksettings,
+       .set_link_ksettings     = rtl8139_set_link_ksettings,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
index 81f18a8335276495a59fa93219c4607c2b8a47aa..0a8f2817ea60f2172eb28177473a4879f85bd18a 100644 (file)
@@ -817,7 +817,8 @@ struct rtl8169_private {
        } csi_ops;
 
        int (*set_speed)(struct net_device *, u8 aneg, u16 sp, u8 dpx, u32 adv);
-       int (*get_settings)(struct net_device *, struct ethtool_cmd *);
+       int (*get_link_ksettings)(struct net_device *,
+                                 struct ethtool_link_ksettings *);
        void (*phy_reset_enable)(struct rtl8169_private *tp);
        void (*hw_start)(struct net_device *);
        unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
@@ -2115,41 +2116,49 @@ static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
 }
 
-static int rtl8169_gset_tbi(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
+                                         struct ethtool_link_ksettings *cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
        void __iomem *ioaddr = tp->mmio_addr;
        u32 status;
+       u32 supported, advertising;
 
-       cmd->supported =
+       supported =
                SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
-       cmd->port = PORT_FIBRE;
-       cmd->transceiver = XCVR_INTERNAL;
+       cmd->base.port = PORT_FIBRE;
 
        status = RTL_R32(TBICSR);
-       cmd->advertising = (status & TBINwEnable) ?  ADVERTISED_Autoneg : 0;
-       cmd->autoneg = !!(status & TBINwEnable);
+       advertising = (status & TBINwEnable) ?  ADVERTISED_Autoneg : 0;
+       cmd->base.autoneg = !!(status & TBINwEnable);
 
-       ethtool_cmd_speed_set(cmd, SPEED_1000);
-       cmd->duplex = DUPLEX_FULL; /* Always set */
+       cmd->base.speed = SPEED_1000;
+       cmd->base.duplex = DUPLEX_FULL; /* Always set */
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int rtl8169_gset_xmii(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8169_get_link_ksettings_xmii(struct net_device *dev,
+                                          struct ethtool_link_ksettings *cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       return mii_ethtool_gset(&tp->mii, cmd);
+       return mii_ethtool_get_link_ksettings(&tp->mii, cmd);
 }
 
-static int rtl8169_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8169_get_link_ksettings(struct net_device *dev,
+                                     struct ethtool_link_ksettings *cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
        int rc;
 
        rtl_lock_work(tp);
-       rc = tp->get_settings(dev, cmd);
+       rc = tp->get_link_ksettings(dev, cmd);
        rtl_unlock_work(tp);
 
        return rc;
@@ -2356,7 +2365,6 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
        .get_drvinfo            = rtl8169_get_drvinfo,
        .get_regs_len           = rtl8169_get_regs_len,
        .get_link               = ethtool_op_get_link,
-       .get_settings           = rtl8169_get_settings,
        .set_settings           = rtl8169_set_settings,
        .get_msglevel           = rtl8169_get_msglevel,
        .set_msglevel           = rtl8169_set_msglevel,
@@ -2368,6 +2376,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
        .get_ethtool_stats      = rtl8169_get_ethtool_stats,
        .get_ts_info            = ethtool_op_get_ts_info,
        .nway_reset             = rtl8169_nway_reset,
+       .get_link_ksettings     = rtl8169_get_link_ksettings,
 };
 
 static void rtl8169_get_mac_version(struct rtl8169_private *tp,
@@ -8351,14 +8360,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        if (rtl_tbi_enabled(tp)) {
                tp->set_speed = rtl8169_set_speed_tbi;
-               tp->get_settings = rtl8169_gset_tbi;
+               tp->get_link_ksettings = rtl8169_get_link_ksettings_tbi;
                tp->phy_reset_enable = rtl8169_tbi_reset_enable;
                tp->phy_reset_pending = rtl8169_tbi_reset_pending;
                tp->link_ok = rtl8169_tbi_link_ok;
                tp->do_ioctl = rtl_tbi_ioctl;
        } else {
                tp->set_speed = rtl8169_set_speed_xmii;
-               tp->get_settings = rtl8169_gset_xmii;
+               tp->get_link_ksettings = rtl8169_get_link_ksettings_xmii;
                tp->phy_reset_enable = rtl8169_xmii_reset_enable;
                tp->phy_reset_pending = rtl8169_xmii_reset_pending;
                tp->link_ok = rtl8169_xmii_link_ok;
@@ -8444,9 +8453,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ?
                ~(RxBOVF | RxFOVF) : ~0;
 
-       init_timer(&tp->timer);
-       tp->timer.data = (unsigned long) dev;
-       tp->timer.function = rtl8169_phy_timer;
+       setup_timer(&tp->timer, rtl8169_phy_timer, (unsigned long)dev);
 
        tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
index 0f63a44a955deb4de7b8da6c69b5dd6125d19dad..bab13613b138cc15c734d9e9fff5f465ef480a44 100644 (file)
@@ -33,6 +33,7 @@
 #include <net/rtnetlink.h>
 #include <net/netevent.h>
 #include <net/arp.h>
+#include <net/fib_rules.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <generated/utsrelease.h>
 
@@ -1115,7 +1116,7 @@ rocker_cmd_get_port_settings_ethtool_proc(const struct rocker_port *rocker_port,
                                          const struct rocker_desc_info *desc_info,
                                          void *priv)
 {
-       struct ethtool_cmd *ecmd = priv;
+       struct ethtool_link_ksettings *ecmd = priv;
        const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1];
        const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
        u32 speed;
@@ -1137,13 +1138,14 @@ rocker_cmd_get_port_settings_ethtool_proc(const struct rocker_port *rocker_port,
        duplex = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
        autoneg = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 
-       ecmd->transceiver = XCVR_INTERNAL;
-       ecmd->supported = SUPPORTED_TP;
-       ecmd->phy_address = 0xff;
-       ecmd->port = PORT_TP;
-       ethtool_cmd_speed_set(ecmd, speed);
-       ecmd->duplex = duplex ? DUPLEX_FULL : DUPLEX_HALF;
-       ecmd->autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+       ethtool_link_ksettings_zero_link_mode(ecmd, supported);
+       ethtool_link_ksettings_add_link_mode(ecmd, supported, TP);
+
+       ecmd->base.phy_address = 0xff;
+       ecmd->base.port = PORT_TP;
+       ecmd->base.speed = speed;
+       ecmd->base.duplex = duplex ? DUPLEX_FULL : DUPLEX_HALF;
+       ecmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
 
        return 0;
 }
@@ -1250,7 +1252,7 @@ rocker_cmd_set_port_settings_ethtool_prep(const struct rocker_port *rocker_port,
                                          struct rocker_desc_info *desc_info,
                                          void *priv)
 {
-       struct ethtool_cmd *ecmd = priv;
+       struct ethtool_link_ksettings *ecmd = priv;
        struct rocker_tlv *cmd_info;
 
        if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
@@ -1263,13 +1265,13 @@ rocker_cmd_set_port_settings_ethtool_prep(const struct rocker_port *rocker_port,
                               rocker_port->pport))
                return -EMSGSIZE;
        if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED,
-                              ethtool_cmd_speed(ecmd)))
+                              ecmd->base.speed))
                return -EMSGSIZE;
        if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX,
-                             ecmd->duplex))
+                             ecmd->base.duplex))
                return -EMSGSIZE;
        if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG,
-                             ecmd->autoneg))
+                             ecmd->base.autoneg))
                return -EMSGSIZE;
        rocker_tlv_nest_end(desc_info, cmd_info);
        return 0;
@@ -1347,8 +1349,9 @@ rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port,
        return 0;
 }
 
-static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port,
-                                               struct ethtool_cmd *ecmd)
+static int
+rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port,
+                                    struct ethtool_link_ksettings *ecmd)
 {
        return rocker_cmd_exec(rocker_port, false,
                               rocker_cmd_get_port_settings_prep, NULL,
@@ -1373,12 +1376,17 @@ static int rocker_cmd_get_port_settings_mode(struct rocker_port *rocker_port,
                               rocker_cmd_get_port_settings_mode_proc, p_mode);
 }
 
-static int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port,
-                                               struct ethtool_cmd *ecmd)
+static int
+rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port,
+                                    const struct ethtool_link_ksettings *ecmd)
 {
+       struct ethtool_link_ksettings copy_ecmd;
+
+       memcpy(&copy_ecmd, ecmd, sizeof(copy_ecmd));
+
        return rocker_cmd_exec(rocker_port, false,
                               rocker_cmd_set_port_settings_ethtool_prep,
-                              ecmd, NULL, NULL);
+                              &copy_ecmd, NULL, NULL);
 }
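
The stack copy exists because rocker_cmd_exec() takes a non-const void *priv while the new set_link_ksettings callback receives a const command. The same pattern in miniature, with example_ names that are hypothetical:

	static int example_exec(void *priv)
	{
		return 0;	/* stand-in for rocker_cmd_exec() */
	}

	static int example_set(const struct ethtool_link_ksettings *cmd)
	{
		struct ethtool_link_ksettings copy = *cmd; /* struct copy */

		return example_exec(&copy);	/* mutable pointer, safe */
	}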
 
 static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port,
@@ -2168,7 +2176,10 @@ static const struct switchdev_ops rocker_port_switchdev_ops = {
 
 struct rocker_fib_event_work {
        struct work_struct work;
-       struct fib_entry_notifier_info fen_info;
+       union {
+               struct fib_entry_notifier_info fen_info;
+               struct fib_rule_notifier_info fr_info;
+       };
        struct rocker *rocker;
        unsigned long event;
 };
@@ -2178,6 +2189,7 @@ static void rocker_router_fib_event_work(struct work_struct *work)
        struct rocker_fib_event_work *fib_work =
                container_of(work, struct rocker_fib_event_work, work);
        struct rocker *rocker = fib_work->rocker;
+       struct fib_rule *rule;
        int err;
 
        /* Protect internal structures from changes */
@@ -2195,7 +2207,10 @@ static void rocker_router_fib_event_work(struct work_struct *work)
                break;
        case FIB_EVENT_RULE_ADD: /* fall through */
        case FIB_EVENT_RULE_DEL:
-               rocker_world_fib4_abort(rocker);
+               rule = fib_work->fr_info.rule;
+               if (!fib4_rule_default(rule))
+                       rocker_world_fib4_abort(rocker);
+               fib_rule_put(rule);
                break;
        }
        rtnl_unlock();
@@ -2226,6 +2241,11 @@ static int rocker_router_fib_event(struct notifier_block *nb,
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
+       case FIB_EVENT_RULE_ADD: /* fall through */
+       case FIB_EVENT_RULE_DEL:
+               memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
+               fib_rule_get(fib_work->fr_info.rule);
+               break;
        }
 
        queue_work(rocker->rocker_owq, &fib_work->work);
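
The notifier runs in atomic context while the rule is only inspected later from the work item, so the rule is pinned with fib_rule_get() before the work is queued and released with fib_rule_put() once inspected. A self-contained sketch of that handoff; names prefixed example_ are hypothetical:

	struct example_fib_work {
		struct work_struct work;
		struct fib_rule_notifier_info fr_info;
	};

	static void example_fib_work_fn(struct work_struct *work)
	{
		struct example_fib_work *fw =
			container_of(work, struct example_fib_work, work);

		rtnl_lock();
		if (!fib4_rule_default(fw->fr_info.rule))
			pr_info("non-default FIB rule changed\n");
		rtnl_unlock();

		fib_rule_put(fw->fr_info.rule);	/* drop the pin taken below */
		kfree(fw);
	}

	static int example_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
	{
		struct example_fib_work *fw;

		if (event != FIB_EVENT_RULE_ADD &&
		    event != FIB_EVENT_RULE_DEL)
			return NOTIFY_DONE;

		fw = kzalloc(sizeof(*fw), GFP_ATOMIC); /* atomic context */
		if (!fw)
			return NOTIFY_BAD;

		INIT_WORK(&fw->work, example_fib_work_fn);
		memcpy(&fw->fr_info, ptr, sizeof(fw->fr_info));
		fib_rule_get(fw->fr_info.rule);	/* pin until the work runs */

		schedule_work(&fw->work);
		return NOTIFY_DONE;
	}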
@@ -2237,16 +2257,18 @@ static int rocker_router_fib_event(struct notifier_block *nb,
  * ethtool interface
  ********************/
 
-static int rocker_port_get_settings(struct net_device *dev,
-                                   struct ethtool_cmd *ecmd)
+static int
+rocker_port_get_link_ksettings(struct net_device *dev,
+                              struct ethtool_link_ksettings *ecmd)
 {
        struct rocker_port *rocker_port = netdev_priv(dev);
 
        return rocker_cmd_get_port_settings_ethtool(rocker_port, ecmd);
 }
 
-static int rocker_port_set_settings(struct net_device *dev,
-                                   struct ethtool_cmd *ecmd)
+static int
+rocker_port_set_link_ksettings(struct net_device *dev,
+                              const struct ethtool_link_ksettings *ecmd)
 {
        struct rocker_port *rocker_port = netdev_priv(dev);
 
@@ -2388,13 +2410,13 @@ static int rocker_port_get_sset_count(struct net_device *netdev, int sset)
 }
 
 static const struct ethtool_ops rocker_port_ethtool_ops = {
-       .get_settings           = rocker_port_get_settings,
-       .set_settings           = rocker_port_set_settings,
        .get_drvinfo            = rocker_port_get_drvinfo,
        .get_link               = ethtool_op_get_link,
        .get_strings            = rocker_port_get_strings,
        .get_ethtool_stats      = rocker_port_get_stats,
        .get_sset_count         = rocker_port_get_sset_count,
+       .get_link_ksettings     = rocker_port_get_link_ksettings,
+       .set_link_ksettings     = rocker_port_set_link_ksettings,
 };
 
 /*****************
index 334bcc6df6b2ba90a43da4baf7b44cc5ebfa1bac..50d28261b6b9ea22f42c26be0e9f0e0bed194109 100644 (file)
@@ -2404,7 +2404,7 @@ static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *t
        tnl.type = (u16)efx_tunnel_type;
        tnl.port = ti->port;
 
-       if (efx->type->udp_tnl_add_port)
+       if (efx->type->udp_tnl_del_port)
                (void)efx->type->udp_tnl_del_port(efx, tnl);
 }
 
index 104fb15a73f2074c145878f0466d8ff1fc650167..f6daf09b86272397d35bc72d59c5269b4644db1c 100644 (file)
@@ -437,11 +437,13 @@ int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
        if (ntc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       num_tc = ntc->tc;
+       num_tc = ntc->mqprio->num_tc;
 
        if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0 || num_tc > EF4_MAX_TX_TC)
                return -EINVAL;
 
+       ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
        if (num_tc == net_dev->num_tc)
                return 0;
 
index ff88d60aa6d5650d04f46938eaf6abc63c4ff568..3bdf87f310877a31fee219afa3de3dea91e40521 100644 (file)
@@ -665,11 +665,13 @@ int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
        if (ntc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
-       num_tc = ntc->tc;
+       num_tc = ntc->mqprio->num_tc;
 
        if (num_tc > EFX_MAX_TX_TC)
                return -EINVAL;
 
+       ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
        if (num_tc == net_dev->num_tc)
                return 0;
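
Both sfc variants switch from the old ntc->tc scalar to the structured mqprio parameters and report TC_MQPRIO_HW_OFFLOAD_TCS back to the stack. The handler shape after this change, reduced to its mqprio-specific part (a sketch against the 4.11-era tc_to_netdev API, with a fictional driver name):

	/* Sketch of the mqprio-specific handling after this change. */
	static int foo_setup_tc(struct net_device *net_dev, u32 handle,
				__be16 proto, struct tc_to_netdev *ntc)
	{
		unsigned int num_tc;

		if (ntc->type != TC_SETUP_MQPRIO)
			return -EINVAL;

		num_tc = ntc->mqprio->num_tc;		/* was ntc->tc */
		ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; /* HW offload */

		return netdev_set_num_tc(net_dev, num_tc);
	}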
 
index 57e6cef81ebec9d566a860424f4f83d7bb602663..52ead5524de76b8de6581c2fea8bc69d6ccebcb7 100644 (file)
@@ -1558,25 +1558,27 @@ static void ioc3_get_drvinfo (struct net_device *dev,
        strlcpy(info->bus_info, pci_name(ip->pdev), sizeof(info->bus_info));
 }
 
-static int ioc3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int ioc3_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct ioc3_private *ip = netdev_priv(dev);
        int rc;
 
        spin_lock_irq(&ip->ioc3_lock);
-       rc = mii_ethtool_gset(&ip->mii, cmd);
+       rc = mii_ethtool_get_link_ksettings(&ip->mii, cmd);
        spin_unlock_irq(&ip->ioc3_lock);
 
        return rc;
 }
 
-static int ioc3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int ioc3_set_link_ksettings(struct net_device *dev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct ioc3_private *ip = netdev_priv(dev);
        int rc;
 
        spin_lock_irq(&ip->ioc3_lock);
-       rc = mii_ethtool_sset(&ip->mii, cmd);
+       rc = mii_ethtool_set_link_ksettings(&ip->mii, cmd);
        spin_unlock_irq(&ip->ioc3_lock);
 
        return rc;
@@ -1608,10 +1610,10 @@ static u32 ioc3_get_link(struct net_device *dev)
 
 static const struct ethtool_ops ioc3_ethtool_ops = {
        .get_drvinfo            = ioc3_get_drvinfo,
-       .get_settings           = ioc3_get_settings,
-       .set_settings           = ioc3_set_settings,
        .nway_reset             = ioc3_nway_reset,
        .get_link               = ioc3_get_link,
+       .get_link_ksettings     = ioc3_get_link_ksettings,
+       .set_link_ksettings     = ioc3_set_link_ksettings,
 };
 
 static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
index 6c2e2b311c16d396f3969a90183648d7e77a7858..751c81848f3557c2c019f3d3f59e14a9fb247034 100644 (file)
@@ -1122,14 +1122,16 @@ static void sc92031_poll_controller(struct net_device *dev)
 }
 #endif
 
-static int sc92031_ethtool_get_settings(struct net_device *dev,
-               struct ethtool_cmd *cmd)
+static int
+sc92031_ethtool_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct sc92031_priv *priv = netdev_priv(dev);
        void __iomem *port_base = priv->port_base;
        u8 phy_address;
        u32 phy_ctrl;
        u16 output_status;
+       u32 supported, advertising;
 
        spin_lock_bh(&priv->lock);
 
@@ -1142,68 +1144,77 @@ static int sc92031_ethtool_get_settings(struct net_device *dev,
 
        spin_unlock_bh(&priv->lock);
 
-       cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full
+       supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full
                        | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full
                        | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII;
 
-       cmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
+       advertising = ADVERTISED_TP | ADVERTISED_MII;
 
        if ((phy_ctrl & (PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10))
                        == (PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10))
-               cmd->advertising |= ADVERTISED_Autoneg;
+               advertising |= ADVERTISED_Autoneg;
 
        if ((phy_ctrl & PhyCtrlSpd10) == PhyCtrlSpd10)
-               cmd->advertising |= ADVERTISED_10baseT_Half;
+               advertising |= ADVERTISED_10baseT_Half;
 
        if ((phy_ctrl & (PhyCtrlSpd10 | PhyCtrlDux))
                        == (PhyCtrlSpd10 | PhyCtrlDux))
-               cmd->advertising |= ADVERTISED_10baseT_Full;
+               advertising |= ADVERTISED_10baseT_Full;
 
        if ((phy_ctrl & PhyCtrlSpd100) == PhyCtrlSpd100)
-               cmd->advertising |= ADVERTISED_100baseT_Half;
+               advertising |= ADVERTISED_100baseT_Half;
 
        if ((phy_ctrl & (PhyCtrlSpd100 | PhyCtrlDux))
                        == (PhyCtrlSpd100 | PhyCtrlDux))
-               cmd->advertising |= ADVERTISED_100baseT_Full;
+               advertising |= ADVERTISED_100baseT_Full;
 
        if (phy_ctrl & PhyCtrlAne)
-               cmd->advertising |= ADVERTISED_Autoneg;
+               advertising |= ADVERTISED_Autoneg;
 
-       ethtool_cmd_speed_set(cmd,
-                             (output_status & 0x2) ? SPEED_100 : SPEED_10);
-       cmd->duplex = (output_status & 0x4) ? DUPLEX_FULL : DUPLEX_HALF;
-       cmd->port = PORT_MII;
-       cmd->phy_address = phy_address;
-       cmd->transceiver = XCVR_INTERNAL;
-       cmd->autoneg = (phy_ctrl & PhyCtrlAne) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+       cmd->base.speed = (output_status & 0x2) ? SPEED_100 : SPEED_10;
+       cmd->base.duplex = (output_status & 0x4) ? DUPLEX_FULL : DUPLEX_HALF;
+       cmd->base.port = PORT_MII;
+       cmd->base.phy_address = phy_address;
+       cmd->base.autoneg = (phy_ctrl & PhyCtrlAne) ?
+               AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int sc92031_ethtool_set_settings(struct net_device *dev,
-               struct ethtool_cmd *cmd)
+static int
+sc92031_ethtool_set_link_ksettings(struct net_device *dev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct sc92031_priv *priv = netdev_priv(dev);
        void __iomem *port_base = priv->port_base;
-       u32 speed = ethtool_cmd_speed(cmd);
+       u32 speed = cmd->base.speed;
        u32 phy_ctrl;
        u32 old_phy_ctrl;
+       u32 advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        if (!(speed == SPEED_10 || speed == SPEED_100))
                return -EINVAL;
-       if (!(cmd->duplex == DUPLEX_HALF || cmd->duplex == DUPLEX_FULL))
-               return -EINVAL;
-       if (!(cmd->port == PORT_MII))
+       if (!(cmd->base.duplex == DUPLEX_HALF ||
+             cmd->base.duplex == DUPLEX_FULL))
                return -EINVAL;
-       if (!(cmd->phy_address == 0x1f))
+       if (!(cmd->base.port == PORT_MII))
                return -EINVAL;
-       if (!(cmd->transceiver == XCVR_INTERNAL))
+       if (!(cmd->base.phy_address == 0x1f))
                return -EINVAL;
-       if (!(cmd->autoneg == AUTONEG_DISABLE || cmd->autoneg == AUTONEG_ENABLE))
+       if (!(cmd->base.autoneg == AUTONEG_DISABLE ||
+             cmd->base.autoneg == AUTONEG_ENABLE))
                return -EINVAL;
 
-       if (cmd->autoneg == AUTONEG_ENABLE) {
-               if (!(cmd->advertising & (ADVERTISED_Autoneg
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
+               if (!(advertising & (ADVERTISED_Autoneg
                                | ADVERTISED_100baseT_Full
                                | ADVERTISED_100baseT_Half
                                | ADVERTISED_10baseT_Full
@@ -1213,15 +1224,15 @@ static int sc92031_ethtool_set_settings(struct net_device *dev,
                phy_ctrl = PhyCtrlAne;
 
                // FIXME: I'm not sure what the original code was trying to do
-               if (cmd->advertising & ADVERTISED_Autoneg)
+               if (advertising & ADVERTISED_Autoneg)
                        phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10;
-               if (cmd->advertising & ADVERTISED_100baseT_Full)
+               if (advertising & ADVERTISED_100baseT_Full)
                        phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100;
-               if (cmd->advertising & ADVERTISED_100baseT_Half)
+               if (advertising & ADVERTISED_100baseT_Half)
                        phy_ctrl |= PhyCtrlSpd100;
-               if (cmd->advertising & ADVERTISED_10baseT_Full)
+               if (advertising & ADVERTISED_10baseT_Full)
                        phy_ctrl |= PhyCtrlSpd10 | PhyCtrlDux;
-               if (cmd->advertising & ADVERTISED_10baseT_Half)
+               if (advertising & ADVERTISED_10baseT_Half)
                        phy_ctrl |= PhyCtrlSpd10;
        } else {
                // FIXME: Whole branch guessed
@@ -1232,7 +1243,7 @@ static int sc92031_ethtool_set_settings(struct net_device *dev,
                else /* cmd->speed == SPEED_100 */
                        phy_ctrl |= PhyCtrlSpd100;
 
-               if (cmd->duplex == DUPLEX_FULL)
+               if (cmd->base.duplex == DUPLEX_FULL)
                        phy_ctrl |= PhyCtrlDux;
        }
 
@@ -1368,8 +1379,6 @@ static void sc92031_ethtool_get_ethtool_stats(struct net_device *dev,
 }
 
 static const struct ethtool_ops sc92031_ethtool_ops = {
-       .get_settings           = sc92031_ethtool_get_settings,
-       .set_settings           = sc92031_ethtool_set_settings,
        .get_wol                = sc92031_ethtool_get_wol,
        .set_wol                = sc92031_ethtool_set_wol,
        .nway_reset             = sc92031_ethtool_nway_reset,
@@ -1377,6 +1386,8 @@ static const struct ethtool_ops sc92031_ethtool_ops = {
        .get_strings            = sc92031_ethtool_get_strings,
        .get_sset_count         = sc92031_ethtool_get_sset_count,
        .get_ethtool_stats      = sc92031_ethtool_get_ethtool_stats,
+       .get_link_ksettings     = sc92031_ethtool_get_link_ksettings,
+       .set_link_ksettings     = sc92031_ethtool_set_link_ksettings,
 };
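
The sc92031 getter still samples phy_ctrl and output_status under the device lock, then derives speed, duplex and the advertising bitmap afterwards. A compact sketch of the status decode; the bit positions mirror the driver's output_status tests, everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

#define STS_SPEED_100	0x2	/* same bits the driver tests in output_status */
#define STS_DUPLEX_FULL	0x4

struct link_state { int speed; int full_duplex; };

static struct link_state decode_status(uint16_t output_status)
{
	struct link_state st = {
		.speed = (output_status & STS_SPEED_100) ? 100 : 10,
		.full_duplex = !!(output_status & STS_DUPLEX_FULL),
	};
	return st;
}

int main(void)
{
	struct link_state st = decode_status(STS_SPEED_100 | STS_DUPLEX_FULL);

	printf("%d Mb/s, %s duplex\n",
	       st.speed, st.full_duplex ? "full" : "half");
	return 0;
}
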
 
 
index 210e35d079dd88672fe5c419955a04913d49ad62..02da106c6e04e8a59ff001575fd29cda851435cf 100644 (file)
@@ -1734,18 +1734,20 @@ static void sis190_set_speed_auto(struct net_device *dev)
                   BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET);
 }
 
-static int sis190_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int sis190_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct sis190_private *tp = netdev_priv(dev);
 
-       return mii_ethtool_gset(&tp->mii_if, cmd);
+       return mii_ethtool_get_link_ksettings(&tp->mii_if, cmd);
 }
 
-static int sis190_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int sis190_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct sis190_private *tp = netdev_priv(dev);
 
-       return mii_ethtool_sset(&tp->mii_if, cmd);
+       return mii_ethtool_set_link_ksettings(&tp->mii_if, cmd);
 }
 
 static void sis190_get_drvinfo(struct net_device *dev,
@@ -1797,8 +1799,6 @@ static void sis190_set_msglevel(struct net_device *dev, u32 value)
 }
 
 static const struct ethtool_ops sis190_ethtool_ops = {
-       .get_settings   = sis190_get_settings,
-       .set_settings   = sis190_set_settings,
        .get_drvinfo    = sis190_get_drvinfo,
        .get_regs_len   = sis190_get_regs_len,
        .get_regs       = sis190_get_regs,
@@ -1806,6 +1806,8 @@ static const struct ethtool_ops sis190_ethtool_ops = {
        .get_msglevel   = sis190_get_msglevel,
        .set_msglevel   = sis190_set_msglevel,
        .nway_reset     = sis190_nway_reset,
+       .get_link_ksettings = sis190_get_link_ksettings,
+       .set_link_ksettings = sis190_set_link_ksettings,
 };
 
 static int sis190_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
index 1b6f6171d0788e74b939622d284880f5c083742e..40bd88362e3d94650f7eb9273efe2e5850b61312 100644 (file)
@@ -2035,23 +2035,23 @@ static u32 sis900_get_link(struct net_device *net_dev)
        return mii_link_ok(&sis_priv->mii_info);
 }
 
-static int sis900_get_settings(struct net_device *net_dev,
-                               struct ethtool_cmd *cmd)
+static int sis900_get_link_ksettings(struct net_device *net_dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct sis900_private *sis_priv = netdev_priv(net_dev);
        spin_lock_irq(&sis_priv->lock);
-       mii_ethtool_gset(&sis_priv->mii_info, cmd);
+       mii_ethtool_get_link_ksettings(&sis_priv->mii_info, cmd);
        spin_unlock_irq(&sis_priv->lock);
        return 0;
 }
 
-static int sis900_set_settings(struct net_device *net_dev,
-                               struct ethtool_cmd *cmd)
+static int sis900_set_link_ksettings(struct net_device *net_dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct sis900_private *sis_priv = netdev_priv(net_dev);
        int rt;
        spin_lock_irq(&sis_priv->lock);
-       rt = mii_ethtool_sset(&sis_priv->mii_info, cmd);
+       rt = mii_ethtool_set_link_ksettings(&sis_priv->mii_info, cmd);
        spin_unlock_irq(&sis_priv->lock);
        return rt;
 }
@@ -2129,11 +2129,11 @@ static const struct ethtool_ops sis900_ethtool_ops = {
        .get_msglevel   = sis900_get_msglevel,
        .set_msglevel   = sis900_set_msglevel,
        .get_link       = sis900_get_link,
-       .get_settings   = sis900_get_settings,
-       .set_settings   = sis900_set_settings,
        .nway_reset     = sis900_nway_reset,
        .get_wol        = sis900_get_wol,
-       .set_wol        = sis900_set_wol
+       .set_wol        = sis900_set_wol,
+       .get_link_ksettings = sis900_get_link_ksettings,
+       .set_link_ksettings = sis900_set_link_ksettings,
 };
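
sis900, like ioc3 and epic above, keeps the same bracketing through the conversion: take the device lock, call the generic mii_ethtool_* helper, drop the lock, return its status. A pthread rendering of that accessor pattern, with the mii call faked:

#include <pthread.h>
#include <stdio.h>

struct demo_priv {
	pthread_mutex_t lock;
	int speed;			/* stands in for MII state */
};

static int fake_mii_get_speed(struct demo_priv *p) { return p->speed; }

static int get_link_speed(struct demo_priv *p)
{
	int rc;

	pthread_mutex_lock(&p->lock);	/* spin_lock_irq() in the driver */
	rc = fake_mii_get_speed(p);
	pthread_mutex_unlock(&p->lock);
	return rc;
}

int main(void)
{
	struct demo_priv p = { PTHREAD_MUTEX_INITIALIZER, 100 };

	printf("%d Mb/s\n", get_link_speed(&p));
	return 0;
}
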
 
 /**
index 5f2737189c724eea48ffdd8d8a48f520a209f3e7..db6dcb06193d3dbce7243287800f54c28b198d7d 100644 (file)
@@ -1387,25 +1387,27 @@ static void netdev_get_drvinfo (struct net_device *dev, struct ethtool_drvinfo *
        strlcpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info));
 }
 
-static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netdev_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct epic_private *np = netdev_priv(dev);
        int rc;
 
        spin_lock_irq(&np->lock);
-       rc = mii_ethtool_gset(&np->mii, cmd);
+       rc = mii_ethtool_get_link_ksettings(&np->mii, cmd);
        spin_unlock_irq(&np->lock);
 
        return rc;
 }
 
-static int netdev_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netdev_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct epic_private *np = netdev_priv(dev);
        int rc;
 
        spin_lock_irq(&np->lock);
-       rc = mii_ethtool_sset(&np->mii, cmd);
+       rc = mii_ethtool_set_link_ksettings(&np->mii, cmd);
        spin_unlock_irq(&np->lock);
 
        return rc;
@@ -1460,14 +1462,14 @@ static void ethtool_complete(struct net_device *dev)
 
 static const struct ethtool_ops netdev_ethtool_ops = {
        .get_drvinfo            = netdev_get_drvinfo,
-       .get_settings           = netdev_get_settings,
-       .set_settings           = netdev_set_settings,
        .nway_reset             = netdev_nway_reset,
        .get_link               = netdev_get_link,
        .get_msglevel           = netdev_get_msglevel,
        .set_msglevel           = netdev_set_msglevel,
        .begin                  = ethtool_begin,
-       .complete               = ethtool_complete
+       .complete               = ethtool_complete,
+       .get_link_ksettings     = netdev_get_link_ksettings,
+       .set_link_ksettings     = netdev_set_link_ksettings,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
index 4f19c6166182e780b77222c566390c5acdf2ad3e..36307d34f64181d03e744352d56681b33b111b1f 100644 (file)
@@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev)
  * Ethtool support
  */
 static int
-smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc911x_ethtool_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct smc911x_local *lp = netdev_priv(dev);
        int ret, status;
        unsigned long flags;
+       u32 supported;
 
        DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
-       cmd->maxtxpkt = 1;
-       cmd->maxrxpkt = 1;
 
        if (lp->phy_type != 0) {
                spin_lock_irqsave(&lp->lock, flags);
-               ret = mii_ethtool_gset(&lp->mii, cmd);
+               ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
                spin_unlock_irqrestore(&lp->lock, flags);
        } else {
-               cmd->supported = SUPPORTED_10baseT_Half |
+               supported = SUPPORTED_10baseT_Half |
                                SUPPORTED_10baseT_Full |
                                SUPPORTED_TP | SUPPORTED_AUI;
 
                if (lp->ctl_rspeed == 10)
-                       ethtool_cmd_speed_set(cmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
                else if (lp->ctl_rspeed == 100)
-                       ethtool_cmd_speed_set(cmd, SPEED_100);
-
-               cmd->autoneg = AUTONEG_DISABLE;
-               if (lp->mii.phy_id==1)
-                       cmd->transceiver = XCVR_INTERNAL;
-               else
-                       cmd->transceiver = XCVR_EXTERNAL;
-               cmd->port = 0;
+                       cmd->base.speed = SPEED_100;
+
+               cmd->base.autoneg = AUTONEG_DISABLE;
+               cmd->base.port = 0;
                SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status);
-               cmd->duplex =
+               cmd->base.duplex =
                        (status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ?
                                DUPLEX_FULL : DUPLEX_HALF;
+
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.supported, supported);
+
                ret = 0;
        }
 
@@ -1487,7 +1487,8 @@ smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 }
 
 static int
-smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc911x_ethtool_set_link_ksettings(struct net_device *dev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct smc911x_local *lp = netdev_priv(dev);
        int ret;
@@ -1495,16 +1496,18 @@ smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 
        if (lp->phy_type != 0) {
                spin_lock_irqsave(&lp->lock, flags);
-               ret = mii_ethtool_sset(&lp->mii, cmd);
+               ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
                spin_unlock_irqrestore(&lp->lock, flags);
        } else {
-               if (cmd->autoneg != AUTONEG_DISABLE ||
-                       cmd->speed != SPEED_10 ||
-                       (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
-                       (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+               if (cmd->base.autoneg != AUTONEG_DISABLE ||
+                   cmd->base.speed != SPEED_10 ||
+                   (cmd->base.duplex != DUPLEX_HALF &&
+                    cmd->base.duplex != DUPLEX_FULL) ||
+                   (cmd->base.port != PORT_TP &&
+                    cmd->base.port != PORT_AUI))
                        return -EINVAL;
 
-               lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+               lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
 
                ret = 0;
        }
@@ -1686,8 +1689,6 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev)
 }
 
 static const struct ethtool_ops smc911x_ethtool_ops = {
-       .get_settings    = smc911x_ethtool_getsettings,
-       .set_settings    = smc911x_ethtool_setsettings,
        .get_drvinfo     = smc911x_ethtool_getdrvinfo,
        .get_msglevel    = smc911x_ethtool_getmsglevel,
        .set_msglevel    = smc911x_ethtool_setmsglevel,
@@ -1698,6 +1699,8 @@ static const struct ethtool_ops smc911x_ethtool_ops = {
        .get_eeprom_len = smc911x_ethtool_geteeprom_len,
        .get_eeprom = smc911x_ethtool_geteeprom,
        .set_eeprom = smc911x_ethtool_seteeprom,
+       .get_link_ksettings      = smc911x_ethtool_get_link_ksettings,
+       .set_link_ksettings      = smc911x_ethtool_set_link_ksettings,
 };
 
 /*
index 97280daba27f7f8349eaaabc71e4514c33d9fcf3..976aa876789a5731df5f50136ff7284dd2ab1258 100644 (file)
@@ -1843,56 +1843,60 @@ static int smc_link_ok(struct net_device *dev)
     }
 }
 
-static int smc_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int smc_netdev_get_ecmd(struct net_device *dev,
+                              struct ethtool_link_ksettings *ecmd)
 {
-    u16 tmp;
-    unsigned int ioaddr = dev->base_addr;
+       u16 tmp;
+       unsigned int ioaddr = dev->base_addr;
+       u32 supported;
 
-    ecmd->supported = (SUPPORTED_TP | SUPPORTED_AUI |
-       SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full);
-               
-    SMC_SELECT_BANK(1);
-    tmp = inw(ioaddr + CONFIG);
-    ecmd->port = (tmp & CFG_AUI_SELECT) ? PORT_AUI : PORT_TP;
-    ecmd->transceiver = XCVR_INTERNAL;
-    ethtool_cmd_speed_set(ecmd, SPEED_10);
-    ecmd->phy_address = ioaddr + MGMT;
+       supported = (SUPPORTED_TP | SUPPORTED_AUI |
+                    SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full);
 
-    SMC_SELECT_BANK(0);
-    tmp = inw(ioaddr + TCR);
-    ecmd->duplex = (tmp & TCR_FDUPLX) ? DUPLEX_FULL : DUPLEX_HALF;
+       SMC_SELECT_BANK(1);
+       tmp = inw(ioaddr + CONFIG);
+       ecmd->base.port = (tmp & CFG_AUI_SELECT) ? PORT_AUI : PORT_TP;
+       ecmd->base.speed = SPEED_10;
+       ecmd->base.phy_address = ioaddr + MGMT;
 
-    return 0;
+       SMC_SELECT_BANK(0);
+       tmp = inw(ioaddr + TCR);
+       ecmd->base.duplex = (tmp & TCR_FDUPLX) ? DUPLEX_FULL : DUPLEX_HALF;
+
+       ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported,
+                                               supported);
+
+       return 0;
 }
 
-static int smc_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int smc_netdev_set_ecmd(struct net_device *dev,
+                              const struct ethtool_link_ksettings *ecmd)
 {
-    u16 tmp;
-    unsigned int ioaddr = dev->base_addr;
+       u16 tmp;
+       unsigned int ioaddr = dev->base_addr;
 
-    if (ethtool_cmd_speed(ecmd) != SPEED_10)
-       return -EINVAL;
-    if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
-       return -EINVAL;
-    if (ecmd->port != PORT_TP && ecmd->port != PORT_AUI)
-       return -EINVAL;
-    if (ecmd->transceiver != XCVR_INTERNAL)
-       return -EINVAL;
+       if (ecmd->base.speed != SPEED_10)
+               return -EINVAL;
+       if (ecmd->base.duplex != DUPLEX_HALF &&
+           ecmd->base.duplex != DUPLEX_FULL)
+               return -EINVAL;
+       if (ecmd->base.port != PORT_TP && ecmd->base.port != PORT_AUI)
+               return -EINVAL;
 
-    if (ecmd->port == PORT_AUI)
-       smc_set_xcvr(dev, 1);
-    else
-       smc_set_xcvr(dev, 0);
+       if (ecmd->base.port == PORT_AUI)
+               smc_set_xcvr(dev, 1);
+       else
+               smc_set_xcvr(dev, 0);
 
-    SMC_SELECT_BANK(0);
-    tmp = inw(ioaddr + TCR);
-    if (ecmd->duplex == DUPLEX_FULL)
-       tmp |= TCR_FDUPLX;
-    else
-       tmp &= ~TCR_FDUPLX;
-    outw(tmp, ioaddr + TCR);
-       
-    return 0;
+       SMC_SELECT_BANK(0);
+       tmp = inw(ioaddr + TCR);
+       if (ecmd->base.duplex == DUPLEX_FULL)
+               tmp |= TCR_FDUPLX;
+       else
+               tmp &= ~TCR_FDUPLX;
+       outw(tmp, ioaddr + TCR);
+
+       return 0;
 }
 
 static int check_if_running(struct net_device *dev)
@@ -1908,7 +1912,8 @@ static void smc_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int smc_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int smc_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *ecmd)
 {
        struct smc_private *smc = netdev_priv(dev);
        unsigned int ioaddr = dev->base_addr;
@@ -1919,7 +1924,7 @@ static int smc_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
        spin_lock_irqsave(&smc->lock, flags);
        SMC_SELECT_BANK(3);
        if (smc->cfg & CFG_MII_SELECT)
-               ret = mii_ethtool_gset(&smc->mii_if, ecmd);
+               ret = mii_ethtool_get_link_ksettings(&smc->mii_if, ecmd);
        else
                ret = smc_netdev_get_ecmd(dev, ecmd);
        SMC_SELECT_BANK(saved_bank);
@@ -1927,7 +1932,8 @@ static int smc_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
        return ret;
 }
 
-static int smc_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int smc_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *ecmd)
 {
        struct smc_private *smc = netdev_priv(dev);
        unsigned int ioaddr = dev->base_addr;
@@ -1938,7 +1944,7 @@ static int smc_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
        spin_lock_irqsave(&smc->lock, flags);
        SMC_SELECT_BANK(3);
        if (smc->cfg & CFG_MII_SELECT)
-               ret = mii_ethtool_sset(&smc->mii_if, ecmd);
+               ret = mii_ethtool_set_link_ksettings(&smc->mii_if, ecmd);
        else
                ret = smc_netdev_set_ecmd(dev, ecmd);
        SMC_SELECT_BANK(saved_bank);
@@ -1982,10 +1988,10 @@ static int smc_nway_reset(struct net_device *dev)
 static const struct ethtool_ops ethtool_ops = {
        .begin = check_if_running,
        .get_drvinfo = smc_get_drvinfo,
-       .get_settings = smc_get_settings,
-       .set_settings = smc_set_settings,
        .get_link = smc_get_link,
        .nway_reset = smc_nway_reset,
+       .get_link_ksettings = smc_get_link_ksettings,
+       .set_link_ksettings = smc_set_link_ksettings,
 };
 
 static int smc_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
index 04d9245b7149ce663b0827a158901a10b58ea701..90d28bcad8804f9a60c5c84a994ead8fca3cee7c 100644 (file)
@@ -246,6 +246,15 @@ struct stmmac_extra_stats {
 #define STMMAC_TX_MAX_FRAMES   256
 #define STMMAC_TX_FRAMES       64
 
+/* Packet types */
+enum packets_types {
+       PACKET_AVCPQ = 0x1, /* AV Untagged Control packets */
+       PACKET_PTPQ = 0x2, /* PTP Packets */
+       PACKET_DCBCPQ = 0x3, /* DCB Control Packets */
+       PACKET_UPQ = 0x4, /* Untagged Packets */
+       PACKET_MCBCQ = 0x5, /* Multicast & Broadcast Packets */
+};
+
 /* Rx IPC status */
 enum rx_frame_status {
        good_frame = 0x0,
@@ -324,6 +333,9 @@ struct dma_features {
        unsigned int number_tx_queues;
        /* Alternate (enhanced) DESC mode */
        unsigned int enh_desc;
+       /* TX and RX FIFO sizes */
+       unsigned int tx_fifo_size;
+       unsigned int rx_fifo_size;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -413,6 +425,14 @@ struct stmmac_dma_ops {
        int (*reset)(void __iomem *ioaddr);
        void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
                     u32 dma_tx, u32 dma_rx, int atds);
+       void (*init_chan)(void __iomem *ioaddr,
+                         struct stmmac_dma_cfg *dma_cfg, u32 chan);
+       void (*init_rx_chan)(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_rx_phy, u32 chan);
+       void (*init_tx_chan)(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_tx_phy, u32 chan);
        /* Configure the AXI Bus Mode Register */
        void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
        /* Dump DMA registers */
@@ -421,25 +441,28 @@ struct stmmac_dma_ops {
         * An invalid value enables the store-and-forward mode */
        void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
                         int rxfifosz);
+       void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+                           int fifosz);
+       void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel);
        /* To track extra statistic (if supported) */
        void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
                                   void __iomem *ioaddr);
        void (*enable_dma_transmission) (void __iomem *ioaddr);
-       void (*enable_dma_irq) (void __iomem *ioaddr);
-       void (*disable_dma_irq) (void __iomem *ioaddr);
-       void (*start_tx) (void __iomem *ioaddr);
-       void (*stop_tx) (void __iomem *ioaddr);
-       void (*start_rx) (void __iomem *ioaddr);
-       void (*stop_rx) (void __iomem *ioaddr);
+       void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
+       void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
+       void (*start_tx)(void __iomem *ioaddr, u32 chan);
+       void (*stop_tx)(void __iomem *ioaddr, u32 chan);
+       void (*start_rx)(void __iomem *ioaddr, u32 chan);
+       void (*stop_rx)(void __iomem *ioaddr, u32 chan);
        int (*dma_interrupt) (void __iomem *ioaddr,
-                             struct stmmac_extra_stats *x);
+                             struct stmmac_extra_stats *x, u32 chan);
        /* If supported then get the optional core features */
        void (*get_hw_feature)(void __iomem *ioaddr,
                               struct dma_features *dma_cap);
        /* Program the HW RX Watchdog */
-       void (*rx_watchdog) (void __iomem *ioaddr, u32 riwt);
-       void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len);
-       void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len);
+       void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+       void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
+       void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
        void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
        void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
        void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
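
The stmmac DMA ops grow a channel argument across the board: start/stop and IRQ enable/disable now act on one DMA channel rather than the whole engine, which is the hook multi-queue support needs. A toy ops table that makes the same shift visible (struct and names are illustrative, not stmmac's):

#include <stdint.h>
#include <stdio.h>

struct demo_dma_ops {
	void (*start_tx)(void *ioaddr, uint32_t chan);
	void (*stop_tx)(void *ioaddr, uint32_t chan);
};

static void demo_start_tx(void *ioaddr, uint32_t chan)
{
	(void)ioaddr;
	printf("start TX DMA on channel %u\n", chan);
}

static void demo_stop_tx(void *ioaddr, uint32_t chan)
{
	(void)ioaddr;
	printf("stop TX DMA on channel %u\n", chan);
}

int main(void)
{
	struct demo_dma_ops ops = { demo_start_tx, demo_stop_tx };
	uint32_t chan;

	for (chan = 0; chan < 4; chan++)
		ops.start_tx(NULL, chan);	/* per channel, not global */
	ops.stop_tx(NULL, 2);
	return 0;
}
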
@@ -451,20 +474,44 @@ struct mac_device_info;
 struct stmmac_ops {
        /* MAC core initialization */
        void (*core_init)(struct mac_device_info *hw, int mtu);
+       /* Enable the MAC RX/TX */
+       void (*set_mac)(void __iomem *ioaddr, bool enable);
        /* Enable and verify that the IPC module is supported */
        int (*rx_ipc)(struct mac_device_info *hw);
        /* Enable RX Queues */
-       void (*rx_queue_enable)(struct mac_device_info *hw, u32 queue);
+       void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue);
+       /* RX Queues Priority */
+       void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+       /* TX Queues Priority */
+       void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+       /* RX Queues Routing */
+       void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet,
+                                u32 queue);
+       /* Program RX Algorithms */
+       void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg);
+       /* Program TX Algorithms */
+       void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
+       /* Set MTL TX queues weight */
+       void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
+                                       u32 weight, u32 queue);
+       /* RX MTL queue to RX dma mapping */
+       void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
+       /* Configure AV Algorithm */
+       void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
+                          u32 idle_slope, u32 high_credit, u32 low_credit,
+                          u32 queue);
        /* Dump MAC registers */
        void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
        /* Handle extra events on specific interrupts hw dependent */
        int (*host_irq_status)(struct mac_device_info *hw,
                               struct stmmac_extra_stats *x);
+       /* Handle MTL interrupts */
+       int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
        /* Multicast filter setting */
        void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
        /* Flow control setting */
        void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
-                         unsigned int fc, unsigned int pause_time);
+                         unsigned int fc, unsigned int pause_time, u32 tx_cnt);
        /* Set power management mode (e.g. magic frame) */
        void (*pmt)(struct mac_device_info *hw, unsigned long mode);
        /* Set/Get Unicast MAC addresses */
@@ -477,7 +524,8 @@ struct stmmac_ops {
        void (*reset_eee_mode)(struct mac_device_info *hw);
        void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
        void (*set_eee_pls)(struct mac_device_info *hw, int link);
-       void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x);
+       void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                     u32 rx_queues, u32 tx_queues);
        /* PCS calls */
        void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
                             bool loopback);
@@ -547,6 +595,11 @@ struct mac_device_info {
        unsigned int ps;
 };
 
+struct stmmac_rx_routing {
+       u32 reg_mask;
+       u32 reg_shift;
+};
+
 struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
                                        int perfect_uc_entries,
                                        int *synopsys_id);
index 1a3fa3d9f85549c9b5cc10064c6aaac799184adc..dd6a2f9791cc11a390d71bcb5a1b071cd1bca068 100644 (file)
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/device.h>
+#include <linux/gpio/consumer.h>
 #include <linux/ethtool.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/of_net.h>
 #include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
+#include <linux/reset.h>
 #include <linux/stmmac.h>
 
 #include "stmmac_platform.h"
+#include "dwmac4.h"
+
+struct tegra_eqos {
+       struct device *dev;
+       void __iomem *regs;
+
+       struct reset_control *rst;
+       struct clk *clk_master;
+       struct clk *clk_slave;
+       struct clk *clk_tx;
+       struct clk *clk_rx;
+
+       struct gpio_desc *reset;
+};
 
 static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
                                   struct plat_stmmacenet_data *plat_dat)
@@ -106,13 +124,309 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
        return 0;
 }
 
+static void *dwc_qos_probe(struct platform_device *pdev,
+                          struct plat_stmmacenet_data *plat_dat,
+                          struct stmmac_resources *stmmac_res)
+{
+       int err;
+
+       plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
+       if (IS_ERR(plat_dat->stmmac_clk)) {
+               dev_err(&pdev->dev, "apb_pclk clock not found.\n");
+               return ERR_CAST(plat_dat->stmmac_clk);
+       }
+
+       err = clk_prepare_enable(plat_dat->stmmac_clk);
+       if (err < 0) {
+               dev_err(&pdev->dev, "failed to enable apb_pclk clock: %d\n",
+                       err);
+               return ERR_PTR(err);
+       }
+
+       plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
+       if (IS_ERR(plat_dat->pclk)) {
+               dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
+               err = PTR_ERR(plat_dat->pclk);
+               goto disable;
+       }
+
+       err = clk_prepare_enable(plat_dat->pclk);
+       if (err < 0) {
+               dev_err(&pdev->dev, "failed to enable phy_ref clock: %d\n",
+                       err);
+               goto disable;
+       }
+
+       return NULL;
+
+disable:
+       clk_disable_unprepare(plat_dat->stmmac_clk);
+       return ERR_PTR(err);
+}
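
dwc_qos_probe() signals failure by encoding a negative errno in its return pointer (ERR_PTR/ERR_CAST), and the caller later tests it with IS_ERR(); NULL, which this probe returns on success, falls outside the error range. A userspace model of the convention, using the same reserved-top-4095-addresses trick as the kernel (a sketch, not the kernel implementation):

#include <errno.h>
#include <stdio.h>

/* The kernel reserves the top 4095 addresses for encoded errnos. */
static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-4095;
}

static void *get_resource(int ok)
{
	static int resource = 42;

	return ok ? (void *)&resource : ERR_PTR(-ENODEV);
}

int main(void)
{
	void *p = get_resource(0);

	if (IS_ERR(p))
		printf("probe failed: %ld\n", PTR_ERR(p));
	return 0;
}
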
+
+static int dwc_qos_remove(struct platform_device *pdev)
+{
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+
+       clk_disable_unprepare(priv->plat->pclk);
+       clk_disable_unprepare(priv->plat->stmmac_clk);
+
+       return 0;
+}
+
+#define SDMEMCOMPPADCTRL 0x8800
+#define  SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD BIT(31)
+
+#define AUTO_CAL_CONFIG 0x8804
+#define  AUTO_CAL_CONFIG_START BIT(31)
+#define  AUTO_CAL_CONFIG_ENABLE BIT(29)
+
+#define AUTO_CAL_STATUS 0x880c
+#define  AUTO_CAL_STATUS_ACTIVE BIT(31)
+
+static void tegra_eqos_fix_speed(void *priv, unsigned int speed)
+{
+       struct tegra_eqos *eqos = priv;
+       unsigned long rate = 125000000;
+       bool needs_calibration = false;
+       u32 value;
+       int err;
+
+       switch (speed) {
+       case SPEED_1000:
+               needs_calibration = true;
+               rate = 125000000;
+               break;
+
+       case SPEED_100:
+               needs_calibration = true;
+               rate = 25000000;
+               break;
+
+       case SPEED_10:
+               rate = 2500000;
+               break;
+
+       default:
+               dev_err(eqos->dev, "invalid speed %u\n", speed);
+               break;
+       }
+
+       if (needs_calibration) {
+               /* calibrate */
+               value = readl(eqos->regs + SDMEMCOMPPADCTRL);
+               value |= SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD;
+               writel(value, eqos->regs + SDMEMCOMPPADCTRL);
+
+               udelay(1);
+
+               value = readl(eqos->regs + AUTO_CAL_CONFIG);
+               value |= AUTO_CAL_CONFIG_START | AUTO_CAL_CONFIG_ENABLE;
+               writel(value, eqos->regs + AUTO_CAL_CONFIG);
+
+               err = readl_poll_timeout_atomic(eqos->regs + AUTO_CAL_STATUS,
+                                               value,
+                                               value & AUTO_CAL_STATUS_ACTIVE,
+                                               1, 10);
+               if (err < 0) {
+                       dev_err(eqos->dev, "calibration did not start\n");
+                       goto failed;
+               }
+
+               err = readl_poll_timeout_atomic(eqos->regs + AUTO_CAL_STATUS,
+                                               value,
+                                               (value & AUTO_CAL_STATUS_ACTIVE) == 0,
+                                               20, 200);
+               if (err < 0) {
+                       dev_err(eqos->dev, "calibration didn't finish\n");
+                       goto failed;
+               }
+
+       failed:
+               value = readl(eqos->regs + SDMEMCOMPPADCTRL);
+               value &= ~SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD;
+               writel(value, eqos->regs + SDMEMCOMPPADCTRL);
+       } else {
+               value = readl(eqos->regs + AUTO_CAL_CONFIG);
+               value &= ~AUTO_CAL_CONFIG_ENABLE;
+               writel(value, eqos->regs + AUTO_CAL_CONFIG);
+       }
+
+       err = clk_set_rate(eqos->clk_tx, rate);
+       if (err < 0)
+               dev_err(eqos->dev, "failed to set TX rate: %d\n", err);
+}
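
The calibration sequence leans on readl_poll_timeout_atomic() from linux/iopoll.h: re-read a status register until a condition holds, giving up after a time budget. A userspace approximation of the idiom (a simplification, not the kernel macro):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int poll_until_clear(volatile uint32_t *reg, uint32_t mask,
			    unsigned int sleep_us, unsigned int timeout_us)
{
	struct timespec ts = { 0, (long)sleep_us * 1000 };
	unsigned int waited = 0;

	while (*reg & mask) {
		if (waited >= timeout_us)
			return -1;	/* -ETIMEDOUT in the kernel */
		nanosleep(&ts, NULL);
		waited += sleep_us;
	}
	return 0;
}

int main(void)
{
	volatile uint32_t fake_status = 0;	/* already idle */

	printf("rc=%d\n",
	       poll_until_clear(&fake_status, 1u << 31, 20, 200));
	return 0;
}
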
+
+static int tegra_eqos_init(struct platform_device *pdev, void *priv)
+{
+       struct tegra_eqos *eqos = priv;
+       unsigned long rate;
+       u32 value;
+
+       rate = clk_get_rate(eqos->clk_slave);
+
+       value = (rate / 1000000) - 1;
+       writel(value, eqos->regs + GMAC_1US_TIC_COUNTER);
+
+       return 0;
+}
+
+static void *tegra_eqos_probe(struct platform_device *pdev,
+                             struct plat_stmmacenet_data *data,
+                             struct stmmac_resources *res)
+{
+       struct tegra_eqos *eqos;
+       int err;
+
+       eqos = devm_kzalloc(&pdev->dev, sizeof(*eqos), GFP_KERNEL);
+       if (!eqos) {
+               err = -ENOMEM;
+               goto error;
+       }
+
+       eqos->dev = &pdev->dev;
+       eqos->regs = res->addr;
+
+       eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus");
+       if (IS_ERR(eqos->clk_master)) {
+               err = PTR_ERR(eqos->clk_master);
+               goto error;
+       }
+
+       err = clk_prepare_enable(eqos->clk_master);
+       if (err < 0)
+               goto error;
+
+       eqos->clk_slave = devm_clk_get(&pdev->dev, "slave_bus");
+       if (IS_ERR(eqos->clk_slave)) {
+               err = PTR_ERR(eqos->clk_slave);
+               goto disable_master;
+       }
+
+       data->stmmac_clk = eqos->clk_slave;
+
+       err = clk_prepare_enable(eqos->clk_slave);
+       if (err < 0)
+               goto disable_master;
+
+       eqos->clk_rx = devm_clk_get(&pdev->dev, "rx");
+       if (IS_ERR(eqos->clk_rx)) {
+               err = PTR_ERR(eqos->clk_rx);
+               goto disable_slave;
+       }
+
+       err = clk_prepare_enable(eqos->clk_rx);
+       if (err < 0)
+               goto disable_slave;
+
+       eqos->clk_tx = devm_clk_get(&pdev->dev, "tx");
+       if (IS_ERR(eqos->clk_tx)) {
+               err = PTR_ERR(eqos->clk_tx);
+               goto disable_rx;
+       }
+
+       err = clk_prepare_enable(eqos->clk_tx);
+       if (err < 0)
+               goto disable_rx;
+
+       eqos->reset = devm_gpiod_get(&pdev->dev, "phy-reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(eqos->reset)) {
+               err = PTR_ERR(eqos->reset);
+               goto disable_tx;
+       }
+
+       usleep_range(2000, 4000);
+       gpiod_set_value(eqos->reset, 0);
+
+       eqos->rst = devm_reset_control_get(&pdev->dev, "eqos");
+       if (IS_ERR(eqos->rst)) {
+               err = PTR_ERR(eqos->rst);
+               goto reset_phy;
+       }
+
+       err = reset_control_assert(eqos->rst);
+       if (err < 0)
+               goto reset_phy;
+
+       usleep_range(2000, 4000);
+
+       err = reset_control_deassert(eqos->rst);
+       if (err < 0)
+               goto reset_phy;
+
+       usleep_range(2000, 4000);
+
+       data->fix_mac_speed = tegra_eqos_fix_speed;
+       data->init = tegra_eqos_init;
+       data->bsp_priv = eqos;
+
+       err = tegra_eqos_init(pdev, eqos);
+       if (err < 0)
+               goto reset;
+
+out:
+       return eqos;
+
+reset:
+       reset_control_assert(eqos->rst);
+reset_phy:
+       gpiod_set_value(eqos->reset, 1);
+disable_tx:
+       clk_disable_unprepare(eqos->clk_tx);
+disable_rx:
+       clk_disable_unprepare(eqos->clk_rx);
+disable_slave:
+       clk_disable_unprepare(eqos->clk_slave);
+disable_master:
+       clk_disable_unprepare(eqos->clk_master);
+error:
+       eqos = ERR_PTR(err);
+       goto out;
+}
+
+static int tegra_eqos_remove(struct platform_device *pdev)
+{
+       struct tegra_eqos *eqos = get_stmmac_bsp_priv(&pdev->dev);
+
+       reset_control_assert(eqos->rst);
+       gpiod_set_value(eqos->reset, 1);
+       clk_disable_unprepare(eqos->clk_tx);
+       clk_disable_unprepare(eqos->clk_rx);
+       clk_disable_unprepare(eqos->clk_slave);
+       clk_disable_unprepare(eqos->clk_master);
+
+       return 0;
+}
+
+struct dwc_eth_dwmac_data {
+       void *(*probe)(struct platform_device *pdev,
+                      struct plat_stmmacenet_data *data,
+                      struct stmmac_resources *res);
+       int (*remove)(struct platform_device *pdev);
+};
+
+static const struct dwc_eth_dwmac_data dwc_qos_data = {
+       .probe = dwc_qos_probe,
+       .remove = dwc_qos_remove,
+};
+
+static const struct dwc_eth_dwmac_data tegra_eqos_data = {
+       .probe = tegra_eqos_probe,
+       .remove = tegra_eqos_remove,
+};
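
Each compatible string now carries a dwc_eth_dwmac_data whose probe/remove hooks the common code fetches at runtime with of_device_get_match_data(). A self-contained sketch of that dispatch, modelling the OF lookup as a plain table walk (names illustrative):

#include <stdio.h>
#include <string.h>

struct variant_data {
	const char *name;
	int (*probe)(void);
};

static int generic_probe(void) { printf("generic probe\n"); return 0; }
static int tegra_probe(void)   { printf("tegra probe\n");   return 0; }

static const struct variant_data generic_data = { "generic", generic_probe };
static const struct variant_data tegra_data   = { "tegra",   tegra_probe };

static const struct {
	const char *compatible;
	const struct variant_data *data;
} match_table[] = {
	{ "snps,dwc-qos-ethernet-4.10", &generic_data },
	{ "nvidia,tegra186-eqos",       &tegra_data },
	{ NULL, NULL }
};

/* Table-walk stand-in for of_device_get_match_data(). */
static const struct variant_data *get_match_data(const char *compatible)
{
	int i;

	for (i = 0; match_table[i].compatible; i++)
		if (!strcmp(match_table[i].compatible, compatible))
			return match_table[i].data;
	return NULL;
}

int main(void)
{
	const struct variant_data *d = get_match_data("nvidia,tegra186-eqos");

	return d ? d->probe() : 1;
}
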
+
 static int dwc_eth_dwmac_probe(struct platform_device *pdev)
 {
+       const struct dwc_eth_dwmac_data *data;
        struct plat_stmmacenet_data *plat_dat;
        struct stmmac_resources stmmac_res;
        struct resource *res;
+       void *priv;
        int ret;
 
+       data = of_device_get_match_data(&pdev->dev);
+
        memset(&stmmac_res, 0, sizeof(struct stmmac_resources));
 
        /**
@@ -138,39 +452,26 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
        if (IS_ERR(plat_dat))
                return PTR_ERR(plat_dat);
 
-       plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
-       if (IS_ERR(plat_dat->stmmac_clk)) {
-               dev_err(&pdev->dev, "apb_pclk clock not found.\n");
-               ret = PTR_ERR(plat_dat->stmmac_clk);
-               plat_dat->stmmac_clk = NULL;
-               goto err_remove_config_dt;
-       }
-       clk_prepare_enable(plat_dat->stmmac_clk);
-
-       plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
-       if (IS_ERR(plat_dat->pclk)) {
-               dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
-               ret = PTR_ERR(plat_dat->pclk);
-               plat_dat->pclk = NULL;
-               goto err_out_clk_dis_phy;
+       priv = data->probe(pdev, plat_dat, &stmmac_res);
+       if (IS_ERR(priv)) {
+               ret = PTR_ERR(priv);
+               dev_err(&pdev->dev, "failed to probe subdriver: %d\n", ret);
+               goto remove_config;
        }
-       clk_prepare_enable(plat_dat->pclk);
 
        ret = dwc_eth_dwmac_config_dt(pdev, plat_dat);
        if (ret)
-               goto err_out_clk_dis_aper;
+               goto remove;
 
        ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
        if (ret)
-               goto err_out_clk_dis_aper;
+               goto remove;
 
-       return 0;
+       return ret;
 
-err_out_clk_dis_aper:
-       clk_disable_unprepare(plat_dat->pclk);
-err_out_clk_dis_phy:
-       clk_disable_unprepare(plat_dat->stmmac_clk);
-err_remove_config_dt:
+remove:
+       data->remove(pdev);
+remove_config:
        stmmac_remove_config_dt(pdev, plat_dat);
 
        return ret;
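
The reworked probe unwinds through a label ladder: a failure at any step exits through exactly the cleanups its predecessors require, mirroring the remove/remove_config labels above. A runnable miniature of that shape, with resources simulated by printfs:

#include <errno.h>
#include <stdio.h>

static int step(const char *name, int ok)
{
	printf("acquire %s\n", name);
	return ok ? 0 : -ENODEV;
}

static void undo(const char *name) { printf("release %s\n", name); }

static int demo_probe(int fail_late)
{
	int ret;

	ret = step("config", 1);
	if (ret)
		return ret;

	ret = step("subdriver", 1);
	if (ret)
		goto remove_config;

	ret = step("netdev", !fail_late);
	if (ret)
		goto remove;

	return 0;

remove:
	undo("subdriver");
remove_config:
	undo("config");
	return ret;
}

int main(void) { return demo_probe(1) ? 1 : 0; }
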
@@ -178,11 +479,29 @@ err_remove_config_dt:
 
 static int dwc_eth_dwmac_remove(struct platform_device *pdev)
 {
-       return stmmac_pltfr_remove(pdev);
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+       const struct dwc_eth_dwmac_data *data;
+       int err;
+
+       data = of_device_get_match_data(&pdev->dev);
+
+       err = stmmac_dvr_remove(&pdev->dev);
+       if (err < 0)
+               dev_err(&pdev->dev, "failed to remove platform: %d\n", err);
+
+       err = data->remove(pdev);
+       if (err < 0)
+               dev_err(&pdev->dev, "failed to remove subdriver: %d\n", err);
+
+       stmmac_remove_config_dt(pdev, priv->plat);
+
+       return err;
 }
 
 static const struct of_device_id dwc_eth_dwmac_match[] = {
-       { .compatible = "snps,dwc-qos-ethernet-4.10", },
+       { .compatible = "snps,dwc-qos-ethernet-4.10", .data = &dwc_qos_data },
+       { .compatible = "nvidia,tegra186-eqos", .data = &tegra_eqos_data },
        { }
 };
 MODULE_DEVICE_TABLE(of, dwc_eth_dwmac_match);
index e5db6ac362354317bff2ecdf0d0344a26831752a..f0df5193f047ba534ace8e012df673f5c6cad83f 100644 (file)
@@ -74,6 +74,10 @@ struct rk_priv_data {
 #define GRF_BIT(nr)    (BIT(nr) | BIT(nr+16))
 #define GRF_CLR_BIT(nr)        (BIT(nr+16))
 
+#define DELAY_ENABLE(soc, tx, rx) \
+       (((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \
+        ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE))
+
 #define RK3228_GRF_MAC_CON0    0x0900
 #define RK3228_GRF_MAC_CON1    0x0904
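
DELAY_ENABLE() glues the SoC prefix onto the per-SoC constant names with the ## paste operator, so one macro serves every GRF register layout while honouring the tx/rx flags. The same trick as a standalone program, with invented bit values:

#include <stdio.h>

#define DEMO_GMAC_TXCLK_DLY_ENABLE	0x1	/* invented values */
#define DEMO_GMAC_TXCLK_DLY_DISABLE	0x0
#define DEMO_GMAC_RXCLK_DLY_ENABLE	0x2
#define DEMO_GMAC_RXCLK_DLY_DISABLE	0x0

#define DELAY_ENABLE(soc, tx, rx) \
	(((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \
	 ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE))

int main(void)
{
	printf("tx only: %#x\n", DELAY_ENABLE(DEMO, 1, 0));
	printf("both:    %#x\n", DELAY_ENABLE(DEMO, 1, 1));
	return 0;
}
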
 
@@ -115,8 +119,7 @@ static void rk3228_set_to_rgmii(struct rk_priv_data *bsp_priv,
        regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON1,
                     RK3228_GMAC_PHY_INTF_SEL_RGMII |
                     RK3228_GMAC_RMII_MODE_CLR |
-                    RK3228_GMAC_RXCLK_DLY_ENABLE |
-                    RK3228_GMAC_TXCLK_DLY_ENABLE);
+                    DELAY_ENABLE(RK3228, tx_delay, rx_delay));
 
        regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON0,
                     RK3228_GMAC_CLK_RX_DL_CFG(rx_delay) |
@@ -232,8 +235,7 @@ static void rk3288_set_to_rgmii(struct rk_priv_data *bsp_priv,
                     RK3288_GMAC_PHY_INTF_SEL_RGMII |
                     RK3288_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON3,
-                    RK3288_GMAC_RXCLK_DLY_ENABLE |
-                    RK3288_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3288, tx_delay, rx_delay) |
                     RK3288_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3288_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -460,8 +462,7 @@ static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv,
                     RK3366_GMAC_PHY_INTF_SEL_RGMII |
                     RK3366_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7,
-                    RK3366_GMAC_RXCLK_DLY_ENABLE |
-                    RK3366_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3366, tx_delay, rx_delay) |
                     RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3366_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -572,8 +573,7 @@ static void rk3368_set_to_rgmii(struct rk_priv_data *bsp_priv,
                     RK3368_GMAC_PHY_INTF_SEL_RGMII |
                     RK3368_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3368_GRF_SOC_CON16,
-                    RK3368_GMAC_RXCLK_DLY_ENABLE |
-                    RK3368_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3368, tx_delay, rx_delay) |
                     RK3368_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3368_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -684,8 +684,7 @@ static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv,
                     RK3399_GMAC_PHY_INTF_SEL_RGMII |
                     RK3399_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6,
-                    RK3399_GMAC_RXCLK_DLY_ENABLE |
-                    RK3399_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3399, tx_delay, rx_delay) |
                     RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3399_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -985,14 +984,29 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
                return ret;
 
        /*rmii or rgmii*/
-       if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
+       switch (bsp_priv->phy_iface) {
+       case PHY_INTERFACE_MODE_RGMII:
                dev_info(dev, "init for RGMII\n");
                bsp_priv->ops->set_to_rgmii(bsp_priv, bsp_priv->tx_delay,
                                            bsp_priv->rx_delay);
-       } else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) {
+               break;
+       case PHY_INTERFACE_MODE_RGMII_ID:
+               dev_info(dev, "init for RGMII_ID\n");
+               bsp_priv->ops->set_to_rgmii(bsp_priv, 0, 0);
+               break;
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+               dev_info(dev, "init for RGMII_RXID\n");
+               bsp_priv->ops->set_to_rgmii(bsp_priv, bsp_priv->tx_delay, 0);
+               break;
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               dev_info(dev, "init for RGMII_TXID\n");
+               bsp_priv->ops->set_to_rgmii(bsp_priv, 0, bsp_priv->rx_delay);
+               break;
+       case PHY_INTERFACE_MODE_RMII:
                dev_info(dev, "init for RMII\n");
                bsp_priv->ops->set_to_rmii(bsp_priv);
-       } else {
+               break;
+       default:
                dev_err(dev, "NO interface defined!\n");
        }
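
The new switch encodes who supplies the RGMII clock skew: for the _ID variants the PHY inserts the delay on that direction, so the MAC-side value passed to set_to_rgmii() is forced to zero there. The same policy as a small table-driven function (delay values illustrative):

#include <stdio.h>

enum phy_mode { RGMII, RGMII_ID, RGMII_RXID, RGMII_TXID };

struct delays { int tx; int rx; };

static struct delays mac_delays(enum phy_mode mode, int tx_dt, int rx_dt)
{
	switch (mode) {
	case RGMII:      return (struct delays){ tx_dt, rx_dt };
	case RGMII_ID:   return (struct delays){ 0, 0 };	/* PHY adds both */
	case RGMII_RXID: return (struct delays){ tx_dt, 0 };	/* PHY adds RX */
	case RGMII_TXID: return (struct delays){ 0, rx_dt };	/* PHY adds TX */
	}
	return (struct delays){ 0, 0 };
}

int main(void)
{
	struct delays d = mac_delays(RGMII_RXID, 0x30, 0x10);

	printf("MAC tx=%#x rx=%#x\n", d.tx, d.rx);
	return 0;
}
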
 
@@ -1022,12 +1036,19 @@ static void rk_fix_speed(void *priv, unsigned int speed)
        struct rk_priv_data *bsp_priv = priv;
        struct device *dev = &bsp_priv->pdev->dev;
 
-       if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII)
+       switch (bsp_priv->phy_iface) {
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
                bsp_priv->ops->set_rgmii_speed(bsp_priv, speed);
-       else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
+               break;
+       case PHY_INTERFACE_MODE_RMII:
                bsp_priv->ops->set_rmii_speed(bsp_priv, speed);
-       else
+               break;
+       default:
                dev_err(dev, "unsupported interface %d", bsp_priv->phy_iface);
+       }
 }
 
 static int rk_gmac_probe(struct platform_device *pdev)
index 19b9b308709953cc9327961d3eb3bea527848bb7..f3d9305e5f706bc407c51a5d98af6a28b6404f42 100644 (file)
@@ -216,7 +216,8 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
 
 
 static void dwmac1000_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-                               unsigned int fc, unsigned int pause_time)
+                               unsigned int fc, unsigned int pause_time,
+                               u32 tx_cnt)
 {
        void __iomem *ioaddr = hw->pcsr;
        /* Set flow such that DZPQ in Mac Register 6 is 0,
@@ -412,7 +413,8 @@ static void dwmac1000_get_adv_lp(void __iomem *ioaddr, struct rgmii_adv *adv)
        dwmac_get_adv_lp(ioaddr, GMAC_PCS_BASE, adv);
 }
 
-static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
+static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                           u32 rx_queues, u32 tx_queues)
 {
        u32 value = readl(ioaddr + GMAC_DEBUG);
 
@@ -488,6 +490,7 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
 
 static const struct stmmac_ops dwmac1000_ops = {
        .core_init = dwmac1000_core_init,
+       .set_mac = stmmac_set_mac,
        .rx_ipc = dwmac1000_rx_ipc_enable,
        .dump_regs = dwmac1000_dump_regs,
        .host_irq_status = dwmac1000_irq_status,
index d3654a4470461e1f44282fac163dbcd9b6827df6..471a9aa6ac94c14d46d4dcf2d956965948193c56 100644 (file)
@@ -247,7 +247,8 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr,
        dma_cap->enh_desc = (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24;
 }
 
-static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt)
+static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
+                                 u32 number_chan)
 {
        writel(riwt, ioaddr + DMA_RX_WATCHDOG);
 }
index e370ccec6176671d1717d24d88917b88f69b1bd2..1b360910548473a486372835b3e167456528d508 100644 (file)
@@ -131,7 +131,8 @@ static void dwmac100_set_filter(struct mac_device_info *hw,
 }
 
 static void dwmac100_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-                              unsigned int fc, unsigned int pause_time)
+                              unsigned int fc, unsigned int pause_time,
+                              u32 tx_cnt)
 {
        void __iomem *ioaddr = hw->pcsr;
        unsigned int flow = MAC_FLOW_CTRL_ENABLE;
@@ -149,6 +150,7 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode)
 
 static const struct stmmac_ops dwmac100_ops = {
        .core_init = dwmac100_core_init,
+       .set_mac = stmmac_set_mac,
        .rx_ipc = dwmac100_rx_ipc_enable,
        .dump_regs = dwmac100_dump_mac_regs,
        .host_irq_status = dwmac100_irq_status,
index db45134fddf04e50c703254b8570b744004123f8..d74cedf2a397580aeb6c62737a35030e65053367 100644 (file)
 #define GMAC_HASH_TAB_32_63            0x00000014
 #define GMAC_RX_FLOW_CTRL              0x00000090
 #define GMAC_QX_TX_FLOW_CTRL(x)                (0x70 + x * 4)
+#define GMAC_TXQ_PRTY_MAP0             0x98
+#define GMAC_TXQ_PRTY_MAP1             0x9C
 #define GMAC_RXQ_CTRL0                 0x000000a0
+#define GMAC_RXQ_CTRL1                 0x000000a4
+#define GMAC_RXQ_CTRL2                 0x000000a8
+#define GMAC_RXQ_CTRL3                 0x000000ac
 #define GMAC_INT_STATUS                        0x000000b0
 #define GMAC_INT_EN                    0x000000b4
+#define GMAC_1US_TIC_COUNTER           0x000000dc
 #define GMAC_PCS_BASE                  0x000000e0
 #define GMAC_PHYIF_CONTROL_STATUS      0x000000f8
 #define GMAC_PMT                       0x000000c0
 #define GMAC_ADDR_HIGH(reg)            (0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)             (0x304 + reg * 8)
 
+/* RX Queues Routing */
+#define GMAC_RXQCTRL_AVCPQ_MASK                GENMASK(2, 0)
+#define GMAC_RXQCTRL_AVCPQ_SHIFT       0
+#define GMAC_RXQCTRL_PTPQ_MASK         GENMASK(6, 4)
+#define GMAC_RXQCTRL_PTPQ_SHIFT                4
+#define GMAC_RXQCTRL_DCBCPQ_MASK       GENMASK(10, 8)
+#define GMAC_RXQCTRL_DCBCPQ_SHIFT      8
+#define GMAC_RXQCTRL_UPQ_MASK          GENMASK(14, 12)
+#define GMAC_RXQCTRL_UPQ_SHIFT         12
+#define GMAC_RXQCTRL_MCBCQ_MASK                GENMASK(18, 16)
+#define GMAC_RXQCTRL_MCBCQ_SHIFT       16
+#define GMAC_RXQCTRL_MCBCQEN           BIT(20)
+#define GMAC_RXQCTRL_MCBCQEN_SHIFT     20
+#define GMAC_RXQCTRL_TACPQE            BIT(21)
+#define GMAC_RXQCTRL_TACPQE_SHIFT      21
+
 /* MAC Packet Filtering */
 #define GMAC_PACKET_FILTER_PR          BIT(0)
 #define GMAC_PACKET_FILTER_HMC         BIT(2)
 /* MAC Flow Control RX */
 #define GMAC_RX_FLOW_CTRL_RFE          BIT(0)
 
+/* RX Queues Priorities */
+#define GMAC_RXQCTRL_PSRQX_MASK(x)     GENMASK(7 + ((x) * 8), 0 + ((x) * 8))
+#define GMAC_RXQCTRL_PSRQX_SHIFT(x)    ((x) * 8)
+
+/* TX Queues Priorities */
+#define GMAC_TXQCTRL_PSTQX_MASK(x)     GENMASK(7 + ((x) * 8), 0 + ((x) * 8))
+#define GMAC_TXQCTRL_PSTQX_SHIFT(x)    ((x) * 8)
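
The PSRQX/PSTQX helpers give each queue one byte of its priority register through computed GENMASK ranges. A userspace demonstration of the same mask/shift arithmetic; GENMASK() is re-derived here for 32-bit values only:

#include <stdint.h>
#include <stdio.h>

#define GENMASK(h, l)	(((~0u) << (l)) & (~0u >> (31 - (h))))

#define PSRQX_MASK(x)	GENMASK(7 + ((x) * 8), (x) * 8)
#define PSRQX_SHIFT(x)	((x) * 8)

static uint32_t set_queue_prio(uint32_t reg, unsigned int q, uint32_t prio)
{
	reg &= ~PSRQX_MASK(q);			/* clear this queue's byte */
	reg |= (prio << PSRQX_SHIFT(q)) & PSRQX_MASK(q);
	return reg;
}

int main(void)
{
	uint32_t reg = 0;

	reg = set_queue_prio(reg, 0, 0x3);
	reg = set_queue_prio(reg, 2, 0x7);
	printf("CTRL2 = %#010x\n", (unsigned)reg);	/* expect 0x00070003 */
	return 0;
}
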
+
 /* MAC Flow Control TX */
 #define GMAC_TX_FLOW_CTRL_TFE          BIT(1)
 #define GMAC_TX_FLOW_CTRL_PT_SHIFT     16
@@ -148,6 +178,8 @@ enum power_event {
 /* MAC HW features1 bitmap */
 #define GMAC_HW_FEAT_AVSEL             BIT(20)
 #define GMAC_HW_TSOEN                  BIT(18)
+#define GMAC_HW_TXFIFOSIZE             GENMASK(10, 6)
+#define GMAC_HW_RXFIFOSIZE             GENMASK(4, 0)
 
 /* MAC HW features2 bitmap */
 #define GMAC_HW_FEAT_TXCHCNT           GENMASK(21, 18)
@@ -161,8 +193,25 @@ enum power_event {
 #define GMAC_HI_REG_AE                 BIT(31)
 
 /*  MTL registers */
+#define MTL_OPERATION_MODE             0x00000c00
+#define MTL_OPERATION_SCHALG_MASK      GENMASK(6, 5)
+#define MTL_OPERATION_SCHALG_WRR       (0x0 << 5)
+#define MTL_OPERATION_SCHALG_WFQ       (0x1 << 5)
+#define MTL_OPERATION_SCHALG_DWRR      (0x2 << 5)
+#define MTL_OPERATION_SCHALG_SP                (0x3 << 5)
+#define MTL_OPERATION_RAA              BIT(2)
+#define MTL_OPERATION_RAA_SP           (0x0 << 2)
+#define MTL_OPERATION_RAA_WSP          (0x1 << 2)
+
 #define MTL_INT_STATUS                 0x00000c20
-#define MTL_INT_Q0                     BIT(0)
+#define MTL_INT_QX(x)                  BIT(x)
+
+#define MTL_RXQ_DMA_MAP0               0x00000c30 /* queue 0 to 3 */
+#define MTL_RXQ_DMA_MAP1               0x00000c34 /* queue 4 to 7 */
+#define MTL_RXQ_DMA_Q04MDMACH_MASK     GENMASK(3, 0)
+#define MTL_RXQ_DMA_Q04MDMACH(x)       ((x) << 0)
+#define MTL_RXQ_DMA_QXMDMACH_MASK(x)   GENMASK(11 + (8 * ((x) - 1)), 8 * (x))
+#define MTL_RXQ_DMA_QXMDMACH(chan, q)  ((chan) << (8 * (q)))
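+/* e.g. mapping RX queue 1 to DMA channel 3 clears GENMASK(11, 8) in
+ * MTL_RXQ_DMA_MAP0 and ORs in (3 << 8)
+ */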
 
 #define MTL_CHAN_BASE_ADDR             0x00000d00
 #define MTL_CHAN_BASE_OFFSET           0x40
@@ -180,6 +229,7 @@ enum power_event {
 #define MTL_OP_MODE_TSF                        BIT(1)
 
 #define MTL_OP_MODE_TQS_MASK           GENMASK(24, 16)
+#define MTL_OP_MODE_TQS_SHIFT          16
 
 #define MTL_OP_MODE_TTC_MASK           0x70
 #define MTL_OP_MODE_TTC_SHIFT          4
@@ -193,6 +243,17 @@ enum power_event {
 #define MTL_OP_MODE_TTC_384            (6 << MTL_OP_MODE_TTC_SHIFT)
 #define MTL_OP_MODE_TTC_512            (7 << MTL_OP_MODE_TTC_SHIFT)
 
+#define MTL_OP_MODE_RQS_MASK           GENMASK(29, 20)
+#define MTL_OP_MODE_RQS_SHIFT          20
+
+#define MTL_OP_MODE_RFD_MASK           GENMASK(19, 14)
+#define MTL_OP_MODE_RFD_SHIFT          14
+
+#define MTL_OP_MODE_RFA_MASK           GENMASK(13, 8)
+#define MTL_OP_MODE_RFA_SHIFT          8
+
+#define MTL_OP_MODE_EHFC               BIT(7)
+
 #define MTL_OP_MODE_RTC_MASK           0x18
 #define MTL_OP_MODE_RTC_SHIFT          3
 
@@ -201,6 +262,46 @@ enum power_event {
 #define MTL_OP_MODE_RTC_96             (2 << MTL_OP_MODE_RTC_SHIFT)
 #define MTL_OP_MODE_RTC_128            (3 << MTL_OP_MODE_RTC_SHIFT)
 
+/* MTL ETS Control register */
+#define MTL_ETS_CTRL_BASE_ADDR         0x00000d10
+#define MTL_ETS_CTRL_BASE_OFFSET       0x40
+#define MTL_ETSX_CTRL_BASE_ADDR(x)     (MTL_ETS_CTRL_BASE_ADDR + \
+                                       ((x) * MTL_ETS_CTRL_BASE_OFFSET))
+
+#define MTL_ETS_CTRL_CC                        BIT(3)
+#define MTL_ETS_CTRL_AVALG             BIT(2)
+
+/* MTL Queue Quantum Weight */
+#define MTL_TXQ_WEIGHT_BASE_ADDR       0x00000d18
+#define MTL_TXQ_WEIGHT_BASE_OFFSET     0x40
+#define MTL_TXQX_WEIGHT_BASE_ADDR(x)   (MTL_TXQ_WEIGHT_BASE_ADDR + \
+                                       ((x) * MTL_TXQ_WEIGHT_BASE_OFFSET))
+#define MTL_TXQ_WEIGHT_ISCQW_MASK      GENMASK(20, 0)
+
+/* MTL sendSlopeCredit register */
+#define MTL_SEND_SLP_CRED_BASE_ADDR    0x00000d1c
+#define MTL_SEND_SLP_CRED_OFFSET       0x40
+#define MTL_SEND_SLP_CREDX_BASE_ADDR(x)        (MTL_SEND_SLP_CRED_BASE_ADDR + \
+                                       ((x) * MTL_SEND_SLP_CRED_OFFSET))
+
+#define MTL_SEND_SLP_CRED_SSC_MASK     GENMASK(13, 0)
+
+/* MTL hiCredit register */
+#define MTL_HIGH_CRED_BASE_ADDR                0x00000d20
+#define MTL_HIGH_CRED_OFFSET           0x40
+#define MTL_HIGH_CREDX_BASE_ADDR(x)    (MTL_HIGH_CRED_BASE_ADDR + \
+                                       ((x) * MTL_HIGH_CRED_OFFSET))
+
+#define MTL_HIGH_CRED_HC_MASK          GENMASK(28, 0)
+
+/* MTL loCredit register */
+#define MTL_LOW_CRED_BASE_ADDR         0x00000d24
+#define MTL_LOW_CRED_OFFSET            0x40
+#define MTL_LOW_CREDX_BASE_ADDR(x)     (MTL_LOW_CRED_BASE_ADDR + \
+                                       ((x) * MTL_LOW_CRED_OFFSET))
+
+#define MTL_HIGH_CRED_LC_MASK          GENMASK(28, 0)
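+/* note: despite the MTL_HIGH_CRED prefix, the LC mask applies to the
+ * loCredit registers above
+ */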
+
 /*  MTL debug */
 #define MTL_DEBUG_TXSTSFSTS            BIT(5)
 #define MTL_DEBUG_TXFSTS               BIT(4)
index 1e79e6529c4a79a805663e2d65f2cec558f362e3..48793f2e93075a9cabd57b3a4e0ee86765368bae 100644
@@ -59,17 +59,211 @@ static void dwmac4_core_init(struct mac_device_info *hw, int mtu)
        writel(value, ioaddr + GMAC_INT_EN);
 }
 
-static void dwmac4_rx_queue_enable(struct mac_device_info *hw, u32 queue)
+static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
+                                  u8 mode, u32 queue)
 {
        void __iomem *ioaddr = hw->pcsr;
        u32 value = readl(ioaddr + GMAC_RXQ_CTRL0);
 
        value &= GMAC_RX_QUEUE_CLEAR(queue);
-       value |= GMAC_RX_AV_QUEUE_ENABLE(queue);
+       if (mode == MTL_QUEUE_AVB)
+               value |= GMAC_RX_AV_QUEUE_ENABLE(queue);
+       else if (mode == MTL_QUEUE_DCB)
+               value |= GMAC_RX_DCB_QUEUE_ENABLE(queue);
 
        writel(value, ioaddr + GMAC_RXQ_CTRL0);
 }
 
+static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
+                                    u32 prio, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 base_register;
+       u32 value;
+
+       base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
+
+       value = readl(ioaddr + base_register);
+
+       value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
+       value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+                                               GMAC_RXQCTRL_PSRQX_MASK(queue);
+       writel(value, ioaddr + base_register);
+}
+
+static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
+                                    u32 prio, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 base_register;
+       u32 value;
+
+       base_register = (queue < 4) ? GMAC_TXQ_PRTY_MAP0 : GMAC_TXQ_PRTY_MAP1;
+
+       value = readl(ioaddr + base_register);
+
+       value &= ~GMAC_TXQCTRL_PSTQX_MASK(queue);
+       value |= (prio << GMAC_TXQCTRL_PSTQX_SHIFT(queue)) &
+                                               GMAC_TXQCTRL_PSTQX_MASK(queue);
+
+       writel(value, ioaddr + base_register);
+}
+
+static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
+                                   u8 packet, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
+       const struct stmmac_rx_routing route_possibilities[] = {
+               { GMAC_RXQCTRL_AVCPQ_MASK, GMAC_RXQCTRL_AVCPQ_SHIFT },
+               { GMAC_RXQCTRL_PTPQ_MASK, GMAC_RXQCTRL_PTPQ_SHIFT },
+               { GMAC_RXQCTRL_DCBCPQ_MASK, GMAC_RXQCTRL_DCBCPQ_SHIFT },
+               { GMAC_RXQCTRL_UPQ_MASK, GMAC_RXQCTRL_UPQ_SHIFT },
+               { GMAC_RXQCTRL_MCBCQ_MASK, GMAC_RXQCTRL_MCBCQ_SHIFT },
+       };
+
+       value = readl(ioaddr + GMAC_RXQ_CTRL1);
+
+       /* routing configuration */
+       value &= ~route_possibilities[packet - 1].reg_mask;
+       value |= (queue << route_possibilities[packet - 1].reg_shift) &
+                route_possibilities[packet - 1].reg_mask;
+
+       /* some packets require extra ops */
+       if (packet == PACKET_AVCPQ) {
+               value &= ~GMAC_RXQCTRL_TACPQE;
+               value |= 0x1 << GMAC_RXQCTRL_TACPQE_SHIFT;
+       } else if (packet == PACKET_MCBCQ) {
+               value &= ~GMAC_RXQCTRL_MCBCQEN;
+               value |= 0x1 << GMAC_RXQCTRL_MCBCQEN_SHIFT;
+       }
+
+       writel(value, ioaddr + GMAC_RXQ_CTRL1);
+}
+
+static void dwmac4_prog_mtl_rx_algorithms(struct mac_device_info *hw,
+                                         u32 rx_alg)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value = readl(ioaddr + MTL_OPERATION_MODE);
+
+       value &= ~MTL_OPERATION_RAA;
+       switch (rx_alg) {
+       case MTL_RX_ALGORITHM_SP:
+               value |= MTL_OPERATION_RAA_SP;
+               break;
+       case MTL_RX_ALGORITHM_WSP:
+               value |= MTL_OPERATION_RAA_WSP;
+               break;
+       default:
+               break;
+       }
+
+       writel(value, ioaddr + MTL_OPERATION_MODE);
+}
+
+static void dwmac4_prog_mtl_tx_algorithms(struct mac_device_info *hw,
+                                         u32 tx_alg)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value = readl(ioaddr + MTL_OPERATION_MODE);
+
+       value &= ~MTL_OPERATION_SCHALG_MASK;
+       switch (tx_alg) {
+       case MTL_TX_ALGORITHM_WRR:
+               value |= MTL_OPERATION_SCHALG_WRR;
+               break;
+       case MTL_TX_ALGORITHM_WFQ:
+               value |= MTL_OPERATION_SCHALG_WFQ;
+               break;
+       case MTL_TX_ALGORITHM_DWRR:
+               value |= MTL_OPERATION_SCHALG_DWRR;
+               break;
+       case MTL_TX_ALGORITHM_SP:
+               value |= MTL_OPERATION_SCHALG_SP;
+               break;
+       default:
+               break;
+       }
+
+       writel(value, ioaddr + MTL_OPERATION_MODE);
+}
+
+static void dwmac4_set_mtl_tx_queue_weight(struct mac_device_info *hw,
+                                          u32 weight, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value = readl(ioaddr + MTL_TXQX_WEIGHT_BASE_ADDR(queue));
+
+       value &= ~MTL_TXQ_WEIGHT_ISCQW_MASK;
+       value |= weight & MTL_TXQ_WEIGHT_ISCQW_MASK;
+       writel(value, ioaddr + MTL_TXQX_WEIGHT_BASE_ADDR(queue));
+}
+
+static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
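+       /* queues 0 and 4 use the low nibble of MAP0/MAP1 respectively;
+        * queues 1 to 3 each take one byte of MAP0
+        */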
+       if (queue < 4)
+               value = readl(ioaddr + MTL_RXQ_DMA_MAP0);
+       else
+               value = readl(ioaddr + MTL_RXQ_DMA_MAP1);
+
+       if (queue == 0 || queue == 4) {
+               value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
+               value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+       } else {
+               value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
+               value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
+       }
+
+       if (queue < 4)
+               writel(value, ioaddr + MTL_RXQ_DMA_MAP0);
+       else
+               writel(value, ioaddr + MTL_RXQ_DMA_MAP1);
+}
+
+static void dwmac4_config_cbs(struct mac_device_info *hw,
+                             u32 send_slope, u32 idle_slope,
+                             u32 high_credit, u32 low_credit, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
+       pr_debug("Queue %d configured as AVB. Parameters:\n", queue);
+       pr_debug("\tsend_slope: 0x%08x\n", send_slope);
+       pr_debug("\tidle_slope: 0x%08x\n", idle_slope);
+       pr_debug("\thigh_credit: 0x%08x\n", high_credit);
+       pr_debug("\tlow_credit: 0x%08x\n", low_credit);
+
+       /* enable AV algorithm */
+       value = readl(ioaddr + MTL_ETSX_CTRL_BASE_ADDR(queue));
+       value |= MTL_ETS_CTRL_AVALG;
+       value |= MTL_ETS_CTRL_CC;
+       writel(value, ioaddr + MTL_ETSX_CTRL_BASE_ADDR(queue));
+
+       /* configure send slope */
+       value = readl(ioaddr + MTL_SEND_SLP_CREDX_BASE_ADDR(queue));
+       value &= ~MTL_SEND_SLP_CRED_SSC_MASK;
+       value |= send_slope & MTL_SEND_SLP_CRED_SSC_MASK;
+       writel(value, ioaddr + MTL_SEND_SLP_CREDX_BASE_ADDR(queue));
+
+       /* configure idle slope (same register as tx weight) */
+       dwmac4_set_mtl_tx_queue_weight(hw, idle_slope, queue);
+
+       /* configure high credit */
+       value = readl(ioaddr + MTL_HIGH_CREDX_BASE_ADDR(queue));
+       value &= ~MTL_HIGH_CRED_HC_MASK;
+       value |= high_credit & MTL_HIGH_CRED_HC_MASK;
+       writel(value, ioaddr + MTL_HIGH_CREDX_BASE_ADDR(queue));
+
+       /* configure low credit */
+       value = readl(ioaddr + MTL_LOW_CREDX_BASE_ADDR(queue));
+       value &= ~MTL_HIGH_CRED_LC_MASK;
+       value |= low_credit & MTL_HIGH_CRED_LC_MASK;
+       writel(value, ioaddr + MTL_LOW_CREDX_BASE_ADDR(queue));
+}
+
 static void dwmac4_dump_regs(struct mac_device_info *hw, u32 *reg_space)
 {
        void __iomem *ioaddr = hw->pcsr;
@@ -251,11 +445,12 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 }
 
 static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-                            unsigned int fc, unsigned int pause_time)
+                            unsigned int fc, unsigned int pause_time,
+                            u32 tx_cnt)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 channel = STMMAC_CHAN0;     /* FIXME */
        unsigned int flow = 0;
+       u32 queue = 0;
 
        pr_debug("GMAC Flow-Control:\n");
        if (fc & FLOW_RX) {
@@ -265,13 +460,18 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
        }
        if (fc & FLOW_TX) {
                pr_debug("\tTransmit Flow-Control ON\n");
-               flow |= GMAC_TX_FLOW_CTRL_TFE;
-               writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(channel));
 
-               if (duplex) {
+               if (duplex)
                        pr_debug("\tduplex mode: PAUSE %d\n", pause_time);
-                       flow |= (pause_time << GMAC_TX_FLOW_CTRL_PT_SHIFT);
-                       writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(channel));
+
+               for (queue = 0; queue < tx_cnt; queue++) {
+                       flow |= GMAC_TX_FLOW_CTRL_TFE;
+
+                       if (duplex)
+                               flow |= (pause_time <<
+                                        GMAC_TX_FLOW_CTRL_PT_SHIFT);
+
+                       writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue));
                }
        }
 }
@@ -325,11 +525,34 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x)
        }
 }
 
+static int dwmac4_irq_mtl_status(struct mac_device_info *hw, u32 chan)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 mtl_int_qx_status;
+       int ret = 0;
+
+       mtl_int_qx_status = readl(ioaddr + MTL_INT_STATUS);
+
+       /* Check MTL Interrupt */
+       if (mtl_int_qx_status & MTL_INT_QX(chan)) {
+               /* read Queue x Interrupt status */
+               u32 status = readl(ioaddr + MTL_CHAN_INT_CTRL(chan));
+
+               if (status & MTL_RX_OVERFLOW_INT) {
+                       /*  clear Interrupt */
+                       writel(status | MTL_RX_OVERFLOW_INT,
+                              ioaddr + MTL_CHAN_INT_CTRL(chan));
+                       ret = CORE_IRQ_MTL_RX_OVERFLOW;
+               }
+       }
+
+       return ret;
+}
+
 static int dwmac4_irq_status(struct mac_device_info *hw,
                             struct stmmac_extra_stats *x)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 mtl_int_qx_status;
        u32 intr_status;
        int ret = 0;
 
@@ -348,20 +571,6 @@ static int dwmac4_irq_status(struct mac_device_info *hw,
                x->irq_receive_pmt_irq_n++;
        }
 
-       mtl_int_qx_status = readl(ioaddr + MTL_INT_STATUS);
-       /* Check MTL Interrupt: Currently only one queue is used: Q0. */
-       if (mtl_int_qx_status & MTL_INT_Q0) {
-               /* read Queue 0 Interrupt status */
-               u32 status = readl(ioaddr + MTL_CHAN_INT_CTRL(STMMAC_CHAN0));
-
-               if (status & MTL_RX_OVERFLOW_INT) {
-                       /*  clear Interrupt */
-                       writel(status | MTL_RX_OVERFLOW_INT,
-                              ioaddr + MTL_CHAN_INT_CTRL(STMMAC_CHAN0));
-                       ret = CORE_IRQ_MTL_RX_OVERFLOW;
-               }
-       }
-
        dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x);
        if (intr_status & PCS_RGSMIIIS_IRQ)
                dwmac4_phystatus(ioaddr, x);
@@ -369,64 +578,69 @@ static int dwmac4_irq_status(struct mac_device_info *hw,
        return ret;
 }
 
-static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
+static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                        u32 rx_queues, u32 tx_queues)
 {
        u32 value;
-
-       /*  Currently only channel 0 is supported */
-       value = readl(ioaddr + MTL_CHAN_TX_DEBUG(STMMAC_CHAN0));
-
-       if (value & MTL_DEBUG_TXSTSFSTS)
-               x->mtl_tx_status_fifo_full++;
-       if (value & MTL_DEBUG_TXFSTS)
-               x->mtl_tx_fifo_not_empty++;
-       if (value & MTL_DEBUG_TWCSTS)
-               x->mmtl_fifo_ctrl++;
-       if (value & MTL_DEBUG_TRCSTS_MASK) {
-               u32 trcsts = (value & MTL_DEBUG_TRCSTS_MASK)
-                            >> MTL_DEBUG_TRCSTS_SHIFT;
-               if (trcsts == MTL_DEBUG_TRCSTS_WRITE)
-                       x->mtl_tx_fifo_read_ctrl_write++;
-               else if (trcsts == MTL_DEBUG_TRCSTS_TXW)
-                       x->mtl_tx_fifo_read_ctrl_wait++;
-               else if (trcsts == MTL_DEBUG_TRCSTS_READ)
-                       x->mtl_tx_fifo_read_ctrl_read++;
-               else
-                       x->mtl_tx_fifo_read_ctrl_idle++;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queues; queue++) {
+               value = readl(ioaddr + MTL_CHAN_TX_DEBUG(queue));
+
+               if (value & MTL_DEBUG_TXSTSFSTS)
+                       x->mtl_tx_status_fifo_full++;
+               if (value & MTL_DEBUG_TXFSTS)
+                       x->mtl_tx_fifo_not_empty++;
+               if (value & MTL_DEBUG_TWCSTS)
+                       x->mmtl_fifo_ctrl++;
+               if (value & MTL_DEBUG_TRCSTS_MASK) {
+                       u32 trcsts = (value & MTL_DEBUG_TRCSTS_MASK)
+                                    >> MTL_DEBUG_TRCSTS_SHIFT;
+                       if (trcsts == MTL_DEBUG_TRCSTS_WRITE)
+                               x->mtl_tx_fifo_read_ctrl_write++;
+                       else if (trcsts == MTL_DEBUG_TRCSTS_TXW)
+                               x->mtl_tx_fifo_read_ctrl_wait++;
+                       else if (trcsts == MTL_DEBUG_TRCSTS_READ)
+                               x->mtl_tx_fifo_read_ctrl_read++;
+                       else
+                               x->mtl_tx_fifo_read_ctrl_idle++;
+               }
+               if (value & MTL_DEBUG_TXPAUSED)
+                       x->mac_tx_in_pause++;
        }
-       if (value & MTL_DEBUG_TXPAUSED)
-               x->mac_tx_in_pause++;
 
-       value = readl(ioaddr + MTL_CHAN_RX_DEBUG(STMMAC_CHAN0));
+       for (queue = 0; queue < rx_queues; queue++) {
+               value = readl(ioaddr + MTL_CHAN_RX_DEBUG(queue));
 
-       if (value & MTL_DEBUG_RXFSTS_MASK) {
-               u32 rxfsts = (value & MTL_DEBUG_RXFSTS_MASK)
-                            >> MTL_DEBUG_RRCSTS_SHIFT;
+               if (value & MTL_DEBUG_RXFSTS_MASK) {
+                       u32 rxfsts = (value & MTL_DEBUG_RXFSTS_MASK)
+                                    >> MTL_DEBUG_RRCSTS_SHIFT;
 
-               if (rxfsts == MTL_DEBUG_RXFSTS_FULL)
-                       x->mtl_rx_fifo_fill_level_full++;
-               else if (rxfsts == MTL_DEBUG_RXFSTS_AT)
-                       x->mtl_rx_fifo_fill_above_thresh++;
-               else if (rxfsts == MTL_DEBUG_RXFSTS_BT)
-                       x->mtl_rx_fifo_fill_below_thresh++;
-               else
-                       x->mtl_rx_fifo_fill_level_empty++;
-       }
-       if (value & MTL_DEBUG_RRCSTS_MASK) {
-               u32 rrcsts = (value & MTL_DEBUG_RRCSTS_MASK) >>
-                            MTL_DEBUG_RRCSTS_SHIFT;
-
-               if (rrcsts == MTL_DEBUG_RRCSTS_FLUSH)
-                       x->mtl_rx_fifo_read_ctrl_flush++;
-               else if (rrcsts == MTL_DEBUG_RRCSTS_RSTAT)
-                       x->mtl_rx_fifo_read_ctrl_read_data++;
-               else if (rrcsts == MTL_DEBUG_RRCSTS_RDATA)
-                       x->mtl_rx_fifo_read_ctrl_status++;
-               else
-                       x->mtl_rx_fifo_read_ctrl_idle++;
+                       if (rxfsts == MTL_DEBUG_RXFSTS_FULL)
+                               x->mtl_rx_fifo_fill_level_full++;
+                       else if (rxfsts == MTL_DEBUG_RXFSTS_AT)
+                               x->mtl_rx_fifo_fill_above_thresh++;
+                       else if (rxfsts == MTL_DEBUG_RXFSTS_BT)
+                               x->mtl_rx_fifo_fill_below_thresh++;
+                       else
+                               x->mtl_rx_fifo_fill_level_empty++;
+               }
+               if (value & MTL_DEBUG_RRCSTS_MASK) {
+                       u32 rrcsts = (value & MTL_DEBUG_RRCSTS_MASK) >>
+                                    MTL_DEBUG_RRCSTS_SHIFT;
+
+                       if (rrcsts == MTL_DEBUG_RRCSTS_FLUSH)
+                               x->mtl_rx_fifo_read_ctrl_flush++;
+                       else if (rrcsts == MTL_DEBUG_RRCSTS_RSTAT)
+                               x->mtl_rx_fifo_read_ctrl_read_data++;
+                       else if (rrcsts == MTL_DEBUG_RRCSTS_RDATA)
+                               x->mtl_rx_fifo_read_ctrl_status++;
+                       else
+                               x->mtl_rx_fifo_read_ctrl_idle++;
+               }
+               if (value & MTL_DEBUG_RWCSTS)
+                       x->mtl_rx_fifo_ctrl_active++;
        }
-       if (value & MTL_DEBUG_RWCSTS)
-               x->mtl_rx_fifo_ctrl_active++;
 
        /* GMAC debug */
        value = readl(ioaddr + GMAC_DEBUG);
@@ -455,10 +669,51 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
 
 static const struct stmmac_ops dwmac4_ops = {
        .core_init = dwmac4_core_init,
+       .set_mac = stmmac_set_mac,
        .rx_ipc = dwmac4_rx_ipc_enable,
        .rx_queue_enable = dwmac4_rx_queue_enable,
+       .rx_queue_prio = dwmac4_rx_queue_priority,
+       .tx_queue_prio = dwmac4_tx_queue_priority,
+       .rx_queue_routing = dwmac4_rx_queue_routing,
+       .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
+       .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
+       .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
+       .map_mtl_to_dma = dwmac4_map_mtl_dma,
+       .config_cbs = dwmac4_config_cbs,
        .dump_regs = dwmac4_dump_regs,
        .host_irq_status = dwmac4_irq_status,
+       .host_mtl_irq_status = dwmac4_irq_mtl_status,
+       .flow_ctrl = dwmac4_flow_ctrl,
+       .pmt = dwmac4_pmt,
+       .set_umac_addr = dwmac4_set_umac_addr,
+       .get_umac_addr = dwmac4_get_umac_addr,
+       .set_eee_mode = dwmac4_set_eee_mode,
+       .reset_eee_mode = dwmac4_reset_eee_mode,
+       .set_eee_timer = dwmac4_set_eee_timer,
+       .set_eee_pls = dwmac4_set_eee_pls,
+       .pcs_ctrl_ane = dwmac4_ctrl_ane,
+       .pcs_rane = dwmac4_rane,
+       .pcs_get_adv_lp = dwmac4_get_adv_lp,
+       .debug = dwmac4_debug,
+       .set_filter = dwmac4_set_filter,
+};
+
+static const struct stmmac_ops dwmac410_ops = {
+       .core_init = dwmac4_core_init,
+       .set_mac = stmmac_dwmac4_set_mac,
+       .rx_ipc = dwmac4_rx_ipc_enable,
+       .rx_queue_enable = dwmac4_rx_queue_enable,
+       .rx_queue_prio = dwmac4_rx_queue_priority,
+       .tx_queue_prio = dwmac4_tx_queue_priority,
+       .rx_queue_routing = dwmac4_rx_queue_routing,
+       .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
+       .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
+       .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
+       .map_mtl_to_dma = dwmac4_map_mtl_dma,
+       .config_cbs = dwmac4_config_cbs,
+       .dump_regs = dwmac4_dump_regs,
+       .host_irq_status = dwmac4_irq_status,
+       .host_mtl_irq_status = dwmac4_irq_mtl_status,
        .flow_ctrl = dwmac4_flow_ctrl,
        .pmt = dwmac4_pmt,
        .set_umac_addr = dwmac4_set_umac_addr,
@@ -492,8 +747,6 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
-       mac->mac = &dwmac4_ops;
-
        mac->link.port = GMAC_CONFIG_PS;
        mac->link.duplex = GMAC_CONFIG_DM;
        mac->link.speed = GMAC_CONFIG_FES;
@@ -514,5 +767,10 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
        else
                mac->dma = &dwmac4_dma_ops;
 
+       if (*synopsys_id >= DWMAC_CORE_4_00)
+               mac->mac = &dwmac410_ops;
+       else
+               mac->mac = &dwmac4_ops;
+
        return mac;
 }
index f97b0d5d998742efcad71972bd74ce40cc02afad..eec8463057fd7573b54019298dfa894d27fc33e4 100644
@@ -71,36 +71,48 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
        writel(value, ioaddr + DMA_SYS_BUS_MODE);
 }
 
-static void dwmac4_dma_init_channel(void __iomem *ioaddr,
-                                   struct stmmac_dma_cfg *dma_cfg,
-                                   u32 dma_tx_phy, u32 dma_rx_phy,
-                                   u32 channel)
+void dwmac4_dma_init_rx_chan(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_rx_phy, u32 chan)
 {
        u32 value;
-       int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
-       int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
+       u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 
-       /* set PBL for each channels. Currently we affect same configuration
-        * on each channel
-        */
-       value = readl(ioaddr + DMA_CHAN_CONTROL(channel));
-       if (dma_cfg->pblx8)
-               value = value | DMA_BUS_MODE_PBL;
-       writel(value, ioaddr + DMA_CHAN_CONTROL(channel));
+       value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+       value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
+       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+
+       writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
+}
 
-       value = readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
+void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_tx_phy, u32 chan)
+{
+       u32 value;
+       u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+
+       value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
        value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
-       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(channel));
+       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
-       value = readl(ioaddr + DMA_CHAN_RX_CONTROL(channel));
-       value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
-       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(channel));
+       writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
+}
 
-       /* Mask interrupts by writing to CSR7 */
-       writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr + DMA_CHAN_INTR_ENA(channel));
+void dwmac4_dma_init_channel(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg, u32 chan)
+{
+       u32 value;
+
+       /* common channel control register config */
+       value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+       if (dma_cfg->pblx8)
+               value = value | DMA_BUS_MODE_PBL;
+       writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
 
-       writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(channel));
-       writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(channel));
+       /* Mask interrupts by writing to CSR7 */
+       writel(DMA_CHAN_INTR_DEFAULT_MASK,
+              ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
 static void dwmac4_dma_init(void __iomem *ioaddr,
@@ -108,7 +120,6 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
                            u32 dma_tx, u32 dma_rx, int atds)
 {
        u32 value = readl(ioaddr + DMA_SYS_BUS_MODE);
-       int i;
 
        /* Set the Fixed burst mode */
        if (dma_cfg->fixed_burst)
@@ -122,9 +133,6 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
                value |= DMA_SYS_BUS_AAL;
 
        writel(value, ioaddr + DMA_SYS_BUS_MODE);
-
-       for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
-               dwmac4_dma_init_channel(ioaddr, dma_cfg, dma_tx, dma_rx, i);
 }
 
 static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel,
@@ -174,46 +182,121 @@ static void dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
                _dwmac4_dump_dma_regs(ioaddr, i, reg_space);
 }
 
-static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt)
+static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan)
 {
-       int i;
+       u32 chan;
 
-       for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
-               writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(i));
+       for (chan = 0; chan < number_chan; chan++)
+               writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(chan));
 }
 
-static void dwmac4_dma_chan_op_mode(void __iomem *ioaddr, int txmode,
-                                   int rxmode, u32 channel)
+static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
+                                      u32 channel, int fifosz)
 {
-       u32 mtl_tx_op, mtl_rx_op, mtl_rx_int;
+       unsigned int rqs = fifosz / 256 - 1;
+       u32 mtl_rx_op, mtl_rx_int;
 
-       /* Following code only done for channel 0, other channels not yet
-        * supported.
-        */
-       mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+       mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
+
+       if (mode == SF_DMA_MODE) {
+               pr_debug("GMAC: enable RX store and forward mode\n");
+               mtl_rx_op |= MTL_OP_MODE_RSF;
+       } else {
+               pr_debug("GMAC: disable RX SF mode (threshold %d)\n", mode);
+               mtl_rx_op &= ~MTL_OP_MODE_RSF;
+               mtl_rx_op &= MTL_OP_MODE_RTC_MASK;
+               if (mode <= 32)
+                       mtl_rx_op |= MTL_OP_MODE_RTC_32;
+               else if (mode <= 64)
+                       mtl_rx_op |= MTL_OP_MODE_RTC_64;
+               else if (mode <= 96)
+                       mtl_rx_op |= MTL_OP_MODE_RTC_96;
+               else
+                       mtl_rx_op |= MTL_OP_MODE_RTC_128;
+       }
+
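+       /* program the RX queue size: rqs is encoded in 256 byte units
+        * minus one, e.g. a 4 KiB FIFO gives rqs = 15
+        */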
+       mtl_rx_op &= ~MTL_OP_MODE_RQS_MASK;
+       mtl_rx_op |= rqs << MTL_OP_MODE_RQS_SHIFT;
+
+       /* enable flow control only if each channel gets 4 KiB or more FIFO */
+       if (fifosz >= 4096) {
+               unsigned int rfd, rfa;
+
+               mtl_rx_op |= MTL_OP_MODE_EHFC;
+
+               /* Set Threshold for Activating Flow Control to min 2 frames,
+                * i.e. 1500 * 2 = 3000 bytes.
+                *
+                * Set Threshold for Deactivating Flow Control to min 1 frame,
+                * i.e. 1500 bytes.
+                */
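+               /* the Full-xK annotations below follow the encoding
+                * "FIFO full minus (1 KiB + N * 512 B)", e.g. 0x03 is
+                * Full-2.5K
+                */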
+               switch (fifosz) {
+               case 4096:
+                       /* This violates the above rule because of the FIFO
+                        * size limit, so overflow may still occur despite
+                        * these thresholds.
+                        */
+                       rfd = 0x03; /* Full-2.5K */
+                       rfa = 0x01; /* Full-1.5K */
+                       break;
+
+               case 8192:
+                       rfd = 0x06; /* Full-4K */
+                       rfa = 0x0a; /* Full-6K */
+                       break;
+
+               case 16384:
+                       rfd = 0x06; /* Full-4K */
+                       rfa = 0x12; /* Full-10K */
+                       break;
+
+               default:
+                       rfd = 0x06; /* Full-4K */
+                       rfa = 0x1e; /* Full-16K */
+                       break;
+               }
+
+               mtl_rx_op &= ~MTL_OP_MODE_RFD_MASK;
+               mtl_rx_op |= rfd << MTL_OP_MODE_RFD_SHIFT;
 
-       if (txmode == SF_DMA_MODE) {
+               mtl_rx_op &= ~MTL_OP_MODE_RFA_MASK;
+               mtl_rx_op |= rfa << MTL_OP_MODE_RFA_SHIFT;
+       }
+
+       writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
+
+       /* Enable the MTL RX overflow interrupt */
+       mtl_rx_int = readl(ioaddr + MTL_CHAN_INT_CTRL(channel));
+       writel(mtl_rx_int | MTL_RX_OVERFLOW_INT_EN,
+              ioaddr + MTL_CHAN_INT_CTRL(channel));
+}
+
+static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
+                                      u32 channel)
+{
+       u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+
+       if (mode == SF_DMA_MODE) {
                pr_debug("GMAC: enable TX store and forward mode\n");
                /* Transmit COE type 2 cannot be done in cut-through mode. */
                mtl_tx_op |= MTL_OP_MODE_TSF;
        } else {
-               pr_debug("GMAC: disabling TX SF (threshold %d)\n", txmode);
+               pr_debug("GMAC: disabling TX SF (threshold %d)\n", mode);
                mtl_tx_op &= ~MTL_OP_MODE_TSF;
                mtl_tx_op &= MTL_OP_MODE_TTC_MASK;
                /* Set the transmit threshold */
-               if (txmode <= 32)
+               if (mode <= 32)
                        mtl_tx_op |= MTL_OP_MODE_TTC_32;
-               else if (txmode <= 64)
+               else if (mode <= 64)
                        mtl_tx_op |= MTL_OP_MODE_TTC_64;
-               else if (txmode <= 96)
+               else if (mode <= 96)
                        mtl_tx_op |= MTL_OP_MODE_TTC_96;
-               else if (txmode <= 128)
+               else if (mode <= 128)
                        mtl_tx_op |= MTL_OP_MODE_TTC_128;
-               else if (txmode <= 192)
+               else if (mode <= 192)
                        mtl_tx_op |= MTL_OP_MODE_TTC_192;
-               else if (txmode <= 256)
+               else if (mode <= 256)
                        mtl_tx_op |= MTL_OP_MODE_TTC_256;
-               else if (txmode <= 384)
+               else if (mode <= 384)
                        mtl_tx_op |= MTL_OP_MODE_TTC_384;
                else
                        mtl_tx_op |= MTL_OP_MODE_TTC_512;
@@ -230,39 +313,6 @@ static void dwmac4_dma_chan_op_mode(void __iomem *ioaddr, int txmode,
         */
        mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK;
        writel(mtl_tx_op, ioaddr +  MTL_CHAN_TX_OP_MODE(channel));
-
-       mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
-
-       if (rxmode == SF_DMA_MODE) {
-               pr_debug("GMAC: enable RX store and forward mode\n");
-               mtl_rx_op |= MTL_OP_MODE_RSF;
-       } else {
-               pr_debug("GMAC: disable RX SF mode (threshold %d)\n", rxmode);
-               mtl_rx_op &= ~MTL_OP_MODE_RSF;
-               mtl_rx_op &= MTL_OP_MODE_RTC_MASK;
-               if (rxmode <= 32)
-                       mtl_rx_op |= MTL_OP_MODE_RTC_32;
-               else if (rxmode <= 64)
-                       mtl_rx_op |= MTL_OP_MODE_RTC_64;
-               else if (rxmode <= 96)
-                       mtl_rx_op |= MTL_OP_MODE_RTC_96;
-               else
-                       mtl_rx_op |= MTL_OP_MODE_RTC_128;
-       }
-
-       writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
-
-       /* Enable MTL RX overflow */
-       mtl_rx_int = readl(ioaddr + MTL_CHAN_INT_CTRL(channel));
-       writel(mtl_rx_int | MTL_RX_OVERFLOW_INT_EN,
-              ioaddr + MTL_CHAN_INT_CTRL(channel));
-}
-
-static void dwmac4_dma_operation_mode(void __iomem *ioaddr, int txmode,
-                                     int rxmode, int rxfifosz)
-{
-       /* Only Channel 0 is actually configured and used */
-       dwmac4_dma_chan_op_mode(ioaddr, txmode, rxmode, 0);
 }
 
 static void dwmac4_get_hw_feature(void __iomem *ioaddr,
@@ -294,6 +344,11 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
        hw_cap = readl(ioaddr + GMAC_HW_FEATURE1);
        dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20;
        dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18;
+       /* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by
+        * shifting and store the sizes in bytes.
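+        * For example, a raw field value of 5 decodes to 128 << 5 = 4096
+        * bytes.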
+        */
+       dma_cap->tx_fifo_size = 128 << ((hw_cap & GMAC_HW_TXFIFOSIZE) >> 6);
+       dma_cap->rx_fifo_size = 128 << ((hw_cap & GMAC_HW_RXFIFOSIZE) >> 0);
        /* MAC HW feature2 */
        hw_cap = readl(ioaddr + GMAC_HW_FEATURE2);
        /* TX and RX number of channels */
@@ -332,9 +387,13 @@ static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
 const struct stmmac_dma_ops dwmac4_dma_ops = {
        .reset = dwmac4_dma_reset,
        .init = dwmac4_dma_init,
+       .init_chan = dwmac4_dma_init_channel,
+       .init_rx_chan = dwmac4_dma_init_rx_chan,
+       .init_tx_chan = dwmac4_dma_init_tx_chan,
        .axi = dwmac4_dma_axi,
        .dump_regs = dwmac4_dump_dma_regs,
-       .dma_mode = dwmac4_dma_operation_mode,
+       .dma_rx_mode = dwmac4_dma_rx_chan_op_mode,
+       .dma_tx_mode = dwmac4_dma_tx_chan_op_mode,
        .enable_dma_irq = dwmac4_enable_dma_irq,
        .disable_dma_irq = dwmac4_disable_dma_irq,
        .start_tx = dwmac4_dma_start_tx,
@@ -354,9 +413,13 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
 const struct stmmac_dma_ops dwmac410_dma_ops = {
        .reset = dwmac4_dma_reset,
        .init = dwmac4_dma_init,
+       .init_chan = dwmac4_dma_init_channel,
+       .init_rx_chan = dwmac4_dma_init_rx_chan,
+       .init_tx_chan = dwmac4_dma_init_tx_chan,
        .axi = dwmac4_dma_axi,
        .dump_regs = dwmac4_dump_dma_regs,
-       .dma_mode = dwmac4_dma_operation_mode,
+       .dma_rx_mode = dwmac4_dma_rx_chan_op_mode,
+       .dma_tx_mode = dwmac4_dma_tx_chan_op_mode,
        .enable_dma_irq = dwmac410_enable_dma_irq,
        .disable_dma_irq = dwmac4_disable_dma_irq,
        .start_tx = dwmac4_dma_start_tx,
index 1b06df749e2bbab63c25dbbc9b4ef814fc9d5d46..8474bf961dd0c60a409ba4c1201557cdef7580dc 100644
 
 int dwmac4_dma_reset(void __iomem *ioaddr);
 void dwmac4_enable_dma_transmission(void __iomem *ioaddr, u32 tail_ptr);
-void dwmac4_enable_dma_irq(void __iomem *ioaddr);
-void dwmac410_enable_dma_irq(void __iomem *ioaddr);
-void dwmac4_disable_dma_irq(void __iomem *ioaddr);
-void dwmac4_dma_start_tx(void __iomem *ioaddr);
-void dwmac4_dma_stop_tx(void __iomem *ioaddr);
-void dwmac4_dma_start_rx(void __iomem *ioaddr);
-void dwmac4_dma_stop_rx(void __iomem *ioaddr);
+void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan);
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
-                        struct stmmac_extra_stats *x);
-void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len);
-void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len);
+                        struct stmmac_extra_stats *x, u32 chan);
+void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
+void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
 void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 void dwmac4_set_tx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 
index c7326d5b2f432b00ea35042097adfe6d92063441..49f5687879df241f01ccf7d7befa3ac7dfd8f1dd 100644
@@ -37,96 +37,96 @@ int dwmac4_dma_reset(void __iomem *ioaddr)
 
 void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
 {
-       writel(tail_ptr, ioaddr + DMA_CHAN_RX_END_ADDR(0));
+       writel(tail_ptr, ioaddr + DMA_CHAN_RX_END_ADDR(chan));
 }
 
 void dwmac4_set_tx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
 {
-       writel(tail_ptr, ioaddr + DMA_CHAN_TX_END_ADDR(0));
+       writel(tail_ptr, ioaddr + DMA_CHAN_TX_END_ADDR(chan));
 }
 
-void dwmac4_dma_start_tx(void __iomem *ioaddr)
+void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value |= DMA_CONTROL_ST;
-       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value |= GMAC_CONFIG_TE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_dma_stop_tx(void __iomem *ioaddr)
+void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value &= ~DMA_CONTROL_ST;
-       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value &= ~GMAC_CONFIG_TE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_dma_start_rx(void __iomem *ioaddr)
+void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value |= DMA_CONTROL_SR;
 
-       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value |= GMAC_CONFIG_RE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_dma_stop_rx(void __iomem *ioaddr)
+void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value &= ~DMA_CONTROL_SR;
-       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value &= ~GMAC_CONFIG_RE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len)
+void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
 {
-       writel(len, ioaddr + DMA_CHAN_TX_RING_LEN(STMMAC_CHAN0));
+       writel(len, ioaddr + DMA_CHAN_TX_RING_LEN(chan));
 }
 
-void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len)
+void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
 {
-       writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(STMMAC_CHAN0));
+       writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan));
 }
 
-void dwmac4_enable_dma_irq(void __iomem *ioaddr)
+void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr +
-              DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+              DMA_CHAN_INTR_ENA(chan));
 }
 
-void dwmac410_enable_dma_irq(void __iomem *ioaddr)
+void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10,
-              ioaddr + DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+              ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
-void dwmac4_disable_dma_irq(void __iomem *ioaddr)
+void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
-       writel(0, ioaddr + DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+       writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
-                        struct stmmac_extra_stats *x)
+                        struct stmmac_extra_stats *x, u32 chan)
 {
        int ret = 0;
 
-       u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(0));
+       u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan));
 
        /* ABNORMAL interrupts */
        if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) {
@@ -153,7 +153,7 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
                if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
                        u32 value;
 
-                       value = readl(ioaddr + DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+                       value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
                        /* to schedule NAPI on real RIE event. */
                        if (likely(value & DMA_CHAN_INTR_ENA_RIE)) {
                                x->rx_normal_irq_n++;
@@ -172,7 +172,7 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
         * status [21-0] except reserved bits [5-3]
         */
        writel((intr_status & 0x3fffc7),
-              ioaddr + DMA_CHAN_STATUS(STMMAC_CHAN0));
+              ioaddr + DMA_CHAN_STATUS(chan));
 
        return ret;
 }
index 56e485f79077374a9e19859a7953b3f18f5c42f3..9091df86723a3988075cbda535d5d6ba21826b7b 100644
 #define DMA_CONTROL_FTF                0x00100000      /* Flush transmit FIFO */
 
 void dwmac_enable_dma_transmission(void __iomem *ioaddr);
-void dwmac_enable_dma_irq(void __iomem *ioaddr);
-void dwmac_disable_dma_irq(void __iomem *ioaddr);
-void dwmac_dma_start_tx(void __iomem *ioaddr);
-void dwmac_dma_stop_tx(void __iomem *ioaddr);
-void dwmac_dma_start_rx(void __iomem *ioaddr);
-void dwmac_dma_stop_rx(void __iomem *ioaddr);
-int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x);
+void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan);
+int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                       u32 chan);
 int dwmac_dma_reset(void __iomem *ioaddr);
 
 #endif /* __DWMAC_DMA_H__ */
index e60bfca2a763325880215bab4592d9dbe5056fbb..38f94305aab53116a74d533728d25cb750da66a1 100644
@@ -47,38 +47,38 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr)
        writel(1, ioaddr + DMA_XMT_POLL_DEMAND);
 }
 
-void dwmac_enable_dma_irq(void __iomem *ioaddr)
+void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
 }
 
-void dwmac_disable_dma_irq(void __iomem *ioaddr)
+void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(0, ioaddr + DMA_INTR_ENA);
 }
 
-void dwmac_dma_start_tx(void __iomem *ioaddr)
+void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value |= DMA_CONTROL_ST;
        writel(value, ioaddr + DMA_CONTROL);
 }
 
-void dwmac_dma_stop_tx(void __iomem *ioaddr)
+void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value &= ~DMA_CONTROL_ST;
        writel(value, ioaddr + DMA_CONTROL);
 }
 
-void dwmac_dma_start_rx(void __iomem *ioaddr)
+void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value |= DMA_CONTROL_SR;
        writel(value, ioaddr + DMA_CONTROL);
 }
 
-void dwmac_dma_stop_rx(void __iomem *ioaddr)
+void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value &= ~DMA_CONTROL_SR;
@@ -156,7 +156,7 @@ static void show_rx_process_state(unsigned int status)
 #endif
 
 int dwmac_dma_interrupt(void __iomem *ioaddr,
-                       struct stmmac_extra_stats *x)
+                       struct stmmac_extra_stats *x, u32 chan)
 {
        int ret = 0;
        /* read the status register (CSR5) */
index 85d64114e159e6d76a03fe5cca839fb246a6e0e6..16808e48ca1cf7bbd4237967ce0497caffc52808 100644
@@ -481,6 +481,7 @@ stmmac_set_pauseparam(struct net_device *netdev,
                      struct ethtool_pauseparam *pause)
 {
        struct stmmac_priv *priv = netdev_priv(netdev);
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
        struct phy_device *phy = netdev->phydev;
        int new_pause = FLOW_OFF;
 
@@ -511,7 +512,7 @@ stmmac_set_pauseparam(struct net_device *netdev,
        }
 
        priv->hw->mac->flow_ctrl(priv->hw, phy->duplex, priv->flow_ctrl,
-                                priv->pause);
+                                priv->pause, tx_cnt);
        return 0;
 }
 
@@ -519,6 +520,8 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
                                 struct ethtool_stats *dummy, u64 *data)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
        int i, j = 0;
 
        /* Update the DMA HW counters for dwmac10/100 */
@@ -549,7 +552,8 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
                if ((priv->hw->mac->debug) &&
                    (priv->synopsys_id >= DWMAC_CORE_3_50))
                        priv->hw->mac->debug(priv->ioaddr,
-                                            (void *)&priv->xstats);
+                                            (void *)&priv->xstats,
+                                            rx_queues_count, tx_queues_count);
        }
        for (i = 0; i < STMMAC_STATS_LEN; i++) {
                char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
@@ -726,6 +730,7 @@ static int stmmac_set_coalesce(struct net_device *dev,
                               struct ethtool_coalesce *ec)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
        unsigned int rx_riwt;
 
        /* Check not supported parameters  */
@@ -764,7 +769,7 @@ static int stmmac_set_coalesce(struct net_device *dev,
        priv->tx_coal_frames = ec->tx_max_coalesced_frames;
        priv->tx_coal_timer = ec->tx_coalesce_usecs;
        priv->rx_riwt = rx_riwt;
-       priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt);
+       priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt, rx_cnt);
 
        return 0;
 }
index 4498a3861aa3ad09460e922bd7f38e3506889dcb..c1c63197ff73e32eb62e45b049f30b0634e4cdfe 100644
@@ -672,6 +672,19 @@ static void stmmac_release_ptp(struct stmmac_priv *priv)
        stmmac_ptp_unregister(priv);
 }
 
+/**
+ *  stmmac_mac_flow_ctrl - Configure flow control in all queues
+ *  @priv: driver private structure
+ *  @duplex: duplex mode negotiated by the PHY
+ *  Description: configures flow control in every TX queue
+ */
+static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+
+       priv->hw->mac->flow_ctrl(priv->hw, duplex, priv->flow_ctrl,
+                                priv->pause, tx_cnt);
+}
+
 /**
  * stmmac_adjust_link - adjusts the link parameters
  * @dev: net device structure
@@ -687,7 +700,6 @@ static void stmmac_adjust_link(struct net_device *dev)
        struct phy_device *phydev = dev->phydev;
        unsigned long flags;
        int new_state = 0;
-       unsigned int fc = priv->flow_ctrl, pause_time = priv->pause;
 
        if (!phydev)
                return;
@@ -709,8 +721,7 @@ static void stmmac_adjust_link(struct net_device *dev)
                }
                /* Flow Control operation */
                if (phydev->pause)
-                       priv->hw->mac->flow_ctrl(priv->hw, phydev->duplex,
-                                                fc, pause_time);
+                       stmmac_mac_flow_ctrl(priv, phydev->duplex);
 
                if (phydev->speed != priv->speed) {
                        new_state = 1;
@@ -1256,19 +1267,104 @@ static void free_dma_desc_resources(struct stmmac_priv *priv)
  */
 static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
 {
-       int rx_count = priv->dma_cap.number_rx_queues;
-       int queue = 0;
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       int queue;
+       u8 mode;
 
-       /* If GMAC does not have multiple queues, then this is not necessary*/
-       if (rx_count == 1)
-               return;
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               mode = priv->plat->rx_queues_cfg[queue].mode_to_use;
+               priv->hw->mac->rx_queue_enable(priv->hw, mode, queue);
+       }
+}
 
-       /**
-        *  If the core is synthesized with multiple rx queues / multiple
-        *  dma channels, then rx queues will be disabled by default.
-        *  For now only rx queue 0 is enabled.
-        */
-       priv->hw->mac->rx_queue_enable(priv->hw, queue);
+/**
+ * stmmac_start_rx_dma - start RX DMA channel
+ * @priv: driver private structure
+ * @chan: RX channel index
+ * Description:
+ * This starts an RX DMA channel
+ */
+static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA RX processes started in channel %d\n", chan);
+       priv->hw->dma->start_rx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_start_tx_dma - start TX DMA channel
+ * @priv: driver private structure
+ * @chan: TX channel index
+ * Description:
+ * This starts a TX DMA channel
+ */
+static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA TX processes started in channel %d\n", chan);
+       priv->hw->dma->start_tx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_stop_rx_dma - stop RX DMA channel
+ * @priv: driver private structure
+ * @chan: RX channel index
+ * Description:
+ * This stops an RX DMA channel
+ */
+static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA RX processes stopped in channel %d\n", chan);
+       priv->hw->dma->stop_rx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_stop_tx_dma - stop TX DMA channel
+ * @priv: driver private structure
+ * @chan: TX channel index
+ * Description:
+ * This stops a TX DMA channel
+ */
+static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA TX processes stopped in channel %d\n", chan);
+       priv->hw->dma->stop_tx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_start_all_dma - start all RX and TX DMA channels
+ * @priv: driver private structure
+ * Description:
+ * This starts all the RX and TX DMA channels
+ */
+static void stmmac_start_all_dma(struct stmmac_priv *priv)
+{
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan = 0;
+
+       for (chan = 0; chan < rx_channels_count; chan++)
+               stmmac_start_rx_dma(priv, chan);
+
+       for (chan = 0; chan < tx_channels_count; chan++)
+               stmmac_start_tx_dma(priv, chan);
+}
+
+/**
+ * stmmac_stop_all_dma - stop all RX and TX DMA channels
+ * @priv: driver private structure
+ * Description:
+ * This stops the RX and TX DMA channels
+ */
+static void stmmac_stop_all_dma(struct stmmac_priv *priv)
+{
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan = 0;
+
+       for (chan = 0; chan < rx_channels_count; chan++)
+               stmmac_stop_rx_dma(priv, chan);
+
+       for (chan = 0; chan < tx_channels_count; chan++)
+               stmmac_stop_tx_dma(priv, chan);
 }
 
 /**
@@ -1279,11 +1375,20 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
  */
 static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 {
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
        int rxfifosz = priv->plat->rx_fifo_size;
+       u32 txmode = 0;
+       u32 rxmode = 0;
+       u32 chan = 0;
+
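+       /* fall back to the FIFO size reported by the HW feature registers
+        * when the platform does not provide one
+        */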
+       if (rxfifosz == 0)
+               rxfifosz = priv->dma_cap.rx_fifo_size;
 
-       if (priv->plat->force_thresh_dma_mode)
-               priv->hw->dma->dma_mode(priv->ioaddr, tc, tc, rxfifosz);
-       else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
+       if (priv->plat->force_thresh_dma_mode) {
+               txmode = tc;
+               rxmode = tc;
+       } else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
                /*
                 * In case of GMAC, SF mode can be enabled
                 * to perform the TX COE in HW. This depends on:
@@ -1291,12 +1396,26 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
                 * 2) There is no bugged Jumbo frame support
                 *    that needs to not insert csum in the TDES.
                 */
-               priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE,
-                                       rxfifosz);
+               txmode = SF_DMA_MODE;
+               rxmode = SF_DMA_MODE;
                priv->xstats.threshold = SF_DMA_MODE;
-       } else
-               priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE,
+       } else {
+               txmode = tc;
+               rxmode = SF_DMA_MODE;
+       }
+
+       /* configure all channels */
+       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+               for (chan = 0; chan < rx_channels_count; chan++)
+                       priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
+                                                  rxfifosz);
+
+               for (chan = 0; chan < tx_channels_count; chan++)
+                       priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+       } else {
+               priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
                                        rxfifosz);
+       }
 }
 
 /**
@@ -1393,28 +1512,29 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
        netif_tx_unlock(priv->dev);
 }
 
-static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
+static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv, u32 chan)
 {
-       priv->hw->dma->enable_dma_irq(priv->ioaddr);
+       priv->hw->dma->enable_dma_irq(priv->ioaddr, chan);
 }
 
-static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv)
+static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv, u32 chan)
 {
-       priv->hw->dma->disable_dma_irq(priv->ioaddr);
+       priv->hw->dma->disable_dma_irq(priv->ioaddr, chan);
 }
 
 /**
  * stmmac_tx_err - to manage the tx error
  * @priv: driver private structure
+ * @chan: channel index
  * Description: it cleans the descriptors and restarts the transmission
  * in case of transmission errors.
  */
-static void stmmac_tx_err(struct stmmac_priv *priv)
+static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 {
        int i;
        netif_stop_queue(priv->dev);
 
-       priv->hw->dma->stop_tx(priv->ioaddr);
+       stmmac_stop_tx_dma(priv, chan);
        dma_free_tx_skbufs(priv);
        for (i = 0; i < DMA_TX_SIZE; i++)
                if (priv->extend_desc)
@@ -1428,12 +1548,40 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
        priv->dirty_tx = 0;
        priv->cur_tx = 0;
        netdev_reset_queue(priv->dev);
-       priv->hw->dma->start_tx(priv->ioaddr);
+       stmmac_start_tx_dma(priv, chan);
 
        priv->dev->stats.tx_errors++;
        netif_wake_queue(priv->dev);
 }
 
+/**
+ *  stmmac_set_dma_operation_mode - Set DMA operation mode by channel
+ *  @priv: driver private structure
+ *  @txmode: TX operating mode
+ *  @rxmode: RX operating mode
+ *  @chan: channel index
+ *  Description: it is used for configuring the DMA operation mode at
+ *  runtime in order to program the TX/RX DMA thresholds or Store-And-Forward
+ *  mode.
+ */
+static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
+                                         u32 rxmode, u32 chan)
+{
+       int rxfifosz = priv->plat->rx_fifo_size;
+
+       if (rxfifosz == 0)
+               rxfifosz = priv->dma_cap.rx_fifo_size;
+
+       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+               priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
+                                          rxfifosz);
+               priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+       } else {
+               priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
+                                       rxfifosz);
+       }
+}
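
stmmac_set_dma_operation_mode() is the per-channel counterpart of
stmmac_dma_operation_mode() above: on dwmac4 it programs the split
dma_rx_mode/dma_tx_mode callbacks for a single channel, while older cores
fall back to the combined dma_mode call. Usage sketch (channel number and
modes are illustrative):

        /* put channel 2 into Store-And-Forward in both directions */
        stmmac_set_dma_operation_mode(priv, SF_DMA_MODE, SF_DMA_MODE, 2);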
+
 /**
  * stmmac_dma_interrupt - DMA ISR
  * @priv: driver private structure
@@ -1443,31 +1591,41 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
  */
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
+       u32 tx_channel_count = priv->plat->tx_queues_to_use;
        int status;
-       int rxfifosz = priv->plat->rx_fifo_size;
+       u32 chan;
+
+       for (chan = 0; chan < tx_channel_count; chan++) {
+               status = priv->hw->dma->dma_interrupt(priv->ioaddr,
+                                                     &priv->xstats, chan);
+               if (likely((status & handle_rx)) || (status & handle_tx)) {
+                       if (likely(napi_schedule_prep(&priv->napi))) {
+                               stmmac_disable_dma_irq(priv, chan);
+                               __napi_schedule(&priv->napi);
+                       }
+               }
 
-       status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
-       if (likely((status & handle_rx)) || (status & handle_tx)) {
-               if (likely(napi_schedule_prep(&priv->napi))) {
-                       stmmac_disable_dma_irq(priv);
-                       __napi_schedule(&priv->napi);
+               if (unlikely(status & tx_hard_error_bump_tc)) {
+                       /* Try to bump up the dma threshold on this failure */
+                       if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
+                           (tc <= 256)) {
+                               tc += 64;
+                               if (priv->plat->force_thresh_dma_mode)
+                                       stmmac_set_dma_operation_mode(priv,
+                                                                     tc,
+                                                                     tc,
+                                                                     chan);
+                               else
+                                       stmmac_set_dma_operation_mode(priv,
+                                                                   tc,
+                                                                   SF_DMA_MODE,
+                                                                   chan);
+                               priv->xstats.threshold = tc;
+                       }
+               } else if (unlikely(status == tx_hard_error)) {
+                       stmmac_tx_err(priv, chan);
                }
        }
-       if (unlikely(status & tx_hard_error_bump_tc)) {
-               /* Try to bump up the dma threshold on this failure */
-               if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
-                   (tc <= 256)) {
-                       tc += 64;
-                       if (priv->plat->force_thresh_dma_mode)
-                               priv->hw->dma->dma_mode(priv->ioaddr, tc, tc,
-                                                       rxfifosz);
-                       else
-                               priv->hw->dma->dma_mode(priv->ioaddr, tc,
-                                                       SF_DMA_MODE, rxfifosz);
-                       priv->xstats.threshold = tc;
-               }
-       } else if (unlikely(status == tx_hard_error))
-               stmmac_tx_err(priv);
 }
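
The threshold-bump branch is unchanged logic, now applied per failing
channel: each tx_hard_error_bump_tc event raises tc by 64 while tc <= 256
and the driver is not already in Store-And-Forward mode. Assuming the
driver's default threshold of 64 (TC_DEFAULT), the sequence on a repeatedly
failing channel is:

        tc = 64;   /* default */
        tc = 128;  /* 1st bump */
        tc = 192;  /* 2nd bump */
        tc = 256;  /* 3rd bump */
        tc = 320;  /* 4th bump; tc > 256 afterwards, so bumping stops */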
 
 /**
@@ -1574,6 +1732,11 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
  */
 static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 dummy_dma_rx_phy = 0;
+       u32 dummy_dma_tx_phy = 0;
+       u32 chan = 0;
        int atds = 0;
        int ret = 0;
 
@@ -1591,19 +1754,43 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
                return ret;
        }
 
-       priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
-                           priv->dma_tx_phy, priv->dma_rx_phy, atds);
-
        if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               priv->rx_tail_addr = priv->dma_rx_phy +
-                           (DMA_RX_SIZE * sizeof(struct dma_desc));
-               priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, priv->rx_tail_addr,
-                                              STMMAC_CHAN0);
+               /* DMA Configuration */
+               priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+                                   dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
+
+               /* DMA RX Channel Configuration */
+               for (chan = 0; chan < rx_channels_count; chan++) {
+                       priv->hw->dma->init_rx_chan(priv->ioaddr,
+                                                   priv->plat->dma_cfg,
+                                                   priv->dma_rx_phy, chan);
+
+                       priv->rx_tail_addr = priv->dma_rx_phy +
+                                   (DMA_RX_SIZE * sizeof(struct dma_desc));
+                       priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
+                                                      priv->rx_tail_addr,
+                                                      chan);
+               }
 
-               priv->tx_tail_addr = priv->dma_tx_phy +
-                           (DMA_TX_SIZE * sizeof(struct dma_desc));
-               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                              STMMAC_CHAN0);
+               /* DMA TX Channel Configuration */
+               for (chan = 0; chan < tx_channels_count; chan++) {
+                       priv->hw->dma->init_chan(priv->ioaddr,
+                                                       priv->plat->dma_cfg,
+                                                       chan);
+
+                       priv->hw->dma->init_tx_chan(priv->ioaddr,
+                                                   priv->plat->dma_cfg,
+                                                   priv->dma_tx_phy, chan);
+
+                       priv->tx_tail_addr = priv->dma_tx_phy +
+                                   (DMA_TX_SIZE * sizeof(struct dma_desc));
+                       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr,
+                                                      priv->tx_tail_addr,
+                                                      chan);
+               }
+       } else {
+               priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+                                   priv->dma_tx_phy, priv->dma_rx_phy, atds);
        }
 
        if (priv->plat->axi && priv->hw->dma->axi)
@@ -1644,6 +1831,196 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
        add_timer(&priv->txtimer);
 }
 
+static void stmmac_set_rings_length(struct stmmac_priv *priv)
+{
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan;
+
+       /* set TX ring length */
+       if (priv->hw->dma->set_tx_ring_len) {
+               for (chan = 0; chan < tx_channels_count; chan++)
+                       priv->hw->dma->set_tx_ring_len(priv->ioaddr,
+                                                      (DMA_TX_SIZE - 1), chan);
+       }
+
+       /* set RX ring length */
+       if (priv->hw->dma->set_rx_ring_len) {
+               for (chan = 0; chan < rx_channels_count; chan++)
+                       priv->hw->dma->set_rx_ring_len(priv->ioaddr,
+                                                      (DMA_RX_SIZE - 1), chan);
+       }
+}
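
Note the (DMA_TX_SIZE - 1) and (DMA_RX_SIZE - 1) values: the ring-length
registers take the last valid descriptor index rather than the descriptor
count, so a ring of N entries is programmed as N - 1. With the driver's
512-entry rings this means, for example:

        priv->hw->dma->set_tx_ring_len(priv->ioaddr, 511, chan); /* 512 descs */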
+
+/**
+ *  stmmac_set_tx_queue_weight - Set TX queue weight
+ *  @priv: driver private structure
+ *  Description: It is used for setting the TX queue weights
+ */
+static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
+{
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+       u32 weight;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queues_count; queue++) {
+               weight = priv->plat->tx_queues_cfg[queue].weight;
+               priv->hw->mac->set_mtl_tx_queue_weight(priv->hw, weight, queue);
+       }
+}
+
+/**
+ *  stmmac_configure_cbs - Configure CBS in TX queue
+ *  @priv: driver private structure
+ *  Description: It is used for configuring CBS in AVB TX queues
+ */
+static void stmmac_configure_cbs(struct stmmac_priv *priv)
+{
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+       u32 mode_to_use;
+       u32 queue;
+
+       /* queue 0 is reserved for legacy traffic */
+       for (queue = 1; queue < tx_queues_count; queue++) {
+               mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
+               if (mode_to_use == MTL_QUEUE_DCB)
+                       continue;
+
+               priv->hw->mac->config_cbs(priv->hw,
+                               priv->plat->tx_queues_cfg[queue].send_slope,
+                               priv->plat->tx_queues_cfg[queue].idle_slope,
+                               priv->plat->tx_queues_cfg[queue].high_credit,
+                               priv->plat->tx_queues_cfg[queue].low_credit,
+                               queue);
+       }
+}
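
CBS is the 802.1Qav credit-based shaper and only applies to queues marked
MTL_QUEUE_AVB, which is why DCB queues are skipped above. A hypothetical
platform configuration for queue 1 (values purely illustrative; real ones
come from the DT properties parsed in stmmac_platform.c further down):

        plat->tx_queues_cfg[1].mode_to_use = MTL_QUEUE_AVB;
        plat->tx_queues_cfg[1].send_slope  = 0x1c77;
        plat->tx_queues_cfg[1].idle_slope  = 0x389;
        plat->tx_queues_cfg[1].high_credit = 0x5a0;
        plat->tx_queues_cfg[1].low_credit  = 0x3fff0e38;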
+
+/**
+ *  stmmac_rx_queue_dma_chan_map - Map RX queue to RX DMA channel
+ *  @priv: driver private structure
+ *  Description: It is used for mapping RX queues to RX DMA channels
+ */
+static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       u32 chan;
+
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               chan = priv->plat->rx_queues_cfg[queue].chan;
+               priv->hw->mac->map_mtl_to_dma(priv->hw, queue, chan);
+       }
+}
+
+/**
+ *  stmmac_mac_config_rx_queues_prio - Configure RX Queue priority
+ *  @priv: driver private structure
+ *  Description: It is used for configuring the RX Queue Priority
+ */
+static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       u32 prio;
+
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               if (!priv->plat->rx_queues_cfg[queue].use_prio)
+                       continue;
+
+               prio = priv->plat->rx_queues_cfg[queue].prio;
+               priv->hw->mac->rx_queue_prio(priv->hw, prio, queue);
+       }
+}
+
+/**
+ *  stmmac_mac_config_tx_queues_prio - Configure TX Queue priority
+ *  @priv: driver private structure
+ *  Description: It is used for configuring the TX Queue Priority
+ */
+static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
+{
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+       u32 queue;
+       u32 prio;
+
+       for (queue = 0; queue < tx_queues_count; queue++) {
+               if (!priv->plat->tx_queues_cfg[queue].use_prio)
+                       continue;
+
+               prio = priv->plat->tx_queues_cfg[queue].prio;
+               priv->hw->mac->tx_queue_prio(priv->hw, prio, queue);
+       }
+}
+
+/**
+ *  stmmac_mac_config_rx_queues_routing - Configure RX Queue Routing
+ *  @priv: driver private structure
+ *  Description: It is used for configuring the RX queue routing
+ */
+static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       u8 packet;
+
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               /* no specific packet type routing specified for the queue */
+               if (priv->plat->rx_queues_cfg[queue].pkt_route == 0x0)
+                       continue;
+
+               packet = priv->plat->rx_queues_cfg[queue].pkt_route;
+               priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
+       }
+}
+
+/**
+ *  stmmac_mtl_configuration - Configure MTL
+ *  @priv: driver private structure
+ *  Description: It is used for configuring MTL
+ */
+static void stmmac_mtl_configuration(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+
+       if (tx_queues_count > 1 && priv->hw->mac->set_mtl_tx_queue_weight)
+               stmmac_set_tx_queue_weight(priv);
+
+       /* Configure MTL RX algorithms */
+       if (rx_queues_count > 1 && priv->hw->mac->prog_mtl_rx_algorithms)
+               priv->hw->mac->prog_mtl_rx_algorithms(priv->hw,
+                                               priv->plat->rx_sched_algorithm);
+
+       /* Configure MTL TX algorithms */
+       if (tx_queues_count > 1 && priv->hw->mac->prog_mtl_tx_algorithms)
+               priv->hw->mac->prog_mtl_tx_algorithms(priv->hw,
+                                               priv->plat->tx_sched_algorithm);
+
+       /* Configure CBS in AVB TX queues */
+       if (tx_queues_count > 1 && priv->hw->mac->config_cbs)
+               stmmac_configure_cbs(priv);
+
+       /* Map RX MTL to DMA channels */
+       if (rx_queues_count > 1 && priv->hw->mac->map_mtl_to_dma)
+               stmmac_rx_queue_dma_chan_map(priv);
+
+       /* Enable MAC RX Queues */
+       if (priv->hw->mac->rx_queue_enable)
+               stmmac_mac_enable_rx_queues(priv);
+
+       /* Set RX priorities */
+       if (rx_queues_count > 1 && priv->hw->mac->rx_queue_prio)
+               stmmac_mac_config_rx_queues_prio(priv);
+
+       /* Set TX priorities */
+       if (tx_queues_count > 1 && priv->hw->mac->tx_queue_prio)
+               stmmac_mac_config_tx_queues_prio(priv);
+
+       /* Set RX routing */
+       if (rx_queues_count > 1 && priv->hw->mac->rx_queue_routing)
+               stmmac_mac_config_rx_queues_routing(priv);
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -1659,6 +2036,9 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 chan;
        int ret;
 
        /* DMA initialization and SW reset */
@@ -1688,9 +2068,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        /* Initialize the MAC Core */
        priv->hw->mac->core_init(priv->hw, dev->mtu);
 
-       /* Initialize MAC RX Queues */
-       if (priv->hw->mac->rx_queue_enable)
-               stmmac_mac_enable_rx_queues(priv);
+       /* Initialize MTL */
+       if (priv->synopsys_id >= DWMAC_CORE_4_00)
+               stmmac_mtl_configuration(priv);
 
        ret = priv->hw->mac->rx_ipc(priv->hw);
        if (!ret) {
@@ -1700,10 +2080,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        }
 
        /* Enable the MAC Rx/Tx */
-       if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               stmmac_dwmac4_set_mac(priv->ioaddr, true);
-       else
-               stmmac_set_mac(priv->ioaddr, true);
+       priv->hw->mac->set_mac(priv->ioaddr, true);
 
        /* Set the HW DMA mode and the COE */
        stmmac_dma_operation_mode(priv);
@@ -1711,6 +2088,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        stmmac_mmc_setup(priv);
 
        if (init_ptp) {
+               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+               if (ret < 0)
+                       netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
+
                ret = stmmac_init_ptp(priv);
                if (ret == -EOPNOTSUPP)
                        netdev_warn(priv->dev, "PTP not supported by HW\n");
@@ -1725,35 +2106,37 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
                            __func__);
 #endif
        /* Start the ball rolling... */
-       netdev_dbg(priv->dev, "DMA RX/TX processes started...\n");
-       priv->hw->dma->start_tx(priv->ioaddr);
-       priv->hw->dma->start_rx(priv->ioaddr);
+       stmmac_start_all_dma(priv);
 
        priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
 
        if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) {
                priv->rx_riwt = MAX_DMA_RIWT;
-               priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT);
+               priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
        }
 
        if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
                priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 
-       /*  set TX ring length */
-       if (priv->hw->dma->set_tx_ring_len)
-               priv->hw->dma->set_tx_ring_len(priv->ioaddr,
-                                              (DMA_TX_SIZE - 1));
-       /*  set RX ring length */
-       if (priv->hw->dma->set_rx_ring_len)
-               priv->hw->dma->set_rx_ring_len(priv->ioaddr,
-                                              (DMA_RX_SIZE - 1));
+       /* set TX and RX ring lengths */
+       stmmac_set_rings_length(priv);
+
        /* Enable TSO */
-       if (priv->tso)
-               priv->hw->dma->enable_tso(priv->ioaddr, 1, STMMAC_CHAN0);
+       if (priv->tso) {
+               for (chan = 0; chan < tx_cnt; chan++)
+                       priv->hw->dma->enable_tso(priv->ioaddr, 1, chan);
+       }
 
        return 0;
 }
 
+static void stmmac_hw_teardown(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       clk_disable_unprepare(priv->plat->clk_ptp_ref);
+}
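
stmmac_hw_teardown() keeps the PTP reference clock balanced: its
clk_prepare_enable() moves from stmmac_probe_config_dt() (see the
stmmac_platform.c hunk below) into stmmac_hw_setup(), and the new irq_error
unwind path in stmmac_open() releases it again on failure. The pairing:

        clk_prepare_enable(priv->plat->clk_ptp_ref);    /* stmmac_hw_setup()    */
        clk_disable_unprepare(priv->plat->clk_ptp_ref); /* stmmac_hw_teardown() */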
+
 /**
  *  stmmac_open - open entry point of the driver
  *  @dev : pointer to the device structure.
@@ -1821,7 +2204,7 @@ static int stmmac_open(struct net_device *dev)
                netdev_err(priv->dev,
                           "%s: ERROR: allocating the IRQ %d (error: %d)\n",
                           __func__, dev->irq, ret);
-               goto init_error;
+               goto irq_error;
        }
 
        /* Request the Wake IRQ in case of another line is used for WoL */
@@ -1858,7 +2241,12 @@ lpiirq_error:
                free_irq(priv->wol_irq, dev);
 wolirq_error:
        free_irq(dev->irq, dev);
+irq_error:
+       if (dev->phydev)
+               phy_stop(dev->phydev);
 
+       del_timer_sync(&priv->txtimer);
+       stmmac_hw_teardown(dev);
 init_error:
        free_dma_desc_resources(priv);
 dma_desc_error:
@@ -1901,14 +2289,13 @@ static int stmmac_release(struct net_device *dev)
                free_irq(priv->lpi_irq, dev);
 
        /* Stop TX/RX DMA and clear the descriptors */
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       priv->hw->dma->stop_rx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
        /* Release and free the Rx/Tx resources */
        free_dma_desc_resources(priv);
 
        /* Disable the MAC Rx/Tx */
-       stmmac_set_mac(priv->ioaddr, false);
+       priv->hw->mac->set_mac(priv->ioaddr, false);
 
        netif_carrier_off(dev);
 
@@ -2063,6 +2450,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                des = skb_frag_dma_map(priv->device, frag, 0,
                                       skb_frag_size(frag),
                                       DMA_TO_DEVICE);
+               if (dma_mapping_error(priv->device, des))
+                       goto dma_map_err;
 
                stmmac_tso_allocator(priv, des, skb_frag_size(frag),
                                     (i == nfrags - 1));
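
This closes a hole where a failed fragment mapping was handed straight to
the TSO allocator. dma_map_err is the pre-existing unwind label in
stmmac_tso_xmit(), which drops the skb and accounts the error; the standard
shape of the check:

        des = skb_frag_dma_map(priv->device, frag, 0,
                               skb_frag_size(frag), DMA_TO_DEVICE);
        if (dma_mapping_error(priv->device, des))
                goto dma_map_err;       /* drop skb, count the drop */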
@@ -2661,6 +3050,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 {
        struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
        int work_done = 0;
+       u32 chan = STMMAC_CHAN0;
 
        priv->xstats.napi_poll++;
        stmmac_tx_clean(priv);
@@ -2668,7 +3058,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
        work_done = stmmac_rx(priv, budget);
        if (work_done < budget) {
                napi_complete_done(napi, work_done);
-               stmmac_enable_dma_irq(priv);
+               stmmac_enable_dma_irq(priv, chan);
        }
        return work_done;
 }
@@ -2684,9 +3074,10 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 static void stmmac_tx_timeout(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 chan = STMMAC_CHAN0;
 
        /* Clear Tx resources and restart transmitting again */
-       stmmac_tx_err(priv);
+       stmmac_tx_err(priv, chan);
 }
 
 /**
@@ -2795,6 +3186,12 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 {
        struct net_device *dev = (struct net_device *)dev_id;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queues_count;
+       u32 queue;
+
+       queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt;
 
        if (priv->irq_wake)
                pm_wakeup_event(priv->device, 0);
@@ -2808,16 +3205,27 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
        if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
                int status = priv->hw->mac->host_irq_status(priv->hw,
                                                            &priv->xstats);
+
                if (unlikely(status)) {
                        /* For LPI we need to save the tx status */
                        if (status & CORE_IRQ_TX_PATH_IN_LPI_MODE)
                                priv->tx_path_in_lpi_mode = true;
                        if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
                                priv->tx_path_in_lpi_mode = false;
-                       if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr)
-                               priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
-                                                       priv->rx_tail_addr,
-                                                       STMMAC_CHAN0);
+               }
+
+               if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+                       for (queue = 0; queue < queues_count; queue++) {
+                               status |=
+                               priv->hw->mac->host_mtl_irq_status(priv->hw,
+                                                                  queue);
+
+                               if (status & CORE_IRQ_MTL_RX_OVERFLOW &&
+                                   priv->hw->dma->set_rx_tail_ptr)
+                                       priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
+                                                               priv->rx_tail_addr,
+                                                               queue);
+                       }
                }
 
                /* PCS link status */
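
On dwmac4 the MTL interrupt status is now polled per queue and folded into
the summary word, and an RX overflow re-arms that queue's RX tail pointer
instead of always channel 0's. The loop bound covers whichever count is
larger, since a queue index may carry either an RX or a TX MTL event:

        queues_count = max(rx_cnt, tx_cnt);     /* equivalent to the ternary above */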
@@ -3369,10 +3777,9 @@ int stmmac_dvr_remove(struct device *dev)
 
        netdev_info(priv->dev, "%s: removing driver", __func__);
 
-       priv->hw->dma->stop_rx(priv->ioaddr);
-       priv->hw->dma->stop_tx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
-       stmmac_set_mac(priv->ioaddr, false);
+       priv->hw->mac->set_mac(priv->ioaddr, false);
        netif_carrier_off(ndev);
        unregister_netdev(ndev);
        if (priv->plat->stmmac_rst)
@@ -3416,15 +3823,14 @@ int stmmac_suspend(struct device *dev)
        napi_disable(&priv->napi);
 
        /* Stop TX/RX DMA */
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       priv->hw->dma->stop_rx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
        /* Enable Power down mode by programming the PMT regs */
        if (device_may_wakeup(priv->device)) {
                priv->hw->mac->pmt(priv->hw, priv->wolopts);
                priv->irq_wake = 1;
        } else {
-               stmmac_set_mac(priv->ioaddr, false);
+               priv->hw->mac->set_mac(priv->ioaddr, false);
                pinctrl_pm_select_sleep_state(priv->device);
                /* Disable clock in case of PWM is off */
                clk_disable(priv->plat->pclk);
index 5c9e462276b9cbc25a8b5c2748988286fe17884f..a224d7bf1c1beea57fe976cbd4f94f63603a480a 100644 (file)
@@ -88,6 +88,17 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 
        /* Set the maxmtu to a default of JUMBO_LEN */
        plat->maxmtu = JUMBO_LEN;
+
+       /* Set default number of RX and TX queues to use */
+       plat->tx_queues_to_use = 1;
+       plat->rx_queues_to_use = 1;
+
+       /* Disable Priority config by default */
+       plat->tx_queues_cfg[0].use_prio = false;
+       plat->rx_queues_cfg[0].use_prio = false;
+
+       /* Disable RX queues routing by default */
+       plat->rx_queues_cfg[0].pkt_route = 0x0;
 }
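
These defaults make the multiqueue paths degenerate gracefully on PCI
platforms: with rx/tx_queues_to_use = 1, every per-channel loop in
stmmac_main.c runs exactly once over queue/channel 0, matching the
pre-multiqueue behaviour. For instance:

        /* with the defaults above this body executes once, with chan == 0 */
        for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
                stmmac_start_rx_dma(priv, chan);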
 
 static int quark_default_data(struct plat_stmmacenet_data *plat,
index 433a84239a687bab4ff0572978d7c0eaf849cb46..7fc3a1ef395ab2e99060355d1c47e5b5f1f9d9f1 100644 (file)
@@ -108,7 +108,7 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
        if (!np)
                return NULL;
 
-       axi = kzalloc(sizeof(*axi), GFP_KERNEL);
+       axi = devm_kzalloc(&pdev->dev, sizeof(*axi), GFP_KERNEL);
        if (!axi) {
                of_node_put(np);
                return ERR_PTR(-ENOMEM);
@@ -131,6 +131,155 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
        return axi;
 }
 
+/**
+ * stmmac_mtl_setup - parse DT parameters for multiple queues configuration
+ * @pdev: platform device
+ * @plat: driver platform data to fill in
+ */
+static void stmmac_mtl_setup(struct platform_device *pdev,
+                            struct plat_stmmacenet_data *plat)
+{
+       struct device_node *q_node;
+       struct device_node *rx_node;
+       struct device_node *tx_node;
+       u8 queue = 0;
+
+       /* For backwards-compatibility with device trees that don't have any
+        * snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
+        * to a single RX queue and a single TX queue.
+        */
+       plat->rx_queues_to_use = 1;
+       plat->tx_queues_to_use = 1;
+
+       rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
+       if (!rx_node)
+               return;
+
+       tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
+       if (!tx_node) {
+               of_node_put(rx_node);
+               return;
+       }
+
+       /* Processing RX queues common config */
+       if (of_property_read_u8(rx_node, "snps,rx-queues-to-use",
+                               &plat->rx_queues_to_use))
+               plat->rx_queues_to_use = 1;
+
+       if (of_property_read_bool(rx_node, "snps,rx-sched-sp"))
+               plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
+       else if (of_property_read_bool(rx_node, "snps,rx-sched-wsp"))
+               plat->rx_sched_algorithm = MTL_RX_ALGORITHM_WSP;
+       else
+               plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
+
+       /* Processing individual RX queue config */
+       for_each_child_of_node(rx_node, q_node) {
+               if (queue >= plat->rx_queues_to_use)
+                       break;
+
+               if (of_property_read_bool(q_node, "snps,dcb-algorithm"))
+                       plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+               else if (of_property_read_bool(q_node, "snps,avb-algorithm"))
+                       plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
+               else
+                       plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+
+               if (of_property_read_u8(q_node, "snps,map-to-dma-channel",
+                                       &plat->rx_queues_cfg[queue].chan))
+                       plat->rx_queues_cfg[queue].chan = queue;
+               /* TODO: Dynamic mapping to be included in the future */
+
+               if (of_property_read_u32(q_node, "snps,priority",
+                                       &plat->rx_queues_cfg[queue].prio)) {
+                       plat->rx_queues_cfg[queue].prio = 0;
+                       plat->rx_queues_cfg[queue].use_prio = false;
+               } else {
+                       plat->rx_queues_cfg[queue].use_prio = true;
+               }
+
+               /* RX queue specific packet type routing */
+               if (of_property_read_bool(q_node, "snps,route-avcp"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_AVCPQ;
+               else if (of_property_read_bool(q_node, "snps,route-ptp"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_PTPQ;
+               else if (of_property_read_bool(q_node, "snps,route-dcbcp"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_DCBCPQ;
+               else if (of_property_read_bool(q_node, "snps,route-up"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_UPQ;
+               else if (of_property_read_bool(q_node, "snps,route-multi-broad"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_MCBCQ;
+               else
+                       plat->rx_queues_cfg[queue].pkt_route = 0x0;
+
+               queue++;
+       }
+
+       /* Processing TX queues common config */
+       if (of_property_read_u8(tx_node, "snps,tx-queues-to-use",
+                               &plat->tx_queues_to_use))
+               plat->tx_queues_to_use = 1;
+
+       if (of_property_read_bool(tx_node, "snps,tx-sched-wrr"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR;
+       else if (of_property_read_bool(tx_node, "snps,tx-sched-wfq"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WFQ;
+       else if (of_property_read_bool(tx_node, "snps,tx-sched-dwrr"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_DWRR;
+       else if (of_property_read_bool(tx_node, "snps,tx-sched-sp"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
+       else
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
+
+       queue = 0;
+
+       /* Processing individual TX queue config */
+       for_each_child_of_node(tx_node, q_node) {
+               if (queue >= plat->tx_queues_to_use)
+                       break;
+
+               if (of_property_read_u8(q_node, "snps,weight",
+                                       &plat->tx_queues_cfg[queue].weight))
+                       plat->tx_queues_cfg[queue].weight = 0x10 + queue;
+
+               if (of_property_read_bool(q_node, "snps,dcb-algorithm")) {
+                       plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+               } else if (of_property_read_bool(q_node,
+                                                "snps,avb-algorithm")) {
+                       plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
+
+                       /* Credit Base Shaper parameters used by AVB */
+                       if (of_property_read_u32(q_node, "snps,send_slope",
+                               &plat->tx_queues_cfg[queue].send_slope))
+                               plat->tx_queues_cfg[queue].send_slope = 0x0;
+                       if (of_property_read_u32(q_node, "snps,idle_slope",
+                               &plat->tx_queues_cfg[queue].idle_slope))
+                               plat->tx_queues_cfg[queue].idle_slope = 0x0;
+                       if (of_property_read_u32(q_node, "snps,high_credit",
+                               &plat->tx_queues_cfg[queue].high_credit))
+                               plat->tx_queues_cfg[queue].high_credit = 0x0;
+                       if (of_property_read_u32(q_node, "snps,low_credit",
+                               &plat->tx_queues_cfg[queue].low_credit))
+                               plat->tx_queues_cfg[queue].low_credit = 0x0;
+               } else {
+                       plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+               }
+
+               if (of_property_read_u32(q_node, "snps,priority",
+                                       &plat->tx_queues_cfg[queue].prio)) {
+                       plat->tx_queues_cfg[queue].prio = 0;
+                       plat->tx_queues_cfg[queue].use_prio = false;
+               } else {
+                       plat->tx_queues_cfg[queue].use_prio = true;
+               }
+
+               queue++;
+       }
+
+       of_node_put(rx_node);
+       of_node_put(tx_node);
+       of_node_put(q_node);
+}
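
A hypothetical device-tree fragment exercising the parser above (the
property names follow the snps,* strings it reads; the values are
illustrative, not from any real board). The MAC node would reference it
with snps,mtl-rx-config = <&mtl_rx_setup>;:

        mtl_rx_setup: rx-queues-config {
                snps,rx-queues-to-use = <2>;
                snps,rx-sched-sp;
                queue0 {
                        snps,dcb-algorithm;
                        snps,map-to-dma-channel = <0x0>;
                        snps,priority = <0x0>;
                };
                queue1 {
                        snps,dcb-algorithm;
                        snps,map-to-dma-channel = <0x1>;
                        snps,route-ptp;
                };
        };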
+
 /**
  * stmmac_dt_phy - parse device-tree driver parameters to allocate PHY resources
  * @plat: driver data platform structure
@@ -340,6 +489,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 
        plat->axi = stmmac_axi_setup(pdev);
 
+       stmmac_mtl_setup(pdev, plat);
+
        /* clock setup */
        plat->stmmac_clk = devm_clk_get(&pdev->dev,
                                        STMMAC_RESOURCE_NAME);
@@ -359,13 +510,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
        clk_prepare_enable(plat->pclk);
 
        /* Fall-back to main clock in case of no PTP ref is passed */
-       plat->clk_ptp_ref = devm_clk_get(&pdev->dev, "clk_ptp_ref");
+       plat->clk_ptp_ref = devm_clk_get(&pdev->dev, "ptp_ref");
        if (IS_ERR(plat->clk_ptp_ref)) {
                plat->clk_ptp_rate = clk_get_rate(plat->stmmac_clk);
                plat->clk_ptp_ref = NULL;
                dev_warn(&pdev->dev, "PTP uses main clock\n");
        } else {
-               clk_prepare_enable(plat->clk_ptp_ref);
                plat->clk_ptp_rate = clk_get_rate(plat->clk_ptp_ref);
                dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate);
        }
index 0e8e89f17dbb1128c6b562b17ae736622b6cf45b..382993c1561c5c9b071138d0f8ca0def10743e83 100644 (file)
@@ -691,7 +691,8 @@ static void cas_mif_poll(struct cas *cp, const int enable)
 }
 
 /* Must be invoked under cp->lock */
-static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep)
+static void cas_begin_auto_negotiation(struct cas *cp,
+                                      const struct ethtool_link_ksettings *ep)
 {
        u16 ctl;
 #if 1
@@ -704,16 +705,16 @@ static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep)
        if (!ep)
                goto start_aneg;
        lcntl = cp->link_cntl;
-       if (ep->autoneg == AUTONEG_ENABLE)
+       if (ep->base.autoneg == AUTONEG_ENABLE)
        cp->link_cntl = BMCR_ANENABLE;
        else {
-               u32 speed = ethtool_cmd_speed(ep);
+               u32 speed = ep->base.speed;
+               u32 speed = ep->base.speed;
                cp->link_cntl = 0;
                if (speed == SPEED_100)
                        cp->link_cntl |= BMCR_SPEED100;
                else if (speed == SPEED_1000)
                        cp->link_cntl |= CAS_BMCR_SPEED1000;
-               if (ep->duplex == DUPLEX_FULL)
+               if (ep->base.duplex == DUPLEX_FULL)
                        cp->link_cntl |= BMCR_FULLDPLX;
        }
 #if 1
@@ -4528,19 +4529,21 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
        strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info));
 }
 
-static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int cas_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct cas *cp = netdev_priv(dev);
        u16 bmcr;
        int full_duplex, speed, pause;
        unsigned long flags;
        enum link_state linkstate = link_up;
+       u32 supported, advertising;
 
-       cmd->advertising = 0;
-       cmd->supported = SUPPORTED_Autoneg;
+       advertising = 0;
+       supported = SUPPORTED_Autoneg;
        if (cp->cas_flags & CAS_FLAG_1000MB_CAP) {
-               cmd->supported |= SUPPORTED_1000baseT_Full;
-               cmd->advertising |= ADVERTISED_1000baseT_Full;
+               supported |= SUPPORTED_1000baseT_Full;
+               advertising |= ADVERTISED_1000baseT_Full;
        }
 
        /* Record PHY settings if HW is on. */
@@ -4548,17 +4551,15 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        bmcr = 0;
        linkstate = cp->lstate;
        if (CAS_PHY_MII(cp->phy_type)) {
-               cmd->port = PORT_MII;
-               cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ?
-                       XCVR_INTERNAL : XCVR_EXTERNAL;
-               cmd->phy_address = cp->phy_addr;
-               cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII |
+               cmd->base.port = PORT_MII;
+               cmd->base.phy_address = cp->phy_addr;
+               advertising |= ADVERTISED_TP | ADVERTISED_MII |
                        ADVERTISED_10baseT_Half |
                        ADVERTISED_10baseT_Full |
                        ADVERTISED_100baseT_Half |
                        ADVERTISED_100baseT_Full;
 
-               cmd->supported |=
+               supported |=
                        (SUPPORTED_10baseT_Half |
                         SUPPORTED_10baseT_Full |
                         SUPPORTED_100baseT_Half |
@@ -4574,11 +4575,10 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                }
 
        } else {
-               cmd->port = PORT_FIBRE;
-               cmd->transceiver = XCVR_INTERNAL;
-               cmd->phy_address = 0;
-               cmd->supported   |= SUPPORTED_FIBRE;
-               cmd->advertising |= ADVERTISED_FIBRE;
+               cmd->base.port = PORT_FIBRE;
+               cmd->base.phy_address = 0;
+               supported   |= SUPPORTED_FIBRE;
+               advertising |= ADVERTISED_FIBRE;
 
                if (cp->hw_running) {
                        /* pcs uses the same bits as mii */
@@ -4590,21 +4590,20 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        spin_unlock_irqrestore(&cp->lock, flags);
 
        if (bmcr & BMCR_ANENABLE) {
-               cmd->advertising |= ADVERTISED_Autoneg;
-               cmd->autoneg = AUTONEG_ENABLE;
-               ethtool_cmd_speed_set(cmd, ((speed == 10) ?
+               advertising |= ADVERTISED_Autoneg;
+               cmd->base.autoneg = AUTONEG_ENABLE;
+               cmd->base.speed =  ((speed == 10) ?
                                            SPEED_10 :
                                            ((speed == 1000) ?
-                                            SPEED_1000 : SPEED_100)));
-               cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
+                                            SPEED_1000 : SPEED_100));
+               cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
        } else {
-               cmd->autoneg = AUTONEG_DISABLE;
-               ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ?
+               cmd->base.autoneg = AUTONEG_DISABLE;
+               cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ?
                                            SPEED_1000 :
                                            ((bmcr & BMCR_SPEED100) ?
-                                            SPEED_100 : SPEED_10)));
-               cmd->duplex =
-                       (bmcr & BMCR_FULLDPLX) ?
+                                            SPEED_100 : SPEED_10));
+               cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ?
                        DUPLEX_FULL : DUPLEX_HALF;
        }
        if (linkstate != link_up) {
@@ -4619,39 +4618,46 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                 * settings that we configured.
                 */
                if (cp->link_cntl & BMCR_ANENABLE) {
-                       ethtool_cmd_speed_set(cmd, 0);
-                       cmd->duplex = 0xff;
+                       cmd->base.speed = 0;
+                       cmd->base.duplex = 0xff;
                } else {
-                       ethtool_cmd_speed_set(cmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
                        if (cp->link_cntl & BMCR_SPEED100) {
-                               ethtool_cmd_speed_set(cmd, SPEED_100);
+                               cmd->base.speed = SPEED_100;
                        } else if (cp->link_cntl & CAS_BMCR_SPEED1000) {
-                               ethtool_cmd_speed_set(cmd, SPEED_1000);
+                               cmd->base.speed = SPEED_1000;
                        }
-                       cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)?
+                       cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ?
                                DUPLEX_FULL : DUPLEX_HALF;
                }
        }
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+
        return 0;
 }
 
-static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int cas_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct cas *cp = netdev_priv(dev);
        unsigned long flags;
-       u32 speed = ethtool_cmd_speed(cmd);
+       u32 speed = cmd->base.speed;
 
        /* Verify the settings we care about. */
-       if (cmd->autoneg != AUTONEG_ENABLE &&
-           cmd->autoneg != AUTONEG_DISABLE)
+       if (cmd->base.autoneg != AUTONEG_ENABLE &&
+           cmd->base.autoneg != AUTONEG_DISABLE)
                return -EINVAL;
 
-       if (cmd->autoneg == AUTONEG_DISABLE &&
+       if (cmd->base.autoneg == AUTONEG_DISABLE &&
            ((speed != SPEED_1000 &&
              speed != SPEED_100 &&
              speed != SPEED_10) ||
-            (cmd->duplex != DUPLEX_HALF &&
-             cmd->duplex != DUPLEX_FULL)))
+            (cmd->base.duplex != DUPLEX_HALF &&
+             cmd->base.duplex != DUPLEX_FULL)))
                return -EINVAL;
 
        /* Apply settings and restart link process. */
@@ -4753,8 +4759,6 @@ static void cas_get_ethtool_stats(struct net_device *dev,
 
 static const struct ethtool_ops cas_ethtool_ops = {
        .get_drvinfo            = cas_get_drvinfo,
-       .get_settings           = cas_get_settings,
-       .set_settings           = cas_set_settings,
        .nway_reset             = cas_nway_reset,
        .get_link               = cas_get_link,
        .get_msglevel           = cas_get_msglevel,
@@ -4764,6 +4768,8 @@ static const struct ethtool_ops cas_ethtool_ops = {
        .get_sset_count         = cas_get_sset_count,
        .get_strings            = cas_get_strings,
        .get_ethtool_stats      = cas_get_ethtool_stats,
+       .get_link_ksettings     = cas_get_link_ksettings,
+       .set_link_ksettings     = cas_set_link_ksettings,
 };
 
 static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
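
cassini is the first of the Sun drivers in this batch (cas, niu, sungem and
sunhme below) moved from the legacy get_settings/set_settings ethtool ops to
get_link_ksettings/set_link_ksettings. The recurring pattern: speed, duplex,
port, phy_address and autoneg move into cmd->base, the legacy u32
supported/advertising masks are translated with the
ethtool_convert_legacy_u32_to_link_mode() helpers, and the deprecated
transceiver field is dropped outright. A minimal sketch of the shape
(foo_* names and the mode masks are placeholders):

        static int foo_get_link_ksettings(struct net_device *dev,
                                          struct ethtool_link_ksettings *cmd)
        {
                u32 supported = SUPPORTED_Autoneg | SUPPORTED_100baseT_Full;

                cmd->base.speed = SPEED_100;
                cmd->base.duplex = DUPLEX_FULL;
                cmd->base.autoneg = AUTONEG_ENABLE;
                ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
                                                        supported);
                return 0;
        }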
index 89952deae47fc813b66c6d856f16dd648a803a0c..5a90fed0626065613ba59fa7c5f8ca3c2dff6ef3 100644 (file)
@@ -1,6 +1,6 @@
 /* ldmvsw.c: Sun4v LDOM Virtual Switch Driver.
  *
- * Copyright (C) 2016 Oracle. All rights reserved.
+ * Copyright (C) 2016-2017 Oracle. All rights reserved.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -41,8 +41,8 @@
 static u8 vsw_port_hwaddr[ETH_ALEN] = {0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
 
 #define DRV_MODULE_NAME                "ldmvsw"
-#define DRV_MODULE_VERSION     "1.1"
-#define DRV_MODULE_RELDATE     "February 3, 2017"
+#define DRV_MODULE_VERSION     "1.2"
+#define DRV_MODULE_RELDATE     "March 4, 2017"
 
 static char version[] =
        DRV_MODULE_NAME " " DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")";
@@ -123,6 +123,20 @@ static void vsw_set_rx_mode(struct net_device *dev)
        return sunvnet_set_rx_mode_common(dev, port->vp);
 }
 
+int ldmvsw_open(struct net_device *dev)
+{
+       struct vnet_port *port = netdev_priv(dev);
+       struct vio_driver_state *vio = &port->vio;
+
+       /* reset the channel */
+       vio_link_state_change(vio, LDC_EVENT_RESET);
+       vnet_port_reset(port);
+       vio_port_up(vio);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ldmvsw_open);
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void vsw_poll_controller(struct net_device *dev)
 {
@@ -133,7 +147,7 @@ static void vsw_poll_controller(struct net_device *dev)
 #endif
 
 static const struct net_device_ops vsw_ops = {
-       .ndo_open               = sunvnet_open_common,
+       .ndo_open               = ldmvsw_open,
        .ndo_stop               = sunvnet_close_common,
        .ndo_set_rx_mode        = vsw_set_rx_mode,
        .ndo_set_mac_address    = sunvnet_set_mac_addr_common,
@@ -365,6 +379,11 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        napi_enable(&port->napi);
        vio_port_up(&port->vio);
 
+       /* ensure no carrier until we receive an LDC_EVENT_UP,
+        * even if the vsw config script tries to force us up
+        */
+       netif_carrier_off(dev);
+
        netdev_info(dev, "LDOM vsw-port %pM\n", dev->dev_addr);
 
        pr_info("%s: PORT ( remote-mac %pM%s )\n", dev->name,
index 57978056b3366f0ecb0410f96dbfbde228ea44f1..2dcca249eb9c732e48a563f22d4e98bc79429e24 100644 (file)
@@ -6813,7 +6813,8 @@ static void niu_get_drvinfo(struct net_device *dev,
                        sizeof(info->bus_info));
 }
 
-static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int niu_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct niu *np = netdev_priv(dev);
        struct niu_link_config *lp;
@@ -6821,28 +6822,30 @@ static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        lp = &np->link_config;
 
        memset(cmd, 0, sizeof(*cmd));
-       cmd->phy_address = np->phy_addr;
-       cmd->supported = lp->supported;
-       cmd->advertising = lp->active_advertising;
-       cmd->autoneg = lp->active_autoneg;
-       ethtool_cmd_speed_set(cmd, lp->active_speed);
-       cmd->duplex = lp->active_duplex;
-       cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP;
-       cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ?
-               XCVR_EXTERNAL : XCVR_INTERNAL;
+       cmd->base.phy_address = np->phy_addr;
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               lp->supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               lp->active_advertising);
+       cmd->base.autoneg = lp->active_autoneg;
+       cmd->base.speed = lp->active_speed;
+       cmd->base.duplex = lp->active_duplex;
+       cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP;
 
        return 0;
 }
 
-static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int niu_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct niu *np = netdev_priv(dev);
        struct niu_link_config *lp = &np->link_config;
 
-       lp->advertising = cmd->advertising;
-       lp->speed = ethtool_cmd_speed(cmd);
-       lp->duplex = cmd->duplex;
-       lp->autoneg = cmd->autoneg;
+       ethtool_convert_link_mode_to_legacy_u32(&lp->advertising,
+                                               cmd->link_modes.advertising);
+       lp->speed = cmd->base.speed;
+       lp->duplex = cmd->base.duplex;
+       lp->autoneg = cmd->base.autoneg;
        return niu_init_link(np);
 }
 
@@ -7902,14 +7905,14 @@ static const struct ethtool_ops niu_ethtool_ops = {
        .nway_reset             = niu_nway_reset,
        .get_eeprom_len         = niu_get_eeprom_len,
        .get_eeprom             = niu_get_eeprom,
-       .get_settings           = niu_get_settings,
-       .set_settings           = niu_set_settings,
        .get_strings            = niu_get_strings,
        .get_sset_count         = niu_get_sset_count,
        .get_ethtool_stats      = niu_get_ethtool_stats,
        .set_phys_id            = niu_set_phys_id,
        .get_rxnfc              = niu_get_nfc,
        .set_rxnfc              = niu_set_nfc,
+       .get_link_ksettings     = niu_get_link_ksettings,
+       .set_link_ksettings     = niu_set_link_ksettings,
 };
 
 static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent,
index 5c5952e782cd223c0aee844d414ee6c749c35804..fa607d062cb3130eff15295f61a8efce7c6a969c 100644 (file)
@@ -1250,12 +1250,18 @@ static void gem_stop_dma(struct gem *gp)
 
 
 // XXX dbl check what that function should do when called on PCS PHY
-static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
+static void gem_begin_auto_negotiation(struct gem *gp,
+                                      const struct ethtool_link_ksettings *ep)
 {
        u32 advertise, features;
        int autoneg;
        int speed;
        int duplex;
+       u32 advertising;
+
+       if (ep)
+               ethtool_convert_link_mode_to_legacy_u32(
+                       &advertising, ep->link_modes.advertising);
 
        if (gp->phy_type != phy_mii_mdio0 &&
            gp->phy_type != phy_mii_mdio1)
@@ -1278,13 +1284,13 @@ static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
        /* Setup link parameters */
        if (!ep)
                goto start_aneg;
-       if (ep->autoneg == AUTONEG_ENABLE) {
-               advertise = ep->advertising;
+       if (ep->base.autoneg == AUTONEG_ENABLE) {
+               advertise = advertising;
                autoneg = 1;
        } else {
                autoneg = 0;
-               speed = ethtool_cmd_speed(ep);
-               duplex = ep->duplex;
+               speed = ep->base.speed;
+               duplex = ep->base.duplex;
        }
 
 start_aneg:
@@ -2515,85 +2521,96 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
        strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info));
 }
 
-static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int gem_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct gem *gp = netdev_priv(dev);
+       u32 supported, advertising;
 
        if (gp->phy_type == phy_mii_mdio0 ||
            gp->phy_type == phy_mii_mdio1) {
                if (gp->phy_mii.def)
-                       cmd->supported = gp->phy_mii.def->features;
+                       supported = gp->phy_mii.def->features;
                else
-                       cmd->supported = (SUPPORTED_10baseT_Half |
+                       supported = (SUPPORTED_10baseT_Half |
                                          SUPPORTED_10baseT_Full);
 
                /* XXX hardcoded stuff for now */
-               cmd->port = PORT_MII;
-               cmd->transceiver = XCVR_EXTERNAL;
-               cmd->phy_address = 0; /* XXX fixed PHYAD */
+               cmd->base.port = PORT_MII;
+               cmd->base.phy_address = 0; /* XXX fixed PHYAD */
 
                /* Return current PHY settings */
-               cmd->autoneg = gp->want_autoneg;
-               ethtool_cmd_speed_set(cmd, gp->phy_mii.speed);
-               cmd->duplex = gp->phy_mii.duplex;
-               cmd->advertising = gp->phy_mii.advertising;
+               cmd->base.autoneg = gp->want_autoneg;
+               cmd->base.speed = gp->phy_mii.speed;
+               cmd->base.duplex = gp->phy_mii.duplex;
+               advertising = gp->phy_mii.advertising;
 
                /* If we started with a forced mode, we don't have a default
                 * advertise set, we need to return something sensible so
                 * userland can re-enable autoneg properly.
                 */
-               if (cmd->advertising == 0)
-                       cmd->advertising = cmd->supported;
+               if (advertising == 0)
+                       advertising = supported;
        } else { // XXX PCS ?
-               cmd->supported =
+               supported =
                        (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
                         SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
                         SUPPORTED_Autoneg);
-               cmd->advertising = cmd->supported;
-               ethtool_cmd_speed_set(cmd, 0);
-               cmd->duplex = cmd->port = cmd->phy_address =
-                       cmd->transceiver = cmd->autoneg = 0;
+               advertising = supported;
+               cmd->base.speed = 0;
+               cmd->base.duplex = 0;
+               cmd->base.port = 0;
+               cmd->base.phy_address = 0;
+               cmd->base.autoneg = 0;
 
                /* serdes means usually a Fibre connector, with most fixed */
                if (gp->phy_type == phy_serdes) {
-                       cmd->port = PORT_FIBRE;
-                       cmd->supported = (SUPPORTED_1000baseT_Half |
+                       cmd->base.port = PORT_FIBRE;
+                       supported = (SUPPORTED_1000baseT_Half |
                                SUPPORTED_1000baseT_Full |
                                SUPPORTED_FIBRE | SUPPORTED_Autoneg |
                                SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-                       cmd->advertising = cmd->supported;
-                       cmd->transceiver = XCVR_INTERNAL;
+                       advertising = supported;
                        if (gp->lstate == link_up)
-                               ethtool_cmd_speed_set(cmd, SPEED_1000);
-                       cmd->duplex = DUPLEX_FULL;
-                       cmd->autoneg = 1;
+                               cmd->base.speed = SPEED_1000;
+                       cmd->base.duplex = DUPLEX_FULL;
+                       cmd->base.autoneg = 1;
                }
        }
-       cmd->maxtxpkt = cmd->maxrxpkt = 0;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int gem_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct gem *gp = netdev_priv(dev);
-       u32 speed = ethtool_cmd_speed(cmd);
+       u32 speed = cmd->base.speed;
+       u32 advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        /* Verify the settings we care about. */
-       if (cmd->autoneg != AUTONEG_ENABLE &&
-           cmd->autoneg != AUTONEG_DISABLE)
+       if (cmd->base.autoneg != AUTONEG_ENABLE &&
+           cmd->base.autoneg != AUTONEG_DISABLE)
                return -EINVAL;
 
-       if (cmd->autoneg == AUTONEG_ENABLE &&
-           cmd->advertising == 0)
+       if (cmd->base.autoneg == AUTONEG_ENABLE &&
+           advertising == 0)
                return -EINVAL;
 
-       if (cmd->autoneg == AUTONEG_DISABLE &&
+       if (cmd->base.autoneg == AUTONEG_DISABLE &&
            ((speed != SPEED_1000 &&
              speed != SPEED_100 &&
              speed != SPEED_10) ||
-            (cmd->duplex != DUPLEX_HALF &&
-             cmd->duplex != DUPLEX_FULL)))
+            (cmd->base.duplex != DUPLEX_HALF &&
+             cmd->base.duplex != DUPLEX_FULL)))
                return -EINVAL;
 
        /* Apply settings and restart link process. */
@@ -2666,13 +2683,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 static const struct ethtool_ops gem_ethtool_ops = {
        .get_drvinfo            = gem_get_drvinfo,
        .get_link               = ethtool_op_get_link,
-       .get_settings           = gem_get_settings,
-       .set_settings           = gem_set_settings,
        .nway_reset             = gem_nway_reset,
        .get_msglevel           = gem_get_msglevel,
        .set_msglevel           = gem_set_msglevel,
        .get_wol                = gem_get_wol,
        .set_wol                = gem_set_wol,
+       .get_link_ksettings     = gem_get_link_ksettings,
+       .set_link_ksettings     = gem_set_link_ksettings,
 };
 
 static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
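
The sungem conversion above is the pattern repeated throughout this series: the flat u32 supported/advertising masks of struct ethtool_cmd move into local u32 variables, the scalar fields (speed, duplex, autoneg, port, phy_address) move under cmd->base, and the legacy masks are converted to link-mode bitmaps at the end; fields with no ksettings counterpart (transceiver, maxtxpkt/maxrxpkt) are simply dropped. A minimal sketch of the get side, assuming a hypothetical driver whose private state still holds legacy u32 masks (foo_priv and its members are illustrative, not from this patch):

	static int foo_get_link_ksettings(struct net_device *dev,
					  struct ethtool_link_ksettings *cmd)
	{
		struct foo_priv *priv = netdev_priv(dev);	/* hypothetical */

		/* ethtool_cmd_speed_set(cmd, ...) becomes a plain assignment */
		cmd->base.speed = priv->speed;
		cmd->base.duplex = priv->duplex;
		cmd->base.autoneg = priv->autoneg;

		/* legacy u32 SUPPORTED_xxx / ADVERTISED_xxx masks become bitmaps */
		ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
							priv->supported);
		ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
							priv->advertising);

		return 0;
	}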
index 72ff05cd3ed80c883d2bc34a969e8bdb1b61992a..53ff66ef53acfec77caafd90163361d80502f735 100644 (file)
@@ -1294,9 +1294,10 @@ static void happy_meal_init_rings(struct happy_meal *hp)
 }
 
 /* hp->happy_lock must be held */
-static void happy_meal_begin_auto_negotiation(struct happy_meal *hp,
-                                             void __iomem *tregs,
-                                             struct ethtool_cmd *ep)
+static void
+happy_meal_begin_auto_negotiation(struct happy_meal *hp,
+                                 void __iomem *tregs,
+                                 const struct ethtool_link_ksettings *ep)
 {
        int timeout;
 
@@ -1309,7 +1310,7 @@ static void happy_meal_begin_auto_negotiation(struct happy_meal *hp,
        /* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */
 
        hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE);
-       if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) {
+       if (!ep || ep->base.autoneg == AUTONEG_ENABLE) {
                /* Advertise everything we can support. */
                if (hp->sw_bmsr & BMSR_10HALF)
                        hp->sw_advertise |= (ADVERTISE_10HALF);
@@ -1384,14 +1385,14 @@ force_link:
                /* Disable auto-negotiation in BMCR, enable the duplex and
                 * speed setting, init the timer state machine, and fire it off.
                 */
-               if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) {
+               if (!ep || ep->base.autoneg == AUTONEG_ENABLE) {
                        hp->sw_bmcr = BMCR_SPEED100;
                } else {
-                       if (ethtool_cmd_speed(ep) == SPEED_100)
+                       if (ep->base.speed == SPEED_100)
                                hp->sw_bmcr = BMCR_SPEED100;
                        else
                                hp->sw_bmcr = 0;
-                       if (ep->duplex == DUPLEX_FULL)
+                       if (ep->base.duplex == DUPLEX_FULL)
                                hp->sw_bmcr |= BMCR_FULLDPLX;
                }
                happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr);
@@ -2434,20 +2435,21 @@ static void happy_meal_set_multicast(struct net_device *dev)
 }
 
 /* Ethtool support... */
-static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int hme_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct happy_meal *hp = netdev_priv(dev);
        u32 speed;
+       u32 supported;
 
-       cmd->supported =
+       supported =
                (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
                 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
                 SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
 
        /* XXX hardcoded stuff for now */
-       cmd->port = PORT_TP; /* XXX no MII support */
-       cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */
-       cmd->phy_address = 0; /* XXX fixed PHYAD */
+       cmd->base.port = PORT_TP; /* XXX no MII support */
+       cmd->base.phy_address = 0; /* XXX fixed PHYAD */
 
        /* Record PHY settings. */
        spin_lock_irq(&hp->happy_lock);
@@ -2456,41 +2458,45 @@ static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        spin_unlock_irq(&hp->happy_lock);
 
        if (hp->sw_bmcr & BMCR_ANENABLE) {
-               cmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
                speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ?
                         SPEED_100 : SPEED_10);
                if (speed == SPEED_100)
-                       cmd->duplex =
+                       cmd->base.duplex =
                                (hp->sw_lpa & (LPA_100FULL)) ?
                                DUPLEX_FULL : DUPLEX_HALF;
                else
-                       cmd->duplex =
+                       cmd->base.duplex =
                                (hp->sw_lpa & (LPA_10FULL)) ?
                                DUPLEX_FULL : DUPLEX_HALF;
        } else {
-               cmd->autoneg = AUTONEG_DISABLE;
+               cmd->base.autoneg = AUTONEG_DISABLE;
                speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
-               cmd->duplex =
+               cmd->base.duplex =
                        (hp->sw_bmcr & BMCR_FULLDPLX) ?
                        DUPLEX_FULL : DUPLEX_HALF;
        }
-       ethtool_cmd_speed_set(cmd, speed);
+       cmd->base.speed = speed;
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+
        return 0;
 }
 
-static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int hme_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct happy_meal *hp = netdev_priv(dev);
 
        /* Verify the settings we care about. */
-       if (cmd->autoneg != AUTONEG_ENABLE &&
-           cmd->autoneg != AUTONEG_DISABLE)
+       if (cmd->base.autoneg != AUTONEG_ENABLE &&
+           cmd->base.autoneg != AUTONEG_DISABLE)
                return -EINVAL;
-       if (cmd->autoneg == AUTONEG_DISABLE &&
-           ((ethtool_cmd_speed(cmd) != SPEED_100 &&
-             ethtool_cmd_speed(cmd) != SPEED_10) ||
-            (cmd->duplex != DUPLEX_HALF &&
-             cmd->duplex != DUPLEX_FULL)))
+       if (cmd->base.autoneg == AUTONEG_DISABLE &&
+           ((cmd->base.speed != SPEED_100 &&
+             cmd->base.speed != SPEED_10) ||
+            (cmd->base.duplex != DUPLEX_HALF &&
+             cmd->base.duplex != DUPLEX_FULL)))
                return -EINVAL;
 
        /* Ok, do it to it. */
@@ -2537,10 +2543,10 @@ static u32 hme_get_link(struct net_device *dev)
 }
 
 static const struct ethtool_ops hme_ethtool_ops = {
-       .get_settings           = hme_get_settings,
-       .set_settings           = hme_set_settings,
        .get_drvinfo            = hme_get_drvinfo,
        .get_link               = hme_get_link,
+       .get_link_ksettings     = hme_get_link_ksettings,
+       .set_link_ksettings     = hme_set_link_ksettings,
 };
 
 static int hme_version_printed;
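
On the set side the conversion runs the other way: the requested link-mode bitmap is folded back into a legacy u32 before validation, exactly as gem_set_link_ksettings() and hme_set_link_ksettings() do above. A minimal sketch of that shape, again with hypothetical foo_* names:

	static int foo_set_link_ksettings(struct net_device *dev,
					  const struct ethtool_link_ksettings *cmd)
	{
		u32 advertising;

		/* returns false if the bitmap holds modes a u32 cannot express */
		ethtool_convert_link_mode_to_legacy_u32(&advertising,
							cmd->link_modes.advertising);

		if (cmd->base.autoneg != AUTONEG_ENABLE &&
		    cmd->base.autoneg != AUTONEG_DISABLE)
			return -EINVAL;

		/* autoneg needs at least one mode to advertise */
		if (cmd->base.autoneg == AUTONEG_ENABLE && advertising == 0)
			return -EINVAL;

		/* apply the validated settings to hardware here */
		return 0;
	}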
index 4cc2571f71c6b65a076071789cef36f537bc0ddd..0b95105f706007ae9e0ff4325ea9983f9d514ee4 100644 (file)
@@ -1,7 +1,7 @@
 /* sunvnet.c: Sun LDOM Virtual Network Driver.
  *
  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
- * Copyright (C) 2016 Oracle. All rights reserved.
+ * Copyright (C) 2016-2017 Oracle. All rights reserved.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -77,11 +77,125 @@ static void vnet_set_msglevel(struct net_device *dev, u32 value)
        vp->msg_enable = value;
 }
 
+static const struct {
+       const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+       { "rx_packets" },
+       { "tx_packets" },
+       { "rx_bytes" },
+       { "tx_bytes" },
+       { "rx_errors" },
+       { "tx_errors" },
+       { "rx_dropped" },
+       { "tx_dropped" },
+       { "multicast" },
+       { "rx_length_errors" },
+       { "rx_frame_errors" },
+       { "rx_missed_errors" },
+       { "tx_carrier_errors" },
+       { "nports" },
+};
+
+static int vnet_get_sset_count(struct net_device *dev, int sset)
+{
+       struct vnet *vp = (struct vnet *)netdev_priv(dev);
+
+       switch (sset) {
+       case ETH_SS_STATS:
+               return ARRAY_SIZE(ethtool_stats_keys)
+                       + (NUM_VNET_PORT_STATS * vp->nports);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static void vnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+       struct vnet *vp = (struct vnet *)netdev_priv(dev);
+       struct vnet_port *port;
+       char *p = (char *)buf;
+
+       switch (stringset) {
+       case ETH_SS_STATS:
+               memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+               p += sizeof(ethtool_stats_keys);
+
+               rcu_read_lock();
+               list_for_each_entry_rcu(port, &vp->port_list, list) {
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.%s-%pM",
+                                port->q_index, port->switch_port ? "s" : "q",
+                                port->raddr);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.rx_packets",
+                                port->q_index);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.tx_packets",
+                                port->q_index);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.rx_bytes",
+                                port->q_index);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.tx_bytes",
+                                port->q_index);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.event_up",
+                                port->q_index);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "p%u.event_reset",
+                                port->q_index);
+                       p += ETH_GSTRING_LEN;
+               }
+               rcu_read_unlock();
+               break;
+       default:
+               WARN_ON(1);
+               break;
+       }
+}
+
+static void vnet_get_ethtool_stats(struct net_device *dev,
+                                  struct ethtool_stats *estats, u64 *data)
+{
+       struct vnet *vp = (struct vnet *)netdev_priv(dev);
+       struct vnet_port *port;
+       int i = 0;
+
+       data[i++] = dev->stats.rx_packets;
+       data[i++] = dev->stats.tx_packets;
+       data[i++] = dev->stats.rx_bytes;
+       data[i++] = dev->stats.tx_bytes;
+       data[i++] = dev->stats.rx_errors;
+       data[i++] = dev->stats.tx_errors;
+       data[i++] = dev->stats.rx_dropped;
+       data[i++] = dev->stats.tx_dropped;
+       data[i++] = dev->stats.multicast;
+       data[i++] = dev->stats.rx_length_errors;
+       data[i++] = dev->stats.rx_frame_errors;
+       data[i++] = dev->stats.rx_missed_errors;
+       data[i++] = dev->stats.tx_carrier_errors;
+       data[i++] = vp->nports;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(port, &vp->port_list, list) {
+               data[i++] = port->q_index;
+               data[i++] = port->stats.rx_packets;
+               data[i++] = port->stats.tx_packets;
+               data[i++] = port->stats.rx_bytes;
+               data[i++] = port->stats.tx_bytes;
+               data[i++] = port->stats.event_up;
+               data[i++] = port->stats.event_reset;
+       }
+       rcu_read_unlock();
+}
+
 static const struct ethtool_ops vnet_ethtool_ops = {
        .get_drvinfo            = vnet_get_drvinfo,
        .get_msglevel           = vnet_get_msglevel,
        .set_msglevel           = vnet_set_msglevel,
        .get_link               = ethtool_op_get_link,
+       .get_sset_count         = vnet_get_sset_count,
+       .get_strings            = vnet_get_strings,
+       .get_ethtool_stats      = vnet_get_ethtool_stats,
 };
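
The three new ops implement ethtool's -S contract: get_sset_count() returns N, get_strings() writes N names, and get_ethtool_stats() writes N u64 values in exactly the same order; here N is the fixed key table plus NUM_VNET_PORT_STATS entries per port. A minimal sketch of that contract with a hypothetical two-counter driver (the bar_* names are illustrative):

	static const char bar_stat_names[][ETH_GSTRING_LEN] = {
		"rx_packets",
		"tx_packets",
	};

	static int bar_get_sset_count(struct net_device *dev, int sset)
	{
		return sset == ETH_SS_STATS ? ARRAY_SIZE(bar_stat_names)
					    : -EOPNOTSUPP;
	}

	static void bar_get_strings(struct net_device *dev, u32 sset, u8 *buf)
	{
		if (sset == ETH_SS_STATS)
			memcpy(buf, bar_stat_names, sizeof(bar_stat_names));
	}

	static void bar_get_ethtool_stats(struct net_device *dev,
					  struct ethtool_stats *estats, u64 *data)
	{
		/* values must line up index-for-index with the names above */
		data[0] = dev->stats.rx_packets;
		data[1] = dev->stats.tx_packets;
	}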
 
 static LIST_HEAD(vnet_list);
index fa2d11ca9b81e49d84a0b66dbe262cd3977c0560..9e86833249d48beed95fe8c4d19774582f29dc75 100644 (file)
@@ -1,7 +1,7 @@
 /* sunvnet.c: Sun LDOM Virtual Network Driver.
  *
  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
- * Copyright (C) 2016 Oracle. All rights reserved.
+ * Copyright (C) 2016-2017 Oracle. All rights reserved.
  */
 
 #include <linux/module.h>
@@ -43,7 +43,6 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION("1.1");
 
 static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
-static void vnet_port_reset(struct vnet_port *port);
 
 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
 {
@@ -410,8 +409,12 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
 
        skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;
 
+       if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
+               dev->stats.multicast++;
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += len;
+       port->stats.rx_packets++;
+       port->stats.rx_bytes += len;
        napi_gro_receive(&port->napi, skb);
        return 0;
 
@@ -747,6 +750,13 @@ static int vnet_event_napi(struct vnet_port *port, int budget)
 
        /* RESET takes precedence over any other event */
        if (port->rx_event & LDC_EVENT_RESET) {
+               /* a link went down */
+
+               if (port->vsw == 1) {
+                       netif_tx_stop_all_queues(dev);
+                       netif_carrier_off(dev);
+               }
+
                vio_link_state_change(vio, LDC_EVENT_RESET);
                vnet_port_reset(port);
                vio_port_up(vio);
@@ -762,12 +772,21 @@ static int vnet_event_napi(struct vnet_port *port, int budget)
                        maybe_tx_wakeup(port);
 
                port->rx_event = 0;
+               port->stats.event_reset++;
                return 0;
        }
 
        if (port->rx_event & LDC_EVENT_UP) {
+               /* a link came up */
+
+               if (port->vsw == 1) {
+                       netif_carrier_on(port->dev);
+                       netif_tx_start_all_queues(port->dev);
+               }
+
                vio_link_state_change(vio, LDC_EVENT_UP);
                port->rx_event = 0;
+               port->stats.event_up++;
                return 0;
        }
 
@@ -1417,6 +1436,8 @@ ldc_start_done:
 
        dev->stats.tx_packets++;
        dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
+       port->stats.tx_packets++;
+       port->stats.tx_bytes += port->tx_bufs[txi].skb->len;
 
        dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
        if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
@@ -1631,7 +1652,7 @@ void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
 }
 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);
 
-static void vnet_port_reset(struct vnet_port *port)
+void vnet_port_reset(struct vnet_port *port)
 {
        del_timer(&port->clean_timer);
        sunvnet_port_free_tx_bufs_common(port);
@@ -1639,6 +1660,7 @@ static void vnet_port_reset(struct vnet_port *port)
        port->tso = (port->vsw == 0);  /* no tso in vsw, misbehaves in bridge */
        port->tsolen = 0;
 }
+EXPORT_SYMBOL_GPL(vnet_port_reset);
 
 static int vnet_port_alloc_tx_ring(struct vnet_port *port)
 {
@@ -1708,20 +1730,32 @@ EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
 void sunvnet_port_add_txq_common(struct vnet_port *port)
 {
        struct vnet *vp = port->vp;
-       int n;
+       int smallest = 0;
+       int i;
+
+       /* Find the least-used queue, preferring an unused one.
+        * When there are more ldoms than queues, we start to
+        * double up on ports per queue.
+        */
+       for (i = 0; i < VNET_MAX_TXQS; i++) {
+               if (vp->q_used[i] == 0) {
+                       smallest = i;
+                       break;
+               }
+               if (vp->q_used[i] < vp->q_used[smallest])
+                       smallest = i;
+       }
 
-       n = vp->nports++;
-       n = n & (VNET_MAX_TXQS - 1);
-       port->q_index = n;
-       netif_tx_wake_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
-                                               port->q_index));
+       vp->nports++;
+       vp->q_used[smallest]++;
+       port->q_index = smallest;
 }
 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);
 
 void sunvnet_port_rm_txq_common(struct vnet_port *port)
 {
        port->vp->nports--;
-       netif_tx_stop_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
-                                               port->q_index));
+       port->vp->q_used[port->q_index]--;
+       port->q_index = 0;
 }
 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);
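
sunvnet_port_add_txq_common() above replaces the old round-robin assignment (n & (VNET_MAX_TXQS - 1)) with least-used selection over q_used[], so ports only double up once every queue has a user. The policy restated as a standalone sketch (pick_txq() and nqueues are illustrative names):

	/* return the first unused queue if any, else the least-used one */
	static int pick_txq(const u8 *q_used, int nqueues)
	{
		int smallest = 0;
		int i;

		for (i = 0; i < nqueues; i++) {
			if (q_used[i] == 0)
				return i;
			if (q_used[i] < q_used[smallest])
				smallest = i;
		}
		return smallest;
	}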
index ce5c824128a3698acb31ddbac42555657863b10f..b20d6fa7ef25b798401e24a59398c893a026c462 100644 (file)
@@ -35,6 +35,19 @@ struct vnet_tx_entry {
 
 struct vnet;
 
+struct vnet_port_stats {
+       /* keep them all the same size */
+       u32 rx_bytes;
+       u32 tx_bytes;
+       u32 rx_packets;
+       u32 tx_packets;
+       u32 event_up;
+       u32 event_reset;
+       u32 q_placeholder;
+};
+
+#define NUM_VNET_PORT_STATS  (sizeof(struct vnet_port_stats) / sizeof(u32))
+
 /* Structure to describe a vnet-port or vsw-port in the MD.
  * If the vsw bit is set, this structure represents a vswitch
  * port, and the net_device can be found from ->dev. If the
@@ -44,6 +57,8 @@ struct vnet;
 struct vnet_port {
        struct vio_driver_state vio;
 
+       struct vnet_port_stats stats;
+
        struct hlist_node       hash;
        u8                      raddr[ETH_ALEN];
        unsigned                switch_port:1;
@@ -97,22 +112,15 @@ struct vnet_mcast_entry {
 };
 
 struct vnet {
-       /* Protects port_list and port_hash.  */
-       spinlock_t              lock;
-
+       spinlock_t              lock; /* Protects port_list and port_hash.  */
        struct net_device       *dev;
-
        u32                     msg_enable;
-
+       u8                      q_used[VNET_MAX_TXQS];
        struct list_head        port_list;
-
        struct hlist_head       port_hash[VNET_PORT_HASH_SIZE];
-
        struct vnet_mcast_entry *mcast_list;
-
        struct list_head        list;
        u64                     local_mac;
-
        int                     nports;
 };
 
@@ -139,6 +147,7 @@ int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg);
 void sunvnet_handshake_complete_common(struct vio_driver_state *vio);
 int sunvnet_poll_common(struct napi_struct *napi, int budget);
 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port);
+void vnet_port_reset(struct vnet_port *port);
 bool sunvnet_port_is_up_common(struct vnet_port *vnet);
 void sunvnet_port_add_txq_common(struct vnet_port *port);
 void sunvnet_port_rm_txq_common(struct vnet_port *port);
diff --git a/drivers/net/ethernet/synopsys/Kconfig b/drivers/net/ethernet/synopsys/Kconfig
new file mode 100644 (file)
index 0000000..a950388
--- /dev/null
@@ -0,0 +1,41 @@
+#
+# Synopsys network device configuration
+#
+
+config NET_VENDOR_SYNOPSYS
+       bool "Synopsys devices"
+       default y
+       ---help---
+         If you have a network (Ethernet) device belonging to this class, say Y.
+
+         Note that the answer to this question doesn't directly affect the
+         kernel: saying N will just cause the configurator to skip all
+         the questions about Synopsys devices. If you say Y, you will be asked
+         for your specific device in the following questions.
+
+if NET_VENDOR_SYNOPSYS
+
+config DWC_XLGMAC
+       tristate "Synopsys DWC Enterprise Ethernet (XLGMAC) driver support"
+       depends on HAS_IOMEM && HAS_DMA
+       select BITREVERSE
+       select CRC32
+       ---help---
+         This driver supports the Synopsys DesignWare Cores Enterprise
+         Ethernet (dwc-xlgmac).
+
+if DWC_XLGMAC
+
+config DWC_XLGMAC_PCI
+       tristate "XLGMAC PCI bus support"
+       depends on DWC_XLGMAC && PCI
+       ---help---
+         This selects the PCI bus support for the dwc-xlgmac driver.
+         This driver was tested on Synopsys XLGMAC IP Prototyping Kit.
+
+         If you have a controller with this interface, say Y or M here.
+         If unsure, say N.
+
+endif # DWC_XLGMAC
+
+endif # NET_VENDOR_SYNOPSYS
diff --git a/drivers/net/ethernet/synopsys/Makefile b/drivers/net/ethernet/synopsys/Makefile
new file mode 100644 (file)
index 0000000..c06e2eb
--- /dev/null
@@ -0,0 +1,9 @@
+#
+# Makefile for the Synopsys network device drivers.
+#
+
+obj-$(CONFIG_DWC_XLGMAC) += dwc-xlgmac.o
+dwc-xlgmac-objs := dwc-xlgmac-net.o dwc-xlgmac-desc.o \
+                  dwc-xlgmac-hw.o dwc-xlgmac-common.o
+
+dwc-xlgmac-$(CONFIG_DWC_XLGMAC_PCI) += dwc-xlgmac-pci.o
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-common.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-common.c
new file mode 100644 (file)
index 0000000..07def2b
--- /dev/null
@@ -0,0 +1,736 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "dwc-xlgmac.h"
+#include "dwc-xlgmac-reg.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int debug = -1;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "DWC ethernet debug level (0=none,...,16=all)");
+static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
+                                     NETIF_MSG_IFUP);
+
+static unsigned char dev_addr[6] = {0, 0x55, 0x7b, 0xb5, 0x7d, 0xf7};
+
+static void xlgmac_read_mac_addr(struct xlgmac_pdata *pdata)
+{
+       struct net_device *netdev = pdata->netdev;
+
+       /* Currently it uses a static MAC address for testing */
+       memcpy(pdata->mac_addr, dev_addr, netdev->addr_len);
+}
+
+static void xlgmac_default_config(struct xlgmac_pdata *pdata)
+{
+       pdata->tx_osp_mode = DMA_OSP_ENABLE;
+       pdata->tx_sf_mode = MTL_TSF_ENABLE;
+       pdata->rx_sf_mode = MTL_RSF_DISABLE;
+       pdata->pblx8 = DMA_PBL_X8_ENABLE;
+       pdata->tx_pbl = DMA_PBL_32;
+       pdata->rx_pbl = DMA_PBL_32;
+       pdata->tx_threshold = MTL_TX_THRESHOLD_128;
+       pdata->rx_threshold = MTL_RX_THRESHOLD_128;
+       pdata->tx_pause = 1;
+       pdata->rx_pause = 1;
+       pdata->phy_speed = SPEED_25000;
+       pdata->sysclk_rate = XLGMAC_SYSCLOCK;
+
+       strlcpy(pdata->drv_name, XLGMAC_DRV_NAME, sizeof(pdata->drv_name));
+       strlcpy(pdata->drv_ver, XLGMAC_DRV_VERSION, sizeof(pdata->drv_ver));
+}
+
+static void xlgmac_init_all_ops(struct xlgmac_pdata *pdata)
+{
+       xlgmac_init_desc_ops(&pdata->desc_ops);
+       xlgmac_init_hw_ops(&pdata->hw_ops);
+}
+
+static int xlgmac_init(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct net_device *netdev = pdata->netdev;
+       unsigned int i;
+       int ret;
+
+       /* Set default configuration data */
+       xlgmac_default_config(pdata);
+
+       /* Set irq, base_addr and MAC address */
+       netdev->irq = pdata->dev_irq;
+       netdev->base_addr = (unsigned long)pdata->mac_regs;
+       xlgmac_read_mac_addr(pdata);
+       memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len);
+
+       /* Set all the function pointers */
+       xlgmac_init_all_ops(pdata);
+
+       /* Issue software reset to device */
+       hw_ops->exit(pdata);
+
+       /* Populate the hardware features */
+       xlgmac_get_all_hw_features(pdata);
+       xlgmac_print_all_hw_features(pdata);
+
+       /* TODO: Set the PHY mode to XLGMII */
+
+       /* Set the DMA mask */
+       ret = dma_set_mask_and_coherent(pdata->dev,
+                                       DMA_BIT_MASK(pdata->hw_feat.dma_width));
+       if (ret) {
+               dev_err(pdata->dev, "dma_set_mask_and_coherent failed\n");
+               return ret;
+       }
+
+       /* Channel and ring params initialization
+        *  pdata->channel_count;
+        *  pdata->tx_ring_count;
+        *  pdata->rx_ring_count;
+        *  pdata->tx_desc_count;
+        *  pdata->rx_desc_count;
+        */
+       BUILD_BUG_ON_NOT_POWER_OF_2(XLGMAC_TX_DESC_CNT);
+       pdata->tx_desc_count = XLGMAC_TX_DESC_CNT;
+       if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) {
+               dev_err(pdata->dev, "tx descriptor count (%d) is not valid\n",
+                       pdata->tx_desc_count);
+               ret = -EINVAL;
+               return ret;
+       }
+       BUILD_BUG_ON_NOT_POWER_OF_2(XLGMAC_RX_DESC_CNT);
+       pdata->rx_desc_count = XLGMAC_RX_DESC_CNT;
+       if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) {
+               dev_err(pdata->dev, "rx descriptor count (%d) is not valid\n",
+                       pdata->rx_desc_count);
+               ret = -EINVAL;
+               return ret;
+       }
+
+       pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(),
+                                    pdata->hw_feat.tx_ch_cnt);
+       pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+                                    pdata->hw_feat.tx_q_cnt);
+       pdata->tx_q_count = pdata->tx_ring_count;
+       ret = netif_set_real_num_tx_queues(netdev, pdata->tx_q_count);
+       if (ret) {
+               dev_err(pdata->dev, "error setting real tx queue count\n");
+               return ret;
+       }
+
+       pdata->rx_ring_count = min_t(unsigned int,
+                                    netif_get_num_default_rss_queues(),
+                                    pdata->hw_feat.rx_ch_cnt);
+       pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count,
+                                    pdata->hw_feat.rx_q_cnt);
+       pdata->rx_q_count = pdata->rx_ring_count;
+       ret = netif_set_real_num_rx_queues(netdev, pdata->rx_q_count);
+       if (ret) {
+               dev_err(pdata->dev, "error setting real rx queue count\n");
+               return ret;
+       }
+
+       pdata->channel_count =
+               max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
+
+       /* Initialize RSS hash key and lookup table */
+       netdev_rss_key_fill(pdata->rss_key, sizeof(pdata->rss_key));
+
+       for (i = 0; i < XLGMAC_RSS_MAX_TABLE_SIZE; i++)
+               pdata->rss_table[i] = XLGMAC_SET_REG_BITS(
+                                       pdata->rss_table[i],
+                                       MAC_RSSDR_DMCH_POS,
+                                       MAC_RSSDR_DMCH_LEN,
+                                       i % pdata->rx_ring_count);
+
+       pdata->rss_options = XLGMAC_SET_REG_BITS(
+                               pdata->rss_options,
+                               MAC_RSSCR_IP2TE_POS,
+                               MAC_RSSCR_IP2TE_LEN, 1);
+       pdata->rss_options = XLGMAC_SET_REG_BITS(
+                               pdata->rss_options,
+                               MAC_RSSCR_TCP4TE_POS,
+                               MAC_RSSCR_TCP4TE_LEN, 1);
+       pdata->rss_options = XLGMAC_SET_REG_BITS(
+                               pdata->rss_options,
+                               MAC_RSSCR_UDP4TE_POS,
+                               MAC_RSSCR_UDP4TE_LEN, 1);
+
+       /* Set device operations */
+       netdev->netdev_ops = xlgmac_get_netdev_ops();
+
+       /* Set device features */
+       if (pdata->hw_feat.tso) {
+               netdev->hw_features = NETIF_F_TSO;
+               netdev->hw_features |= NETIF_F_TSO6;
+               netdev->hw_features |= NETIF_F_SG;
+               netdev->hw_features |= NETIF_F_IP_CSUM;
+               netdev->hw_features |= NETIF_F_IPV6_CSUM;
+       } else if (pdata->hw_feat.tx_coe) {
+               netdev->hw_features = NETIF_F_IP_CSUM;
+               netdev->hw_features |= NETIF_F_IPV6_CSUM;
+       }
+
+       if (pdata->hw_feat.rx_coe) {
+               netdev->hw_features |= NETIF_F_RXCSUM;
+               netdev->hw_features |= NETIF_F_GRO;
+       }
+
+       if (pdata->hw_feat.rss)
+               netdev->hw_features |= NETIF_F_RXHASH;
+
+       netdev->vlan_features |= netdev->hw_features;
+
+       netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+       if (pdata->hw_feat.sa_vlan_ins)
+               netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+       if (pdata->hw_feat.vlhash)
+               netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+       netdev->features |= netdev->hw_features;
+       pdata->netdev_features = netdev->features;
+
+       netdev->priv_flags |= IFF_UNICAST_FLT;
+
+       /* Use default watchdog timeout */
+       netdev->watchdog_timeo = 0;
+
+       /* Tx coalesce parameters initialization */
+       pdata->tx_usecs = XLGMAC_INIT_DMA_TX_USECS;
+       pdata->tx_frames = XLGMAC_INIT_DMA_TX_FRAMES;
+
+       /* Rx coalesce parameters initialization */
+       pdata->rx_riwt = hw_ops->usec_to_riwt(pdata, XLGMAC_INIT_DMA_RX_USECS);
+       pdata->rx_usecs = XLGMAC_INIT_DMA_RX_USECS;
+       pdata->rx_frames = XLGMAC_INIT_DMA_RX_FRAMES;
+
+       return 0;
+}
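
The BUILD_BUG_ON_NOT_POWER_OF_2() checks and the runtime mask tests above encode the same invariant; the rings depend on it later for cheap index wrapping. The identity, as a one-line sketch:

	/* for count > 0: count is a power of two iff (count & (count - 1)) == 0,
	 * which is what lets ring indices wrap with "idx & (count - 1)"
	 */
	static inline bool is_pow2(unsigned int count)
	{
		return count != 0 && (count & (count - 1)) == 0;
	}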
+
+int xlgmac_drv_probe(struct device *dev, struct xlgmac_resources *res)
+{
+       struct xlgmac_pdata *pdata;
+       struct net_device *netdev;
+       int ret;
+
+       netdev = alloc_etherdev_mq(sizeof(struct xlgmac_pdata),
+                                  XLGMAC_MAX_DMA_CHANNELS);
+
+       if (!netdev) {
+               dev_err(dev, "alloc_etherdev failed\n");
+               return -ENOMEM;
+       }
+
+       SET_NETDEV_DEV(netdev, dev);
+       dev_set_drvdata(dev, netdev);
+       pdata = netdev_priv(netdev);
+       pdata->dev = dev;
+       pdata->netdev = netdev;
+
+       pdata->dev_irq = res->irq;
+       pdata->mac_regs = res->addr;
+
+       mutex_init(&pdata->rss_mutex);
+       pdata->msg_enable = netif_msg_init(debug, default_msg_level);
+
+       ret = xlgmac_init(pdata);
+       if (ret) {
+               dev_err(dev, "xlgmac init failed\n");
+               goto err_free_netdev;
+       }
+
+       ret = register_netdev(netdev);
+       if (ret) {
+               dev_err(dev, "net device registration failed\n");
+               goto err_free_netdev;
+       }
+
+       return 0;
+
+err_free_netdev:
+       free_netdev(netdev);
+
+       return ret;
+}
+
+int xlgmac_drv_remove(struct device *dev)
+{
+       struct net_device *netdev = dev_get_drvdata(dev);
+
+       unregister_netdev(netdev);
+       free_netdev(netdev);
+
+       return 0;
+}
+
+void xlgmac_dump_tx_desc(struct xlgmac_pdata *pdata,
+                        struct xlgmac_ring *ring,
+                        unsigned int idx,
+                        unsigned int count,
+                        unsigned int flag)
+{
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+
+       while (count--) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, idx);
+               dma_desc = desc_data->dma_desc;
+
+               netdev_dbg(pdata->netdev, "TX: dma_desc=%p, dma_desc_addr=%pad\n",
+                          desc_data->dma_desc, &desc_data->dma_desc_addr);
+               netdev_dbg(pdata->netdev,
+                          "TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx,
+                          (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE",
+                          le32_to_cpu(dma_desc->desc0),
+                          le32_to_cpu(dma_desc->desc1),
+                          le32_to_cpu(dma_desc->desc2),
+                          le32_to_cpu(dma_desc->desc3));
+
+               idx++;
+       }
+}
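
XLGMAC_GET_DESC_DATA() is defined in dwc-xlgmac.h, which is not part of this diff. Judging from its callers (free-running indices, power-of-two ring sizes), it plausibly wraps the index into desc_data_head[] as sketched below; treat this as an assumption about the header, not text from the patch:

	/* assumed shape of the accessor (dwc-xlgmac.h, not shown here):
	 * the index wraps at the power-of-two ring size, so callers may
	 * pass a monotonically increasing value
	 */
	#define XLGMAC_GET_DESC_DATA(ring, idx)				\
		((ring)->desc_data_head + ((idx) & ((ring)->dma_desc_count - 1)))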
+
+void xlgmac_dump_rx_desc(struct xlgmac_pdata *pdata,
+                        struct xlgmac_ring *ring,
+                        unsigned int idx)
+{
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+
+       desc_data = XLGMAC_GET_DESC_DATA(ring, idx);
+       dma_desc = desc_data->dma_desc;
+
+       netdev_dbg(pdata->netdev, "RX: dma_desc=%p, dma_desc_addr=%pad\n",
+                  desc_data->dma_desc, &desc_data->dma_desc_addr);
+       netdev_dbg(pdata->netdev,
+                  "RX_NORMAL_DESC[%d RX BY DEVICE] = %08x:%08x:%08x:%08x\n",
+                  idx,
+                  le32_to_cpu(dma_desc->desc0),
+                  le32_to_cpu(dma_desc->desc1),
+                  le32_to_cpu(dma_desc->desc2),
+                  le32_to_cpu(dma_desc->desc3));
+}
+
+void xlgmac_print_pkt(struct net_device *netdev,
+                     struct sk_buff *skb, bool tx_rx)
+{
+       struct ethhdr *eth = (struct ethhdr *)skb->data;
+       unsigned char *buf = skb->data;
+       unsigned char buffer[128];
+       unsigned int i, j;
+
+       netdev_dbg(netdev, "\n************** SKB dump ****************\n");
+
+       netdev_dbg(netdev, "%s packet of %d bytes\n",
+                  (tx_rx ? "TX" : "RX"), skb->len);
+
+       netdev_dbg(netdev, "Dst MAC addr: %pM\n", eth->h_dest);
+       netdev_dbg(netdev, "Src MAC addr: %pM\n", eth->h_source);
+       netdev_dbg(netdev, "Protocol: %#06hx\n", ntohs(eth->h_proto));
+
+       for (i = 0, j = 0; i < skb->len;) {
+               j += snprintf(buffer + j, sizeof(buffer) - j, "%02hhx",
+                             buf[i++]);
+
+               if ((i % 32) == 0) {
+                       netdev_dbg(netdev, "  %#06x: %s\n", i - 32, buffer);
+                       j = 0;
+               } else if ((i % 16) == 0) {
+                       buffer[j++] = ' ';
+                       buffer[j++] = ' ';
+               } else if ((i % 4) == 0) {
+                       buffer[j++] = ' ';
+               }
+       }
+       if (i % 32)
+               netdev_dbg(netdev, "  %#06x: %s\n", i - (i % 32), buffer);
+
+       netdev_dbg(netdev, "\n************** SKB dump ****************\n");
+}
+
+void xlgmac_get_all_hw_features(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_features *hw_feat = &pdata->hw_feat;
+       unsigned int mac_hfr0, mac_hfr1, mac_hfr2;
+
+       mac_hfr0 = readl(pdata->mac_regs + MAC_HWF0R);
+       mac_hfr1 = readl(pdata->mac_regs + MAC_HWF1R);
+       mac_hfr2 = readl(pdata->mac_regs + MAC_HWF2R);
+
+       memset(hw_feat, 0, sizeof(*hw_feat));
+
+       hw_feat->version = readl(pdata->mac_regs + MAC_VR);
+
+       /* Hardware feature register 0 */
+       hw_feat->phyifsel    = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_PHYIFSEL_POS,
+                                               MAC_HWF0R_PHYIFSEL_LEN);
+       hw_feat->vlhash      = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_VLHASH_POS,
+                                               MAC_HWF0R_VLHASH_LEN);
+       hw_feat->sma         = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_SMASEL_POS,
+                                               MAC_HWF0R_SMASEL_LEN);
+       hw_feat->rwk         = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_RWKSEL_POS,
+                                               MAC_HWF0R_RWKSEL_LEN);
+       hw_feat->mgk         = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_MGKSEL_POS,
+                                               MAC_HWF0R_MGKSEL_LEN);
+       hw_feat->mmc         = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_MMCSEL_POS,
+                                               MAC_HWF0R_MMCSEL_LEN);
+       hw_feat->aoe         = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_ARPOFFSEL_POS,
+                                               MAC_HWF0R_ARPOFFSEL_LEN);
+       hw_feat->ts          = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_TSSEL_POS,
+                                               MAC_HWF0R_TSSEL_LEN);
+       hw_feat->eee         = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_EEESEL_POS,
+                                               MAC_HWF0R_EEESEL_LEN);
+       hw_feat->tx_coe      = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_TXCOESEL_POS,
+                                               MAC_HWF0R_TXCOESEL_LEN);
+       hw_feat->rx_coe      = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_RXCOESEL_POS,
+                                               MAC_HWF0R_RXCOESEL_LEN);
+       hw_feat->addn_mac    = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_ADDMACADRSEL_POS,
+                                               MAC_HWF0R_ADDMACADRSEL_LEN);
+       hw_feat->ts_src      = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_TSSTSSEL_POS,
+                                               MAC_HWF0R_TSSTSSEL_LEN);
+       hw_feat->sa_vlan_ins = XLGMAC_GET_REG_BITS(mac_hfr0,
+                                               MAC_HWF0R_SAVLANINS_POS,
+                                               MAC_HWF0R_SAVLANINS_LEN);
+
+       /* Hardware feature register 1 */
+       hw_feat->rx_fifo_size  = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_RXFIFOSIZE_POS,
+                                               MAC_HWF1R_RXFIFOSIZE_LEN);
+       hw_feat->tx_fifo_size  = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_TXFIFOSIZE_POS,
+                                               MAC_HWF1R_TXFIFOSIZE_LEN);
+       hw_feat->adv_ts_hi     = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_ADVTHWORD_POS,
+                                               MAC_HWF1R_ADVTHWORD_LEN);
+       hw_feat->dma_width     = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_ADDR64_POS,
+                                               MAC_HWF1R_ADDR64_LEN);
+       hw_feat->dcb           = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_DCBEN_POS,
+                                               MAC_HWF1R_DCBEN_LEN);
+       hw_feat->sph           = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_SPHEN_POS,
+                                               MAC_HWF1R_SPHEN_LEN);
+       hw_feat->tso           = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_TSOEN_POS,
+                                               MAC_HWF1R_TSOEN_LEN);
+       hw_feat->dma_debug     = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_DBGMEMA_POS,
+                                               MAC_HWF1R_DBGMEMA_LEN);
+       hw_feat->rss           = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_RSSEN_POS,
+                                               MAC_HWF1R_RSSEN_LEN);
+       hw_feat->tc_cnt        = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_NUMTC_POS,
+                                               MAC_HWF1R_NUMTC_LEN);
+       hw_feat->hash_table_size = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_HASHTBLSZ_POS,
+                                               MAC_HWF1R_HASHTBLSZ_LEN);
+       hw_feat->l3l4_filter_num = XLGMAC_GET_REG_BITS(mac_hfr1,
+                                               MAC_HWF1R_L3L4FNUM_POS,
+                                               MAC_HWF1R_L3L4FNUM_LEN);
+
+       /* Hardware feature register 2 */
+       hw_feat->rx_q_cnt     = XLGMAC_GET_REG_BITS(mac_hfr2,
+                                               MAC_HWF2R_RXQCNT_POS,
+                                               MAC_HWF2R_RXQCNT_LEN);
+       hw_feat->tx_q_cnt     = XLGMAC_GET_REG_BITS(mac_hfr2,
+                                               MAC_HWF2R_TXQCNT_POS,
+                                               MAC_HWF2R_TXQCNT_LEN);
+       hw_feat->rx_ch_cnt    = XLGMAC_GET_REG_BITS(mac_hfr2,
+                                               MAC_HWF2R_RXCHCNT_POS,
+                                               MAC_HWF2R_RXCHCNT_LEN);
+       hw_feat->tx_ch_cnt    = XLGMAC_GET_REG_BITS(mac_hfr2,
+                                               MAC_HWF2R_TXCHCNT_POS,
+                                               MAC_HWF2R_TXCHCNT_LEN);
+       hw_feat->pps_out_num  = XLGMAC_GET_REG_BITS(mac_hfr2,
+                                               MAC_HWF2R_PPSOUTNUM_POS,
+                                               MAC_HWF2R_PPSOUTNUM_LEN);
+       hw_feat->aux_snap_num = XLGMAC_GET_REG_BITS(mac_hfr2,
+                                               MAC_HWF2R_AUXSNAPNUM_POS,
+                                               MAC_HWF2R_AUXSNAPNUM_LEN);
+
+       /* Translate the Hash Table size into the actual number */
+       switch (hw_feat->hash_table_size) {
+       case 0:
+               break;
+       case 1:
+               hw_feat->hash_table_size = 64;
+               break;
+       case 2:
+               hw_feat->hash_table_size = 128;
+               break;
+       case 3:
+               hw_feat->hash_table_size = 256;
+               break;
+       }
+
+       /* Translate the address width setting into the actual number */
+       switch (hw_feat->dma_width) {
+       case 0:
+               hw_feat->dma_width = 32;
+               break;
+       case 1:
+               hw_feat->dma_width = 40;
+               break;
+       case 2:
+               hw_feat->dma_width = 48;
+               break;
+       default:
+               hw_feat->dma_width = 32;
+       }
+
+       /* The Queue, Channel and TC counts are zero-based, so increment them
+        * to get the actual number
+        */
+       hw_feat->rx_q_cnt++;
+       hw_feat->tx_q_cnt++;
+       hw_feat->rx_ch_cnt++;
+       hw_feat->tx_ch_cnt++;
+       hw_feat->tc_cnt++;
+}
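
The feature readout above leans on XLGMAC_GET_REG_BITS()/XLGMAC_SET_REG_BITS(), also defined in dwc-xlgmac.h and not shown in this diff. They presumably expand to conventional mask-and-shift field accessors along these lines (an assumption, not the patch's text):

	/* assumed field accessors (dwc-xlgmac.h, not in this diff): extract
	 * or insert a "len"-bit field starting at bit "pos" of a register
	 */
	#define XLGMAC_GET_REG_BITS(var, pos, len)			\
		(((var) >> (pos)) & ((0x1U << (len)) - 1))

	#define XLGMAC_SET_REG_BITS(var, pos, len, val)			\
		(((var) & ~(((0x1U << (len)) - 1) << (pos))) |		\
		 (((val) & ((0x1U << (len)) - 1)) << (pos)))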
+
+void xlgmac_print_all_hw_features(struct xlgmac_pdata *pdata)
+{
+       char *str = NULL;
+
+       XLGMAC_PR("\n");
+       XLGMAC_PR("=====================================================\n");
+       XLGMAC_PR("\n");
+       XLGMAC_PR("HW support following features\n");
+       XLGMAC_PR("\n");
+       /* HW Feature Register0 */
+       XLGMAC_PR("VLAN Hash Filter Selected                   : %s\n",
+                 pdata->hw_feat.vlhash ? "YES" : "NO");
+       XLGMAC_PR("SMA (MDIO) Interface                        : %s\n",
+                 pdata->hw_feat.sma ? "YES" : "NO");
+       XLGMAC_PR("PMT Remote Wake-up Packet Enable            : %s\n",
+                 pdata->hw_feat.rwk ? "YES" : "NO");
+       XLGMAC_PR("PMT Magic Packet Enable                     : %s\n",
+                 pdata->hw_feat.mgk ? "YES" : "NO");
+       XLGMAC_PR("RMON/MMC Module Enable                      : %s\n",
+                 pdata->hw_feat.mmc ? "YES" : "NO");
+       XLGMAC_PR("ARP Offload Enabled                         : %s\n",
+                 pdata->hw_feat.aoe ? "YES" : "NO");
+       XLGMAC_PR("IEEE 1588-2008 Timestamp Enabled            : %s\n",
+                 pdata->hw_feat.ts ? "YES" : "NO");
+       XLGMAC_PR("Energy Efficient Ethernet Enabled           : %s\n",
+                 pdata->hw_feat.eee ? "YES" : "NO");
+       XLGMAC_PR("Transmit Checksum Offload Enabled           : %s\n",
+                 pdata->hw_feat.tx_coe ? "YES" : "NO");
+       XLGMAC_PR("Receive Checksum Offload Enabled            : %s\n",
+                 pdata->hw_feat.rx_coe ? "YES" : "NO");
+       XLGMAC_PR("Additional MAC Addresses 1-31 Selected      : %s\n",
+                 pdata->hw_feat.addn_mac ? "YES" : "NO");
+
+       switch (pdata->hw_feat.ts_src) {
+       case 0:
+               str = "RESERVED";
+               break;
+       case 1:
+               str = "INTERNAL";
+               break;
+       case 2:
+               str = "EXTERNAL";
+               break;
+       case 3:
+               str = "BOTH";
+               break;
+       }
+       XLGMAC_PR("Timestamp System Time Source                : %s\n", str);
+
+       XLGMAC_PR("Source Address or VLAN Insertion Enable     : %s\n",
+                 pdata->hw_feat.sa_vlan_ins ? "YES" : "NO");
+
+       /* HW Feature Register1 */
+       switch (pdata->hw_feat.rx_fifo_size) {
+       case 0:
+               str = "128 bytes";
+               break;
+       case 1:
+               str = "256 bytes";
+               break;
+       case 2:
+               str = "512 bytes";
+               break;
+       case 3:
+               str = "1 KBytes";
+               break;
+       case 4:
+               str = "2 KBytes";
+               break;
+       case 5:
+               str = "4 KBytes";
+               break;
+       case 6:
+               str = "8 KBytes";
+               break;
+       case 7:
+               str = "16 KBytes";
+               break;
+       case 8:
+               str = "32 kBytes";
+               break;
+       case 9:
+               str = "64 KBytes";
+               break;
+       case 10:
+               str = "128 KBytes";
+               break;
+       case 11:
+               str = "256 KBytes";
+               break;
+       default:
+               str = "RESERVED";
+       }
+       XLGMAC_PR("MTL Receive FIFO Size                       : %s\n", str);
+
+       switch (pdata->hw_feat.tx_fifo_size) {
+       case 0:
+               str = "128 bytes";
+               break;
+       case 1:
+               str = "256 bytes";
+               break;
+       case 2:
+               str = "512 bytes";
+               break;
+       case 3:
+               str = "1 KBytes";
+               break;
+       case 4:
+               str = "2 KBytes";
+               break;
+       case 5:
+               str = "4 KBytes";
+               break;
+       case 6:
+               str = "8 KBytes";
+               break;
+       case 7:
+               str = "16 KBytes";
+               break;
+       case 8:
+               str = "32 kBytes";
+               break;
+       case 9:
+               str = "64 KBytes";
+               break;
+       case 10:
+               str = "128 KBytes";
+               break;
+       case 11:
+               str = "256 KBytes";
+               break;
+       default:
+               str = "RESERVED";
+       }
+       XLGMAC_PR("MTL Transmit FIFO Size                      : %s\n", str);
+
+       XLGMAC_PR("IEEE 1588 High Word Register Enable         : %s\n",
+                 pdata->hw_feat.adv_ts_hi ? "YES" : "NO");
+       XLGMAC_PR("Address width                               : %u\n",
+                 pdata->hw_feat.dma_width);
+       XLGMAC_PR("DCB Feature Enable                          : %s\n",
+                 pdata->hw_feat.dcb ? "YES" : "NO");
+       XLGMAC_PR("Split Header Feature Enable                 : %s\n",
+                 pdata->hw_feat.sph ? "YES" : "NO");
+       XLGMAC_PR("TCP Segmentation Offload Enable             : %s\n",
+                 pdata->hw_feat.tso ? "YES" : "NO");
+       XLGMAC_PR("DMA Debug Registers Enabled                 : %s\n",
+                 pdata->hw_feat.dma_debug ? "YES" : "NO");
+       XLGMAC_PR("RSS Feature Enabled                         : %s\n",
+                 pdata->hw_feat.rss ? "YES" : "NO");
+       XLGMAC_PR("Number of Traffic classes                   : %u\n",
+                 (pdata->hw_feat.tc_cnt));
+       XLGMAC_PR("Hash Table Size                             : %u\n",
+                 pdata->hw_feat.hash_table_size);
+       XLGMAC_PR("Total number of L3 or L4 Filters            : %u\n",
+                 pdata->hw_feat.l3l4_filter_num);
+
+       /* HW Feature Register2 */
+       XLGMAC_PR("Number of MTL Receive Queues                : %u\n",
+                 pdata->hw_feat.rx_q_cnt);
+       XLGMAC_PR("Number of MTL Transmit Queues               : %u\n",
+                 pdata->hw_feat.tx_q_cnt);
+       XLGMAC_PR("Number of DMA Receive Channels              : %u\n",
+                 pdata->hw_feat.rx_ch_cnt);
+       XLGMAC_PR("Number of DMA Transmit Channels             : %u\n",
+                 pdata->hw_feat.tx_ch_cnt);
+
+       switch (pdata->hw_feat.pps_out_num) {
+       case 0:
+               str = "No PPS output";
+               break;
+       case 1:
+               str = "1 PPS output";
+               break;
+       case 2:
+               str = "2 PPS outputs";
+               break;
+       case 3:
+               str = "3 PPS outputs";
+               break;
+       case 4:
+               str = "4 PPS outputs";
+               break;
+       default:
+               str = "RESERVED";
+       }
+       XLGMAC_PR("Number of PPS Outputs                       : %s\n", str);
+
+       switch (pdata->hw_feat.aux_snap_num) {
+       case 0:
+               str = "No auxiliary input";
+               break;
+       case 1:
+               str = "1 auxiliary input";
+               break;
+       case 2:
+               str = "2 auxiliary inputs";
+               break;
+       case 3:
+               str = "3 auxiliary inputs";
+               break;
+       case 4:
+               str = "4 auxiliary inputs";
+               break;
+       default:
+               str = "RESERVED";
+       }
+       XLGMAC_PR("Number of Auxiliary Snapshot Inputs         : %s", str);
+
+       XLGMAC_PR("\n");
+       XLGMAC_PR("=====================================================\n");
+       XLGMAC_PR("\n");
+}
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
new file mode 100644 (file)
index 0000000..e9672b1
--- /dev/null
@@ -0,0 +1,644 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#include "dwc-xlgmac.h"
+#include "dwc-xlgmac-reg.h"
+
+static void xlgmac_unmap_desc_data(struct xlgmac_pdata *pdata,
+                                  struct xlgmac_desc_data *desc_data)
+{
+       if (desc_data->skb_dma) {
+               if (desc_data->mapped_as_page) {
+                       dma_unmap_page(pdata->dev, desc_data->skb_dma,
+                                      desc_data->skb_dma_len, DMA_TO_DEVICE);
+               } else {
+                       dma_unmap_single(pdata->dev, desc_data->skb_dma,
+                                        desc_data->skb_dma_len, DMA_TO_DEVICE);
+               }
+               desc_data->skb_dma = 0;
+               desc_data->skb_dma_len = 0;
+       }
+
+       if (desc_data->skb) {
+               dev_kfree_skb_any(desc_data->skb);
+               desc_data->skb = NULL;
+       }
+
+       if (desc_data->rx.hdr.pa.pages)
+               put_page(desc_data->rx.hdr.pa.pages);
+
+       if (desc_data->rx.hdr.pa_unmap.pages) {
+               dma_unmap_page(pdata->dev, desc_data->rx.hdr.pa_unmap.pages_dma,
+                              desc_data->rx.hdr.pa_unmap.pages_len,
+                              DMA_FROM_DEVICE);
+               put_page(desc_data->rx.hdr.pa_unmap.pages);
+       }
+
+       if (desc_data->rx.buf.pa.pages)
+               put_page(desc_data->rx.buf.pa.pages);
+
+       if (desc_data->rx.buf.pa_unmap.pages) {
+               dma_unmap_page(pdata->dev, desc_data->rx.buf.pa_unmap.pages_dma,
+                              desc_data->rx.buf.pa_unmap.pages_len,
+                              DMA_FROM_DEVICE);
+               put_page(desc_data->rx.buf.pa_unmap.pages);
+       }
+
+       memset(&desc_data->tx, 0, sizeof(desc_data->tx));
+       memset(&desc_data->rx, 0, sizeof(desc_data->rx));
+
+       desc_data->mapped_as_page = 0;
+
+       if (desc_data->state_saved) {
+               desc_data->state_saved = 0;
+               desc_data->state.skb = NULL;
+               desc_data->state.len = 0;
+               desc_data->state.error = 0;
+       }
+}
+
+static void xlgmac_free_ring(struct xlgmac_pdata *pdata,
+                            struct xlgmac_ring *ring)
+{
+       struct xlgmac_desc_data *desc_data;
+       unsigned int i;
+
+       if (!ring)
+               return;
+
+       if (ring->desc_data_head) {
+               for (i = 0; i < ring->dma_desc_count; i++) {
+                       desc_data = XLGMAC_GET_DESC_DATA(ring, i);
+                       xlgmac_unmap_desc_data(pdata, desc_data);
+               }
+
+               kfree(ring->desc_data_head);
+               ring->desc_data_head = NULL;
+       }
+
+       if (ring->rx_hdr_pa.pages) {
+               dma_unmap_page(pdata->dev, ring->rx_hdr_pa.pages_dma,
+                              ring->rx_hdr_pa.pages_len, DMA_FROM_DEVICE);
+               put_page(ring->rx_hdr_pa.pages);
+
+               ring->rx_hdr_pa.pages = NULL;
+               ring->rx_hdr_pa.pages_len = 0;
+               ring->rx_hdr_pa.pages_offset = 0;
+               ring->rx_hdr_pa.pages_dma = 0;
+       }
+
+       if (ring->rx_buf_pa.pages) {
+               dma_unmap_page(pdata->dev, ring->rx_buf_pa.pages_dma,
+                              ring->rx_buf_pa.pages_len, DMA_FROM_DEVICE);
+               put_page(ring->rx_buf_pa.pages);
+
+               ring->rx_buf_pa.pages = NULL;
+               ring->rx_buf_pa.pages_len = 0;
+               ring->rx_buf_pa.pages_offset = 0;
+               ring->rx_buf_pa.pages_dma = 0;
+       }
+
+       if (ring->dma_desc_head) {
+               dma_free_coherent(pdata->dev,
+                                 (sizeof(struct xlgmac_dma_desc) *
+                                 ring->dma_desc_count),
+                                 ring->dma_desc_head,
+                                 ring->dma_desc_head_addr);
+               ring->dma_desc_head = NULL;
+       }
+}
+
+static int xlgmac_init_ring(struct xlgmac_pdata *pdata,
+                           struct xlgmac_ring *ring,
+                           unsigned int dma_desc_count)
+{
+       if (!ring)
+               return 0;
+
+       /* Descriptors */
+       ring->dma_desc_count = dma_desc_count;
+       ring->dma_desc_head = dma_alloc_coherent(pdata->dev,
+                                       (sizeof(struct xlgmac_dma_desc) *
+                                        dma_desc_count),
+                                       &ring->dma_desc_head_addr,
+                                       GFP_KERNEL);
+       if (!ring->dma_desc_head)
+               return -ENOMEM;
+
+       /* Array of descriptor data */
+       ring->desc_data_head = kcalloc(dma_desc_count,
+                                       sizeof(struct xlgmac_desc_data),
+                                       GFP_KERNEL);
+       if (!ring->desc_data_head)
+               return -ENOMEM;
+
+       netif_dbg(pdata, drv, pdata->netdev,
+                 "dma_desc_head=%p, dma_desc_head_addr=%pad, desc_data_head=%p\n",
+                 ring->dma_desc_head,
+                 &ring->dma_desc_head_addr,
+                 ring->desc_data_head);
+
+       return 0;
+}
+
+static void xlgmac_free_rings(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       if (!pdata->channel_head)
+               return;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               xlgmac_free_ring(pdata, channel->tx_ring);
+               xlgmac_free_ring(pdata, channel->rx_ring);
+       }
+}
+
+static int xlgmac_alloc_rings(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       int ret;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               netif_dbg(pdata, drv, pdata->netdev, "%s - Tx ring:\n",
+                         channel->name);
+
+               ret = xlgmac_init_ring(pdata, channel->tx_ring,
+                                      pdata->tx_desc_count);
+               if (ret) {
+                       netdev_alert(pdata->netdev,
+                                    "error initializing Tx ring\n");
+                       goto err_init_ring;
+               }
+
+               netif_dbg(pdata, drv, pdata->netdev, "%s - Rx ring:\n",
+                         channel->name);
+
+               ret = xlgmac_init_ring(pdata, channel->rx_ring,
+                                      pdata->rx_desc_count);
+               if (ret) {
+                       netdev_alert(pdata->netdev,
+                                    "error initializing Rx ring\n");
+                       goto err_init_ring;
+               }
+       }
+
+       return 0;
+
+err_init_ring:
+       xlgmac_free_rings(pdata);
+
+       return ret;
+}
+
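+/* The Tx and Rx rings were allocated as two contiguous arrays whose
+ * base pointers live in the first channel (see xlgmac_alloc_channels()),
+ * so freeing channel_head->tx_ring and channel_head->rx_ring releases
+ * the rings of every channel.
+ */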
+static void xlgmac_free_channels(struct xlgmac_pdata *pdata)
+{
+       if (!pdata->channel_head)
+               return;
+
+       kfree(pdata->channel_head->tx_ring);
+       pdata->channel_head->tx_ring = NULL;
+
+       kfree(pdata->channel_head->rx_ring);
+       pdata->channel_head->rx_ring = NULL;
+
+       kfree(pdata->channel_head);
+
+       pdata->channel_head = NULL;
+       pdata->channel_count = 0;
+}
+
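+/* Allocate the channel structures plus one contiguous array each of Tx
+ * and Rx rings, then hand the ring entries out one per channel.  When
+ * per-channel interrupts are in use, each channel's IRQ is taken from
+ * the platform-provided channel_irq[] table.
+ */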
+static int xlgmac_alloc_channels(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel_head, *channel;
+       struct xlgmac_ring *tx_ring, *rx_ring;
+       int ret = -ENOMEM;
+       unsigned int i;
+
+       channel_head = kcalloc(pdata->channel_count,
+                              sizeof(struct xlgmac_channel), GFP_KERNEL);
+       if (!channel_head)
+               return ret;
+
+       netif_dbg(pdata, drv, pdata->netdev,
+                 "channel_head=%p\n", channel_head);
+
+       tx_ring = kcalloc(pdata->tx_ring_count, sizeof(struct xlgmac_ring),
+                         GFP_KERNEL);
+       if (!tx_ring)
+               goto err_tx_ring;
+
+       rx_ring = kcalloc(pdata->rx_ring_count, sizeof(struct xlgmac_ring),
+                         GFP_KERNEL);
+       if (!rx_ring)
+               goto err_rx_ring;
+
+       for (i = 0, channel = channel_head; i < pdata->channel_count;
+               i++, channel++) {
+               snprintf(channel->name, sizeof(channel->name), "channel-%u", i);
+               channel->pdata = pdata;
+               channel->queue_index = i;
+               channel->dma_regs = pdata->mac_regs + DMA_CH_BASE +
+                                   (DMA_CH_INC * i);
+
+               if (pdata->per_channel_irq) {
+                       /* Get the per DMA interrupt */
+                       ret = pdata->channel_irq[i];
+                       if (ret < 0) {
+                               netdev_err(pdata->netdev,
+                                          "get_irq %u failed\n",
+                                          i + 1);
+                               goto err_irq;
+                       }
+                       channel->dma_irq = ret;
+               }
+
+               if (i < pdata->tx_ring_count)
+                       channel->tx_ring = tx_ring++;
+
+               if (i < pdata->rx_ring_count)
+                       channel->rx_ring = rx_ring++;
+
+               netif_dbg(pdata, drv, pdata->netdev,
+                         "%s: dma_regs=%p, tx_ring=%p, rx_ring=%p\n",
+                         channel->name, channel->dma_regs,
+                         channel->tx_ring, channel->rx_ring);
+       }
+
+       pdata->channel_head = channel_head;
+
+       return 0;
+
+err_irq:
+       kfree(rx_ring);
+
+err_rx_ring:
+       kfree(tx_ring);
+
+err_tx_ring:
+       kfree(channel_head);
+
+       return ret;
+}
+
+static void xlgmac_free_channels_and_rings(struct xlgmac_pdata *pdata)
+{
+       xlgmac_free_rings(pdata);
+
+       xlgmac_free_channels(pdata);
+}
+
+static int xlgmac_alloc_channels_and_rings(struct xlgmac_pdata *pdata)
+{
+       int ret;
+
+       ret = xlgmac_alloc_channels(pdata);
+       if (ret)
+               goto err_alloc;
+
+       ret = xlgmac_alloc_rings(pdata);
+       if (ret)
+               goto err_alloc;
+
+       return 0;
+
+err_alloc:
+       xlgmac_free_channels_and_rings(pdata);
+
+       return ret;
+}
+
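+/* Allocate and DMA-map a group of pages for Rx buffers, starting at the
+ * requested order and falling back to smaller orders (down to a single
+ * page) if higher-order allocations fail.
+ */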
+static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata,
+                             struct xlgmac_page_alloc *pa,
+                             gfp_t gfp, int order)
+{
+       struct page *pages = NULL;
+       dma_addr_t pages_dma;
+
+       /* Try to obtain pages, decreasing order if necessary */
+       gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+       while (order >= 0) {
+               pages = alloc_pages(gfp, order);
+               if (pages)
+                       break;
+
+               order--;
+       }
+       if (!pages)
+               return -ENOMEM;
+
+       /* Map the pages */
+       pages_dma = dma_map_page(pdata->dev, pages, 0,
+                                PAGE_SIZE << order, DMA_FROM_DEVICE);
+       if (dma_mapping_error(pdata->dev, pages_dma)) {
+               put_page(pages);
+               return -ENOMEM;
+       }
+
+       pa->pages = pages;
+       pa->pages_len = PAGE_SIZE << order;
+       pa->pages_offset = 0;
+       pa->pages_dma = pages_dma;
+
+       return 0;
+}
+
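+/* Carve one Rx buffer out of the ring's current page allocation.  The
+ * page reference count is bumped for every buffer handed out; once the
+ * next buffer of the same size would no longer fit, this descriptor
+ * inherits responsibility for unmapping the pages (pa_unmap) and the
+ * ring will allocate fresh pages on the next call.
+ */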
+static void xlgmac_set_buffer_data(struct xlgmac_buffer_data *bd,
+                                  struct xlgmac_page_alloc *pa,
+                                  unsigned int len)
+{
+       get_page(pa->pages);
+       bd->pa = *pa;
+
+       bd->dma_base = pa->pages_dma;
+       bd->dma_off = pa->pages_offset;
+       bd->dma_len = len;
+
+       pa->pages_offset += len;
+       if ((pa->pages_offset + len) > pa->pages_len) {
+               /* This data descriptor is responsible for unmapping page(s) */
+               bd->pa_unmap = *pa;
+
+               /* Get a new allocation next time */
+               pa->pages = NULL;
+               pa->pages_len = 0;
+               pa->pages_offset = 0;
+               pa->pages_dma = 0;
+       }
+}
+
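+/* Attach Rx buffers to a descriptor: a small buffer sized for packet
+ * headers and a larger one for packet data, each carved from a page
+ * allocation that is shared across the ring.
+ */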
+static int xlgmac_map_rx_buffer(struct xlgmac_pdata *pdata,
+                               struct xlgmac_ring *ring,
+                               struct xlgmac_desc_data *desc_data)
+{
+       int order, ret;
+
+       if (!ring->rx_hdr_pa.pages) {
+               ret = xlgmac_alloc_pages(pdata, &ring->rx_hdr_pa,
+                                        GFP_ATOMIC, 0);
+               if (ret)
+                       return ret;
+       }
+
+       if (!ring->rx_buf_pa.pages) {
+               order = max_t(int, PAGE_ALLOC_COSTLY_ORDER - 1, 0);
+               ret = xlgmac_alloc_pages(pdata, &ring->rx_buf_pa,
+                                        GFP_ATOMIC, order);
+               if (ret)
+                       return ret;
+       }
+
+       /* Set up the header page info */
+       xlgmac_set_buffer_data(&desc_data->rx.hdr, &ring->rx_hdr_pa,
+                              XLGMAC_SKB_ALLOC_SIZE);
+
+       /* Set up the buffer page info */
+       xlgmac_set_buffer_data(&desc_data->rx.buf, &ring->rx_buf_pa,
+                              pdata->rx_buf_size);
+
+       return 0;
+}
+
+static void xlgmac_tx_desc_init(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+       struct xlgmac_channel *channel;
+       struct xlgmac_ring *ring;
+       dma_addr_t dma_desc_addr;
+       unsigned int i, j;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               ring = channel->tx_ring;
+               if (!ring)
+                       break;
+
+               dma_desc = ring->dma_desc_head;
+               dma_desc_addr = ring->dma_desc_head_addr;
+
+               for (j = 0; j < ring->dma_desc_count; j++) {
+                       desc_data = XLGMAC_GET_DESC_DATA(ring, j);
+
+                       desc_data->dma_desc = dma_desc;
+                       desc_data->dma_desc_addr = dma_desc_addr;
+
+                       dma_desc++;
+                       dma_desc_addr += sizeof(struct xlgmac_dma_desc);
+               }
+
+               ring->cur = 0;
+               ring->dirty = 0;
+               memset(&ring->tx, 0, sizeof(ring->tx));
+
+               hw_ops->tx_desc_init(channel);
+       }
+}
+
+static void xlgmac_rx_desc_init(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+       struct xlgmac_channel *channel;
+       struct xlgmac_ring *ring;
+       dma_addr_t dma_desc_addr;
+       unsigned int i, j;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               ring = channel->rx_ring;
+               if (!ring)
+                       break;
+
+               dma_desc = ring->dma_desc_head;
+               dma_desc_addr = ring->dma_desc_head_addr;
+
+               for (j = 0; j < ring->dma_desc_count; j++) {
+                       desc_data = XLGMAC_GET_DESC_DATA(ring, j);
+
+                       desc_data->dma_desc = dma_desc;
+                       desc_data->dma_desc_addr = dma_desc_addr;
+
+                       if (xlgmac_map_rx_buffer(pdata, ring, desc_data))
+                               break;
+
+                       dma_desc++;
+                       dma_desc_addr += sizeof(struct xlgmac_dma_desc);
+               }
+
+               ring->cur = 0;
+               ring->dirty = 0;
+
+               hw_ops->rx_desc_init(channel);
+       }
+}
+
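+/* DMA-map an skb for transmit.  A slot is reserved for a context
+ * descriptor when the MSS or VLAN tag changes, a TSO header is mapped
+ * separately from the payload, and the linear data and each fragment
+ * are split into chunks of at most XLGMAC_TX_MAX_BUF_SIZE.  Returns the
+ * number of descriptors used, or 0 after unwinding all mappings on
+ * failure.
+ */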
+static int xlgmac_map_tx_skb(struct xlgmac_channel *channel,
+                            struct sk_buff *skb)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->tx_ring;
+       unsigned int start_index, cur_index;
+       struct xlgmac_desc_data *desc_data;
+       unsigned int offset, datalen, len;
+       struct xlgmac_pkt_info *pkt_info;
+       struct skb_frag_struct *frag;
+       unsigned int tso, vlan;
+       dma_addr_t skb_dma;
+       unsigned int i;
+
+       offset = 0;
+       start_index = ring->cur;
+       cur_index = ring->cur;
+
+       pkt_info = &ring->pkt_info;
+       pkt_info->desc_count = 0;
+       pkt_info->length = 0;
+
+       tso = XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                 TX_PACKET_ATTRIBUTES_TSO_ENABLE_POS,
+                                 TX_PACKET_ATTRIBUTES_TSO_ENABLE_LEN);
+       vlan = XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                  TX_PACKET_ATTRIBUTES_VLAN_CTAG_POS,
+                                  TX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN);
+
+       /* Save space for a context descriptor if needed */
+       if ((tso && (pkt_info->mss != ring->tx.cur_mss)) ||
+           (vlan && (pkt_info->vlan_ctag != ring->tx.cur_vlan_ctag)))
+               cur_index++;
+       desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+
+       if (tso) {
+               /* Map the TSO header */
+               skb_dma = dma_map_single(pdata->dev, skb->data,
+                                        pkt_info->header_len, DMA_TO_DEVICE);
+               if (dma_mapping_error(pdata->dev, skb_dma)) {
+                       netdev_alert(pdata->netdev, "dma_map_single failed\n");
+                       goto err_out;
+               }
+               desc_data->skb_dma = skb_dma;
+               desc_data->skb_dma_len = pkt_info->header_len;
+               netif_dbg(pdata, tx_queued, pdata->netdev,
+                         "skb header: index=%u, dma=%pad, len=%u\n",
+                         cur_index, &skb_dma, pkt_info->header_len);
+
+               offset = pkt_info->header_len;
+
+               pkt_info->length += pkt_info->header_len;
+
+               cur_index++;
+               desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+       }
+
+       /* Map the (remainder of the) packet */
+       for (datalen = skb_headlen(skb) - offset; datalen; ) {
+               len = min_t(unsigned int, datalen, XLGMAC_TX_MAX_BUF_SIZE);
+
+               skb_dma = dma_map_single(pdata->dev, skb->data + offset, len,
+                                        DMA_TO_DEVICE);
+               if (dma_mapping_error(pdata->dev, skb_dma)) {
+                       netdev_alert(pdata->netdev, "dma_map_single failed\n");
+                       goto err_out;
+               }
+               desc_data->skb_dma = skb_dma;
+               desc_data->skb_dma_len = len;
+               netif_dbg(pdata, tx_queued, pdata->netdev,
+                         "skb data: index=%u, dma=%pad, len=%u\n",
+                         cur_index, &skb_dma, len);
+
+               datalen -= len;
+               offset += len;
+
+               pkt_info->length += len;
+
+               cur_index++;
+               desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               netif_dbg(pdata, tx_queued, pdata->netdev,
+                         "mapping frag %u\n", i);
+
+               frag = &skb_shinfo(skb)->frags[i];
+               offset = 0;
+
+               for (datalen = skb_frag_size(frag); datalen; ) {
+                       len = min_t(unsigned int, datalen,
+                                   XLGMAC_TX_MAX_BUF_SIZE);
+
+                       skb_dma = skb_frag_dma_map(pdata->dev, frag, offset,
+                                                  len, DMA_TO_DEVICE);
+                       if (dma_mapping_error(pdata->dev, skb_dma)) {
+                               netdev_alert(pdata->netdev,
+                                            "skb_frag_dma_map failed\n");
+                               goto err_out;
+                       }
+                       desc_data->skb_dma = skb_dma;
+                       desc_data->skb_dma_len = len;
+                       desc_data->mapped_as_page = 1;
+                       netif_dbg(pdata, tx_queued, pdata->netdev,
+                                 "skb frag: index=%u, dma=%pad, len=%u\n",
+                                 cur_index, &skb_dma, len);
+
+                       datalen -= len;
+                       offset += len;
+
+                       pkt_info->length += len;
+
+                       cur_index++;
+                       desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+               }
+       }
+
+       /* Save the skb address in the last entry. We always have some data
+        * that has been mapped so desc_data is always advanced past the last
+        * piece of mapped data - use the entry pointed to by cur_index - 1.
+        */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index - 1);
+       desc_data->skb = skb;
+
+       /* Save the number of descriptor entries used */
+       pkt_info->desc_count = cur_index - start_index;
+
+       return pkt_info->desc_count;
+
+err_out:
+       while (start_index < cur_index) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, start_index++);
+               xlgmac_unmap_desc_data(pdata, desc_data);
+       }
+
+       return 0;
+}
+
+void xlgmac_init_desc_ops(struct xlgmac_desc_ops *desc_ops)
+{
+       desc_ops->alloc_channels_and_rings = xlgmac_alloc_channels_and_rings;
+       desc_ops->free_channels_and_rings = xlgmac_free_channels_and_rings;
+       desc_ops->map_tx_skb = xlgmac_map_tx_skb;
+       desc_ops->map_rx_buffer = xlgmac_map_rx_buffer;
+       desc_ops->unmap_desc_data = xlgmac_unmap_desc_data;
+       desc_ops->tx_desc_init = xlgmac_tx_desc_init;
+       desc_ops->rx_desc_init = xlgmac_rx_desc_init;
+}
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
new file mode 100644 (file)
index 0000000..0dec1dc
--- /dev/null
@@ -0,0 +1,3145 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#include <linux/phy.h>
+#include <linux/mdio.h>
+#include <linux/clk.h>
+#include <linux/bitrev.h>
+#include <linux/crc32.h>
+#include <linux/dcbnl.h>
+
+#include "dwc-xlgmac.h"
+#include "dwc-xlgmac-reg.h"
+
+static int xlgmac_tx_complete(struct xlgmac_dma_desc *dma_desc)
+{
+       return !XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                               TX_NORMAL_DESC3_OWN_POS,
+                               TX_NORMAL_DESC3_OWN_LEN);
+}
+
+static int xlgmac_disable_rx_csum(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_RCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_IPC_POS,
+                                    MAC_RCR_IPC_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_RCR);
+
+       return 0;
+}
+
+static int xlgmac_enable_rx_csum(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_RCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_IPC_POS,
+                                    MAC_RCR_IPC_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_RCR);
+
+       return 0;
+}
+
+static int xlgmac_set_mac_address(struct xlgmac_pdata *pdata, u8 *addr)
+{
+       unsigned int mac_addr_hi, mac_addr_lo;
+
+       mac_addr_hi = (addr[5] <<  8) | (addr[4] <<  0);
+       mac_addr_lo = (addr[3] << 24) | (addr[2] << 16) |
+                     (addr[1] <<  8) | (addr[0] <<  0);
+
+       writel(mac_addr_hi, pdata->mac_regs + MAC_MACA0HR);
+       writel(mac_addr_lo, pdata->mac_regs + MAC_MACA0LR);
+
+       return 0;
+}
+
+static void xlgmac_set_mac_reg(struct xlgmac_pdata *pdata,
+                              struct netdev_hw_addr *ha,
+                              unsigned int *mac_reg)
+{
+       unsigned int mac_addr_hi, mac_addr_lo;
+       u8 *mac_addr;
+
+       mac_addr_lo = 0;
+       mac_addr_hi = 0;
+
+       if (ha) {
+               mac_addr = (u8 *)&mac_addr_lo;
+               mac_addr[0] = ha->addr[0];
+               mac_addr[1] = ha->addr[1];
+               mac_addr[2] = ha->addr[2];
+               mac_addr[3] = ha->addr[3];
+               mac_addr = (u8 *)&mac_addr_hi;
+               mac_addr[0] = ha->addr[4];
+               mac_addr[1] = ha->addr[5];
+
+               netif_dbg(pdata, drv, pdata->netdev,
+                         "adding mac address %pM at %#x\n",
+                         ha->addr, *mac_reg);
+
+               mac_addr_hi = XLGMAC_SET_REG_BITS(mac_addr_hi,
+                                                 MAC_MACA1HR_AE_POS,
+                                                 MAC_MACA1HR_AE_LEN,
+                                                 1);
+       }
+
+       writel(mac_addr_hi, pdata->mac_regs + *mac_reg);
+       *mac_reg += MAC_MACA_INC;
+       writel(mac_addr_lo, pdata->mac_regs + *mac_reg);
+       *mac_reg += MAC_MACA_INC;
+}
+
+static int xlgmac_enable_rx_vlan_stripping(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_VLANTR);
+       /* Put the VLAN tag in the Rx descriptor */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_EVLRXS_POS,
+                                    MAC_VLANTR_EVLRXS_LEN, 1);
+       /* Don't check the VLAN type */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_DOVLTC_POS,
+                                    MAC_VLANTR_DOVLTC_LEN, 1);
+       /* Check only C-TAG (0x8100) packets */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_ERSVLM_POS,
+                                    MAC_VLANTR_ERSVLM_LEN, 0);
+       /* Don't consider an S-TAG (0x88A8) packet as a VLAN packet */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_ESVL_POS,
+                                    MAC_VLANTR_ESVL_LEN, 0);
+       /* Enable VLAN tag stripping */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_EVLS_POS,
+                                    MAC_VLANTR_EVLS_LEN, 0x3);
+       writel(regval, pdata->mac_regs + MAC_VLANTR);
+
+       return 0;
+}
+
+static int xlgmac_disable_rx_vlan_stripping(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_VLANTR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_EVLS_POS,
+                                    MAC_VLANTR_EVLS_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_VLANTR);
+
+       return 0;
+}
+
+static int xlgmac_enable_rx_vlan_filtering(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_PFR);
+       /* Enable VLAN filtering */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_VTFE_POS,
+                                    MAC_PFR_VTFE_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_PFR);
+
+       regval = readl(pdata->mac_regs + MAC_VLANTR);
+       /* Enable VLAN Hash Table filtering */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_VTHM_POS,
+                                    MAC_VLANTR_VTHM_LEN, 1);
+       /* Disable VLAN tag inverse matching */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_VTIM_POS,
+                                    MAC_VLANTR_VTIM_LEN, 0);
+       /* Only filter on the lower 12-bits of the VLAN tag */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_ETV_POS,
+                                    MAC_VLANTR_ETV_LEN, 1);
+       /* In order for the VLAN Hash Table filtering to be effective,
+        * the VLAN tag identifier in the VLAN Tag Register must not
+        * be zero.  Set the VLAN tag identifier to "1" to enable the
+        * VLAN Hash Table filtering.  This implies that a VLAN tag of
+        * 1 will always pass filtering.
+        */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANTR_VL_POS,
+                                    MAC_VLANTR_VL_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_VLANTR);
+
+       return 0;
+}
+
+static int xlgmac_disable_rx_vlan_filtering(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_PFR);
+       /* Disable VLAN filtering */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_VTFE_POS,
+                                    MAC_PFR_VTFE_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_PFR);
+
+       return 0;
+}
+
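+/* Bit-serial little-endian CRC-32 (polynomial 0xedb88320) over the low
+ * 12 bits of a VLAN ID.  The caller reduces the bit-reversed result to
+ * a 4-bit index into the 16-entry VLAN hash table.
+ */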
+static u32 xlgmac_vid_crc32_le(__le16 vid_le)
+{
+       unsigned char *data = (unsigned char *)&vid_le;
+       unsigned char data_byte = 0;
+       u32 poly = 0xedb88320;
+       u32 crc = ~0;
+       u32 temp = 0;
+       int i, bits;
+
+       bits = get_bitmask_order(VLAN_VID_MASK);
+       for (i = 0; i < bits; i++) {
+               if ((i % 8) == 0)
+                       data_byte = data[i / 8];
+
+               temp = ((crc & 1) ^ data_byte) & 1;
+               crc >>= 1;
+               data_byte >>= 1;
+
+               if (temp)
+                       crc ^= poly;
+       }
+
+       return crc;
+}
+
+static int xlgmac_update_vlan_hash_table(struct xlgmac_pdata *pdata)
+{
+       u16 vlan_hash_table = 0;
+       __le16 vid_le;
+       u32 regval;
+       u32 crc;
+       u16 vid;
+
+       /* Generate the VLAN Hash Table value */
+       for_each_set_bit(vid, pdata->active_vlans, VLAN_N_VID) {
+               /* Get the CRC32 value of the VLAN ID */
+               vid_le = cpu_to_le16(vid);
+               crc = bitrev32(~xlgmac_vid_crc32_le(vid_le)) >> 28;
+
+               vlan_hash_table |= (1 << crc);
+       }
+
+       regval = readl(pdata->mac_regs + MAC_VLANHTR);
+       /* Set the VLAN Hash Table filtering register */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANHTR_VLHT_POS,
+                                    MAC_VLANHTR_VLHT_LEN, vlan_hash_table);
+       writel(regval, pdata->mac_regs + MAC_VLANHTR);
+
+       return 0;
+}
+
+static int xlgmac_set_promiscuous_mode(struct xlgmac_pdata *pdata,
+                                      unsigned int enable)
+{
+       unsigned int val = enable ? 1 : 0;
+       u32 regval;
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_PFR),
+                                    MAC_PFR_PR_POS, MAC_PFR_PR_LEN);
+       if (regval == val)
+               return 0;
+
+       netif_dbg(pdata, drv, pdata->netdev, "%s promiscuous mode\n",
+                 enable ? "entering" : "leaving");
+
+       regval = readl(pdata->mac_regs + MAC_PFR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_PR_POS,
+                                    MAC_PFR_PR_LEN, val);
+       writel(regval, pdata->mac_regs + MAC_PFR);
+
+       /* Hardware will still perform VLAN filtering in promiscuous mode */
+       if (enable) {
+               xlgmac_disable_rx_vlan_filtering(pdata);
+       } else {
+               if (pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+                       xlgmac_enable_rx_vlan_filtering(pdata);
+       }
+
+       return 0;
+}
+
+static int xlgmac_set_all_multicast_mode(struct xlgmac_pdata *pdata,
+                                        unsigned int enable)
+{
+       unsigned int val = enable ? 1 : 0;
+       u32 regval;
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_PFR),
+                                    MAC_PFR_PM_POS, MAC_PFR_PM_LEN);
+       if (regval == val)
+               return 0;
+
+       netif_dbg(pdata, drv, pdata->netdev, "%s allmulti mode\n",
+                 enable ? "entering" : "leaving");
+
+       regval = readl(pdata->mac_regs + MAC_PFR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_PM_POS,
+                                    MAC_PFR_PM_LEN, val);
+       writel(regval, pdata->mac_regs + MAC_PFR);
+
+       return 0;
+}
+
+static void xlgmac_set_mac_addn_addrs(struct xlgmac_pdata *pdata)
+{
+       struct net_device *netdev = pdata->netdev;
+       struct netdev_hw_addr *ha;
+       unsigned int addn_macs;
+       unsigned int mac_reg;
+
+       mac_reg = MAC_MACA1HR;
+       addn_macs = pdata->hw_feat.addn_mac;
+
+       if (netdev_uc_count(netdev) > addn_macs) {
+               xlgmac_set_promiscuous_mode(pdata, 1);
+       } else {
+               netdev_for_each_uc_addr(ha, netdev) {
+                       xlgmac_set_mac_reg(pdata, ha, &mac_reg);
+                       addn_macs--;
+               }
+
+               if (netdev_mc_count(netdev) > addn_macs) {
+                       xlgmac_set_all_multicast_mode(pdata, 1);
+               } else {
+                       netdev_for_each_mc_addr(ha, netdev) {
+                               xlgmac_set_mac_reg(pdata, ha, &mac_reg);
+                               addn_macs--;
+                       }
+               }
+       }
+
+       /* Clear remaining additional MAC address entries */
+       while (addn_macs--)
+               xlgmac_set_mac_reg(pdata, NULL, &mac_reg);
+}
+
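+/* Program the MAC hash filter: each unicast/multicast address is hashed
+ * with CRC-32, the top bits of the bit-reversed result (scaled to the
+ * hardware hash table size) select one bit in the table, and the
+ * resulting words are written to the MAC_HTR registers.
+ */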
+static void xlgmac_set_mac_hash_table(struct xlgmac_pdata *pdata)
+{
+       unsigned int hash_table_shift, hash_table_count;
+       u32 hash_table[XLGMAC_MAC_HASH_TABLE_SIZE];
+       struct net_device *netdev = pdata->netdev;
+       struct netdev_hw_addr *ha;
+       unsigned int hash_reg;
+       unsigned int i;
+       u32 crc;
+
+       hash_table_shift = 26 - (pdata->hw_feat.hash_table_size >> 7);
+       hash_table_count = pdata->hw_feat.hash_table_size / 32;
+       memset(hash_table, 0, sizeof(hash_table));
+
+       /* Build the MAC Hash Table register values */
+       netdev_for_each_uc_addr(ha, netdev) {
+               crc = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN));
+               crc >>= hash_table_shift;
+               hash_table[crc >> 5] |= (1 << (crc & 0x1f));
+       }
+
+       netdev_for_each_mc_addr(ha, netdev) {
+               crc = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN));
+               crc >>= hash_table_shift;
+               hash_table[crc >> 5] |= (1 << (crc & 0x1f));
+       }
+
+       /* Set the MAC Hash Table registers */
+       hash_reg = MAC_HTR0;
+       for (i = 0; i < hash_table_count; i++) {
+               writel(hash_table[i], pdata->mac_regs + hash_reg);
+               hash_reg += MAC_HTR_INC;
+       }
+}
+
+static int xlgmac_add_mac_addresses(struct xlgmac_pdata *pdata)
+{
+       if (pdata->hw_feat.hash_table_size)
+               xlgmac_set_mac_hash_table(pdata);
+       else
+               xlgmac_set_mac_addn_addrs(pdata);
+
+       return 0;
+}
+
+static void xlgmac_config_mac_address(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       xlgmac_set_mac_address(pdata, pdata->netdev->dev_addr);
+
+       /* Filtering is done using perfect filtering and hash filtering */
+       if (pdata->hw_feat.hash_table_size) {
+               regval = readl(pdata->mac_regs + MAC_PFR);
+               regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_HPF_POS,
+                                            MAC_PFR_HPF_LEN, 1);
+               regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_HUC_POS,
+                                            MAC_PFR_HUC_LEN, 1);
+               regval = XLGMAC_SET_REG_BITS(regval, MAC_PFR_HMC_POS,
+                                            MAC_PFR_HMC_LEN, 1);
+               writel(regval, pdata->mac_regs + MAC_PFR);
+       }
+}
+
+static void xlgmac_config_jumbo_enable(struct xlgmac_pdata *pdata)
+{
+       unsigned int val;
+       u32 regval;
+
+       val = (pdata->netdev->mtu > XLGMAC_STD_PACKET_MTU) ? 1 : 0;
+
+       regval = readl(pdata->mac_regs + MAC_RCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_JE_POS,
+                                    MAC_RCR_JE_LEN, val);
+       writel(regval, pdata->mac_regs + MAC_RCR);
+}
+
+static void xlgmac_config_checksum_offload(struct xlgmac_pdata *pdata)
+{
+       if (pdata->netdev->features & NETIF_F_RXCSUM)
+               xlgmac_enable_rx_csum(pdata);
+       else
+               xlgmac_disable_rx_csum(pdata);
+}
+
+static void xlgmac_config_vlan_support(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_VLANIR);
+       /* Indicate that VLAN Tx CTAGs come from context descriptors */
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANIR_CSVL_POS,
+                                    MAC_VLANIR_CSVL_LEN, 0);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_VLANIR_VLTI_POS,
+                                    MAC_VLANIR_VLTI_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_VLANIR);
+
+       /* Set the current VLAN Hash Table register value */
+       xlgmac_update_vlan_hash_table(pdata);
+
+       if (pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+               xlgmac_enable_rx_vlan_filtering(pdata);
+       else
+               xlgmac_disable_rx_vlan_filtering(pdata);
+
+       if (pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+               xlgmac_enable_rx_vlan_stripping(pdata);
+       else
+               xlgmac_disable_rx_vlan_stripping(pdata);
+}
+
+static int xlgmac_config_rx_mode(struct xlgmac_pdata *pdata)
+{
+       struct net_device *netdev = pdata->netdev;
+       unsigned int pr_mode, am_mode;
+
+       pr_mode = ((netdev->flags & IFF_PROMISC) != 0);
+       am_mode = ((netdev->flags & IFF_ALLMULTI) != 0);
+
+       xlgmac_set_promiscuous_mode(pdata, pr_mode);
+       xlgmac_set_all_multicast_mode(pdata, am_mode);
+
+       xlgmac_add_mac_addresses(pdata);
+
+       return 0;
+}
+
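+/* Poll the DMA debug status register until the channel's Tx process
+ * reports stopped or suspended, giving up after XLGMAC_DMA_STOP_TIMEOUT
+ * seconds.  Queues below DMA_DSRX_FIRST_QUEUE are reported in DMA_DSR0;
+ * higher queues are spread across DMA_DSR1 and up, DMA_DSRX_QPR queues
+ * per register.
+ */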
+static void xlgmac_prepare_tx_stop(struct xlgmac_pdata *pdata,
+                                  struct xlgmac_channel *channel)
+{
+       unsigned int tx_dsr, tx_pos, tx_qidx;
+       unsigned long tx_timeout;
+       unsigned int tx_status;
+
+       /* Calculate the status register to read and the position within */
+       if (channel->queue_index < DMA_DSRX_FIRST_QUEUE) {
+               tx_dsr = DMA_DSR0;
+               tx_pos = (channel->queue_index * DMA_DSR_Q_LEN) +
+                        DMA_DSR0_TPS_START;
+       } else {
+               tx_qidx = channel->queue_index - DMA_DSRX_FIRST_QUEUE;
+
+               tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC);
+               tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_LEN) +
+                        DMA_DSRX_TPS_START;
+       }
+
+       /* The Tx engine cannot be stopped if it is actively processing
+        * descriptors. Wait for the Tx engine to enter the stopped or
+        * suspended state.  Don't wait forever though...
+        */
+       tx_timeout = jiffies + (XLGMAC_DMA_STOP_TIMEOUT * HZ);
+       while (time_before(jiffies, tx_timeout)) {
+               tx_status = readl(pdata->mac_regs + tx_dsr);
+               tx_status = XLGMAC_GET_REG_BITS(tx_status, tx_pos,
+                                               DMA_DSR_TPS_LEN);
+               if ((tx_status == DMA_TPS_STOPPED) ||
+                   (tx_status == DMA_TPS_SUSPENDED))
+                       break;
+
+               usleep_range(500, 1000);
+       }
+
+       if (!time_before(jiffies, tx_timeout))
+               netdev_info(pdata->netdev,
+                           "timed out waiting for Tx DMA channel %u to stop\n",
+                           channel->queue_index);
+}
+
+static void xlgmac_enable_tx(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       /* Enable each Tx DMA channel */
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_TCR_ST_POS,
+                                            DMA_CH_TCR_ST_LEN, 1);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+       }
+
+       /* Enable each Tx queue */
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_TQOMR_TXQEN_POS,
+                                            MTL_Q_TQOMR_TXQEN_LEN,
+                                            MTL_Q_ENABLED);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+       }
+
+       /* Enable MAC Tx */
+       regval = readl(pdata->mac_regs + MAC_TCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_TCR_TE_POS,
+                                    MAC_TCR_TE_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_TCR);
+}
+
+static void xlgmac_disable_tx(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       /* Prepare for Tx DMA channel stop */
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               xlgmac_prepare_tx_stop(pdata, channel);
+       }
+
+       /* Disable MAC Tx */
+       regval = readl(pdata->mac_regs + MAC_TCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_TCR_TE_POS,
+                                    MAC_TCR_TE_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_TCR);
+
+       /* Disable each Tx queue */
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_TQOMR_TXQEN_POS,
+                                            MTL_Q_TQOMR_TXQEN_LEN, 0);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+       }
+
+       /* Disable each Tx DMA channel */
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_TCR_ST_POS,
+                                            DMA_CH_TCR_ST_LEN, 0);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+       }
+}
+
+static void xlgmac_prepare_rx_stop(struct xlgmac_pdata *pdata,
+                                  unsigned int queue)
+{
+       unsigned int rx_status, prxq, rxqsts;
+       unsigned long rx_timeout;
+
+       /* The Rx engine cannot be stopped if it is actively processing
+        * packets. Wait for the Rx queue to empty the Rx fifo.  Don't
+        * wait forever though...
+        */
+       rx_timeout = jiffies + (XLGMAC_DMA_STOP_TIMEOUT * HZ);
+       while (time_before(jiffies, rx_timeout)) {
+               rx_status = readl(XLGMAC_MTL_REG(pdata, queue, MTL_Q_RQDR));
+               prxq = XLGMAC_GET_REG_BITS(rx_status, MTL_Q_RQDR_PRXQ_POS,
+                                          MTL_Q_RQDR_PRXQ_LEN);
+               rxqsts = XLGMAC_GET_REG_BITS(rx_status, MTL_Q_RQDR_RXQSTS_POS,
+                                            MTL_Q_RQDR_RXQSTS_LEN);
+               if ((prxq == 0) && (rxqsts == 0))
+                       break;
+
+               usleep_range(500, 1000);
+       }
+
+       if (!time_before(jiffies, rx_timeout))
+               netdev_info(pdata->netdev,
+                           "timed out waiting for Rx queue %u to empty\n",
+                           queue);
+}
+
+static void xlgmac_enable_rx(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       /* Enable each Rx DMA channel */
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->rx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_RCR_SR_POS,
+                                            DMA_CH_RCR_SR_LEN, 1);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+       }
+
+       /* Enable each Rx queue: MAC_RQC0R carries a 2-bit RXQEN field
+        * per queue; writing 0x2 enables the queue for generic/DCB
+        * traffic
+        */
+       regval = 0;
+       for (i = 0; i < pdata->rx_q_count; i++)
+               regval |= (0x02 << (i << 1));
+       writel(regval, pdata->mac_regs + MAC_RQC0R);
+
+       /* Enable MAC Rx */
+       regval = readl(pdata->mac_regs + MAC_RCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_DCRCC_POS,
+                                    MAC_RCR_DCRCC_LEN, 1);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_CST_POS,
+                                    MAC_RCR_CST_LEN, 1);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_ACS_POS,
+                                    MAC_RCR_ACS_LEN, 1);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_RE_POS,
+                                    MAC_RCR_RE_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_RCR);
+}
+
+static void xlgmac_disable_rx(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       /* Disable MAC Rx */
+       regval = readl(pdata->mac_regs + MAC_RCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_DCRCC_POS,
+                                    MAC_RCR_DCRCC_LEN, 0);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_CST_POS,
+                                    MAC_RCR_CST_LEN, 0);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_ACS_POS,
+                                    MAC_RCR_ACS_LEN, 0);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_RE_POS,
+                                    MAC_RCR_RE_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_RCR);
+
+       /* Prepare for Rx DMA channel stop */
+       for (i = 0; i < pdata->rx_q_count; i++)
+               xlgmac_prepare_rx_stop(pdata, i);
+
+       /* Disable each Rx queue */
+       writel(0, pdata->mac_regs + MAC_RQC0R);
+
+       /* Disable each Rx DMA channel */
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->rx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_RCR_SR_POS,
+                                            DMA_CH_RCR_SR_LEN, 0);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+       }
+}
+
+static void xlgmac_tx_start_xmit(struct xlgmac_channel *channel,
+                                struct xlgmac_ring *ring)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_desc_data *desc_data;
+
+       /* Make sure everything is written before the register write */
+       wmb();
+
+       /* Issue a poll command to Tx DMA by writing address
+        * of next immediate free descriptor
+        */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, ring->cur);
+       writel(lower_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_TDTR_LO));
+
+       /* Start the Tx timer */
+       if (pdata->tx_usecs && !channel->tx_timer_active) {
+               channel->tx_timer_active = 1;
+               mod_timer(&channel->tx_timer,
+                         jiffies + usecs_to_jiffies(pdata->tx_usecs));
+       }
+
+       ring->tx.xmit_more = 0;
+}
+
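+/* Write the descriptor chain for a packet mapped by xlgmac_map_tx_skb():
+ * an optional TSO/VLAN context descriptor first, then one normal
+ * descriptor per DMA buffer.  The OWN bit of the first descriptor is
+ * set last, after a dma_wmb(), so the hardware can never see a
+ * partially written chain.
+ */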
+static void xlgmac_dev_xmit(struct xlgmac_channel *channel)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->tx_ring;
+       unsigned int tso_context, vlan_context;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+       struct xlgmac_pkt_info *pkt_info;
+       unsigned int csum, tso, vlan;
+       int start_index = ring->cur;
+       int cur_index = ring->cur;
+       unsigned int tx_set_ic;
+       int i;
+
+       pkt_info = &ring->pkt_info;
+       csum = XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                  TX_PACKET_ATTRIBUTES_CSUM_ENABLE_POS,
+                                  TX_PACKET_ATTRIBUTES_CSUM_ENABLE_LEN);
+       tso = XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                 TX_PACKET_ATTRIBUTES_TSO_ENABLE_POS,
+                                 TX_PACKET_ATTRIBUTES_TSO_ENABLE_LEN);
+       vlan = XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                  TX_PACKET_ATTRIBUTES_VLAN_CTAG_POS,
+                                  TX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN);
+
+       if (tso && (pkt_info->mss != ring->tx.cur_mss))
+               tso_context = 1;
+       else
+               tso_context = 0;
+
+       if (vlan && (pkt_info->vlan_ctag != ring->tx.cur_vlan_ctag))
+               vlan_context = 1;
+       else
+               vlan_context = 0;
+
+       /* Determine if an interrupt should be generated for this Tx:
+        *   Interrupt:
+        *     - Tx frame count exceeds the frame count setting
+        *     - Addition of Tx frame count to the frame count since the
+        *       last interrupt was set exceeds the frame count setting
+        *   No interrupt:
+        *     - No frame count setting specified (ethtool -C ethX tx-frames 0)
+        *     - Addition of Tx frame count to the frame count since the
+        *       last interrupt was set does not exceed the frame count setting
+        */
+       ring->coalesce_count += pkt_info->tx_packets;
+       if (!pdata->tx_frames)
+               tx_set_ic = 0;
+       else if (pkt_info->tx_packets > pdata->tx_frames)
+               tx_set_ic = 1;
+       else if ((ring->coalesce_count % pdata->tx_frames) <
+                pkt_info->tx_packets)
+               tx_set_ic = 1;
+       else
+               tx_set_ic = 0;
+
+       desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+       dma_desc = desc_data->dma_desc;
+
+       /* Create a context descriptor if this is a TSO pkt_info */
+       if (tso_context || vlan_context) {
+               if (tso_context) {
+                       netif_dbg(pdata, tx_queued, pdata->netdev,
+                                 "TSO context descriptor, mss=%u\n",
+                                 pkt_info->mss);
+
+                       /* Set the MSS size */
+                       dma_desc->desc2 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc2,
+                                               TX_CONTEXT_DESC2_MSS_POS,
+                                               TX_CONTEXT_DESC2_MSS_LEN,
+                                               pkt_info->mss);
+
+                       /* Mark it as a CONTEXT descriptor */
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_CONTEXT_DESC3_CTXT_POS,
+                                               TX_CONTEXT_DESC3_CTXT_LEN,
+                                               1);
+
+                       /* Indicate this descriptor contains the MSS */
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_CONTEXT_DESC3_TCMSSV_POS,
+                                               TX_CONTEXT_DESC3_TCMSSV_LEN,
+                                               1);
+
+                       ring->tx.cur_mss = pkt_info->mss;
+               }
+
+               if (vlan_context) {
+                       netif_dbg(pdata, tx_queued, pdata->netdev,
+                                 "VLAN context descriptor, ctag=%u\n",
+                                 pkt_info->vlan_ctag);
+
+                       /* Mark it as a CONTEXT descriptor */
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_CONTEXT_DESC3_CTXT_POS,
+                                               TX_CONTEXT_DESC3_CTXT_LEN,
+                                               1);
+
+                       /* Set the VLAN tag */
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_CONTEXT_DESC3_VT_POS,
+                                               TX_CONTEXT_DESC3_VT_LEN,
+                                               pkt_info->vlan_ctag);
+
+                       /* Indicate this descriptor contains the VLAN tag */
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_CONTEXT_DESC3_VLTV_POS,
+                                               TX_CONTEXT_DESC3_VLTV_LEN,
+                                               1);
+
+                       ring->tx.cur_vlan_ctag = pkt_info->vlan_ctag;
+               }
+
+               cur_index++;
+               desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+               dma_desc = desc_data->dma_desc;
+       }
+
+       /* Update buffer address (for TSO this is the header) */
+       dma_desc->desc0 =  cpu_to_le32(lower_32_bits(desc_data->skb_dma));
+       dma_desc->desc1 =  cpu_to_le32(upper_32_bits(desc_data->skb_dma));
+
+       /* Update the buffer length */
+       dma_desc->desc2 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc2,
+                               TX_NORMAL_DESC2_HL_B1L_POS,
+                               TX_NORMAL_DESC2_HL_B1L_LEN,
+                               desc_data->skb_dma_len);
+
+       /* VLAN tag insertion check */
+       if (vlan)
+               dma_desc->desc2 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc2,
+                                       TX_NORMAL_DESC2_VTIR_POS,
+                                       TX_NORMAL_DESC2_VTIR_LEN,
+                                       TX_NORMAL_DESC2_VLAN_INSERT);
+
+       /* Timestamp enablement check */
+       if (XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                               TX_PACKET_ATTRIBUTES_PTP_POS,
+                               TX_PACKET_ATTRIBUTES_PTP_LEN))
+               dma_desc->desc2 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc2,
+                                       TX_NORMAL_DESC2_TTSE_POS,
+                                       TX_NORMAL_DESC2_TTSE_LEN,
+                                       1);
+
+       /* Mark it as First Descriptor */
+       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc3,
+                               TX_NORMAL_DESC3_FD_POS,
+                               TX_NORMAL_DESC3_FD_LEN,
+                               1);
+
+       /* Mark it as a NORMAL descriptor */
+       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc3,
+                               TX_NORMAL_DESC3_CTXT_POS,
+                               TX_NORMAL_DESC3_CTXT_LEN,
+                               0);
+
+       /* Set OWN bit if not the first descriptor */
+       if (cur_index != start_index)
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_OWN_POS,
+                                       TX_NORMAL_DESC3_OWN_LEN,
+                                       1);
+
+       if (tso) {
+               /* Enable TSO */
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_TSE_POS,
+                                       TX_NORMAL_DESC3_TSE_LEN, 1);
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_TCPPL_POS,
+                                       TX_NORMAL_DESC3_TCPPL_LEN,
+                                       pkt_info->tcp_payload_len);
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_TCPHDRLEN_POS,
+                                       TX_NORMAL_DESC3_TCPHDRLEN_LEN,
+                                       pkt_info->tcp_header_len / 4);
+
+               pdata->stats.tx_tso_packets++;
+       } else {
+               /* Enable CRC and Pad Insertion */
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_CPC_POS,
+                                       TX_NORMAL_DESC3_CPC_LEN, 0);
+
+               /* Enable HW CSUM */
+               if (csum)
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_NORMAL_DESC3_CIC_POS,
+                                               TX_NORMAL_DESC3_CIC_LEN,
+                                               0x3);
+
+               /* Set the total length to be transmitted */
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_FL_POS,
+                                       TX_NORMAL_DESC3_FL_LEN,
+                                       pkt_info->length);
+       }
+
+       for (i = cur_index - start_index + 1; i < pkt_info->desc_count; i++) {
+               cur_index++;
+               desc_data = XLGMAC_GET_DESC_DATA(ring, cur_index);
+               dma_desc = desc_data->dma_desc;
+
+               /* Update buffer address */
+               dma_desc->desc0 =
+                       cpu_to_le32(lower_32_bits(desc_data->skb_dma));
+               dma_desc->desc1 =
+                       cpu_to_le32(upper_32_bits(desc_data->skb_dma));
+
+               /* Update the buffer length */
+               dma_desc->desc2 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc2,
+                                       TX_NORMAL_DESC2_HL_B1L_POS,
+                                       TX_NORMAL_DESC2_HL_B1L_LEN,
+                                       desc_data->skb_dma_len);
+
+               /* Set OWN bit */
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_OWN_POS,
+                                       TX_NORMAL_DESC3_OWN_LEN, 1);
+
+               /* Mark it as NORMAL descriptor */
+               dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc3,
+                                       TX_NORMAL_DESC3_CTXT_POS,
+                                       TX_NORMAL_DESC3_CTXT_LEN, 0);
+
+               /* Enable HW CSUM */
+               if (csum)
+                       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                                               dma_desc->desc3,
+                                               TX_NORMAL_DESC3_CIC_POS,
+                                               TX_NORMAL_DESC3_CIC_LEN,
+                                               0x3);
+       }
+
+       /* Set LAST bit for the last descriptor */
+       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc3,
+                               TX_NORMAL_DESC3_LD_POS,
+                               TX_NORMAL_DESC3_LD_LEN, 1);
+
+       /* Set IC bit based on Tx coalescing settings */
+       if (tx_set_ic)
+               dma_desc->desc2 = XLGMAC_SET_REG_BITS_LE(
+                                       dma_desc->desc2,
+                                       TX_NORMAL_DESC2_IC_POS,
+                                       TX_NORMAL_DESC2_IC_LEN, 1);
+
+       /* Save the Tx info to report back during cleanup */
+       desc_data->tx.packets = pkt_info->tx_packets;
+       desc_data->tx.bytes = pkt_info->tx_bytes;
+
+       /* In case the Tx DMA engine is running, make sure everything
+        * is written to the descriptor(s) before setting the OWN bit
+        * for the first descriptor
+        */
+       dma_wmb();
+
+       /* Set OWN bit for the first descriptor */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, start_index);
+       dma_desc = desc_data->dma_desc;
+       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc3,
+                               TX_NORMAL_DESC3_OWN_POS,
+                               TX_NORMAL_DESC3_OWN_LEN, 1);
+
+       if (netif_msg_tx_queued(pdata))
+               xlgmac_dump_tx_desc(pdata, ring, start_index,
+                                   pkt_info->desc_count, 1);
+
+       /* Make sure ownership is written to the descriptor */
+       smp_wmb();
+
+       ring->cur = cur_index + 1;
+       if (!pkt_info->skb->xmit_more ||
+           netif_xmit_stopped(netdev_get_tx_queue(pdata->netdev,
+                                                  channel->queue_index)))
+               xlgmac_tx_start_xmit(channel, ring);
+       else
+               ring->tx.xmit_more = 1;
+
+       XLGMAC_PR("%s: descriptors %u to %u written\n",
+                 channel->name, start_index & (ring->dma_desc_count - 1),
+                 (ring->cur - 1) & (ring->dma_desc_count - 1));
+}
+
+static void xlgmac_get_rx_tstamp(struct xlgmac_pkt_info *pkt_info,
+                                struct xlgmac_dma_desc *dma_desc)
+{
+       u32 tsa, tsd;
+       u64 nsec;
+
+       tsa = XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                    RX_CONTEXT_DESC3_TSA_POS,
+                               RX_CONTEXT_DESC3_TSA_LEN);
+       tsd = XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                    RX_CONTEXT_DESC3_TSD_POS,
+                               RX_CONTEXT_DESC3_TSD_LEN);
+       if (tsa && !tsd) {
+               nsec = le32_to_cpu(dma_desc->desc1);
+               nsec <<= 32;
+               nsec |= le32_to_cpu(dma_desc->desc0);
+               if (nsec != 0xffffffffffffffffULL) {
+                       pkt_info->rx_tstamp = nsec;
+                       pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_RX_TSTAMP_POS,
+                                       RX_PACKET_ATTRIBUTES_RX_TSTAMP_LEN,
+                                       1);
+               }
+       }
+}
+
+static void xlgmac_tx_desc_reset(struct xlgmac_desc_data *desc_data)
+{
+       struct xlgmac_dma_desc *dma_desc = desc_data->dma_desc;
+
+       /* Reset the Tx descriptor
+        *   Set buffer 1 (lo) address to zero
+        *   Set buffer 1 (hi) address to zero
+        *   Reset all desc2 control bits (IC, TTSE, B2L & B1L)
+        *   Reset all desc3 control bits (OWN, CTXT, FD, LD, CPC, CIC, etc)
+        */
+       dma_desc->desc0 = 0;
+       dma_desc->desc1 = 0;
+       dma_desc->desc2 = 0;
+       dma_desc->desc3 = 0;
+
+       /* Make sure ownership is written to the descriptor */
+       dma_wmb();
+}
+
+static void xlgmac_tx_desc_init(struct xlgmac_channel *channel)
+{
+       struct xlgmac_ring *ring = channel->tx_ring;
+       struct xlgmac_desc_data *desc_data;
+       int start_index = ring->cur;
+       int i;
+
+       /* Initialize all descriptors */
+       for (i = 0; i < ring->dma_desc_count; i++) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, i);
+
+               /* Initialize Tx descriptor */
+               xlgmac_tx_desc_reset(desc_data);
+       }
+
+       /* Update the total number of Tx descriptors */
+       writel(ring->dma_desc_count - 1, XLGMAC_DMA_REG(channel, DMA_CH_TDRLR));
+
+       /* Update the starting address of descriptor ring */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, start_index);
+       writel(upper_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_TDLR_HI));
+       writel(lower_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_TDLR_LO));
+}
+
+static void xlgmac_rx_desc_reset(struct xlgmac_pdata *pdata,
+                                struct xlgmac_desc_data *desc_data,
+                                unsigned int index)
+{
+       struct xlgmac_dma_desc *dma_desc = desc_data->dma_desc;
+       unsigned int rx_frames = pdata->rx_frames;
+       unsigned int rx_usecs = pdata->rx_usecs;
+       dma_addr_t hdr_dma, buf_dma;
+       unsigned int inte;
+
+       if (!rx_usecs && !rx_frames) {
+               /* No coalescing, interrupt for every descriptor */
+               inte = 1;
+       } else {
+               /* Set interrupt based on Rx frame coalescing setting */
+               if (rx_frames && !((index + 1) % rx_frames))
+                       inte = 1;
+               else
+                       inte = 0;
+       }
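+
+       /* Illustrative note (not from the databook): with rx_frames = 16
+        * and rx_usecs = 0, the test above sets inte only for descriptors
+        * 15, 31, 47, ..., i.e. one Rx interrupt per 16 frames; with
+        * rx_usecs set and rx_frames = 0, inte stays clear and interrupts
+        * are left to the RIWT watchdog timer programmed in
+        * xlgmac_config_rx_coalesce().
+        */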
+
+       /* Reset the Rx descriptor
+        *   Set buffer 1 (lo) address to header dma address (lo)
+        *   Set buffer 1 (hi) address to header dma address (hi)
+        *   Set buffer 2 (lo) address to buffer dma address (lo)
+        *   Set buffer 2 (hi) address to buffer dma address (hi) and
+        *     set control bits OWN and INTE
+        */
+       hdr_dma = desc_data->rx.hdr.dma_base + desc_data->rx.hdr.dma_off;
+       buf_dma = desc_data->rx.buf.dma_base + desc_data->rx.buf.dma_off;
+       dma_desc->desc0 = cpu_to_le32(lower_32_bits(hdr_dma));
+       dma_desc->desc1 = cpu_to_le32(upper_32_bits(hdr_dma));
+       dma_desc->desc2 = cpu_to_le32(lower_32_bits(buf_dma));
+       dma_desc->desc3 = cpu_to_le32(upper_32_bits(buf_dma));
+
+       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc3,
+                               RX_NORMAL_DESC3_INTE_POS,
+                               RX_NORMAL_DESC3_INTE_LEN,
+                               inte);
+
+       /* Since the Rx DMA engine is likely running, make sure everything
+        * is written to the descriptor(s) before setting the OWN bit
+        * for the descriptor
+        */
+       dma_wmb();
+
+       dma_desc->desc3 = XLGMAC_SET_REG_BITS_LE(
+                               dma_desc->desc3,
+                               RX_NORMAL_DESC3_OWN_POS,
+                               RX_NORMAL_DESC3_OWN_LEN,
+                               1);
+
+       /* Make sure ownership is written to the descriptor */
+       dma_wmb();
+}
+
+static void xlgmac_rx_desc_init(struct xlgmac_channel *channel)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->rx_ring;
+       unsigned int start_index = ring->cur;
+       struct xlgmac_desc_data *desc_data;
+       unsigned int i;
+
+       /* Initialize all descriptors */
+       for (i = 0; i < ring->dma_desc_count; i++) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, i);
+
+               /* Initialize Rx descriptor */
+               xlgmac_rx_desc_reset(pdata, desc_data, i);
+       }
+
+       /* Update the total number of Rx descriptors */
+       writel(ring->dma_desc_count - 1, XLGMAC_DMA_REG(channel, DMA_CH_RDRLR));
+
+       /* Update the starting address of descriptor ring */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, start_index);
+       writel(upper_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_RDLR_HI));
+       writel(lower_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_RDLR_LO));
+
+       /* Update the Rx Descriptor Tail Pointer */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, start_index +
+                                         ring->dma_desc_count - 1);
+       writel(lower_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_RDTR_LO));
+}
+
+static int xlgmac_is_context_desc(struct xlgmac_dma_desc *dma_desc)
+{
+       /* Rx and Tx share CTXT bit, so check TDES3.CTXT bit */
+       return XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                               TX_NORMAL_DESC3_CTXT_POS,
+                               TX_NORMAL_DESC3_CTXT_LEN);
+}
+
+static int xlgmac_is_last_desc(struct xlgmac_dma_desc *dma_desc)
+{
+       /* Rx and Tx share LD bit, so check TDES3.LD bit */
+       return XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                               TX_NORMAL_DESC3_LD_POS,
+                               TX_NORMAL_DESC3_LD_LEN);
+}
+
+static int xlgmac_disable_tx_flow_control(struct xlgmac_pdata *pdata)
+{
+       unsigned int max_q_count, q_count;
+       unsigned int reg, regval;
+       unsigned int i;
+
+       /* Clear MTL flow control */
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_EHFC_POS,
+                                            MTL_Q_RQOMR_EHFC_LEN, 0);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+
+       /* Clear MAC flow control */
+       max_q_count = XLGMAC_MAX_FLOW_CONTROL_QUEUES;
+       q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count);
+       reg = MAC_Q0TFCR;
+       for (i = 0; i < q_count; i++) {
+               regval = readl(pdata->mac_regs + reg);
+               regval = XLGMAC_SET_REG_BITS(regval,
+                                            MAC_Q0TFCR_TFE_POS,
+                                       MAC_Q0TFCR_TFE_LEN,
+                                       0);
+               writel(regval, pdata->mac_regs + reg);
+
+               reg += MAC_QTFCR_INC;
+       }
+
+       return 0;
+}
+
+static int xlgmac_enable_tx_flow_control(struct xlgmac_pdata *pdata)
+{
+       unsigned int max_q_count, q_count;
+       unsigned int reg, regval;
+       unsigned int i;
+
+       /* Set MTL flow control */
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_EHFC_POS,
+                                            MTL_Q_RQOMR_EHFC_LEN, 1);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+
+       /* Set MAC flow control */
+       max_q_count = XLGMAC_MAX_FLOW_CONTROL_QUEUES;
+       q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count);
+       reg = MAC_Q0TFCR;
+       for (i = 0; i < q_count; i++) {
+               regval = readl(pdata->mac_regs + reg);
+
+               /* Enable transmit flow control */
+               regval = XLGMAC_SET_REG_BITS(regval, MAC_Q0TFCR_TFE_POS,
+                                            MAC_Q0TFCR_TFE_LEN, 1);
+               /* Set pause time to the maximum: 0xffff pause quanta
+                * (one pause quantum is 512 bit times per IEEE 802.3)
+                */
+               regval = XLGMAC_SET_REG_BITS(regval, MAC_Q0TFCR_PT_POS,
+                                            MAC_Q0TFCR_PT_LEN, 0xffff);
+
+               writel(regval, pdata->mac_regs + reg);
+
+               reg += MAC_QTFCR_INC;
+       }
+
+       return 0;
+}
+
+static int xlgmac_disable_rx_flow_control(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_RFCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RFCR_RFE_POS,
+                                    MAC_RFCR_RFE_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_RFCR);
+
+       return 0;
+}
+
+static int xlgmac_enable_rx_flow_control(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MAC_RFCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RFCR_RFE_POS,
+                                    MAC_RFCR_RFE_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_RFCR);
+
+       return 0;
+}
+
+static int xlgmac_config_tx_flow_control(struct xlgmac_pdata *pdata)
+{
+       if (pdata->tx_pause)
+               xlgmac_enable_tx_flow_control(pdata);
+       else
+               xlgmac_disable_tx_flow_control(pdata);
+
+       return 0;
+}
+
+static int xlgmac_config_rx_flow_control(struct xlgmac_pdata *pdata)
+{
+       if (pdata->rx_pause)
+               xlgmac_enable_rx_flow_control(pdata);
+       else
+               xlgmac_disable_rx_flow_control(pdata);
+
+       return 0;
+}
+
+static int xlgmac_config_rx_coalesce(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->rx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_RIWT));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_RIWT_RWT_POS,
+                                            DMA_CH_RIWT_RWT_LEN,
+                                            pdata->rx_riwt);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_RIWT));
+       }
+
+       return 0;
+}
+
+static void xlgmac_config_flow_control(struct xlgmac_pdata *pdata)
+{
+       xlgmac_config_tx_flow_control(pdata);
+       xlgmac_config_rx_flow_control(pdata);
+}
+
+static void xlgmac_config_rx_fep_enable(struct xlgmac_pdata *pdata)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_FEP_POS,
+                                            MTL_Q_RQOMR_FEP_LEN, 1);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+}
+
+static void xlgmac_config_rx_fup_enable(struct xlgmac_pdata *pdata)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_FUP_POS,
+                                            MTL_Q_RQOMR_FUP_LEN, 1);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+}
+
+static int xlgmac_config_tx_coalesce(struct xlgmac_pdata *pdata)
+{
+       return 0;
+}
+
+static void xlgmac_config_rx_buffer_size(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->rx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_RCR_RBSZ_POS,
+                                            DMA_CH_RCR_RBSZ_LEN,
+                                       pdata->rx_buf_size);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+       }
+}
+
+static void xlgmac_config_tso_mode(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               if (pdata->hw_feat.tso) {
+                       regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+                       regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_TCR_TSE_POS,
+                                                    DMA_CH_TCR_TSE_LEN, 1);
+                       writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+               }
+       }
+}
+
+static void xlgmac_config_sph_mode(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->rx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_CR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_CR_SPH_POS,
+                                            DMA_CH_CR_SPH_LEN, 1);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_CR));
+       }
+
+       regval = readl(pdata->mac_regs + MAC_RCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RCR_HDSMS_POS,
+                                    MAC_RCR_HDSMS_LEN,
+                               XLGMAC_SPH_HDSMS_SIZE);
+       writel(regval, pdata->mac_regs + MAC_RCR);
+}
+
+static unsigned int xlgmac_usec_to_riwt(struct xlgmac_pdata *pdata,
+                                       unsigned int usec)
+{
+       unsigned long rate;
+       unsigned int ret;
+
+       rate = pdata->sysclk_rate;
+
+       /* Convert the input usec value to the watchdog timer value. Each
+        * watchdog timer value is equivalent to 256 clock cycles.
+        * Calculate the required value as:
+        *   ( usec * ( system_clock_hz / 10^6 ) ) / 256
+        */
+       ret = (usec * (rate / 1000000)) / 256;
+
+       return ret;
+}
+
+static unsigned int xlgmac_riwt_to_usec(struct xlgmac_pdata *pdata,
+                                       unsigned int riwt)
+{
+       unsigned long rate;
+       unsigned int ret;
+
+       rate = pdata->sysclk_rate;
+
+       /* Convert the input watchdog timer value to the usec value. Each
+        * watchdog timer value is equivalent to 256 clock cycles.
+        * Calculate the required value as:
+        *   ( riwt * 256 ) / ( system_clock_hz / 10^6 )
+        */
+       ret = (riwt * 256) / (rate / 1000000);
+
+       return ret;
+}
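+
+/* Worked example for the two conversions above, assuming a hypothetical
+ * 500 MHz system clock (sysclk_rate = 500000000): one watchdog tick is
+ * 256 cycles = 512 ns, so xlgmac_usec_to_riwt(pdata, 30) =
+ * (30 * 500) / 256 = 58 and xlgmac_riwt_to_usec(pdata, 58) =
+ * (58 * 256) / 500 = 29; the round trip loses a usec to integer division.
+ */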
+
+static int xlgmac_config_rx_threshold(struct xlgmac_pdata *pdata,
+                                     unsigned int val)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_RTC_POS,
+                                            MTL_Q_RQOMR_RTC_LEN, val);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+
+       return 0;
+}
+
+static void xlgmac_config_mtl_mode(struct xlgmac_pdata *pdata)
+{
+       unsigned int i;
+       u32 regval;
+
+       /* Set Tx to weighted round robin scheduling algorithm */
+       regval = readl(pdata->mac_regs + MTL_OMR);
+       regval = XLGMAC_SET_REG_BITS(regval, MTL_OMR_ETSALG_POS,
+                                    MTL_OMR_ETSALG_LEN, MTL_ETSALG_WRR);
+       writel(regval, pdata->mac_regs + MTL_OMR);
+
+       /* Set Tx traffic classes to use WRR algorithm with equal weights */
+       for (i = 0; i < pdata->hw_feat.tc_cnt; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_TC_ETSCR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_TC_ETSCR_TSA_POS,
+                                            MTL_TC_ETSCR_TSA_LEN, MTL_TSA_ETS);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_TC_ETSCR));
+
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_TC_QWR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_TC_QWR_QW_POS,
+                                            MTL_TC_QWR_QW_LEN, 1);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_TC_QWR));
+       }
+
+       /* Set Rx to strict priority algorithm */
+       regval = readl(pdata->mac_regs + MTL_OMR);
+       regval = XLGMAC_SET_REG_BITS(regval, MTL_OMR_RAA_POS,
+                                    MTL_OMR_RAA_LEN, MTL_RAA_SP);
+       writel(regval, pdata->mac_regs + MTL_OMR);
+}
+
+static void xlgmac_config_queue_mapping(struct xlgmac_pdata *pdata)
+{
+       unsigned int ppq, ppq_extra, prio, prio_queues;
+       unsigned int qptc, qptc_extra, queue;
+       unsigned int reg, regval;
+       unsigned int mask;
+       unsigned int i, j;
+
+       /* Map the MTL Tx Queues to Traffic Classes
+        *   Note: Tx Queues >= Traffic Classes
+        */
+       qptc = pdata->tx_q_count / pdata->hw_feat.tc_cnt;
+       qptc_extra = pdata->tx_q_count % pdata->hw_feat.tc_cnt;
+
+       for (i = 0, queue = 0; i < pdata->hw_feat.tc_cnt; i++) {
+               for (j = 0; j < qptc; j++) {
+                       netif_dbg(pdata, drv, pdata->netdev,
+                                 "TXq%u mapped to TC%u\n", queue, i);
+                       regval = readl(XLGMAC_MTL_REG(pdata, queue,
+                                                     MTL_Q_TQOMR));
+                       regval = XLGMAC_SET_REG_BITS(regval,
+                                                    MTL_Q_TQOMR_Q2TCMAP_POS,
+                                                    MTL_Q_TQOMR_Q2TCMAP_LEN,
+                                                    i);
+                       writel(regval, XLGMAC_MTL_REG(pdata, queue,
+                                                     MTL_Q_TQOMR));
+                       queue++;
+               }
+
+               if (i < qptc_extra) {
+                       netif_dbg(pdata, drv, pdata->netdev,
+                                 "TXq%u mapped to TC%u\n", queue, i);
+                       regval = readl(XLGMAC_MTL_REG(pdata, queue,
+                                                     MTL_Q_TQOMR));
+                       regval = XLGMAC_SET_REG_BITS(regval,
+                                                    MTL_Q_TQOMR_Q2TCMAP_POS,
+                                                    MTL_Q_TQOMR_Q2TCMAP_LEN,
+                                                    i);
+                       writel(regval, XLGMAC_MTL_REG(pdata, queue,
+                                                     MTL_Q_TQOMR));
+                       queue++;
+               }
+       }
+
+       /* Map the 8 VLAN priority values to available MTL Rx queues */
+       prio_queues = min_t(unsigned int, IEEE_8021QAZ_MAX_TCS,
+                           pdata->rx_q_count);
+       ppq = IEEE_8021QAZ_MAX_TCS / prio_queues;
+       ppq_extra = IEEE_8021QAZ_MAX_TCS % prio_queues;
+
+       reg = MAC_RQC2R;
+       regval = 0;
+       for (i = 0, prio = 0; i < prio_queues;) {
+               mask = 0;
+               for (j = 0; j < ppq; j++) {
+                       netif_dbg(pdata, drv, pdata->netdev,
+                                 "PRIO%u mapped to RXq%u\n", prio, i);
+                       mask |= (1 << prio);
+                       prio++;
+               }
+
+               if (i < ppq_extra) {
+                       netif_dbg(pdata, drv, pdata->netdev,
+                                 "PRIO%u mapped to RXq%u\n", prio, i);
+                       mask |= (1 << prio);
+                       prio++;
+               }
+
+               regval |= (mask << ((i++ % MAC_RQC2_Q_PER_REG) << 3));
+
+               if ((i % MAC_RQC2_Q_PER_REG) && (i != prio_queues))
+                       continue;
+
+               writel(regval, pdata->mac_regs + reg);
+               reg += MAC_RQC2_INC;
+               regval = 0;
+       }
+
+       /* Configure one-to-one mapping of MTL Rx queues to DMA Rx channels,
+        *  i.e. Q0 <--> CH0, Q1 <--> CH1 ... Q11 <--> CH11
+        */
+       reg = MTL_RQDCM0R;
+       regval = readl(pdata->mac_regs + reg);
+       regval |= (MTL_RQDCM0R_Q0MDMACH | MTL_RQDCM0R_Q1MDMACH |
+                   MTL_RQDCM0R_Q2MDMACH | MTL_RQDCM0R_Q3MDMACH);
+       writel(regval, pdata->mac_regs + reg);
+
+       reg += MTL_RQDCM_INC;
+       regval = readl(pdata->mac_regs + reg);
+       regval |= (MTL_RQDCM1R_Q4MDMACH | MTL_RQDCM1R_Q5MDMACH |
+                   MTL_RQDCM1R_Q6MDMACH | MTL_RQDCM1R_Q7MDMACH);
+       writel(regval, pdata->mac_regs + reg);
+
+       reg += MTL_RQDCM_INC;
+       regval = readl(pdata->mac_regs + reg);
+       regval |= (MTL_RQDCM2R_Q8MDMACH | MTL_RQDCM2R_Q9MDMACH |
+                   MTL_RQDCM2R_Q10MDMACH | MTL_RQDCM2R_Q11MDMACH);
+       writel(regval, pdata->mac_regs + reg);
+}
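+
+/* Example of the VLAN priority mapping above for a hypothetical
+ * rx_q_count = 3: prio_queues = 3, ppq = 8 / 3 = 2 and ppq_extra = 2, so
+ * the first two queues absorb one extra priority each: RXq0 gets PRIO0-2,
+ * RXq1 gets PRIO3-5 and RXq2 gets PRIO6-7.
+ */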
+
+static unsigned int xlgmac_calculate_per_queue_fifo(
+                                       unsigned int fifo_size,
+                                       unsigned int queue_count)
+{
+       unsigned int q_fifo_size;
+       unsigned int p_fifo;
+
+       /* Calculate the configured fifo size */
+       q_fifo_size = 1 << (fifo_size + 7);
+
+       /* The configured value may not be the actual amount of fifo RAM */
+       q_fifo_size = min_t(unsigned int, XLGMAC_MAX_FIFO, q_fifo_size);
+
+       q_fifo_size = q_fifo_size / queue_count;
+
+       /* Each increment in the queue fifo size represents 256 bytes of
+        * fifo, with 0 representing 256 bytes. Distribute the fifo equally
+        * between the queues.
+        */
+       p_fifo = q_fifo_size / 256;
+       if (p_fifo)
+               p_fifo--;
+
+       return p_fifo;
+}
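+
+/* Example with a hypothetical fifo_size feature value of 9: the configured
+ * size is 1 << (9 + 7) = 65536 bytes; assuming XLGMAC_MAX_FIFO does not
+ * clamp it, four queues get 16384 bytes each and the encoded register
+ * value is 16384 / 256 - 1 = 63 (256-byte units, zero-based).
+ */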
+
+static void xlgmac_config_tx_fifo_size(struct xlgmac_pdata *pdata)
+{
+       unsigned int fifo_size;
+       unsigned int i;
+       u32 regval;
+
+       fifo_size = xlgmac_calculate_per_queue_fifo(
+                               pdata->hw_feat.tx_fifo_size,
+                               pdata->tx_q_count);
+
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_TQOMR_TQS_POS,
+                                            MTL_Q_TQOMR_TQS_LEN, fifo_size);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+       }
+
+       netif_info(pdata, drv, pdata->netdev,
+                  "%d Tx hardware queues, %d byte fifo per queue\n",
+                  pdata->tx_q_count, ((fifo_size + 1) * 256));
+}
+
+static void xlgmac_config_rx_fifo_size(struct xlgmac_pdata *pdata)
+{
+       unsigned int fifo_size;
+       unsigned int i;
+       u32 regval;
+
+       fifo_size = xlgmac_calculate_per_queue_fifo(
+                                       pdata->hw_feat.rx_fifo_size,
+                                       pdata->rx_q_count);
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_RQS_POS,
+                                            MTL_Q_RQOMR_RQS_LEN, fifo_size);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+
+       netif_info(pdata, drv, pdata->netdev,
+                  "%d Rx hardware queues, %d byte fifo per queue\n",
+                  pdata->rx_q_count, ((fifo_size + 1) * 256));
+}
+
+static void xlgmac_config_flow_control_threshold(struct xlgmac_pdata *pdata)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQFCR));
+               /* Activate flow control when less than 4k left in fifo */
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQFCR_RFA_POS,
+                                            MTL_Q_RQFCR_RFA_LEN, 2);
+               /* De-activate flow control when more than 6k left in fifo */
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQFCR_RFD_POS,
+                                            MTL_Q_RQFCR_RFD_LEN, 4);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQFCR));
+       }
+}
+
+static int xlgmac_config_tx_threshold(struct xlgmac_pdata *pdata,
+                                     unsigned int val)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_TQOMR_TTC_POS,
+                                            MTL_Q_TQOMR_TTC_LEN, val);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+       }
+
+       return 0;
+}
+
+static int xlgmac_config_rsf_mode(struct xlgmac_pdata *pdata,
+                                 unsigned int val)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_RQOMR_RSF_POS,
+                                            MTL_Q_RQOMR_RSF_LEN, val);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_RQOMR));
+       }
+
+       return 0;
+}
+
+static int xlgmac_config_tsf_mode(struct xlgmac_pdata *pdata,
+                                 unsigned int val)
+{
+       unsigned int i;
+       u32 regval;
+
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_TQOMR_TSF_POS,
+                                            MTL_Q_TQOMR_TSF_LEN, val);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+       }
+
+       return 0;
+}
+
+static int xlgmac_config_osp_mode(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_TCR_OSP_POS,
+                                            DMA_CH_TCR_OSP_LEN,
+                                       pdata->tx_osp_mode);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+       }
+
+       return 0;
+}
+
+static int xlgmac_config_pblx8(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_CR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_CR_PBLX8_POS,
+                                            DMA_CH_CR_PBLX8_LEN,
+                                       pdata->pblx8);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_CR));
+       }
+
+       return 0;
+}
+
+static int xlgmac_get_tx_pbl_val(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(XLGMAC_DMA_REG(pdata->channel_head, DMA_CH_TCR));
+       regval = XLGMAC_GET_REG_BITS(regval, DMA_CH_TCR_PBL_POS,
+                                    DMA_CH_TCR_PBL_LEN);
+       return regval;
+}
+
+static int xlgmac_config_tx_pbl_val(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_TCR_PBL_POS,
+                                            DMA_CH_TCR_PBL_LEN,
+                                       pdata->tx_pbl);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_TCR));
+       }
+
+       return 0;
+}
+
+static int xlgmac_get_rx_pbl_val(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(XLGMAC_DMA_REG(pdata->channel_head, DMA_CH_RCR));
+       regval = XLGMAC_GET_REG_BITS(regval, DMA_CH_RCR_PBL_POS,
+                                    DMA_CH_RCR_PBL_LEN);
+       return regval;
+}
+
+static int xlgmac_config_rx_pbl_val(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       u32 regval;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->rx_ring)
+                       break;
+
+               regval = readl(XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+               regval = XLGMAC_SET_REG_BITS(regval, DMA_CH_RCR_PBL_POS,
+                                            DMA_CH_RCR_PBL_LEN,
+                                       pdata->rx_pbl);
+               writel(regval, XLGMAC_DMA_REG(channel, DMA_CH_RCR));
+       }
+
+       return 0;
+}
+
+static u64 xlgmac_mmc_read(struct xlgmac_pdata *pdata, unsigned int reg_lo)
+{
+       bool read_hi;
+       u64 val;
+
+       switch (reg_lo) {
+       /* These registers are always 64 bit */
+       case MMC_TXOCTETCOUNT_GB_LO:
+       case MMC_TXOCTETCOUNT_G_LO:
+       case MMC_RXOCTETCOUNT_GB_LO:
+       case MMC_RXOCTETCOUNT_G_LO:
+               read_hi = true;
+               break;
+
+       default:
+               read_hi = false;
+       }
+
+       val = (u64)readl(pdata->mac_regs + reg_lo);
+
+       if (read_hi)
+               val |= ((u64)readl(pdata->mac_regs + reg_lo + 4) << 32);
+
+       return val;
+}
+
+static void xlgmac_tx_mmc_int(struct xlgmac_pdata *pdata)
+{
+       unsigned int mmc_isr = readl(pdata->mac_regs + MMC_TISR);
+       struct xlgmac_stats *stats = &pdata->stats;
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXOCTETCOUNT_GB_POS,
+                               MMC_TISR_TXOCTETCOUNT_GB_LEN))
+               stats->txoctetcount_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TXOCTETCOUNT_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXFRAMECOUNT_GB_POS,
+                               MMC_TISR_TXFRAMECOUNT_GB_LEN))
+               stats->txframecount_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TXFRAMECOUNT_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXBROADCASTFRAMES_G_POS,
+                               MMC_TISR_TXBROADCASTFRAMES_G_LEN))
+               stats->txbroadcastframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_TXBROADCASTFRAMES_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXMULTICASTFRAMES_G_POS,
+                               MMC_TISR_TXMULTICASTFRAMES_G_LEN))
+               stats->txmulticastframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_TXMULTICASTFRAMES_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TX64OCTETS_GB_POS,
+                               MMC_TISR_TX64OCTETS_GB_LEN))
+               stats->tx64octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TX64OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TX65TO127OCTETS_GB_POS,
+                               MMC_TISR_TX65TO127OCTETS_GB_LEN))
+               stats->tx65to127octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TX65TO127OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TX128TO255OCTETS_GB_POS,
+                               MMC_TISR_TX128TO255OCTETS_GB_LEN))
+               stats->tx128to255octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TX128TO255OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TX256TO511OCTETS_GB_POS,
+                               MMC_TISR_TX256TO511OCTETS_GB_LEN))
+               stats->tx256to511octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TX256TO511OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TX512TO1023OCTETS_GB_POS,
+                               MMC_TISR_TX512TO1023OCTETS_GB_LEN))
+               stats->tx512to1023octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TX512TO1023OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TX1024TOMAXOCTETS_GB_POS,
+                               MMC_TISR_TX1024TOMAXOCTETS_GB_LEN))
+               stats->tx1024tomaxoctets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TX1024TOMAXOCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXUNICASTFRAMES_GB_POS,
+                               MMC_TISR_TXUNICASTFRAMES_GB_LEN))
+               stats->txunicastframes_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TXUNICASTFRAMES_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXMULTICASTFRAMES_GB_POS,
+                               MMC_TISR_TXMULTICASTFRAMES_GB_LEN))
+               stats->txmulticastframes_gb +=
+                       xlgmac_mmc_read(pdata, MMC_TXMULTICASTFRAMES_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXBROADCASTFRAMES_GB_POS,
+                               MMC_TISR_TXBROADCASTFRAMES_GB_LEN))
+               stats->txbroadcastframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_TXBROADCASTFRAMES_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXUNDERFLOWERROR_POS,
+                               MMC_TISR_TXUNDERFLOWERROR_LEN))
+               stats->txunderflowerror +=
+                       xlgmac_mmc_read(pdata, MMC_TXUNDERFLOWERROR_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXOCTETCOUNT_G_POS,
+                               MMC_TISR_TXOCTETCOUNT_G_LEN))
+               stats->txoctetcount_g +=
+                       xlgmac_mmc_read(pdata, MMC_TXOCTETCOUNT_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXFRAMECOUNT_G_POS,
+                               MMC_TISR_TXFRAMECOUNT_G_LEN))
+               stats->txframecount_g +=
+                       xlgmac_mmc_read(pdata, MMC_TXFRAMECOUNT_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXPAUSEFRAMES_POS,
+                               MMC_TISR_TXPAUSEFRAMES_LEN))
+               stats->txpauseframes +=
+                       xlgmac_mmc_read(pdata, MMC_TXPAUSEFRAMES_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_TISR_TXVLANFRAMES_G_POS,
+                               MMC_TISR_TXVLANFRAMES_G_LEN))
+               stats->txvlanframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_TXVLANFRAMES_G_LO);
+}
+
+static void xlgmac_rx_mmc_int(struct xlgmac_pdata *pdata)
+{
+       unsigned int mmc_isr = readl(pdata->mac_regs + MMC_RISR);
+       struct xlgmac_stats *stats = &pdata->stats;
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXFRAMECOUNT_GB_POS,
+                               MMC_RISR_RXFRAMECOUNT_GB_LEN))
+               stats->rxframecount_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RXFRAMECOUNT_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXOCTETCOUNT_GB_POS,
+                               MMC_RISR_RXOCTETCOUNT_GB_LEN))
+               stats->rxoctetcount_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RXOCTETCOUNT_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXOCTETCOUNT_G_POS,
+                               MMC_RISR_RXOCTETCOUNT_G_LEN))
+               stats->rxoctetcount_g +=
+                       xlgmac_mmc_read(pdata, MMC_RXOCTETCOUNT_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXBROADCASTFRAMES_G_POS,
+                               MMC_RISR_RXBROADCASTFRAMES_G_LEN))
+               stats->rxbroadcastframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_RXBROADCASTFRAMES_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXMULTICASTFRAMES_G_POS,
+                               MMC_RISR_RXMULTICASTFRAMES_G_LEN))
+               stats->rxmulticastframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_RXMULTICASTFRAMES_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXCRCERROR_POS,
+                               MMC_RISR_RXCRCERROR_LEN))
+               stats->rxcrcerror +=
+                       xlgmac_mmc_read(pdata, MMC_RXCRCERROR_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXRUNTERROR_POS,
+                               MMC_RISR_RXRUNTERROR_LEN))
+               stats->rxrunterror +=
+                       xlgmac_mmc_read(pdata, MMC_RXRUNTERROR);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXJABBERERROR_POS,
+                               MMC_RISR_RXJABBERERROR_LEN))
+               stats->rxjabbererror +=
+                       xlgmac_mmc_read(pdata, MMC_RXJABBERERROR);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXUNDERSIZE_G_POS,
+                               MMC_RISR_RXUNDERSIZE_G_LEN))
+               stats->rxundersize_g +=
+                       xlgmac_mmc_read(pdata, MMC_RXUNDERSIZE_G);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXOVERSIZE_G_POS,
+                               MMC_RISR_RXOVERSIZE_G_LEN))
+               stats->rxoversize_g +=
+                       xlgmac_mmc_read(pdata, MMC_RXOVERSIZE_G);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RX64OCTETS_GB_POS,
+                               MMC_RISR_RX64OCTETS_GB_LEN))
+               stats->rx64octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RX64OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RX65TO127OCTETS_GB_POS,
+                               MMC_RISR_RX65TO127OCTETS_GB_LEN))
+               stats->rx65to127octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RX65TO127OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RX128TO255OCTETS_GB_POS,
+                               MMC_RISR_RX128TO255OCTETS_GB_LEN))
+               stats->rx128to255octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RX128TO255OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RX256TO511OCTETS_GB_POS,
+                               MMC_RISR_RX256TO511OCTETS_GB_LEN))
+               stats->rx256to511octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RX256TO511OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RX512TO1023OCTETS_GB_POS,
+                               MMC_RISR_RX512TO1023OCTETS_GB_LEN))
+               stats->rx512to1023octets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RX512TO1023OCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RX1024TOMAXOCTETS_GB_POS,
+                               MMC_RISR_RX1024TOMAXOCTETS_GB_LEN))
+               stats->rx1024tomaxoctets_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RX1024TOMAXOCTETS_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXUNICASTFRAMES_G_POS,
+                               MMC_RISR_RXUNICASTFRAMES_G_LEN))
+               stats->rxunicastframes_g +=
+                       xlgmac_mmc_read(pdata, MMC_RXUNICASTFRAMES_G_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXLENGTHERROR_POS,
+                               MMC_RISR_RXLENGTHERROR_LEN))
+               stats->rxlengtherror +=
+                       xlgmac_mmc_read(pdata, MMC_RXLENGTHERROR_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXOUTOFRANGETYPE_POS,
+                               MMC_RISR_RXOUTOFRANGETYPE_LEN))
+               stats->rxoutofrangetype +=
+                       xlgmac_mmc_read(pdata, MMC_RXOUTOFRANGETYPE_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXPAUSEFRAMES_POS,
+                               MMC_RISR_RXPAUSEFRAMES_LEN))
+               stats->rxpauseframes +=
+                       xlgmac_mmc_read(pdata, MMC_RXPAUSEFRAMES_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXFIFOOVERFLOW_POS,
+                               MMC_RISR_RXFIFOOVERFLOW_LEN))
+               stats->rxfifooverflow +=
+                       xlgmac_mmc_read(pdata, MMC_RXFIFOOVERFLOW_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXVLANFRAMES_GB_POS,
+                               MMC_RISR_RXVLANFRAMES_GB_LEN))
+               stats->rxvlanframes_gb +=
+                       xlgmac_mmc_read(pdata, MMC_RXVLANFRAMES_GB_LO);
+
+       if (XLGMAC_GET_REG_BITS(mmc_isr,
+                               MMC_RISR_RXWATCHDOGERROR_POS,
+                               MMC_RISR_RXWATCHDOGERROR_LEN))
+               stats->rxwatchdogerror +=
+                       xlgmac_mmc_read(pdata, MMC_RXWATCHDOGERROR);
+}
+
+static void xlgmac_read_mmc_stats(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_stats *stats = &pdata->stats;
+       u32 regval;
+
+       /* Freeze counters */
+       regval = readl(pdata->mac_regs + MMC_CR);
+       regval = XLGMAC_SET_REG_BITS(regval, MMC_CR_MCF_POS,
+                                    MMC_CR_MCF_LEN, 1);
+       writel(regval, pdata->mac_regs + MMC_CR);
+
+       stats->txoctetcount_gb +=
+               xlgmac_mmc_read(pdata, MMC_TXOCTETCOUNT_GB_LO);
+
+       stats->txframecount_gb +=
+               xlgmac_mmc_read(pdata, MMC_TXFRAMECOUNT_GB_LO);
+
+       stats->txbroadcastframes_g +=
+               xlgmac_mmc_read(pdata, MMC_TXBROADCASTFRAMES_G_LO);
+
+       stats->txmulticastframes_g +=
+               xlgmac_mmc_read(pdata, MMC_TXMULTICASTFRAMES_G_LO);
+
+       stats->tx64octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_TX64OCTETS_GB_LO);
+
+       stats->tx65to127octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_TX65TO127OCTETS_GB_LO);
+
+       stats->tx128to255octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_TX128TO255OCTETS_GB_LO);
+
+       stats->tx256to511octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_TX256TO511OCTETS_GB_LO);
+
+       stats->tx512to1023octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_TX512TO1023OCTETS_GB_LO);
+
+       stats->tx1024tomaxoctets_gb +=
+               xlgmac_mmc_read(pdata, MMC_TX1024TOMAXOCTETS_GB_LO);
+
+       stats->txunicastframes_gb +=
+               xlgmac_mmc_read(pdata, MMC_TXUNICASTFRAMES_GB_LO);
+
+       stats->txmulticastframes_gb +=
+               xlgmac_mmc_read(pdata, MMC_TXMULTICASTFRAMES_GB_LO);
+
+       stats->txbroadcastframes_g +=
+               xlgmac_mmc_read(pdata, MMC_TXBROADCASTFRAMES_GB_LO);
+
+       stats->txunderflowerror +=
+               xlgmac_mmc_read(pdata, MMC_TXUNDERFLOWERROR_LO);
+
+       stats->txoctetcount_g +=
+               xlgmac_mmc_read(pdata, MMC_TXOCTETCOUNT_G_LO);
+
+       stats->txframecount_g +=
+               xlgmac_mmc_read(pdata, MMC_TXFRAMECOUNT_G_LO);
+
+       stats->txpauseframes +=
+               xlgmac_mmc_read(pdata, MMC_TXPAUSEFRAMES_LO);
+
+       stats->txvlanframes_g +=
+               xlgmac_mmc_read(pdata, MMC_TXVLANFRAMES_G_LO);
+
+       stats->rxframecount_gb +=
+               xlgmac_mmc_read(pdata, MMC_RXFRAMECOUNT_GB_LO);
+
+       stats->rxoctetcount_gb +=
+               xlgmac_mmc_read(pdata, MMC_RXOCTETCOUNT_GB_LO);
+
+       stats->rxoctetcount_g +=
+               xlgmac_mmc_read(pdata, MMC_RXOCTETCOUNT_G_LO);
+
+       stats->rxbroadcastframes_g +=
+               xlgmac_mmc_read(pdata, MMC_RXBROADCASTFRAMES_G_LO);
+
+       stats->rxmulticastframes_g +=
+               xlgmac_mmc_read(pdata, MMC_RXMULTICASTFRAMES_G_LO);
+
+       stats->rxcrcerror +=
+               xlgmac_mmc_read(pdata, MMC_RXCRCERROR_LO);
+
+       stats->rxrunterror +=
+               xlgmac_mmc_read(pdata, MMC_RXRUNTERROR);
+
+       stats->rxjabbererror +=
+               xlgmac_mmc_read(pdata, MMC_RXJABBERERROR);
+
+       stats->rxundersize_g +=
+               xlgmac_mmc_read(pdata, MMC_RXUNDERSIZE_G);
+
+       stats->rxoversize_g +=
+               xlgmac_mmc_read(pdata, MMC_RXOVERSIZE_G);
+
+       stats->rx64octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_RX64OCTETS_GB_LO);
+
+       stats->rx65to127octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_RX65TO127OCTETS_GB_LO);
+
+       stats->rx128to255octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_RX128TO255OCTETS_GB_LO);
+
+       stats->rx256to511octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_RX256TO511OCTETS_GB_LO);
+
+       stats->rx512to1023octets_gb +=
+               xlgmac_mmc_read(pdata, MMC_RX512TO1023OCTETS_GB_LO);
+
+       stats->rx1024tomaxoctets_gb +=
+               xlgmac_mmc_read(pdata, MMC_RX1024TOMAXOCTETS_GB_LO);
+
+       stats->rxunicastframes_g +=
+               xlgmac_mmc_read(pdata, MMC_RXUNICASTFRAMES_G_LO);
+
+       stats->rxlengtherror +=
+               xlgmac_mmc_read(pdata, MMC_RXLENGTHERROR_LO);
+
+       stats->rxoutofrangetype +=
+               xlgmac_mmc_read(pdata, MMC_RXOUTOFRANGETYPE_LO);
+
+       stats->rxpauseframes +=
+               xlgmac_mmc_read(pdata, MMC_RXPAUSEFRAMES_LO);
+
+       stats->rxfifooverflow +=
+               xlgmac_mmc_read(pdata, MMC_RXFIFOOVERFLOW_LO);
+
+       stats->rxvlanframes_gb +=
+               xlgmac_mmc_read(pdata, MMC_RXVLANFRAMES_GB_LO);
+
+       stats->rxwatchdogerror +=
+               xlgmac_mmc_read(pdata, MMC_RXWATCHDOGERROR);
+
+       /* Un-freeze counters */
+       regval = readl(pdata->mac_regs + MMC_CR);
+       regval = XLGMAC_SET_REG_BITS(regval, MMC_CR_MCF_POS,
+                                    MMC_CR_MCF_LEN, 0);
+       writel(regval, pdata->mac_regs + MMC_CR);
+}
+
+static void xlgmac_config_mmc(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + MMC_CR);
+       /* Set counters to reset on read */
+       regval = XLGMAC_SET_REG_BITS(regval, MMC_CR_ROR_POS,
+                                    MMC_CR_ROR_LEN, 1);
+       /* Reset the counters */
+       regval = XLGMAC_SET_REG_BITS(regval, MMC_CR_CR_POS,
+                                    MMC_CR_CR_LEN, 1);
+       writel(regval, pdata->mac_regs + MMC_CR);
+}
+
+static int xlgmac_write_rss_reg(struct xlgmac_pdata *pdata, unsigned int type,
+                               unsigned int index, unsigned int val)
+{
+       unsigned int wait;
+       int ret = 0;
+       u32 regval;
+
+       mutex_lock(&pdata->rss_mutex);
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_RSSAR),
+                                    MAC_RSSAR_OB_POS, MAC_RSSAR_OB_LEN);
+       if (regval) {
+               ret = -EBUSY;
+               goto unlock;
+       }
+
+       writel(val, pdata->mac_regs + MAC_RSSDR);
+
+       regval = readl(pdata->mac_regs + MAC_RSSAR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RSSAR_RSSIA_POS,
+                                    MAC_RSSAR_RSSIA_LEN, index);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RSSAR_ADDRT_POS,
+                                    MAC_RSSAR_ADDRT_LEN, type);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RSSAR_CT_POS,
+                                    MAC_RSSAR_CT_LEN, 0);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RSSAR_OB_POS,
+                                    MAC_RSSAR_OB_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_RSSAR);
+
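+       /* Poll the OB (operation busy) flag for up to roughly 1-1.5 seconds
+        * (1000 iterations of usleep_range(1000, 1500)) before giving up
+        */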
+       wait = 1000;
+       while (wait--) {
+               regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_RSSAR),
+                                            MAC_RSSAR_OB_POS,
+                                            MAC_RSSAR_OB_LEN);
+               if (!regval)
+                       goto unlock;
+
+               usleep_range(1000, 1500);
+       }
+
+       ret = -EBUSY;
+
+unlock:
+       mutex_unlock(&pdata->rss_mutex);
+
+       return ret;
+}
+
+static int xlgmac_write_rss_hash_key(struct xlgmac_pdata *pdata)
+{
+       unsigned int key_regs = sizeof(pdata->rss_key) / sizeof(u32);
+       unsigned int *key = (unsigned int *)&pdata->rss_key;
+       int ret;
+
+       while (key_regs--) {
+               ret = xlgmac_write_rss_reg(pdata, XLGMAC_RSS_HASH_KEY_TYPE,
+                                          key_regs, *key++);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int xlgmac_write_rss_lookup_table(struct xlgmac_pdata *pdata)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++) {
+               ret = xlgmac_write_rss_reg(pdata,
+                                          XLGMAC_RSS_LOOKUP_TABLE_TYPE, i,
+                                          pdata->rss_table[i]);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int xlgmac_set_rss_hash_key(struct xlgmac_pdata *pdata, const u8 *key)
+{
+       memcpy(pdata->rss_key, key, sizeof(pdata->rss_key));
+
+       return xlgmac_write_rss_hash_key(pdata);
+}
+
+static int xlgmac_set_rss_lookup_table(struct xlgmac_pdata *pdata,
+                                      const u32 *table)
+{
+       unsigned int i;
+       u32 tval;
+
+       for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++) {
+               tval = table[i];
+               pdata->rss_table[i] = XLGMAC_SET_REG_BITS(
+                                               pdata->rss_table[i],
+                                               MAC_RSSDR_DMCH_POS,
+                                               MAC_RSSDR_DMCH_LEN,
+                                               tval);
+       }
+
+       return xlgmac_write_rss_lookup_table(pdata);
+}
+
+static int xlgmac_enable_rss(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+       int ret;
+
+       if (!pdata->hw_feat.rss)
+               return -EOPNOTSUPP;
+
+       /* Program the hash key */
+       ret = xlgmac_write_rss_hash_key(pdata);
+       if (ret)
+               return ret;
+
+       /* Program the lookup table */
+       ret = xlgmac_write_rss_lookup_table(pdata);
+       if (ret)
+               return ret;
+
+       /* Set the RSS options */
+       writel(pdata->rss_options, pdata->mac_regs + MAC_RSSCR);
+
+       /* Enable RSS */
+       regval = readl(pdata->mac_regs + MAC_RSSCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RSSCR_RSSE_POS,
+                                    MAC_RSSCR_RSSE_LEN, 1);
+       writel(regval, pdata->mac_regs + MAC_RSSCR);
+
+       return 0;
+}
+
+static int xlgmac_disable_rss(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       if (!pdata->hw_feat.rss)
+               return -EOPNOTSUPP;
+
+       regval = readl(pdata->mac_regs + MAC_RSSCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_RSSCR_RSSE_POS,
+                                    MAC_RSSCR_RSSE_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_RSSCR);
+
+       return 0;
+}
+
+static void xlgmac_config_rss(struct xlgmac_pdata *pdata)
+{
+       int ret;
+
+       if (!pdata->hw_feat.rss)
+               return;
+
+       if (pdata->netdev->features & NETIF_F_RXHASH)
+               ret = xlgmac_enable_rss(pdata);
+       else
+               ret = xlgmac_disable_rss(pdata);
+
+       if (ret)
+               netdev_err(pdata->netdev,
+                          "error configuring RSS, RSS disabled\n");
+}
+
+static void xlgmac_enable_dma_interrupts(struct xlgmac_pdata *pdata)
+{
+       unsigned int dma_ch_isr, dma_ch_ier;
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               /* Clear all the interrupts which are set */
+               dma_ch_isr = readl(XLGMAC_DMA_REG(channel, DMA_CH_SR));
+               writel(dma_ch_isr, XLGMAC_DMA_REG(channel, DMA_CH_SR));
+
+               /* Clear all interrupt enable bits */
+               dma_ch_ier = 0;
+
+               /* Enable the following interrupts
+                *   NIE  - Normal Interrupt Summary Enable
+                *   AIE  - Abnormal Interrupt Summary Enable
+                *   FBEE - Fatal Bus Error Enable
+                */
+               dma_ch_ier = XLGMAC_SET_REG_BITS(dma_ch_ier,
+                                                DMA_CH_IER_NIE_POS,
+                                       DMA_CH_IER_NIE_LEN, 1);
+               dma_ch_ier = XLGMAC_SET_REG_BITS(dma_ch_ier,
+                                                DMA_CH_IER_AIE_POS,
+                                       DMA_CH_IER_AIE_LEN, 1);
+               dma_ch_ier = XLGMAC_SET_REG_BITS(dma_ch_ier,
+                                                DMA_CH_IER_FBEE_POS,
+                                       DMA_CH_IER_FBEE_LEN, 1);
+
+               if (channel->tx_ring) {
+                       /* Enable the following Tx interrupts
+                        *   TIE  - Transmit Interrupt Enable (unless using
+                        *          per channel interrupts)
+                        */
+                       if (!pdata->per_channel_irq)
+                               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                                               dma_ch_ier,
+                                               DMA_CH_IER_TIE_POS,
+                                               DMA_CH_IER_TIE_LEN,
+                                               1);
+               }
+               if (channel->rx_ring) {
+                       /* Enable the following Rx interrupts
+                        *   RBUE - Receive Buffer Unavailable Enable
+                        *   RIE  - Receive Interrupt Enable (unless using
+                        *          per channel interrupts)
+                        */
+                       dma_ch_ier = XLGMAC_SET_REG_BITS(
+                                       dma_ch_ier,
+                                       DMA_CH_IER_RBUE_POS,
+                                       DMA_CH_IER_RBUE_LEN,
+                                       1);
+                       if (!pdata->per_channel_irq)
+                               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                                               dma_ch_ier,
+                                               DMA_CH_IER_RIE_POS,
+                                               DMA_CH_IER_RIE_LEN,
+                                               1);
+               }
+
+               writel(dma_ch_ier, XLGMAC_DMA_REG(channel, DMA_CH_IER));
+       }
+}
+
+static void xlgmac_enable_mtl_interrupts(struct xlgmac_pdata *pdata)
+{
+       unsigned int q_count, i;
+       unsigned int mtl_q_isr;
+
+       q_count = max(pdata->hw_feat.tx_q_cnt, pdata->hw_feat.rx_q_cnt);
+       for (i = 0; i < q_count; i++) {
+               /* Clear all the interrupts which are set */
+               mtl_q_isr = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_ISR));
+               writel(mtl_q_isr, XLGMAC_MTL_REG(pdata, i, MTL_Q_ISR));
+
+               /* No MTL interrupts to be enabled */
+               writel(0, XLGMAC_MTL_REG(pdata, i, MTL_Q_IER));
+       }
+}
+
+static void xlgmac_enable_mac_interrupts(struct xlgmac_pdata *pdata)
+{
+       unsigned int mac_ier = 0;
+       u32 regval;
+
+       /* Enable Timestamp interrupt */
+       mac_ier = XLGMAC_SET_REG_BITS(mac_ier, MAC_IER_TSIE_POS,
+                                     MAC_IER_TSIE_LEN, 1);
+
+       writel(mac_ier, pdata->mac_regs + MAC_IER);
+
+       /* Enable all counter interrupts */
+       regval = readl(pdata->mac_regs + MMC_RIER);
+       regval = XLGMAC_SET_REG_BITS(regval, MMC_RIER_ALL_INTERRUPTS_POS,
+                                    MMC_RIER_ALL_INTERRUPTS_LEN, 0xffffffff);
+       writel(regval, pdata->mac_regs + MMC_RIER);
+       regval = readl(pdata->mac_regs + MMC_TIER);
+       regval = XLGMAC_SET_REG_BITS(regval, MMC_TIER_ALL_INTERRUPTS_POS,
+                                    MMC_TIER_ALL_INTERRUPTS_LEN, 0xffffffff);
+       writel(regval, pdata->mac_regs + MMC_TIER);
+}
+
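+/* The xlgmac_set_xlgmii_*_speed() helpers below program the MAC_TCR SS
+ * (speed select) field; the encoding used here is 0x0 for 40G, 0x1 for 25G,
+ * 0x2 for 50G and 0x3 for 100G. Each helper is a no-op when the requested
+ * speed is already selected.
+ */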
+static int xlgmac_set_xlgmii_25000_speed(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_TCR),
+                                    MAC_TCR_SS_POS, MAC_TCR_SS_LEN);
+       if (regval == 0x1)
+               return 0;
+
+       regval = readl(pdata->mac_regs + MAC_TCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_TCR_SS_POS,
+                                    MAC_TCR_SS_LEN, 0x1);
+       writel(regval, pdata->mac_regs + MAC_TCR);
+
+       return 0;
+}
+
+static int xlgmac_set_xlgmii_40000_speed(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_TCR),
+                                    MAC_TCR_SS_POS, MAC_TCR_SS_LEN);
+       if (regval == 0)
+               return 0;
+
+       regval = readl(pdata->mac_regs + MAC_TCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_TCR_SS_POS,
+                                    MAC_TCR_SS_LEN, 0);
+       writel(regval, pdata->mac_regs + MAC_TCR);
+
+       return 0;
+}
+
+static int xlgmac_set_xlgmii_50000_speed(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_TCR),
+                                    MAC_TCR_SS_POS, MAC_TCR_SS_LEN);
+       if (regval == 0x2)
+               return 0;
+
+       regval = readl(pdata->mac_regs + MAC_TCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_TCR_SS_POS,
+                                    MAC_TCR_SS_LEN, 0x2);
+       writel(regval, pdata->mac_regs + MAC_TCR);
+
+       return 0;
+}
+
+static int xlgmac_set_xlgmii_100000_speed(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + MAC_TCR),
+                                    MAC_TCR_SS_POS, MAC_TCR_SS_LEN);
+       if (regval == 0x3)
+               return 0;
+
+       regval = readl(pdata->mac_regs + MAC_TCR);
+       regval = XLGMAC_SET_REG_BITS(regval, MAC_TCR_SS_POS,
+                                    MAC_TCR_SS_LEN, 0x3);
+       writel(regval, pdata->mac_regs + MAC_TCR);
+
+       return 0;
+}
+
+static void xlgmac_config_mac_speed(struct xlgmac_pdata *pdata)
+{
+       switch (pdata->phy_speed) {
+       case SPEED_100000:
+               xlgmac_set_xlgmii_100000_speed(pdata);
+               break;
+
+       case SPEED_50000:
+               xlgmac_set_xlgmii_50000_speed(pdata);
+               break;
+
+       case SPEED_40000:
+               xlgmac_set_xlgmii_40000_speed(pdata);
+               break;
+
+       case SPEED_25000:
+               xlgmac_set_xlgmii_25000_speed(pdata);
+               break;
+       }
+}
+
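+/* Parse one completed Rx descriptor into ring->pkt_info. Returns 1 if the
+ * descriptor is still owned by the hardware (nothing to do), 0 otherwise.
+ * Context descriptors carry only a timestamp; normal descriptors provide
+ * the packet length, RSS hash, VLAN tag and error/checksum status.
+ */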
+static int xlgmac_dev_read(struct xlgmac_channel *channel)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->rx_ring;
+       struct net_device *netdev = pdata->netdev;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+       struct xlgmac_pkt_info *pkt_info;
+       unsigned int err, etlt, l34t;
+
+       desc_data = XLGMAC_GET_DESC_DATA(ring, ring->cur);
+       dma_desc = desc_data->dma_desc;
+       pkt_info = &ring->pkt_info;
+
+       /* Check for data availability */
+       if (XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                  RX_NORMAL_DESC3_OWN_POS,
+                                  RX_NORMAL_DESC3_OWN_LEN))
+               return 1;
+
+       /* Make sure descriptor fields are read after reading the OWN bit */
+       dma_rmb();
+
+       if (netif_msg_rx_status(pdata))
+               xlgmac_dump_rx_desc(pdata, ring, ring->cur);
+
+       if (XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                  RX_NORMAL_DESC3_CTXT_POS,
+                                  RX_NORMAL_DESC3_CTXT_LEN)) {
+               /* Timestamp Context Descriptor */
+               xlgmac_get_rx_tstamp(pkt_info, dma_desc);
+
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_CONTEXT_POS,
+                                       RX_PACKET_ATTRIBUTES_CONTEXT_LEN,
+                                       1);
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                               pkt_info->attributes,
+                               RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_POS,
+                               RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_LEN,
+                               0);
+               return 0;
+       }
+
+       /* Normal Descriptor, be sure Context Descriptor bit is off */
+       pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                               pkt_info->attributes,
+                               RX_PACKET_ATTRIBUTES_CONTEXT_POS,
+                               RX_PACKET_ATTRIBUTES_CONTEXT_LEN,
+                               0);
+
+       /* Indicate if a Context Descriptor is next */
+       if (XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                  RX_NORMAL_DESC3_CDA_POS,
+                                  RX_NORMAL_DESC3_CDA_LEN))
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                               pkt_info->attributes,
+                               RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_POS,
+                               RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_LEN,
+                               1);
+
+       /* Get the header length */
+       if (XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                  RX_NORMAL_DESC3_FD_POS,
+                                  RX_NORMAL_DESC3_FD_LEN)) {
+               desc_data->rx.hdr_len = XLGMAC_GET_REG_BITS_LE(dma_desc->desc2,
+                                                       RX_NORMAL_DESC2_HL_POS,
+                                                       RX_NORMAL_DESC2_HL_LEN);
+               if (desc_data->rx.hdr_len)
+                       pdata->stats.rx_split_header_packets++;
+       }
+
+       /* Get the RSS hash */
+       if (XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                  RX_NORMAL_DESC3_RSV_POS,
+                                  RX_NORMAL_DESC3_RSV_LEN)) {
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                               pkt_info->attributes,
+                               RX_PACKET_ATTRIBUTES_RSS_HASH_POS,
+                               RX_PACKET_ATTRIBUTES_RSS_HASH_LEN,
+                               1);
+
+               pkt_info->rss_hash = le32_to_cpu(dma_desc->desc1);
+
+               l34t = XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                             RX_NORMAL_DESC3_L34T_POS,
+                                             RX_NORMAL_DESC3_L34T_LEN);
+               switch (l34t) {
+               case RX_DESC3_L34T_IPV4_TCP:
+               case RX_DESC3_L34T_IPV4_UDP:
+               case RX_DESC3_L34T_IPV6_TCP:
+               case RX_DESC3_L34T_IPV6_UDP:
+                       pkt_info->rss_hash_type = PKT_HASH_TYPE_L4;
+                       break;
+               default:
+                       pkt_info->rss_hash_type = PKT_HASH_TYPE_L3;
+               }
+       }
+
+       /* Get the packet length */
+       desc_data->rx.len = XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                       RX_NORMAL_DESC3_PL_POS,
+                                       RX_NORMAL_DESC3_PL_LEN);
+
+       if (!XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                   RX_NORMAL_DESC3_LD_POS,
+                                   RX_NORMAL_DESC3_LD_LEN)) {
+               /* Not all the data has been transferred for this pkt_info */
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                               pkt_info->attributes,
+                               RX_PACKET_ATTRIBUTES_INCOMPLETE_POS,
+                               RX_PACKET_ATTRIBUTES_INCOMPLETE_LEN,
+                               1);
+               return 0;
+       }
+
+       /* This is the last of the data for this pkt_info */
+       pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                       pkt_info->attributes,
+                       RX_PACKET_ATTRIBUTES_INCOMPLETE_POS,
+                       RX_PACKET_ATTRIBUTES_INCOMPLETE_LEN,
+                       0);
+
+       /* Set checksum done indicator as appropriate */
+       if (netdev->features & NETIF_F_RXCSUM)
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                               pkt_info->attributes,
+                               RX_PACKET_ATTRIBUTES_CSUM_DONE_POS,
+                               RX_PACKET_ATTRIBUTES_CSUM_DONE_LEN,
+                               1);
+
+       /* Check for errors (only valid in last descriptor) */
+       err = XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                    RX_NORMAL_DESC3_ES_POS,
+                                    RX_NORMAL_DESC3_ES_LEN);
+       etlt = XLGMAC_GET_REG_BITS_LE(dma_desc->desc3,
+                                     RX_NORMAL_DESC3_ETLT_POS,
+                                     RX_NORMAL_DESC3_ETLT_LEN);
+       netif_dbg(pdata, rx_status, netdev, "err=%u, etlt=%#x\n", err, etlt);
+
+       if (!err || !etlt) {
+               /* No error if err is 0 or etlt is 0 */
+               if ((etlt == 0x09) &&
+                   (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
+                       pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_VLAN_CTAG_POS,
+                                       RX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN,
+                                       1);
+                       pkt_info->vlan_ctag =
+                               XLGMAC_GET_REG_BITS_LE(dma_desc->desc0,
+                                                      RX_NORMAL_DESC0_OVT_POS,
+                                                      RX_NORMAL_DESC0_OVT_LEN);
+                       netif_dbg(pdata, rx_status, netdev, "vlan-ctag=%#06x\n",
+                                 pkt_info->vlan_ctag);
+               }
+       } else {
+               if ((etlt == 0x05) || (etlt == 0x06))
+                       pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_CSUM_DONE_POS,
+                                       RX_PACKET_ATTRIBUTES_CSUM_DONE_LEN,
+                                       0);
+               else
+                       pkt_info->errors = XLGMAC_SET_REG_BITS(
+                                       pkt_info->errors,
+                                       RX_PACKET_ERRORS_FRAME_POS,
+                                       RX_PACKET_ERRORS_FRAME_LEN,
+                                       1);
+       }
+
+       XLGMAC_PR("%s - descriptor=%u (cur=%d)\n", channel->name,
+                 ring->cur & (ring->dma_desc_count - 1), ring->cur);
+
+       return 0;
+}
+
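+/* Set the DMA_CH_IER enable bit(s) for the requested interrupt source;
+ * XLGMAC_INT_DMA_ALL restores the enables saved by a previous
+ * xlgmac_disable_int(channel, XLGMAC_INT_DMA_ALL) call.
+ */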
+static int xlgmac_enable_int(struct xlgmac_channel *channel,
+                            enum xlgmac_int int_id)
+{
+       unsigned int dma_ch_ier;
+
+       dma_ch_ier = readl(XLGMAC_DMA_REG(channel, DMA_CH_IER));
+
+       switch (int_id) {
+       case XLGMAC_INT_DMA_CH_SR_TI:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TIE_POS,
+                               DMA_CH_IER_TIE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_TPS:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TXSE_POS,
+                               DMA_CH_IER_TXSE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_TBU:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TBUE_POS,
+                               DMA_CH_IER_TBUE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_RI:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RIE_POS,
+                               DMA_CH_IER_RIE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_RBU:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RBUE_POS,
+                               DMA_CH_IER_RBUE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_RPS:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RSE_POS,
+                               DMA_CH_IER_RSE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_TI_RI:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TIE_POS,
+                               DMA_CH_IER_TIE_LEN, 1);
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RIE_POS,
+                               DMA_CH_IER_RIE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_FBE:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_FBEE_POS,
+                               DMA_CH_IER_FBEE_LEN, 1);
+               break;
+       case XLGMAC_INT_DMA_ALL:
+               dma_ch_ier |= channel->saved_ier;
+               break;
+       default:
+               return -1;
+       }
+
+       writel(dma_ch_ier, XLGMAC_DMA_REG(channel, DMA_CH_IER));
+
+       return 0;
+}
+
+static int xlgmac_disable_int(struct xlgmac_channel *channel,
+                             enum xlgmac_int int_id)
+{
+       unsigned int dma_ch_ier;
+
+       dma_ch_ier = readl(XLGMAC_DMA_REG(channel, DMA_CH_IER));
+
+       switch (int_id) {
+       case XLGMAC_INT_DMA_CH_SR_TI:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TIE_POS,
+                               DMA_CH_IER_TIE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_TPS:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TXSE_POS,
+                               DMA_CH_IER_TXSE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_TBU:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TBUE_POS,
+                               DMA_CH_IER_TBUE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_RI:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RIE_POS,
+                               DMA_CH_IER_RIE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_RBU:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RBUE_POS,
+                               DMA_CH_IER_RBUE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_RPS:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RSE_POS,
+                               DMA_CH_IER_RSE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_TI_RI:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_TIE_POS,
+                               DMA_CH_IER_TIE_LEN, 0);
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_RIE_POS,
+                               DMA_CH_IER_RIE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_CH_SR_FBE:
+               dma_ch_ier = XLGMAC_SET_REG_BITS(
+                               dma_ch_ier, DMA_CH_IER_FBEE_POS,
+                               DMA_CH_IER_FBEE_LEN, 0);
+               break;
+       case XLGMAC_INT_DMA_ALL:
+               channel->saved_ier = dma_ch_ier & XLGMAC_DMA_INTERRUPT_MASK;
+               dma_ch_ier &= ~XLGMAC_DMA_INTERRUPT_MASK;
+               break;
+       default:
+               return -1;
+       }
+
+       writel(dma_ch_ier, XLGMAC_DMA_REG(channel, DMA_CH_IER));
+
+       return 0;
+}
+
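+/* Flush all Tx queues: set the FTQ bit in each queue's MTL_Q_TQOMR
+ * register and wait for the hardware to finish the flush.
+ */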
+static int xlgmac_flush_tx_queues(struct xlgmac_pdata *pdata)
+{
+       unsigned int i, count;
+       u32 regval;
+
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_SET_REG_BITS(regval, MTL_Q_TQOMR_FTQ_POS,
+                                            MTL_Q_TQOMR_FTQ_LEN, 1);
+               writel(regval, XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+       }
+
+       /* Poll until the flush completes; the FTQ bit self-clears once
+        * the queue flush is done
+        */
+       for (i = 0; i < pdata->tx_q_count; i++) {
+               count = 2000;
+               regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+               regval = XLGMAC_GET_REG_BITS(regval, MTL_Q_TQOMR_FTQ_POS,
+                                            MTL_Q_TQOMR_FTQ_LEN);
+               while (--count && regval) {
+                       usleep_range(500, 600);
+                       regval = readl(XLGMAC_MTL_REG(pdata, i, MTL_Q_TQOMR));
+                       regval = XLGMAC_GET_REG_BITS(regval,
+                                                    MTL_Q_TQOMR_FTQ_POS,
+                                                    MTL_Q_TQOMR_FTQ_LEN);
+               }
+
+               if (!count)
+                       return -EBUSY;
+       }
+
+       return 0;
+}
+
+static void xlgmac_config_dma_bus(struct xlgmac_pdata *pdata)
+{
+       u32 regval;
+
+       regval = readl(pdata->mac_regs + DMA_SBMR);
+       /* Set enhanced addressing mode */
+       regval = XLGMAC_SET_REG_BITS(regval, DMA_SBMR_EAME_POS,
+                                    DMA_SBMR_EAME_LEN, 1);
+       /* Set the System Bus mode */
+       regval = XLGMAC_SET_REG_BITS(regval, DMA_SBMR_UNDEF_POS,
+                                    DMA_SBMR_UNDEF_LEN, 1);
+       regval = XLGMAC_SET_REG_BITS(regval, DMA_SBMR_BLEN_256_POS,
+                                    DMA_SBMR_BLEN_256_LEN, 1);
+       writel(regval, pdata->mac_regs + DMA_SBMR);
+}
+
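+/* One-time hardware bring-up: flush stale Tx data, then configure the DMA
+ * engine, the MTL queues and finally the MAC block, arming the interrupt
+ * enables for each layer along the way.
+ */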
+static int xlgmac_hw_init(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_desc_ops *desc_ops = &pdata->desc_ops;
+       int ret;
+
+       /* Flush Tx queues */
+       ret = xlgmac_flush_tx_queues(pdata);
+       if (ret)
+               return ret;
+
+       /* Initialize DMA related features */
+       xlgmac_config_dma_bus(pdata);
+       xlgmac_config_osp_mode(pdata);
+       xlgmac_config_pblx8(pdata);
+       xlgmac_config_tx_pbl_val(pdata);
+       xlgmac_config_rx_pbl_val(pdata);
+       xlgmac_config_rx_coalesce(pdata);
+       xlgmac_config_tx_coalesce(pdata);
+       xlgmac_config_rx_buffer_size(pdata);
+       xlgmac_config_tso_mode(pdata);
+       xlgmac_config_sph_mode(pdata);
+       xlgmac_config_rss(pdata);
+       desc_ops->tx_desc_init(pdata);
+       desc_ops->rx_desc_init(pdata);
+       xlgmac_enable_dma_interrupts(pdata);
+
+       /* Initialize MTL related features */
+       xlgmac_config_mtl_mode(pdata);
+       xlgmac_config_queue_mapping(pdata);
+       xlgmac_config_tsf_mode(pdata, pdata->tx_sf_mode);
+       xlgmac_config_rsf_mode(pdata, pdata->rx_sf_mode);
+       xlgmac_config_tx_threshold(pdata, pdata->tx_threshold);
+       xlgmac_config_rx_threshold(pdata, pdata->rx_threshold);
+       xlgmac_config_tx_fifo_size(pdata);
+       xlgmac_config_rx_fifo_size(pdata);
+       xlgmac_config_flow_control_threshold(pdata);
+       xlgmac_config_rx_fep_enable(pdata);
+       xlgmac_config_rx_fup_enable(pdata);
+       xlgmac_enable_mtl_interrupts(pdata);
+
+       /* Initialize MAC related features */
+       xlgmac_config_mac_address(pdata);
+       xlgmac_config_rx_mode(pdata);
+       xlgmac_config_jumbo_enable(pdata);
+       xlgmac_config_flow_control(pdata);
+       xlgmac_config_mac_speed(pdata);
+       xlgmac_config_checksum_offload(pdata);
+       xlgmac_config_vlan_support(pdata);
+       xlgmac_config_mmc(pdata);
+       xlgmac_enable_mac_interrupts(pdata);
+
+       return 0;
+}
+
+static int xlgmac_hw_exit(struct xlgmac_pdata *pdata)
+{
+       unsigned int count = 2000;
+       u32 regval;
+
+       /* Issue a software reset */
+       regval = readl(pdata->mac_regs + DMA_MR);
+       regval = XLGMAC_SET_REG_BITS(regval, DMA_MR_SWR_POS,
+                                    DMA_MR_SWR_LEN, 1);
+       writel(regval, pdata->mac_regs + DMA_MR);
+       usleep_range(10, 15);
+
+       /* Poll until the software reset completes (SWR self-clears) */
+       while (--count &&
+              XLGMAC_GET_REG_BITS(readl(pdata->mac_regs + DMA_MR),
+                                  DMA_MR_SWR_POS, DMA_MR_SWR_LEN))
+               usleep_range(500, 600);
+
+       if (!count)
+               return -EBUSY;
+
+       return 0;
+}
+
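+/* Fill in the hw_ops dispatch table; the higher layers reach most of the
+ * hardware functionality through these callbacks.
+ */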
+void xlgmac_init_hw_ops(struct xlgmac_hw_ops *hw_ops)
+{
+       hw_ops->init = xlgmac_hw_init;
+       hw_ops->exit = xlgmac_hw_exit;
+
+       hw_ops->tx_complete = xlgmac_tx_complete;
+
+       hw_ops->enable_tx = xlgmac_enable_tx;
+       hw_ops->disable_tx = xlgmac_disable_tx;
+       hw_ops->enable_rx = xlgmac_enable_rx;
+       hw_ops->disable_rx = xlgmac_disable_rx;
+
+       hw_ops->dev_xmit = xlgmac_dev_xmit;
+       hw_ops->dev_read = xlgmac_dev_read;
+       hw_ops->enable_int = xlgmac_enable_int;
+       hw_ops->disable_int = xlgmac_disable_int;
+
+       hw_ops->set_mac_address = xlgmac_set_mac_address;
+       hw_ops->config_rx_mode = xlgmac_config_rx_mode;
+       hw_ops->enable_rx_csum = xlgmac_enable_rx_csum;
+       hw_ops->disable_rx_csum = xlgmac_disable_rx_csum;
+
+       /* For XLGMII speed configuration */
+       hw_ops->set_xlgmii_25000_speed = xlgmac_set_xlgmii_25000_speed;
+       hw_ops->set_xlgmii_40000_speed = xlgmac_set_xlgmii_40000_speed;
+       hw_ops->set_xlgmii_50000_speed = xlgmac_set_xlgmii_50000_speed;
+       hw_ops->set_xlgmii_100000_speed = xlgmac_set_xlgmii_100000_speed;
+
+       /* For descriptor related operation */
+       hw_ops->tx_desc_init = xlgmac_tx_desc_init;
+       hw_ops->rx_desc_init = xlgmac_rx_desc_init;
+       hw_ops->tx_desc_reset = xlgmac_tx_desc_reset;
+       hw_ops->rx_desc_reset = xlgmac_rx_desc_reset;
+       hw_ops->is_last_desc = xlgmac_is_last_desc;
+       hw_ops->is_context_desc = xlgmac_is_context_desc;
+       hw_ops->tx_start_xmit = xlgmac_tx_start_xmit;
+
+       /* For Flow Control */
+       hw_ops->config_tx_flow_control = xlgmac_config_tx_flow_control;
+       hw_ops->config_rx_flow_control = xlgmac_config_rx_flow_control;
+
+       /* For Vlan related config */
+       hw_ops->enable_rx_vlan_stripping = xlgmac_enable_rx_vlan_stripping;
+       hw_ops->disable_rx_vlan_stripping = xlgmac_disable_rx_vlan_stripping;
+       hw_ops->enable_rx_vlan_filtering = xlgmac_enable_rx_vlan_filtering;
+       hw_ops->disable_rx_vlan_filtering = xlgmac_disable_rx_vlan_filtering;
+       hw_ops->update_vlan_hash_table = xlgmac_update_vlan_hash_table;
+
+       /* For RX and TX coalescing */
+       hw_ops->config_rx_coalesce = xlgmac_config_rx_coalesce;
+       hw_ops->config_tx_coalesce = xlgmac_config_tx_coalesce;
+       hw_ops->usec_to_riwt = xlgmac_usec_to_riwt;
+       hw_ops->riwt_to_usec = xlgmac_riwt_to_usec;
+
+       /* For RX and TX threshold config */
+       hw_ops->config_rx_threshold = xlgmac_config_rx_threshold;
+       hw_ops->config_tx_threshold = xlgmac_config_tx_threshold;
+
+       /* For RX and TX Store and Forward Mode config */
+       hw_ops->config_rsf_mode = xlgmac_config_rsf_mode;
+       hw_ops->config_tsf_mode = xlgmac_config_tsf_mode;
+
+       /* For TX DMA Operating on Second Frame config */
+       hw_ops->config_osp_mode = xlgmac_config_osp_mode;
+
+       /* For RX and TX PBL config */
+       hw_ops->config_rx_pbl_val = xlgmac_config_rx_pbl_val;
+       hw_ops->get_rx_pbl_val = xlgmac_get_rx_pbl_val;
+       hw_ops->config_tx_pbl_val = xlgmac_config_tx_pbl_val;
+       hw_ops->get_tx_pbl_val = xlgmac_get_tx_pbl_val;
+       hw_ops->config_pblx8 = xlgmac_config_pblx8;
+
+       /* For MMC statistics support */
+       hw_ops->tx_mmc_int = xlgmac_tx_mmc_int;
+       hw_ops->rx_mmc_int = xlgmac_rx_mmc_int;
+       hw_ops->read_mmc_stats = xlgmac_read_mmc_stats;
+
+       /* For Receive Side Scaling */
+       hw_ops->enable_rss = xlgmac_enable_rss;
+       hw_ops->disable_rss = xlgmac_disable_rss;
+       hw_ops->set_rss_hash_key = xlgmac_set_rss_hash_key;
+       hw_ops->set_rss_lookup_table = xlgmac_set_rss_lookup_table;
+}
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
new file mode 100644 (file)
index 0000000..6acf86c
--- /dev/null
@@ -0,0 +1,1332 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+
+#include "dwc-xlgmac.h"
+#include "dwc-xlgmac-reg.h"
+
+static int xlgmac_one_poll(struct napi_struct *, int);
+static int xlgmac_all_poll(struct napi_struct *, int);
+
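+/* Ring occupancy helpers: 'cur' and 'dirty' are free-running indices, so
+ * plain unsigned subtraction yields the number of in-flight descriptors
+ * even after the counters wrap around.
+ */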
+static inline unsigned int xlgmac_tx_avail_desc(struct xlgmac_ring *ring)
+{
+       return (ring->dma_desc_count - (ring->cur - ring->dirty));
+}
+
+static inline unsigned int xlgmac_rx_dirty_desc(struct xlgmac_ring *ring)
+{
+       return (ring->cur - ring->dirty);
+}
+
+static int xlgmac_maybe_stop_tx_queue(
+                       struct xlgmac_channel *channel,
+                       struct xlgmac_ring *ring,
+                       unsigned int count)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+
+       if (count > xlgmac_tx_avail_desc(ring)) {
+               netif_info(pdata, drv, pdata->netdev,
+                          "Tx queue stopped, not enough descriptors available\n");
+               netif_stop_subqueue(pdata->netdev, channel->queue_index);
+               ring->tx.queue_stopped = 1;
+
+               /* If we haven't notified the hardware because of xmit_more
+                * support, tell it now
+                */
+               if (ring->tx.xmit_more)
+                       pdata->hw_ops.tx_start_xmit(channel, ring);
+
+               return NETDEV_TX_BUSY;
+       }
+
+       return 0;
+}
+
+static void xlgmac_prep_vlan(struct sk_buff *skb,
+                            struct xlgmac_pkt_info *pkt_info)
+{
+       if (skb_vlan_tag_present(skb))
+               pkt_info->vlan_ctag = skb_vlan_tag_get(skb);
+}
+
+static int xlgmac_prep_tso(struct sk_buff *skb,
+                          struct xlgmac_pkt_info *pkt_info)
+{
+       int ret;
+
+       if (!XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                TX_PACKET_ATTRIBUTES_TSO_ENABLE_POS,
+                                TX_PACKET_ATTRIBUTES_TSO_ENABLE_LEN))
+               return 0;
+
+       ret = skb_cow_head(skb, 0);
+       if (ret)
+               return ret;
+
+       pkt_info->header_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       pkt_info->tcp_header_len = tcp_hdrlen(skb);
+       pkt_info->tcp_payload_len = skb->len - pkt_info->header_len;
+       pkt_info->mss = skb_shinfo(skb)->gso_size;
+
+       XLGMAC_PR("header_len=%u\n", pkt_info->header_len);
+       XLGMAC_PR("tcp_header_len=%u, tcp_payload_len=%u\n",
+                 pkt_info->tcp_header_len, pkt_info->tcp_payload_len);
+       XLGMAC_PR("mss=%u\n", pkt_info->mss);
+
+       /* Update the number of packets that will ultimately be transmitted
+        * along with the extra bytes for each extra packet
+        */
+       pkt_info->tx_packets = skb_shinfo(skb)->gso_segs;
+       pkt_info->tx_bytes += (pkt_info->tx_packets - 1) * pkt_info->header_len;
+
+       return 0;
+}
+
+static int xlgmac_is_tso(struct sk_buff *skb)
+{
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+
+       if (!skb_is_gso(skb))
+               return 0;
+
+       return 1;
+}
+
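+/* Pre-compute everything dev_xmit will need for this skb: the attribute
+ * flags (TSO, checksum, VLAN) and the number of descriptors required,
+ * including any context descriptor and the per-buffer splits imposed by
+ * XLGMAC_TX_MAX_BUF_SIZE.
+ */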
+static void xlgmac_prep_tx_pkt(struct xlgmac_pdata *pdata,
+                              struct xlgmac_ring *ring,
+                              struct sk_buff *skb,
+                              struct xlgmac_pkt_info *pkt_info)
+{
+       struct skb_frag_struct *frag;
+       unsigned int context_desc;
+       unsigned int len;
+       unsigned int i;
+
+       pkt_info->skb = skb;
+
+       context_desc = 0;
+       pkt_info->desc_count = 0;
+
+       pkt_info->tx_packets = 1;
+       pkt_info->tx_bytes = skb->len;
+
+       if (xlgmac_is_tso(skb)) {
+               /* TSO requires an extra descriptor if mss is different */
+               if (skb_shinfo(skb)->gso_size != ring->tx.cur_mss) {
+                       context_desc = 1;
+                       pkt_info->desc_count++;
+               }
+
+               /* TSO requires an extra descriptor for TSO header */
+               pkt_info->desc_count++;
+
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       TX_PACKET_ATTRIBUTES_TSO_ENABLE_POS,
+                                       TX_PACKET_ATTRIBUTES_TSO_ENABLE_LEN,
+                                       1);
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       TX_PACKET_ATTRIBUTES_CSUM_ENABLE_POS,
+                                       TX_PACKET_ATTRIBUTES_CSUM_ENABLE_LEN,
+                                       1);
+       } else if (skb->ip_summed == CHECKSUM_PARTIAL)
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       TX_PACKET_ATTRIBUTES_CSUM_ENABLE_POS,
+                                       TX_PACKET_ATTRIBUTES_CSUM_ENABLE_LEN,
+                                       1);
+
+       if (skb_vlan_tag_present(skb)) {
+               /* VLAN requires an extra descriptor if tag is different */
+               if (skb_vlan_tag_get(skb) != ring->tx.cur_vlan_ctag)
+                       /* We can share with the TSO context descriptor */
+                       if (!context_desc) {
+                               context_desc = 1;
+                               pkt_info->desc_count++;
+                       }
+
+               pkt_info->attributes = XLGMAC_SET_REG_BITS(
+                                       pkt_info->attributes,
+                                       TX_PACKET_ATTRIBUTES_VLAN_CTAG_POS,
+                                       TX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN,
+                                       1);
+       }
+
+       for (len = skb_headlen(skb); len;) {
+               pkt_info->desc_count++;
+               len -= min_t(unsigned int, len, XLGMAC_TX_MAX_BUF_SIZE);
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               frag = &skb_shinfo(skb)->frags[i];
+               for (len = skb_frag_size(frag); len; ) {
+                       pkt_info->desc_count++;
+                       len -= min_t(unsigned int, len, XLGMAC_TX_MAX_BUF_SIZE);
+               }
+       }
+}
+
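+/* Size the Rx buffer for the given MTU: leave room for the Ethernet
+ * header, FCS and a VLAN tag, clamp to [XLGMAC_RX_MIN_BUF_SIZE, PAGE_SIZE]
+ * and round up to the XLGMAC_RX_BUF_ALIGN boundary.
+ */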
+static int xlgmac_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
+{
+       unsigned int rx_buf_size;
+
+       if (mtu > XLGMAC_JUMBO_PACKET_MTU) {
+               netdev_alert(netdev, "MTU exceeds maximum supported value\n");
+               return -EINVAL;
+       }
+
+       rx_buf_size = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+       rx_buf_size = clamp_val(rx_buf_size, XLGMAC_RX_MIN_BUF_SIZE, PAGE_SIZE);
+
+       rx_buf_size = (rx_buf_size + XLGMAC_RX_BUF_ALIGN - 1) &
+                     ~(XLGMAC_RX_BUF_ALIGN - 1);
+
+       return rx_buf_size;
+}
+
+static void xlgmac_enable_rx_tx_ints(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct xlgmac_channel *channel;
+       enum xlgmac_int int_id;
+       unsigned int i;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (channel->tx_ring && channel->rx_ring)
+                       int_id = XLGMAC_INT_DMA_CH_SR_TI_RI;
+               else if (channel->tx_ring)
+                       int_id = XLGMAC_INT_DMA_CH_SR_TI;
+               else if (channel->rx_ring)
+                       int_id = XLGMAC_INT_DMA_CH_SR_RI;
+               else
+                       continue;
+
+               hw_ops->enable_int(channel, int_id);
+       }
+}
+
+static void xlgmac_disable_rx_tx_ints(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct xlgmac_channel *channel;
+       enum xlgmac_int int_id;
+       unsigned int i;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (channel->tx_ring && channel->rx_ring)
+                       int_id = XLGMAC_INT_DMA_CH_SR_TI_RI;
+               else if (channel->tx_ring)
+                       int_id = XLGMAC_INT_DMA_CH_SR_TI;
+               else if (channel->rx_ring)
+                       int_id = XLGMAC_INT_DMA_CH_SR_RI;
+               else
+                       continue;
+
+               hw_ops->disable_int(channel, int_id);
+       }
+}
+
+static irqreturn_t xlgmac_isr(int irq, void *data)
+{
+       unsigned int dma_isr, dma_ch_isr, mac_isr;
+       struct xlgmac_pdata *pdata = data;
+       struct xlgmac_channel *channel;
+       struct xlgmac_hw_ops *hw_ops;
+       unsigned int i, ti, ri;
+
+       hw_ops = &pdata->hw_ops;
+
+       /* The DMA interrupt status register also reports MAC and MTL
+        * interrupts. So for polling mode, we just need to check for
+        * this register to be non-zero
+        */
+       dma_isr = readl(pdata->mac_regs + DMA_ISR);
+       if (!dma_isr)
+               return IRQ_HANDLED;
+
+       netif_dbg(pdata, intr, pdata->netdev, "DMA_ISR=%#010x\n", dma_isr);
+
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!(dma_isr & (1 << i)))
+                       continue;
+
+               channel = pdata->channel_head + i;
+
+               dma_ch_isr = readl(XLGMAC_DMA_REG(channel, DMA_CH_SR));
+               netif_dbg(pdata, intr, pdata->netdev, "DMA_CH%u_ISR=%#010x\n",
+                         i, dma_ch_isr);
+
+               /* The TI or RI interrupt bits may still be set even if using
+                * per channel DMA interrupts. Check to be sure those are not
+                * enabled before using the private data napi structure.
+                */
+               ti = XLGMAC_GET_REG_BITS(dma_ch_isr, DMA_CH_SR_TI_POS,
+                                        DMA_CH_SR_TI_LEN);
+               ri = XLGMAC_GET_REG_BITS(dma_ch_isr, DMA_CH_SR_RI_POS,
+                                        DMA_CH_SR_RI_LEN);
+               if (!pdata->per_channel_irq && (ti || ri)) {
+                       if (napi_schedule_prep(&pdata->napi)) {
+                               /* Disable Tx and Rx interrupts */
+                               xlgmac_disable_rx_tx_ints(pdata);
+
+                               /* Turn on polling */
+                               __napi_schedule_irqoff(&pdata->napi);
+                       }
+               }
+
+               if (XLGMAC_GET_REG_BITS(dma_ch_isr, DMA_CH_SR_RBU_POS,
+                                       DMA_CH_SR_RBU_LEN))
+                       pdata->stats.rx_buffer_unavailable++;
+
+               /* Restart the device on a Fatal Bus Error */
+               if (XLGMAC_GET_REG_BITS(dma_ch_isr, DMA_CH_SR_FBE_POS,
+                                       DMA_CH_SR_FBE_LEN))
+                       schedule_work(&pdata->restart_work);
+
+               /* Clear all interrupt signals */
+               writel(dma_ch_isr, XLGMAC_DMA_REG(channel, DMA_CH_SR));
+       }
+
+       if (XLGMAC_GET_REG_BITS(dma_isr, DMA_ISR_MACIS_POS,
+                               DMA_ISR_MACIS_LEN)) {
+               mac_isr = readl(pdata->mac_regs + MAC_ISR);
+
+               if (XLGMAC_GET_REG_BITS(mac_isr, MAC_ISR_MMCTXIS_POS,
+                                       MAC_ISR_MMCTXIS_LEN))
+                       hw_ops->tx_mmc_int(pdata);
+
+               if (XLGMAC_GET_REG_BITS(mac_isr, MAC_ISR_MMCRXIS_POS,
+                                       MAC_ISR_MMCRXIS_LEN))
+                       hw_ops->rx_mmc_int(pdata);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t xlgmac_dma_isr(int irq, void *data)
+{
+       struct xlgmac_channel *channel = data;
+
+       /* Per channel DMA interrupts are enabled, so we use the per
+        * channel napi structure and not the private data napi structure
+        */
+       if (napi_schedule_prep(&channel->napi)) {
+               /* Disable Tx and Rx interrupts */
+               disable_irq_nosync(channel->dma_irq);
+
+               /* Turn on polling */
+               __napi_schedule_irqoff(&channel->napi);
+       }
+
+       return IRQ_HANDLED;
+}
+
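+/* Tx coalescing timer: when it expires, kick NAPI so that completed Tx
+ * descriptors are processed even if no further interrupt arrives.
+ */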
+static void xlgmac_tx_timer(unsigned long data)
+{
+       struct xlgmac_channel *channel = (struct xlgmac_channel *)data;
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct napi_struct *napi;
+
+       napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+
+       if (napi_schedule_prep(napi)) {
+               /* Disable Tx and Rx interrupts */
+               if (pdata->per_channel_irq)
+                       disable_irq_nosync(channel->dma_irq);
+               else
+                       xlgmac_disable_rx_tx_ints(pdata);
+
+               /* Turn on polling */
+               __napi_schedule(napi);
+       }
+
+       channel->tx_timer_active = 0;
+}
+
+static void xlgmac_init_timers(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               setup_timer(&channel->tx_timer, xlgmac_tx_timer,
+                           (unsigned long)channel);
+       }
+}
+
+static void xlgmac_stop_timers(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       break;
+
+               del_timer_sync(&channel->tx_timer);
+       }
+}
+
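+/* With per-channel IRQs each channel gets its own NAPI context polled by
+ * xlgmac_one_poll(); otherwise a single NAPI context (xlgmac_all_poll)
+ * services every channel.
+ */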
+static void xlgmac_napi_enable(struct xlgmac_pdata *pdata, unsigned int add)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       if (pdata->per_channel_irq) {
+               channel = pdata->channel_head;
+               for (i = 0; i < pdata->channel_count; i++, channel++) {
+                       if (add)
+                               netif_napi_add(pdata->netdev, &channel->napi,
+                                              xlgmac_one_poll,
+                                              NAPI_POLL_WEIGHT);
+
+                       napi_enable(&channel->napi);
+               }
+       } else {
+               if (add)
+                       netif_napi_add(pdata->netdev, &pdata->napi,
+                                      xlgmac_all_poll, NAPI_POLL_WEIGHT);
+
+               napi_enable(&pdata->napi);
+       }
+}
+
+static void xlgmac_napi_disable(struct xlgmac_pdata *pdata, unsigned int del)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       if (pdata->per_channel_irq) {
+               channel = pdata->channel_head;
+               for (i = 0; i < pdata->channel_count; i++, channel++) {
+                       napi_disable(&channel->napi);
+
+                       if (del)
+                               netif_napi_del(&channel->napi);
+               }
+       } else {
+               napi_disable(&pdata->napi);
+
+               if (del)
+                       netif_napi_del(&pdata->napi);
+       }
+}
+
+static int xlgmac_request_irqs(struct xlgmac_pdata *pdata)
+{
+       struct net_device *netdev = pdata->netdev;
+       struct xlgmac_channel *channel;
+       unsigned int i;
+       int ret;
+
+       ret = devm_request_irq(pdata->dev, pdata->dev_irq, xlgmac_isr,
+                              IRQF_SHARED, netdev->name, pdata);
+       if (ret) {
+               netdev_alert(netdev, "error requesting irq %d\n",
+                            pdata->dev_irq);
+               return ret;
+       }
+
+       if (!pdata->per_channel_irq)
+               return 0;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               snprintf(channel->dma_irq_name,
+                        sizeof(channel->dma_irq_name) - 1,
+                        "%s-TxRx-%u", netdev_name(netdev),
+                        channel->queue_index);
+
+               ret = devm_request_irq(pdata->dev, channel->dma_irq,
+                                      xlgmac_dma_isr, 0,
+                                      channel->dma_irq_name, channel);
+               if (ret) {
+                       netdev_alert(netdev, "error requesting irq %d\n",
+                                    channel->dma_irq);
+                       goto err_irq;
+               }
+       }
+
+       return 0;
+
+err_irq:
+       /* Using an unsigned int, 'i' will go to UINT_MAX and exit */
+       for (i--, channel--; i < pdata->channel_count; i--, channel--)
+               devm_free_irq(pdata->dev, channel->dma_irq, channel);
+
+       devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+
+       return ret;
+}
+
+static void xlgmac_free_irqs(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+
+       if (!pdata->per_channel_irq)
+               return;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++)
+               devm_free_irq(pdata->dev, channel->dma_irq, channel);
+}
+
+static void xlgmac_free_tx_data(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_desc_ops *desc_ops = &pdata->desc_ops;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_channel *channel;
+       struct xlgmac_ring *ring;
+       unsigned int i, j;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               ring = channel->tx_ring;
+               if (!ring)
+                       break;
+
+               for (j = 0; j < ring->dma_desc_count; j++) {
+                       desc_data = XLGMAC_GET_DESC_DATA(ring, j);
+                       desc_ops->unmap_desc_data(pdata, desc_data);
+               }
+       }
+}
+
+static void xlgmac_free_rx_data(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_desc_ops *desc_ops = &pdata->desc_ops;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_channel *channel;
+       struct xlgmac_ring *ring;
+       unsigned int i, j;
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               ring = channel->rx_ring;
+               if (!ring)
+                       break;
+
+               for (j = 0; j < ring->dma_desc_count; j++) {
+                       desc_data = XLGMAC_GET_DESC_DATA(ring, j);
+                       desc_ops->unmap_desc_data(pdata, desc_data);
+               }
+       }
+}
+
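+/* Common bring-up path shared by ndo_open and the restart worker: program
+ * the hardware, enable NAPI, request IRQs, then open the Tx/Rx paths.
+ */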
+static int xlgmac_start(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct net_device *netdev = pdata->netdev;
+       int ret;
+
+       hw_ops->init(pdata);
+       xlgmac_napi_enable(pdata, 1);
+
+       ret = xlgmac_request_irqs(pdata);
+       if (ret)
+               goto err_napi;
+
+       hw_ops->enable_tx(pdata);
+       hw_ops->enable_rx(pdata);
+       netif_tx_start_all_queues(netdev);
+
+       return 0;
+
+err_napi:
+       xlgmac_napi_disable(pdata, 1);
+       hw_ops->exit(pdata);
+
+       return ret;
+}
+
+static void xlgmac_stop(struct xlgmac_pdata *pdata)
+{
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct net_device *netdev = pdata->netdev;
+       struct xlgmac_channel *channel;
+       struct netdev_queue *txq;
+       unsigned int i;
+
+       netif_tx_stop_all_queues(netdev);
+       xlgmac_stop_timers(pdata);
+       hw_ops->disable_tx(pdata);
+       hw_ops->disable_rx(pdata);
+       xlgmac_free_irqs(pdata);
+       xlgmac_napi_disable(pdata, 1);
+       hw_ops->exit(pdata);
+
+       channel = pdata->channel_head;
+       for (i = 0; i < pdata->channel_count; i++, channel++) {
+               if (!channel->tx_ring)
+                       continue;
+
+               txq = netdev_get_tx_queue(netdev, channel->queue_index);
+               netdev_tx_reset_queue(txq);
+       }
+}
+
+static void xlgmac_restart_dev(struct xlgmac_pdata *pdata)
+{
+       /* If not running, "restart" will happen on open */
+       if (!netif_running(pdata->netdev))
+               return;
+
+       xlgmac_stop(pdata);
+
+       xlgmac_free_tx_data(pdata);
+       xlgmac_free_rx_data(pdata);
+
+       xlgmac_start(pdata);
+}
+
+static void xlgmac_restart(struct work_struct *work)
+{
+       struct xlgmac_pdata *pdata = container_of(work,
+                                                  struct xlgmac_pdata,
+                                                  restart_work);
+
+       rtnl_lock();
+
+       xlgmac_restart_dev(pdata);
+
+       rtnl_unlock();
+}
+
+static int xlgmac_open(struct net_device *netdev)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_desc_ops *desc_ops;
+       int ret;
+
+       desc_ops = &pdata->desc_ops;
+
+       /* TODO: Initialize the phy */
+
+       /* Calculate the Rx buffer size before allocating rings */
+       ret = xlgmac_calc_rx_buf_size(netdev, netdev->mtu);
+       if (ret < 0)
+               return ret;
+       pdata->rx_buf_size = ret;
+
+       /* Allocate the channels and rings */
+       ret = desc_ops->alloc_channles_and_rings(pdata);
+       if (ret)
+               return ret;
+
+       INIT_WORK(&pdata->restart_work, xlgmac_restart);
+       xlgmac_init_timers(pdata);
+
+       ret = xlgmac_start(pdata);
+       if (ret)
+               goto err_channels_and_rings;
+
+       return 0;
+
+err_channels_and_rings:
+       desc_ops->free_channels_and_rings(pdata);
+
+       return ret;
+}
+
+static int xlgmac_close(struct net_device *netdev)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_desc_ops *desc_ops;
+
+       desc_ops = &pdata->desc_ops;
+
+       /* Stop the device */
+       xlgmac_stop(pdata);
+
+       /* Free the channels and rings */
+       desc_ops->free_channels_and_rings(pdata);
+
+       return 0;
+}
+
+static void xlgmac_tx_timeout(struct net_device *netdev)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+
+       netdev_warn(netdev, "tx timeout, device restarting\n");
+       schedule_work(&pdata->restart_work);
+}
+
+static int xlgmac_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_pkt_info *tx_pkt_info;
+       struct xlgmac_desc_ops *desc_ops;
+       struct xlgmac_channel *channel;
+       struct xlgmac_hw_ops *hw_ops;
+       struct netdev_queue *txq;
+       struct xlgmac_ring *ring;
+       int ret;
+
+       desc_ops = &pdata->desc_ops;
+       hw_ops = &pdata->hw_ops;
+
+       XLGMAC_PR("skb->len = %d\n", skb->len);
+
+       channel = pdata->channel_head + skb->queue_mapping;
+       txq = netdev_get_tx_queue(netdev, channel->queue_index);
+       ring = channel->tx_ring;
+       tx_pkt_info = &ring->pkt_info;
+
+       if (skb->len == 0) {
+               netif_err(pdata, tx_err, netdev,
+                         "empty skb received from stack\n");
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* Prepare preliminary packet info for TX */
+       memset(tx_pkt_info, 0, sizeof(*tx_pkt_info));
+       xlgmac_prep_tx_pkt(pdata, ring, skb, tx_pkt_info);
+
+       /* Check that there are enough descriptors available */
+       ret = xlgmac_maybe_stop_tx_queue(channel, ring,
+                                        tx_pkt_info->desc_count);
+       if (ret)
+               return ret;
+
+       ret = xlgmac_prep_tso(skb, tx_pkt_info);
+       if (ret) {
+               netif_err(pdata, tx_err, netdev,
+                         "error processing TSO packet\n");
+               dev_kfree_skb_any(skb);
+               return ret;
+       }
+       xlgmac_prep_vlan(skb, tx_pkt_info);
+
+       if (!desc_ops->map_tx_skb(channel, skb)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* Report on the actual number of bytes (to be) sent */
+       netdev_tx_sent_queue(txq, tx_pkt_info->tx_bytes);
+
+       /* Configure required descriptor fields for transmission */
+       hw_ops->dev_xmit(channel);
+
+       if (netif_msg_pktdata(pdata))
+               xlgmac_print_pkt(netdev, skb, true);
+
+       /* Stop the queue in advance if there may not be enough descriptors */
+       xlgmac_maybe_stop_tx_queue(channel, ring, XLGMAC_TX_MAX_DESC_NR);
+
+       return NETDEV_TX_OK;
+}
+
+static void xlgmac_get_stats64(struct net_device *netdev,
+                              struct rtnl_link_stats64 *s)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_stats *pstats = &pdata->stats;
+
+       pdata->hw_ops.read_mmc_stats(pdata);
+
+       s->rx_packets = pstats->rxframecount_gb;
+       s->rx_bytes = pstats->rxoctetcount_gb;
+       s->rx_errors = pstats->rxframecount_gb -
+                      pstats->rxbroadcastframes_g -
+                      pstats->rxmulticastframes_g -
+                      pstats->rxunicastframes_g;
+       s->multicast = pstats->rxmulticastframes_g;
+       s->rx_length_errors = pstats->rxlengtherror;
+       s->rx_crc_errors = pstats->rxcrcerror;
+       s->rx_fifo_errors = pstats->rxfifooverflow;
+
+       s->tx_packets = pstats->txframecount_gb;
+       s->tx_bytes = pstats->txoctetcount_gb;
+       s->tx_errors = pstats->txframecount_gb - pstats->txframecount_g;
+       s->tx_dropped = netdev->stats.tx_dropped;
+}
+
+static int xlgmac_set_mac_address(struct net_device *netdev, void *addr)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       struct sockaddr *saddr = addr;
+
+       if (!is_valid_ether_addr(saddr->sa_data))
+               return -EADDRNOTAVAIL;
+
+       memcpy(netdev->dev_addr, saddr->sa_data, netdev->addr_len);
+
+       hw_ops->set_mac_address(pdata, netdev->dev_addr);
+
+       return 0;
+}
+
+static int xlgmac_ioctl(struct net_device *netdev,
+                       struct ifreq *ifreq, int cmd)
+{
+       if (!netif_running(netdev))
+               return -ENODEV;
+
+       return 0;
+}
+
+static int xlgmac_change_mtu(struct net_device *netdev, int mtu)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       int ret;
+
+       ret = xlgmac_calc_rx_buf_size(netdev, mtu);
+       if (ret < 0)
+               return ret;
+
+       pdata->rx_buf_size = ret;
+       netdev->mtu = mtu;
+
+       xlgmac_restart_dev(pdata);
+
+       return 0;
+}
+
+static int xlgmac_vlan_rx_add_vid(struct net_device *netdev,
+                                 __be16 proto,
+                                 u16 vid)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+
+       set_bit(vid, pdata->active_vlans);
+       hw_ops->update_vlan_hash_table(pdata);
+
+       return 0;
+}
+
+static int xlgmac_vlan_rx_kill_vid(struct net_device *netdev,
+                                  __be16 proto,
+                                  u16 vid)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+
+       clear_bit(vid, pdata->active_vlans);
+       hw_ops->update_vlan_hash_table(pdata);
+
+       return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void xlgmac_poll_controller(struct net_device *netdev)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_channel *channel;
+       unsigned int i;
+
+       if (pdata->per_channel_irq) {
+               channel = pdata->channel_head;
+               for (i = 0; i < pdata->channel_count; i++, channel++)
+                       xlgmac_dma_isr(channel->dma_irq, channel);
+       } else {
+               disable_irq(pdata->dev_irq);
+               xlgmac_isr(pdata->dev_irq, pdata);
+               enable_irq(pdata->dev_irq);
+       }
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+static int xlgmac_set_features(struct net_device *netdev,
+                              netdev_features_t features)
+{
+       netdev_features_t rxhash, rxcsum, rxvlan, rxvlan_filter;
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+       int ret = 0;
+
+       rxhash = pdata->netdev_features & NETIF_F_RXHASH;
+       rxcsum = pdata->netdev_features & NETIF_F_RXCSUM;
+       rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX;
+       rxvlan_filter = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_FILTER;
+
+       if ((features & NETIF_F_RXHASH) && !rxhash)
+               ret = hw_ops->enable_rss(pdata);
+       else if (!(features & NETIF_F_RXHASH) && rxhash)
+               ret = hw_ops->disable_rss(pdata);
+       if (ret)
+               return ret;
+
+       if ((features & NETIF_F_RXCSUM) && !rxcsum)
+               hw_ops->enable_rx_csum(pdata);
+       else if (!(features & NETIF_F_RXCSUM) && rxcsum)
+               hw_ops->disable_rx_csum(pdata);
+
+       if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan)
+               hw_ops->enable_rx_vlan_stripping(pdata);
+       else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && rxvlan)
+               hw_ops->disable_rx_vlan_stripping(pdata);
+
+       if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) && !rxvlan_filter)
+               hw_ops->enable_rx_vlan_filtering(pdata);
+       else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && rxvlan_filter)
+               hw_ops->disable_rx_vlan_filtering(pdata);
+
+       pdata->netdev_features = features;
+
+       return 0;
+}
+
+static void xlgmac_set_rx_mode(struct net_device *netdev)
+{
+       struct xlgmac_pdata *pdata = netdev_priv(netdev);
+       struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
+
+       hw_ops->config_rx_mode(pdata);
+}
+
+static const struct net_device_ops xlgmac_netdev_ops = {
+       .ndo_open               = xlgmac_open,
+       .ndo_stop               = xlgmac_close,
+       .ndo_start_xmit         = xlgmac_xmit,
+       .ndo_tx_timeout         = xlgmac_tx_timeout,
+       .ndo_get_stats64        = xlgmac_get_stats64,
+       .ndo_change_mtu         = xlgmac_change_mtu,
+       .ndo_set_mac_address    = xlgmac_set_mac_address,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_do_ioctl           = xlgmac_ioctl,
+       .ndo_vlan_rx_add_vid    = xlgmac_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = xlgmac_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = xlgmac_poll_controller,
+#endif
+       .ndo_set_features       = xlgmac_set_features,
+       .ndo_set_rx_mode        = xlgmac_set_rx_mode,
+};
+
+const struct net_device_ops *xlgmac_get_netdev_ops(void)
+{
+       return &xlgmac_netdev_ops;
+}
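+
+/* Usage sketch (an assumption: this mirrors the call site in the common
+ * driver code, which is outside this hunk):
+ *
+ *	netdev->netdev_ops = xlgmac_get_netdev_ops();
+ *
+ * The accessor keeps the ops table itself static to this file.
+ */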
+
+static void xlgmac_rx_refresh(struct xlgmac_channel *channel)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->rx_ring;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_desc_ops *desc_ops;
+       struct xlgmac_hw_ops *hw_ops;
+
+       desc_ops = &pdata->desc_ops;
+       hw_ops = &pdata->hw_ops;
+
+       while (ring->dirty != ring->cur) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, ring->dirty);
+
+               /* Reset desc_data values */
+               desc_ops->unmap_desc_data(pdata, desc_data);
+
+               if (desc_ops->map_rx_buffer(pdata, ring, desc_data))
+                       break;
+
+               hw_ops->rx_desc_reset(pdata, desc_data, ring->dirty);
+
+               ring->dirty++;
+       }
+
+       /* Make sure everything is written before the register write */
+       wmb();
+
+       /* Update the Rx Tail Pointer Register with address of
+        * the last cleaned entry
+        */
+       desc_data = XLGMAC_GET_DESC_DATA(ring, ring->dirty - 1);
+       writel(lower_32_bits(desc_data->dma_desc_addr),
+              XLGMAC_DMA_REG(channel, DMA_CH_RDTR_LO));
+}
+
+static struct sk_buff *xlgmac_create_skb(struct xlgmac_pdata *pdata,
+                                        struct napi_struct *napi,
+                                        struct xlgmac_desc_data *desc_data,
+                                        unsigned int len)
+{
+       unsigned int copy_len;
+       struct sk_buff *skb;
+       u8 *packet;
+
+       skb = napi_alloc_skb(napi, desc_data->rx.hdr.dma_len);
+       if (!skb)
+               return NULL;
+
+       /* Start with the header buffer which may contain just the header
+        * or the header plus data
+        */
+       dma_sync_single_range_for_cpu(pdata->dev, desc_data->rx.hdr.dma_base,
+                                     desc_data->rx.hdr.dma_off,
+                                     desc_data->rx.hdr.dma_len,
+                                     DMA_FROM_DEVICE);
+
+       packet = page_address(desc_data->rx.hdr.pa.pages) +
+                desc_data->rx.hdr.pa.pages_offset;
+       copy_len = (desc_data->rx.hdr_len) ? desc_data->rx.hdr_len : len;
+       copy_len = min(desc_data->rx.hdr.dma_len, copy_len);
+       skb_copy_to_linear_data(skb, packet, copy_len);
+       skb_put(skb, copy_len);
+
+       len -= copy_len;
+       if (len) {
+               /* Add the remaining data as a frag */
+               dma_sync_single_range_for_cpu(pdata->dev,
+                                             desc_data->rx.buf.dma_base,
+                                             desc_data->rx.buf.dma_off,
+                                             desc_data->rx.buf.dma_len,
+                                             DMA_FROM_DEVICE);
+
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+                               desc_data->rx.buf.pa.pages,
+                               desc_data->rx.buf.pa.pages_offset,
+                               len, desc_data->rx.buf.dma_len);
+               desc_data->rx.buf.pa.pages = NULL;
+       }
+
+       return skb;
+}
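+
+/* Note on the split above: only the (small) header buffer is copied into
+ * the skb linear area; any remaining payload is attached as a page
+ * fragment, so large packets avoid a second full copy.
+ */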
+
+static int xlgmac_tx_poll(struct xlgmac_channel *channel)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->tx_ring;
+       struct net_device *netdev = pdata->netdev;
+       unsigned int tx_packets = 0, tx_bytes = 0;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_dma_desc *dma_desc;
+       struct xlgmac_desc_ops *desc_ops;
+       struct xlgmac_hw_ops *hw_ops;
+       struct netdev_queue *txq;
+       int processed = 0;
+       unsigned int cur;
+
+       desc_ops = &pdata->desc_ops;
+       hw_ops = &pdata->hw_ops;
+
+       /* Nothing to do if there isn't a Tx ring for this channel */
+       if (!ring)
+               return 0;
+
+       cur = ring->cur;
+
+       /* Be sure we get ring->cur before accessing descriptor data */
+       smp_rmb();
+
+       txq = netdev_get_tx_queue(netdev, channel->queue_index);
+
+       while ((processed < XLGMAC_TX_DESC_MAX_PROC) &&
+              (ring->dirty != cur)) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, ring->dirty);
+               dma_desc = desc_data->dma_desc;
+
+               if (!hw_ops->tx_complete(dma_desc))
+                       break;
+
+               /* Make sure descriptor fields are read after reading
+                * the OWN bit
+                */
+               dma_rmb();
+
+               if (netif_msg_tx_done(pdata))
+                       xlgmac_dump_tx_desc(pdata, ring, ring->dirty, 1, 0);
+
+               if (hw_ops->is_last_desc(dma_desc)) {
+                       tx_packets += desc_data->tx.packets;
+                       tx_bytes += desc_data->tx.bytes;
+               }
+
+               /* Free the SKB and reset the descriptor for re-use */
+               desc_ops->unmap_desc_data(pdata, desc_data);
+               hw_ops->tx_desc_reset(desc_data);
+
+               processed++;
+               ring->dirty++;
+       }
+
+       if (!processed)
+               return 0;
+
+       netdev_tx_completed_queue(txq, tx_packets, tx_bytes);
+
+       if ((ring->tx.queue_stopped == 1) &&
+           (xlgmac_tx_avail_desc(ring) > XLGMAC_TX_DESC_MIN_FREE)) {
+               ring->tx.queue_stopped = 0;
+               netif_tx_wake_queue(txq);
+       }
+
+       XLGMAC_PR("processed=%d\n", processed);
+
+       return processed;
+}
+
+static int xlgmac_rx_poll(struct xlgmac_channel *channel, int budget)
+{
+       struct xlgmac_pdata *pdata = channel->pdata;
+       struct xlgmac_ring *ring = channel->rx_ring;
+       struct net_device *netdev = pdata->netdev;
+       unsigned int len, dma_desc_len, max_len;
+       unsigned int context_next, context;
+       struct xlgmac_desc_data *desc_data;
+       struct xlgmac_pkt_info *pkt_info;
+       unsigned int incomplete, error;
+       struct xlgmac_hw_ops *hw_ops;
+       unsigned int received = 0;
+       struct napi_struct *napi;
+       struct sk_buff *skb;
+       int packet_count = 0;
+
+       hw_ops = &pdata->hw_ops;
+
+       /* Nothing to do if there isn't a Rx ring for this channel */
+       if (!ring)
+               return 0;
+
+       incomplete = 0;
+       context_next = 0;
+
+       napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+
+       desc_data = XLGMAC_GET_DESC_DATA(ring, ring->cur);
+       pkt_info = &ring->pkt_info;
+       while (packet_count < budget) {
+               /* First time through the loop, see if we need to restore state */
+               if (!received && desc_data->state_saved) {
+                       skb = desc_data->state.skb;
+                       error = desc_data->state.error;
+                       len = desc_data->state.len;
+               } else {
+                       memset(pkt_info, 0, sizeof(*pkt_info));
+                       skb = NULL;
+                       error = 0;
+                       len = 0;
+               }
+
+read_again:
+               desc_data = XLGMAC_GET_DESC_DATA(ring, ring->cur);
+
+               if (xlgmac_rx_dirty_desc(ring) > XLGMAC_RX_DESC_MAX_DIRTY)
+                       xlgmac_rx_refresh(channel);
+
+               if (hw_ops->dev_read(channel))
+                       break;
+
+               received++;
+               ring->cur++;
+
+               incomplete = XLGMAC_GET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_INCOMPLETE_POS,
+                                       RX_PACKET_ATTRIBUTES_INCOMPLETE_LEN);
+               context_next = XLGMAC_GET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_POS,
+                                       RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_LEN);
+               context = XLGMAC_GET_REG_BITS(
+                                       pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_CONTEXT_POS,
+                                       RX_PACKET_ATTRIBUTES_CONTEXT_LEN);
+
+               /* Earlier error, just drain the remaining data */
+               if ((incomplete || context_next) && error)
+                       goto read_again;
+
+               if (error || pkt_info->errors) {
+                       if (pkt_info->errors)
+                               netif_err(pdata, rx_err, netdev,
+                                         "error in received packet\n");
+                       dev_kfree_skb(skb);
+                       goto next_packet;
+               }
+
+               if (!context) {
+                       /* Length is cumulative; get this descriptor's length */
+                       dma_desc_len = desc_data->rx.len - len;
+                       len += dma_desc_len;
+
+                       if (dma_desc_len && !skb) {
+                               skb = xlgmac_create_skb(pdata, napi, desc_data,
+                                                       dma_desc_len);
+                               if (!skb)
+                                       error = 1;
+                       } else if (dma_desc_len) {
+                               dma_sync_single_range_for_cpu(
+                                               pdata->dev,
+                                               desc_data->rx.buf.dma_base,
+                                               desc_data->rx.buf.dma_off,
+                                               desc_data->rx.buf.dma_len,
+                                               DMA_FROM_DEVICE);
+
+                               skb_add_rx_frag(
+                                       skb, skb_shinfo(skb)->nr_frags,
+                                       desc_data->rx.buf.pa.pages,
+                                       desc_data->rx.buf.pa.pages_offset,
+                                       dma_desc_len,
+                                       desc_data->rx.buf.dma_len);
+                               desc_data->rx.buf.pa.pages = NULL;
+                       }
+               }
+
+               if (incomplete || context_next)
+                       goto read_again;
+
+               if (!skb)
+                       goto next_packet;
+
+               /* Be sure we don't exceed the configured MTU */
+               max_len = netdev->mtu + ETH_HLEN;
+               if (!(netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+                   (skb->protocol == htons(ETH_P_8021Q)))
+                       max_len += VLAN_HLEN;
+
+               if (skb->len > max_len) {
+                       netif_err(pdata, rx_err, netdev,
+                                 "packet length exceeds configured MTU\n");
+                       dev_kfree_skb(skb);
+                       goto next_packet;
+               }
+
+               if (netif_msg_pktdata(pdata))
+                       xlgmac_print_pkt(netdev, skb, false);
+
+               skb_checksum_none_assert(skb);
+               if (XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_CSUM_DONE_POS,
+                                       RX_PACKET_ATTRIBUTES_CSUM_DONE_LEN))
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+               if (XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_VLAN_CTAG_POS,
+                                       RX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN))
+                       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                              pkt_info->vlan_ctag);
+
+               if (XLGMAC_GET_REG_BITS(pkt_info->attributes,
+                                       RX_PACKET_ATTRIBUTES_RSS_HASH_POS,
+                                       RX_PACKET_ATTRIBUTES_RSS_HASH_LEN))
+                       skb_set_hash(skb, pkt_info->rss_hash,
+                                    pkt_info->rss_hash_type);
+
+               skb->dev = netdev;
+               skb->protocol = eth_type_trans(skb, netdev);
+               skb_record_rx_queue(skb, channel->queue_index);
+
+               napi_gro_receive(napi, skb);
+
+next_packet:
+               packet_count++;
+       }
+
+       /* Check if we need to save state before leaving */
+       if (received && (incomplete || context_next)) {
+               desc_data = XLGMAC_GET_DESC_DATA(ring, ring->cur);
+               desc_data->state_saved = 1;
+               desc_data->state.skb = skb;
+               desc_data->state.len = len;
+               desc_data->state.error = error;
+       }
+
+       XLGMAC_PR("packet_count = %d\n", packet_count);
+
+       return packet_count;
+}
+
+static int xlgmac_one_poll(struct napi_struct *napi, int budget)
+{
+       struct xlgmac_channel *channel = container_of(napi,
+                                               struct xlgmac_channel,
+                                               napi);
+       int processed = 0;
+
+       XLGMAC_PR("budget=%d\n", budget);
+
+       /* Cleanup Tx ring first */
+       xlgmac_tx_poll(channel);
+
+       /* Process Rx ring next */
+       processed = xlgmac_rx_poll(channel, budget);
+
+       /* If we processed everything, we are done */
+       if (processed < budget) {
+               /* Turn off polling */
+               napi_complete_done(napi, processed);
+
+               /* Enable Tx and Rx interrupts */
+               enable_irq(channel->dma_irq);
+       }
+
+       XLGMAC_PR("received = %d\n", processed);
+
+       return processed;
+}
+
+static int xlgmac_all_poll(struct napi_struct *napi, int budget)
+{
+       struct xlgmac_pdata *pdata = container_of(napi,
+                                                  struct xlgmac_pdata,
+                                                  napi);
+       struct xlgmac_channel *channel;
+       int processed, last_processed;
+       int ring_budget;
+       unsigned int i;
+
+       XLGMAC_PR("budget=%d\n", budget);
+
+       processed = 0;
+       ring_budget = budget / pdata->rx_ring_count;
+       do {
+               last_processed = processed;
+
+               channel = pdata->channel_head;
+               for (i = 0; i < pdata->channel_count; i++, channel++) {
+                       /* Cleanup Tx ring first */
+                       xlgmac_tx_poll(channel);
+
+                       /* Process Rx ring next */
+                       if (ring_budget > (budget - processed))
+                               ring_budget = budget - processed;
+                       processed += xlgmac_rx_poll(channel, ring_budget);
+               }
+       } while ((processed < budget) && (processed != last_processed));
+
+       /* If we processed everything, we are done */
+       if (processed < budget) {
+               /* Turn off polling */
+               napi_complete_done(napi, processed);
+
+               /* Enable Tx and Rx interrupts */
+               xlgmac_enable_rx_tx_ints(pdata);
+       }
+
+       XLGMAC_PR("received = %d\n", processed);
+
+       return processed;
+}
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-pci.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-pci.c
new file mode 100644 (file)
index 0000000..386bafe
--- /dev/null
@@ -0,0 +1,78 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "dwc-xlgmac.h"
+#include "dwc-xlgmac-reg.h"
+
+static int xlgmac_probe(struct pci_dev *pcidev, const struct pci_device_id *id)
+{
+       struct device *dev = &pcidev->dev;
+       struct xlgmac_resources res;
+       int i, ret;
+
+       ret = pcim_enable_device(pcidev);
+       if (ret) {
+               dev_err(dev, "ERROR: failed to enable device\n");
+               return ret;
+       }
+
+       for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+               if (pci_resource_len(pcidev, i) == 0)
+                       continue;
+               ret = pcim_iomap_regions(pcidev, BIT(i), XLGMAC_DRV_NAME);
+               if (ret)
+                       return ret;
+               break;
+       }
+
+       /* No usable memory BAR was found and mapped above */
+       if (i > PCI_STD_RESOURCE_END)
+               return -ENODEV;
+
+       pci_set_master(pcidev);
+
+       memset(&res, 0, sizeof(res));
+       res.irq = pcidev->irq;
+       res.addr = pcim_iomap_table(pcidev)[i];
+
+       return xlgmac_drv_probe(&pcidev->dev, &res);
+}
+
+static void xlgmac_remove(struct pci_dev *pcidev)
+{
+       xlgmac_drv_remove(&pcidev->dev);
+}
+
+static const struct pci_device_id xlgmac_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, 0x7302) },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, xlgmac_pci_tbl);
+
+static struct pci_driver xlgmac_pci_driver = {
+       .name           = XLGMAC_DRV_NAME,
+       .id_table       = xlgmac_pci_tbl,
+       .probe          = xlgmac_probe,
+       .remove         = xlgmac_remove,
+};
+
+module_pci_driver(xlgmac_pci_driver);
+
+MODULE_DESCRIPTION(XLGMAC_DRV_DESC);
+MODULE_VERSION(XLGMAC_DRV_VERSION);
+MODULE_AUTHOR("Jie Deng <jiedeng@synopsys.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-reg.h b/drivers/net/ethernet/synopsys/dwc-xlgmac-reg.h
new file mode 100644 (file)
index 0000000..3754f22
--- /dev/null
@@ -0,0 +1,744 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#ifndef __DWC_XLGMAC_REG_H__
+#define __DWC_XLGMAC_REG_H__
+
+/* MAC register offsets */
+#define MAC_TCR                                0x0000
+#define MAC_RCR                                0x0004
+#define MAC_PFR                                0x0008
+#define MAC_HTR0                       0x0010
+#define MAC_VLANTR                     0x0050
+#define MAC_VLANHTR                    0x0058
+#define MAC_VLANIR                     0x0060
+#define MAC_Q0TFCR                     0x0070
+#define MAC_RFCR                       0x0090
+#define MAC_RQC0R                      0x00a0
+#define MAC_RQC1R                      0x00a4
+#define MAC_RQC2R                      0x00a8
+#define MAC_RQC3R                      0x00ac
+#define MAC_ISR                                0x00b0
+#define MAC_IER                                0x00b4
+#define MAC_VR                         0x0110
+#define MAC_HWF0R                      0x011c
+#define MAC_HWF1R                      0x0120
+#define MAC_HWF2R                      0x0124
+#define MAC_MACA0HR                    0x0300
+#define MAC_MACA0LR                    0x0304
+#define MAC_MACA1HR                    0x0308
+#define MAC_MACA1LR                    0x030c
+#define MAC_RSSCR                      0x0c80
+#define MAC_RSSAR                      0x0c88
+#define MAC_RSSDR                      0x0c8c
+
+#define MAC_QTFCR_INC                  4
+#define MAC_MACA_INC                   4
+#define MAC_HTR_INC                    4
+#define MAC_RQC2_INC                   4
+#define MAC_RQC2_Q_PER_REG             4
+
+/* MAC register entry bit positions and sizes */
+#define MAC_HWF0R_ADDMACADRSEL_POS     18
+#define MAC_HWF0R_ADDMACADRSEL_LEN     5
+#define MAC_HWF0R_ARPOFFSEL_POS                9
+#define MAC_HWF0R_ARPOFFSEL_LEN                1
+#define MAC_HWF0R_EEESEL_POS           13
+#define MAC_HWF0R_EEESEL_LEN           1
+#define MAC_HWF0R_PHYIFSEL_POS         1
+#define MAC_HWF0R_PHYIFSEL_LEN         2
+#define MAC_HWF0R_MGKSEL_POS           7
+#define MAC_HWF0R_MGKSEL_LEN           1
+#define MAC_HWF0R_MMCSEL_POS           8
+#define MAC_HWF0R_MMCSEL_LEN           1
+#define MAC_HWF0R_RWKSEL_POS           6
+#define MAC_HWF0R_RWKSEL_LEN           1
+#define MAC_HWF0R_RXCOESEL_POS         16
+#define MAC_HWF0R_RXCOESEL_LEN         1
+#define MAC_HWF0R_SAVLANINS_POS                27
+#define MAC_HWF0R_SAVLANINS_LEN                1
+#define MAC_HWF0R_SMASEL_POS           5
+#define MAC_HWF0R_SMASEL_LEN           1
+#define MAC_HWF0R_TSSEL_POS            12
+#define MAC_HWF0R_TSSEL_LEN            1
+#define MAC_HWF0R_TSSTSSEL_POS         25
+#define MAC_HWF0R_TSSTSSEL_LEN         2
+#define MAC_HWF0R_TXCOESEL_POS         14
+#define MAC_HWF0R_TXCOESEL_LEN         1
+#define MAC_HWF0R_VLHASH_POS           4
+#define MAC_HWF0R_VLHASH_LEN           1
+#define MAC_HWF1R_ADDR64_POS           14
+#define MAC_HWF1R_ADDR64_LEN           2
+#define MAC_HWF1R_ADVTHWORD_POS                13
+#define MAC_HWF1R_ADVTHWORD_LEN                1
+#define MAC_HWF1R_DBGMEMA_POS          19
+#define MAC_HWF1R_DBGMEMA_LEN          1
+#define MAC_HWF1R_DCBEN_POS            16
+#define MAC_HWF1R_DCBEN_LEN            1
+#define MAC_HWF1R_HASHTBLSZ_POS                24
+#define MAC_HWF1R_HASHTBLSZ_LEN                3
+#define MAC_HWF1R_L3L4FNUM_POS         27
+#define MAC_HWF1R_L3L4FNUM_LEN         4
+#define MAC_HWF1R_NUMTC_POS            21
+#define MAC_HWF1R_NUMTC_LEN            3
+#define MAC_HWF1R_RSSEN_POS            20
+#define MAC_HWF1R_RSSEN_LEN            1
+#define MAC_HWF1R_RXFIFOSIZE_POS       0
+#define MAC_HWF1R_RXFIFOSIZE_LEN       5
+#define MAC_HWF1R_SPHEN_POS            17
+#define MAC_HWF1R_SPHEN_LEN            1
+#define MAC_HWF1R_TSOEN_POS            18
+#define MAC_HWF1R_TSOEN_LEN            1
+#define MAC_HWF1R_TXFIFOSIZE_POS       6
+#define MAC_HWF1R_TXFIFOSIZE_LEN       5
+#define MAC_HWF2R_AUXSNAPNUM_POS       28
+#define MAC_HWF2R_AUXSNAPNUM_LEN       3
+#define MAC_HWF2R_PPSOUTNUM_POS                24
+#define MAC_HWF2R_PPSOUTNUM_LEN                3
+#define MAC_HWF2R_RXCHCNT_POS          12
+#define MAC_HWF2R_RXCHCNT_LEN          4
+#define MAC_HWF2R_RXQCNT_POS           0
+#define MAC_HWF2R_RXQCNT_LEN           4
+#define MAC_HWF2R_TXCHCNT_POS          18
+#define MAC_HWF2R_TXCHCNT_LEN          4
+#define MAC_HWF2R_TXQCNT_POS           6
+#define MAC_HWF2R_TXQCNT_LEN           4
+#define MAC_IER_TSIE_POS               12
+#define MAC_IER_TSIE_LEN               1
+#define MAC_ISR_MMCRXIS_POS            9
+#define MAC_ISR_MMCRXIS_LEN            1
+#define MAC_ISR_MMCTXIS_POS            10
+#define MAC_ISR_MMCTXIS_LEN            1
+#define MAC_ISR_PMTIS_POS              4
+#define MAC_ISR_PMTIS_LEN              1
+#define MAC_ISR_TSIS_POS               12
+#define MAC_ISR_TSIS_LEN               1
+#define MAC_MACA1HR_AE_POS             31
+#define MAC_MACA1HR_AE_LEN             1
+#define MAC_PFR_HMC_POS                        2
+#define MAC_PFR_HMC_LEN                        1
+#define MAC_PFR_HPF_POS                        10
+#define MAC_PFR_HPF_LEN                        1
+#define MAC_PFR_HUC_POS                        1
+#define MAC_PFR_HUC_LEN                        1
+#define MAC_PFR_PM_POS                 4
+#define MAC_PFR_PM_LEN                 1
+#define MAC_PFR_PR_POS                 0
+#define MAC_PFR_PR_LEN                 1
+#define MAC_PFR_VTFE_POS               16
+#define MAC_PFR_VTFE_LEN               1
+#define MAC_Q0TFCR_PT_POS              16
+#define MAC_Q0TFCR_PT_LEN              16
+#define MAC_Q0TFCR_TFE_POS             1
+#define MAC_Q0TFCR_TFE_LEN             1
+#define MAC_RCR_ACS_POS                        1
+#define MAC_RCR_ACS_LEN                        1
+#define MAC_RCR_CST_POS                        2
+#define MAC_RCR_CST_LEN                        1
+#define MAC_RCR_DCRCC_POS              3
+#define MAC_RCR_DCRCC_LEN              1
+#define MAC_RCR_HDSMS_POS              12
+#define MAC_RCR_HDSMS_LEN              3
+#define MAC_RCR_IPC_POS                        9
+#define MAC_RCR_IPC_LEN                        1
+#define MAC_RCR_JE_POS                 8
+#define MAC_RCR_JE_LEN                 1
+#define MAC_RCR_LM_POS                 10
+#define MAC_RCR_LM_LEN                 1
+#define MAC_RCR_RE_POS                 0
+#define MAC_RCR_RE_LEN                 1
+#define MAC_RFCR_PFCE_POS              8
+#define MAC_RFCR_PFCE_LEN              1
+#define MAC_RFCR_RFE_POS               0
+#define MAC_RFCR_RFE_LEN               1
+#define MAC_RFCR_UP_POS                        1
+#define MAC_RFCR_UP_LEN                        1
+#define MAC_RQC0R_RXQ0EN_POS           0
+#define MAC_RQC0R_RXQ0EN_LEN           2
+#define MAC_RSSAR_ADDRT_POS            2
+#define MAC_RSSAR_ADDRT_LEN            1
+#define MAC_RSSAR_CT_POS               1
+#define MAC_RSSAR_CT_LEN               1
+#define MAC_RSSAR_OB_POS               0
+#define MAC_RSSAR_OB_LEN               1
+#define MAC_RSSAR_RSSIA_POS            8
+#define MAC_RSSAR_RSSIA_LEN            8
+#define MAC_RSSCR_IP2TE_POS            1
+#define MAC_RSSCR_IP2TE_LEN            1
+#define MAC_RSSCR_RSSE_POS             0
+#define MAC_RSSCR_RSSE_LEN             1
+#define MAC_RSSCR_TCP4TE_POS           2
+#define MAC_RSSCR_TCP4TE_LEN           1
+#define MAC_RSSCR_UDP4TE_POS           3
+#define MAC_RSSCR_UDP4TE_LEN           1
+#define MAC_RSSDR_DMCH_POS             0
+#define MAC_RSSDR_DMCH_LEN             4
+#define MAC_TCR_SS_POS                 28
+#define MAC_TCR_SS_LEN                 3
+#define MAC_TCR_TE_POS                 0
+#define MAC_TCR_TE_LEN                 1
+#define MAC_VLANHTR_VLHT_POS           0
+#define MAC_VLANHTR_VLHT_LEN           16
+#define MAC_VLANIR_VLTI_POS            20
+#define MAC_VLANIR_VLTI_LEN            1
+#define MAC_VLANIR_CSVL_POS            19
+#define MAC_VLANIR_CSVL_LEN            1
+#define MAC_VLANTR_DOVLTC_POS          20
+#define MAC_VLANTR_DOVLTC_LEN          1
+#define MAC_VLANTR_ERSVLM_POS          19
+#define MAC_VLANTR_ERSVLM_LEN          1
+#define MAC_VLANTR_ESVL_POS            18
+#define MAC_VLANTR_ESVL_LEN            1
+#define MAC_VLANTR_ETV_POS             16
+#define MAC_VLANTR_ETV_LEN             1
+#define MAC_VLANTR_EVLS_POS            21
+#define MAC_VLANTR_EVLS_LEN            2
+#define MAC_VLANTR_EVLRXS_POS          24
+#define MAC_VLANTR_EVLRXS_LEN          1
+#define MAC_VLANTR_VL_POS              0
+#define MAC_VLANTR_VL_LEN              16
+#define MAC_VLANTR_VTHM_POS            25
+#define MAC_VLANTR_VTHM_LEN            1
+#define MAC_VLANTR_VTIM_POS            17
+#define MAC_VLANTR_VTIM_LEN            1
+#define MAC_VR_DEVID_POS               8
+#define MAC_VR_DEVID_LEN               8
+#define MAC_VR_SNPSVER_POS             0
+#define MAC_VR_SNPSVER_LEN             8
+#define MAC_VR_USERVER_POS             16
+#define MAC_VR_USERVER_LEN             8
+
+/* MMC register offsets */
+#define MMC_CR                         0x0800
+#define MMC_RISR                       0x0804
+#define MMC_TISR                       0x0808
+#define MMC_RIER                       0x080c
+#define MMC_TIER                       0x0810
+#define MMC_TXOCTETCOUNT_GB_LO         0x0814
+#define MMC_TXFRAMECOUNT_GB_LO         0x081c
+#define MMC_TXBROADCASTFRAMES_G_LO     0x0824
+#define MMC_TXMULTICASTFRAMES_G_LO     0x082c
+#define MMC_TX64OCTETS_GB_LO           0x0834
+#define MMC_TX65TO127OCTETS_GB_LO      0x083c
+#define MMC_TX128TO255OCTETS_GB_LO     0x0844
+#define MMC_TX256TO511OCTETS_GB_LO     0x084c
+#define MMC_TX512TO1023OCTETS_GB_LO    0x0854
+#define MMC_TX1024TOMAXOCTETS_GB_LO    0x085c
+#define MMC_TXUNICASTFRAMES_GB_LO      0x0864
+#define MMC_TXMULTICASTFRAMES_GB_LO    0x086c
+#define MMC_TXBROADCASTFRAMES_GB_LO    0x0874
+#define MMC_TXUNDERFLOWERROR_LO                0x087c
+#define MMC_TXOCTETCOUNT_G_LO          0x0884
+#define MMC_TXFRAMECOUNT_G_LO          0x088c
+#define MMC_TXPAUSEFRAMES_LO           0x0894
+#define MMC_TXVLANFRAMES_G_LO          0x089c
+#define MMC_RXFRAMECOUNT_GB_LO         0x0900
+#define MMC_RXOCTETCOUNT_GB_LO         0x0908
+#define MMC_RXOCTETCOUNT_G_LO          0x0910
+#define MMC_RXBROADCASTFRAMES_G_LO     0x0918
+#define MMC_RXMULTICASTFRAMES_G_LO     0x0920
+#define MMC_RXCRCERROR_LO              0x0928
+#define MMC_RXRUNTERROR                        0x0930
+#define MMC_RXJABBERERROR              0x0934
+#define MMC_RXUNDERSIZE_G              0x0938
+#define MMC_RXOVERSIZE_G               0x093c
+#define MMC_RX64OCTETS_GB_LO           0x0940
+#define MMC_RX65TO127OCTETS_GB_LO      0x0948
+#define MMC_RX128TO255OCTETS_GB_LO     0x0950
+#define MMC_RX256TO511OCTETS_GB_LO     0x0958
+#define MMC_RX512TO1023OCTETS_GB_LO    0x0960
+#define MMC_RX1024TOMAXOCTETS_GB_LO    0x0968
+#define MMC_RXUNICASTFRAMES_G_LO       0x0970
+#define MMC_RXLENGTHERROR_LO           0x0978
+#define MMC_RXOUTOFRANGETYPE_LO                0x0980
+#define MMC_RXPAUSEFRAMES_LO           0x0988
+#define MMC_RXFIFOOVERFLOW_LO          0x0990
+#define MMC_RXVLANFRAMES_GB_LO         0x0998
+#define MMC_RXWATCHDOGERROR            0x09a0
+
+/* MMC register entry bit positions and sizes */
+#define MMC_CR_CR_POS                          0
+#define MMC_CR_CR_LEN                          1
+#define MMC_CR_CSR_POS                         1
+#define MMC_CR_CSR_LEN                         1
+#define MMC_CR_ROR_POS                         2
+#define MMC_CR_ROR_LEN                         1
+#define MMC_CR_MCF_POS                         3
+#define MMC_CR_MCF_LEN                         1
+#define MMC_CR_MCT_POS                         4
+#define MMC_CR_MCT_LEN                         2
+#define MMC_RIER_ALL_INTERRUPTS_POS            0
+#define MMC_RIER_ALL_INTERRUPTS_LEN            23
+#define MMC_RISR_RXFRAMECOUNT_GB_POS           0
+#define MMC_RISR_RXFRAMECOUNT_GB_LEN           1
+#define MMC_RISR_RXOCTETCOUNT_GB_POS           1
+#define MMC_RISR_RXOCTETCOUNT_GB_LEN           1
+#define MMC_RISR_RXOCTETCOUNT_G_POS            2
+#define MMC_RISR_RXOCTETCOUNT_G_LEN            1
+#define MMC_RISR_RXBROADCASTFRAMES_G_POS       3
+#define MMC_RISR_RXBROADCASTFRAMES_G_LEN       1
+#define MMC_RISR_RXMULTICASTFRAMES_G_POS       4
+#define MMC_RISR_RXMULTICASTFRAMES_G_LEN       1
+#define MMC_RISR_RXCRCERROR_POS                        5
+#define MMC_RISR_RXCRCERROR_LEN                        1
+#define MMC_RISR_RXRUNTERROR_POS               6
+#define MMC_RISR_RXRUNTERROR_LEN               1
+#define MMC_RISR_RXJABBERERROR_POS             7
+#define MMC_RISR_RXJABBERERROR_LEN             1
+#define MMC_RISR_RXUNDERSIZE_G_POS             8
+#define MMC_RISR_RXUNDERSIZE_G_LEN             1
+#define MMC_RISR_RXOVERSIZE_G_POS              9
+#define MMC_RISR_RXOVERSIZE_G_LEN              1
+#define MMC_RISR_RX64OCTETS_GB_POS             10
+#define MMC_RISR_RX64OCTETS_GB_LEN             1
+#define MMC_RISR_RX65TO127OCTETS_GB_POS                11
+#define MMC_RISR_RX65TO127OCTETS_GB_LEN                1
+#define MMC_RISR_RX128TO255OCTETS_GB_POS       12
+#define MMC_RISR_RX128TO255OCTETS_GB_LEN       1
+#define MMC_RISR_RX256TO511OCTETS_GB_POS       13
+#define MMC_RISR_RX256TO511OCTETS_GB_LEN       1
+#define MMC_RISR_RX512TO1023OCTETS_GB_POS      14
+#define MMC_RISR_RX512TO1023OCTETS_GB_LEN      1
+#define MMC_RISR_RX1024TOMAXOCTETS_GB_POS      15
+#define MMC_RISR_RX1024TOMAXOCTETS_GB_LEN      1
+#define MMC_RISR_RXUNICASTFRAMES_G_POS         16
+#define MMC_RISR_RXUNICASTFRAMES_G_LEN         1
+#define MMC_RISR_RXLENGTHERROR_POS             17
+#define MMC_RISR_RXLENGTHERROR_LEN             1
+#define MMC_RISR_RXOUTOFRANGETYPE_POS          18
+#define MMC_RISR_RXOUTOFRANGETYPE_LEN          1
+#define MMC_RISR_RXPAUSEFRAMES_POS             19
+#define MMC_RISR_RXPAUSEFRAMES_LEN             1
+#define MMC_RISR_RXFIFOOVERFLOW_POS            20
+#define MMC_RISR_RXFIFOOVERFLOW_LEN            1
+#define MMC_RISR_RXVLANFRAMES_GB_POS           21
+#define MMC_RISR_RXVLANFRAMES_GB_LEN           1
+#define MMC_RISR_RXWATCHDOGERROR_POS           22
+#define MMC_RISR_RXWATCHDOGERROR_LEN           1
+#define MMC_TIER_ALL_INTERRUPTS_POS            0
+#define MMC_TIER_ALL_INTERRUPTS_LEN            18
+#define MMC_TISR_TXOCTETCOUNT_GB_POS           0
+#define MMC_TISR_TXOCTETCOUNT_GB_LEN           1
+#define MMC_TISR_TXFRAMECOUNT_GB_POS           1
+#define MMC_TISR_TXFRAMECOUNT_GB_LEN           1
+#define MMC_TISR_TXBROADCASTFRAMES_G_POS       2
+#define MMC_TISR_TXBROADCASTFRAMES_G_LEN       1
+#define MMC_TISR_TXMULTICASTFRAMES_G_POS       3
+#define MMC_TISR_TXMULTICASTFRAMES_G_LEN       1
+#define MMC_TISR_TX64OCTETS_GB_POS             4
+#define MMC_TISR_TX64OCTETS_GB_LEN             1
+#define MMC_TISR_TX65TO127OCTETS_GB_POS                5
+#define MMC_TISR_TX65TO127OCTETS_GB_LEN                1
+#define MMC_TISR_TX128TO255OCTETS_GB_POS       6
+#define MMC_TISR_TX128TO255OCTETS_GB_LEN       1
+#define MMC_TISR_TX256TO511OCTETS_GB_POS       7
+#define MMC_TISR_TX256TO511OCTETS_GB_LEN       1
+#define MMC_TISR_TX512TO1023OCTETS_GB_POS      8
+#define MMC_TISR_TX512TO1023OCTETS_GB_LEN      1
+#define MMC_TISR_TX1024TOMAXOCTETS_GB_POS      9
+#define MMC_TISR_TX1024TOMAXOCTETS_GB_LEN      1
+#define MMC_TISR_TXUNICASTFRAMES_GB_POS                10
+#define MMC_TISR_TXUNICASTFRAMES_GB_LEN                1
+#define MMC_TISR_TXMULTICASTFRAMES_GB_POS      11
+#define MMC_TISR_TXMULTICASTFRAMES_GB_LEN      1
+#define MMC_TISR_TXBROADCASTFRAMES_GB_POS      12
+#define MMC_TISR_TXBROADCASTFRAMES_GB_LEN      1
+#define MMC_TISR_TXUNDERFLOWERROR_POS          13
+#define MMC_TISR_TXUNDERFLOWERROR_LEN          1
+#define MMC_TISR_TXOCTETCOUNT_G_POS            14
+#define MMC_TISR_TXOCTETCOUNT_G_LEN            1
+#define MMC_TISR_TXFRAMECOUNT_G_POS            15
+#define MMC_TISR_TXFRAMECOUNT_G_LEN            1
+#define MMC_TISR_TXPAUSEFRAMES_POS             16
+#define MMC_TISR_TXPAUSEFRAMES_LEN             1
+#define MMC_TISR_TXVLANFRAMES_G_POS            17
+#define MMC_TISR_TXVLANFRAMES_G_LEN            1
+
+/* MTL register offsets */
+#define MTL_OMR                                0x1000
+#define MTL_FDDR                       0x1010
+#define MTL_RQDCM0R                    0x1030
+
+#define MTL_RQDCM_INC                  4
+#define MTL_RQDCM_Q_PER_REG            4
+
+/* MTL register entry bit positions and sizes */
+#define MTL_OMR_ETSALG_POS             5
+#define MTL_OMR_ETSALG_LEN             2
+#define MTL_OMR_RAA_POS                        2
+#define MTL_OMR_RAA_LEN                        1
+
+/* MTL queue register offsets
+ *   Multiple queues can be active.  The first queue has registers
+ *   that begin at 0x1100.  Each subsequent queue has registers that
+ *   are accessed using an offset of 0x80 from the previous queue.
+ */
+#define MTL_Q_BASE                     0x1100
+#define MTL_Q_INC                      0x80
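+
+/* Per the comment above, queue n's copy of register "reg" sits at
+ * MTL_Q_BASE + (n * MTL_Q_INC) + reg; e.g. queue 2's MTL_Q_TQOMR is at
+ * 0x1100 + 2 * 0x80 + 0x00 = 0x1200. The XLGMAC_MTL_REG() helper at the
+ * end of this header encodes exactly this arithmetic.
+ */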
+
+#define MTL_Q_TQOMR                    0x00
+#define MTL_Q_RQOMR                    0x40
+#define MTL_Q_RQDR                     0x48
+#define MTL_Q_RQFCR                    0x50
+#define MTL_Q_IER                      0x70
+#define MTL_Q_ISR                      0x74
+
+/* MTL queue register entry bit positions and sizes */
+#define MTL_Q_RQDR_PRXQ_POS            16
+#define MTL_Q_RQDR_PRXQ_LEN            14
+#define MTL_Q_RQDR_RXQSTS_POS          4
+#define MTL_Q_RQDR_RXQSTS_LEN          2
+#define MTL_Q_RQFCR_RFA_POS            1
+#define MTL_Q_RQFCR_RFA_LEN            6
+#define MTL_Q_RQFCR_RFD_POS            17
+#define MTL_Q_RQFCR_RFD_LEN            6
+#define MTL_Q_RQOMR_EHFC_POS           7
+#define MTL_Q_RQOMR_EHFC_LEN           1
+#define MTL_Q_RQOMR_RQS_POS            16
+#define MTL_Q_RQOMR_RQS_LEN            9
+#define MTL_Q_RQOMR_RSF_POS            5
+#define MTL_Q_RQOMR_RSF_LEN            1
+#define MTL_Q_RQOMR_FEP_POS            4
+#define MTL_Q_RQOMR_FEP_LEN            1
+#define MTL_Q_RQOMR_FUP_POS            3
+#define MTL_Q_RQOMR_FUP_LEN            1
+#define MTL_Q_RQOMR_RTC_POS            0
+#define MTL_Q_RQOMR_RTC_LEN            2
+#define MTL_Q_TQOMR_FTQ_POS            0
+#define MTL_Q_TQOMR_FTQ_LEN            1
+#define MTL_Q_TQOMR_Q2TCMAP_POS                8
+#define MTL_Q_TQOMR_Q2TCMAP_LEN                3
+#define MTL_Q_TQOMR_TQS_POS            16
+#define MTL_Q_TQOMR_TQS_LEN            10
+#define MTL_Q_TQOMR_TSF_POS            1
+#define MTL_Q_TQOMR_TSF_LEN            1
+#define MTL_Q_TQOMR_TTC_POS            4
+#define MTL_Q_TQOMR_TTC_LEN            3
+#define MTL_Q_TQOMR_TXQEN_POS          2
+#define MTL_Q_TQOMR_TXQEN_LEN          2
+
+/* MTL queue register value */
+#define MTL_RSF_DISABLE                        0x00
+#define MTL_RSF_ENABLE                 0x01
+#define MTL_TSF_DISABLE                        0x00
+#define MTL_TSF_ENABLE                 0x01
+
+#define MTL_RX_THRESHOLD_64            0x00
+#define MTL_RX_THRESHOLD_96            0x02
+#define MTL_RX_THRESHOLD_128           0x03
+#define MTL_TX_THRESHOLD_64            0x00
+#define MTL_TX_THRESHOLD_96            0x02
+#define MTL_TX_THRESHOLD_128           0x03
+#define MTL_TX_THRESHOLD_192           0x04
+#define MTL_TX_THRESHOLD_256           0x05
+#define MTL_TX_THRESHOLD_384           0x06
+#define MTL_TX_THRESHOLD_512           0x07
+
+#define MTL_ETSALG_WRR                 0x00
+#define MTL_ETSALG_WFQ                 0x01
+#define MTL_ETSALG_DWRR                        0x02
+#define MTL_RAA_SP                     0x00
+#define MTL_RAA_WSP                    0x01
+
+#define MTL_Q_DISABLED                 0x00
+#define MTL_Q_ENABLED                  0x02
+
+#define MTL_RQDCM0R_Q0MDMACH           0x0
+#define MTL_RQDCM0R_Q1MDMACH           0x00000100
+#define MTL_RQDCM0R_Q2MDMACH           0x00020000
+#define MTL_RQDCM0R_Q3MDMACH           0x03000000
+#define MTL_RQDCM1R_Q4MDMACH           0x00000004
+#define MTL_RQDCM1R_Q5MDMACH           0x00000500
+#define MTL_RQDCM1R_Q6MDMACH           0x00060000
+#define MTL_RQDCM1R_Q7MDMACH           0x07000000
+#define MTL_RQDCM2R_Q8MDMACH           0x00000008
+#define MTL_RQDCM2R_Q9MDMACH           0x00000900
+#define MTL_RQDCM2R_Q10MDMACH          0x000A0000
+#define MTL_RQDCM2R_Q11MDMACH          0x0B000000
+
+/* MTL traffic class register offsets
+ *   Multiple traffic classes can be active.  The first class has registers
+ *   that begin at 0x1100.  Each subsequent traffic class has registers
+ *   that are accessed using an offset of 0x80 from the previous class.
+ */
+#define MTL_TC_BASE                    MTL_Q_BASE
+#define MTL_TC_INC                     MTL_Q_INC
+
+#define MTL_TC_ETSCR                   0x10
+#define MTL_TC_ETSSR                   0x14
+#define MTL_TC_QWR                     0x18
+
+/* MTL traffic class register entry bit positions and sizes */
+#define MTL_TC_ETSCR_TSA_POS           0
+#define MTL_TC_ETSCR_TSA_LEN           2
+#define MTL_TC_QWR_QW_POS              0
+#define MTL_TC_QWR_QW_LEN              21
+
+/* MTL traffic class register value */
+#define MTL_TSA_SP                     0x00
+#define MTL_TSA_ETS                    0x02
+
+/* DMA register offsets */
+#define DMA_MR                         0x3000
+#define DMA_SBMR                       0x3004
+#define DMA_ISR                                0x3008
+#define DMA_DSR0                       0x3020
+#define DMA_DSR1                       0x3024
+
+/* DMA register entry bit positions and sizes */
+#define DMA_ISR_MACIS_POS              17
+#define DMA_ISR_MACIS_LEN              1
+#define DMA_ISR_MTLIS_POS              16
+#define DMA_ISR_MTLIS_LEN              1
+#define DMA_MR_SWR_POS                 0
+#define DMA_MR_SWR_LEN                 1
+#define DMA_SBMR_EAME_POS              11
+#define DMA_SBMR_EAME_LEN              1
+#define DMA_SBMR_BLEN_64_POS           5
+#define DMA_SBMR_BLEN_64_LEN           1
+#define DMA_SBMR_BLEN_128_POS          6
+#define DMA_SBMR_BLEN_128_LEN          1
+#define DMA_SBMR_BLEN_256_POS          7
+#define DMA_SBMR_BLEN_256_LEN          1
+#define DMA_SBMR_UNDEF_POS             0
+#define DMA_SBMR_UNDEF_LEN             1
+
+/* DMA register values */
+#define DMA_DSR_RPS_LEN                        4
+#define DMA_DSR_TPS_LEN                        4
+#define DMA_DSR_Q_LEN                  (DMA_DSR_RPS_LEN + DMA_DSR_TPS_LEN)
+#define DMA_DSR0_TPS_START             12
+#define DMA_DSRX_FIRST_QUEUE           3
+#define DMA_DSRX_INC                   4
+#define DMA_DSRX_QPR                   4
+#define DMA_DSRX_TPS_START             4
+#define DMA_TPS_STOPPED                        0x00
+#define DMA_TPS_SUSPENDED              0x06
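+
+/* Decoding sketch (an assumption based on the constants above): channels
+ * 0-2 report status in DMA_DSR0, with channel 0's 4-bit Tx state at
+ * DMA_DSR0_TPS_START and each further channel DMA_DSR_Q_LEN bits up; from
+ * channel DMA_DSRX_FIRST_QUEUE on, each group of DMA_DSRX_QPR channels
+ * shares one DSRx register (DMA_DSRX_INC bytes apart, first Tx field at
+ * DMA_DSRX_TPS_START).
+ */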
+
+/* DMA channel register offsets
+ *   Multiple channels can be active.  The first channel has registers
+ *   that begin at 0x3100.  Each subsequent channel has registers that
+ *   are accessed using an offset of 0x80 from the previous channel.
+ */
+#define DMA_CH_BASE                    0x3100
+#define DMA_CH_INC                     0x80
+
+#define DMA_CH_CR                      0x00
+#define DMA_CH_TCR                     0x04
+#define DMA_CH_RCR                     0x08
+#define DMA_CH_TDLR_HI                 0x10
+#define DMA_CH_TDLR_LO                 0x14
+#define DMA_CH_RDLR_HI                 0x18
+#define DMA_CH_RDLR_LO                 0x1c
+#define DMA_CH_TDTR_LO                 0x24
+#define DMA_CH_RDTR_LO                 0x2c
+#define DMA_CH_TDRLR                   0x30
+#define DMA_CH_RDRLR                   0x34
+#define DMA_CH_IER                     0x38
+#define DMA_CH_RIWT                    0x3c
+#define DMA_CH_SR                      0x60
+
+/* DMA channel register entry bit positions and sizes */
+#define DMA_CH_CR_PBLX8_POS            16
+#define DMA_CH_CR_PBLX8_LEN            1
+#define DMA_CH_CR_SPH_POS              24
+#define DMA_CH_CR_SPH_LEN              1
+#define DMA_CH_IER_AIE_POS             15
+#define DMA_CH_IER_AIE_LEN             1
+#define DMA_CH_IER_FBEE_POS            12
+#define DMA_CH_IER_FBEE_LEN            1
+#define DMA_CH_IER_NIE_POS             16
+#define DMA_CH_IER_NIE_LEN             1
+#define DMA_CH_IER_RBUE_POS            7
+#define DMA_CH_IER_RBUE_LEN            1
+#define DMA_CH_IER_RIE_POS             6
+#define DMA_CH_IER_RIE_LEN             1
+#define DMA_CH_IER_RSE_POS             8
+#define DMA_CH_IER_RSE_LEN             1
+#define DMA_CH_IER_TBUE_POS            2
+#define DMA_CH_IER_TBUE_LEN            1
+#define DMA_CH_IER_TIE_POS             0
+#define DMA_CH_IER_TIE_LEN             1
+#define DMA_CH_IER_TXSE_POS            1
+#define DMA_CH_IER_TXSE_LEN            1
+#define DMA_CH_RCR_PBL_POS             16
+#define DMA_CH_RCR_PBL_LEN             6
+#define DMA_CH_RCR_RBSZ_POS            1
+#define DMA_CH_RCR_RBSZ_LEN            14
+#define DMA_CH_RCR_SR_POS              0
+#define DMA_CH_RCR_SR_LEN              1
+#define DMA_CH_RIWT_RWT_POS            0
+#define DMA_CH_RIWT_RWT_LEN            8
+#define DMA_CH_SR_FBE_POS              12
+#define DMA_CH_SR_FBE_LEN              1
+#define DMA_CH_SR_RBU_POS              7
+#define DMA_CH_SR_RBU_LEN              1
+#define DMA_CH_SR_RI_POS               6
+#define DMA_CH_SR_RI_LEN               1
+#define DMA_CH_SR_RPS_POS              8
+#define DMA_CH_SR_RPS_LEN              1
+#define DMA_CH_SR_TBU_POS              2
+#define DMA_CH_SR_TBU_LEN              1
+#define DMA_CH_SR_TI_POS               0
+#define DMA_CH_SR_TI_LEN               1
+#define DMA_CH_SR_TPS_POS              1
+#define DMA_CH_SR_TPS_LEN              1
+#define DMA_CH_TCR_OSP_POS             4
+#define DMA_CH_TCR_OSP_LEN             1
+#define DMA_CH_TCR_PBL_POS             16
+#define DMA_CH_TCR_PBL_LEN             6
+#define DMA_CH_TCR_ST_POS              0
+#define DMA_CH_TCR_ST_LEN              1
+#define DMA_CH_TCR_TSE_POS             12
+#define DMA_CH_TCR_TSE_LEN             1
+
+/* DMA channel register values */
+#define DMA_OSP_DISABLE                        0x00
+#define DMA_OSP_ENABLE                 0x01
+#define DMA_PBL_1                      1
+#define DMA_PBL_2                      2
+#define DMA_PBL_4                      4
+#define DMA_PBL_8                      8
+#define DMA_PBL_16                     16
+#define DMA_PBL_32                     32
+#define DMA_PBL_64                     64
+#define DMA_PBL_128                    128
+#define DMA_PBL_256                    256
+#define DMA_PBL_X8_DISABLE             0x00
+#define DMA_PBL_X8_ENABLE              0x01
+
+/* Descriptor/Packet entry bit positions and sizes */
+#define RX_PACKET_ERRORS_CRC_POS               2
+#define RX_PACKET_ERRORS_CRC_LEN               1
+#define RX_PACKET_ERRORS_FRAME_POS             3
+#define RX_PACKET_ERRORS_FRAME_LEN             1
+#define RX_PACKET_ERRORS_LENGTH_POS            0
+#define RX_PACKET_ERRORS_LENGTH_LEN            1
+#define RX_PACKET_ERRORS_OVERRUN_POS           1
+#define RX_PACKET_ERRORS_OVERRUN_LEN           1
+
+#define RX_PACKET_ATTRIBUTES_CSUM_DONE_POS     0
+#define RX_PACKET_ATTRIBUTES_CSUM_DONE_LEN     1
+#define RX_PACKET_ATTRIBUTES_VLAN_CTAG_POS     1
+#define RX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN     1
+#define RX_PACKET_ATTRIBUTES_INCOMPLETE_POS    2
+#define RX_PACKET_ATTRIBUTES_INCOMPLETE_LEN    1
+#define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_POS  3
+#define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_LEN  1
+#define RX_PACKET_ATTRIBUTES_CONTEXT_POS       4
+#define RX_PACKET_ATTRIBUTES_CONTEXT_LEN       1
+#define RX_PACKET_ATTRIBUTES_RX_TSTAMP_POS     5
+#define RX_PACKET_ATTRIBUTES_RX_TSTAMP_LEN     1
+#define RX_PACKET_ATTRIBUTES_RSS_HASH_POS      6
+#define RX_PACKET_ATTRIBUTES_RSS_HASH_LEN      1
+
+#define RX_NORMAL_DESC0_OVT_POS                        0
+#define RX_NORMAL_DESC0_OVT_LEN                        16
+#define RX_NORMAL_DESC2_HL_POS                 0
+#define RX_NORMAL_DESC2_HL_LEN                 10
+#define RX_NORMAL_DESC3_CDA_POS                        27
+#define RX_NORMAL_DESC3_CDA_LEN                        1
+#define RX_NORMAL_DESC3_CTXT_POS               30
+#define RX_NORMAL_DESC3_CTXT_LEN               1
+#define RX_NORMAL_DESC3_ES_POS                 15
+#define RX_NORMAL_DESC3_ES_LEN                 1
+#define RX_NORMAL_DESC3_ETLT_POS               16
+#define RX_NORMAL_DESC3_ETLT_LEN               4
+#define RX_NORMAL_DESC3_FD_POS                 29
+#define RX_NORMAL_DESC3_FD_LEN                 1
+#define RX_NORMAL_DESC3_INTE_POS               30
+#define RX_NORMAL_DESC3_INTE_LEN               1
+#define RX_NORMAL_DESC3_L34T_POS               20
+#define RX_NORMAL_DESC3_L34T_LEN               4
+#define RX_NORMAL_DESC3_LD_POS                 28
+#define RX_NORMAL_DESC3_LD_LEN                 1
+#define RX_NORMAL_DESC3_OWN_POS                        31
+#define RX_NORMAL_DESC3_OWN_LEN                        1
+#define RX_NORMAL_DESC3_PL_POS                 0
+#define RX_NORMAL_DESC3_PL_LEN                 14
+#define RX_NORMAL_DESC3_RSV_POS                        26
+#define RX_NORMAL_DESC3_RSV_LEN                        1
+
+#define RX_DESC3_L34T_IPV4_TCP                 1
+#define RX_DESC3_L34T_IPV4_UDP                 2
+#define RX_DESC3_L34T_IPV4_ICMP                        3
+#define RX_DESC3_L34T_IPV6_TCP                 9
+#define RX_DESC3_L34T_IPV6_UDP                 10
+#define RX_DESC3_L34T_IPV6_ICMP                        11
+
+#define RX_CONTEXT_DESC3_TSA_POS               4
+#define RX_CONTEXT_DESC3_TSA_LEN               1
+#define RX_CONTEXT_DESC3_TSD_POS               6
+#define RX_CONTEXT_DESC3_TSD_LEN               1
+
+#define TX_PACKET_ATTRIBUTES_CSUM_ENABLE_POS   0
+#define TX_PACKET_ATTRIBUTES_CSUM_ENABLE_LEN   1
+#define TX_PACKET_ATTRIBUTES_TSO_ENABLE_POS    1
+#define TX_PACKET_ATTRIBUTES_TSO_ENABLE_LEN    1
+#define TX_PACKET_ATTRIBUTES_VLAN_CTAG_POS     2
+#define TX_PACKET_ATTRIBUTES_VLAN_CTAG_LEN     1
+#define TX_PACKET_ATTRIBUTES_PTP_POS           3
+#define TX_PACKET_ATTRIBUTES_PTP_LEN           1
+
+#define TX_CONTEXT_DESC2_MSS_POS               0
+#define TX_CONTEXT_DESC2_MSS_LEN               15
+#define TX_CONTEXT_DESC3_CTXT_POS              30
+#define TX_CONTEXT_DESC3_CTXT_LEN              1
+#define TX_CONTEXT_DESC3_TCMSSV_POS            26
+#define TX_CONTEXT_DESC3_TCMSSV_LEN            1
+#define TX_CONTEXT_DESC3_VLTV_POS              16
+#define TX_CONTEXT_DESC3_VLTV_LEN              1
+#define TX_CONTEXT_DESC3_VT_POS                        0
+#define TX_CONTEXT_DESC3_VT_LEN                        16
+
+#define TX_NORMAL_DESC2_HL_B1L_POS             0
+#define TX_NORMAL_DESC2_HL_B1L_LEN             14
+#define TX_NORMAL_DESC2_IC_POS                 31
+#define TX_NORMAL_DESC2_IC_LEN                 1
+#define TX_NORMAL_DESC2_TTSE_POS               30
+#define TX_NORMAL_DESC2_TTSE_LEN               1
+#define TX_NORMAL_DESC2_VTIR_POS               14
+#define TX_NORMAL_DESC2_VTIR_LEN               2
+#define TX_NORMAL_DESC3_CIC_POS                        16
+#define TX_NORMAL_DESC3_CIC_LEN                        2
+#define TX_NORMAL_DESC3_CPC_POS                        26
+#define TX_NORMAL_DESC3_CPC_LEN                        2
+#define TX_NORMAL_DESC3_CTXT_POS               30
+#define TX_NORMAL_DESC3_CTXT_LEN               1
+#define TX_NORMAL_DESC3_FD_POS                 29
+#define TX_NORMAL_DESC3_FD_LEN                 1
+#define TX_NORMAL_DESC3_FL_POS                 0
+#define TX_NORMAL_DESC3_FL_LEN                 15
+#define TX_NORMAL_DESC3_LD_POS                 28
+#define TX_NORMAL_DESC3_LD_LEN                 1
+#define TX_NORMAL_DESC3_OWN_POS                        31
+#define TX_NORMAL_DESC3_OWN_LEN                        1
+#define TX_NORMAL_DESC3_TCPHDRLEN_POS          19
+#define TX_NORMAL_DESC3_TCPHDRLEN_LEN          4
+#define TX_NORMAL_DESC3_TCPPL_POS              0
+#define TX_NORMAL_DESC3_TCPPL_LEN              18
+#define TX_NORMAL_DESC3_TSE_POS                        18
+#define TX_NORMAL_DESC3_TSE_LEN                        1
+
+#define TX_NORMAL_DESC2_VLAN_INSERT            0x2
+
+#define XLGMAC_MTL_REG(pdata, n, reg)                                  \
+       ((pdata)->mac_regs + MTL_Q_BASE + ((n) * MTL_Q_INC) + (reg))
+
+#define XLGMAC_DMA_REG(channel, reg)   ((channel)->dma_regs + (reg))
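+
+/* Usage sketch for the two helpers above (illustrative only; the real
+ * callers are the hw ops routines):
+ *
+ *	u32 mode = readl(XLGMAC_MTL_REG(pdata, queue, MTL_Q_TQOMR));
+ *	writel(status, XLGMAC_DMA_REG(channel, DMA_CH_SR));
+ */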
+
+#endif /* __DWC_XLGMAC_REG_H__ */
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac.h b/drivers/net/ethernet/synopsys/dwc-xlgmac.h
new file mode 100644 (file)
index 0000000..676b2fb
--- /dev/null
@@ -0,0 +1,649 @@
+/* Synopsys DesignWare Core Enterprise Ethernet (XLGMAC) Driver
+ *
+ * Copyright (c) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is dual-licensed; you may select either version 2 of
+ * the GNU General Public License ("GPL") or BSD license ("BSD").
+ *
+ * This Synopsys DWC XLGMAC software driver and associated documentation
+ * (hereinafter the "Software") is an unsupported proprietary work of
+ * Synopsys, Inc. unless otherwise expressly agreed to in writing between
+ * Synopsys and you. The Software IS NOT an item of Licensed Software or a
+ * Licensed Product under any End User Software License Agreement or
+ * Agreement for Licensed Products with Synopsys or any supplement thereto.
+ * Synopsys is a registered trademark of Synopsys, Inc. Other names included
+ * in the SOFTWARE may be the trademarks of their respective owners.
+ */
+
+#ifndef __DWC_XLGMAC_H__
+#define __DWC_XLGMAC_H__
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/phy.h>
+#include <linux/if_vlan.h>
+#include <linux/bitops.h>
+#include <linux/timecounter.h>
+
+#define XLGMAC_DRV_NAME                        "dwc-xlgmac"
+#define XLGMAC_DRV_VERSION             "1.0.0"
+#define XLGMAC_DRV_DESC                        "Synopsys DWC XLGMAC Driver"
+
+/* Descriptor related parameters */
+#define XLGMAC_TX_DESC_CNT             1024
+#define XLGMAC_TX_DESC_MIN_FREE                (XLGMAC_TX_DESC_CNT >> 3)
+#define XLGMAC_TX_DESC_MAX_PROC                (XLGMAC_TX_DESC_CNT >> 1)
+#define XLGMAC_RX_DESC_CNT             1024
+#define XLGMAC_RX_DESC_MAX_DIRTY       (XLGMAC_RX_DESC_CNT >> 3)
+
+/* Descriptors required for maximum contiguous TSO/GSO packet */
+#define XLGMAC_TX_MAX_SPLIT    ((GSO_MAX_SIZE / XLGMAC_TX_MAX_BUF_SIZE) + 1)
+
+/* Maximum possible descriptors needed for a SKB */
+#define XLGMAC_TX_MAX_DESC_NR  (MAX_SKB_FRAGS + XLGMAC_TX_MAX_SPLIT + 2)
+
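+/* Largest per-descriptor buffer: 0x3fff (16383) rounded down to a
+ * 64-byte multiple, i.e. 16320 bytes
+ */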
+#define XLGMAC_TX_MAX_BUF_SIZE (0x3fff & ~(64 - 1))
+#define XLGMAC_RX_MIN_BUF_SIZE (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
+#define XLGMAC_RX_BUF_ALIGN    64
+
+/* Maximum Size for Splitting the Header Data
+ * Keep in sync with SKB_ALLOC_SIZE
+ * 3'b000: 64 bytes, 3'b001: 128 bytes
+ * 3'b010: 256 bytes, 3'b011: 512 bytes
+ * 3'b100: 1023 bytes, 3'b101-3'b111: Reserved
+ */
+#define XLGMAC_SPH_HDSMS_SIZE          3
+#define XLGMAC_SKB_ALLOC_SIZE          512
+
+#define XLGMAC_MAX_FIFO                        81920
+
+#define XLGMAC_MAX_DMA_CHANNELS                16
+#define XLGMAC_DMA_STOP_TIMEOUT                5
+#define XLGMAC_DMA_INTERRUPT_MASK      0x31c7
+
+/* Default coalescing parameters */
+#define XLGMAC_INIT_DMA_TX_USECS       1000
+#define XLGMAC_INIT_DMA_TX_FRAMES      25
+#define XLGMAC_INIT_DMA_RX_USECS       30
+#define XLGMAC_INIT_DMA_RX_FRAMES      25
+
+/* Flow control queue count */
+#define XLGMAC_MAX_FLOW_CONTROL_QUEUES 8
+
+/* System clock is 125 MHz */
+#define XLGMAC_SYSCLOCK                        125000000
+
+/* Maximum MAC address hash table size (256 bits, held in 8 32-bit registers) */
+#define XLGMAC_MAC_HASH_TABLE_SIZE     8
+
+/* Receive Side Scaling */
+#define XLGMAC_RSS_HASH_KEY_SIZE       40
+#define XLGMAC_RSS_MAX_TABLE_SIZE      256
+#define XLGMAC_RSS_LOOKUP_TABLE_TYPE   0
+#define XLGMAC_RSS_HASH_KEY_TYPE       1
+
+#define XLGMAC_STD_PACKET_MTU          1500
+#define XLGMAC_JUMBO_PACKET_MTU                9000
+
+/* Helper macro for descriptor handling
+ *  Always use XLGMAC_GET_DESC_DATA to access the descriptor data
+ */
+#define XLGMAC_GET_DESC_DATA(ring, idx) ({                             \
+       typeof(ring) _ring = (ring);                                    \
+       ((_ring)->desc_data_head +                                      \
+        ((idx) & ((_ring)->dma_desc_count - 1)));                      \
+})
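+
+/* Usage sketch: the mask above relies on dma_desc_count being a power of
+ * two, so callers may pass a free-running index and it wraps into the
+ * ring, e.g.:
+ *
+ *	desc_data = XLGMAC_GET_DESC_DATA(ring, ring->cur);
+ */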
+
+#define XLGMAC_GET_REG_BITS(var, pos, len) ({                          \
+       typeof(pos) _pos = (pos);                                       \
+       typeof(len) _len = (len);                                       \
+       ((var) & GENMASK(_pos + _len - 1, _pos)) >> (_pos);             \
+})
+
+#define XLGMAC_GET_REG_BITS_LE(var, pos, len) ({                       \
+       typeof(pos) _pos = (pos);                                       \
+       typeof(len) _len = (len);                                       \
+       typeof(var) _var = le32_to_cpu((var));                          \
+       ((_var) & GENMASK(_pos + _len - 1, _pos)) >> (_pos);            \
+})
+
+#define XLGMAC_SET_REG_BITS(var, pos, len, val) ({                     \
+       typeof(var) _var = (var);                                       \
+       typeof(pos) _pos = (pos);                                       \
+       typeof(len) _len = (len);                                       \
+       typeof(val) _val = (val);                                       \
+       _val = (_val << _pos) & GENMASK(_pos + _len - 1, _pos);         \
+       _var = (_var & ~GENMASK(_pos + _len - 1, _pos)) | _val;         \
+})
+
+#define XLGMAC_SET_REG_BITS_LE(var, pos, len, val) ({                  \
+       typeof(var) _var = (var);                                       \
+       typeof(pos) _pos = (pos);                                       \
+       typeof(len) _len = (len);                                       \
+       typeof(val) _val = (val);                                       \
+       _val = (_val << _pos) & GENMASK(_pos + _len - 1, _pos);         \
+       _var = (_var & ~GENMASK(_pos + _len - 1, _pos)) | _val;         \
+       cpu_to_le32(_var);                                              \
+})
+
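+/* Round-trip sketch for the register helpers (values made up):
+ *
+ *	u32 reg = XLGMAC_SET_REG_BITS(0, 8, 4, 0x5);	reg is 0x500
+ *	u32 val = XLGMAC_GET_REG_BITS(reg, 8, 4);	val is 0x5
+ *
+ * XLGMAC_SET_REG_BITS returns the updated value; its local _var
+ * copy leaves the caller's variable unmodified.
+ */
+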
+struct xlgmac_pdata;
+
+enum xlgmac_int {
+       XLGMAC_INT_DMA_CH_SR_TI,
+       XLGMAC_INT_DMA_CH_SR_TPS,
+       XLGMAC_INT_DMA_CH_SR_TBU,
+       XLGMAC_INT_DMA_CH_SR_RI,
+       XLGMAC_INT_DMA_CH_SR_RBU,
+       XLGMAC_INT_DMA_CH_SR_RPS,
+       XLGMAC_INT_DMA_CH_SR_TI_RI,
+       XLGMAC_INT_DMA_CH_SR_FBE,
+       XLGMAC_INT_DMA_ALL,
+};
+
+struct xlgmac_stats {
+       /* MMC TX counters */
+       u64 txoctetcount_gb;
+       u64 txframecount_gb;
+       u64 txbroadcastframes_g;
+       u64 txmulticastframes_g;
+       u64 tx64octets_gb;
+       u64 tx65to127octets_gb;
+       u64 tx128to255octets_gb;
+       u64 tx256to511octets_gb;
+       u64 tx512to1023octets_gb;
+       u64 tx1024tomaxoctets_gb;
+       u64 txunicastframes_gb;
+       u64 txmulticastframes_gb;
+       u64 txbroadcastframes_gb;
+       u64 txunderflowerror;
+       u64 txoctetcount_g;
+       u64 txframecount_g;
+       u64 txpauseframes;
+       u64 txvlanframes_g;
+
+       /* MMC RX counters */
+       u64 rxframecount_gb;
+       u64 rxoctetcount_gb;
+       u64 rxoctetcount_g;
+       u64 rxbroadcastframes_g;
+       u64 rxmulticastframes_g;
+       u64 rxcrcerror;
+       u64 rxrunterror;
+       u64 rxjabbererror;
+       u64 rxundersize_g;
+       u64 rxoversize_g;
+       u64 rx64octets_gb;
+       u64 rx65to127octets_gb;
+       u64 rx128to255octets_gb;
+       u64 rx256to511octets_gb;
+       u64 rx512to1023octets_gb;
+       u64 rx1024tomaxoctets_gb;
+       u64 rxunicastframes_g;
+       u64 rxlengtherror;
+       u64 rxoutofrangetype;
+       u64 rxpauseframes;
+       u64 rxfifooverflow;
+       u64 rxvlanframes_gb;
+       u64 rxwatchdogerror;
+
+       /* Extra counters */
+       u64 tx_tso_packets;
+       u64 rx_split_header_packets;
+       u64 rx_buffer_unavailable;
+};
+
+struct xlgmac_ring_buf {
+       struct sk_buff *skb;
+       dma_addr_t skb_dma;
+       unsigned int skb_len;
+};
+
+/* Common Tx and Rx DMA hardware descriptor */
+struct xlgmac_dma_desc {
+       __le32 desc0;
+       __le32 desc1;
+       __le32 desc2;
+       __le32 desc3;
+};
+
+/* Page allocation related values */
+struct xlgmac_page_alloc {
+       struct page *pages;
+       unsigned int pages_len;
+       unsigned int pages_offset;
+
+       dma_addr_t pages_dma;
+};
+
+/* Ring entry buffer data */
+struct xlgmac_buffer_data {
+       struct xlgmac_page_alloc pa;
+       struct xlgmac_page_alloc pa_unmap;
+
+       dma_addr_t dma_base;
+       unsigned long dma_off;
+       unsigned int dma_len;
+};
+
+/* Tx-related desc data */
+struct xlgmac_tx_desc_data {
+       unsigned int packets;           /* BQL packet count */
+       unsigned int bytes;             /* BQL byte count */
+};
+
+/* Rx-related desc data */
+struct xlgmac_rx_desc_data {
+       struct xlgmac_buffer_data hdr;  /* Header locations */
+       struct xlgmac_buffer_data buf;  /* Payload locations */
+
+       unsigned short hdr_len;         /* Length of received header */
+       unsigned short len;             /* Length of received packet */
+};
+
+struct xlgmac_pkt_info {
+       struct sk_buff *skb;
+
+       unsigned int attributes;
+
+       unsigned int errors;
+
+       /* descriptors needed for this packet */
+       unsigned int desc_count;
+       unsigned int length;
+
+       unsigned int tx_packets;
+       unsigned int tx_bytes;
+
+       unsigned int header_len;
+       unsigned int tcp_header_len;
+       unsigned int tcp_payload_len;
+       unsigned short mss;
+
+       unsigned short vlan_ctag;
+
+       u64 rx_tstamp;
+
+       u32 rss_hash;
+       enum pkt_hash_types rss_hash_type;
+};
+
+struct xlgmac_desc_data {
+       /* dma_desc: Virtual address of descriptor
+        *  dma_desc_addr: DMA address of descriptor
+        */
+       struct xlgmac_dma_desc *dma_desc;
+       dma_addr_t dma_desc_addr;
+
+       /* skb: Virtual address of SKB
+        *  skb_dma: DMA address of SKB data
+        *  skb_dma_len: Length of SKB DMA area
+        */
+       struct sk_buff *skb;
+       dma_addr_t skb_dma;
+       unsigned int skb_dma_len;
+
+       /* Tx/Rx-related data */
+       struct xlgmac_tx_desc_data tx;
+       struct xlgmac_rx_desc_data rx;
+
+       unsigned int mapped_as_page;
+
+       /* Incomplete receive save location.  If the budget is exhausted
+        * or the last descriptor (last normal descriptor or a following
+        * context descriptor) has not been DMA'd yet, the current state
+        * of the receive processing needs to be saved.
+        */
+       unsigned int state_saved;
+       struct {
+               struct sk_buff *skb;
+               unsigned int len;
+               unsigned int error;
+       } state;
+};
+
+struct xlgmac_ring {
+       /* Per packet related information */
+       struct xlgmac_pkt_info pkt_info;
+
+       /* Virtual/DMA addresses of DMA descriptor list and the total count */
+       struct xlgmac_dma_desc *dma_desc_head;
+       dma_addr_t dma_desc_head_addr;
+       unsigned int dma_desc_count;
+
+       /* Array of descriptor data corresponding to the DMA descriptors
+        * (always use the XLGMAC_GET_DESC_DATA macro to access this data)
+        */
+       struct xlgmac_desc_data *desc_data_head;
+
+       /* Page allocation for RX buffers */
+       struct xlgmac_page_alloc rx_hdr_pa;
+       struct xlgmac_page_alloc rx_buf_pa;
+
+       /* Ring index values
+        *  cur   - Tx: index of descriptor to be used for current transfer
+        *          Rx: index of descriptor to check for packet availability
+        *  dirty - Tx: index of descriptor to check for transfer complete
+        *          Rx: index of descriptor to check for buffer reallocation
+        */
+       unsigned int cur;
+       unsigned int dirty;
+
+       /* Coalesce frame count used for interrupt bit setting */
+       unsigned int coalesce_count;
+
+       union {
+               struct {
+                       unsigned int xmit_more;
+                       unsigned int queue_stopped;
+                       unsigned short cur_mss;
+                       unsigned short cur_vlan_ctag;
+               } tx;
+       };
+} ____cacheline_aligned;
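+
+/* A common pattern with the cur/dirty pair (a sketch, not taken
+ * from this driver): with a power-of-two dma_desc_count, the
+ * number of descriptors still in flight on Tx is
+ *
+ *	(ring->cur - ring->dirty) & (ring->dma_desc_count - 1)
+ */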
+
+struct xlgmac_channel {
+       char name[16];
+
+       /* Address of private data area for device */
+       struct xlgmac_pdata *pdata;
+
+       /* Queue index and base address of queue's DMA registers */
+       unsigned int queue_index;
+       void __iomem *dma_regs;
+
+       /* Per-channel interrupt (IRQ) number */
+       int dma_irq;
+       char dma_irq_name[IFNAMSIZ + 32];
+
+       /* Netdev related settings */
+       struct napi_struct napi;
+
+       unsigned int saved_ier;
+
+       unsigned int tx_timer_active;
+       struct timer_list tx_timer;
+
+       struct xlgmac_ring *tx_ring;
+       struct xlgmac_ring *rx_ring;
+} ____cacheline_aligned;
+
+struct xlgmac_desc_ops {
+       int (*alloc_channles_and_rings)(struct xlgmac_pdata *pdata);
+       void (*free_channels_and_rings)(struct xlgmac_pdata *pdata);
+       int (*map_tx_skb)(struct xlgmac_channel *channel,
+                         struct sk_buff *skb);
+       int (*map_rx_buffer)(struct xlgmac_pdata *pdata,
+                            struct xlgmac_ring *ring,
+                            struct xlgmac_desc_data *desc_data);
+       void (*unmap_desc_data)(struct xlgmac_pdata *pdata,
+                               struct xlgmac_desc_data *desc_data);
+       void (*tx_desc_init)(struct xlgmac_pdata *pdata);
+       void (*rx_desc_init)(struct xlgmac_pdata *pdata);
+};
+
+struct xlgmac_hw_ops {
+       int (*init)(struct xlgmac_pdata *pdata);
+       int (*exit)(struct xlgmac_pdata *pdata);
+
+       int (*tx_complete)(struct xlgmac_dma_desc *dma_desc);
+
+       void (*enable_tx)(struct xlgmac_pdata *pdata);
+       void (*disable_tx)(struct xlgmac_pdata *pdata);
+       void (*enable_rx)(struct xlgmac_pdata *pdata);
+       void (*disable_rx)(struct xlgmac_pdata *pdata);
+
+       int (*enable_int)(struct xlgmac_channel *channel,
+                         enum xlgmac_int int_id);
+       int (*disable_int)(struct xlgmac_channel *channel,
+                          enum xlgmac_int int_id);
+       void (*dev_xmit)(struct xlgmac_channel *channel);
+       int (*dev_read)(struct xlgmac_channel *channel);
+
+       int (*set_mac_address)(struct xlgmac_pdata *pdata, u8 *addr);
+       int (*config_rx_mode)(struct xlgmac_pdata *pdata);
+       int (*enable_rx_csum)(struct xlgmac_pdata *pdata);
+       int (*disable_rx_csum)(struct xlgmac_pdata *pdata);
+
+       /* For MII speed configuration */
+       int (*set_xlgmii_25000_speed)(struct xlgmac_pdata *pdata);
+       int (*set_xlgmii_40000_speed)(struct xlgmac_pdata *pdata);
+       int (*set_xlgmii_50000_speed)(struct xlgmac_pdata *pdata);
+       int (*set_xlgmii_100000_speed)(struct xlgmac_pdata *pdata);
+
+       /* For descriptor-related operations */
+       void (*tx_desc_init)(struct xlgmac_channel *channel);
+       void (*rx_desc_init)(struct xlgmac_channel *channel);
+       void (*tx_desc_reset)(struct xlgmac_desc_data *desc_data);
+       void (*rx_desc_reset)(struct xlgmac_pdata *pdata,
+                             struct xlgmac_desc_data *desc_data,
+                             unsigned int index);
+       int (*is_last_desc)(struct xlgmac_dma_desc *dma_desc);
+       int (*is_context_desc)(struct xlgmac_dma_desc *dma_desc);
+       void (*tx_start_xmit)(struct xlgmac_channel *channel,
+                             struct xlgmac_ring *ring);
+
+       /* For Flow Control */
+       int (*config_tx_flow_control)(struct xlgmac_pdata *pdata);
+       int (*config_rx_flow_control)(struct xlgmac_pdata *pdata);
+
+       /* For VLAN-related config */
+       int (*enable_rx_vlan_stripping)(struct xlgmac_pdata *pdata);
+       int (*disable_rx_vlan_stripping)(struct xlgmac_pdata *pdata);
+       int (*enable_rx_vlan_filtering)(struct xlgmac_pdata *pdata);
+       int (*disable_rx_vlan_filtering)(struct xlgmac_pdata *pdata);
+       int (*update_vlan_hash_table)(struct xlgmac_pdata *pdata);
+
+       /* For RX and TX coalescing */
+       int (*config_rx_coalesce)(struct xlgmac_pdata *pdata);
+       int (*config_tx_coalesce)(struct xlgmac_pdata *pdata);
+       unsigned int (*usec_to_riwt)(struct xlgmac_pdata *pdata,
+                                    unsigned int usec);
+       unsigned int (*riwt_to_usec)(struct xlgmac_pdata *pdata,
+                                    unsigned int riwt);
+
+       /* For RX and TX threshold config */
+       int (*config_rx_threshold)(struct xlgmac_pdata *pdata,
+                                  unsigned int val);
+       int (*config_tx_threshold)(struct xlgmac_pdata *pdata,
+                                  unsigned int val);
+
+       /* For RX and TX Store and Forward Mode config */
+       int (*config_rsf_mode)(struct xlgmac_pdata *pdata,
+                              unsigned int val);
+       int (*config_tsf_mode)(struct xlgmac_pdata *pdata,
+                              unsigned int val);
+
+       /* For TX DMA Operate on Second Frame config */
+       int (*config_osp_mode)(struct xlgmac_pdata *pdata);
+
+       /* For RX and TX PBL config */
+       int (*config_rx_pbl_val)(struct xlgmac_pdata *pdata);
+       int (*get_rx_pbl_val)(struct xlgmac_pdata *pdata);
+       int (*config_tx_pbl_val)(struct xlgmac_pdata *pdata);
+       int (*get_tx_pbl_val)(struct xlgmac_pdata *pdata);
+       int (*config_pblx8)(struct xlgmac_pdata *pdata);
+
+       /* For MMC statistics */
+       void (*rx_mmc_int)(struct xlgmac_pdata *pdata);
+       void (*tx_mmc_int)(struct xlgmac_pdata *pdata);
+       void (*read_mmc_stats)(struct xlgmac_pdata *pdata);
+
+       /* For Receive Side Scaling */
+       int (*enable_rss)(struct xlgmac_pdata *pdata);
+       int (*disable_rss)(struct xlgmac_pdata *pdata);
+       int (*set_rss_hash_key)(struct xlgmac_pdata *pdata,
+                               const u8 *key);
+       int (*set_rss_lookup_table)(struct xlgmac_pdata *pdata,
+                                   const u32 *table);
+};
+
+/* This structure contains flags that indicate what hardware features
+ * or configurations are present in the device.
+ */
+struct xlgmac_hw_features {
+       /* HW Version */
+       unsigned int version;
+
+       /* HW Feature Register0 */
+       unsigned int phyifsel;          /* PHY interface support */
+       unsigned int vlhash;            /* VLAN Hash Filter */
+       unsigned int sma;               /* SMA(MDIO) Interface */
+       unsigned int rwk;               /* PMT remote wake-up packet */
+       unsigned int mgk;               /* PMT magic packet */
+       unsigned int mmc;               /* RMON module */
+       unsigned int aoe;               /* ARP Offload */
+       unsigned int ts;                /* IEEE 1588-2008 Advanced Timestamp */
+       unsigned int eee;               /* Energy Efficient Ethernet */
+       unsigned int tx_coe;            /* Tx Checksum Offload */
+       unsigned int rx_coe;            /* Rx Checksum Offload */
+       unsigned int addn_mac;          /* Additional MAC Addresses */
+       unsigned int ts_src;            /* Timestamp Source */
+       unsigned int sa_vlan_ins;       /* Source Address or VLAN Insertion */
+
+       /* HW Feature Register1 */
+       unsigned int rx_fifo_size;      /* MTL Receive FIFO Size */
+       unsigned int tx_fifo_size;      /* MTL Transmit FIFO Size */
+       unsigned int adv_ts_hi;         /* Advanced Timestamping High Word */
+       unsigned int dma_width;         /* DMA width */
+       unsigned int dcb;               /* DCB Feature */
+       unsigned int sph;               /* Split Header Feature */
+       unsigned int tso;               /* TCP Segmentation Offload */
+       unsigned int dma_debug;         /* DMA Debug Registers */
+       unsigned int rss;               /* Receive Side Scaling */
+       unsigned int tc_cnt;            /* Number of Traffic Classes */
+       unsigned int hash_table_size;   /* Hash Table Size */
+       unsigned int l3l4_filter_num;   /* Number of L3-L4 Filters */
+
+       /* HW Feature Register2 */
+       unsigned int rx_q_cnt;          /* Number of MTL Receive Queues */
+       unsigned int tx_q_cnt;          /* Number of MTL Transmit Queues */
+       unsigned int rx_ch_cnt;         /* Number of DMA Receive Channels */
+       unsigned int tx_ch_cnt;         /* Number of DMA Transmit Channels */
+       unsigned int pps_out_num;       /* Number of PPS outputs */
+       unsigned int aux_snap_num;      /* Number of Aux snapshot inputs */
+};
+
+struct xlgmac_resources {
+       void __iomem *addr;
+       int irq;
+};
+
+struct xlgmac_pdata {
+       struct net_device *netdev;
+       struct device *dev;
+
+       struct xlgmac_hw_ops hw_ops;
+       struct xlgmac_desc_ops desc_ops;
+
+       /* Device statistics */
+       struct xlgmac_stats stats;
+
+       u32 msg_enable;
+
+       /* MAC registers base */
+       void __iomem *mac_regs;
+
+       /* Hardware features of the device */
+       struct xlgmac_hw_features hw_feat;
+
+       struct work_struct restart_work;
+
+       /* Rings for Tx/Rx on a DMA channel */
+       struct xlgmac_channel *channel_head;
+       unsigned int channel_count;
+       unsigned int tx_ring_count;
+       unsigned int rx_ring_count;
+       unsigned int tx_desc_count;
+       unsigned int rx_desc_count;
+       unsigned int tx_q_count;
+       unsigned int rx_q_count;
+
+       /* Tx/Rx common settings */
+       unsigned int pblx8;
+
+       /* Tx settings */
+       unsigned int tx_sf_mode;
+       unsigned int tx_threshold;
+       unsigned int tx_pbl;
+       unsigned int tx_osp_mode;
+
+       /* Rx settings */
+       unsigned int rx_sf_mode;
+       unsigned int rx_threshold;
+       unsigned int rx_pbl;
+
+       /* Tx coalescing settings */
+       unsigned int tx_usecs;
+       unsigned int tx_frames;
+
+       /* Rx coalescing settings */
+       unsigned int rx_riwt;
+       unsigned int rx_usecs;
+       unsigned int rx_frames;
+
+       /* Current Rx buffer size */
+       unsigned int rx_buf_size;
+
+       /* Flow control settings */
+       unsigned int tx_pause;
+       unsigned int rx_pause;
+
+       /* Device interrupt number */
+       int dev_irq;
+       unsigned int per_channel_irq;
+       int channel_irq[XLGMAC_MAX_DMA_CHANNELS];
+
+       /* Netdev related settings */
+       unsigned char mac_addr[ETH_ALEN];
+       netdev_features_t netdev_features;
+       struct napi_struct napi;
+
+       /* Filtering support */
+       unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+
+       /* Device clocks */
+       unsigned long sysclk_rate;
+
+       /* RSS addressing mutex */
+       struct mutex rss_mutex;
+
+       /* Receive Side Scaling settings */
+       u8 rss_key[XLGMAC_RSS_HASH_KEY_SIZE];
+       u32 rss_table[XLGMAC_RSS_MAX_TABLE_SIZE];
+       u32 rss_options;
+
+       int phy_speed;
+
+       char drv_name[32];
+       char drv_ver[32];
+};
+
+void xlgmac_init_desc_ops(struct xlgmac_desc_ops *desc_ops);
+void xlgmac_init_hw_ops(struct xlgmac_hw_ops *hw_ops);
+const struct net_device_ops *xlgmac_get_netdev_ops(void);
+void xlgmac_dump_tx_desc(struct xlgmac_pdata *pdata,
+                        struct xlgmac_ring *ring,
+                        unsigned int idx,
+                        unsigned int count,
+                        unsigned int flag);
+void xlgmac_dump_rx_desc(struct xlgmac_pdata *pdata,
+                        struct xlgmac_ring *ring,
+                        unsigned int idx);
+void xlgmac_print_pkt(struct net_device *netdev,
+                     struct sk_buff *skb, bool tx_rx);
+void xlgmac_get_all_hw_features(struct xlgmac_pdata *pdata);
+void xlgmac_print_all_hw_features(struct xlgmac_pdata *pdata);
+int xlgmac_drv_probe(struct device *dev,
+                    struct xlgmac_resources *res);
+int xlgmac_drv_remove(struct device *dev);
+
+/* For debug prints */
+#ifdef XLGMAC_DEBUG
+#define XLGMAC_PR(fmt, args...) \
+       pr_alert("[%s,%d]:" fmt, __func__, __LINE__, ## args)
+#else
+#define XLGMAC_PR(x...)                do { } while (0)
+#endif
+
+#endif /* __DWC_XLGMAC_H__ */
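
The declarations above sketch the bus-glue contract: a PCI or platform
front end fills in a struct xlgmac_resources and hands it to
xlgmac_drv_probe(). A minimal sketch, assuming the caller has already
mapped the registers and obtained an interrupt (the local names are
hypothetical; only the xlgmac_* entry points come from the header):

	struct xlgmac_resources res = {
		.addr = mapped_regs,	/* hypothetical ioremap() result */
		.irq  = bus_irq,	/* hypothetical IRQ number */
	};
	int ret = xlgmac_drv_probe(&pdev->dev, &res);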
index f864fd0663dbf830bd94b1db4daa7a7b5ad3ce1d..711fbbbc4b1f724fcebdaec80eda2eec1eef8551 100644 (file)
@@ -2124,33 +2124,26 @@ static const char
 };
 
 /*
- * bdx_get_settings - get device-specific settings
+ * bdx_get_link_ksettings - get device-specific settings
  * @netdev
  * @ecmd
  */
-static int bdx_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
-{
-       u32 rdintcm;
-       u32 tdintcm;
-       struct bdx_priv *priv = netdev_priv(netdev);
-
-       rdintcm = priv->rdintcm;
-       tdintcm = priv->tdintcm;
-
-       ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE);
-       ecmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE);
-       ethtool_cmd_speed_set(ecmd, SPEED_10000);
-       ecmd->duplex = DUPLEX_FULL;
-       ecmd->port = PORT_FIBRE;
-       ecmd->transceiver = XCVR_EXTERNAL;      /* what does it mean? */
-       ecmd->autoneg = AUTONEG_DISABLE;
-
-       /* PCK_TH measures in multiples of FIFO bytes
-          We translate to packets */
-       ecmd->maxtxpkt =
-           ((GET_PCK_TH(tdintcm) * PCK_TH_MULT) / BDX_TXF_DESC_SZ);
-       ecmd->maxrxpkt =
-           ((GET_PCK_TH(rdintcm) * PCK_TH_MULT) / sizeof(struct rxf_desc));
+static int bdx_get_link_ksettings(struct net_device *netdev,
+                                 struct ethtool_link_ksettings *ecmd)
+{
+       ethtool_link_ksettings_zero_link_mode(ecmd, supported);
+       ethtool_link_ksettings_add_link_mode(ecmd, supported,
+                                            10000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE);
+       ethtool_link_ksettings_zero_link_mode(ecmd, advertising);
+       ethtool_link_ksettings_add_link_mode(ecmd, advertising,
+                                            10000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(ecmd, advertising, FIBRE);
+
+       ecmd->base.speed = SPEED_10000;
+       ecmd->base.duplex = DUPLEX_FULL;
+       ecmd->base.port = PORT_FIBRE;
+       ecmd->base.autoneg = AUTONEG_DISABLE;
 
        return 0;
 }
@@ -2384,7 +2377,6 @@ static void bdx_get_ethtool_stats(struct net_device *netdev,
 static void bdx_set_ethtool_ops(struct net_device *netdev)
 {
        static const struct ethtool_ops bdx_ethtool_ops = {
-               .get_settings = bdx_get_settings,
                .get_drvinfo = bdx_get_drvinfo,
                .get_link = ethtool_op_get_link,
                .get_coalesce = bdx_get_coalesce,
@@ -2394,6 +2386,7 @@ static void bdx_set_ethtool_ops(struct net_device *netdev)
                .get_strings = bdx_get_strings,
                .get_sset_count = bdx_get_sset_count,
                .get_ethtool_stats = bdx_get_ethtool_stats,
+               .get_link_ksettings = bdx_get_link_ksettings,
        };
 
        netdev->ethtool_ops = &bdx_ethtool_ops;
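
This is the tree-wide migration from the legacy ethtool
get_settings/set_settings callbacks to get_link_ksettings and
set_link_ksettings; the hunks that follow repeat the same conversion
for gelic, spider_net, tsi108, rhine, velocity and fjes. The skeleton
of a converted getter, distilled from the hunk above (the driver name
is a placeholder):

	static int foo_get_link_ksettings(struct net_device *dev,
					  struct ethtool_link_ksettings *cmd)
	{
		ethtool_link_ksettings_zero_link_mode(cmd, supported);
		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
		cmd->base.speed = SPEED_10000;
		cmd->base.duplex = DUPLEX_FULL;
		cmd->base.autoneg = AUTONEG_DISABLE;
		return 0;
	}

Drivers that still build legacy u32 SUPPORTED_*/ADVERTISED_* bitmaps
convert them with ethtool_convert_legacy_u32_to_link_mode(), as the
gelic and velocity hunks below do.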
index 296c8efd0038c8f66f41e9a58e30920462272192..9e631952b86f3d4ddd9108e1fe0db7c96d8363d2 100644 (file)
@@ -74,15 +74,21 @@ config TI_CPSW
          will be called cpsw.
 
 config TI_CPTS
-       tristate "TI Common Platform Time Sync (CPTS) Support"
+       bool "TI Common Platform Time Sync (CPTS) Support"
        depends on TI_CPSW || TI_KEYSTONE_NETCP
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK
        ---help---
          This driver supports the Common Platform Time Sync unit of
          the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem.
          The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the
          driver offers a PTP Hardware Clock.
 
+config TI_CPTS_MOD
+       tristate
+       depends on TI_CPTS
+       default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y
+       default m
+
 config TI_KEYSTONE_NETCP
        tristate "TI Keystone NETCP Core Support"
        select TI_CPSW_ALE
index 1e7c10bf87132cda8e9c7ef2afc118ba1725388e..10e6b0ce51baf3115b8c72d40e933f10873186f0 100644 (file)
@@ -12,7 +12,7 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
 obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
 obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
-obj-$(CONFIG_TI_CPTS) += cpts.o
+obj-$(CONFIG_TI_CPTS_MOD) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
 ti_cpsw-y := cpsw.o
 
index 7c7ae0890e90c450c2228e44cf618cdfdedcceec..9027c9c509b581cda23c00006124ff3839a6ae2c 100644 (file)
@@ -1882,6 +1882,7 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
 static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
                          struct tc_to_netdev *tc)
 {
+       u8 num_tc;
        int i;
 
        /* setup tc must be called under rtnl lock */
@@ -1890,15 +1891,18 @@ static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;
 
+       tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+       num_tc = tc->mqprio->num_tc;
+
        /* Sanity-check the number of traffic classes requested */
        if ((dev->real_num_tx_queues <= 1) ||
-           (dev->real_num_tx_queues < tc->tc))
+           (dev->real_num_tx_queues < num_tc))
                return -EINVAL;
 
        /* Configure traffic class to queue mappings */
-       if (tc->tc) {
-               netdev_set_num_tc(dev, tc->tc);
-               for (i = 0; i < tc->tc; i++)
+       if (num_tc) {
+               netdev_set_num_tc(dev, num_tc);
+               for (i = 0; i < num_tc; i++)
                        netdev_set_tc_queue(dev, i, 1, i);
        } else {
                netdev_reset_tc(dev);
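
netdev_set_tc_queue(dev, tc, count, offset) maps traffic class tc onto
count consecutive transmit queues starting at offset, so the loop above
dedicates exactly one queue per class. A hedged example of a wider
split (illustrative, not from this driver):

	netdev_set_num_tc(dev, 4);
	for (i = 0; i < 4; i++)
		netdev_set_tc_queue(dev, i, 2, i * 2);	/* 2 queues per class */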
index 72013314bba81fbbbb0f7d37da6143a9b0754b0c..fa6a06571187ed0d4f30a7001983b7f5fcbea018 100644 (file)
@@ -1206,61 +1206,68 @@ void gelic_net_get_drvinfo(struct net_device *netdev,
        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int gelic_ether_get_settings(struct net_device *netdev,
-                                   struct ethtool_cmd *cmd)
+static int gelic_ether_get_link_ksettings(struct net_device *netdev,
+                                         struct ethtool_link_ksettings *cmd)
 {
        struct gelic_card *card = netdev_card(netdev);
+       u32 supported, advertising;
 
        gelic_card_get_ether_port_status(card, 0);
 
        if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX)
-               cmd->duplex = DUPLEX_FULL;
+               cmd->base.duplex = DUPLEX_FULL;
        else
-               cmd->duplex = DUPLEX_HALF;
+               cmd->base.duplex = DUPLEX_HALF;
 
        switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) {
        case GELIC_LV1_ETHER_SPEED_10:
-               ethtool_cmd_speed_set(cmd, SPEED_10);
+               cmd->base.speed = SPEED_10;
                break;
        case GELIC_LV1_ETHER_SPEED_100:
-               ethtool_cmd_speed_set(cmd, SPEED_100);
+               cmd->base.speed = SPEED_100;
                break;
        case GELIC_LV1_ETHER_SPEED_1000:
-               ethtool_cmd_speed_set(cmd, SPEED_1000);
+               cmd->base.speed = SPEED_1000;
                break;
        default:
                pr_info("%s: speed unknown\n", __func__);
-               ethtool_cmd_speed_set(cmd, SPEED_10);
+               cmd->base.speed = SPEED_10;
                break;
        }
 
-       cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg |
+       supported = SUPPORTED_TP | SUPPORTED_Autoneg |
                        SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
                        SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
                        SUPPORTED_1000baseT_Full;
-       cmd->advertising = cmd->supported;
+       advertising = supported;
        if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) {
-               cmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        } else {
-               cmd->autoneg = AUTONEG_DISABLE;
-               cmd->advertising &= ~ADVERTISED_Autoneg;
+               cmd->base.autoneg = AUTONEG_DISABLE;
+               advertising &= ~ADVERTISED_Autoneg;
        }
-       cmd->port = PORT_TP;
+       cmd->base.port = PORT_TP;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int gelic_ether_set_settings(struct net_device *netdev,
-                                   struct ethtool_cmd *cmd)
+static int
+gelic_ether_set_link_ksettings(struct net_device *netdev,
+                              const struct ethtool_link_ksettings *cmd)
 {
        struct gelic_card *card = netdev_card(netdev);
        u64 mode;
        int ret;
 
-       if (cmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                mode = GELIC_LV1_ETHER_AUTO_NEG;
        } else {
-               switch (cmd->speed) {
+               switch (cmd->base.speed) {
                case SPEED_10:
                        mode = GELIC_LV1_ETHER_SPEED_10;
                        break;
@@ -1273,9 +1280,9 @@ static int gelic_ether_set_settings(struct net_device *netdev,
                default:
                        return -EINVAL;
                }
-               if (cmd->duplex == DUPLEX_FULL)
+               if (cmd->base.duplex == DUPLEX_FULL) {
                        mode |= GELIC_LV1_ETHER_FULL_DUPLEX;
-               else if (cmd->speed == SPEED_1000) {
+               } else if (cmd->base.speed == SPEED_1000) {
                        pr_info("1000 half duplex is not supported.\n");
                        return -EINVAL;
                }
@@ -1370,11 +1377,11 @@ done:
 
 static const struct ethtool_ops gelic_ether_ethtool_ops = {
        .get_drvinfo    = gelic_net_get_drvinfo,
-       .get_settings   = gelic_ether_get_settings,
-       .set_settings   = gelic_ether_set_settings,
        .get_link       = ethtool_op_get_link,
        .get_wol        = gelic_net_get_wol,
        .set_wol        = gelic_net_set_wol,
+       .get_link_ksettings = gelic_ether_get_link_ksettings,
+       .set_link_ksettings = gelic_ether_set_link_ksettings,
 };
 
 /**
index ffe519382e111a04dd37c904a6080cee561e2d47..16bd036d06820e9139d04f05355a5a4e5127509c 100644 (file)
@@ -47,19 +47,23 @@ static struct {
 };
 
 static int
-spider_net_ethtool_get_settings(struct net_device *netdev,
-                              struct ethtool_cmd *cmd)
+spider_net_ethtool_get_link_ksettings(struct net_device *netdev,
+                                     struct ethtool_link_ksettings *cmd)
 {
        struct spider_net_card *card;
        card = netdev_priv(netdev);
 
-       cmd->supported   = (SUPPORTED_1000baseT_Full |
-                            SUPPORTED_FIBRE);
-       cmd->advertising = (ADVERTISED_1000baseT_Full |
-                            ADVERTISED_FIBRE);
-       cmd->port = PORT_FIBRE;
-       ethtool_cmd_speed_set(cmd, card->phy.speed);
-       cmd->duplex = DUPLEX_FULL;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+
+       cmd->base.port = PORT_FIBRE;
+       cmd->base.speed = card->phy.speed;
+       cmd->base.duplex = DUPLEX_FULL;
 
        return 0;
 }
@@ -166,7 +170,6 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset,
 }
 
 const struct ethtool_ops spider_net_ethtool_ops = {
-       .get_settings           = spider_net_ethtool_get_settings,
        .get_drvinfo            = spider_net_ethtool_get_drvinfo,
        .get_wol                = spider_net_ethtool_get_wol,
        .get_msglevel           = spider_net_ethtool_get_msglevel,
@@ -177,5 +180,6 @@ const struct ethtool_ops spider_net_ethtool_ops = {
        .get_strings            = spider_net_get_strings,
        .get_sset_count         = spider_net_get_sset_count,
        .get_ethtool_stats      = spider_net_get_ethtool_stats,
+       .get_link_ksettings     = spider_net_ethtool_get_link_ksettings,
 };
 
index c5583991da4aa462652b5236992c7731529422db..5ac6eaa9e78510a2c28cf2506dd791ece82b3009 100644 (file)
@@ -1499,27 +1499,29 @@ static void tsi108_init_mac(struct net_device *dev)
        TSI_WRITE(TSI108_EC_INTMASK, ~0);
 }
 
-static int tsi108_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int tsi108_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct tsi108_prv_data *data = netdev_priv(dev);
        unsigned long flags;
        int rc;
 
        spin_lock_irqsave(&data->txlock, flags);
-       rc = mii_ethtool_gset(&data->mii_if, cmd);
+       rc = mii_ethtool_get_link_ksettings(&data->mii_if, cmd);
        spin_unlock_irqrestore(&data->txlock, flags);
 
        return rc;
 }
 
-static int tsi108_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int tsi108_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct tsi108_prv_data *data = netdev_priv(dev);
        unsigned long flags;
        int rc;
 
        spin_lock_irqsave(&data->txlock, flags);
-       rc = mii_ethtool_sset(&data->mii_if, cmd);
+       rc = mii_ethtool_set_link_ksettings(&data->mii_if, cmd);
        spin_unlock_irqrestore(&data->txlock, flags);
 
        return rc;
@@ -1535,8 +1537,8 @@ static int tsi108_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 static const struct ethtool_ops tsi108_ethtool_ops = {
        .get_link       = ethtool_op_get_link,
-       .get_settings   = tsi108_get_settings,
-       .set_settings   = tsi108_set_settings,
+       .get_link_ksettings     = tsi108_get_link_ksettings,
+       .set_link_ksettings     = tsi108_set_link_ksettings,
 };
 
 static const struct net_device_ops tsi108_netdev_ops = {
index c068c58428f7611ddcd526010cb07f8ada61b760..4cf41f779d0ef4ef8d6cf2fa688abd97b3e8264d 100644 (file)
@@ -2303,25 +2303,27 @@ static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *i
        strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info));
 }
 
-static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netdev_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct rhine_private *rp = netdev_priv(dev);
        int rc;
 
        mutex_lock(&rp->task_lock);
-       rc = mii_ethtool_gset(&rp->mii_if, cmd);
+       rc = mii_ethtool_get_link_ksettings(&rp->mii_if, cmd);
        mutex_unlock(&rp->task_lock);
 
        return rc;
 }
 
-static int netdev_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netdev_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct rhine_private *rp = netdev_priv(dev);
        int rc;
 
        mutex_lock(&rp->task_lock);
-       rc = mii_ethtool_sset(&rp->mii_if, cmd);
+       rc = mii_ethtool_set_link_ksettings(&rp->mii_if, cmd);
        rhine_set_carrier(&rp->mii_if);
        mutex_unlock(&rp->task_lock);
 
@@ -2391,14 +2393,14 @@ static int rhine_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 static const struct ethtool_ops netdev_ethtool_ops = {
        .get_drvinfo            = netdev_get_drvinfo,
-       .get_settings           = netdev_get_settings,
-       .set_settings           = netdev_set_settings,
        .nway_reset             = netdev_nway_reset,
        .get_link               = netdev_get_link,
        .get_msglevel           = netdev_get_msglevel,
        .set_msglevel           = netdev_set_msglevel,
        .get_wol                = rhine_get_wol,
        .set_wol                = rhine_set_wol,
+       .get_link_ksettings     = netdev_get_link_ksettings,
+       .set_link_ksettings     = netdev_set_link_ksettings,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
index d088788b27a751286f7556b7f478b210c0ab68a5..ef9538ee53d0db7f43eae4298dd39258b4c39122 100644 (file)
@@ -3291,15 +3291,17 @@ static void velocity_ethtool_down(struct net_device *dev)
                velocity_set_power_state(vptr, PCI_D3hot);
 }
 
-static int velocity_get_settings(struct net_device *dev,
-                                struct ethtool_cmd *cmd)
+static int velocity_get_link_ksettings(struct net_device *dev,
+                                      struct ethtool_link_ksettings *cmd)
 {
        struct velocity_info *vptr = netdev_priv(dev);
        struct mac_regs __iomem *regs = vptr->mac_regs;
        u32 status;
+       u32 supported, advertising;
+
        status = check_connection_type(vptr->mac_regs);
 
-       cmd->supported = SUPPORTED_TP |
+       supported = SUPPORTED_TP |
                        SUPPORTED_Autoneg |
                        SUPPORTED_10baseT_Half |
                        SUPPORTED_10baseT_Full |
@@ -3308,9 +3310,9 @@ static int velocity_get_settings(struct net_device *dev,
                        SUPPORTED_1000baseT_Half |
                        SUPPORTED_1000baseT_Full;
 
-       cmd->advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
+       advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
        if (vptr->options.spd_dpx == SPD_DPX_AUTO) {
-               cmd->advertising |=
+               advertising |=
                        ADVERTISED_10baseT_Half |
                        ADVERTISED_10baseT_Full |
                        ADVERTISED_100baseT_Half |
@@ -3320,19 +3322,19 @@ static int velocity_get_settings(struct net_device *dev,
        } else {
                switch (vptr->options.spd_dpx) {
                case SPD_DPX_1000_FULL:
-                       cmd->advertising |= ADVERTISED_1000baseT_Full;
+                       advertising |= ADVERTISED_1000baseT_Full;
                        break;
                case SPD_DPX_100_HALF:
-                       cmd->advertising |= ADVERTISED_100baseT_Half;
+                       advertising |= ADVERTISED_100baseT_Half;
                        break;
                case SPD_DPX_100_FULL:
-                       cmd->advertising |= ADVERTISED_100baseT_Full;
+                       advertising |= ADVERTISED_100baseT_Full;
                        break;
                case SPD_DPX_10_HALF:
-                       cmd->advertising |= ADVERTISED_10baseT_Half;
+                       advertising |= ADVERTISED_10baseT_Half;
                        break;
                case SPD_DPX_10_FULL:
-                       cmd->advertising |= ADVERTISED_10baseT_Full;
+                       advertising |= ADVERTISED_10baseT_Full;
                        break;
                default:
                        break;
@@ -3340,30 +3342,35 @@ static int velocity_get_settings(struct net_device *dev,
        }
 
        if (status & VELOCITY_SPEED_1000)
-               ethtool_cmd_speed_set(cmd, SPEED_1000);
+               cmd->base.speed = SPEED_1000;
        else if (status & VELOCITY_SPEED_100)
-               ethtool_cmd_speed_set(cmd, SPEED_100);
+               cmd->base.speed = SPEED_100;
        else
-               ethtool_cmd_speed_set(cmd, SPEED_10);
+               cmd->base.speed = SPEED_10;
 
-       cmd->autoneg = (status & VELOCITY_AUTONEG_ENABLE) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
-       cmd->port = PORT_TP;
-       cmd->transceiver = XCVR_INTERNAL;
-       cmd->phy_address = readb(&regs->MIIADR) & 0x1F;
+       cmd->base.autoneg = (status & VELOCITY_AUTONEG_ENABLE) ?
+               AUTONEG_ENABLE : AUTONEG_DISABLE;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = readb(&regs->MIIADR) & 0x1F;
 
        if (status & VELOCITY_DUPLEX_FULL)
-               cmd->duplex = DUPLEX_FULL;
+               cmd->base.duplex = DUPLEX_FULL;
        else
-               cmd->duplex = DUPLEX_HALF;
+               cmd->base.duplex = DUPLEX_HALF;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int velocity_set_settings(struct net_device *dev,
-                                struct ethtool_cmd *cmd)
+static int velocity_set_link_ksettings(struct net_device *dev,
+                                      const struct ethtool_link_ksettings *cmd)
 {
        struct velocity_info *vptr = netdev_priv(dev);
-       u32 speed = ethtool_cmd_speed(cmd);
+       u32 speed = cmd->base.speed;
        u32 curr_status;
        u32 new_status = 0;
        int ret = 0;
@@ -3371,11 +3378,12 @@ static int velocity_set_settings(struct net_device *dev,
        curr_status = check_connection_type(vptr->mac_regs);
        curr_status &= (~VELOCITY_LINK_FAIL);
 
-       new_status |= ((cmd->autoneg) ? VELOCITY_AUTONEG_ENABLE : 0);
+       new_status |= ((cmd->base.autoneg) ? VELOCITY_AUTONEG_ENABLE : 0);
        new_status |= ((speed == SPEED_1000) ? VELOCITY_SPEED_1000 : 0);
        new_status |= ((speed == SPEED_100) ? VELOCITY_SPEED_100 : 0);
        new_status |= ((speed == SPEED_10) ? VELOCITY_SPEED_10 : 0);
-       new_status |= ((cmd->duplex == DUPLEX_FULL) ? VELOCITY_DUPLEX_FULL : 0);
+       new_status |= ((cmd->base.duplex == DUPLEX_FULL) ?
+                      VELOCITY_DUPLEX_FULL : 0);
 
        if ((new_status & VELOCITY_AUTONEG_ENABLE) &&
            (new_status != (curr_status | VELOCITY_AUTONEG_ENABLE))) {
@@ -3644,8 +3652,6 @@ static void velocity_get_ethtool_stats(struct net_device *dev,
 }
 
 static const struct ethtool_ops velocity_ethtool_ops = {
-       .get_settings           = velocity_get_settings,
-       .set_settings           = velocity_set_settings,
        .get_drvinfo            = velocity_get_drvinfo,
        .get_wol                = velocity_ethtool_get_wol,
        .set_wol                = velocity_ethtool_set_wol,
@@ -3658,7 +3664,9 @@ static const struct ethtool_ops velocity_ethtool_ops = {
        .get_coalesce           = velocity_get_coalesce,
        .set_coalesce           = velocity_set_coalesce,
        .begin                  = velocity_ethtool_up,
-       .complete               = velocity_ethtool_down
+       .complete               = velocity_ethtool_down,
+       .get_link_ksettings     = velocity_get_link_ksettings,
+       .set_link_ksettings     = velocity_set_link_ksettings,
 };
 
 #if defined(CONFIG_PM) && defined(CONFIG_INET)
index b96e96919e31d2da791da73aeeaad6e85fd8c581..33c595f4691d93e3f2b73f85f9e92dba9dbead7f 100644 (file)
@@ -301,7 +301,7 @@ static void axienet_set_mac_address(struct net_device *ndev,
        if (address)
                memcpy(ndev->dev_addr, address, ETH_ALEN);
        if (!is_valid_ether_addr(ndev->dev_addr))
-               eth_random_addr(ndev->dev_addr);
+               eth_hw_addr_random(ndev);
 
        /* Set up unicast MAC address filter set its mac address */
        axienet_iow(lp, XAE_UAW0_OFFSET,
index 6575f880f1be52fb9daa91ea7f88eb3199b614f5..7d101714c2ef4cc38201971d18b4b8f8bac1b8cd 100644 (file)
@@ -175,16 +175,15 @@ static void fjes_get_drvinfo(struct net_device *netdev,
                 "platform:%s", plat_dev->name);
 }
 
-static int fjes_get_settings(struct net_device *netdev,
-                            struct ethtool_cmd *ecmd)
+static int fjes_get_link_ksettings(struct net_device *netdev,
+                                  struct ethtool_link_ksettings *ecmd)
 {
-       ecmd->supported = 0;
-       ecmd->advertising = 0;
-       ecmd->duplex = DUPLEX_FULL;
-       ecmd->autoneg = AUTONEG_DISABLE;
-       ecmd->transceiver = XCVR_DUMMY1;
-       ecmd->port = PORT_NONE;
-       ethtool_cmd_speed_set(ecmd, 20000);     /* 20Gb/s */
+       ethtool_link_ksettings_zero_link_mode(ecmd, supported);
+       ethtool_link_ksettings_zero_link_mode(ecmd, advertising);
+       ecmd->base.duplex = DUPLEX_FULL;
+       ecmd->base.autoneg = AUTONEG_DISABLE;
+       ecmd->base.port = PORT_NONE;
+       ecmd->base.speed = 20000;       /* 20Gb/s */
 
        return 0;
 }
@@ -296,7 +295,6 @@ static int fjes_get_dump_data(struct net_device *netdev,
 }
 
 static const struct ethtool_ops fjes_ethtool_ops = {
-               .get_settings           = fjes_get_settings,
                .get_drvinfo            = fjes_get_drvinfo,
                .get_ethtool_stats = fjes_get_ethtool_stats,
                .get_strings      = fjes_get_strings,
@@ -306,6 +304,7 @@ static const struct ethtool_ops fjes_ethtool_ops = {
                .set_dump               = fjes_set_dump,
                .get_dump_flag          = fjes_get_dump_flag,
                .get_dump_data          = fjes_get_dump_data,
+               .get_link_ksettings     = fjes_get_link_ksettings,
 };
 
 void fjes_set_ethtool_ops(struct net_device *netdev)
index c4b3c4b77a9c1b36e997033529f2de3cb091acb4..ae48c809bac9fe13b0a92e086f0a1c6a4cf6feaf 100644 (file)
@@ -45,6 +45,8 @@ MODULE_DESCRIPTION("FUJITSU Extended Socket Network Device Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
+#define ACPI_MOTHERBOARD_RESOURCE_HID "PNP0C02"
+
 static int fjes_request_irq(struct fjes_adapter *);
 static void fjes_free_irq(struct fjes_adapter *);
 
@@ -78,7 +80,7 @@ static void fjes_rx_irq(struct fjes_adapter *, int);
 static int fjes_poll(struct napi_struct *, int);
 
 static const struct acpi_device_id fjes_acpi_ids[] = {
-       {"PNP0C02", 0},
+       {ACPI_MOTHERBOARD_RESOURCE_HID, 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, fjes_acpi_ids);
@@ -115,18 +117,17 @@ static struct resource fjes_resource[] = {
        },
 };
 
-static int fjes_acpi_add(struct acpi_device *device)
+static bool is_extended_socket_device(struct acpi_device *device)
 {
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
        char str_buf[sizeof(FJES_ACPI_SYMBOL) + 1];
-       struct platform_device *plat_dev;
        union acpi_object *str;
        acpi_status status;
        int result;
 
        status = acpi_evaluate_object(device->handle, "_STR", NULL, &buffer);
        if (ACPI_FAILURE(status))
-               return -ENODEV;
+               return false;
 
        str = buffer.pointer;
        result = utf16s_to_utf8s((wchar_t *)str->string.pointer,
@@ -136,10 +137,42 @@ static int fjes_acpi_add(struct acpi_device *device)
 
        if (strncmp(FJES_ACPI_SYMBOL, str_buf, strlen(FJES_ACPI_SYMBOL)) != 0) {
                kfree(buffer.pointer);
-               return -ENODEV;
+               return false;
        }
        kfree(buffer.pointer);
 
+       return true;
+}
+
+static int acpi_check_extended_socket_status(struct acpi_device *device)
+{
+       unsigned long long sta;
+       acpi_status status;
+
+       status = acpi_evaluate_integer(device->handle, "_STA", NULL, &sta);
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       if (!((sta & ACPI_STA_DEVICE_PRESENT) &&
+             (sta & ACPI_STA_DEVICE_ENABLED) &&
+             (sta & ACPI_STA_DEVICE_UI) &&
+             (sta & ACPI_STA_DEVICE_FUNCTIONING)))
+               return -ENODEV;
+
+       return 0;
+}
+
+static int fjes_acpi_add(struct acpi_device *device)
+{
+       struct platform_device *plat_dev;
+       acpi_status status;
+
+       if (!is_extended_socket_device(device))
+               return -ENODEV;
+
+       if (acpi_check_extended_socket_status(device))
+               return -ENODEV;
+
        status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
                                     fjes_get_acpi_resource, fjes_resource);
        if (ACPI_FAILURE(status))
@@ -1473,11 +1506,44 @@ static void fjes_watch_unshare_task(struct work_struct *work)
        }
 }
 
+static acpi_status
+acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
+                                void *context, void **return_value)
+{
+       struct acpi_device *device;
+       bool *found = context;
+       int result;
+
+       result = acpi_bus_get_device(obj_handle, &device);
+       if (result)
+               return AE_OK;
+
+       if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID))
+               return AE_OK;
+
+       if (!is_extended_socket_device(device))
+               return AE_OK;
+
+       if (acpi_check_extended_socket_status(device))
+               return AE_OK;
+
+       *found = true;
+       return AE_CTRL_TERMINATE;
+}
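+
+/* Returning AE_CTRL_TERMINATE (instead of AE_OK) stops the
+ * acpi_walk_namespace() scan as soon as a present, enabled extended
+ * socket device is found; *found carries the result back to
+ * fjes_init_module() below.
+ */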
+
 /* fjes_init_module - Driver Registration Routine */
 static int __init fjes_init_module(void)
 {
+       bool found = false;
        int result;
 
+       acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+                           acpi_find_extended_socket_device, NULL, &found,
+                           NULL);
+
+       if (!found)
+               return -ENODEV;
+
        pr_info("%s - version %s - %s\n",
                fjes_driver_string, fjes_driver_version, fjes_copyright);
 
index 89698741682f41e2ea70868715ffcb4f4784e258..4fea1b3dfbb4457247c4bc98dc9378591c36ecd3 100644 (file)
@@ -56,7 +56,10 @@ struct pdp_ctx {
        u16                     af;
 
        struct in_addr          ms_addr_ip4;
-       struct in_addr          sgsn_addr_ip4;
+       struct in_addr          peer_addr_ip4;
+
+       struct sock             *sk;
+       struct net_device       *dev;
 
        atomic_t                tx_seq;
        struct rcu_head         rcu_head;
@@ -66,11 +69,12 @@ struct pdp_ctx {
 struct gtp_dev {
        struct list_head        list;
 
-       struct socket           *sock0;
-       struct socket           *sock1u;
+       struct sock             *sk0;
+       struct sock             *sk1u;
 
        struct net_device       *dev;
 
+       unsigned int            role;
        unsigned int            hash_size;
        struct hlist_head       *tid_hash;
        struct hlist_head       *addr_hash;
@@ -84,6 +88,8 @@ struct gtp_net {
 
 static u32 gtp_h_initval;
 
+static void pdp_context_delete(struct pdp_ctx *pctx);
+
 static inline u32 gtp0_hashfn(u64 tid)
 {
        u32 *tid32 = (u32 *) &tid;
@@ -149,8 +155,8 @@ static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr)
        return NULL;
 }
 
-static bool gtp_check_src_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx,
-                                 unsigned int hdrlen)
+static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx,
+                                 unsigned int hdrlen, unsigned int role)
 {
        struct iphdr *iph;
 
@@ -159,25 +165,62 @@ static bool gtp_check_src_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx,
 
        iph = (struct iphdr *)(skb->data + hdrlen);
 
-       return iph->saddr == pctx->ms_addr_ip4.s_addr;
+       if (role == GTP_ROLE_SGSN)
+               return iph->daddr == pctx->ms_addr_ip4.s_addr;
+       else
+               return iph->saddr == pctx->ms_addr_ip4.s_addr;
 }
 
-/* Check if the inner IP source address in this packet is assigned to any
+/* Check if the inner IP address in this packet is assigned to any
  * existing mobile subscriber.
  */
-static bool gtp_check_src_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
-                            unsigned int hdrlen)
+static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
+                            unsigned int hdrlen, unsigned int role)
 {
        switch (ntohs(skb->protocol)) {
        case ETH_P_IP:
-               return gtp_check_src_ms_ipv4(skb, pctx, hdrlen);
+               return gtp_check_ms_ipv4(skb, pctx, hdrlen, role);
        }
        return false;
 }
 
+static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
+                       unsigned int hdrlen, unsigned int role)
+{
+       struct pcpu_sw_netstats *stats;
+
+       if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
+               netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
+               return 1;
+       }
+
+       /* Get rid of the GTP + UDP headers. */
+       if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
+                                !net_eq(sock_net(pctx->sk), dev_net(pctx->dev))))
+               return -1;
+
+       netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n");
+
+       /* Now that the UDP and the GTP header have been removed, set up the
+        * new network header. This is required by the upper layer to
+        * calculate the transport header.
+        */
+       skb_reset_network_header(skb);
+
+       skb->dev = pctx->dev;
+
+       stats = this_cpu_ptr(pctx->dev->tstats);
+       u64_stats_update_begin(&stats->syncp);
+       stats->rx_packets++;
+       stats->rx_bytes += skb->len;
+       u64_stats_update_end(&stats->syncp);
+
+       netif_rx(skb);
+       return 0;
+}
+
 /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */
-static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb,
-                              bool xnet)
+static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
 {
        unsigned int hdrlen = sizeof(struct udphdr) +
                              sizeof(struct gtp0_header);
@@ -201,17 +244,10 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb,
                return 1;
        }
 
-       if (!gtp_check_src_ms(skb, pctx, hdrlen)) {
-               netdev_dbg(gtp->dev, "No PDP ctx for this MS\n");
-               return 1;
-       }
-
-       /* Get rid of the GTP + UDP headers. */
-       return iptunnel_pull_header(skb, hdrlen, skb->protocol, xnet);
+       return gtp_rx(pctx, skb, hdrlen, gtp->role);
 }
 
-static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb,
-                               bool xnet)
+static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
 {
        unsigned int hdrlen = sizeof(struct udphdr) +
                              sizeof(struct gtp1_header);
@@ -250,37 +286,33 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb,
                return 1;
        }
 
-       if (!gtp_check_src_ms(skb, pctx, hdrlen)) {
-               netdev_dbg(gtp->dev, "No PDP ctx for this MS\n");
-               return 1;
-       }
-
-       /* Get rid of the GTP + UDP headers. */
-       return iptunnel_pull_header(skb, hdrlen, skb->protocol, xnet);
+       return gtp_rx(pctx, skb, hdrlen, gtp->role);
 }
 
-static void gtp_encap_disable(struct gtp_dev *gtp)
+static void gtp_encap_destroy(struct sock *sk)
 {
-       if (gtp->sock0 && gtp->sock0->sk) {
-               udp_sk(gtp->sock0->sk)->encap_type = 0;
-               rcu_assign_sk_user_data(gtp->sock0->sk, NULL);
-       }
-       if (gtp->sock1u && gtp->sock1u->sk) {
-               udp_sk(gtp->sock1u->sk)->encap_type = 0;
-               rcu_assign_sk_user_data(gtp->sock1u->sk, NULL);
-       }
+       struct gtp_dev *gtp;
 
-       gtp->sock0 = NULL;
-       gtp->sock1u = NULL;
+       gtp = rcu_dereference_sk_user_data(sk);
+       if (gtp) {
+               udp_sk(sk)->encap_type = 0;
+               rcu_assign_sk_user_data(sk, NULL);
+               sock_put(sk);
+       }
 }
 
-static void gtp_encap_destroy(struct sock *sk)
+static void gtp_encap_disable_sock(struct sock *sk)
 {
-       struct gtp_dev *gtp;
+       if (!sk)
+               return;
 
-       gtp = rcu_dereference_sk_user_data(sk);
-       if (gtp)
-               gtp_encap_disable(gtp);
+       gtp_encap_destroy(sk);
+}
+
+static void gtp_encap_disable(struct gtp_dev *gtp)
+{
+       gtp_encap_disable_sock(gtp->sk0);
+       gtp_encap_disable_sock(gtp->sk1u);
 }
 
 /* UDP encapsulation receive handler. See net/ipv4/udp.c.
@@ -288,10 +320,8 @@ static void gtp_encap_destroy(struct sock *sk)
  */
 static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
-       struct pcpu_sw_netstats *stats;
        struct gtp_dev *gtp;
-       bool xnet;
-       int ret;
+       int ret = 0;
 
        gtp = rcu_dereference_sk_user_data(sk);
        if (!gtp)
@@ -299,16 +329,14 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
        netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
 
-       xnet = !net_eq(sock_net(sk), dev_net(gtp->dev));
-
        switch (udp_sk(sk)->encap_type) {
        case UDP_ENCAP_GTP0:
                netdev_dbg(gtp->dev, "received GTP0 packet\n");
-               ret = gtp0_udp_encap_recv(gtp, skb, xnet);
+               ret = gtp0_udp_encap_recv(gtp, skb);
                break;
        case UDP_ENCAP_GTP1U:
                netdev_dbg(gtp->dev, "received GTP1U packet\n");
-               ret = gtp1u_udp_encap_recv(gtp, skb, xnet);
+               ret = gtp1u_udp_encap_recv(gtp, skb);
                break;
        default:
                ret = -1; /* Shouldn't happen. */
@@ -317,33 +345,17 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
        switch (ret) {
        case 1:
                netdev_dbg(gtp->dev, "pass up to the process\n");
-               return 1;
+               break;
        case 0:
-               netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n");
                break;
        case -1:
                netdev_dbg(gtp->dev, "GTP packet has been dropped\n");
                kfree_skb(skb);
-               return 0;
+               ret = 0;
+               break;
        }
 
-       /* Now that the UDP and the GTP header have been removed, set up the
-        * new network header. This is required by the upper layer to
-        * calculate the transport header.
-        */
-       skb_reset_network_header(skb);
-
-       skb->dev = gtp->dev;
-
-       stats = this_cpu_ptr(gtp->dev->tstats);
-       u64_stats_update_begin(&stats->syncp);
-       stats->rx_packets++;
-       stats->rx_bytes += skb->len;
-       u64_stats_update_end(&stats->syncp);
-
-       netif_rx(skb);
-
-       return 0;
+       return ret;
 }
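
For reference, the refactored gtp_encap_recv() follows the standard UDP
encap_rcv contract: a return value greater than zero hands the skb back to
the normal UDP receive path, while a return of zero or less tells udp.c that
the handler consumed (or already freed) the skb. A minimal sketch of that
contract, with hypothetical names (my_ctx, my_decap) standing in for the
driver-specific parts:

#include <linux/skbuff.h>
#include <net/sock.h>

struct my_ctx;					/* hypothetical per-socket state */
static int my_decap(struct my_ctx *ctx, struct sk_buff *skb);

static int my_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct my_ctx *ctx = rcu_dereference_sk_user_data(sk);

	if (!ctx)
		return 1;		/* not ours: let the UDP stack have it */

	if (my_decap(ctx, skb) < 0) {
		kfree_skb(skb);		/* drop: the handler owns the skb now */
		return 0;		/* <= 0 means "consumed" to udp.c */
	}

	return 0;			/* delivered to the stack inside my_decap() */
}
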
 
 static int gtp_dev_init(struct net_device *dev)
@@ -367,8 +379,9 @@ static void gtp_dev_uninit(struct net_device *dev)
        free_percpu(dev->tstats);
 }
 
-static struct rtable *ip4_route_output_gtp(struct net *net, struct flowi4 *fl4,
-                                          const struct sock *sk, __be32 daddr)
+static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
+                                          const struct sock *sk,
+                                          __be32 daddr)
 {
        memset(fl4, 0, sizeof(*fl4));
        fl4->flowi4_oif         = sk->sk_bound_dev_if;
@@ -377,7 +390,7 @@ static struct rtable *ip4_route_output_gtp(struct net *net, struct flowi4 *fl4,
        fl4->flowi4_tos         = RT_CONN_FLAGS(sk);
        fl4->flowi4_proto       = sk->sk_protocol;
 
-       return ip_route_output_key(net, fl4);
+       return ip_route_output_key(sock_net(sk), fl4);
 }
 
 static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
@@ -466,7 +479,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
        struct rtable *rt;
        struct flowi4 fl4;
        struct iphdr *iph;
-       struct sock *sk;
        __be16 df;
        int mtu;
 
@@ -474,7 +486,11 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
         * Prepend PDP header with TEI/TID from PDP ctx.
         */
        iph = ip_hdr(skb);
-       pctx = ipv4_pdp_find(gtp, iph->daddr);
+       if (gtp->role == GTP_ROLE_SGSN)
+               pctx = ipv4_pdp_find(gtp, iph->saddr);
+       else
+               pctx = ipv4_pdp_find(gtp, iph->daddr);
+
        if (!pctx) {
                netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
                           &iph->daddr);
@@ -482,40 +498,17 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
        }
        netdev_dbg(dev, "found PDP context %p\n", pctx);
 
-       switch (pctx->gtp_version) {
-       case GTP_V0:
-               if (gtp->sock0)
-                       sk = gtp->sock0->sk;
-               else
-                       sk = NULL;
-               break;
-       case GTP_V1:
-               if (gtp->sock1u)
-                       sk = gtp->sock1u->sk;
-               else
-                       sk = NULL;
-               break;
-       default:
-               return -ENOENT;
-       }
-
-       if (!sk) {
-               netdev_dbg(dev, "no userspace socket is available, skip\n");
-               return -ENOENT;
-       }
-
-       rt = ip4_route_output_gtp(sock_net(sk), &fl4, gtp->sock0->sk,
-                                 pctx->sgsn_addr_ip4.s_addr);
+       rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr);
        if (IS_ERR(rt)) {
                netdev_dbg(dev, "no route to SSGN %pI4\n",
-                          &pctx->sgsn_addr_ip4.s_addr);
+                          &pctx->peer_addr_ip4.s_addr);
                dev->stats.tx_carrier_errors++;
                goto err;
        }
 
        if (rt->dst.dev == dev) {
                netdev_dbg(dev, "circular route to SSGN %pI4\n",
-                          &pctx->sgsn_addr_ip4.s_addr);
+                          &pctx->peer_addr_ip4.s_addr);
                dev->stats.collisions++;
                goto err_rt;
        }
@@ -550,7 +543,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
                goto err_rt;
        }
 
-       gtp_set_pktinfo_ipv4(pktinfo, sk, iph, pctx, rt, &fl4, dev);
+       gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev);
        gtp_push_header(skb, pktinfo);
 
        return 0;
@@ -640,27 +633,23 @@ static void gtp_link_setup(struct net_device *dev)
 
 static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
 static void gtp_hashtable_free(struct gtp_dev *gtp);
-static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp,
-                           int fd_gtp0, int fd_gtp1);
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
 
 static int gtp_newlink(struct net *src_net, struct net_device *dev,
                        struct nlattr *tb[], struct nlattr *data[])
 {
-       int hashsize, err, fd0, fd1;
        struct gtp_dev *gtp;
        struct gtp_net *gn;
+       int hashsize, err;
 
-       if (!data[IFLA_GTP_FD0] || !data[IFLA_GTP_FD1])
+       if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
                return -EINVAL;
 
        gtp = netdev_priv(dev);
 
-       fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
-       fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
-
-       err = gtp_encap_enable(dev, gtp, fd0, fd1);
+       err = gtp_encap_enable(gtp, data);
        if (err < 0)
-               goto out_err;
+               return err;
 
        if (!data[IFLA_GTP_PDP_HASHSIZE])
                hashsize = 1024;
@@ -688,7 +677,6 @@ out_hashtable:
        gtp_hashtable_free(gtp);
 out_encap:
        gtp_encap_disable(gtp);
-out_err:
        return err;
 }
 
@@ -706,6 +694,7 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
        [IFLA_GTP_FD0]                  = { .type = NLA_U32 },
        [IFLA_GTP_FD1]                  = { .type = NLA_U32 },
        [IFLA_GTP_PDP_HASHSIZE]         = { .type = NLA_U32 },
+       [IFLA_GTP_ROLE]                 = { .type = NLA_U32 },
 };
 
 static int gtp_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -747,21 +736,6 @@ static struct rtnl_link_ops gtp_link_ops __read_mostly = {
        .fill_info      = gtp_fill_info,
 };
 
-static struct net *gtp_genl_get_net(struct net *src_net, struct nlattr *tb[])
-{
-       struct net *net;
-
-       /* Examine the link attributes and figure out which network namespace
-        * we are talking about.
-        */
-       if (tb[GTPA_NET_NS_FD])
-               net = get_net_ns_by_fd(nla_get_u32(tb[GTPA_NET_NS_FD]));
-       else
-               net = get_net(src_net);
-
-       return net;
-}
-
 static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
 {
        int i;
@@ -791,93 +765,127 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
        struct pdp_ctx *pctx;
        int i;
 
-       for (i = 0; i < gtp->hash_size; i++) {
-               hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) {
-                       hlist_del_rcu(&pctx->hlist_tid);
-                       hlist_del_rcu(&pctx->hlist_addr);
-                       kfree_rcu(pctx, rcu_head);
-               }
-       }
+       for (i = 0; i < gtp->hash_size; i++)
+               hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
+                       pdp_context_delete(pctx);
+
        synchronize_rcu();
        kfree(gtp->addr_hash);
        kfree(gtp->tid_hash);
 }
 
-static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp,
-                           int fd_gtp0, int fd_gtp1)
+static struct sock *gtp_encap_enable_socket(int fd, int type,
+                                           struct gtp_dev *gtp)
 {
        struct udp_tunnel_sock_cfg tuncfg = {NULL};
-       struct socket *sock0, *sock1u;
+       struct socket *sock;
+       struct sock *sk;
        int err;
 
-       netdev_dbg(dev, "enable gtp on %d, %d\n", fd_gtp0, fd_gtp1);
-
-       sock0 = sockfd_lookup(fd_gtp0, &err);
-       if (sock0 == NULL) {
-               netdev_dbg(dev, "socket fd=%d not found (gtp0)\n", fd_gtp0);
-               return -ENOENT;
-       }
+       pr_debug("enable gtp on %d, %d\n", fd, type);
 
-       if (sock0->sk->sk_protocol != IPPROTO_UDP) {
-               netdev_dbg(dev, "socket fd=%d not UDP\n", fd_gtp0);
-               err = -EINVAL;
-               goto err1;
+       sock = sockfd_lookup(fd, &err);
+       if (!sock) {
+               pr_debug("gtp socket fd=%d not found\n", fd);
+               return ERR_PTR(err);	/* sockfd_lookup() set a negative errno */
        }
 
-       sock1u = sockfd_lookup(fd_gtp1, &err);
-       if (sock1u == NULL) {
-               netdev_dbg(dev, "socket fd=%d not found (gtp1u)\n", fd_gtp1);
-               err = -ENOENT;
-               goto err1;
+       if (sock->sk->sk_protocol != IPPROTO_UDP) {
+               pr_debug("socket fd=%d not UDP\n", fd);
+               sk = ERR_PTR(-EINVAL);
+               goto out_sock;
        }
 
-       if (sock1u->sk->sk_protocol != IPPROTO_UDP) {
-               netdev_dbg(dev, "socket fd=%d not UDP\n", fd_gtp1);
-               err = -EINVAL;
-               goto err2;
+       if (rcu_dereference_sk_user_data(sock->sk)) {
+               sk = ERR_PTR(-EBUSY);
+               goto out_sock;
        }
 
-       netdev_dbg(dev, "enable gtp on %p, %p\n", sock0, sock1u);
-
-       gtp->sock0 = sock0;
-       gtp->sock1u = sock1u;
+       sk = sock->sk;
+       sock_hold(sk);
 
        tuncfg.sk_user_data = gtp;
+       tuncfg.encap_type = type;
        tuncfg.encap_rcv = gtp_encap_recv;
        tuncfg.encap_destroy = gtp_encap_destroy;
 
-       tuncfg.encap_type = UDP_ENCAP_GTP0;
-       setup_udp_tunnel_sock(sock_net(gtp->sock0->sk), gtp->sock0, &tuncfg);
-
-       tuncfg.encap_type = UDP_ENCAP_GTP1U;
-       setup_udp_tunnel_sock(sock_net(gtp->sock1u->sk), gtp->sock1u, &tuncfg);
+       setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
 
-       err = 0;
-err2:
-       sockfd_put(sock1u);
-err1:
-       sockfd_put(sock0);
-       return err;
+out_sock:
+       sockfd_put(sock);
+       return sk;
 }
 
-static struct net_device *gtp_find_dev(struct net *net, int ifindex)
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
 {
-       struct gtp_net *gn = net_generic(net, gtp_net_id);
-       struct gtp_dev *gtp;
+       struct sock *sk1u = NULL;
+       struct sock *sk0 = NULL;
+       unsigned int role = GTP_ROLE_GGSN;
 
-       list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
-               if (ifindex == gtp->dev->ifindex)
-                       return gtp->dev;
+       if (data[IFLA_GTP_FD0]) {
+               u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
+
+               sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp);
+               if (IS_ERR(sk0))
+                       return PTR_ERR(sk0);
        }
-       return NULL;
+
+       if (data[IFLA_GTP_FD1]) {
+               u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
+
+               sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp);
+               if (IS_ERR(sk1u)) {
+                       if (sk0)
+                               gtp_encap_disable_sock(sk0);
+                       return PTR_ERR(sk1u);
+               }
+       }
+
+       if (data[IFLA_GTP_ROLE]) {
+               role = nla_get_u32(data[IFLA_GTP_ROLE]);
+               if (role > GTP_ROLE_SGSN) {
+                       /* Don't leak the just-enabled sockets on error. */
+                       gtp_encap_disable_sock(sk0);
+                       gtp_encap_disable_sock(sk1u);
+                       return -EINVAL;
+               }
+       }
+
+       gtp->sk0 = sk0;
+       gtp->sk1u = sk1u;
+       gtp->role = role;
+
+       return 0;
+}
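
With IFLA_GTP_FD0 and IFLA_GTP_FD1 now individually optional, a control
daemon only has to create the socket(s) it actually uses. A plain-POSIX
sketch (assumptions: GTPv1-U on its standard port 2152; error handling
trimmed) of producing the fd that would then be passed as IFLA_GTP_FD1:

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int make_gtp1u_socket(void)
{
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);	/* must be UDP */

	if (fd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(2152);			/* GTP1U */
	addr.sin_addr.s_addr = htonl(INADDR_ANY);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return -1;
	}
	return fd;	/* hand this fd to the kernel via IFLA_GTP_FD1 */
}
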
+
+static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
+{
+       struct gtp_dev *gtp = NULL;
+       struct net_device *dev;
+       struct net *net;
+
+       /* Examine the link attributes and figure out which network namespace
+        * we are talking about.
+        */
+       if (nla[GTPA_NET_NS_FD])
+               net = get_net_ns_by_fd(nla_get_u32(nla[GTPA_NET_NS_FD]));
+       else
+               net = get_net(src_net);
+
+       if (IS_ERR(net))
+               return NULL;
+
+       /* Check if there's an existing gtpX device to configure */
+       dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK]));
+       if (dev && dev->netdev_ops == &gtp_netdev_ops)
+               gtp = netdev_priv(dev);
+
+       put_net(net);
+       return gtp;
 }
 
 static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
 {
        pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
        pctx->af = AF_INET;
-       pctx->sgsn_addr_ip4.s_addr =
-               nla_get_be32(info->attrs[GTPA_SGSN_ADDRESS]);
+       pctx->peer_addr_ip4.s_addr =
+               nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]);
        pctx->ms_addr_ip4.s_addr =
                nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
 
@@ -899,9 +907,10 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
        }
 }
 
-static int ipv4_pdp_add(struct net_device *dev, struct genl_info *info)
+static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+                       struct genl_info *info)
 {
-       struct gtp_dev *gtp = netdev_priv(dev);
+       struct net_device *dev = gtp->dev;
        u32 hash_ms, hash_tid = 0;
        struct pdp_ctx *pctx;
        bool found = false;
@@ -940,6 +949,9 @@ static int ipv4_pdp_add(struct net_device *dev, struct genl_info *info)
        if (pctx == NULL)
                return -ENOMEM;
 
+       sock_hold(sk);
+       pctx->sk = sk;
+       pctx->dev = gtp->dev;
        ipv4_pdp_fill(pctx, info);
        atomic_set(&pctx->tx_seq, 0);
 
@@ -963,31 +975,50 @@ static int ipv4_pdp_add(struct net_device *dev, struct genl_info *info)
        switch (pctx->gtp_version) {
        case GTP_V0:
                netdev_dbg(dev, "GTPv0-U: new PDP ctx id=%llx ssgn=%pI4 ms=%pI4 (pdp=%p)\n",
-                          pctx->u.v0.tid, &pctx->sgsn_addr_ip4,
+                          pctx->u.v0.tid, &pctx->peer_addr_ip4,
                           &pctx->ms_addr_ip4, pctx);
                break;
        case GTP_V1:
                netdev_dbg(dev, "GTPv1-U: new PDP ctx id=%x/%x ssgn=%pI4 ms=%pI4 (pdp=%p)\n",
                           pctx->u.v1.i_tei, pctx->u.v1.o_tei,
-                          &pctx->sgsn_addr_ip4, &pctx->ms_addr_ip4, pctx);
+                          &pctx->peer_addr_ip4, &pctx->ms_addr_ip4, pctx);
                break;
        }
 
        return 0;
 }
 
+static void pdp_context_free(struct rcu_head *head)
+{
+       struct pdp_ctx *pctx = container_of(head, struct pdp_ctx, rcu_head);
+
+       sock_put(pctx->sk);
+       kfree(pctx);
+}
+
+static void pdp_context_delete(struct pdp_ctx *pctx)
+{
+       hlist_del_rcu(&pctx->hlist_tid);
+       hlist_del_rcu(&pctx->hlist_addr);
+       call_rcu(&pctx->rcu_head, pdp_context_free);
+}
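
pdp_context_delete() above is the standard unlink-then-defer idiom: the
object is removed from all RCU-visible lists first, and the actual free
(including the sock_put()) is postponed via call_rcu() until a grace period
has elapsed, so lockless readers never touch freed memory. The bare shape of
the pattern, with a made-up my_obj type:

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct hlist_node node;
	struct rcu_head rcu;
};

static void my_obj_free(struct rcu_head *head)
{
	/* Runs after a grace period: no reader can still see the object. */
	kfree(container_of(head, struct my_obj, rcu));
}

static void my_obj_delete(struct my_obj *obj)
{
	hlist_del_rcu(&obj->node);		/* unpublish first... */
	call_rcu(&obj->rcu, my_obj_free);	/* ...free later */
}
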
+
 static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
 {
-       struct net_device *dev;
-       struct net *net;
+       unsigned int version;
+       struct gtp_dev *gtp;
+       struct sock *sk;
+       int err;
 
        if (!info->attrs[GTPA_VERSION] ||
            !info->attrs[GTPA_LINK] ||
-           !info->attrs[GTPA_SGSN_ADDRESS] ||
+           !info->attrs[GTPA_PEER_ADDRESS] ||
            !info->attrs[GTPA_MS_ADDRESS])
                return -EINVAL;
 
-       switch (nla_get_u32(info->attrs[GTPA_VERSION])) {
+       version = nla_get_u32(info->attrs[GTPA_VERSION]);
+
+       switch (version) {
        case GTP_V0:
                if (!info->attrs[GTPA_TID] ||
                    !info->attrs[GTPA_FLOW])
@@ -1003,77 +1034,101 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
                return -EINVAL;
        }
 
-       net = gtp_genl_get_net(sock_net(skb->sk), info->attrs);
-       if (IS_ERR(net))
-               return PTR_ERR(net);
+       rcu_read_lock();
 
-       /* Check if there's an existing gtpX device to configure */
-       dev = gtp_find_dev(net, nla_get_u32(info->attrs[GTPA_LINK]));
-       if (dev == NULL) {
-               put_net(net);
-               return -ENODEV;
+       gtp = gtp_find_dev(sock_net(skb->sk), info->attrs);
+       if (!gtp) {
+               err = -ENODEV;
+               goto out_unlock;
        }
-       put_net(net);
 
-       return ipv4_pdp_add(dev, info);
+       if (version == GTP_V0)
+               sk = gtp->sk0;
+       else if (version == GTP_V1)
+               sk = gtp->sk1u;
+       else
+               sk = NULL;
+
+       if (!sk) {
+               err = -ENODEV;
+               goto out_unlock;
+       }
+
+       err = ipv4_pdp_add(gtp, sk, info);
+
+out_unlock:
+       rcu_read_unlock();
+       return err;
 }
 
-static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
+static struct pdp_ctx *gtp_find_pdp_by_link(struct net *net,
+                                           struct nlattr *nla[])
 {
-       struct net_device *dev;
-       struct pdp_ctx *pctx;
        struct gtp_dev *gtp;
-       struct net *net;
 
-       if (!info->attrs[GTPA_VERSION] ||
-           !info->attrs[GTPA_LINK])
-               return -EINVAL;
+       gtp = gtp_find_dev(net, nla);
+       if (!gtp)
+               return ERR_PTR(-ENODEV);
 
-       net = gtp_genl_get_net(sock_net(skb->sk), info->attrs);
-       if (IS_ERR(net))
-               return PTR_ERR(net);
+       if (nla[GTPA_MS_ADDRESS]) {
+               __be32 ip = nla_get_be32(nla[GTPA_MS_ADDRESS]);
 
-       /* Check if there's an existing gtpX device to configure */
-       dev = gtp_find_dev(net, nla_get_u32(info->attrs[GTPA_LINK]));
-       if (dev == NULL) {
-               put_net(net);
-               return -ENODEV;
+               return ipv4_pdp_find(gtp, ip);
+       } else if (nla[GTPA_VERSION]) {
+               u32 gtp_version = nla_get_u32(nla[GTPA_VERSION]);
+
+               if (gtp_version == GTP_V0 && nla[GTPA_TID])
+                       return gtp0_pdp_find(gtp, nla_get_u64(nla[GTPA_TID]));
+               else if (gtp_version == GTP_V1 && nla[GTPA_I_TEI])
+                       return gtp1_pdp_find(gtp, nla_get_u32(nla[GTPA_I_TEI]));
        }
-       put_net(net);
 
-       gtp = netdev_priv(dev);
+       return ERR_PTR(-EINVAL);
+}
 
-       switch (nla_get_u32(info->attrs[GTPA_VERSION])) {
-       case GTP_V0:
-               if (!info->attrs[GTPA_TID])
-                       return -EINVAL;
-               pctx = gtp0_pdp_find(gtp, nla_get_u64(info->attrs[GTPA_TID]));
-               break;
-       case GTP_V1:
-               if (!info->attrs[GTPA_I_TEI])
-                       return -EINVAL;
-               pctx = gtp1_pdp_find(gtp, nla_get_u64(info->attrs[GTPA_I_TEI]));
-               break;
+static struct pdp_ctx *gtp_find_pdp(struct net *net, struct nlattr *nla[])
+{
+       struct pdp_ctx *pctx;
 
-       default:
+       if (nla[GTPA_LINK])
+               pctx = gtp_find_pdp_by_link(net, nla);
+       else
+               pctx = ERR_PTR(-EINVAL);
+
+       if (!pctx)
+               pctx = ERR_PTR(-ENOENT);
+
+       return pctx;
+}
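
gtp_find_pdp() folds "bad request" and "not found" into the returned pointer
using the kernel's ERR_PTR convention, which keeps the rcu_read_lock()
sections in its callers down to a single error check. A minimal illustration
of the convention (demo_lookup is a made-up name):

#include <linux/err.h>

static int demo_lookup(void)
{
	void *obj = ERR_PTR(-ENOENT);	/* errno encoded in the pointer */

	if (IS_ERR(obj))
		return PTR_ERR(obj);	/* decodes back to -ENOENT */

	return 0;
}
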
+
+static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
+{
+       struct pdp_ctx *pctx;
+       int err = 0;
+
+       if (!info->attrs[GTPA_VERSION])
                return -EINVAL;
-       }
 
-       if (pctx == NULL)
-               return -ENOENT;
+       rcu_read_lock();
+
+       pctx = gtp_find_pdp(sock_net(skb->sk), info->attrs);
+       if (IS_ERR(pctx)) {
+               err = PTR_ERR(pctx);
+               goto out_unlock;
+       }
 
        if (pctx->gtp_version == GTP_V0)
-               netdev_dbg(dev, "GTPv0-U: deleting tunnel id = %llx (pdp %p)\n",
+               netdev_dbg(pctx->dev, "GTPv0-U: deleting tunnel id = %llx (pdp %p)\n",
                           pctx->u.v0.tid, pctx);
        else if (pctx->gtp_version == GTP_V1)
-               netdev_dbg(dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n",
+               netdev_dbg(pctx->dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n",
                           pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
 
-       hlist_del_rcu(&pctx->hlist_tid);
-       hlist_del_rcu(&pctx->hlist_addr);
-       kfree_rcu(pctx, rcu_head);
+       pdp_context_delete(pctx);
 
-       return 0;
+out_unlock:
+       rcu_read_unlock();
+       return err;
 }
 
 static struct genl_family gtp_genl_family;
@@ -1089,7 +1144,7 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
                goto nlmsg_failure;
 
        if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) ||
-           nla_put_be32(skb, GTPA_SGSN_ADDRESS, pctx->sgsn_addr_ip4.s_addr) ||
+           nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr) ||
            nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms_addr_ip4.s_addr))
                goto nla_put_failure;
 
@@ -1117,59 +1172,17 @@ nla_put_failure:
 static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info)
 {
        struct pdp_ctx *pctx = NULL;
-       struct net_device *dev;
        struct sk_buff *skb2;
-       struct gtp_dev *gtp;
-       u32 gtp_version;
-       struct net *net;
        int err;
 
-       if (!info->attrs[GTPA_VERSION] ||
-           !info->attrs[GTPA_LINK])
-               return -EINVAL;
-
-       gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
-       switch (gtp_version) {
-       case GTP_V0:
-       case GTP_V1:
-               break;
-       default:
+       if (!info->attrs[GTPA_VERSION])
                return -EINVAL;
-       }
-
-       net = gtp_genl_get_net(sock_net(skb->sk), info->attrs);
-       if (IS_ERR(net))
-               return PTR_ERR(net);
-
-       /* Check if there's an existing gtpX device to configure */
-       dev = gtp_find_dev(net, nla_get_u32(info->attrs[GTPA_LINK]));
-       if (dev == NULL) {
-               put_net(net);
-               return -ENODEV;
-       }
-       put_net(net);
-
-       gtp = netdev_priv(dev);
 
        rcu_read_lock();
-       if (gtp_version == GTP_V0 &&
-           info->attrs[GTPA_TID]) {
-               u64 tid = nla_get_u64(info->attrs[GTPA_TID]);
-
-               pctx = gtp0_pdp_find(gtp, tid);
-       } else if (gtp_version == GTP_V1 &&
-                info->attrs[GTPA_I_TEI]) {
-               u32 tid = nla_get_u32(info->attrs[GTPA_I_TEI]);
-
-               pctx = gtp1_pdp_find(gtp, tid);
-       } else if (info->attrs[GTPA_MS_ADDRESS]) {
-               __be32 ip = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
-
-               pctx = ipv4_pdp_find(gtp, ip);
-       }
 
-       if (pctx == NULL) {
-               err = -ENOENT;
+       pctx = gtp_find_pdp(sock_net(skb->sk), info->attrs);
+       if (IS_ERR(pctx)) {
+               err = PTR_ERR(pctx);
                goto err_unlock;
        }
 
@@ -1242,7 +1255,7 @@ static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
        [GTPA_LINK]             = { .type = NLA_U32, },
        [GTPA_VERSION]          = { .type = NLA_U32, },
        [GTPA_TID]              = { .type = NLA_U64, },
-       [GTPA_SGSN_ADDRESS]     = { .type = NLA_U32, },
+       [GTPA_PEER_ADDRESS]     = { .type = NLA_U32, },
        [GTPA_MS_ADDRESS]       = { .type = NLA_U32, },
        [GTPA_FLOW]             = { .type = NLA_U16, },
        [GTPA_NET_NS_FD]        = { .type = NLA_U32, },
index f9f3dba7a58800d9288199b50bec0b85d18cb249..4747ad48b3cc5278b510306449a2fea7e52115c5 100644 (file)
@@ -196,6 +196,7 @@ int netvsc_recv_callback(struct net_device *net,
                         const struct ndis_tcp_ip_checksum_info *csum_info,
                         const struct ndis_pkt_8021q_info *vlan);
 void netvsc_channel_cb(void *context);
+int netvsc_poll(struct napi_struct *napi, int budget);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 int rndis_filter_device_add(struct hv_device *dev,
@@ -685,7 +686,7 @@ struct net_device_context {
        /* point back to our device context */
        struct hv_device *device_ctx;
        /* netvsc_device */
-       struct netvsc_device *nvdev;
+       struct netvsc_device __rcu *nvdev;
        /* reconfigure work */
        struct delayed_work dwork;
        /* last reconfig time */
@@ -707,9 +708,6 @@ struct net_device_context {
        u32 speed;
        struct netvsc_ethtool_stats eth_stats;
 
-       /* the device is going away */
-       bool start_remove;
-
        /* State to manage the associated VF interface. */
        struct net_device __rcu *vf_netdev;
 
@@ -722,6 +720,8 @@ struct net_device_context {
 /* Per channel data */
 struct netvsc_channel {
        struct vmbus_channel *channel;
+       const struct vmpacket_descriptor *desc;
+       struct napi_struct napi;
        struct multi_send_data msd;
        struct multi_recv_comp mrc;
        atomic_t queue_sends;
@@ -761,8 +761,8 @@ struct netvsc_device {
 
        u32 max_chn;
        u32 num_chn;
-       spinlock_t sc_lock; /* Protects num_sc_offered variable */
-       u32 num_sc_offered;
+
+       refcount_t sc_offered;
 
        /* Holds rndis device info */
        void *extension;
@@ -777,6 +777,8 @@ struct netvsc_device {
        atomic_t open_cnt;
 
        struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
+
+       struct rcu_head rcu;
 };
 
 static inline struct netvsc_device *
@@ -1425,9 +1427,6 @@ struct rndis_message {
        ((void *) rndis_msg)
 
 
-#define __struct_bcount(x)
-
-
 
 #define RNDIS_HEADER_SIZE      (sizeof(struct rndis_message) - \
                                 sizeof(union rndis_message_container))
index 4c1d8cca247b921e263268bf8344898c31bb488a..e998e2f7a619785215309d5620e4512c287738e1 100644 (file)
@@ -80,8 +80,10 @@ static struct netvsc_device *alloc_net_device(void)
        return net_device;
 }
 
-static void free_netvsc_device(struct netvsc_device *nvdev)
+static void free_netvsc_device(struct rcu_head *head)
 {
+       struct netvsc_device *nvdev
+               = container_of(head, struct netvsc_device, rcu);
        int i;
 
        for (i = 0; i < VRSS_CHANNEL_MAX; i++)
@@ -90,14 +92,9 @@ static void free_netvsc_device(struct netvsc_device *nvdev)
        kfree(nvdev);
 }
 
-
-static inline bool netvsc_channel_idle(const struct netvsc_device *net_device,
-                                      u16 q_idx)
+static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
 {
-       const struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
-
-       return atomic_read(&net_device->num_outstanding_recvs) == 0 &&
-               atomic_read(&nvchan->queue_sends) == 0;
+       call_rcu(&nvdev->rcu, free_netvsc_device);
 }
 
 static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
@@ -556,10 +553,11 @@ void netvsc_device_remove(struct hv_device *device)
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct netvsc_device *net_device = net_device_ctx->nvdev;
+       int i;
 
        netvsc_disconnect_vsp(device);
 
-       net_device_ctx->nvdev = NULL;
+       RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
        /*
         * At this point, no one should be accessing net_device
@@ -570,8 +568,11 @@ void netvsc_device_remove(struct hv_device *device)
        /* Now, we can close the channel safely */
        vmbus_close(device->channel);
 
+       for (i = 0; i < net_device->num_chn; i++)
+               napi_disable(&net_device->chan_table[i].napi);
+
        /* Release all resources */
-       free_netvsc_device(net_device);
+       free_netvsc_device_rcu(net_device);
 }
 
 #define RING_AVAIL_PERCENT_HIWATER 20
@@ -600,11 +601,10 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
 static void netvsc_send_tx_complete(struct netvsc_device *net_device,
                                    struct vmbus_channel *incoming_channel,
                                    struct hv_device *device,
-                                   struct vmpacket_descriptor *packet)
+                                   const struct vmpacket_descriptor *desc)
 {
-       struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+       struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
        struct net_device *ndev = hv_get_drvdata(device);
-       struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct vmbus_channel *channel = device->channel;
        u16 q_idx = 0;
        int queue_sends;
@@ -638,7 +638,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
                wake_up(&net_device->wait_drain);
 
        if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-           !net_device_ctx->start_remove &&
            (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
             queue_sends < 1))
                netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
@@ -647,14 +646,11 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
 static void netvsc_send_completion(struct netvsc_device *net_device,
                                   struct vmbus_channel *incoming_channel,
                                   struct hv_device *device,
-                                  struct vmpacket_descriptor *packet)
+                                  const struct vmpacket_descriptor *desc)
 {
-       struct nvsp_message *nvsp_packet;
+       struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
        struct net_device *ndev = hv_get_drvdata(device);
 
-       nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
-                                             (packet->offset8 << 3));
-
        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
@@ -668,7 +664,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
 
        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
                netvsc_send_tx_complete(net_device, incoming_channel,
-                                       device, packet);
+                                       device, desc);
                break;
 
        default:
@@ -710,8 +706,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
                packet->page_buf_cnt;
 
        /* Add padding */
-       if (skb && skb->xmit_more && remain &&
-           !packet->cp_partial) {
+       if (skb->xmit_more && remain && !packet->cp_partial) {
                padding = net_device->pkt_align - remain;
                rndis_msg->msg_len += padding;
                packet->total_data_buflen += padding;
@@ -869,9 +864,7 @@ int netvsc_send(struct hv_device *device,
        if (msdp->pkt)
                msd_len = msdp->pkt->total_data_buflen;
 
-       try_batch = (skb != NULL) && msd_len > 0 && msdp->count <
-                   net_device->max_pkt;
-
+       try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
        if (try_batch && msd_len + pktlen + net_device->pkt_align <
            net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;
@@ -881,7 +874,7 @@ int netvsc_send(struct hv_device *device,
                section_index = msdp->pkt->send_buf_index;
                packet->cp_partial = true;
 
-       } else if ((skb != NULL) && pktlen + net_device->pkt_align <
+       } else if (pktlen + net_device->pkt_align <
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (section_index != NETVSC_INVALID_INDEX) {
@@ -1066,28 +1059,29 @@ static inline struct recv_comp_data *get_recv_comp_slot(
        return rcd;
 }
 
-static void netvsc_receive(struct net_device *ndev,
+static int netvsc_receive(struct net_device *ndev,
                   struct netvsc_device *net_device,
                   struct net_device_context *net_device_ctx,
                   struct hv_device *device,
                   struct vmbus_channel *channel,
-                  struct vmtransfer_page_packet_header *vmxferpage_packet,
+                  const struct vmpacket_descriptor *desc,
                   struct nvsp_message *nvsp)
 {
+       const struct vmtransfer_page_packet_header *vmxferpage_packet
+               = container_of(desc, const struct vmtransfer_page_packet_header, d);
+       u16 q_idx = channel->offermsg.offer.sub_channel_index;
        char *recv_buf = net_device->recv_buf;
        u32 status = NVSP_STAT_SUCCESS;
        int i;
        int count = 0;
        int ret;
-       struct recv_comp_data *rcd;
-       u16 q_idx = channel->offermsg.offer.sub_channel_index;
 
        /* Make sure this is a valid nvsp packet */
        if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Unknown nvsp packet type received %u\n",
                          nvsp->hdr.msg_type);
-               return;
+               return 0;
        }
 
        if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
@@ -1095,7 +1089,7 @@ static void netvsc_receive(struct net_device *ndev,
                          "Invalid xfer page set id - expecting %x got %x\n",
                          NETVSC_RECEIVE_BUFFER_ID,
                          vmxferpage_packet->xfer_pageset_id);
-               return;
+               return 0;
        }
 
        count = vmxferpage_packet->range_cnt;
@@ -1111,26 +1105,26 @@ static void netvsc_receive(struct net_device *ndev,
                                              channel, data, buflen);
        }
 
-       if (!net_device->chan_table[q_idx].mrc.buf) {
+       if (net_device->chan_table[q_idx].mrc.buf) {
+               struct recv_comp_data *rcd;
+
+               rcd = get_recv_comp_slot(net_device, channel, q_idx);
+               if (rcd) {
+                       rcd->tid = vmxferpage_packet->d.trans_id;
+                       rcd->status = status;
+               } else {
+                       netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+                                  q_idx, vmxferpage_packet->d.trans_id);
+               }
+       } else {
                ret = netvsc_send_recv_completion(channel,
                                                  vmxferpage_packet->d.trans_id,
                                                  status);
                if (ret)
                        netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
                                   q_idx, vmxferpage_packet->d.trans_id, ret);
-               return;
-       }
-
-       rcd = get_recv_comp_slot(net_device, channel, q_idx);
-
-       if (!rcd) {
-               netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
-                          q_idx, vmxferpage_packet->d.trans_id);
-               return;
        }
-
-       rcd->tid = vmxferpage_packet->d.trans_id;
-       rcd->status = status;
+       return count;
 }
 
 static void netvsc_send_table(struct hv_device *hdev,
@@ -1176,17 +1170,14 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
        }
 }
 
-static void netvsc_process_raw_pkt(struct hv_device *device,
-                                  struct vmbus_channel *channel,
-                                  struct netvsc_device *net_device,
-                                  struct net_device *ndev,
-                                  u64 request_id,
-                                  struct vmpacket_descriptor *desc)
+static int netvsc_process_raw_pkt(struct hv_device *device,
+                                 struct vmbus_channel *channel,
+                                 struct netvsc_device *net_device,
+                                 struct net_device *ndev,
+                                 const struct vmpacket_descriptor *desc)
 {
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       struct nvsp_message *nvmsg
-               = (struct nvsp_message *)((unsigned long)desc
-                                         + (desc->offset8 << 3));
+       struct nvsp_message *nvmsg = hv_pkt_data(desc);
 
        switch (desc->type) {
        case VM_PKT_COMP:
@@ -1194,10 +1185,8 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
                break;
 
        case VM_PKT_DATA_USING_XFER_PAGES:
-               netvsc_receive(ndev, net_device, net_device_ctx,
-                              device, channel,
-                              (struct vmtransfer_page_packet_header *)desc,
-                              nvmsg);
+               return netvsc_receive(ndev, net_device, net_device_ctx,
+                                     device, channel, desc, nvmsg);
                break;
 
        case VM_PKT_DATA_INBAND:
@@ -1206,50 +1195,74 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 
        default:
                netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
-                          desc->type, request_id);
+                          desc->type, desc->trans_id);
                break;
        }
+
+       return 0;
 }
 
-void netvsc_channel_cb(void *context)
+static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
+{
+       struct vmbus_channel *primary = channel->primary_channel;
+
+       return primary ? primary->device_obj : channel->device_obj;
+}
+
+/* NAPI poll routine: process data from the incoming host ring buffer.
+ * Stops when the ring is empty or the budget is met or exceeded.
+ */
+int netvsc_poll(struct napi_struct *napi, int budget)
 {
-       struct vmbus_channel *channel = context;
+       struct netvsc_channel *nvchan
+               = container_of(napi, struct netvsc_channel, napi);
+       struct vmbus_channel *channel = nvchan->channel;
+       struct hv_device *device = netvsc_channel_to_device(channel);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
-       struct hv_device *device;
-       struct netvsc_device *net_device;
-       struct vmpacket_descriptor *desc;
-       struct net_device *ndev;
-       bool need_to_commit = false;
+       struct net_device *ndev = hv_get_drvdata(device);
+       struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+       int work_done = 0;
 
-       if (channel->primary_channel != NULL)
-               device = channel->primary_channel->device_obj;
-       else
-               device = channel->device_obj;
+       /* If starting a new interval */
+       if (!nvchan->desc)
+               nvchan->desc = hv_pkt_iter_first(channel);
 
-       ndev = hv_get_drvdata(device);
-       if (unlikely(!ndev))
-               return;
+       while (nvchan->desc && work_done < budget) {
+               work_done += netvsc_process_raw_pkt(device, channel, net_device,
+                                                   ndev, nvchan->desc);
+               nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
+       }
 
-       net_device = net_device_to_netvsc_device(ndev);
-       if (unlikely(net_device->destroy) &&
-           netvsc_channel_idle(net_device, q_idx))
-               return;
+       /* If the receive ring was exhausted and we are not busy polling,
+        * re-enable host interrupts, and reschedule if the ring is
+        * already non-empty again.
+        */
+       if (work_done < budget &&
+           napi_complete_done(napi, work_done) &&
+           hv_end_read(&channel->inbound) != 0)
+               napi_reschedule(napi);
 
-       /* commit_rd_index() -> hv_signal_on_read() needs this. */
-       init_cached_read_index(channel);
+       netvsc_chk_recv_comp(net_device, channel, q_idx);
 
-       while ((desc = get_next_pkt_raw(channel)) != NULL) {
-               netvsc_process_raw_pkt(device, channel, net_device,
-                                      ndev, desc->trans_id, desc);
+       /* May overshoot the budget: one descriptor can carry multiple packets */
+       return min(work_done, budget);
+}
 
-               put_pkt_raw(channel, desc);
-               need_to_commit = true;
-       }
+/* Callback run when data is available in the host ring buffer.
+ * Processing is deferred to the network softirq (NAPI).
+ */
+void netvsc_channel_cb(void *context)
+{
+       struct netvsc_channel *nvchan = context;
 
-       if (need_to_commit)
-               commit_rd_index(channel);
+       if (napi_schedule_prep(&nvchan->napi)) {
+               /* disable interrupts from host */
+               hv_begin_read(&nvchan->channel->inbound);
 
-       netvsc_chk_recv_comp(net_device, channel, q_idx);
+               __napi_schedule(&nvchan->napi);
+       }
 }
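
The conversion above is the canonical NAPI split: the channel callback only
masks further host interrupts and schedules the poller, while all real work
happens in the poll routine, which unmasks interrupts once it completes
under budget (napi_complete_done() returns true when polling really stopped,
as the code above relies on). A generic skeleton, with hypothetical
my_dev/my_rx_one/my_mask_irq/my_unmask_irq standing in for the
Hyper-V-specific pieces:

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct my_dev {
	struct napi_struct napi;
	/* ... device state ... */
};

static bool my_rx_one(struct my_dev *dev);	/* process one packet */
static void my_mask_irq(struct my_dev *dev);
static void my_unmask_irq(struct my_dev *dev);

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_dev *dev = container_of(napi, struct my_dev, napi);
	int work = 0;

	while (work < budget && my_rx_one(dev))
		work++;

	/* Under budget and ring drained: stop polling, unmask IRQs. */
	if (work < budget && napi_complete_done(napi, work))
		my_unmask_irq(dev);

	return work;
}

static irqreturn_t my_irq(int irq, void *data)
{
	struct my_dev *dev = data;

	if (napi_schedule_prep(&dev->napi)) {
		my_mask_irq(dev);	/* no more IRQs until polling is done */
		__napi_schedule(&dev->napi);
	}
	return IRQ_HANDLED;
}
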
 
 /*
@@ -1271,10 +1284,16 @@ int netvsc_device_add(struct hv_device *device,
 
        net_device->ring_size = ring_size;
 
+       /* Because the device uses NAPI, all interrupt batching and
+        * control is done via the NET softirq, not the channel callback.
+        */
+       set_channel_read_mode(device->channel, HV_CALL_ISR);
+
        /* Open the channel */
        ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
                         ring_size * PAGE_SIZE, NULL, 0,
-                        netvsc_channel_cb, device->channel);
+                        netvsc_channel_cb,
+                        net_device->chan_table);
 
        if (ret != 0) {
                netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -1288,15 +1307,21 @@ int netvsc_device_add(struct hv_device *device,
         * chn_table with the default channel to use it before subchannels are
         * opened.
         */
-       for (i = 0; i < VRSS_CHANNEL_MAX; i++)
-               net_device->chan_table[i].channel = device->channel;
+       for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+               struct netvsc_channel *nvchan = &net_device->chan_table[i];
+
+               nvchan->channel = device->channel;
+               netif_napi_add(ndev, &nvchan->napi,
+                              netvsc_poll, NAPI_POLL_WEIGHT);
+       }
+
+       /* Enable NAPI handler for init callbacks */
+       napi_enable(&net_device->chan_table[0].napi);
 
        /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
         * populated.
         */
-       wmb();
-
-       net_device_ctx->nvdev = net_device;
+       rcu_assign_pointer(net_device_ctx->nvdev, net_device);
 
        /* Connect with the NetVsp */
        ret = netvsc_connect_vsp(device);
@@ -1309,11 +1334,13 @@ int netvsc_device_add(struct hv_device *device,
        return ret;
 
 close:
+       napi_disable(&net_device->chan_table[0].napi);
+
        /* Now, we can close the channel safely */
        vmbus_close(device->channel);
 
 cleanup:
-       free_netvsc_device(net_device);
+       free_netvsc_device(&net_device->rcu);
 
        return ret;
 }
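
Taken together, the nvdev changes in this file implement the full RCU
pointer lifecycle: publish with rcu_assign_pointer(), read under
rcu_read_lock() via rcu_dereference() (or rtnl_dereference() where the RTNL
already serializes writers), retire with RCU_INIT_POINTER(..., NULL), and
free only after a grace period. In miniature (placeholder names; writer-side
serialization assumed to be provided by the caller):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct state {
	struct rcu_head rcu;
	int value;
};

static struct state __rcu *cur;

static void publish(struct state *s)
{
	rcu_assign_pointer(cur, s);	/* pairs with rcu_dereference() */
}

static int read_value(void)
{
	struct state *s;
	int v = -1;

	rcu_read_lock();
	s = rcu_dereference(cur);
	if (s)
		v = s->value;
	rcu_read_unlock();
	return v;
}

static void retire(void)
{
	struct state *s = rcu_dereference_protected(cur, 1);

	RCU_INIT_POINTER(cur, NULL);
	if (s)
		kfree_rcu(s, rcu);	/* free after a grace period */
}
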
index 5ede87f30463e8211ef2828a8f74d4951c4166a6..f24c2891dd0cf3e9f65af49c83fd723ccbcbc5ee 100644 (file)
@@ -62,7 +62,7 @@ static void do_set_multicast(struct work_struct *w)
                container_of(w, struct net_device_context, work);
        struct hv_device *device_obj = ndevctx->device_ctx;
        struct net_device *ndev = hv_get_drvdata(device_obj);
-       struct netvsc_device *nvdev = ndevctx->nvdev;
+       struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
        struct rndis_device *rdev;
 
        if (!nvdev)
@@ -116,7 +116,7 @@ static int netvsc_open(struct net_device *net)
 static int netvsc_close(struct net_device *net)
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct netvsc_device *nvdev = net_device_ctx->nvdev;
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
        int ret;
        u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20;
        struct vmbus_channel *chn;
@@ -584,13 +584,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
 }
 
 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
+                                            struct napi_struct *napi,
                                             const struct ndis_tcp_ip_checksum_info *csum_info,
                                             const struct ndis_pkt_8021q_info *vlan,
                                             void *data, u32 buflen)
 {
        struct sk_buff *skb;
 
-       skb = netdev_alloc_skb_ip_align(net, buflen);
+       skb = napi_alloc_skb(napi, buflen);
        if (!skb)
                return skb;
 
@@ -636,12 +637,12 @@ int netvsc_recv_callback(struct net_device *net,
                         const struct ndis_pkt_8021q_info *vlan)
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct netvsc_device *net_device = net_device_ctx->nvdev;
+       struct netvsc_device *net_device;
+       u16 q_idx = channel->offermsg.offer.sub_channel_index;
+       struct netvsc_channel *nvchan;
        struct net_device *vf_netdev;
        struct sk_buff *skb;
        struct netvsc_stats *rx_stats;
-       u16 q_idx = channel->offermsg.offer.sub_channel_index;
-
 
        if (net->reg_state != NETREG_REGISTERED)
                return NVSP_STAT_FAIL;
@@ -654,13 +655,20 @@ int netvsc_recv_callback(struct net_device *net,
         * interface in the guest.
         */
        rcu_read_lock();
+       net_device = rcu_dereference(net_device_ctx->nvdev);
+       if (unlikely(!net_device))
+               goto drop;
+
+       nvchan = &net_device->chan_table[q_idx];
        vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
        if (vf_netdev && (vf_netdev->flags & IFF_UP))
                net = vf_netdev;
 
        /* Allocate a skb - TODO direct I/O to pages? */
-       skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len);
+       skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
+                                   csum_info, vlan, data, len);
        if (unlikely(!skb)) {
+drop:
                ++net->stats.rx_dropped;
                rcu_read_unlock();
                return NVSP_STAT_FAIL;
@@ -674,7 +682,7 @@ int netvsc_recv_callback(struct net_device *net,
         * on the synthetic device because modifying the VF device
         * statistics will not work correctly.
         */
-       rx_stats = &net_device->chan_table[q_idx].rx_stats;
+       rx_stats = &nvchan->rx_stats;
        u64_stats_update_begin(&rx_stats->syncp);
        rx_stats->packets++;
        rx_stats->bytes += len;
@@ -685,12 +693,7 @@ int netvsc_recv_callback(struct net_device *net,
                ++rx_stats->multicast;
        u64_stats_update_end(&rx_stats->syncp);
 
-       /*
-        * Pass the skb back up. Network stack will deallocate the skb when it
-        * is done.
-        * TODO - use NAPI?
-        */
-       netif_receive_skb(skb);
+       napi_gro_receive(&nvchan->napi, skb);
        rcu_read_unlock();
 
        return 0;
@@ -707,7 +710,7 @@ static void netvsc_get_channels(struct net_device *net,
                                struct ethtool_channels *channel)
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct netvsc_device *nvdev = net_device_ctx->nvdev;
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
        if (nvdev) {
                channel->max_combined   = nvdev->max_chn;
@@ -744,8 +747,9 @@ static int netvsc_set_channels(struct net_device *net,
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct hv_device *dev = net_device_ctx->device_ctx;
-       struct netvsc_device *nvdev = net_device_ctx->nvdev;
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
        unsigned int count = channels->combined_count;
+       bool was_running;
        int ret;
 
        /* We do not support separate count for rx, tx, or other */
@@ -756,7 +760,7 @@ static int netvsc_set_channels(struct net_device *net,
        if (count > net->num_tx_queues || count > net->num_rx_queues)
                return -EINVAL;
 
-       if (net_device_ctx->start_remove || !nvdev || nvdev->destroy)
+       if (!nvdev || nvdev->destroy)
                return -ENODEV;
 
        if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5)
@@ -765,11 +769,13 @@ static int netvsc_set_channels(struct net_device *net,
        if (count > nvdev->max_chn)
                return -EINVAL;
 
-       ret = netvsc_close(net);
-       if (ret)
-               return ret;
+       was_running = netif_running(net);
+       if (was_running) {
+               ret = netvsc_close(net);
+               if (ret)
+                       return ret;
+       }
 
-       net_device_ctx->start_remove = true;
        rndis_filter_device_remove(dev, nvdev);
 
        ret = netvsc_set_queues(net, dev, count);
@@ -778,8 +784,8 @@ static int netvsc_set_channels(struct net_device *net,
        else
                netvsc_set_queues(net, dev, nvdev->num_chn);
 
-       netvsc_open(net);
-       net_device_ctx->start_remove = false;
+       if (was_running)
+               ret = netvsc_open(net);
 
        /* We may have missed link change notifications */
        schedule_delayed_work(&net_device_ctx->dwork, 0);
@@ -787,18 +793,19 @@ static int netvsc_set_channels(struct net_device *net,
        return ret;
 }
 
-static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd)
+static bool
+netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
 {
-       struct ethtool_cmd diff1 = *cmd;
-       struct ethtool_cmd diff2 = {};
+       struct ethtool_link_ksettings diff1 = *cmd;
+       struct ethtool_link_ksettings diff2 = {};
 
-       ethtool_cmd_speed_set(&diff1, 0);
-       diff1.duplex = 0;
+       diff1.base.speed = 0;
+       diff1.base.duplex = 0;
        /* advertising and cmd are usually set */
-       diff1.advertising = 0;
-       diff1.cmd = 0;
+       ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
+       diff1.base.cmd = 0;
        /* We set port to PORT_OTHER */
-       diff2.port = PORT_OTHER;
+       diff2.base.port = PORT_OTHER;
 
        return !memcmp(&diff1, &diff2, sizeof(diff1));
 }
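
netvsc_validate_ethtool_ss_cmd() uses a compact trick: copy the request,
zero out the fields the driver is willing to change, and memcmp the result
against an all-zero struct, so any other non-default field rejects the
request. The same trick in standalone, runnable C (hypothetical struct; note
this relies on the structs containing no uninitialized padding):

#include <stdbool.h>
#include <string.h>

struct settings {
	int speed;
	int duplex;
	int port;	/* anything else set here must cause a rejection */
};

/* True if only .speed and .duplex differ from the all-zero defaults. */
static bool only_speed_duplex_set(const struct settings *req)
{
	struct settings diff = *req;
	struct settings zero = {0};

	diff.speed = 0;		/* blank out the fields we allow */
	diff.duplex = 0;

	return memcmp(&diff, &zero, sizeof(diff)) == 0;
}
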
@@ -811,30 +818,32 @@ static void netvsc_init_settings(struct net_device *dev)
        ndc->duplex = DUPLEX_UNKNOWN;
 }
 
-static int netvsc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct net_device_context *ndc = netdev_priv(dev);
 
-       ethtool_cmd_speed_set(cmd, ndc->speed);
-       cmd->duplex = ndc->duplex;
-       cmd->port = PORT_OTHER;
+       cmd->base.speed = ndc->speed;
+       cmd->base.duplex = ndc->duplex;
+       cmd->base.port = PORT_OTHER;
 
        return 0;
 }
 
-static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct net_device_context *ndc = netdev_priv(dev);
        u32 speed;
 
-       speed = ethtool_cmd_speed(cmd);
+       speed = cmd->base.speed;
        if (!ethtool_validate_speed(speed) ||
-           !ethtool_validate_duplex(cmd->duplex) ||
+           !ethtool_validate_duplex(cmd->base.duplex) ||
            !netvsc_validate_ethtool_ss_cmd(cmd))
                return -EINVAL;
 
        ndc->speed = speed;
-       ndc->duplex = cmd->duplex;
+       ndc->duplex = cmd->base.duplex;
 
        return 0;
 }
@@ -842,24 +851,27 @@ static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 {
        struct net_device_context *ndevctx = netdev_priv(ndev);
-       struct netvsc_device *nvdev = ndevctx->nvdev;
+       struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
        struct hv_device *hdev = ndevctx->device_ctx;
        struct netvsc_device_info device_info;
-       int ret;
+       bool was_running;
+       int ret = 0;
 
-       if (ndevctx->start_remove || !nvdev || nvdev->destroy)
+       if (!nvdev || nvdev->destroy)
                return -ENODEV;
 
-       ret = netvsc_close(ndev);
-       if (ret)
-               goto out;
+       was_running = netif_running(ndev);
+       if (was_running) {
+               ret = netvsc_close(ndev);
+               if (ret)
+                       return ret;
+       }
 
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
        device_info.num_chn = nvdev->num_chn;
        device_info.max_num_vrss_chns = nvdev->num_chn;
 
-       ndevctx->start_remove = true;
        rndis_filter_device_remove(hdev, nvdev);
 
        /* 'nvdev' has been freed in rndis_filter_device_remove() ->
@@ -872,9 +884,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 
        rndis_filter_device_add(hdev, &device_info);
 
-out:
-       netvsc_open(ndev);
-       ndevctx->start_remove = false;
+       if (was_running)
+               ret = netvsc_open(ndev);
 
        /* We may have missed link change notifications */
        schedule_delayed_work(&ndevctx->dwork, 0);
@@ -886,7 +897,7 @@ static void netvsc_get_stats64(struct net_device *net,
                               struct rtnl_link_stats64 *t)
 {
        struct net_device_context *ndev_ctx = netdev_priv(net);
-       struct netvsc_device *nvdev = ndev_ctx->nvdev;
+       struct netvsc_device *nvdev = rcu_dereference(ndev_ctx->nvdev);
        int i;
 
        if (!nvdev)
@@ -971,7 +982,10 @@ static const struct {
 static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *nvdev = ndc->nvdev;
+       struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+       if (!nvdev)
+               return -ENODEV;
 
        switch (string_set) {
        case ETH_SS_STATS:
@@ -985,13 +999,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
                                     struct ethtool_stats *stats, u64 *data)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *nvdev = ndc->nvdev;
+       struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
        const void *nds = &ndc->eth_stats;
        const struct netvsc_stats *qstats;
        unsigned int start;
        u64 packets, bytes;
        int i, j;
 
+       if (!nvdev)
+               return;
+
        for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
                data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
 
@@ -1020,10 +1037,13 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *nvdev = ndc->nvdev;
+       struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
        u8 *p = data;
        int i;
 
+       if (!nvdev)
+               return;
+
        switch (stringset) {
        case ETH_SS_STATS:
                for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
@@ -1075,7 +1095,10 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
                 u32 *rules)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *nvdev = ndc->nvdev;
+       struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+       if (!nvdev)
+               return -ENODEV;
 
        switch (info->cmd) {
        case ETHTOOL_GRXRINGS:
@@ -1111,13 +1134,17 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
                           u8 *hfunc)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *ndev = ndc->nvdev;
-       struct rndis_device *rndis_dev = ndev->extension;
+       struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+       struct rndis_device *rndis_dev;
        int i;
 
+       if (!ndev)
+               return -ENODEV;
+
        if (hfunc)
                *hfunc = ETH_RSS_HASH_TOP;      /* Toeplitz */
 
+       rndis_dev = ndev->extension;
        if (indir) {
                for (i = 0; i < ITAB_NUM; i++)
                        indir[i] = rndis_dev->ind_table[i];
@@ -1133,13 +1160,17 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
                           const u8 *key, const u8 hfunc)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *ndev = ndc->nvdev;
-       struct rndis_device *rndis_dev = ndev->extension;
+       struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
+       struct rndis_device *rndis_dev;
        int i;
 
+       if (!ndev)
+               return -ENODEV;
+
        if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
                return -EOPNOTSUPP;
 
+       rndis_dev = ndev->extension;
        if (indir) {
                for (i = 0; i < ITAB_NUM; i++)
                        if (indir[i] >= dev->num_rx_queues)
@@ -1168,13 +1199,13 @@ static const struct ethtool_ops ethtool_ops = {
        .get_channels   = netvsc_get_channels,
        .set_channels   = netvsc_set_channels,
        .get_ts_info    = ethtool_op_get_ts_info,
-       .get_settings   = netvsc_get_settings,
-       .set_settings   = netvsc_set_settings,
        .get_rxnfc      = netvsc_get_rxnfc,
        .get_rxfh_key_size = netvsc_get_rxfh_key_size,
        .get_rxfh_indir_size = netvsc_rss_indir_size,
        .get_rxfh       = netvsc_get_rxfh,
        .set_rxfh       = netvsc_set_rxfh,
+       .get_link_ksettings = netvsc_get_link_ksettings,
+       .set_link_ksettings = netvsc_set_link_ksettings,
 };
 
 static const struct net_device_ops device_ops = {
@@ -1210,10 +1241,10 @@ static void netvsc_link_change(struct work_struct *w)
        unsigned long flags, next_reconfig, delay;
 
        rtnl_lock();
-       if (ndev_ctx->start_remove)
+       net_device = rtnl_dereference(ndev_ctx->nvdev);
+       if (!net_device)
                goto out_unlock;
 
-       net_device = ndev_ctx->nvdev;
        rdev = net_device->extension;
 
        next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
@@ -1354,7 +1385,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
-       netvsc_dev = net_device_ctx->nvdev;
+       netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
        if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
                return NOTIFY_DONE;
 
@@ -1380,7 +1411,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
-       netvsc_dev = net_device_ctx->nvdev;
+       netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
 
        netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
 
@@ -1414,7 +1445,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
-       netvsc_dev = net_device_ctx->nvdev;
+       netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
 
        netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
        netvsc_switch_datapath(ndev, false);
@@ -1474,8 +1505,6 @@ static int netvsc_probe(struct hv_device *dev,
 
        hv_set_drvdata(dev, net);
 
-       net_device_ctx->start_remove = false;
-
        INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
        INIT_WORK(&net_device_ctx->work, do_set_multicast);
 
@@ -1492,8 +1521,7 @@ static int netvsc_probe(struct hv_device *dev,
        /* Notify the netvsc driver of the new device */
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
-       device_info.max_num_vrss_chns = min_t(u32, VRSS_CHANNEL_DEFAULT,
-                                             num_online_cpus());
+       device_info.num_chn = VRSS_CHANNEL_DEFAULT;
        ret = rndis_filter_device_add(dev, &device_info);
        if (ret != 0) {
                netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
@@ -1509,6 +1537,7 @@ static int netvsc_probe(struct hv_device *dev,
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
        net->vlan_features = net->features;
 
+       /* RCU not necessary here, device not registered */
        nvdev = net_device_ctx->nvdev;
        netif_set_real_num_tx_queues(net, nvdev->num_chn);
        netif_set_real_num_rx_queues(net, nvdev->num_chn);
@@ -1544,26 +1573,20 @@ static int netvsc_remove(struct hv_device *dev)
 
        ndev_ctx = netdev_priv(net);
 
-       /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels()
-        * removing the device.
-        */
-       rtnl_lock();
-       ndev_ctx->start_remove = true;
-       rtnl_unlock();
+       netif_device_detach(net);
 
        cancel_delayed_work_sync(&ndev_ctx->dwork);
        cancel_work_sync(&ndev_ctx->work);
 
-       /* Stop outbound asap */
-       netif_tx_disable(net);
-
-       unregister_netdev(net);
-
        /*
         * Call to the vsc driver to let it know that the device is being
-        * removed
+        * removed. Also blocks mtu and channel changes.
         */
+       rtnl_lock();
        rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+       rtnl_unlock();
+
+       unregister_netdev(net);
 
        hv_set_drvdata(dev, NULL);
 
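The netvsc hunks above replace raw ndev_ctx->nvdev loads with rcu_dereference()/rtnl_dereference() and retire the start_remove flag. A minimal reader-side sketch of the pattern being adopted, assuming an nvdev field annotated __rcu; this is illustrative only, not the driver's exact code:

#include <linux/rcupdate.h>

/* Sketch only: the reader-side pattern adopted above. Assumes
 * net_device_context::nvdev is annotated __rcu and that writers
 * publish/clear it with rcu_assign_pointer() / RCU_INIT_POINTER().
 */
static int example_get_num_chn(struct net_device_context *ndc)
{
	struct netvsc_device *nvdev;
	int num = -ENODEV;

	rcu_read_lock();
	nvdev = rcu_dereference(ndc->nvdev);	/* NULL during teardown */
	if (nvdev)
		num = nvdev->num_chn;
	rcu_read_unlock();

	return num;
}

Paths that already hold RTNL (the notifier callbacks and netvsc_link_change() above) can use rtnl_dereference() instead and skip the read-side critical section.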
index 19356f56b7b144f40184c5766a8f9be33b4a3079..983582526b37ffbfe258ab3d603a63f20f34ad08 100644 (file)
@@ -819,16 +819,14 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
 {
        struct rndis_request *request;
        struct rndis_set_request *set;
-       struct rndis_set_complete *set_complete;
        int ret;
 
        request = get_rndis_request(dev, RNDIS_MSG_SET,
                        RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
                        sizeof(u32));
-       if (!request) {
-               ret = -ENOMEM;
-               goto cleanup;
-       }
+       if (!request)
+               return -ENOMEM;
+
 
        /* Setup the rndis set */
        set = &request->request_msg.msg.set_req;
@@ -840,15 +838,11 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
               &new_filter, sizeof(u32));
 
        ret = rndis_filter_send_request(dev, request);
-       if (ret != 0)
-               goto cleanup;
+       if (ret == 0)
+               wait_for_completion(&request->wait_event);
 
-       wait_for_completion(&request->wait_event);
+       put_rndis_request(dev, request);
 
-       set_complete = &request->response_msg.msg.set_complete;
-cleanup:
-       if (request)
-               put_rndis_request(dev, request);
        return ret;
 }
 
@@ -926,8 +920,6 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
        struct rndis_halt_request *halt;
        struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
        struct netvsc_device *nvdev = net_device_ctx->nvdev;
-       struct hv_device *hdev = net_device_ctx->device_ctx;
-       ulong flags;
 
        /* Attempt to do a rndis device halt */
        request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -945,9 +937,10 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
        dev->state = RNDIS_DEV_UNINITIALIZED;
 
 cleanup:
-       spin_lock_irqsave(&hdev->channel->inbound_lock, flags);
        nvdev->destroy = true;
-       spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags);
+
+       /* Force flag to be ordered before waiting */
+       wmb();
 
        /* Wait for all send completions */
        wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
@@ -996,26 +989,34 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
                hv_get_drvdata(new_sc->primary_channel->device_obj);
        struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
        u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+       struct netvsc_channel *nvchan;
        int ret;
-       unsigned long flags;
 
        if (chn_index >= nvscdev->num_chn)
                return;
 
-       nvscdev->chan_table[chn_index].mrc.buf
+       nvchan = nvscdev->chan_table + chn_index;
+       nvchan->mrc.buf
                = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
 
+       if (!nvchan->mrc.buf)
+               return;
+
+       /* Because the device uses NAPI, all the interrupt batching and
+        * control is done via Net softirq, not the channel handling
+        */
+       set_channel_read_mode(new_sc, HV_CALL_ISR);
+
        ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
                         nvscdev->ring_size * PAGE_SIZE, NULL, 0,
-                        netvsc_channel_cb, new_sc);
+                        netvsc_channel_cb, nvchan);
 
        if (ret == 0)
-               nvscdev->chan_table[chn_index].channel = new_sc;
+               nvchan->channel = new_sc;
+
+       napi_enable(&nvchan->napi);
 
-       spin_lock_irqsave(&nvscdev->sc_lock, flags);
-       nvscdev->num_sc_offered--;
-       spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
-       if (nvscdev->num_sc_offered == 0)
+       if (refcount_dec_and_test(&nvscdev->sc_offered))
                complete(&nvscdev->channel_init_wait);
 }
 
@@ -1032,12 +1033,9 @@ int rndis_filter_device_add(struct hv_device *dev,
        struct ndis_recv_scale_cap rsscap;
        u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
        unsigned int gso_max_size = GSO_MAX_SIZE;
-       u32 mtu, size;
-       u32 num_rss_qs;
-       u32 sc_delta;
+       u32 mtu, size, num_rss_qs;
        const struct cpumask *node_cpu_mask;
        u32 num_possible_rss_qs;
-       unsigned long flags;
        int i, ret;
 
        rndis_device = get_rndis_device();
@@ -1060,7 +1058,7 @@ int rndis_filter_device_add(struct hv_device *dev,
        net_device->max_chn = 1;
        net_device->num_chn = 1;
 
-       spin_lock_init(&net_device->sc_lock);
+       refcount_set(&net_device->sc_offered, 0);
 
        net_device->extension = rndis_device;
        rndis_device->ndev = net;
@@ -1174,34 +1172,30 @@ int rndis_filter_device_add(struct hv_device *dev,
        if (ret || rsscap.num_recv_que < 2)
                goto out;
 
-       net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que);
-
-       num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn);
-
        /*
         * We will limit the VRSS channels to the number CPUs in the NUMA node
         * the primary channel is currently bound to.
+        *
+        * This also guarantees that num_possible_rss_qs <= num_online_cpus
         */
        node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
-       num_possible_rss_qs = cpumask_weight(node_cpu_mask);
+       num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
+                                   rsscap.num_recv_que);
 
-       /* We will use the given number of channels if available. */
-       if (device_info->num_chn && device_info->num_chn < net_device->max_chn)
-               net_device->num_chn = device_info->num_chn;
-       else
-               net_device->num_chn = min(num_possible_rss_qs, num_rss_qs);
+       net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
 
-       num_rss_qs = net_device->num_chn - 1;
+       /* We will use the given number of channels if available. */
+       net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
 
        for (i = 0; i < ITAB_NUM; i++)
                rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
                                                        net_device->num_chn);
 
-       net_device->num_sc_offered = num_rss_qs;
-
-       if (net_device->num_chn == 1)
-               goto out;
+       num_rss_qs = net_device->num_chn - 1;
+       if (num_rss_qs == 0)
+               return 0;
 
+       refcount_set(&net_device->sc_offered, num_rss_qs);
        vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
 
        init_packet = &net_device->channel_init_pkt;
@@ -1217,32 +1211,23 @@ int rndis_filter_device_add(struct hv_device *dev,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret)
                goto out;
-       wait_for_completion(&net_device->channel_init_wait);
-
-       if (init_packet->msg.v5_msg.subchn_comp.status !=
-           NVSP_STAT_SUCCESS) {
+       wait_for_completion(&net_device->channel_init_wait);
+
+       if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
                ret = -ENODEV;
                goto out;
        }
+
        net_device->num_chn = 1 +
                init_packet->msg.v5_msg.subchn_comp.num_subchannels;
 
-       ret = rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
-                                        net_device->num_chn);
-
-       /*
-        * Set the number of sub-channels to be received.
-        */
-       spin_lock_irqsave(&net_device->sc_lock, flags);
-       sc_delta = num_rss_qs - (net_device->num_chn - 1);
-       net_device->num_sc_offered -= sc_delta;
-       spin_unlock_irqrestore(&net_device->sc_lock, flags);
-
+       /* ignore failures from setting rss parameters, still have channels */
+       rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
+                                  net_device->num_chn);
 out:
        if (ret) {
                net_device->max_chn = 1;
                net_device->num_chn = 1;
-               net_device->num_sc_offered = 0;
        }
 
        return 0; /* return 0 because primary channel can be used alone */
@@ -1257,12 +1242,6 @@ void rndis_filter_device_remove(struct hv_device *dev,
 {
        struct rndis_device *rndis_dev = net_dev->extension;
 
-       /* If not all subchannel offers are complete, wait for them until
-        * completion to avoid race.
-        */
-       if (net_dev->num_sc_offered > 0)
-               wait_for_completion(&net_dev->channel_init_wait);
-
        /* Halt and release the rndis device */
        rndis_filter_halt_device(rndis_dev);
 
index 7b131f8e40937000a273cfabbbf04a03a4af2f17..bd63289c55e8fcb68e9e371712ad1fdfc3521a43 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/spi/spi.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/ieee802154.h>
 #include <linux/irq.h>
index b23b71981fd55689daa817d41191063300bdaaf5..224f65cb576bbf106a4779ef5f60b75f34903b1c 100644 (file)
@@ -13,7 +13,7 @@
  *
  *             Alan Cox        :       Fixed oddments for NET3.014
  *             Alan Cox        :       Rejig for NET3.029 snap #3
- *             Alan Cox        :       Fixed NET3.029 bugs and sped up
+ *             Alan Cox        :       Fixed NET3.029 bugs and sped up
  *             Larry McVoy     :       Tiny tweak to double performance
  *             Alan Cox        :       Backed out LMV's tweak - the linux mm
  *                                     can't take it...
@@ -41,7 +41,7 @@
 #include <linux/in.h>
 
 #include <linux/uaccess.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -55,6 +55,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/percpu.h>
+#include <linux/net_tstamp.h>
 #include <net/net_namespace.h>
 #include <linux/u64_stats_sync.h>
 
@@ -64,8 +65,7 @@ struct pcpu_lstats {
        struct u64_stats_sync   syncp;
 };
 
-/*
- * The higher levels take care of making this non-reentrant (it's
+/* The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
  */
 static netdev_tx_t loopback_xmit(struct sk_buff *skb,
@@ -74,6 +74,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
        struct pcpu_lstats *lb_stats;
        int len;
 
+       skb_tx_timestamp(skb);
        skb_orphan(skb);
 
        /* Before queueing this packet to netif_rx(),
@@ -129,8 +130,21 @@ static u32 always_on(struct net_device *dev)
        return 1;
 }
 
+static int loopback_get_ts_info(struct net_device *netdev,
+                               struct ethtool_ts_info *ts_info)
+{
+       ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+                                  SOF_TIMESTAMPING_RX_SOFTWARE |
+                                  SOF_TIMESTAMPING_SOFTWARE;
+
+       ts_info->phc_index = -1;
+
+       return 0;
+}
+
 static const struct ethtool_ops loopback_ethtool_ops = {
        .get_link               = always_on,
+       .get_ts_info            = loopback_get_ts_info,
 };
 
 static int loopback_dev_init(struct net_device *dev)
@@ -149,14 +163,13 @@ static void loopback_dev_free(struct net_device *dev)
 }
 
 static const struct net_device_ops loopback_ops = {
-       .ndo_init      = loopback_dev_init,
-       .ndo_start_xmit= loopback_xmit,
+       .ndo_init        = loopback_dev_init,
+       .ndo_start_xmit  = loopback_xmit,
        .ndo_get_stats64 = loopback_get_stats64,
        .ndo_set_mac_address = eth_mac_addr,
 };
 
-/*
- * The loopback device is special. There is only one instance
+/* The loopback device is special. There is only one instance
  * per network namespace.
  */
 static void loopback_setup(struct net_device *dev)
@@ -170,7 +183,7 @@ static void loopback_setup(struct net_device *dev)
        dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
        netif_keep_dst(dev);
        dev->hw_features        = NETIF_F_GSO_SOFTWARE;
-       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST
+       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST
                | NETIF_F_GSO_SOFTWARE
                | NETIF_F_HW_CSUM
                | NETIF_F_RXCSUM
@@ -206,7 +219,6 @@ static __net_init int loopback_net_init(struct net *net)
        net->loopback_dev = dev;
        return 0;
 
-
 out_free_netdev:
        free_netdev(dev);
 out:
@@ -217,5 +229,5 @@ out:
 
 /* Registered in net/core/dev.c */
 struct pernet_operations __net_initdata loopback_net_ops = {
-       .init = loopback_net_init,
+       .init = loopback_net_init,
 };
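With skb_tx_timestamp() in loopback_xmit() and the get_ts_info hook above, lo now reports software timestamping. For context, a hedged userspace sketch of requesting those stamps on a socket (standard SO_TIMESTAMPING usage, nothing loopback-specific):

#include <linux/net_tstamp.h>
#include <sys/socket.h>

/* Request software TX/RX timestamps on a socket; works over lo once
 * the driver advertises SOF_TIMESTAMPING_*_SOFTWARE as above.
 */
static int enable_sw_timestamps(int fd)
{
	int flags = SOF_TIMESTAMPING_TX_SOFTWARE |
		    SOF_TIMESTAMPING_RX_SOFTWARE |
		    SOF_TIMESTAMPING_SOFTWARE;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}

Timestamps are then delivered as SCM_TIMESTAMPING control messages on recvmsg(); see Documentation/networking/timestamping.txt.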
index 36877ba6551646bf1308066026b2f9f9dd12a5d3..4daf3d0926a82cfb52fd6cf774f0a467230c3246 100644 (file)
@@ -372,18 +372,19 @@ static void ntb_get_drvinfo(struct net_device *ndev,
        strlcpy(info->bus_info, pci_name(dev->pdev), sizeof(info->bus_info));
 }
 
-static int ntb_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int ntb_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
-       cmd->supported = SUPPORTED_Backplane;
-       cmd->advertising = ADVERTISED_Backplane;
-       ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-       cmd->duplex = DUPLEX_FULL;
-       cmd->port = PORT_OTHER;
-       cmd->phy_address = 0;
-       cmd->transceiver = XCVR_DUMMY1;
-       cmd->autoneg = AUTONEG_ENABLE;
-       cmd->maxtxpkt = 0;
-       cmd->maxrxpkt = 0;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, Backplane);
+
+       cmd->base.speed = SPEED_UNKNOWN;
+       cmd->base.duplex = DUPLEX_FULL;
+       cmd->base.port = PORT_OTHER;
+       cmd->base.phy_address = 0;
+       cmd->base.autoneg = AUTONEG_ENABLE;
 
        return 0;
 }
@@ -391,7 +392,7 @@ static int ntb_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 static const struct ethtool_ops ntb_ethtool_ops = {
        .get_drvinfo = ntb_get_drvinfo,
        .get_link = ethtool_op_get_link,
-       .get_settings = ntb_get_settings,
+       .get_link_ksettings = ntb_get_link_ksettings,
 };
 
 static const struct ntb_queue_handlers ntb_netdev_handlers = {
index 8dbd59baa34d5ed9eda97396f38b5ab38e96416a..60ffc9da6a286272d84e8dc3f4326eb908e7961a 100644 (file)
@@ -2,33 +2,12 @@
 # PHY Layer Configuration
 #
 
-menuconfig PHYLIB
-       tristate "PHY Device support and infrastructure"
-       depends on NETDEVICES
+menuconfig MDIO_DEVICE
+       tristate "MDIO bus device drivers"
        help
-         Ethernet controllers are usually attached to PHY
-         devices.  This option provides infrastructure for
-         managing PHY devices.
-
-if PHYLIB
+          MDIO devices and driver infrastructure code.
 
-config SWPHY
-       bool
-
-config LED_TRIGGER_PHY
-       bool "Support LED triggers for tracking link state"
-       depends on LEDS_TRIGGERS
-       ---help---
-         Adds support for a set of LED trigger events per-PHY.  Link
-         state change will trigger the events, for consumption by an
-         LED class driver.  There are triggers for each link speed currently
-         supported by the phy, and are of the form:
-              <mii bus id>:<phy>:<speed>
-
-         Where speed is in the form:
-               <Speed in megabits>Mbps or <Speed in gigabits>Gbps
-
-comment "MDIO bus device drivers"
+if MDIO_DEVICE
 
 config MDIO_BCM_IPROC
        tristate "Broadcom iProc MDIO bus controller"
@@ -40,7 +19,7 @@ config MDIO_BCM_IPROC
 
 config MDIO_BCM_UNIMAC
        tristate "Broadcom UniMAC MDIO bus controller"
-       depends on HAS_IOMEM
+       depends on HAS_IOMEM && OF_MDIO
        help
          This module provides a driver for the Broadcom UniMAC MDIO busses.
          This hardware can be found in the Broadcom GENET Ethernet MAC
@@ -49,6 +28,7 @@ config MDIO_BCM_UNIMAC
 
 config MDIO_BITBANG
        tristate "Bitbanged MDIO buses"
+       depends on !(MDIO_DEVICE=y && PHYLIB=m)
        help
          This module implements the MDIO bus protocol in software,
          for use by low level drivers that export the ability to
@@ -160,6 +140,36 @@ config MDIO_XGENE
          This module provides a driver for the MDIO busses found in the
          APM X-Gene SoC's.
 
+endif
+
+menuconfig PHYLIB
+       tristate "PHY Device support and infrastructure"
+       depends on NETDEVICES
+       select MDIO_DEVICE
+       help
+         Ethernet controllers are usually attached to PHY
+         devices.  This option provides infrastructure for
+         managing PHY devices.
+
+if PHYLIB
+
+config SWPHY
+       bool
+
+config LED_TRIGGER_PHY
+       bool "Support LED triggers for tracking link state"
+       depends on LEDS_TRIGGERS
+       ---help---
+         Adds support for a set of LED trigger events per-PHY.  Link
+         state change will trigger the events, for consumption by an
+         LED class driver.  There are triggers for each link speed currently
+         supported by the phy, and are of the form:
+              <mii bus id>:<phy>:<speed>
+
+         Where speed is in the form:
+               <Speed in megabits>Mbps or <Speed in gigabits>Gbps
+
+
 comment "MII PHY device drivers"
 
 config AMD_PHY
index 407b0b601ea8264b0ac8bf32a609271d43be995a..e36db9a2ba3814330277ea21980689c2d88c4018 100644 (file)
@@ -1,7 +1,20 @@
 # Makefile for Linux PHY drivers and MDIO bus drivers
 
-libphy-y                       := phy.o phy_device.o mdio_bus.o mdio_device.o \
-                                  mdio-boardinfo.o
+libphy-y                       := phy.o phy-core.o phy_device.o
+mdio-bus-y                     += mdio_bus.o mdio_device.o
+
+ifdef CONFIG_MDIO_DEVICE
+obj-y                          += mdio-boardinfo.o
+endif
+
+# PHYLIB implies MDIO_DEVICE; in that case we have a bunch of circular
+# dependencies that do not make it possible to split mdio-bus objects into a
+# dedicated loadable module, so we bundle them all together into libphy.ko
+ifdef CONFIG_PHYLIB
+libphy-y                       += $(mdio-bus-y)
+else
+obj-$(CONFIG_MDIO_DEVICE)      += mdio-bus.o
+endif
 libphy-$(CONFIG_SWPHY)         += swphy.o
 libphy-$(CONFIG_LED_TRIGGER_PHY)       += phy_led_triggers.o
 
index ab9ad689617c78d19b21ad8f35b5f1887c483b1c..171010eb4d9c5c36da0be9888fb75cc54e136768 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Broadcom Corporation
+ * Copyright (C) 2015-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -201,8 +201,7 @@ int bcm_phy_set_eee(struct phy_device *phydev, bool enable)
        int val;
 
        /* Enable EEE at PHY level */
-       val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL,
-                                   MDIO_MMD_AN);
+       val = phy_read_mmd(phydev, MDIO_MMD_AN, BRCM_CL45VEN_EEE_CONTROL);
        if (val < 0)
                return val;
 
@@ -211,22 +210,19 @@ int bcm_phy_set_eee(struct phy_device *phydev, bool enable)
        else
                val &= ~(LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X);
 
-       phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL,
-                              MDIO_MMD_AN, (u32)val);
+       phy_write_mmd(phydev, MDIO_MMD_AN, BRCM_CL45VEN_EEE_CONTROL, (u32)val);
 
        /* Advertise EEE */
-       val = phy_read_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV,
-                                   MDIO_MMD_AN);
+       val = phy_read_mmd(phydev, MDIO_MMD_AN, BCM_CL45VEN_EEE_ADV);
        if (val < 0)
                return val;
 
        if (enable)
-               val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T);
+               val |= (MDIO_EEE_100TX | MDIO_EEE_1000T);
        else
-               val &= ~(MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T);
+               val &= ~(MDIO_EEE_100TX | MDIO_EEE_1000T);
 
-       phy_write_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV,
-                              MDIO_MMD_AN, (u32)val);
+       phy_write_mmd(phydev, MDIO_MMD_AN, BCM_CL45VEN_EEE_ADV, (u32)val);
 
        return 0;
 }
index d1c2614dad3a60860f33c0ff1851f18e0d1a4253..caa9f6e17f34cb54277a941a6ca81854ac0cf1ad 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Broadcom BCM7xxx internal transceivers support.
  *
- * Copyright (C) 2014, Broadcom Corporation
+ * Copyright (C) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -19,7 +19,7 @@
 
 /* Broadcom BCM7xxx internal PHY registers */
 
-/* 40nm only register definitions */
+/* EPHY only register definitions */
 #define MII_BCM7XXX_100TX_AUX_CTL      0x10
 #define MII_BCM7XXX_100TX_FALSE_CAR    0x13
 #define MII_BCM7XXX_100TX_DISC         0x14
 #define  MII_BCM7XXX_64CLK_MDIO                BIT(12)
 #define MII_BCM7XXX_TEST               0x1f
 #define  MII_BCM7XXX_SHD_MODE_2                BIT(2)
+#define MII_BCM7XXX_SHD_2_ADDR_CTRL    0xe
+#define MII_BCM7XXX_SHD_2_CTRL_STAT    0xf
+#define MII_BCM7XXX_SHD_2_BIAS_TRIM    0x1a
+#define MII_BCM7XXX_SHD_3_AN_EEE_ADV   0x3
+#define MII_BCM7XXX_SHD_3_PCS_CTRL_2   0x6
+#define  MII_BCM7XXX_PCS_CTRL_2_DEF    0x4400
+#define MII_BCM7XXX_SHD_3_AN_STAT      0xb
+#define  MII_BCM7XXX_AN_NULL_MSG_EN    BIT(0)
+#define  MII_BCM7XXX_AN_EEE_EN         BIT(1)
+#define MII_BCM7XXX_SHD_3_EEE_THRESH   0xe
+#define  MII_BCM7XXX_EEE_THRESH_DEF    0x50
+#define MII_BCM7XXX_SHD_3_TL4          0x23
+#define  MII_BCM7XXX_TL4_RST_MSK       (BIT(2) | BIT(1))
 
 /* 28nm only register definitions */
 #define MISC_ADDR(base, channel)       base, channel
@@ -286,6 +299,181 @@ static int phy_set_clr_bits(struct phy_device *dev, int location,
        return v;
 }
 
+static int bcm7xxx_28nm_ephy_01_afe_config_init(struct phy_device *phydev)
+{
+       int ret;
+
+       /* set shadow mode 2 */
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
+                              MII_BCM7XXX_SHD_MODE_2, 0);
+       if (ret < 0)
+               return ret;
+
+       /* Set current trim values INT_trim = -1, Ext_trim = 0 */
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_BIAS_TRIM, 0x3BE0);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       /* Cal reset */
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL,
+                       MII_BCM7XXX_SHD_3_TL4);
+       if (ret < 0)
+               goto reset_shadow_mode;
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT,
+                              MII_BCM7XXX_TL4_RST_MSK, 0);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       /* Cal reset disable */
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL,
+                       MII_BCM7XXX_SHD_3_TL4);
+       if (ret < 0)
+               goto reset_shadow_mode;
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT,
+                              0, MII_BCM7XXX_TL4_RST_MSK);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+reset_shadow_mode:
+       /* reset shadow mode 2 */
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0,
+                              MII_BCM7XXX_SHD_MODE_2);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+/* The 28nm EPHY does not support Clause 45 (MMD) used by bcm-phy-lib */
+static int bcm7xxx_28nm_ephy_apd_enable(struct phy_device *phydev)
+{
+       int ret;
+
+       /* set shadow mode 1 */
+       ret = phy_set_clr_bits(phydev, MII_BRCM_FET_BRCMTEST,
+                              MII_BRCM_FET_BT_SRE, 0);
+       if (ret < 0)
+               return ret;
+
+       /* Enable auto-power down */
+       ret = phy_set_clr_bits(phydev, MII_BRCM_FET_SHDW_AUXSTAT2,
+                              MII_BRCM_FET_SHDW_AS2_APDE, 0);
+       if (ret < 0)
+               return ret;
+
+       /* reset shadow mode 1 */
+       ret = phy_set_clr_bits(phydev, MII_BRCM_FET_BRCMTEST, 0,
+                              MII_BRCM_FET_BT_SRE);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static int bcm7xxx_28nm_ephy_eee_enable(struct phy_device *phydev)
+{
+       int ret;
+
+       /* set shadow mode 2 */
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
+                              MII_BCM7XXX_SHD_MODE_2, 0);
+       if (ret < 0)
+               return ret;
+
+       /* Advertise supported modes */
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL,
+                       MII_BCM7XXX_SHD_3_AN_EEE_ADV);
+       if (ret < 0)
+               goto reset_shadow_mode;
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT,
+                       MDIO_EEE_100TX);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       /* Restore Defaults */
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL,
+                       MII_BCM7XXX_SHD_3_PCS_CTRL_2);
+       if (ret < 0)
+               goto reset_shadow_mode;
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT,
+                       MII_BCM7XXX_PCS_CTRL_2_DEF);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL,
+                       MII_BCM7XXX_SHD_3_EEE_THRESH);
+       if (ret < 0)
+               goto reset_shadow_mode;
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT,
+                       MII_BCM7XXX_EEE_THRESH_DEF);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       /* Enable EEE autonegotiation */
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL,
+                       MII_BCM7XXX_SHD_3_AN_STAT);
+       if (ret < 0)
+               goto reset_shadow_mode;
+       ret = phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT,
+                       (MII_BCM7XXX_AN_NULL_MSG_EN | MII_BCM7XXX_AN_EEE_EN));
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+reset_shadow_mode:
+       /* reset shadow mode 2 */
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0,
+                              MII_BCM7XXX_SHD_MODE_2);
+       if (ret < 0)
+               return ret;
+
+       /* Restart autoneg */
+       phy_write(phydev, MII_BMCR,
+                 (BMCR_SPEED100 | BMCR_ANENABLE | BMCR_ANRESTART));
+
+       return 0;
+}
+
+static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev)
+{
+       u8 rev = phydev->phy_id & ~phydev->drv->phy_id_mask;
+       int ret = 0;
+
+       pr_info_once("%s: %s PHY revision: 0x%02x\n",
+                    phydev_name(phydev), phydev->drv->name, rev);
+
+       /* Dummy read to a register to work around a possible issue upon reset
+        * where the internal inverter may not allow the first MDIO transaction
+        * to pass the MDIO management controller, causing such reads to
+        * return 0xffff.
+        */
+       phy_read(phydev, MII_BMSR);
+
+       /* Apply AFE software work-around if necessary */
+       if (rev == 0x01) {
+               ret = bcm7xxx_28nm_ephy_01_afe_config_init(phydev);
+               if (ret)
+                       return ret;
+       }
+
+       ret = bcm7xxx_28nm_ephy_eee_enable(phydev);
+       if (ret)
+               return ret;
+
+       return bcm7xxx_28nm_ephy_apd_enable(phydev);
+}
+
+static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev)
+{
+       int ret;
+
+       /* Re-apply workarounds coming out of suspend/resume */
+       ret = bcm7xxx_28nm_ephy_config_init(phydev);
+       if (ret)
+               return ret;
+
+       return genphy_config_aneg(phydev);
+}
+
 static int bcm7xxx_config_init(struct phy_device *phydev)
 {
        int ret;
@@ -434,6 +622,23 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
        .probe          = bcm7xxx_28nm_probe,                           \
 }
 
+#define BCM7XXX_28NM_EPHY(_oui, _name)                                 \
+{                                                                      \
+       .phy_id         = (_oui),                                       \
+       .phy_id_mask    = 0xfffffff0,                                   \
+       .name           = _name,                                        \
+       .features       = PHY_BASIC_FEATURES,                           \
+       .flags          = PHY_IS_INTERNAL,                              \
+       .config_init    = bcm7xxx_28nm_ephy_config_init,                \
+       .config_aneg    = genphy_config_aneg,                           \
+       .read_status    = genphy_read_status,                           \
+       .resume         = bcm7xxx_28nm_ephy_resume,                     \
+       .get_sset_count = bcm_phy_get_sset_count,                       \
+       .get_strings    = bcm_phy_get_strings,                          \
+       .get_stats      = bcm7xxx_28nm_get_phy_stats,                   \
+       .probe          = bcm7xxx_28nm_probe,                           \
+}
+
 #define BCM7XXX_40NM_EPHY(_oui, _name)                                 \
 {                                                                      \
        .phy_id         = (_oui),                                       \
@@ -450,6 +655,9 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
 
 static struct phy_driver bcm7xxx_driver[] = {
        BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"),
+       BCM7XXX_28NM_EPHY(PHY_ID_BCM7260, "Broadcom BCM7260"),
+       BCM7XXX_28NM_EPHY(PHY_ID_BCM7268, "Broadcom BCM7268"),
+       BCM7XXX_28NM_EPHY(PHY_ID_BCM7271, "Broadcom BCM7271"),
        BCM7XXX_28NM_GPHY(PHY_ID_BCM7278, "Broadcom BCM7278"),
        BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"),
        BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"),
@@ -466,6 +674,9 @@ static struct phy_driver bcm7xxx_driver[] = {
 
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
        { PHY_ID_BCM7250, 0xfffffff0, },
+       { PHY_ID_BCM7260, 0xfffffff0, },
+       { PHY_ID_BCM7268, 0xfffffff0, },
+       { PHY_ID_BCM7271, 0xfffffff0, },
        { PHY_ID_BCM7278, 0xfffffff0, },
        { PHY_ID_BCM7364, 0xfffffff0, },
        { PHY_ID_BCM7366, 0xfffffff0, },
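The 28nm EPHY init above open-codes the same two-step sequence repeatedly: select a shadow-bank register through MII_BCM7XXX_SHD_2_ADDR_CTRL, then write MII_BCM7XXX_SHD_2_CTRL_STAT. A hypothetical helper capturing that sequence (not part of the patch; assumes shadow mode 2 is already enabled):

/* Hypothetical helper: write one shadow-bank register while shadow
 * mode 2 is enabled. Mirrors the open-coded write pairs above.
 */
static int bcm7xxx_shd3_write(struct phy_device *phydev, u16 reg, u16 val)
{
	int ret;

	ret = phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, reg);
	if (ret < 0)
		return ret;

	return phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT, val);
}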
index 19865530e0b13c24e5d74def6cbd55e77b0ad378..b57f20e552ba83d0a0e39ef0593229b7ccd874d5 100644 (file)
@@ -133,14 +133,14 @@ static int dp83867_config_port_mirroring(struct phy_device *phydev)
                (struct dp83867_private *)phydev->priv;
        u16 val;
 
-       val = phy_read_mmd_indirect(phydev, DP83867_CFG4, DP83867_DEVADDR);
+       val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4);
 
        if (dp83867->port_mirroring == DP83867_PORT_MIRROING_EN)
                val |= DP83867_CFG4_PORT_MIRROR_EN;
        else
                val &= ~DP83867_CFG4_PORT_MIRROR_EN;
 
-       phy_write_mmd_indirect(phydev, DP83867_CFG4, DP83867_DEVADDR, val);
+       phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, val);
 
        return 0;
 }
@@ -231,8 +231,7 @@ static int dp83867_config_init(struct phy_device *phydev)
                 * register's bit 11 (marked as RESERVED).
                 */
 
-               bs = phy_read_mmd_indirect(phydev, DP83867_STRAP_STS1,
-                                          DP83867_DEVADDR);
+               bs = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_STRAP_STS1);
                if (bs & DP83867_STRAP_STS1_RESERVED)
                        val &= ~DP83867_PHYCR_RESERVED_MASK;
 
@@ -243,8 +242,7 @@ static int dp83867_config_init(struct phy_device *phydev)
 
        if ((phydev->interface >= PHY_INTERFACE_MODE_RGMII_ID) &&
            (phydev->interface <= PHY_INTERFACE_MODE_RGMII_RXID)) {
-               val = phy_read_mmd_indirect(phydev, DP83867_RGMIICTL,
-                                           DP83867_DEVADDR);
+               val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIICTL);
 
                if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
                        val |= (DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN);
@@ -255,25 +253,24 @@ static int dp83867_config_init(struct phy_device *phydev)
                if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
                        val |= DP83867_RGMII_RX_CLK_DELAY_EN;
 
-               phy_write_mmd_indirect(phydev, DP83867_RGMIICTL,
-                                      DP83867_DEVADDR, val);
+               phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIICTL, val);
 
                delay = (dp83867->rx_id_delay |
                        (dp83867->tx_id_delay << DP83867_RGMII_TX_CLK_DELAY_SHIFT));
 
-               phy_write_mmd_indirect(phydev, DP83867_RGMIIDCTL,
-                                      DP83867_DEVADDR, delay);
+               phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIIDCTL,
+                             delay);
 
                if (dp83867->io_impedance >= 0) {
-                       val = phy_read_mmd_indirect(phydev, DP83867_IO_MUX_CFG,
-                                                   DP83867_DEVADDR);
+                       val = phy_read_mmd(phydev, DP83867_DEVADDR,
+                                          DP83867_IO_MUX_CFG);
 
                        val &= ~DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL;
                        val |= dp83867->io_impedance &
                               DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL;
 
-                       phy_write_mmd_indirect(phydev, DP83867_IO_MUX_CFG,
-                                              DP83867_DEVADDR, val);
+                       phy_write_mmd(phydev, DP83867_DEVADDR,
+                                     DP83867_IO_MUX_CFG, val);
                }
        }
 
index b1fd7bb0e4dbebe2da6f7f1b13857b1b64fa76f9..55f8c52dd2f1b719b98950125c901eadc8383777 100644 (file)
@@ -166,13 +166,13 @@ static int xway_gphy_config_init(struct phy_device *phydev)
        /* Clear all pending interrupts */
        phy_read(phydev, XWAY_MDIO_ISTAT);
 
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LEDCH, MDIO_MMD_VEND2,
-                              XWAY_MMD_LEDCH_NACS_NONE |
-                              XWAY_MMD_LEDCH_SBF_F02HZ |
-                              XWAY_MMD_LEDCH_FBF_F16HZ);
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LEDCL, MDIO_MMD_VEND2,
-                              XWAY_MMD_LEDCH_CBLINK_NONE |
-                              XWAY_MMD_LEDCH_SCAN_NONE);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LEDCH,
+                     XWAY_MMD_LEDCH_NACS_NONE |
+                     XWAY_MMD_LEDCH_SBF_F02HZ |
+                     XWAY_MMD_LEDCH_FBF_F16HZ);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LEDCL,
+                     XWAY_MMD_LEDCH_CBLINK_NONE |
+                     XWAY_MMD_LEDCH_SCAN_NONE);
 
        /**
         * In most cases only one LED is connected to this phy, so
@@ -183,12 +183,12 @@ static int xway_gphy_config_init(struct phy_device *phydev)
        ledxh = XWAY_MMD_LEDxH_BLINKF_NONE | XWAY_MMD_LEDxH_CON_LINK10XX;
        ledxl = XWAY_MMD_LEDxL_PULSE_TXACT | XWAY_MMD_LEDxL_PULSE_RXACT |
                XWAY_MMD_LEDxL_BLINKS_NONE;
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LED0H, MDIO_MMD_VEND2, ledxh);
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LED0L, MDIO_MMD_VEND2, ledxl);
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LED1H, MDIO_MMD_VEND2, ledxh);
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LED1L, MDIO_MMD_VEND2, ledxl);
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LED2H, MDIO_MMD_VEND2, ledxh);
-       phy_write_mmd_indirect(phydev, XWAY_MMD_LED2L, MDIO_MMD_VEND2, ledxl);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED0H, ledxh);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED0L, ledxl);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED1H, ledxh);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED1L, ledxl);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2H, ledxh);
+       phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2L, ledxl);
 
        return 0;
 }
index 8c73b2e771ddd7c865900ceed1d9abdcefff8a21..34395230ce709bd68ba93ca46eff089bb576668f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Broadcom UniMAC MDIO bus controller driver
  *
- * Copyright (C) 2014, Broadcom Corporation
+ * Copyright (C) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -228,6 +228,7 @@ static int unimac_mdio_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id unimac_mdio_ids[] = {
+       { .compatible = "brcm,genet-mdio-v5", },
        { .compatible = "brcm,genet-mdio-v4", },
        { .compatible = "brcm,genet-mdio-v3", },
        { .compatible = "brcm,genet-mdio-v2", },
index 6b988f77da08fca5ba9e7efec8c4af354ad51ecc..1861f387820d61d527ed552e5ddb24c0009234b3 100644 (file)
@@ -24,10 +24,12 @@ static DEFINE_MUTEX(mdio_board_lock);
  * @mdiodev: MDIO device pointer
  * Context: can sleep
  */
-void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus)
+void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus,
+                                          int (*cb)
+                                          (struct mii_bus *bus,
+                                           struct mdio_board_info *bi))
 {
        struct mdio_board_entry *be;
-       struct mdio_device *mdiodev;
        struct mdio_board_info *bi;
        int ret;
 
@@ -38,23 +40,14 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus)
                if (strcmp(bus->id, bi->bus_id))
                        continue;
 
-               mdiodev = mdio_device_create(bus, bi->mdio_addr);
-               if (IS_ERR(mdiodev))
+               ret = cb(bus, bi);
+               if (ret)
                        continue;
 
-               strncpy(mdiodev->modalias, bi->modalias,
-                       sizeof(mdiodev->modalias));
-               mdiodev->bus_match = mdio_device_bus_match;
-               mdiodev->dev.platform_data = (void *)bi->platform_data;
-
-               ret = mdio_device_register(mdiodev);
-               if (ret) {
-                       mdio_device_free(mdiodev);
-                       continue;
-               }
        }
        mutex_unlock(&mdio_board_lock);
 }
+EXPORT_SYMBOL(mdiobus_setup_mdiodev_from_board_info);
 
 /**
  * mdio_register_board_info - register MDIO devices for a given board
@@ -84,3 +77,4 @@ int mdiobus_register_board_info(const struct mdio_board_info *info,
 
        return 0;
 }
+EXPORT_SYMBOL(mdiobus_register_board_info);
index 00f98163e90eff985e24bf95e8fd7293d08b0763..3a7f143904e8c5948f94b1f570b9310368855ac8 100644 (file)
@@ -14,6 +14,9 @@ struct mdio_board_entry {
        struct mdio_board_info  board_info;
 };
 
-void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus);
+void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus,
+                                          int (*cb)
+                                          (struct mii_bus *bus,
+                                           struct mdio_board_info *bi));
 
 #endif /* __MDIO_BOARD_INFO_H */
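With the callback split above, mdio-boardinfo.c no longer creates devices itself; buses pass in a constructor such as mdiobus_create_device(). Registration from platform code is unchanged. A hedged sketch with illustrative values (the exact header providing struct mdio_board_info may vary by tree):

#include <linux/module.h>
#include <linux/phy.h>

/* Illustrative values: bus_id must match the id of the mii_bus the
 * device sits on; modalias selects the driver to bind.
 */
static const struct mdio_board_info example_bi[] = {
	{
		.bus_id	   = "gpio-0",
		.modalias  = "example-switch",
		.mdio_addr = 4,
	},
};

static int __init example_register(void)
{
	return mdiobus_register_board_info(example_bi,
					   ARRAY_SIZE(example_bi));
}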
index f095051beb549133db52cf3acd41ad9e3dbed29c..3e2ac07b6e372322c53125014cbac8b40ea1434f 100644 (file)
@@ -229,7 +229,7 @@ static int xgene_xfi_mdio_write(struct mii_bus *bus, int phy_id,
 
        val = SET_VAL(HSTPHYADX, phy_id) | SET_VAL(HSTREGADX, reg) |
              SET_VAL(HSTMIIMWRDAT, data);
-       xgene_enet_wr_mdio_csr(addr, MIIM_FIELD_ADDR, data);
+       xgene_enet_wr_mdio_csr(addr, MIIM_FIELD_ADDR, val);
 
        val = HSTLDCMD | SET_VAL(HSTMIIMCMD, MIIM_CMD_LEGACY_WRITE);
        xgene_enet_wr_mdio_csr(addr, MIIM_COMMAND_ADDR, val);
index fa7d51f14869efa8b94ce161e5bd4cb96b4951d3..5a214f3b867184e9b016b4802850393516d2cba3 100644 (file)
@@ -289,6 +289,36 @@ static inline void of_mdiobus_link_mdiodev(struct mii_bus *mdio,
 }
 #endif
 
+/**
+ * mdiobus_create_device - create a full MDIO device given
+ * a mdio_board_info structure
+ * @bus: MDIO bus to create the devices on
+ * @bi: mdio_board_info structure describing the devices
+ *
+ * Returns 0 on success or < 0 on error.
+ */
+static int mdiobus_create_device(struct mii_bus *bus,
+                                struct mdio_board_info *bi)
+{
+       struct mdio_device *mdiodev;
+       int ret = 0;
+
+       mdiodev = mdio_device_create(bus, bi->mdio_addr);
+       if (IS_ERR(mdiodev))
+               return -ENODEV;
+
+       strncpy(mdiodev->modalias, bi->modalias,
+               sizeof(mdiodev->modalias));
+       mdiodev->bus_match = mdio_device_bus_match;
+       mdiodev->dev.platform_data = (void *)bi->platform_data;
+
+       ret = mdio_device_register(mdiodev);
+       if (ret)
+               mdio_device_free(mdiodev);
+
+       return ret;
+}
+
 /**
  * __mdiobus_register - bring up all the PHYs on a given bus and attach them to bus
  * @bus: target mii_bus
@@ -345,7 +375,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
                }
        }
 
-       mdiobus_setup_mdiodev_from_board_info(bus);
+       mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device);
 
        bus->state = MDIOBUS_REGISTERED;
        pr_info("%s: probed\n", bus->name);
@@ -648,9 +678,18 @@ int __init mdio_bus_init(void)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(mdio_bus_init);
 
+#if IS_ENABLED(CONFIG_PHYLIB)
 void mdio_bus_exit(void)
 {
        class_unregister(&mdio_bus_class);
        bus_unregister(&mdio_bus_type);
 }
+EXPORT_SYMBOL_GPL(mdio_bus_exit);
+#else
+module_init(mdio_bus_init);
+/* no module_exit, intentional */
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MDIO bus/device layer");
+#endif
index 6742070ca676f57694a9a6cb11364941deb520a0..b847184de6fc918ab9ff34db5a7d2cbbf690e721 100644 (file)
@@ -637,8 +637,7 @@ static int ksz8873mll_config_aneg(struct phy_device *phydev)
  * MMD extended PHY registers.
  */
 static int
-ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int ptrad, int devnum,
-                     int regnum)
+ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum)
 {
        return -1;
 }
@@ -646,10 +645,10 @@ ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int ptrad, int devnum,
 /* This routine does nothing since the Micrel ksz9021 does not support
  * standard IEEE MMD extended PHY registers.
  */
-static void
-ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int ptrad, int devnum,
-                     int regnum, u32 val)
+static int
+ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum, u16 val)
 {
+       return -1;
 }
 
 static int kszphy_get_sset_count(struct phy_device *phydev)
@@ -962,8 +961,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_stats      = kszphy_get_stats,
        .suspend        = genphy_suspend,
        .resume         = genphy_resume,
-       .read_mmd_indirect = ksz9021_rd_mmd_phyreg,
-       .write_mmd_indirect = ksz9021_wr_mmd_phyreg,
+       .read_mmd       = ksz9021_rd_mmd_phyreg,
+       .write_mmd      = ksz9021_wr_mmd_phyreg,
 }, {
        .phy_id         = PHY_ID_KSZ9031,
        .phy_id_mask    = MICREL_PHY_ID_MASK,
index 324fbf6ad8ff8fcc51e92cd2f4d26399b00dfc41..2b2f543cf9f030dbea5b2c478f6b8930f5fa520e 100644 (file)
@@ -78,9 +78,8 @@ static int lan88xx_probe(struct phy_device *phydev)
        priv->wolopts = 0;
 
        /* these values can be used to identify internal PHY */
-       priv->chip_id = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_ID, 3);
-       priv->chip_rev = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_REV,
-                                              3);
+       priv->chip_id = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_ID);
+       priv->chip_rev = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_REV);
 
        phydev->priv = priv;
 
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
new file mode 100644 (file)
index 0000000..357a4d0
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Core PHY library, taken from phy.c
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/export.h>
+#include <linux/phy.h>
+
+static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
+                            u16 regnum)
+{
+       /* Write the desired MMD Devad */
+       bus->write(bus, phy_addr, MII_MMD_CTRL, devad);
+
+       /* Write the desired MMD register address */
+       bus->write(bus, phy_addr, MII_MMD_DATA, regnum);
+
+       /* Select the Function : DATA with no post increment */
+       bus->write(bus, phy_addr, MII_MMD_CTRL, devad | MII_MMD_CTRL_NOINCR);
+}
+
+/**
+ * phy_read_mmd - Convenience function for reading a register
+ * from an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to read from (0..31)
+ * @regnum: The register on the MMD to read (0..65535)
+ *
+ * Same rules as for phy_read();
+ */
+int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
+{
+       int val;
+
+       if (regnum > (u16)~0 || devad >= 32)
+               return -EINVAL;
+
+       if (phydev->drv->read_mmd) {
+               val = phydev->drv->read_mmd(phydev, devad, regnum);
+       } else if (phydev->is_c45) {
+               u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff);
+
+               val = mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr);
+       } else {
+               struct mii_bus *bus = phydev->mdio.bus;
+               int phy_addr = phydev->mdio.addr;
+
+               mutex_lock(&bus->mdio_lock);
+               mmd_phy_indirect(bus, phy_addr, devad, regnum);
+
+               /* Read the content of the MMD's selected register */
+               val = bus->read(bus, phy_addr, MII_MMD_DATA);
+               mutex_unlock(&bus->mdio_lock);
+       }
+       return val;
+}
+EXPORT_SYMBOL(phy_read_mmd);
+
+/**
+ * phy_write_mmd - Convenience function for writing a register
+ * on an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to read from
+ * @regnum: The register on the MMD to read
+ * @val: value to write to @regnum
+ *
+ * Same rules as for phy_write();
+ */
+int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
+{
+       int ret;
+
+       if (regnum > (u16)~0 || devad >= 32)
+               return -EINVAL;
+
+       if (phydev->drv->write_mmd) {
+               ret = phydev->drv->write_mmd(phydev, devad, regnum, val);
+       } else if (phydev->is_c45) {
+               u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff);
+
+               ret = mdiobus_write(phydev->mdio.bus, phydev->mdio.addr,
+                                   addr, val);
+       } else {
+               struct mii_bus *bus = phydev->mdio.bus;
+               int phy_addr = phydev->mdio.addr;
+
+               mutex_lock(&bus->mdio_lock);
+               mmd_phy_indirect(bus, phy_addr, devad, regnum);
+
+               /* Write the data into MMD's selected register */
+               bus->write(bus, phy_addr, MII_MMD_DATA, val);
+               mutex_unlock(&bus->mdio_lock);
+
+               ret = 0;
+       }
+       return ret;
+}
+EXPORT_SYMBOL(phy_write_mmd);
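The new phy-core.c hides all three access paths (driver read_mmd/write_mmd hooks, native Clause 45, and the Clause 22 indirect sequence through registers 13/14) behind phy_read_mmd()/phy_write_mmd(). Caller code reduces to, for example (a sketch using standard constants from linux/mdio.h):

#include <linux/mdio.h>
#include <linux/phy.h>

/* Read the EEE ability register from the PCS MMD; the core picks
 * the right access method for this PHY automatically.
 */
static int example_eee_able(struct phy_device *phydev)
{
	int val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);

	if (val < 0)
		return val;			/* MDIO error */

	return val & MDIO_EEE_100TX;		/* e.g. test the 100BASE-TX bit */
}

This is what the conversions in bcm-phy-lib.c, dp83867.c, intel-xway.c, microchip.c, and phy.c elsewhere in this series switch to.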
index 1be69d8bc90948e82f92736b8f7ee9d274b9bd2b..867c42154087dabf95795aa2af6b95794c80b857 100644 (file)
@@ -1192,91 +1192,6 @@ void phy_mac_interrupt(struct phy_device *phydev, int new_link)
 }
 EXPORT_SYMBOL(phy_mac_interrupt);
 
-static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
-                                   int addr)
-{
-       /* Write the desired MMD Devad */
-       bus->write(bus, addr, MII_MMD_CTRL, devad);
-
-       /* Write the desired MMD register address */
-       bus->write(bus, addr, MII_MMD_DATA, prtad);
-
-       /* Select the Function : DATA with no post increment */
-       bus->write(bus, addr, MII_MMD_CTRL, (devad | MII_MMD_CTRL_NOINCR));
-}
-
-/**
- * phy_read_mmd_indirect - reads data from the MMD registers
- * @phydev: The PHY device bus
- * @prtad: MMD Address
- * @devad: MMD DEVAD
- *
- * Description: it reads data from the MMD registers (clause 22 to access to
- * clause 45) of the specified phy address.
- * To read these register we have:
- * 1) Write reg 13 // DEVAD
- * 2) Write reg 14 // MMD Address
- * 3) Write reg 13 // MMD Data Command for MMD DEVAD
- * 3) Read  reg 14 // Read MMD data
- */
-int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad)
-{
-       struct phy_driver *phydrv = phydev->drv;
-       int addr = phydev->mdio.addr;
-       int value = -1;
-
-       if (!phydrv->read_mmd_indirect) {
-               struct mii_bus *bus = phydev->mdio.bus;
-
-               mutex_lock(&bus->mdio_lock);
-               mmd_phy_indirect(bus, prtad, devad, addr);
-
-               /* Read the content of the MMD's selected register */
-               value = bus->read(bus, addr, MII_MMD_DATA);
-               mutex_unlock(&bus->mdio_lock);
-       } else {
-               value = phydrv->read_mmd_indirect(phydev, prtad, devad, addr);
-       }
-       return value;
-}
-EXPORT_SYMBOL(phy_read_mmd_indirect);
-
-/**
- * phy_write_mmd_indirect - writes data to the MMD registers
- * @phydev: The PHY device
- * @prtad: MMD Address
- * @devad: MMD DEVAD
- * @data: data to write in the MMD register
- *
- * Description: Write data from the MMD registers of the specified
- * phy address.
- * To write these register we have:
- * 1) Write reg 13 // DEVAD
- * 2) Write reg 14 // MMD Address
- * 3) Write reg 13 // MMD Data Command for MMD DEVAD
- * 3) Write reg 14 // Write MMD data
- */
-void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
-                                  int devad, u32 data)
-{
-       struct phy_driver *phydrv = phydev->drv;
-       int addr = phydev->mdio.addr;
-
-       if (!phydrv->write_mmd_indirect) {
-               struct mii_bus *bus = phydev->mdio.bus;
-
-               mutex_lock(&bus->mdio_lock);
-               mmd_phy_indirect(bus, prtad, devad, addr);
-
-               /* Write the data into MMD's selected register */
-               bus->write(bus, addr, MII_MMD_DATA, data);
-               mutex_unlock(&bus->mdio_lock);
-       } else {
-               phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data);
-       }
-}
-EXPORT_SYMBOL(phy_write_mmd_indirect);
-
 /**
  * phy_init_eee - init and check the EEE feature
  * @phydev: target phy_device struct
@@ -1293,15 +1208,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
                return -EIO;
 
        /* According to 802.3az, the EEE is supported only in full-duplex mode.
-        * Also EEE feature is active when core is operating with MII, GMII
-        * or RGMII (all kinds). Internal PHYs are also allowed to proceed and
-        * should return an error if they do not support EEE.
         */
-       if ((phydev->duplex == DUPLEX_FULL) &&
-           ((phydev->interface == PHY_INTERFACE_MODE_MII) ||
-           (phydev->interface == PHY_INTERFACE_MODE_GMII) ||
-            phy_interface_is_rgmii(phydev) ||
-            phy_is_internal(phydev))) {
+       if (phydev->duplex == DUPLEX_FULL) {
                int eee_lp, eee_cap, eee_adv;
                u32 lp, cap, adv;
                int status;
@@ -1312,8 +1220,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
                        return status;
 
                /* First check if the EEE ability is supported */
-               eee_cap = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE,
-                                               MDIO_MMD_PCS);
+               eee_cap = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
                if (eee_cap <= 0)
                        goto eee_exit_err;
 
@@ -1324,13 +1231,11 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
                /* Check which link settings negotiated and verify it in
                 * the EEE advertising registers.
                 */
-               eee_lp = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE,
-                                              MDIO_MMD_AN);
+               eee_lp = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE);
                if (eee_lp <= 0)
                        goto eee_exit_err;
 
-               eee_adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV,
-                                               MDIO_MMD_AN);
+               eee_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
                if (eee_adv <= 0)
                        goto eee_exit_err;
 
@@ -1343,14 +1248,12 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
                        /* Configure the PHY to stop receiving xMII
                         * clock while it is signaling LPI.
                         */
-                       int val = phy_read_mmd_indirect(phydev, MDIO_CTRL1,
-                                                       MDIO_MMD_PCS);
+                       int val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
                        if (val < 0)
                                return val;
 
                        val |= MDIO_PCS_CTRL1_CLKSTOP_EN;
-                       phy_write_mmd_indirect(phydev, MDIO_CTRL1,
-                                              MDIO_MMD_PCS, val);
+                       phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, val);
                }
 
                return 0; /* EEE supported */
@@ -1372,7 +1275,7 @@ int phy_get_eee_err(struct phy_device *phydev)
        if (!phydev->drv)
                return -EIO;
 
-       return phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_WK_ERR, MDIO_MMD_PCS);
+       return phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_WK_ERR);
 }
 EXPORT_SYMBOL(phy_get_eee_err);
 
@@ -1392,19 +1295,19 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data)
                return -EIO;
 
        /* Get Supported EEE */
-       val = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, MDIO_MMD_PCS);
+       val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
        if (val < 0)
                return val;
        data->supported = mmd_eee_cap_to_ethtool_sup_t(val);
 
        /* Get advertisement EEE */
-       val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN);
+       val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
        if (val < 0)
                return val;
        data->advertised = mmd_eee_adv_to_ethtool_adv_t(val);
 
        /* Get LP advertisement EEE */
-       val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, MDIO_MMD_AN);
+       val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE);
        if (val < 0)
                return val;
        data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val);
@@ -1422,15 +1325,37 @@ EXPORT_SYMBOL(phy_ethtool_get_eee);
  */
 int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
 {
-       int val = ethtool_adv_to_mmd_eee_adv_t(data->advertised);
+       int cap, old_adv, adv, ret;
 
        if (!phydev->drv)
                return -EIO;
 
+       /* Get Supported EEE */
+       cap = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
+       if (cap < 0)
+               return cap;
+
+       old_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
+       if (old_adv < 0)
+               return old_adv;
+
+       adv = ethtool_adv_to_mmd_eee_adv_t(data->advertised) & cap;
+
        /* Mask prohibited EEE modes */
-       val &= ~phydev->eee_broken_modes;
+       adv &= ~phydev->eee_broken_modes;
+
+       if (old_adv != adv) {
+               ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv);
+               if (ret < 0)
+                       return ret;
 
-       phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, val);
+               /* Restart autonegotiation so the new modes get sent to the
+                * link partner.
+                */
+               ret = genphy_restart_aneg(phydev);
+               if (ret < 0)
+                       return ret;
+       }
 
        return 0;
 }
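
The phy.c hunks above replace the indirect MMD helpers with phy_read_mmd()
and phy_write_mmd(). Note the swapped argument order: the old helpers took
(phydev, regnum, devad), while the new ones take (phydev, devad, regnum).
A minimal conversion sketch mirroring the clock-stop hunk above; the
function name is illustrative:

#include <linux/mdio.h>
#include <linux/phy.h>

static int example_enable_eee_clkstop(struct phy_device *phydev)
{
	int val;

	/* was: phy_read_mmd_indirect(phydev, MDIO_CTRL1, MDIO_MMD_PCS) */
	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
	if (val < 0)
		return val;

	val |= MDIO_PCS_CTRL1_CLKSTOP_EN;

	/* was: phy_write_mmd_indirect(phydev, MDIO_CTRL1, MDIO_MMD_PCS, val) */
	return phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, val);
}
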
index 5198ccfa347f8b4bfb5ee5e0c69ee12fb44ec681..1219eeab69d1ff4b94fceb942e38c9b9e989ae40 100644 (file)
@@ -1217,7 +1217,7 @@ static int genphy_config_eee_advert(struct phy_device *phydev)
         * supported by the phy. If we read 0, EEE is not advertised.
         * In either case, we don't need to continue.
         */
-       adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN);
+       adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
        if (adv <= 0)
                return 0;
 
@@ -1228,7 +1228,7 @@ static int genphy_config_eee_advert(struct phy_device *phydev)
        if (old_adv == adv)
                return 0;
 
-       phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, adv);
+       phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv);
 
        return 1;
 }
index fb32eaf2255d84a7de2842f60f91b52cf17c1717..cef6967b039617fdd909e783ca479a8241eebd88 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
+#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/netdevice.h>
 #include <linux/smscphy.h>
index 34cc3c590aa5c5c49509159d8fbf0f0cfcfca988..bbd707b9ef7a6a305804ed0d56c3fc0e1db7d565 100644 (file)
@@ -1931,6 +1931,8 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
                return -EINVAL;
 
        tun->set_features = features;
+       tun->dev->wanted_features &= ~TUN_USER_FEATURES;
+       tun->dev->wanted_features |= features;
        netdev_update_features(tun->dev);
 
        return 0;
@@ -2442,18 +2444,16 @@ static struct miscdevice tun_miscdev = {
 
 /* ethtool interface */
 
-static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-       cmd->supported          = 0;
-       cmd->advertising        = 0;
-       ethtool_cmd_speed_set(cmd, SPEED_10);
-       cmd->duplex             = DUPLEX_FULL;
-       cmd->port               = PORT_TP;
-       cmd->phy_address        = 0;
-       cmd->transceiver        = XCVR_INTERNAL;
-       cmd->autoneg            = AUTONEG_DISABLE;
-       cmd->maxtxpkt           = 0;
-       cmd->maxrxpkt           = 0;
+static int tun_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
+{
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       cmd->base.speed         = SPEED_10;
+       cmd->base.duplex        = DUPLEX_FULL;
+       cmd->base.port          = PORT_TP;
+       cmd->base.phy_address   = 0;
+       cmd->base.autoneg       = AUTONEG_DISABLE;
        return 0;
 }
 
@@ -2516,7 +2516,6 @@ static int tun_set_coalesce(struct net_device *dev,
 }
 
 static const struct ethtool_ops tun_ethtool_ops = {
-       .get_settings   = tun_get_settings,
        .get_drvinfo    = tun_get_drvinfo,
        .get_msglevel   = tun_get_msglevel,
        .set_msglevel   = tun_set_msglevel,
@@ -2524,6 +2523,7 @@ static const struct ethtool_ops tun_ethtool_ops = {
        .get_ts_info    = ethtool_op_get_ts_info,
        .get_coalesce   = tun_get_coalesce,
        .set_coalesce   = tun_set_coalesce,
+       .get_link_ksettings = tun_get_link_ksettings,
 };
 
 static int tun_queue_resize(struct tun_struct *tun)
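
tun.c is the first of several drivers in this series moved from the legacy
get_settings/set_settings ethtool callbacks (fixed u32 mode masks in
struct ethtool_cmd) to get_link_ksettings/set_link_ksettings, where link
modes live in wide bitmaps and the scalar parameters move under cmd->base.
A hedged sketch of the new callback shape, with illustrative modes:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int example_get_link_ksettings(struct net_device *dev,
				      struct ethtool_link_ksettings *cmd)
{
	/* Clear, then set individual bits in the wide link-mode bitmaps */
	ethtool_link_ksettings_zero_link_mode(cmd, supported);
	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Full);
	ethtool_link_ksettings_add_link_mode(cmd, supported, TP);

	/* Scalar link parameters now sit in cmd->base */
	cmd->base.speed   = SPEED_10;
	cmd->base.duplex  = DUPLEX_FULL;
	cmd->base.port    = PORT_TP;
	cmd->base.autoneg = AUTONEG_DISABLE;

	return 0;
}
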
index 0dd510604118bc8c26c5ec9a84410edbe16a4d8d..a3aa0a27dfe56b22121a0571cc4eaca1b2bbee03 100644 (file)
@@ -136,9 +136,9 @@ static const struct ethtool_ops ax88172_ethtool_ops = {
        .get_eeprom_len         = asix_get_eeprom_len,
        .get_eeprom             = asix_get_eeprom,
        .set_eeprom             = asix_set_eeprom,
-       .get_settings           = usbnet_get_settings,
-       .set_settings           = usbnet_set_settings,
        .nway_reset             = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static void ax88172_set_multicast(struct net_device *net)
@@ -206,6 +206,7 @@ static const struct net_device_ops ax88172_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = asix_ioctl,
@@ -301,9 +302,9 @@ static const struct ethtool_ops ax88772_ethtool_ops = {
        .get_eeprom_len         = asix_get_eeprom_len,
        .get_eeprom             = asix_get_eeprom,
        .set_eeprom             = asix_set_eeprom,
-       .get_settings           = usbnet_get_settings,
-       .set_settings           = usbnet_set_settings,
        .nway_reset             = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static int ax88772_link_reset(struct usbnet *dev)
@@ -591,6 +592,7 @@ static const struct net_device_ops ax88772_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = asix_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = asix_ioctl,
@@ -775,9 +777,9 @@ static const struct ethtool_ops ax88178_ethtool_ops = {
        .get_eeprom_len         = asix_get_eeprom_len,
        .get_eeprom             = asix_get_eeprom,
        .set_eeprom             = asix_set_eeprom,
-       .get_settings           = usbnet_get_settings,
-       .set_settings           = usbnet_set_settings,
        .nway_reset             = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static int marvell_phy_init(struct usbnet *dev)
@@ -1044,6 +1046,7 @@ static const struct net_device_ops ax88178_netdev_ops = {
        .ndo_stop               = usbnet_stop,
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = asix_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = asix_set_multicast,
index 6308386b09dfeafcb12c7912b06e8acf6aaff69b..501576f538546392381471da43f0d2897df243bd 100644 (file)
@@ -143,6 +143,7 @@ static const struct net_device_ops ax88172a_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = asix_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = ax88172a_ioctl,
index a3a7db0702d8d7ece5d4999ce2426bef316f0e06..51cf60092a18e33924f52c568d91a33f30828b21 100644 (file)
@@ -620,16 +620,18 @@ ax88179_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
        return 0;
 }
 
-static int ax88179_get_settings(struct net_device *net, struct ethtool_cmd *cmd)
+static int ax88179_get_link_ksettings(struct net_device *net,
+                                     struct ethtool_link_ksettings *cmd)
 {
        struct usbnet *dev = netdev_priv(net);
-       return mii_ethtool_gset(&dev->mii, cmd);
+       return mii_ethtool_get_link_ksettings(&dev->mii, cmd);
 }
 
-static int ax88179_set_settings(struct net_device *net, struct ethtool_cmd *cmd)
+static int ax88179_set_link_ksettings(struct net_device *net,
+                                     const struct ethtool_link_ksettings *cmd)
 {
        struct usbnet *dev = netdev_priv(net);
-       return mii_ethtool_sset(&dev->mii, cmd);
+       return mii_ethtool_set_link_ksettings(&dev->mii, cmd);
 }
 
 static int
@@ -826,11 +828,11 @@ static const struct ethtool_ops ax88179_ethtool_ops = {
        .set_wol                = ax88179_set_wol,
        .get_eeprom_len         = ax88179_get_eeprom_len,
        .get_eeprom             = ax88179_get_eeprom,
-       .get_settings           = ax88179_get_settings,
-       .set_settings           = ax88179_set_settings,
        .get_eee                = ax88179_get_eee,
        .set_eee                = ax88179_set_eee,
        .nway_reset             = usbnet_nway_reset,
+       .get_link_ksettings     = ax88179_get_link_ksettings,
+       .set_link_ksettings     = ax88179_set_link_ksettings,
 };
 
 static void ax88179_set_multicast(struct net_device *net)
@@ -957,6 +959,7 @@ static const struct net_device_ops ax88179_netdev_ops = {
        .ndo_stop               = usbnet_stop,
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_change_mtu         = ax88179_change_mtu,
        .ndo_set_mac_address    = ax88179_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 0acc9b640419a2e94bc9a2d3d43a5a2a65800c8b..fce92f0e5abd56ba863a297ebb9eb6ea25ff37b1 100644 (file)
@@ -688,29 +688,34 @@ static void catc_get_drvinfo(struct net_device *dev,
        usb_make_path(catc->usbdev, info->bus_info, sizeof(info->bus_info));
 }
 
-static int catc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int catc_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct catc *catc = netdev_priv(dev);
        if (!catc->is_f5u011)
                return -EOPNOTSUPP;
 
-       cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_TP;
-       cmd->advertising = ADVERTISED_10baseT_Half | ADVERTISED_TP;
-       ethtool_cmd_speed_set(cmd, SPEED_10);
-       cmd->duplex = DUPLEX_HALF;
-       cmd->port = PORT_TP; 
-       cmd->phy_address = 0;
-       cmd->transceiver = XCVR_INTERNAL;
-       cmd->autoneg = AUTONEG_DISABLE;
-       cmd->maxtxpkt = 1;
-       cmd->maxrxpkt = 1;
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Half);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+
+       cmd->base.speed = SPEED_10;
+       cmd->base.duplex = DUPLEX_HALF;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = 0;
+       cmd->base.autoneg = AUTONEG_DISABLE;
+
        return 0;
 }
 
 static const struct ethtool_ops ops = {
        .get_drvinfo = catc_get_drvinfo,
-       .get_settings = catc_get_settings,
-       .get_link = ethtool_op_get_link
+       .get_link = ethtool_op_get_link,
+       .get_link_ksettings = catc_get_link_ksettings,
 };
 
 /*
index 3a98f3762a4c81debc023d04b9c01876c6cbdc5f..a6b997cffd3b6e4c4f039a3d3dee5f386bb17e48 100644 (file)
@@ -100,6 +100,7 @@ static const struct net_device_ops cdc_mbim_netdev_ops = {
        .ndo_stop             = usbnet_stop,
        .ndo_start_xmit       = usbnet_start_xmit,
        .ndo_tx_timeout       = usbnet_tx_timeout,
+       .ndo_get_stats64      = usbnet_get_stats64,
        .ndo_change_mtu       = cdc_ncm_change_mtu,
        .ndo_set_mac_address  = eth_mac_addr,
        .ndo_validate_addr    = eth_validate_addr,
index f317984f75360141ee606b8ca31b78baa853a1d5..bb3f71f9fbde06ef1a8d514dbb856c8aa7ff5130 100644 (file)
@@ -131,8 +131,6 @@ static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 s
 static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx);
 
 static const struct ethtool_ops cdc_ncm_ethtool_ops = {
-       .get_settings      = usbnet_get_settings,
-       .set_settings      = usbnet_set_settings,
        .get_link          = usbnet_get_link,
        .nway_reset        = usbnet_nway_reset,
        .get_drvinfo       = usbnet_get_drvinfo,
@@ -142,6 +140,8 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
        .get_sset_count    = cdc_ncm_get_sset_count,
        .get_strings       = cdc_ncm_get_strings,
        .get_ethtool_stats = cdc_ncm_get_ethtool_stats,
+       .get_link_ksettings      = usbnet_get_link_ksettings,
+       .set_link_ksettings      = usbnet_set_link_ksettings,
 };
 
 static u32 cdc_ncm_check_rx_max(struct usbnet *dev, u32 new_rx)
@@ -753,6 +753,7 @@ static const struct net_device_ops cdc_ncm_netdev_ops = {
        .ndo_stop            = usbnet_stop,
        .ndo_start_xmit      = usbnet_start_xmit,
        .ndo_tx_timeout      = usbnet_tx_timeout,
+       .ndo_get_stats64     = usbnet_get_stats64,
        .ndo_change_mtu      = cdc_ncm_change_mtu,
        .ndo_set_mac_address = eth_mac_addr,
        .ndo_validate_addr   = eth_validate_addr,
index 0b4bdd39106b0a73e954070a42ac87be22ac1821..b91f92e4e5f22d659d89c35d8accc6ef03191b74 100644 (file)
@@ -281,9 +281,9 @@ static const struct ethtool_ops dm9601_ethtool_ops = {
        .set_msglevel   = usbnet_set_msglevel,
        .get_eeprom_len = dm9601_get_eeprom_len,
        .get_eeprom     = dm9601_get_eeprom,
-       .get_settings   = usbnet_get_settings,
-       .set_settings   = usbnet_set_settings,
        .nway_reset     = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static void dm9601_set_multicast(struct net_device *net)
@@ -343,6 +343,7 @@ static const struct net_device_ops dm9601_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = dm9601_ioctl,
        .ndo_set_rx_mode        = dm9601_set_multicast,
index 4ff70b22c6eec0e0516373b02b66c71fae158028..5a43b77a6b9c60ef4f2b667ae0ffaa86aa65d4bd 100644 (file)
@@ -144,6 +144,7 @@ static const struct net_device_ops int51x1_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = int51x1_set_multicast,
index 9889a70ff4f6fece5bfabbfb45a3470f721a5a32..a17e32bf5f924c11d18db475ae144aaafa3566ab 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/mdio.h>
+#include <linux/phy.h>
 #include <net/ip6_checksum.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
@@ -1952,10 +1953,10 @@ static int lan8835_fixup(struct phy_device *phydev)
        struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
 
        /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */
-       buf = phy_read_mmd_indirect(phydev, 0x8010, 3);
+       buf = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x8010);
        buf &= ~0x1800;
        buf |= 0x0800;
-       phy_write_mmd_indirect(phydev, 0x8010, 3, buf);
+       phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf);
 
        /* RGMII MAC TXC Delay Enable */
        ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
@@ -1975,11 +1976,11 @@ static int ksz9031rnx_fixup(struct phy_device *phydev)
 
        /* Micrel9301RNX PHY configuration */
        /* RGMII Control Signal Pad Skew */
-       phy_write_mmd_indirect(phydev, 4, 2, 0x0077);
+       phy_write_mmd(phydev, MDIO_MMD_WIS, 4, 0x0077);
        /* RGMII RX Data Pad Skew */
-       phy_write_mmd_indirect(phydev, 5, 2, 0x7777);
+       phy_write_mmd(phydev, MDIO_MMD_WIS, 5, 0x7777);
        /* RGMII RX Clock Pad Skew */
-       phy_write_mmd_indirect(phydev, 8, 2, 0x1FF);
+       phy_write_mmd(phydev, MDIO_MMD_WIS, 8, 0x1FF);
 
        dev->interface = PHY_INTERFACE_MODE_RGMII_RXID;
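
With the devad argument now explicit, the magic second argument of the old
indirect calls becomes a named clause 45 MMD constant (MDIO_MMD_PMAPMD = 1,
MDIO_MMD_WIS = 2, MDIO_MMD_PCS = 3 in linux/mdio.h). A hedged one-line
sketch of the ksz9031rnx conversion pattern above; the register and value
are taken from the hunk, the function name is illustrative:

#include <linux/mdio.h>
#include <linux/phy.h>

static void example_rgmii_pad_skew(struct phy_device *phydev)
{
	/* was: phy_write_mmd_indirect(phydev, 4, 2, 0x0077) */
	phy_write_mmd(phydev, MDIO_MMD_WIS, 4, 0x0077);
}
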
 
index 4f345bd4e6e29558daf29c3d472d2c0768c3202f..5a47e5510ca8243eed8f12cc86a4ec70ac42658e 100644 (file)
@@ -464,9 +464,9 @@ static const struct ethtool_ops mcs7830_ethtool_ops = {
        .get_link               = usbnet_get_link,
        .get_msglevel           = usbnet_get_msglevel,
        .set_msglevel           = usbnet_set_msglevel,
-       .get_settings           = usbnet_get_settings,
-       .set_settings           = usbnet_set_settings,
        .nway_reset             = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static const struct net_device_ops mcs7830_netdev_ops = {
@@ -475,6 +475,7 @@ static const struct net_device_ops mcs7830_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = mcs7830_ioctl,
        .ndo_set_rx_mode        = mcs7830_set_multicast,
index 36674484c6fb9b73011619824f7bc60c50b9c1ad..321e059e13ae698d6173d0b34fd1bd538f04966b 100644 (file)
@@ -953,20 +953,22 @@ static inline void pegasus_reset_wol(struct net_device *dev)
 }
 
 static int
-pegasus_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+pegasus_get_link_ksettings(struct net_device *dev,
+                          struct ethtool_link_ksettings *ecmd)
 {
        pegasus_t *pegasus;
 
        pegasus = netdev_priv(dev);
-       mii_ethtool_gset(&pegasus->mii, ecmd);
+       mii_ethtool_get_link_ksettings(&pegasus->mii, ecmd);
        return 0;
 }
 
 static int
-pegasus_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+pegasus_set_link_ksettings(struct net_device *dev,
+                          const struct ethtool_link_ksettings *ecmd)
 {
        pegasus_t *pegasus = netdev_priv(dev);
-       return mii_ethtool_sset(&pegasus->mii, ecmd);
+       return mii_ethtool_set_link_ksettings(&pegasus->mii, ecmd);
 }
 
 static int pegasus_nway_reset(struct net_device *dev)
@@ -995,14 +997,14 @@ static void pegasus_set_msglevel(struct net_device *dev, u32 v)
 
 static const struct ethtool_ops ops = {
        .get_drvinfo = pegasus_get_drvinfo,
-       .get_settings = pegasus_get_settings,
-       .set_settings = pegasus_set_settings,
        .nway_reset = pegasus_nway_reset,
        .get_link = pegasus_get_link,
        .get_msglevel = pegasus_get_msglevel,
        .set_msglevel = pegasus_set_msglevel,
        .get_wol = pegasus_get_wol,
        .set_wol = pegasus_set_wol,
+       .get_link_ksettings = pegasus_get_link_ksettings,
+       .set_link_ksettings = pegasus_set_link_ksettings,
 };
 
 static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd)
index 8056745506832867165f03ae0b24c2f1a578d849..adbed261cc8aed15138fb6090258988c6c112a85 100644 (file)
@@ -58,12 +58,198 @@ struct qmi_wwan_state {
 
 enum qmi_wwan_flags {
        QMI_WWAN_FLAG_RAWIP = 1 << 0,
+       QMI_WWAN_FLAG_MUX = 1 << 1,
 };
 
 enum qmi_wwan_quirks {
        QMI_WWAN_QUIRK_DTR = 1 << 0,    /* needs "set DTR" request */
 };
 
+struct qmimux_hdr {
+       u8 pad;
+       u8 mux_id;
+       __be16 pkt_len;
+};
+
+struct qmimux_priv {
+       struct net_device *real_dev;
+       u8 mux_id;
+};
+
+static int qmimux_open(struct net_device *dev)
+{
+       struct qmimux_priv *priv = netdev_priv(dev);
+       struct net_device *real_dev = priv->real_dev;
+
+       if (!(priv->real_dev->flags & IFF_UP))
+               return -ENETDOWN;
+
+       if (netif_carrier_ok(real_dev))
+               netif_carrier_on(dev);
+       return 0;
+}
+
+static int qmimux_stop(struct net_device *dev)
+{
+       netif_carrier_off(dev);
+       return 0;
+}
+
+static netdev_tx_t qmimux_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct qmimux_priv *priv = netdev_priv(dev);
+       unsigned int len = skb->len;
+       struct qmimux_hdr *hdr;
+
+       hdr = (struct qmimux_hdr *)skb_push(skb, sizeof(struct qmimux_hdr));
+       hdr->pad = 0;
+       hdr->mux_id = priv->mux_id;
+       hdr->pkt_len = cpu_to_be16(len);
+       skb->dev = priv->real_dev;
+       return dev_queue_xmit(skb);
+}
+
+static const struct net_device_ops qmimux_netdev_ops = {
+       .ndo_open       = qmimux_open,
+       .ndo_stop       = qmimux_stop,
+       .ndo_start_xmit = qmimux_start_xmit,
+};
+
+static void qmimux_setup(struct net_device *dev)
+{
+       dev->header_ops      = NULL;  /* No header */
+       dev->type            = ARPHRD_NONE;
+       dev->hard_header_len = 0;
+       dev->addr_len        = 0;
+       dev->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+       dev->netdev_ops      = &qmimux_netdev_ops;
+       dev->destructor      = free_netdev;
+}
+
+static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id)
+{
+       struct qmimux_priv *priv;
+       struct list_head *iter;
+       struct net_device *ldev;
+
+       rcu_read_lock();
+       netdev_for_each_upper_dev_rcu(dev->net, ldev, iter) {
+               priv = netdev_priv(ldev);
+               if (priv->mux_id == mux_id) {
+                       rcu_read_unlock();
+                       return ldev;
+               }
+       }
+       rcu_read_unlock();
+       return NULL;
+}
+
+static bool qmimux_has_slaves(struct usbnet *dev)
+{
+       return !list_empty(&dev->net->adj_list.upper);
+}
+
+static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+{
+       unsigned int len, offset = sizeof(struct qmimux_hdr);
+       struct qmimux_hdr *hdr;
+       struct net_device *net;
+       struct sk_buff *skbn;
+
+       while (offset < skb->len) {
+               hdr = (struct qmimux_hdr *)skb->data;
+               len = be16_to_cpu(hdr->pkt_len);
+
+               /* drop the packet, bogus length */
+               if (offset + len > skb->len)
+                       return 0;
+
+               /* control packet, we do not know what to do */
+               if (hdr->pad & 0x80)
+                       goto skip;
+
+               net = qmimux_find_dev(dev, hdr->mux_id);
+               if (!net)
+                       goto skip;
+               skbn = netdev_alloc_skb(net, len);
+               if (!skbn)
+                       return 0;
+               skbn->dev = net;
+
+               switch (skb->data[offset] & 0xf0) {
+               case 0x40:
+                       skbn->protocol = htons(ETH_P_IP);
+                       break;
+               case 0x60:
+                       skbn->protocol = htons(ETH_P_IPV6);
+                       break;
+               default:
+                       /* not IP - we do not know what to do */
+                       goto skip;
+               }
+
+               memcpy(skb_put(skbn, len), skb->data + offset, len);
+               if (netif_rx(skbn) != NET_RX_SUCCESS)
+                       return 0;
+
+skip:
+               offset += len + sizeof(struct qmimux_hdr);
+       }
+       return 1;
+}
+
+static int qmimux_register_device(struct net_device *real_dev, u8 mux_id)
+{
+       struct net_device *new_dev;
+       struct qmimux_priv *priv;
+       int err;
+
+       new_dev = alloc_netdev(sizeof(struct qmimux_priv),
+                              "qmimux%d", NET_NAME_UNKNOWN, qmimux_setup);
+       if (!new_dev)
+               return -ENOBUFS;
+
+       dev_net_set(new_dev, dev_net(real_dev));
+       priv = netdev_priv(new_dev);
+       priv->mux_id = mux_id;
+       priv->real_dev = real_dev;
+
+       err = register_netdevice(new_dev);
+       if (err < 0)
+               goto out_free_newdev;
+
+       /* Account for the reference held in struct qmimux_priv */
+       dev_hold(real_dev);
+
+       err = netdev_upper_dev_link(real_dev, new_dev);
+       if (err)
+               goto out_unregister_netdev;
+
+       netif_stacked_transfer_operstate(real_dev, new_dev);
+
+       return 0;
+
+out_unregister_netdev:
+       unregister_netdevice(new_dev);
+       dev_put(real_dev);
+
+out_free_newdev:
+       free_netdev(new_dev);
+       return err;
+}
+
+static void qmimux_unregister_device(struct net_device *dev)
+{
+       struct qmimux_priv *priv = netdev_priv(dev);
+       struct net_device *real_dev = priv->real_dev;
+
+       netdev_upper_dev_unlink(real_dev, dev);
+       unregister_netdevice(dev);
+
+       /* Get rid of the reference to real_dev */
+       dev_put(real_dev);
+}
+
 static void qmi_wwan_netdev_setup(struct net_device *net)
 {
        struct usbnet *dev = netdev_priv(net);
@@ -137,10 +323,114 @@ err:
        return ret;
 }
 
+static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf)
+{
+       struct net_device *dev = to_net_dev(d);
+       struct qmimux_priv *priv;
+       struct list_head *iter;
+       struct net_device *ldev;
+       ssize_t count = 0;
+
+       rcu_read_lock();
+       netdev_for_each_upper_dev_rcu(dev, ldev, iter) {
+               priv = netdev_priv(ldev);
+               count += scnprintf(&buf[count], PAGE_SIZE - count,
+                                  "0x%02x\n", priv->mux_id);
+       }
+       rcu_read_unlock();
+       return count;
+}
+
+static ssize_t add_mux_store(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct usbnet *dev = netdev_priv(to_net_dev(d));
+       struct qmi_wwan_state *info = (void *)&dev->data;
+       u8 mux_id;
+       int ret;
+
+       if (kstrtou8(buf, 0, &mux_id))
+               return -EINVAL;
+
+       /* mux_id [1 - 0x7f] range empirically found */
+       if (mux_id < 1 || mux_id > 0x7f)
+               return -EINVAL;
+
+       if (!rtnl_trylock())
+               return restart_syscall();
+
+       if (qmimux_find_dev(dev, mux_id)) {
+               netdev_err(dev->net, "mux_id already present\n");
+               ret = -EINVAL;
+               goto err;
+       }
+
+       /* we don't want to modify a running netdev */
+       if (netif_running(dev->net)) {
+               netdev_err(dev->net, "Cannot change a running device\n");
+               ret = -EBUSY;
+               goto err;
+       }
+
+       ret = qmimux_register_device(dev->net, mux_id);
+       if (!ret) {
+               info->flags |= QMI_WWAN_FLAG_MUX;
+               ret = len;
+       }
+err:
+       rtnl_unlock();
+       return ret;
+}
+
+static ssize_t del_mux_show(struct device *d, struct device_attribute *attr, char *buf)
+{
+       return add_mux_show(d, attr, buf);
+}
+
+static ssize_t del_mux_store(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct usbnet *dev = netdev_priv(to_net_dev(d));
+       struct qmi_wwan_state *info = (void *)&dev->data;
+       struct net_device *del_dev;
+       u8 mux_id;
+       int ret = 0;
+
+       if (kstrtou8(buf, 0, &mux_id))
+               return -EINVAL;
+
+       if (!rtnl_trylock())
+               return restart_syscall();
+
+       /* we don't want to modify a running netdev */
+       if (netif_running(dev->net)) {
+               netdev_err(dev->net, "Cannot change a running device\n");
+               ret = -EBUSY;
+               goto err;
+       }
+
+       del_dev = qmimux_find_dev(dev, mux_id);
+       if (!del_dev) {
+               netdev_err(dev->net, "mux_id not present\n");
+               ret = -EINVAL;
+               goto err;
+       }
+       qmimux_unregister_device(del_dev);
+
+       if (!qmimux_has_slaves(dev))
+               info->flags &= ~QMI_WWAN_FLAG_MUX;
+       ret = len;
+err:
+       rtnl_unlock();
+       return ret;
+}
+
 static DEVICE_ATTR_RW(raw_ip);
+static DEVICE_ATTR_RW(add_mux);
+static DEVICE_ATTR_RW(del_mux);
 
 static struct attribute *qmi_wwan_sysfs_attrs[] = {
        &dev_attr_raw_ip.attr,
+       &dev_attr_add_mux.attr,
+       &dev_attr_del_mux.attr,
        NULL,
 };
 
@@ -184,6 +474,9 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
        if (skb->len < dev->net->hard_header_len)
                return 0;
 
+       if (info->flags & QMI_WWAN_FLAG_MUX)
+               return qmimux_rx_fixup(dev, skb);
+
        switch (skb->data[0] & 0xf0) {
        case 0x40:
                proto = htons(ETH_P_IP);
@@ -249,6 +542,7 @@ static const struct net_device_ops qmi_wwan_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = qmi_wwan_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
@@ -580,6 +874,10 @@ static const struct usb_device_id products[] = {
                USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69),
                .driver_info        = (unsigned long)&qmi_wwan_info,
        },
+       {       /* Motorola Mapphone devices with MDM6600 */
+               USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff),
+               .driver_info        = (unsigned long)&qmi_wwan_info,
+       },
 
        /* 2. Combined interface devices matching on class+protocol */
        {       /* Huawei E367 and possibly others in "Windows mode" */
@@ -925,6 +1223,8 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x413c, 0x81a9, 8)},    /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81b1, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81b3, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+       {QMI_FIXED_INTF(0x413c, 0x81b6, 8)},    /* Dell Wireless 5811e */
+       {QMI_FIXED_INTF(0x413c, 0x81b6, 10)},   /* Dell Wireless 5811e */
        {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},    /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
        {QMI_FIXED_INTF(0x22de, 0x9061, 3)},    /* WeTelecom WPD-600N */
        {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},    /* SIMCom 7230E */
@@ -1030,11 +1330,33 @@ static int qmi_wwan_probe(struct usb_interface *intf,
        return usbnet_probe(intf, id);
 }
 
+static void qmi_wwan_disconnect(struct usb_interface *intf)
+{
+       struct usbnet *dev = usb_get_intfdata(intf);
+       struct qmi_wwan_state *info = (void *)&dev->data;
+       struct list_head *iter;
+       struct net_device *ldev;
+
+       if (info->flags & QMI_WWAN_FLAG_MUX) {
+               if (!rtnl_trylock()) {
+                       restart_syscall();
+                       return;
+               }
+               rcu_read_lock();
+               netdev_for_each_upper_dev_rcu(dev->net, ldev, iter)
+                       qmimux_unregister_device(ldev);
+               rcu_read_unlock();
+               rtnl_unlock();
+               info->flags &= ~QMI_WWAN_FLAG_MUX;
+       }
+       usbnet_disconnect(intf);
+}
+
 static struct usb_driver qmi_wwan_driver = {
        .name                 = "qmi_wwan",
        .id_table             = products,
        .probe                = qmi_wwan_probe,
-       .disconnect           = usbnet_disconnect,
+       .disconnect           = qmi_wwan_disconnect,
        .suspend              = qmi_wwan_suspend,
        .resume               = qmi_wwan_resume,
        .reset_resume         = qmi_wwan_resume,
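
The qmimux code added above multiplexes several IP sessions over one USB
data channel: every packet is prefixed with a 4-byte header whose mux_id
selects the matching "qmimux%d" child netdev, and child devices are
created and destroyed through the new add_mux/del_mux sysfs attributes.
A hedged sketch of the framing, mirroring qmimux_start_xmit() above (the
helper name is illustrative):

#include <linux/types.h>
#include <asm/byteorder.h>

struct qmimux_hdr {
	u8 pad;			/* bit 0x80 marks a control frame on receive */
	u8 mux_id;		/* 1..0x7f, enforced by the add_mux store */
	__be16 pkt_len;		/* payload length, network byte order */
};

static void example_build_mux_hdr(struct qmimux_hdr *hdr, u8 mux_id,
				  unsigned int payload_len)
{
	hdr->pad = 0;
	hdr->mux_id = mux_id;
	hdr->pkt_len = cpu_to_be16(payload_len);
}
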
index 986243c932ccd6fe19c592805c1c63274f5e5555..4deced102f729475dd78a013fa897a24fb5bc81f 100644 (file)
@@ -32,7 +32,7 @@
 #define NETNEXT_VERSION                "08"
 
 /* Information for net */
-#define NET_VERSION            "8"
+#define NET_VERSION            "9"
 
 #define DRIVER_VERSION         "v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -501,6 +501,8 @@ enum rtl_register_content {
 #define RTL8153_RMS            RTL8153_MAX_PACKET
 #define RTL8152_TX_TIMEOUT     (5 * HZ)
 #define RTL8152_NAPI_WEIGHT    64
+#define rx_reserved_size(x)    ((x) + VLAN_ETH_HLEN + CRC_SIZE + \
+                                sizeof(struct rx_desc) + RX_ALIGN)
 
 /* rtl8152 flags */
 enum rtl8152_flags {
@@ -1362,6 +1364,7 @@ static int alloc_all_mem(struct r8152 *tp)
        spin_lock_init(&tp->rx_lock);
        spin_lock_init(&tp->tx_lock);
        INIT_LIST_HEAD(&tp->tx_free);
+       INIT_LIST_HEAD(&tp->rx_done);
        skb_queue_head_init(&tp->tx_queue);
        skb_queue_head_init(&tp->rx_queue);
 
@@ -1761,6 +1764,7 @@ static int rx_bottom(struct r8152 *tp, int budget)
        unsigned long flags;
        struct list_head *cursor, *next, rx_queue;
        int ret = 0, work_done = 0;
+       struct napi_struct *napi = &tp->napi;
 
        if (!skb_queue_empty(&tp->rx_queue)) {
                while (work_done < budget) {
@@ -1773,7 +1777,7 @@ static int rx_bottom(struct r8152 *tp, int budget)
                                break;
 
                        pkt_len = skb->len;
-                       napi_gro_receive(&tp->napi, skb);
+                       napi_gro_receive(napi, skb);
                        work_done++;
                        stats->rx_packets++;
                        stats->rx_bytes += pkt_len;
@@ -1823,7 +1827,7 @@ static int rx_bottom(struct r8152 *tp, int budget)
                        pkt_len -= CRC_SIZE;
                        rx_data += sizeof(struct rx_desc);
 
-                       skb = napi_alloc_skb(&tp->napi, pkt_len);
+                       skb = napi_alloc_skb(napi, pkt_len);
                        if (!skb) {
                                stats->rx_dropped++;
                                goto find_next_rx;
@@ -1835,7 +1839,7 @@ static int rx_bottom(struct r8152 *tp, int budget)
                        skb->protocol = eth_type_trans(skb, netdev);
                        rtl_rx_vlan_tag(rx_desc, skb);
                        if (work_done < budget) {
-                               napi_gro_receive(&tp->napi, skb);
+                               napi_gro_receive(napi, skb);
                                work_done++;
                                stats->rx_packets++;
                                stats->rx_bytes += pkt_len;
@@ -2252,8 +2256,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 
 static void r8153_set_rx_early_size(struct r8152 *tp)
 {
-       u32 mtu = tp->netdev->mtu;
-       u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
+       u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4;
 
        ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
 }
@@ -2898,7 +2901,8 @@ static void r8153_first_init(struct r8152 *tp)
 
        rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
 
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
+       ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE;
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
        ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
 
        ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0);
@@ -2950,7 +2954,8 @@ static void r8153_enter_oob(struct r8152 *tp)
                usleep_range(1000, 2000);
        }
 
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
+       ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE;
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
 
        ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
        ocp_data &= ~TEREDO_WAKE_MASK;
@@ -3150,6 +3155,7 @@ static bool rtl8153_in_nway(struct r8152 *tp)
 static void set_carrier(struct r8152 *tp)
 {
        struct net_device *netdev = tp->netdev;
+       struct napi_struct *napi = &tp->napi;
        u8 speed;
 
        speed = rtl8152_get_speed(tp);
@@ -3159,7 +3165,7 @@ static void set_carrier(struct r8152 *tp)
                        tp->rtl_ops.enable(tp);
                        set_bit(RTL8152_SET_RX_MODE, &tp->flags);
                        netif_stop_queue(netdev);
-                       napi_disable(&tp->napi);
+                       napi_disable(napi);
                        netif_carrier_on(netdev);
                        rtl_start_rx(tp);
                        napi_enable(&tp->napi);
@@ -3169,9 +3175,9 @@ static void set_carrier(struct r8152 *tp)
        } else {
                if (netif_carrier_ok(netdev)) {
                        netif_carrier_off(netdev);
-                       napi_disable(&tp->napi);
+                       napi_disable(napi);
                        tp->rtl_ops.disable(tp);
-                       napi_enable(&tp->napi);
+                       napi_enable(napi);
                        netif_info(tp, link, netdev, "carrier off\n");
                }
        }
@@ -3633,11 +3639,13 @@ static int rtl8152_runtime_suspend(struct r8152 *tp)
                tp->rtl_ops.autosuspend_en(tp, true);
 
                if (netif_carrier_ok(netdev)) {
-                       napi_disable(&tp->napi);
+                       struct napi_struct *napi = &tp->napi;
+
+                       napi_disable(napi);
                        rtl_stop_rx(tp);
                        rxdy_gated_en(tp, false);
                        ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr);
-                       napi_enable(&tp->napi);
+                       napi_enable(napi);
                }
        }
 
@@ -3653,12 +3661,14 @@ static int rtl8152_system_suspend(struct r8152 *tp)
        netif_device_detach(netdev);
 
        if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
+               struct napi_struct *napi = &tp->napi;
+
                clear_bit(WORK_ENABLE, &tp->flags);
                usb_kill_urb(tp->intr_urb);
-               napi_disable(&tp->napi);
+               napi_disable(napi);
                cancel_delayed_work_sync(&tp->schedule);
                tp->rtl_ops.down(tp);
-               napi_enable(&tp->napi);
+               napi_enable(napi);
        }
 
        return ret;
@@ -3684,35 +3694,38 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 static int rtl8152_resume(struct usb_interface *intf)
 {
        struct r8152 *tp = usb_get_intfdata(intf);
+       struct net_device *netdev = tp->netdev;
 
        mutex_lock(&tp->control);
 
        if (!test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
                tp->rtl_ops.init(tp);
                queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
-               netif_device_attach(tp->netdev);
+               netif_device_attach(netdev);
        }
 
-       if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
+       if (netif_running(netdev) && netdev->flags & IFF_UP) {
                if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+                       struct napi_struct *napi = &tp->napi;
+
                        tp->rtl_ops.autosuspend_en(tp, false);
-                       napi_disable(&tp->napi);
+                       napi_disable(napi);
                        set_bit(WORK_ENABLE, &tp->flags);
-                       if (netif_carrier_ok(tp->netdev))
+                       if (netif_carrier_ok(netdev))
                                rtl_start_rx(tp);
-                       napi_enable(&tp->napi);
+                       napi_enable(napi);
                        clear_bit(SELECTIVE_SUSPEND, &tp->flags);
                        smp_mb__after_atomic();
                        if (!list_empty(&tp->rx_done))
                                napi_schedule(&tp->napi);
                } else {
                        tp->rtl_ops.up(tp);
-                       netif_carrier_off(tp->netdev);
+                       netif_carrier_off(netdev);
                        set_bit(WORK_ENABLE, &tp->flags);
                }
                usb_submit_urb(tp->intr_urb, GFP_KERNEL);
        } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
-               if (tp->netdev->flags & IFF_UP)
+               if (netdev->flags & IFF_UP)
                        tp->rtl_ops.autosuspend_en(tp, false);
                clear_bit(SELECTIVE_SUSPEND, &tp->flags);
        }
@@ -3800,7 +3813,8 @@ static void rtl8152_get_drvinfo(struct net_device *netdev,
 }
 
 static
-int rtl8152_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
+int rtl8152_get_link_ksettings(struct net_device *netdev,
+                              struct ethtool_link_ksettings *cmd)
 {
        struct r8152 *tp = netdev_priv(netdev);
        int ret;
@@ -3814,7 +3828,7 @@ int rtl8152_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
 
        mutex_lock(&tp->control);
 
-       ret = mii_ethtool_gset(&tp->mii, cmd);
+       ret = mii_ethtool_get_link_ksettings(&tp->mii, cmd);
 
        mutex_unlock(&tp->control);
 
@@ -3824,7 +3838,8 @@ out:
        return ret;
 }
 
-static int rtl8152_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8152_set_link_ksettings(struct net_device *dev,
+                                     const struct ethtool_link_ksettings *cmd)
 {
        struct r8152 *tp = netdev_priv(dev);
        int ret;
@@ -3835,11 +3850,12 @@ static int rtl8152_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
        mutex_lock(&tp->control);
 
-       ret = rtl8152_set_speed(tp, cmd->autoneg, cmd->speed, cmd->duplex);
+       ret = rtl8152_set_speed(tp, cmd->base.autoneg, cmd->base.speed,
+                               cmd->base.duplex);
        if (!ret) {
-               tp->autoneg = cmd->autoneg;
-               tp->speed = cmd->speed;
-               tp->duplex = cmd->duplex;
+               tp->autoneg = cmd->base.autoneg;
+               tp->speed = cmd->base.speed;
+               tp->duplex = cmd->base.duplex;
        }
 
        mutex_unlock(&tp->control);
@@ -4117,8 +4133,6 @@ static int rtl8152_set_coalesce(struct net_device *netdev,
 
 static const struct ethtool_ops ops = {
        .get_drvinfo = rtl8152_get_drvinfo,
-       .get_settings = rtl8152_get_settings,
-       .set_settings = rtl8152_set_settings,
        .get_link = ethtool_op_get_link,
        .nway_reset = rtl8152_nway_reset,
        .get_msglevel = rtl8152_get_msglevel,
@@ -4132,6 +4146,8 @@ static const struct ethtool_ops ops = {
        .set_coalesce = rtl8152_set_coalesce,
        .get_eee = rtl_ethtool_get_eee,
        .set_eee = rtl_ethtool_set_eee,
+       .get_link_ksettings = rtl8152_get_link_ksettings,
+       .set_link_ksettings = rtl8152_set_link_ksettings,
 };
 
 static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -4200,8 +4216,14 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
 
        dev->mtu = new_mtu;
 
-       if (netif_running(dev) && netif_carrier_ok(dev))
-               r8153_set_rx_early_size(tp);
+       if (netif_running(dev)) {
+               u32 rms = new_mtu + VLAN_ETH_HLEN + CRC_SIZE;
+
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rms);
+
+               if (netif_carrier_ok(dev))
+                       r8153_set_rx_early_size(tp);
+       }
 
        mutex_unlock(&tp->control);
 
@@ -4224,44 +4246,6 @@ static const struct net_device_ops rtl8152_netdev_ops = {
        .ndo_features_check     = rtl8152_features_check,
 };
 
-static void r8152b_get_version(struct r8152 *tp)
-{
-       u32     ocp_data;
-       u16     version;
-
-       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR1);
-       version = (u16)(ocp_data & VERSION_MASK);
-
-       switch (version) {
-       case 0x4c00:
-               tp->version = RTL_VER_01;
-               break;
-       case 0x4c10:
-               tp->version = RTL_VER_02;
-               break;
-       case 0x5c00:
-               tp->version = RTL_VER_03;
-               tp->mii.supports_gmii = 1;
-               break;
-       case 0x5c10:
-               tp->version = RTL_VER_04;
-               tp->mii.supports_gmii = 1;
-               break;
-       case 0x5c20:
-               tp->version = RTL_VER_05;
-               tp->mii.supports_gmii = 1;
-               break;
-       case 0x5c30:
-               tp->version = RTL_VER_06;
-               tp->mii.supports_gmii = 1;
-               break;
-       default:
-               netif_info(tp, probe, tp->netdev,
-                          "Unknown version 0x%04x\n", version);
-               break;
-       }
-}
-
 static void rtl8152_unload(struct r8152 *tp)
 {
        if (test_bit(RTL8152_UNPLUG, &tp->flags))
@@ -4326,14 +4310,66 @@ static int rtl_ops_init(struct r8152 *tp)
        return ret;
 }
 
+static u8 rtl_get_version(struct usb_interface *intf)
+{
+       struct usb_device *udev = interface_to_usbdev(intf);
+       u32 ocp_data = 0;
+       __le32 *tmp;
+       u8 version;
+       int ret;
+
+       tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+       if (!tmp)
+               return 0;
+
+       ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+                             RTL8152_REQ_GET_REGS, RTL8152_REQT_READ,
+                             PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500);
+       if (ret > 0)
+               ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK;
+
+       kfree(tmp);
+
+       switch (ocp_data) {
+       case 0x4c00:
+               version = RTL_VER_01;
+               break;
+       case 0x4c10:
+               version = RTL_VER_02;
+               break;
+       case 0x5c00:
+               version = RTL_VER_03;
+               break;
+       case 0x5c10:
+               version = RTL_VER_04;
+               break;
+       case 0x5c20:
+               version = RTL_VER_05;
+               break;
+       case 0x5c30:
+               version = RTL_VER_06;
+               break;
+       default:
+               version = RTL_VER_UNKNOWN;
+               dev_info(&intf->dev, "Unknown version 0x%04x\n", ocp_data);
+               break;
+       }
+
+       return version;
+}
+
 static int rtl8152_probe(struct usb_interface *intf,
                         const struct usb_device_id *id)
 {
        struct usb_device *udev = interface_to_usbdev(intf);
+       u8 version = rtl_get_version(intf);
        struct r8152 *tp;
        struct net_device *netdev;
        int ret;
 
+       if (version == RTL_VER_UNKNOWN)
+               return -ENODEV;
+
        if (udev->actconfig->desc.bConfigurationValue != 1) {
                usb_driver_set_configuration(udev, 1);
                return -ENODEV;
@@ -4353,8 +4389,18 @@ static int rtl8152_probe(struct usb_interface *intf,
        tp->udev = udev;
        tp->netdev = netdev;
        tp->intf = intf;
+       tp->version = version;
+
+       switch (version) {
+       case RTL_VER_01:
+       case RTL_VER_02:
+               tp->mii.supports_gmii = 0;
+               break;
+       default:
+               tp->mii.supports_gmii = 1;
+               break;
+       }
 
-       r8152b_get_version(tp);
        ret = rtl_ops_init(tp);
        if (ret)
                goto out;
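
The r8152 hunks above stop programming a fixed jumbo receive size: PLA_RMS
now tracks the live MTU (MTU plus VLAN Ethernet header plus CRC), and the
early-rx threshold becomes whatever remains of the USB aggregation buffer
after per-packet overhead, in units of four bytes. A hedged sketch of the
arithmetic; RX_DESC_SIZE and RX_ALIGN are illustrative stand-ins for the
driver's internal definitions:

#include <linux/types.h>
#include <linux/if_vlan.h>

#define CRC_SIZE	4	/* Ethernet FCS, as in the driver */
#define RX_DESC_SIZE	24	/* stand-in for sizeof(struct rx_desc) */
#define RX_ALIGN	8	/* stand-in for the driver's alignment */

static u32 example_rms(u32 mtu)
{
	/* written to PLA_RMS in r8153_first_init()/r8153_enter_oob() */
	return mtu + VLAN_ETH_HLEN + CRC_SIZE;
}

static u32 example_rx_early_size(u32 agg_buf_sz, u32 mtu)
{
	/* rx_reserved_size() from the hunk above, divided down to the
	 * 4-byte units USB_RX_EARLY_SIZE expects
	 */
	u32 reserved = example_rms(mtu) + RX_DESC_SIZE + RX_ALIGN;

	return (agg_buf_sz - reserved) / 4;
}
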
index c5b21138b7eb1723579528532deb1b7ea9c2aa72..e96e2e5673d724ea5e009b686ca32afaf921190f 100644 (file)
@@ -291,6 +291,7 @@ static const struct net_device_ops rndis_netdev_ops = {
        .ndo_stop               = usbnet_stop,
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
index c81c79110cefca9443d614679d8e7cdd4b3295c3..daaa88a66f401fbe834b126d54125b79bbcfbad4 100644 (file)
@@ -791,47 +791,52 @@ static void rtl8150_get_drvinfo(struct net_device *netdev, struct ethtool_drvinf
        usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
 }
 
-static int rtl8150_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int rtl8150_get_link_ksettings(struct net_device *netdev,
+                                     struct ethtool_link_ksettings *ecmd)
 {
        rtl8150_t *dev = netdev_priv(netdev);
        short lpa, bmcr;
+       u32 supported;
 
-       ecmd->supported = (SUPPORTED_10baseT_Half |
+       supported = (SUPPORTED_10baseT_Half |
                          SUPPORTED_10baseT_Full |
                          SUPPORTED_100baseT_Half |
                          SUPPORTED_100baseT_Full |
                          SUPPORTED_Autoneg |
                          SUPPORTED_TP | SUPPORTED_MII);
-       ecmd->port = PORT_TP;
-       ecmd->transceiver = XCVR_INTERNAL;
-       ecmd->phy_address = dev->phy;
+       ecmd->base.port = PORT_TP;
+       ecmd->base.phy_address = dev->phy;
        get_registers(dev, BMCR, 2, &bmcr);
        get_registers(dev, ANLP, 2, &lpa);
        if (bmcr & BMCR_ANENABLE) {
                u32 speed = ((lpa & (LPA_100HALF | LPA_100FULL)) ?
                             SPEED_100 : SPEED_10);
-               ethtool_cmd_speed_set(ecmd, speed);
-               ecmd->autoneg = AUTONEG_ENABLE;
+               ecmd->base.speed = speed;
+               ecmd->base.autoneg = AUTONEG_ENABLE;
                if (speed == SPEED_100)
-                       ecmd->duplex = (lpa & LPA_100FULL) ?
+                       ecmd->base.duplex = (lpa & LPA_100FULL) ?
                            DUPLEX_FULL : DUPLEX_HALF;
                else
-                       ecmd->duplex = (lpa & LPA_10FULL) ?
+                       ecmd->base.duplex = (lpa & LPA_10FULL) ?
                            DUPLEX_FULL : DUPLEX_HALF;
        } else {
-               ecmd->autoneg = AUTONEG_DISABLE;
-               ethtool_cmd_speed_set(ecmd, ((bmcr & BMCR_SPEED100) ?
-                                            SPEED_100 : SPEED_10));
-               ecmd->duplex = (bmcr & BMCR_FULLDPLX) ?
+               ecmd->base.autoneg = AUTONEG_DISABLE;
+               ecmd->base.speed = ((bmcr & BMCR_SPEED100) ?
+                                            SPEED_100 : SPEED_10);
+               ecmd->base.duplex = (bmcr & BMCR_FULLDPLX) ?
                    DUPLEX_FULL : DUPLEX_HALF;
        }
+
+       ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported,
+                                               supported);
+
        return 0;
 }
 
 static const struct ethtool_ops ops = {
        .get_drvinfo = rtl8150_get_drvinfo,
-       .get_settings = rtl8150_get_settings,
-       .get_link = ethtool_op_get_link
+       .get_link = ethtool_op_get_link,
+       .get_link_ksettings = rtl8150_get_link_ksettings,
 };
 
 static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
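
rtl8150 above keeps computing a legacy u32 capability mask and then
translates it into the new wide bitmap in a single call. A hedged sketch
of that pattern; the mask contents are illustrative:

#include <linux/ethtool.h>

static void example_fill_supported(struct ethtool_link_ksettings *cmd)
{
	u32 supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
			SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII;

	/* convert the legacy bits into cmd->link_modes.supported */
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
}
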
index ac69f28d92d2360ddf3fc4ace031046db8d7d39d..2110ab3513f0446a9a090a83de66b0044b0791c9 100644 (file)
@@ -199,6 +199,7 @@ static const struct net_device_ops sierra_net_device_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
@@ -648,9 +649,9 @@ static const struct ethtool_ops sierra_net_ethtool_ops = {
        .get_link = sierra_net_get_link,
        .get_msglevel = usbnet_get_msglevel,
        .set_msglevel = usbnet_set_msglevel,
-       .get_settings = usbnet_get_settings,
-       .set_settings = usbnet_set_settings,
        .nway_reset = usbnet_nway_reset,
+       .get_link_ksettings = usbnet_get_link_ksettings,
+       .set_link_ksettings = usbnet_set_link_ksettings,
 };
 
 static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap)
index 0b17b40d7a4fa2653caf21406c4a6b3b45d868b0..1ce01dbd494f3581236a3afcd71e145a5eb6cb52 100644 (file)
@@ -743,13 +743,13 @@ static const struct ethtool_ops smsc75xx_ethtool_ops = {
        .get_drvinfo    = usbnet_get_drvinfo,
        .get_msglevel   = usbnet_get_msglevel,
        .set_msglevel   = usbnet_set_msglevel,
-       .get_settings   = usbnet_get_settings,
-       .set_settings   = usbnet_set_settings,
        .get_eeprom_len = smsc75xx_ethtool_get_eeprom_len,
        .get_eeprom     = smsc75xx_ethtool_get_eeprom,
        .set_eeprom     = smsc75xx_ethtool_set_eeprom,
        .get_wol        = smsc75xx_ethtool_get_wol,
        .set_wol        = smsc75xx_ethtool_set_wol,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -1381,6 +1381,7 @@ static const struct net_device_ops smsc75xx_netdev_ops = {
        .ndo_stop               = usbnet_stop,
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_change_mtu         = smsc75xx_change_mtu,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 831aa33d078ae7d2dd57fdded5de71d1eb915f99..c2f67cecdf5bfcd4d7b07566c8aaaf7ec14269e2 100644
@@ -853,32 +853,32 @@ static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl)
        pdata->mdix_ctrl = mdix_ctrl;
 }
 
-static int smsc95xx_get_settings(struct net_device *net,
-                                struct ethtool_cmd *cmd)
+static int smsc95xx_get_link_ksettings(struct net_device *net,
+                                      struct ethtool_link_ksettings *cmd)
 {
        struct usbnet *dev = netdev_priv(net);
        struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
        int retval;
 
-       retval = usbnet_get_settings(net, cmd);
+       retval = usbnet_get_link_ksettings(net, cmd);
 
-       cmd->eth_tp_mdix = pdata->mdix_ctrl;
-       cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl;
+       cmd->base.eth_tp_mdix = pdata->mdix_ctrl;
+       cmd->base.eth_tp_mdix_ctrl = pdata->mdix_ctrl;
 
        return retval;
 }
 
-static int smsc95xx_set_settings(struct net_device *net,
-                                struct ethtool_cmd *cmd)
+static int smsc95xx_set_link_ksettings(struct net_device *net,
+                                      const struct ethtool_link_ksettings *cmd)
 {
        struct usbnet *dev = netdev_priv(net);
        struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
        int retval;
 
-       if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl)
-               set_mdix_status(net, cmd->eth_tp_mdix_ctrl);
+       if (pdata->mdix_ctrl != cmd->base.eth_tp_mdix_ctrl)
+               set_mdix_status(net, cmd->base.eth_tp_mdix_ctrl);
 
-       retval = usbnet_set_settings(net, cmd);
+       retval = usbnet_set_link_ksettings(net, cmd);
 
        return retval;
 }
@@ -889,8 +889,6 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = {
        .get_drvinfo    = usbnet_get_drvinfo,
        .get_msglevel   = usbnet_get_msglevel,
        .set_msglevel   = usbnet_set_msglevel,
-       .get_settings   = smsc95xx_get_settings,
-       .set_settings   = smsc95xx_set_settings,
        .get_eeprom_len = smsc95xx_ethtool_get_eeprom_len,
        .get_eeprom     = smsc95xx_ethtool_get_eeprom,
        .set_eeprom     = smsc95xx_ethtool_set_eeprom,
@@ -898,6 +896,8 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = {
        .get_regs       = smsc95xx_ethtool_getregs,
        .get_wol        = smsc95xx_ethtool_get_wol,
        .set_wol        = smsc95xx_ethtool_set_wol,
+       .get_link_ksettings     = smsc95xx_get_link_ksettings,
+       .set_link_ksettings     = smsc95xx_set_link_ksettings,
 };
 
 static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -1248,6 +1248,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = smsc95xx_ioctl,
index 4a1e9c489f1f455388ffee289d65e1d6b36cba42..317287f4409c840638f2ef30df36754805fb2410 100644
@@ -249,9 +249,9 @@ static const struct ethtool_ops sr9700_ethtool_ops = {
        .set_msglevel   = usbnet_set_msglevel,
        .get_eeprom_len = sr9700_get_eeprom_len,
        .get_eeprom     = sr9700_get_eeprom,
-       .get_settings   = usbnet_get_settings,
-       .set_settings   = usbnet_set_settings,
        .nway_reset     = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static void sr9700_set_multicast(struct net_device *netdev)
@@ -308,6 +308,7 @@ static const struct net_device_ops sr9700_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = sr9700_ioctl,
        .ndo_set_rx_mode        = sr9700_set_multicast,
index a50df0d8fb9abbd548ad6646e4a066a1211363c5..9277a0f228dfa6de355c74d2652edcf2fb1d2f4b 100644
@@ -524,9 +524,9 @@ static const struct ethtool_ops sr9800_ethtool_ops = {
        .set_wol        = sr_set_wol,
        .get_eeprom_len = sr_get_eeprom_len,
        .get_eeprom     = sr_get_eeprom,
-       .get_settings   = usbnet_get_settings,
-       .set_settings   = usbnet_set_settings,
        .nway_reset     = usbnet_nway_reset,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 static int sr9800_link_reset(struct usbnet *dev)
@@ -679,6 +679,7 @@ static const struct net_device_ops sr9800_netdev_ops = {
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = sr_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = sr_ioctl,
index 3de65ea6531a8add927c0a2d7c74e8923c0f3274..9890656af7358b6b8c8edb2d208627230cbc63ef 100644
@@ -316,6 +316,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev)
  */
 void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
 {
+       struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64);
        int     status;
 
        if (test_bit(EVENT_RX_PAUSED, &dev->flags)) {
@@ -327,8 +328,10 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
        if (skb->protocol == 0)
                skb->protocol = eth_type_trans (skb, dev->net);
 
-       dev->net->stats.rx_packets++;
-       dev->net->stats.rx_bytes += skb->len;
+       u64_stats_update_begin(&stats64->syncp);
+       stats64->rx_packets++;
+       stats64->rx_bytes += skb->len;
+       u64_stats_update_end(&stats64->syncp);
 
        netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n",
                  skb->len + sizeof (struct ethhdr), skb->protocol);
@@ -947,18 +950,20 @@ EXPORT_SYMBOL_GPL(usbnet_open);
  * they'll probably want to use this base set.
  */
 
-int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd)
+int usbnet_get_link_ksettings(struct net_device *net,
+                             struct ethtool_link_ksettings *cmd)
 {
        struct usbnet *dev = netdev_priv(net);
 
        if (!dev->mii.mdio_read)
                return -EOPNOTSUPP;
 
-       return mii_ethtool_gset(&dev->mii, cmd);
+       return mii_ethtool_get_link_ksettings(&dev->mii, cmd);
 }
-EXPORT_SYMBOL_GPL(usbnet_get_settings);
+EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings);
 
-int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd)
+int usbnet_set_link_ksettings(struct net_device *net,
+                             const struct ethtool_link_ksettings *cmd)
 {
        struct usbnet *dev = netdev_priv(net);
        int retval;
@@ -966,7 +971,7 @@ int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd)
        if (!dev->mii.mdio_write)
                return -EOPNOTSUPP;
 
-       retval = mii_ethtool_sset(&dev->mii, cmd);
+       retval = mii_ethtool_set_link_ksettings(&dev->mii, cmd);
 
        /* link speed/duplex might have changed */
        if (dev->driver_info->link_reset)
@@ -976,9 +981,39 @@ int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd)
        usbnet_update_max_qlen(dev);
 
        return retval;
+}
+EXPORT_SYMBOL_GPL(usbnet_set_link_ksettings);
 
+void usbnet_get_stats64(struct net_device *net, struct rtnl_link_stats64 *stats)
+{
+       struct usbnet *dev = netdev_priv(net);
+       unsigned int start;
+       int cpu;
+
+       netdev_stats_to_stats64(stats, &net->stats);
+
+       for_each_possible_cpu(cpu) {
+               struct pcpu_sw_netstats *stats64;
+               u64 rx_packets, rx_bytes;
+               u64 tx_packets, tx_bytes;
+
+               stats64 = per_cpu_ptr(dev->stats64, cpu);
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats64->syncp);
+                       rx_packets = stats64->rx_packets;
+                       rx_bytes = stats64->rx_bytes;
+                       tx_packets = stats64->tx_packets;
+                       tx_bytes = stats64->tx_bytes;
+               } while (u64_stats_fetch_retry_irq(&stats64->syncp, start));
+
+               stats->rx_packets += rx_packets;
+               stats->rx_bytes += rx_bytes;
+               stats->tx_packets += tx_packets;
+               stats->tx_bytes += tx_bytes;
+       }
 }
-EXPORT_SYMBOL_GPL(usbnet_set_settings);
+EXPORT_SYMBOL_GPL(usbnet_get_stats64);
 
 u32 usbnet_get_link (struct net_device *net)
 {
@@ -1038,14 +1073,14 @@ EXPORT_SYMBOL_GPL(usbnet_set_msglevel);
 
 /* drivers may override default ethtool_ops in their bind() routine */
 static const struct ethtool_ops usbnet_ethtool_ops = {
-       .get_settings           = usbnet_get_settings,
-       .set_settings           = usbnet_set_settings,
        .get_link               = usbnet_get_link,
        .nway_reset             = usbnet_nway_reset,
        .get_drvinfo            = usbnet_get_drvinfo,
        .get_msglevel           = usbnet_get_msglevel,
        .set_msglevel           = usbnet_set_msglevel,
        .get_ts_info            = ethtool_op_get_ts_info,
+       .get_link_ksettings     = usbnet_get_link_ksettings,
+       .set_link_ksettings     = usbnet_set_link_ksettings,
 };
 
 /*-------------------------------------------------------------------------*/
@@ -1211,8 +1246,12 @@ static void tx_complete (struct urb *urb)
        struct usbnet           *dev = entry->dev;
 
        if (urb->status == 0) {
-               dev->net->stats.tx_packets += entry->packets;
-               dev->net->stats.tx_bytes += entry->length;
+               struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64);
+
+               u64_stats_update_begin(&stats64->syncp);
+               stats64->tx_packets += entry->packets;
+               stats64->tx_bytes += entry->length;
+               u64_stats_update_end(&stats64->syncp);
        } else {
                dev->net->stats.tx_errors++;
 
@@ -1569,6 +1608,7 @@ void usbnet_disconnect (struct usb_interface *intf)
        usb_free_urb(dev->interrupt);
        kfree(dev->padding_pkt);
 
+       free_percpu(dev->stats64);
        free_netdev(net);
 }
 EXPORT_SYMBOL_GPL(usbnet_disconnect);
@@ -1580,6 +1620,7 @@ static const struct net_device_ops usbnet_netdev_ops = {
        .ndo_tx_timeout         = usbnet_tx_timeout,
        .ndo_set_rx_mode        = usbnet_set_rx_mode,
        .ndo_change_mtu         = usbnet_change_mtu,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
@@ -1641,6 +1682,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        dev->intf = udev;
        dev->driver_info = info;
        dev->driver_name = name;
+
+       dev->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+       if (!dev->stats64)
+               goto out0;
+
        dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV
                                | NETIF_MSG_PROBE | NETIF_MSG_LINK);
        init_waitqueue_head(&dev->wait);
@@ -1780,6 +1826,8 @@ out1:
         */
        cancel_work_sync(&dev->kevent);
        del_timer_sync(&dev->delay);
+       free_percpu(dev->stats64);
+out0:
        free_netdev(net);
 out:
        return status;
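
The usbnet core changes above scatter one pattern across several hunks: allocate pcpu_sw_netstats in probe, bump the counters under the u64_stats seqcount in the hot paths, fold all CPUs together in ndo_get_stats64, and free in disconnect. A minimal consolidated sketch, assuming a hypothetical foo_priv owner (the helpers are the real <linux/u64_stats_sync.h> and netdevice APIs):

struct foo_priv {
	struct pcpu_sw_netstats __percpu *stats64;
};

/* probe: p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); */
/* remove: free_percpu(p->stats64); */

static void foo_count_rx(struct foo_priv *p, unsigned int len)
{
	struct pcpu_sw_netstats *s = this_cpu_ptr(p->stats64);

	u64_stats_update_begin(&s->syncp);	/* 32-bit readers see a */
	s->rx_packets++;			/* consistent 64-bit pair */
	s->rx_bytes += len;
	u64_stats_update_end(&s->syncp);
}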
index 8c39d6d690e5e7f8ea6e522b8eb01a2db550c61a..317103680675f97eb056e650d5adc07fe51f67e2 100644
@@ -45,18 +45,13 @@ static struct {
        { "peer_ifindex" },
 };
 
-static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int veth_get_link_ksettings(struct net_device *dev,
+                                  struct ethtool_link_ksettings *cmd)
 {
-       cmd->supported          = 0;
-       cmd->advertising        = 0;
-       ethtool_cmd_speed_set(cmd, SPEED_10000);
-       cmd->duplex             = DUPLEX_FULL;
-       cmd->port               = PORT_TP;
-       cmd->phy_address        = 0;
-       cmd->transceiver        = XCVR_INTERNAL;
-       cmd->autoneg            = AUTONEG_DISABLE;
-       cmd->maxtxpkt           = 0;
-       cmd->maxrxpkt           = 0;
+       cmd->base.speed         = SPEED_10000;
+       cmd->base.duplex        = DUPLEX_FULL;
+       cmd->base.port          = PORT_TP;
+       cmd->base.autoneg       = AUTONEG_DISABLE;
        return 0;
 }
 
@@ -95,12 +90,12 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 }
 
 static const struct ethtool_ops veth_ethtool_ops = {
-       .get_settings           = veth_get_settings,
        .get_drvinfo            = veth_get_drvinfo,
        .get_link               = ethtool_op_get_link,
        .get_strings            = veth_get_strings,
        .get_sset_count         = veth_get_sset_count,
        .get_ethtool_stats      = veth_get_ethtool_stats,
+       .get_link_ksettings     = veth_get_link_ksettings,
 };
 
 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
index ea9890d619670e1abfba75fe608c2925d824cb1c..b0d241d110ec608848ed65054dfd54014e0afb86 100644
@@ -1636,47 +1636,57 @@ static void virtnet_get_channels(struct net_device *dev,
 }
 
 /* Check if the user is trying to change anything besides speed/duplex */
-static bool virtnet_validate_ethtool_cmd(const struct ethtool_cmd *cmd)
+static bool
+virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
 {
-       struct ethtool_cmd diff1 = *cmd;
-       struct ethtool_cmd diff2 = {};
+       struct ethtool_link_ksettings diff1 = *cmd;
+       struct ethtool_link_ksettings diff2 = {};
 
        /* cmd is always set so we need to clear it, validate the port type
         * and also without autonegotiation we can ignore advertising
         */
-       ethtool_cmd_speed_set(&diff1, 0);
-       diff2.port = PORT_OTHER;
-       diff1.advertising = 0;
-       diff1.duplex = 0;
-       diff1.cmd = 0;
+       diff1.base.speed = 0;
+       diff2.base.port = PORT_OTHER;
+       ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
+       diff1.base.duplex = 0;
+       diff1.base.cmd = 0;
+       diff1.base.link_mode_masks_nwords = 0;
 
-       return !memcmp(&diff1, &diff2, sizeof(diff1));
+       return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
+               bitmap_empty(diff1.link_modes.supported,
+                            __ETHTOOL_LINK_MODE_MASK_NBITS) &&
+               bitmap_empty(diff1.link_modes.advertising,
+                            __ETHTOOL_LINK_MODE_MASK_NBITS) &&
+               bitmap_empty(diff1.link_modes.lp_advertising,
+                            __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
-static int virtnet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int virtnet_set_link_ksettings(struct net_device *dev,
+                                     const struct ethtool_link_ksettings *cmd)
 {
        struct virtnet_info *vi = netdev_priv(dev);
        u32 speed;
 
-       speed = ethtool_cmd_speed(cmd);
+       speed = cmd->base.speed;
        /* don't allow custom speed and duplex */
        if (!ethtool_validate_speed(speed) ||
-           !ethtool_validate_duplex(cmd->duplex) ||
+           !ethtool_validate_duplex(cmd->base.duplex) ||
            !virtnet_validate_ethtool_cmd(cmd))
                return -EINVAL;
        vi->speed = speed;
-       vi->duplex = cmd->duplex;
+       vi->duplex = cmd->base.duplex;
 
        return 0;
 }
 
-static int virtnet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int virtnet_get_link_ksettings(struct net_device *dev,
+                                     struct ethtool_link_ksettings *cmd)
 {
        struct virtnet_info *vi = netdev_priv(dev);
 
-       ethtool_cmd_speed_set(cmd, vi->speed);
-       cmd->duplex = vi->duplex;
-       cmd->port = PORT_OTHER;
+       cmd->base.speed = vi->speed;
+       cmd->base.duplex = vi->duplex;
+       cmd->base.port = PORT_OTHER;
 
        return 0;
 }
@@ -1696,8 +1706,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
        .set_channels = virtnet_set_channels,
        .get_channels = virtnet_get_channels,
        .get_ts_info = ethtool_op_get_ts_info,
-       .get_settings = virtnet_get_settings,
-       .set_settings = virtnet_set_settings,
+       .get_link_ksettings = virtnet_get_link_ksettings,
+       .set_link_ksettings = virtnet_set_link_ksettings,
 };
 
 static void virtnet_freeze_down(struct virtio_device *vdev)
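
One subtlety in the converted virtio-net validation: a single memcmp() over the whole structure no longer works, because base.link_mode_masks_nwords is filled in by the ethtool core and the mode masks are bitmaps living outside base. Hence the hunk compares base on its own and then requires each mask to be untouched. A condensed sketch of the bitmap half, assuming the permitted base fields have already been cleared in cmd:

if (!bitmap_empty(cmd->link_modes.supported,
		  __ETHTOOL_LINK_MODE_MASK_NBITS) ||
    !bitmap_empty(cmd->link_modes.advertising,
		  __ETHTOOL_LINK_MODE_MASK_NBITS) ||
    !bitmap_empty(cmd->link_modes.lp_advertising,
		  __ETHTOOL_LINK_MODE_MASK_NBITS))
	return -EINVAL;	/* user touched more than speed/duplex */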
index f88ffafebfbfd40192fd5919ce970f1bf15b73fc..2ff27314e04739034cef408b59aed6a77cd98911 100644
@@ -471,22 +471,25 @@ vmxnet3_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 
 
 static int
-vmxnet3_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+vmxnet3_get_link_ksettings(struct net_device *netdev,
+                          struct ethtool_link_ksettings *ecmd)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 
-       ecmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_1000baseT_Full |
-                         SUPPORTED_TP;
-       ecmd->advertising = ADVERTISED_TP;
-       ecmd->port = PORT_TP;
-       ecmd->transceiver = XCVR_INTERNAL;
+       ethtool_link_ksettings_zero_link_mode(ecmd, supported);
+       ethtool_link_ksettings_add_link_mode(ecmd, supported, 10000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(ecmd, supported, 1000baseT_Full);
+       ethtool_link_ksettings_add_link_mode(ecmd, supported, TP);
+       ethtool_link_ksettings_zero_link_mode(ecmd, advertising);
+       ethtool_link_ksettings_add_link_mode(ecmd, advertising, TP);
+       ecmd->base.port = PORT_TP;
 
        if (adapter->link_speed) {
-               ethtool_cmd_speed_set(ecmd, adapter->link_speed);
-               ecmd->duplex = DUPLEX_FULL;
+               ecmd->base.speed = adapter->link_speed;
+               ecmd->base.duplex = DUPLEX_FULL;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               ecmd->base.speed = SPEED_UNKNOWN;
+               ecmd->base.duplex = DUPLEX_UNKNOWN;
        }
        return 0;
 }
@@ -880,7 +883,6 @@ done:
 }
 
 static const struct ethtool_ops vmxnet3_ethtool_ops = {
-       .get_settings      = vmxnet3_get_settings,
        .get_drvinfo       = vmxnet3_get_drvinfo,
        .get_regs_len      = vmxnet3_get_regs_len,
        .get_regs          = vmxnet3_get_regs,
@@ -900,6 +902,7 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = {
        .get_rxfh          = vmxnet3_get_rss,
        .set_rxfh          = vmxnet3_set_rss,
 #endif
+       .get_link_ksettings = vmxnet3_get_link_ksettings,
 };
 
 void vmxnet3_set_ethtool_ops(struct net_device *netdev)
index fea687f35b5ac6f373396a860a3ff16a8e59fd66..eb5493e835569f8d95118a5a7f73fa9adac05843 100644
@@ -104,6 +104,23 @@ static void vrf_get_stats64(struct net_device *dev,
        }
 }
 
+/* by default VRF devices do not have a qdisc and are expected
+ * to be created with only a single queue.
+ */
+static bool qdisc_tx_is_default(const struct net_device *dev)
+{
+       struct netdev_queue *txq;
+       struct Qdisc *qdisc;
+
+       if (dev->num_tx_queues > 1)
+               return false;
+
+       txq = netdev_get_tx_queue(dev, 0);
+       qdisc = rcu_access_pointer(txq->qdisc);
+
+       return !qdisc->enqueue;
+}
+
 /* Local traffic destined to local address. Reinsert the packet to rx
  * path, similar to loopback handling.
  */
@@ -357,6 +374,29 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
        return ret;
 }
 
+static int vrf_finish_direct(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       struct net_device *vrf_dev = skb->dev;
+
+       if (!list_empty(&vrf_dev->ptype_all) &&
+           likely(skb_headroom(skb) >= ETH_HLEN)) {
+               struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+
+               ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
+               eth_zero_addr(eth->h_dest);
+               eth->h_proto = skb->protocol;
+
+               rcu_read_lock_bh();
+               dev_queue_xmit_nit(skb, vrf_dev);
+               rcu_read_unlock_bh();
+
+               skb_pull(skb, ETH_HLEN);
+       }
+
+       return 1;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* modelled after ip6_finish_output2 */
 static int vrf_finish_output6(struct net *net, struct sock *sk,
@@ -405,18 +445,13 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
  * packet to go through device based features such as qdisc, netfilter
  * hooks and packet sockets with skb->dev set to vrf device.
  */
-static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
-                                  struct sock *sk,
-                                  struct sk_buff *skb)
+static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
+                                           struct sk_buff *skb)
 {
        struct net_vrf *vrf = netdev_priv(vrf_dev);
        struct dst_entry *dst = NULL;
        struct rt6_info *rt6;
 
-       /* don't divert link scope packets */
-       if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
-               return skb;
-
        rcu_read_lock();
 
        rt6 = rcu_dereference(vrf->rt6);
@@ -438,6 +473,55 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
        return skb;
 }
 
+static int vrf_output6_direct(struct net *net, struct sock *sk,
+                             struct sk_buff *skb)
+{
+       skb->protocol = htons(ETH_P_IPV6);
+
+       return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+                           net, sk, skb, NULL, skb->dev,
+                           vrf_finish_direct,
+                           !(IPCB(skb)->flags & IPSKB_REROUTED));
+}
+
+static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
+                                         struct sock *sk,
+                                         struct sk_buff *skb)
+{
+       struct net *net = dev_net(vrf_dev);
+       int err;
+
+       skb->dev = vrf_dev;
+
+       err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, vrf_dev, vrf_output6_direct);
+
+       if (likely(err == 1))
+               err = vrf_output6_direct(net, sk, skb);
+
+       /* reset skb device */
+       if (likely(err == 1))
+               nf_reset(skb);
+       else
+               skb = NULL;
+
+       return skb;
+}
+
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       /* don't divert link scope packets */
+       if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+               return skb;
+
+       if (qdisc_tx_is_default(vrf_dev))
+               return vrf_ip6_out_direct(vrf_dev, sk, skb);
+
+       return vrf_ip6_out_redirect(vrf_dev, skb);
+}
+
 /* holding rtnl */
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -462,8 +546,10 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
        }
 
        if (rt6_local) {
-               if (rt6_local->rt6i_idev)
+               if (rt6_local->rt6i_idev) {
                        in6_dev_put(rt6_local->rt6i_idev);
+                       rt6_local->rt6i_idev = NULL;
+               }
 
                dst = &rt6_local->dst;
                dev_put(dst->dev);
@@ -607,18 +693,13 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  * packet to go through device based features such as qdisc, netfilter
  * hooks and packet sockets with skb->dev set to vrf device.
  */
-static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
-                                 struct sock *sk,
-                                 struct sk_buff *skb)
+static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
+                                          struct sk_buff *skb)
 {
        struct net_vrf *vrf = netdev_priv(vrf_dev);
        struct dst_entry *dst = NULL;
        struct rtable *rth;
 
-       /* don't divert multicast */
-       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
-               return skb;
-
        rcu_read_lock();
 
        rth = rcu_dereference(vrf->rth);
@@ -640,6 +721,55 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
        return skb;
 }
 
+static int vrf_output_direct(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       skb->protocol = htons(ETH_P_IP);
+
+       return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+                           net, sk, skb, NULL, skb->dev,
+                           vrf_finish_direct,
+                           !(IPCB(skb)->flags & IPSKB_REROUTED));
+}
+
+static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
+                                        struct sock *sk,
+                                        struct sk_buff *skb)
+{
+       struct net *net = dev_net(vrf_dev);
+       int err;
+
+       skb->dev = vrf_dev;
+
+       err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, vrf_dev, vrf_output_direct);
+
+       if (likely(err == 1))
+               err = vrf_output_direct(net, sk, skb);
+
+       /* reset skb device */
+       if (likely(err == 1))
+               nf_reset(skb);
+       else
+               skb = NULL;
+
+       return skb;
+}
+
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb)
+{
+       /* don't divert multicast */
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               return skb;
+
+       if (qdisc_tx_is_default(vrf_dev))
+               return vrf_ip_out_direct(vrf_dev, sk, skb);
+
+       return vrf_ip_out_redirect(vrf_dev, skb);
+}
+
 /* called with rcu lock held */
 static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
                                  struct sock *sk,
@@ -747,14 +877,18 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 {
        int ret;
 
+       port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
        ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
        if (ret < 0)
-               return ret;
+               goto err;
 
-       port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
        cycle_netdev(port_dev);
 
        return 0;
+
+err:
+       port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
+       return ret;
 }
 
 static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
@@ -976,9 +1110,11 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
                skb->dev = vrf_dev;
                skb->skb_iif = vrf_dev->ifindex;
 
-               skb_push(skb, skb->mac_len);
-               dev_queue_xmit_nit(skb, vrf_dev);
-               skb_pull(skb, skb->mac_len);
+               if (!list_empty(&vrf_dev->ptype_all)) {
+                       skb_push(skb, skb->mac_len);
+                       dev_queue_xmit_nit(skb, vrf_dev);
+                       skb_pull(skb, skb->mac_len);
+               }
 
                IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
        }
@@ -1019,9 +1155,11 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
 
        vrf_rx_stats(vrf_dev, skb->len);
 
-       skb_push(skb, skb->mac_len);
-       dev_queue_xmit_nit(skb, vrf_dev);
-       skb_pull(skb, skb->mac_len);
+       if (!list_empty(&vrf_dev->ptype_all)) {
+               skb_push(skb, skb->mac_len);
+               dev_queue_xmit_nit(skb, vrf_dev);
+               skb_pull(skb, skb->mac_len);
+       }
 
        skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
 out:
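
Both vrf direct-output paths lean on the nf_hook() calling convention: a return of 1 means the LOCAL_OUT chain accepted the packet and the caller must invoke the output function itself; any other value means the skb was dropped, queued or stolen and must not be touched again. A condensed sketch of that contract, with foo_output as a hypothetical continuation:

err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
	      skb, NULL, dev, foo_output);
if (err == 1)
	err = foo_output(net, sk, skb);	/* hooks accepted the skb */
else
	skb = NULL;			/* dropped, queued or stolen */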
index bdb6ae16d4a85bf9539199e189011bce104ba51a..ebc98bb17a51088acecc1174b6ea3e39256f6bed 100644
@@ -276,9 +276,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
        send_eth = send_ip = true;
 
        if (type == RTM_GETNEIGH) {
-               ndm->ndm_family = AF_INET;
                send_ip = !vxlan_addr_any(&rdst->remote_ip);
                send_eth = !is_zero_ether_addr(fdb->eth_addr);
+               ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
        } else
                ndm->ndm_family = AF_BRIDGE;
        ndm->ndm_state = fdb->state;
@@ -1515,7 +1515,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
        int ns_olen;
        int i, len;
 
-       if (dev == NULL)
+       if (dev == NULL || !pskb_may_pull(request, request->len))
                return NULL;
 
        len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
@@ -1530,10 +1530,11 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
        skb_push(reply, sizeof(struct ethhdr));
        skb_reset_mac_header(reply);
 
-       ns = (struct nd_msg *)skb_transport_header(request);
+       ns = (struct nd_msg *)(ipv6_hdr(request) + 1);
 
        daddr = eth_hdr(request)->h_source;
-       ns_olen = request->len - skb_transport_offset(request) - sizeof(*ns);
+       ns_olen = request->len - skb_network_offset(request) -
+               sizeof(struct ipv6hdr) - sizeof(*ns);
        for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
                if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
                        daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
@@ -1604,10 +1605,13 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
        if (!in6_dev)
                goto out;
 
+       if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
+               goto out;
+
        iphdr = ipv6_hdr(skb);
        daddr = &iphdr->daddr;
 
-       msg = (struct nd_msg *)skb_transport_header(skb);
+       msg = (struct nd_msg *)(iphdr + 1);
        if (msg->icmph.icmp6_code != 0 ||
            msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                goto out;
@@ -2242,16 +2246,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
                if (ntohs(eth->h_proto) == ETH_P_ARP)
                        return arp_reduce(dev, skb, vni);
 #if IS_ENABLED(CONFIG_IPV6)
-               else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
-                        pskb_may_pull(skb, sizeof(struct ipv6hdr)
-                                      + sizeof(struct nd_msg)) &&
-                        ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
-                               struct nd_msg *msg;
-
-                               msg = (struct nd_msg *)skb_transport_header(skb);
-                               if (msg->icmph.icmp6_code == 0 &&
-                                   msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
-                                       return neigh_reduce(dev, skb, vni);
+               else if (ntohs(eth->h_proto) == ETH_P_IPV6) {
+                       struct ipv6hdr *hdr, _hdr;
+                       if ((hdr = skb_header_pointer(skb,
+                                                     skb_network_offset(skb),
+                                                     sizeof(_hdr), &_hdr)) &&
+                           hdr->nexthdr == IPPROTO_ICMPV6)
+                               return neigh_reduce(dev, skb, vni);
                }
 #endif
        }
@@ -2322,6 +2323,9 @@ static void vxlan_cleanup(unsigned long arg)
                        if (f->state & (NUD_PERMANENT | NUD_NOARP))
                                continue;
 
+                       if (f->flags & NTF_EXT_LEARNED)
+                               continue;
+
                        timeout = f->used + vxlan->cfg.age_interval * HZ;
                        if (time_before_eq(timeout, jiffies)) {
                                netdev_dbg(vxlan->dev,
@@ -2923,6 +2927,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
                return -EINVAL;
        }
 
+       if (lowerdev) {
+               dev->gso_max_size = lowerdev->gso_max_size;
+               dev->gso_max_segs = lowerdev->gso_max_segs;
+       }
+
        if (conf->mtu) {
                int max_mtu = ETH_MAX_MTU;
 
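
The vxlan neighbour-suppression fixes above all close the same class of bug: parsing protocol headers that may not be in the skb's linear area. Either pull the bytes in first with pskb_may_pull(), or read through skb_header_pointer(), which copies into a stack buffer when the header is fragmented. A minimal sketch of the latter, with a hypothetical consumer:

struct ipv6hdr *h6, _h6;

h6 = skb_header_pointer(skb, skb_network_offset(skb),
			sizeof(_h6), &_h6);	/* NULL if out of bounds */
if (h6 && h6->nexthdr == IPPROTO_ICMPV6)
	foo_handle_icmp6(skb);			/* hypothetical */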
index 4b83e87f0b9450bf431b48a168901371f1cab33c..20bf967a70b969d6be6e593e7ae04330d1d85daa 100644
@@ -2,7 +2,7 @@ config WCN36XX
        tristate "Qualcomm Atheros WCN3660/3680 support"
        depends on MAC80211 && HAS_DMA
        depends on QCOM_WCNSS_CTRL || QCOM_WCNSS_CTRL=n
-       depends on QCOM_SMD || QCOM_SMD=n
+       depends on RPMSG || RPMSG=n
        ---help---
          This module adds support for wireless adapters based on
          Qualcomm Atheros WCN3660 and WCN3680 mobile chipsets.
index 7a0c2e7da7f643333506e595d39fd988b535825e..bb7110f7fc8608788ce93db56a990776046497b9 100644
@@ -22,7 +22,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
 #include <linux/soc/qcom/smem_state.h>
 #include <linux/soc/qcom/wcnss_ctrl.h>
 #include "wcn36xx.h"
@@ -1218,15 +1218,13 @@ static int wcn36xx_probe(struct platform_device *pdev)
 
        INIT_WORK(&wcn->scan_work, wcn36xx_hw_scan_worker);
 
-       wcn->smd_channel = qcom_wcnss_open_channel(wcnss, "WLAN_CTRL", wcn36xx_smd_rsp_process);
+       wcn->smd_channel = qcom_wcnss_open_channel(wcnss, "WLAN_CTRL", wcn36xx_smd_rsp_process, hw);
        if (IS_ERR(wcn->smd_channel)) {
                wcn36xx_err("failed to open WLAN_CTRL channel\n");
                ret = PTR_ERR(wcn->smd_channel);
                goto out_wq;
        }
 
-       qcom_smd_set_drvdata(wcn->smd_channel, hw);
-
        addr = of_get_property(pdev->dev.of_node, "local-mac-address", &ret);
        if (addr && ret != ETH_ALEN) {
                wcn36xx_err("invalid local-mac-address\n");
index 1c2966f7db7a3f7c41660027444e17f4a908c54d..9c6590d5348ad53f64d130287f51b9fe7ccf95d8 100644
@@ -19,7 +19,7 @@
 #include <linux/etherdevice.h>
 #include <linux/firmware.h>
 #include <linux/bitops.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
 #include "smd.h"
 
 struct wcn36xx_cfg_val {
@@ -254,7 +254,7 @@ static int wcn36xx_smd_send_and_wait(struct wcn36xx *wcn, size_t len)
 
        init_completion(&wcn->hal_rsp_compl);
        start = jiffies;
-       ret = qcom_smd_send(wcn->smd_channel, wcn->hal_buf, len);
+       ret = rpmsg_send(wcn->smd_channel, wcn->hal_buf, len);
        if (ret) {
                wcn36xx_err("HAL TX failed\n");
                goto out;
@@ -2205,11 +2205,11 @@ out:
        return ret;
 }
 
-int wcn36xx_smd_rsp_process(struct qcom_smd_channel *channel,
-                           const void *buf, size_t len)
+int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
+                           void *buf, int len, void *priv, u32 addr)
 {
        const struct wcn36xx_hal_msg_header *msg_header = buf;
-       struct ieee80211_hw *hw = qcom_smd_get_drvdata(channel);
+       struct ieee80211_hw *hw = priv;
        struct wcn36xx *wcn = hw->priv;
        struct wcn36xx_hal_ind_msg *msg_ind;
        wcn36xx_dbg_dump(WCN36XX_DBG_SMD_DUMP, "SMD <<< ", buf, len);
index 8892ccd67b144903ae25cde3287e79f951e5a8c6..013fc9546f56d4b48d60a3452b4a72d3204cdaa6 100644
@@ -51,7 +51,7 @@ struct wcn36xx_hal_ind_msg {
 };
 
 struct wcn36xx;
-struct qcom_smd_channel;
+struct rpmsg_device;
 
 int wcn36xx_smd_open(struct wcn36xx *wcn);
 void wcn36xx_smd_close(struct wcn36xx *wcn);
@@ -129,8 +129,8 @@ int wcn36xx_smd_trigger_ba(struct wcn36xx *wcn, u8 sta_index);
 
 int wcn36xx_smd_update_cfg(struct wcn36xx *wcn, u32 cfg_id, u32 value);
 
-int wcn36xx_smd_rsp_process(struct qcom_smd_channel *channel,
-                           const void *buf, size_t len);
+int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
+                           void *buf, int len, void *priv, u32 addr);
 
 int wcn36xx_smd_set_mc_list(struct wcn36xx *wcn,
                            struct ieee80211_vif *vif,
index 7423998ddeb492cab624134fcb5d363db42093d9..b52b4da9a967bab2ad6b5298f76797f3557e8395 100644
@@ -195,7 +195,7 @@ struct wcn36xx {
        void __iomem            *ccu_base;
        void __iomem            *dxe_base;
 
-       struct qcom_smd_channel *smd_channel;
+       struct rpmsg_endpoint   *smd_channel;
 
        struct qcom_smem_state  *tx_enable_state;
        unsigned                tx_enable_state_bit;
index 5bc2ba214735af2a8f44394834e1c40c45820487..9b970dc2b922a8dd0e4c4199442f5976f7649798 100644
@@ -21,6 +21,7 @@
 #include <linux/pci_ids.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/interrupt.h>
 #include <linux/scatterlist.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/core.h>
index 785334f7a5386e82c32cd6fe41dee6d36b37b969..3b68eaffb48c32e709be0cdb0f12f539a06dbfc5 100644
@@ -3392,6 +3392,7 @@ static const struct net_device_ops rndis_wlan_netdev_ops = {
        .ndo_stop               = usbnet_stop,
        .ndo_start_xmit         = usbnet_start_xmit,
        .ndo_tx_timeout         = usbnet_tx_timeout,
+       .ndo_get_stats64        = usbnet_get_stats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = rndis_wlan_set_multicast_list,
index d3acc85932a569024a51d4b9d68b6dbc79dcfec8..709f56e5ad875c534dc44fac64d371c74b88fe1a 100644
@@ -10,6 +10,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/interrupt.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
 #include <linux/mmc/host.h>
index 65f86bc24c07c7032726700e09a1d9ef3cdfb3c2..faad69a1a5974b8f76acde68a9e8c8922e27d01e 100644
@@ -76,7 +76,7 @@ config QCOM_ADSP_PIL
        depends on OF && ARCH_QCOM
        depends on REMOTEPROC
        depends on QCOM_SMEM
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        select MFD_SYSCON
        select QCOM_MDT_LOADER
        select QCOM_RPROC_COMMON
@@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL
        depends on OF && ARCH_QCOM
        depends on QCOM_SMEM
        depends on REMOTEPROC
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        select MFD_SYSCON
        select QCOM_RPROC_COMMON
        select QCOM_SCM
@@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL
 config QCOM_WCNSS_PIL
        tristate "Qualcomm WCNSS Peripheral Image Loader"
        depends on OF && ARCH_QCOM
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on QCOM_SMEM
        depends on REMOTEPROC
        select QCOM_MDT_LOADER
index f12ac0b28263f1dc6ae646554cc5a0b4226a1a2a..edc008f556632b03d182efc2e306601d38519b42 100644
@@ -16,7 +16,6 @@ config RPMSG_CHAR
 config RPMSG_QCOM_SMD
        tristate "Qualcomm Shared Memory Driver (SMD)"
        depends on QCOM_SMEM
-       depends on QCOM_SMD=n
        select RPMSG
        help
          Say y here to enable support for the Qualcomm Shared Memory Driver
index 4bf55b5d78be53cd0aced1b7e5f2b59fcf93d186..3c52867dfe28e33b04f85858dfdb9285eef11ba2 100644
@@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS
          This makes debugging information from the lpfc driver
          available via the debugfs filesystem.
 
-config LPFC_NVME_INITIATOR
-       bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
-       depends on SCSI_LPFC && NVME_FC
-       ---help---
-         This enables NVME Initiator support in the Emulex lpfc driver.
-
-config LPFC_NVME_TARGET
-       bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
-       depends on SCSI_LPFC && NVME_TARGET_FC
-       ---help---
-         This enables NVME Target support in the Emulex lpfc driver.
-         Target enablement must still be enabled on a per adapter
-         basis by module parameters.
-
 config SCSI_SIM710
        tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
        depends on (EISA || MCA) && SCSI
index 524a0c755ed7e74cd790778ec7c04ae452cc853d..0d0be7754a653120a4e08c3897e37c572d3cda49 100644
@@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
        /* fill_cmd can't fail here, no data buffer to map. */
        (void) fill_cmd(c, reset_type, h, NULL, 0, 0,
                        scsi3addr, TYPE_MSG);
-       rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT);
+       rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
        if (rc) {
                dev_warn(&h->pdev->dev, "Failed to send reset command\n");
                goto out;
@@ -3714,7 +3714,7 @@ exit_failed:
  *  # (integer code indicating one of several NOT READY states
  *     describing why a volume is to be kept offline)
  */
-static int hpsa_volume_offline(struct ctlr_info *h,
+static unsigned char hpsa_volume_offline(struct ctlr_info *h,
                                        unsigned char scsi3addr[])
 {
        struct CommandList *c;
@@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
                                        DEFAULT_TIMEOUT);
        if (rc) {
                cmd_free(h, c);
-               return 0;
+               return HPSA_VPD_LV_STATUS_UNSUPPORTED;
        }
        sense = c->err_info->SenseInfo;
        if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h,
        cmd_status = c->err_info->CommandStatus;
        scsi_status = c->err_info->ScsiStatus;
        cmd_free(h, c);
-       /* Is the volume 'not ready'? */
-       if (cmd_status != CMD_TARGET_STATUS ||
-               scsi_status != SAM_STAT_CHECK_CONDITION ||
-               sense_key != NOT_READY ||
-               asc != ASC_LUN_NOT_READY)  {
-               return 0;
-       }
 
        /* Determine the reason for not ready state */
        ldstat = hpsa_get_volume_status(h, scsi3addr);
 
        /* Keep volume offline in certain cases: */
        switch (ldstat) {
+       case HPSA_LV_FAILED:
        case HPSA_LV_UNDERGOING_ERASE:
        case HPSA_LV_NOT_AVAILABLE:
        case HPSA_LV_UNDERGOING_RPI:
@@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
        default:
                break;
        }
-       return 0;
+       return HPSA_LV_OK;
 }
 
 /*
@@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
        /* Do an inquiry to the device to see what it is. */
        if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
                (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
-               /* Inquiry failed (msg printed already) */
                dev_err(&h->pdev->dev,
-                       "hpsa_update_device_info: inquiry failed\n");
-               rc = -EIO;
+                       "%s: inquiry failed, device will be skipped.\n",
+                       __func__);
+               rc = HPSA_INQUIRY_FAILED;
                goto bail_out;
        }
 
@@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h,
        if ((this_device->devtype == TYPE_DISK ||
                this_device->devtype == TYPE_ZBC) &&
                is_logical_dev_addr_mode(scsi3addr)) {
-               int volume_offline;
+               unsigned char volume_offline;
 
                hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
                if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
                        hpsa_get_ioaccel_status(h, scsi3addr, this_device);
                volume_offline = hpsa_volume_offline(h, scsi3addr);
-               if (volume_offline < 0 || volume_offline > 0xff)
-                       volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
-               this_device->volume_offline = volume_offline & 0xff;
+               if (volume_offline == HPSA_LV_FAILED) {
+                       rc = HPSA_LV_FAILED;
+                       dev_err(&h->pdev->dev,
+                               "%s: LV failed, device will be skipped.\n",
+                               __func__);
+                       goto bail_out;
+               }
        } else {
                this_device->raid_level = RAID_UNKNOWN;
                this_device->offload_config = 0;
@@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
                        goto out;
                }
                if (rc) {
-                       dev_warn(&h->pdev->dev,
-                               "Inquiry failed, skipping device.\n");
+                       h->drv_req_rescan = 1;
                        continue;
                }
 
@@ -5558,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h)
 
        spin_lock_irqsave(&h->scan_lock, flags);
        h->scan_finished = 1;
-       wake_up_all(&h->scan_wait_queue);
+       wake_up(&h->scan_wait_queue);
        spin_unlock_irqrestore(&h->scan_lock, flags);
 }
 
@@ -5576,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
        if (unlikely(lockup_detected(h)))
                return hpsa_scan_complete(h);
 
+       /*
+        * If a scan is already waiting to run, no need to add another
+        */
+       spin_lock_irqsave(&h->scan_lock, flags);
+       if (h->scan_waiting) {
+               spin_unlock_irqrestore(&h->scan_lock, flags);
+               return;
+       }
+
+       spin_unlock_irqrestore(&h->scan_lock, flags);
+
        /* wait until any scan already in progress is finished. */
        while (1) {
                spin_lock_irqsave(&h->scan_lock, flags);
                if (h->scan_finished)
                        break;
+               h->scan_waiting = 1;
                spin_unlock_irqrestore(&h->scan_lock, flags);
                wait_event(h->scan_wait_queue, h->scan_finished);
                /* Note: We don't need to worry about a race between this
@@ -5590,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
                 */
        }
        h->scan_finished = 0; /* mark scan as in progress */
+       h->scan_waiting = 0;
        spin_unlock_irqrestore(&h->scan_lock, flags);
 
        if (unlikely(lockup_detected(h)))
@@ -8792,6 +8802,7 @@ reinit_after_soft_reset:
        init_waitqueue_head(&h->event_sync_wait_queue);
        mutex_init(&h->reset_mutex);
        h->scan_finished = 1; /* no scan currently in progress */
+       h->scan_waiting = 0;
 
        pci_set_drvdata(pdev, h);
        h->ndevices = 0;
index bf6cdc1066544fa5fe2df6f5396d17ca4b4c8909..6f04f2ad412530a76d615b394250d502221d5457 100644
@@ -201,6 +201,7 @@ struct ctlr_info {
        dma_addr_t              errinfo_pool_dhandle;
        unsigned long           *cmd_pool_bits;
        int                     scan_finished;
+       u8                      scan_waiting : 1;
        spinlock_t              scan_lock;
        wait_queue_head_t       scan_wait_queue;
 
index a584cdf0705846ef13a0375ecb2e1579513ecf92..5961705eef767526f66a6dbc1bbb1e7feec70c85 100644
 #define CFGTBL_BusType_Fibre2G  0x00000200l
 
 /* VPD Inquiry types */
+#define HPSA_INQUIRY_FAILED            0x02
 #define HPSA_VPD_SUPPORTED_PAGES        0x00
 #define HPSA_VPD_LV_DEVICE_ID           0x83
 #define HPSA_VPD_LV_DEVICE_GEOMETRY     0xC1
 /* Logical volume states */
 #define HPSA_VPD_LV_STATUS_UNSUPPORTED                 0xff
 #define HPSA_LV_OK                                      0x0
+#define HPSA_LV_FAILED                                 0x01
 #define HPSA_LV_NOT_AVAILABLE                          0x0b
 #define HPSA_LV_UNDERGOING_ERASE                       0x0F
 #define HPSA_LV_UNDERGOING_RPI                         0x12
index 5c3be3e6f5e2aebfa3da8d47a92775cb17d774a4..22819afbaef5c4a229ce66ebdf8d681b47fbbc2f 100644
@@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
  * lpfc_enable_fc4_type: Defines what FC4 types are supported.
  * Supported Values:  1 - register just FCP
  *                    3 - register both FCP and NVME
- * Supported values are [1,3]. Default value is 3
+ * Supported values are [1,3]. Default value is 1
  */
-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
+LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
            LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
            "Define fc4 type to register with fabric.");
 
index 2697d49da4d7762d13430cfd9737463d909bf93f..6cc561b042118ed6d172dddb8221d77c8a793ee4 100644
@@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                /* Check to see if it matches any module parameter */
                for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
                        if (wwn == lpfc_enable_nvmet[i]) {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
                                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                                "6017 NVME Target %016llx\n",
                                                wwn);
                                phba->nvmet_support = 1; /* a match */
+#else
+                               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                               "6021 Can't enable NVME Target."
+                                               " NVME_TARGET_FC infrastructure"
+                                               " is not in kernel\n");
+#endif
                        }
                }
        }
index 0a4c1908140940cc116450cc5d5cb5a7a99de634..0024de1c6c1fea8e4568007a23296dd46f709826 100644
@@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
        /* localport is allocated from the stack, but the registration
         * call allocates heap memory as well as the private area.
         */
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
        ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
                                         &vport->phba->pcidev->dev, &localport);
 #else
@@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 void
 lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 {
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
        struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
 int
 lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
        int ret = 0;
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
@@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 void
 lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
-#ifdef CONFIG_LPFC_NVME_INITIATOR
+#if (IS_ENABLED(CONFIG_NVME_FC))
        int ret;
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
index b7739a554fe00505401bae97183ec3ce69559768..7ca868f394da62db293701af58b6797687eea89b 100644
@@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
        lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
                                           NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
 
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
        error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
                                             &phba->pcidev->dev,
                                             &phba->targetport);
@@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
        struct lpfc_nvmet_tgtport *tgtp;
 
        if (phba->nvmet_support == 0)
@@ -788,7 +788,7 @@ static void
 lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                           struct hbq_dmabuf *nvmebuf)
 {
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
        struct lpfc_nvmet_tgtport *tgtp;
        struct fc_frame_header *fc_hdr;
        struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
                            struct rqb_dmabuf *nvmebuf,
                            uint64_t isr_timestamp)
 {
-#ifdef CONFIG_LPFC_NVME_TARGET
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
        struct lpfc_nvmet_rcv_ctx *ctxp;
        struct lpfc_nvmet_tgtport *tgtp;
        struct fc_frame_header *fc_hdr;
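
The lpfc hunks above replace the removed CONFIG_LPFC_NVME_* Kconfig symbols with IS_ENABLED() tests on the generic NVME transport options. IS_ENABLED(CONFIG_X) evaluates to 1 when X is built in or modular, which is the right question to ask before calling into nvme_fc/nvmet_fc; it also works in plain C conditions, not just the preprocessor. A sketch of the semantics, with hypothetical variables:

#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))	/* true for =y or =m */
	error = nvmet_fc_register_targetport(&pinfo, &tmpl, dev, &tgtport);
#else
	error = -ENOMEM;	/* transport not in this kernel build */
#endif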
index e7e5974e1a2c435ef2ee0a79276e981fcb79cc87..2b209bbb4c9165fa7afdeff0f233f649684f8495 100644
@@ -35,8 +35,8 @@
 /*
  * MegaRAID SAS Driver meta data
  */
-#define MEGASAS_VERSION                                "07.701.16.00-rc1"
-#define MEGASAS_RELDATE                                "February 2, 2017"
+#define MEGASAS_VERSION                                "07.701.17.00-rc1"
+#define MEGASAS_RELDATE                                "March 2, 2017"
 
 /*
  * Device IDs
index 7ac9a9ee9bd473c3cc0b6178975f46e3d32f3b77..0016f12cc563e7c6e1eb3c2a87685f60c83b9747 100644
@@ -1963,6 +1963,9 @@ scan_target:
        if (!mr_device_priv_data)
                return -ENOMEM;
        sdev->hostdata = mr_device_priv_data;
+
+       atomic_set(&mr_device_priv_data->r1_ldio_hint,
+                  instance->r1_ldio_hint_default);
        return 0;
 }
 
@@ -5034,10 +5037,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe)
                                         &instance->irq_context[j]);
                        /* Retry irq register for IO_APIC*/
                        instance->msix_vectors = 0;
-                       if (is_probe)
+                       if (is_probe) {
+                               pci_free_irq_vectors(instance->pdev);
                                return megasas_setup_irqs_ioapic(instance);
-                       else
+                       } else {
                                return -1;
+                       }
                }
        }
        return 0;
@@ -5277,9 +5282,11 @@ static int megasas_init_fw(struct megasas_instance *instance)
                        MPI2_REPLY_POST_HOST_INDEX_OFFSET);
        }
 
-       i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
-       if (i < 0)
-               goto fail_setup_irqs;
+       if (!instance->msix_vectors) {
+               i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
+               if (i < 0)
+                       goto fail_setup_irqs;
+       }
 
        dev_info(&instance->pdev->dev,
                "firmware supports msix\t: (%d)", fw_msix_count);
index 29650ba669da58da099cf91e9de0aae504146bb0..f990ab4d45e1bf72b3adf8991b11c01309c7530b 100644
@@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context,
                                cpu_sel = MR_RAID_CTX_CPUSEL_1;
 
                        if (is_stream_detected(rctx_g35) &&
-                           (raid->level == 5) &&
+                           ((raid->level == 5) || (raid->level == 6)) &&
                            (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) &&
                            (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS))
                                cpu_sel = MR_RAID_CTX_CPUSEL_0;
@@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
                                fp_possible = false;
                                atomic_dec(&instance->fw_outstanding);
                        } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) ||
-                                  atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) {
+                                  (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) {
                                fp_possible = false;
                                atomic_dec(&instance->fw_outstanding);
                                if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE)
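The added "> 0" comparison is the whole fix: per the kernel's documented contract, atomic_dec_if_positive() returns the old value minus one whether or not it actually decremented, so a counter that is already 0 returns -1, which is non-zero and therefore truthy in the old test. A small sketch of that contract; the helper name is illustrative:

    #include <linux/types.h>
    #include <linux/atomic.h>

    /* Assumes the documented semantics of atomic_dec_if_positive(): the
     * store happens only if the result stays non-negative, but old - 1 is
     * returned either way.
     */
    static bool consume_hint(atomic_t *hint)
    {
    	int old_minus_one = atomic_dec_if_positive(hint);

    	/*
    	 * old == 0  -> returns -1, no decrement (truthy as a plain test!)
    	 * old == 1  -> returns  0, decremented to 0
    	 * old >= 2  -> returns >0, decremented
    	 */
    	return old_minus_one > 0;
    }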
index 64e9f507ce328637cb8675ef32a2d94631d1b0c2..414f2a772a5fca9b6b0079e24e359444ae3be577 100644 (file)
@@ -1,5 +1,5 @@
 obj-$(CONFIG_QEDF) := qedf.o
 qedf-y = qedf_dbg.o qedf_main.o qedf_io.o qedf_fip.o \
-        qedf_attr.o qedf_els.o
+        qedf_attr.o qedf_els.o drv_scsi_fw_funcs.o drv_fcoe_fw_funcs.o
 
 qedf-$(CONFIG_DEBUG_FS) += qedf_debugfs.o
diff --git a/drivers/scsi/qedf/drv_fcoe_fw_funcs.c b/drivers/scsi/qedf/drv_fcoe_fw_funcs.c
new file mode 100644 (file)
index 0000000..8c65e3b
--- /dev/null
@@ -0,0 +1,190 @@
+/* QLogic FCoE Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#include "drv_fcoe_fw_funcs.h"
+#include "drv_scsi_fw_funcs.h"
+
+#define FCOE_RX_ID (0xFFFFu)
+
+static inline void init_common_sqe(struct fcoe_task_params *task_params,
+                                  enum fcoe_sqe_request_type request_type)
+{
+       memset(task_params->sqe, 0, sizeof(*(task_params->sqe)));
+       SET_FIELD(task_params->sqe->flags, FCOE_WQE_REQ_TYPE,
+                 request_type);
+       task_params->sqe->task_id = task_params->itid;
+}
+
+int init_initiator_rw_fcoe_task(struct fcoe_task_params *task_params,
+                               struct scsi_sgl_task_params *sgl_task_params,
+                               struct regpair sense_data_buffer_phys_addr,
+                               u32 task_retry_id,
+                               u8 fcp_cmd_payload[32])
+{
+       struct fcoe_task_context *ctx = task_params->context;
+       struct ystorm_fcoe_task_st_ctx *y_st_ctx;
+       struct tstorm_fcoe_task_st_ctx *t_st_ctx;
+       struct ustorm_fcoe_task_ag_ctx *u_ag_ctx;
+       struct mstorm_fcoe_task_st_ctx *m_st_ctx;
+       u32 io_size, val;
+       bool slow_sgl;
+
+       memset(ctx, 0, sizeof(*(ctx)));
+       slow_sgl = scsi_is_slow_sgl(sgl_task_params->num_sges,
+                                   sgl_task_params->small_mid_sge);
+       io_size = (task_params->task_type == FCOE_TASK_TYPE_WRITE_INITIATOR ?
+                  task_params->tx_io_size : task_params->rx_io_size);
+
+       /* Ystorm ctx */
+       y_st_ctx = &ctx->ystorm_st_context;
+       y_st_ctx->data_2_trns_rem = cpu_to_le32(io_size);
+       y_st_ctx->task_rety_identifier = cpu_to_le32(task_retry_id);
+       y_st_ctx->task_type = task_params->task_type;
+       memcpy(&y_st_ctx->tx_info_union.fcp_cmd_payload,
+              fcp_cmd_payload, sizeof(struct fcoe_fcp_cmd_payload));
+
+       /* Tstorm ctx */
+       t_st_ctx = &ctx->tstorm_st_context;
+       t_st_ctx->read_only.dev_type = (task_params->is_tape_device == 1 ?
+                                       FCOE_TASK_DEV_TYPE_TAPE :
+                                       FCOE_TASK_DEV_TYPE_DISK);
+       t_st_ctx->read_only.cid = cpu_to_le32(task_params->conn_cid);
+       val = cpu_to_le32(task_params->cq_rss_number);
+       t_st_ctx->read_only.glbl_q_num = val;
+       t_st_ctx->read_only.fcp_cmd_trns_size = cpu_to_le32(io_size);
+       t_st_ctx->read_only.task_type = task_params->task_type;
+       SET_FIELD(t_st_ctx->read_write.flags,
+                 FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME, 1);
+       t_st_ctx->read_write.rx_id = cpu_to_le16(FCOE_RX_ID);
+
+       /* Ustorm ctx */
+       u_ag_ctx = &ctx->ustorm_ag_context;
+       u_ag_ctx->global_cq_num = cpu_to_le32(task_params->cq_rss_number);
+
+       /* Mstorm buffer for sense/rsp data placement */
+       m_st_ctx = &ctx->mstorm_st_context;
+       val = cpu_to_le32(sense_data_buffer_phys_addr.hi);
+       m_st_ctx->rsp_buf_addr.hi = val;
+       val = cpu_to_le32(sense_data_buffer_phys_addr.lo);
+       m_st_ctx->rsp_buf_addr.lo = val;
+
+       if (task_params->task_type == FCOE_TASK_TYPE_WRITE_INITIATOR) {
+               /* Ystorm ctx */
+               y_st_ctx->expect_first_xfer = 1;
+
+               /* Set the number of super SGEs. Can be up to 4. */
+               SET_FIELD(y_st_ctx->sgl_mode,
+                         YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE,
+                         (slow_sgl ? SCSI_TX_SLOW_SGL : SCSI_FAST_SGL));
+               init_scsi_sgl_context(&y_st_ctx->sgl_params,
+                                     &y_st_ctx->data_desc,
+                                     sgl_task_params);
+
+               /* Mstorm ctx */
+               SET_FIELD(m_st_ctx->flags,
+                         MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE,
+                         (slow_sgl ? SCSI_TX_SLOW_SGL : SCSI_FAST_SGL));
+       } else {
+               /* Tstorm ctx */
+               SET_FIELD(t_st_ctx->read_write.flags,
+                         FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE,
+                         (slow_sgl ? SCSI_TX_SLOW_SGL : SCSI_FAST_SGL));
+
+               /* Mstorm ctx */
+               m_st_ctx->data_2_trns_rem = cpu_to_le32(io_size);
+               init_scsi_sgl_context(&m_st_ctx->sgl_params,
+                                     &m_st_ctx->data_desc,
+                                     sgl_task_params);
+       }
+
+       init_common_sqe(task_params, SEND_FCOE_CMD);
+       return 0;
+}
+
+int init_initiator_midpath_unsolicited_fcoe_task(
+       struct fcoe_task_params *task_params,
+       struct fcoe_tx_mid_path_params *mid_path_fc_header,
+       struct scsi_sgl_task_params *tx_sgl_task_params,
+       struct scsi_sgl_task_params *rx_sgl_task_params,
+       u8 fw_to_place_fc_header)
+{
+       struct fcoe_task_context *ctx = task_params->context;
+       struct ystorm_fcoe_task_st_ctx *y_st_ctx;
+       struct tstorm_fcoe_task_st_ctx *t_st_ctx;
+       struct ustorm_fcoe_task_ag_ctx *u_ag_ctx;
+       struct mstorm_fcoe_task_st_ctx *m_st_ctx;
+       u32 val;
+
+       memset(ctx, 0, sizeof(*(ctx)));
+
+       /* Init Ystorm */
+       y_st_ctx = &ctx->ystorm_st_context;
+       init_scsi_sgl_context(&y_st_ctx->sgl_params,
+                             &y_st_ctx->data_desc,
+                             tx_sgl_task_params);
+       SET_FIELD(y_st_ctx->sgl_mode,
+                 YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE, SCSI_FAST_SGL);
+       y_st_ctx->data_2_trns_rem = cpu_to_le32(task_params->tx_io_size);
+       y_st_ctx->task_type = task_params->task_type;
+       memcpy(&y_st_ctx->tx_info_union.tx_params.mid_path,
+              mid_path_fc_header, sizeof(struct fcoe_tx_mid_path_params));
+
+       /* Init Mstorm */
+       m_st_ctx = &ctx->mstorm_st_context;
+       init_scsi_sgl_context(&m_st_ctx->sgl_params,
+                             &m_st_ctx->data_desc,
+                             rx_sgl_task_params);
+       SET_FIELD(m_st_ctx->flags,
+                 MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER,
+                 fw_to_place_fc_header);
+       m_st_ctx->data_2_trns_rem = cpu_to_le32(task_params->rx_io_size);
+
+       /* Init Tstorm */
+       t_st_ctx = &ctx->tstorm_st_context;
+       t_st_ctx->read_only.cid = cpu_to_le32(task_params->conn_cid);
+       val = cpu_to_le32(task_params->cq_rss_number);
+       t_st_ctx->read_only.glbl_q_num = val;
+       t_st_ctx->read_only.task_type = task_params->task_type;
+       SET_FIELD(t_st_ctx->read_write.flags,
+                 FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME, 1);
+       t_st_ctx->read_write.rx_id = cpu_to_le16(FCOE_RX_ID);
+
+       /* Init Ustorm */
+       u_ag_ctx = &ctx->ustorm_ag_context;
+       u_ag_ctx->global_cq_num = cpu_to_le32(task_params->cq_rss_number);
+
+       /* Init SQE */
+       init_common_sqe(task_params, SEND_FCOE_MIDPATH);
+       task_params->sqe->additional_info_union.burst_length =
+                                   tx_sgl_task_params->total_buffer_size;
+       SET_FIELD(task_params->sqe->flags,
+                 FCOE_WQE_NUM_SGES, tx_sgl_task_params->num_sges);
+       SET_FIELD(task_params->sqe->flags, FCOE_WQE_SGL_MODE,
+                 SCSI_FAST_SGL);
+
+       return 0;
+}
+
+int init_initiator_abort_fcoe_task(struct fcoe_task_params *task_params)
+{
+       init_common_sqe(task_params, SEND_FCOE_ABTS_REQUEST);
+       return 0;
+}
+
+int init_initiator_cleanup_fcoe_task(struct fcoe_task_params *task_params)
+{
+       init_common_sqe(task_params, FCOE_EXCHANGE_CLEANUP);
+       return 0;
+}
+
+int init_initiator_sequence_recovery_fcoe_task(
+       struct fcoe_task_params *task_params, u32 off)
+{
+       init_common_sqe(task_params, FCOE_SEQUENCE_RECOVERY);
+       task_params->sqe->additional_info_union.seq_rec_updated_offset = off;
+       return 0;
+}
diff --git a/drivers/scsi/qedf/drv_fcoe_fw_funcs.h b/drivers/scsi/qedf/drv_fcoe_fw_funcs.h
new file mode 100644 (file)
index 0000000..617529b
--- /dev/null
@@ -0,0 +1,93 @@
+/* QLogic FCoE Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#ifndef _FCOE_FW_FUNCS_H
+#define _FCOE_FW_FUNCS_H
+#include "drv_scsi_fw_funcs.h"
+#include "qedf_hsi.h"
+#include <linux/qed/qed_if.h>
+
+struct fcoe_task_params {
+       /* Output parameter [set/filled by the HSI function] */
+       struct fcoe_task_context *context;
+
+       /* Output parameter [set/filled by the HSI function] */
+       struct fcoe_wqe *sqe;
+       enum fcoe_task_type task_type;
+       u32 tx_io_size; /* in bytes */
+       u32 rx_io_size; /* in bytes */
+       u32 conn_cid;
+       u16 itid;
+       u8 cq_rss_number;
+
+       /* Whether this is a tape device (0=Disk, 1=Tape) */
+       u8 is_tape_device;
+};
+
+/**
+ * @brief init_initiator_rw_fcoe_task - Initializes FCoE task context for
+ * read/write task types and init fcoe_sqe
+ *
+ * @param task_params - Pointer to task parameters struct
+ * @param sgl_task_params - Pointer to SGL task params
+ * @param sense_data_buffer_phys_addr - Pointer to sense data buffer
+ * @param task_retry_id - Retry identifier; used only for tape devices
+ * @param fcp_cmd_payload - FCP CMD payload
+ */
+int init_initiator_rw_fcoe_task(struct fcoe_task_params *task_params,
+       struct scsi_sgl_task_params *sgl_task_params,
+       struct regpair sense_data_buffer_phys_addr,
+       u32 task_retry_id,
+       u8 fcp_cmd_payload[32]);
+
+/**
+ * @brief init_initiator_midpath_unsolicited_fcoe_task - Initializes FCoE
+ * task context for midpath/unsolicited task types and init fcoe_sqe
+ *
+ * @param task_params - Pointer to task parameters struct
+ * @param mid_path_fc_header - FC header
+ * @param tx_sgl_task_params - Pointer to Tx SGL task params
+ * @param rx_sgl_task_params - Pointer to Rx SGL task params
+ * @param fw_to_place_fc_header - Indication whether the FW should place the
+ * FC header in addition to the data that arrives.
+ */
+int init_initiator_midpath_unsolicited_fcoe_task(
+       struct fcoe_task_params *task_params,
+       struct fcoe_tx_mid_path_params *mid_path_fc_header,
+       struct scsi_sgl_task_params *tx_sgl_task_params,
+       struct scsi_sgl_task_params *rx_sgl_task_params,
+       u8 fw_to_place_fc_header);
+
+/**
+ * @brief init_initiator_abort_fcoe_task - Initializes FCoE task context for
+ * abort task types and init fcoe_sqe
+ *
+ * @param task_params - Pointer to task parameters struct
+ */
+int init_initiator_abort_fcoe_task(struct fcoe_task_params *task_params);
+
+/**
+ * @brief init_initiator_cleanup_fcoe_task - Initializes FCoE task context for
+ * cleanup task types and init fcoe_sqe
+ *
+ * @param task_params - Pointer to task parameters struct
+ */
+int init_initiator_cleanup_fcoe_task(struct fcoe_task_params *task_params);
+
+/**
+ * @brief init_initiator_sequence_recovery_fcoe_task - Initializes FCoE task
+ * context for sequence recovery task types and init fcoe_sqe
+ *
+ * @param task_params - Pointer to task parameters struct
+ * @param desired_offset - The desired offset the task will be re-sent from
+ */
+int init_initiator_sequence_recovery_fcoe_task(
+       struct fcoe_task_params *task_params,
+       u32 desired_offset);
+#endif
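For orientation, a hedged sketch of how a caller drives this interface for a write command; every parameter value is a placeholder, the EX_U64_* macros mirror the driver's U64_LO/U64_HI helpers from qedf.h, and the real call sites appear in the qedf_io.c hunks further down:

    #include <linux/types.h>
    #include "drv_fcoe_fw_funcs.h"

    #define EX_U64_LO(v) ((u32)((u64)(v) & 0xffffffff))
    #define EX_U64_HI(v) ((u32)(((u64)(v)) >> 32))

    /* Sketch only: assumes ctx, sqe, the scsi_sge table and the DMA
     * mappings were set up by the driver beforehand.
     */
    static void example_init_write_task(struct fcoe_task_context *ctx,
    				    struct fcoe_wqe *sqe,
    				    struct scsi_sge *bd_tbl, u16 bd_count,
    				    dma_addr_t bd_tbl_dma, u32 xfer_len,
    				    dma_addr_t sense_dma, u32 fw_cid,
    				    u16 xid, u8 cq_idx, u8 fcp_cmnd[32])
    {
    	struct fcoe_task_params params = {};
    	struct scsi_sgl_task_params sgl = {};
    	struct regpair sense_buf;

    	params.context = ctx;		/* task context the helper fills */
    	params.sqe = sqe;		/* SQE the helper initializes    */
    	params.task_type = FCOE_TASK_TYPE_WRITE_INITIATOR;
    	params.tx_io_size = xfer_len;
    	params.conn_cid = fw_cid;
    	params.itid = xid;
    	params.cq_rss_number = cq_idx;

    	sgl.sgl = bd_tbl;
    	sgl.sgl_phys_addr.lo = EX_U64_LO(bd_tbl_dma);
    	sgl.sgl_phys_addr.hi = EX_U64_HI(bd_tbl_dma);
    	sgl.num_sges = bd_count;
    	sgl.total_buffer_size = xfer_len;

    	sense_buf.lo = EX_U64_LO(sense_dma);
    	sense_buf.hi = EX_U64_HI(sense_dma);

    	init_initiator_rw_fcoe_task(&params, &sgl, sense_buf,
    				    0 /* retry id */, fcp_cmnd);
    }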
diff --git a/drivers/scsi/qedf/drv_scsi_fw_funcs.c b/drivers/scsi/qedf/drv_scsi_fw_funcs.c
new file mode 100644 (file)
index 0000000..11e0cc0
--- /dev/null
@@ -0,0 +1,44 @@
+/* QLogic FCoE Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#include "drv_scsi_fw_funcs.h"
+
+#define SCSI_NUM_SGES_IN_CACHE 0x4
+
+bool scsi_is_slow_sgl(u16 num_sges, bool small_mid_sge)
+{
+       return (num_sges > SCSI_NUM_SGES_SLOW_SGL_THR && small_mid_sge);
+}
+
+void init_scsi_sgl_context(struct scsi_sgl_params *ctx_sgl_params,
+                          struct scsi_cached_sges *ctx_data_desc,
+                          struct scsi_sgl_task_params *sgl_task_params)
+{
+       /* no need to check for sgl_task_params->sgl validity */
+       u8 num_sges_to_init = sgl_task_params->num_sges >
+                             SCSI_NUM_SGES_IN_CACHE ? SCSI_NUM_SGES_IN_CACHE :
+                             sgl_task_params->num_sges;
+       u8 sge_index;
+       u32 val;
+
+       val = cpu_to_le32(sgl_task_params->sgl_phys_addr.lo);
+       ctx_sgl_params->sgl_addr.lo = val;
+       val = cpu_to_le32(sgl_task_params->sgl_phys_addr.hi);
+       ctx_sgl_params->sgl_addr.hi = val;
+       val = cpu_to_le32(sgl_task_params->total_buffer_size);
+       ctx_sgl_params->sgl_total_length = val;
+       ctx_sgl_params->sgl_num_sges = cpu_to_le16(sgl_task_params->num_sges);
+
+       for (sge_index = 0; sge_index < num_sges_to_init; sge_index++) {
+               val = cpu_to_le32(sgl_task_params->sgl[sge_index].sge_addr.lo);
+               ctx_data_desc->sge[sge_index].sge_addr.lo = val;
+               val = cpu_to_le32(sgl_task_params->sgl[sge_index].sge_addr.hi);
+               ctx_data_desc->sge[sge_index].sge_addr.hi = val;
+               val = cpu_to_le32(sgl_task_params->sgl[sge_index].sge_len);
+               ctx_data_desc->sge[sge_index].sge_len = val;
+       }
+}
diff --git a/drivers/scsi/qedf/drv_scsi_fw_funcs.h b/drivers/scsi/qedf/drv_scsi_fw_funcs.h
new file mode 100644 (file)
index 0000000..9cb4541
--- /dev/null
@@ -0,0 +1,85 @@
+/* QLogic FCoE Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#ifndef _SCSI_FW_FUNCS_H
+#define _SCSI_FW_FUNCS_H
+#include <linux/qed/common_hsi.h>
+#include <linux/qed/storage_common.h>
+#include <linux/qed/fcoe_common.h>
+
+struct scsi_sgl_task_params {
+       struct scsi_sge *sgl;
+       struct regpair sgl_phys_addr;
+       u32 total_buffer_size;
+       u16 num_sges;
+
+       /* True if the SGL contains a small (< 4KB) SGE in the middle (i.e.
+        * not the 1st or last SGE) - relevant for tx only.
+        */
+       bool small_mid_sge;
+};
+
+struct scsi_dif_task_params {
+       u32 initial_ref_tag;
+       bool initial_ref_tag_is_valid;
+       u16 application_tag;
+       u16 application_tag_mask;
+       u16 dif_block_size_log;
+       bool dif_on_network;
+       bool dif_on_host;
+       u8 host_guard_type;
+       u8 protection_type;
+       u8 ref_tag_mask;
+       bool crc_seed;
+
+        /* Enable Connection error upon DIF error (segments with DIF errors are
+         * dropped)
+         */
+       bool tx_dif_conn_err_en;
+       bool ignore_app_tag;
+       bool keep_ref_tag_const;
+       bool validate_guard;
+       bool validate_app_tag;
+       bool validate_ref_tag;
+       bool forward_guard;
+       bool forward_app_tag;
+       bool forward_ref_tag;
+       bool forward_app_tag_with_mask;
+       bool forward_ref_tag_with_mask;
+};
+
+struct scsi_initiator_cmd_params {
+        /* for cdb_size > default CDB size (extended CDB > 16 bytes) ->
+         * pointer to the CDB buffer SGE
+         */
+       struct scsi_sge extended_cdb_sge;
+
+       /* Physical address of sense data buffer for sense data - 256B buffer */
+       struct regpair sense_data_buffer_phys_addr;
+};
+
+/**
+ * @brief scsi_is_slow_sgl - checks for slow SGL
+ *
+ * @param num_sges - number of sges in SGL
+ * @param small_mid_sge - True if the SGL contains an SGE smaller than 4KB
+ * that is not the 1st or last SGE in the SGL
+ */
+bool scsi_is_slow_sgl(u16 num_sges, bool small_mid_sge);
+
+/**
+ * @brief init_scsi_sgl_context - initializes SGL task context
+ *
+ * @param sgl_params - SGL context parameters to initialize (output parameter)
+ * @param data_desc - context struct containing SGEs array to set (output
+ * parameter)
+ * @param sgl_task_params - SGL parameters (input)
+ */
+void init_scsi_sgl_context(struct scsi_sgl_params *sgl_params,
+       struct scsi_cached_sges *ctx_data_desc,
+       struct scsi_sgl_task_params *sgl_task_params);
+#endif
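A brief sketch of how these two helpers combine at task-init time, mirroring the drv_fcoe_fw_funcs.c code above; SCSI_TX_SLOW_SGL, SCSI_FAST_SGL and SCSI_NUM_SGES_SLOW_SGL_THR are taken as given from the qed common headers:

    #include "drv_fcoe_fw_funcs.h"

    static void example_choose_sgl_mode(struct ystorm_fcoe_task_st_ctx *y_ctx,
    				    struct scsi_sgl_task_params *sgl_params)
    {
    	bool slow = scsi_is_slow_sgl(sgl_params->num_sges,
    				     sgl_params->small_mid_sge);

    	/* Fast SGL unless the SGL is long and has a small mid SGE. */
    	SET_FIELD(y_ctx->sgl_mode, YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE,
    		  slow ? SCSI_TX_SLOW_SGL : SCSI_FAST_SGL);

    	/* init_scsi_sgl_context() copies the first SCSI_NUM_SGES_IN_CACHE
    	 * (4) SGEs straight into the task context, presumably so the
    	 * firmware can start without first fetching the SGL from host
    	 * memory.
    	 */
    	init_scsi_sgl_context(&y_ctx->sgl_params, &y_ctx->data_desc,
    			      sgl_params);
    }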
index 96346a1b1515e81b6c17a7035fb4051a263b8a4a..40aeb6bb96a2afd11c3264b7ea0004110033e2d1 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/qed/qed_ll2_if.h>
 #include "qedf_version.h"
 #include "qedf_dbg.h"
+#include "drv_fcoe_fw_funcs.h"
 
 /* Helpers to extract upper and lower 32-bits of pointer */
 #define U64_HI(val) ((u32)(((u64)(val)) >> 32))
 #define UPSTREAM_KEEP          1
 
 struct qedf_mp_req {
-       uint8_t tm_flags;
-
        uint32_t req_len;
        void *req_buf;
        dma_addr_t req_buf_dma;
-       struct fcoe_sge *mp_req_bd;
+       struct scsi_sge *mp_req_bd;
        dma_addr_t mp_req_bd_dma;
        struct fc_frame_header req_fc_hdr;
 
        uint32_t resp_len;
        void *resp_buf;
        dma_addr_t resp_buf_dma;
-       struct fcoe_sge *mp_resp_bd;
+       struct scsi_sge *mp_resp_bd;
        dma_addr_t mp_resp_bd_dma;
        struct fc_frame_header resp_fc_hdr;
 };
@@ -119,6 +118,7 @@ struct qedf_ioreq {
 #define QEDF_CMD_IN_CLEANUP            0x2
 #define QEDF_CMD_SRR_SENT              0x3
        u8 io_req_flags;
+       uint8_t tm_flags;
        struct qedf_rport *fcport;
        unsigned long flags;
        enum qedf_ioreq_event event;
@@ -130,6 +130,8 @@ struct qedf_ioreq {
        struct completion tm_done;
        struct completion abts_done;
        struct fcoe_task_context *task;
+       struct fcoe_task_params *task_params;
+       struct scsi_sgl_task_params *sgl_task_params;
        int idx;
 /*
  * Need to allocate enough room for both sense data and FCP response data
@@ -199,8 +201,8 @@ struct qedf_rport {
        dma_addr_t sq_pbl_dma;
        u32 sq_pbl_size;
        u32 sid;
-#define        QEDF_RPORT_TYPE_DISK            1
-#define        QEDF_RPORT_TYPE_TAPE            2
+#define        QEDF_RPORT_TYPE_DISK            0
+#define        QEDF_RPORT_TYPE_TAPE            1
        uint dev_type; /* Disk or tape */
        struct list_head peers;
 };
@@ -391,7 +393,7 @@ struct qedf_ctx {
 
 struct io_bdt {
        struct qedf_ioreq *io_req;
-       struct fcoe_sge *bd_tbl;
+       struct scsi_sge *bd_tbl;
        dma_addr_t bd_tbl_dma;
        u16 bd_valid;
 };
@@ -400,7 +402,7 @@ struct qedf_cmd_mgr {
        struct qedf_ctx *qedf;
        u16 idx;
        struct io_bdt **io_bdt_pool;
-#define FCOE_PARAMS_NUM_TASKS          4096
+#define FCOE_PARAMS_NUM_TASKS          2048
        struct qedf_ioreq cmds[FCOE_PARAMS_NUM_TASKS];
        spinlock_t lock;
        atomic_t free_list_cnt;
@@ -465,9 +467,8 @@ extern void qedf_cmd_timer_set(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
        unsigned int timer_msec);
 extern int qedf_init_mp_req(struct qedf_ioreq *io_req);
 extern void qedf_init_mp_task(struct qedf_ioreq *io_req,
-       struct fcoe_task_context *task_ctx);
-extern void qedf_add_to_sq(struct qedf_rport *fcport, u16 xid,
-       u32 ptu_invalidate, enum fcoe_task_type req_type, u32 offset);
+       struct fcoe_task_context *task_ctx, struct fcoe_wqe *wqe);
+extern u16 qedf_get_sqe_idx(struct qedf_rport *fcport);
 extern void qedf_ring_doorbell(struct qedf_rport *fcport);
 extern void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
        struct qedf_ioreq *els_req);
index 59f3e5c73a139b9324a4307dc1f500a1153a4536..c505d41f6dc843825fb52fabaaca33e5dd25d1ce 100644 (file)
@@ -25,6 +25,9 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
        uint16_t xid;
        uint32_t start_time = jiffies / HZ;
        uint32_t current_time;
+       struct fcoe_wqe *sqe;
+       unsigned long flags;
+       u16 sqe_idx;
 
        QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending ELS\n");
 
@@ -113,20 +116,25 @@ retry_els:
        /* Obtain exchange id */
        xid = els_req->xid;
 
+       spin_lock_irqsave(&fcport->rport_lock, flags);
+
+       sqe_idx = qedf_get_sqe_idx(fcport);
+       sqe = &fcport->sq[sqe_idx];
+       memset(sqe, 0, sizeof(struct fcoe_wqe));
+
        /* Initialize task context for this IO request */
        task = qedf_get_task_mem(&qedf->tasks, xid);
-       qedf_init_mp_task(els_req, task);
+       qedf_init_mp_task(els_req, task, sqe);
 
        /* Put timer on original I/O request */
        if (timer_msec)
                qedf_cmd_timer_set(qedf, els_req, timer_msec);
 
-       qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_MIDPATH, 0);
-
        /* Ring doorbell */
        QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Ringing doorbell for ELS "
                   "req\n");
        qedf_ring_doorbell(fcport);
+       spin_unlock_irqrestore(&fcport->rport_lock, flags);
 els_err:
        return rc;
 }
@@ -604,6 +612,8 @@ static void qedf_initiate_seq_cleanup(struct qedf_ioreq *orig_io_req,
        struct qedf_rport *fcport;
        unsigned long flags;
        struct qedf_els_cb_arg *cb_arg;
+       struct fcoe_wqe *sqe;
+       u16 sqe_idx;
 
        fcport = orig_io_req->fcport;
 
@@ -631,8 +641,13 @@ static void qedf_initiate_seq_cleanup(struct qedf_ioreq *orig_io_req,
 
        spin_lock_irqsave(&fcport->rport_lock, flags);
 
-       qedf_add_to_sq(fcport, orig_io_req->xid, 0,
-           FCOE_TASK_TYPE_SEQUENCE_CLEANUP, offset);
+       sqe_idx = qedf_get_sqe_idx(fcport);
+       sqe = &fcport->sq[sqe_idx];
+       memset(sqe, 0, sizeof(struct fcoe_wqe));
+       orig_io_req->task_params->sqe = sqe;
+
+       init_initiator_sequence_recovery_fcoe_task(orig_io_req->task_params,
+                                                  offset);
        qedf_ring_doorbell(fcport);
 
        spin_unlock_irqrestore(&fcport->rport_lock, flags);
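Both conversions above, and the qedf_io.c hunks that follow, settle on the same submission sequence: reserve a send-queue slot under fcport->rport_lock, zero the WQE, point task_params->sqe at it, let the firmware init helper fill it in, and ring the doorbell before dropping the lock. Condensed below using the abort variant; this is a sketch of the flow, not a verbatim excerpt:

    static void example_send_abort(struct qedf_rport *fcport,
    			       struct qedf_ioreq *io_req)
    {
    	unsigned long flags;
    	struct fcoe_wqe *sqe;
    	u16 sqe_idx;

    	spin_lock_irqsave(&fcport->rport_lock, flags);

    	sqe_idx = qedf_get_sqe_idx(fcport);	/* advances sq_prod_idx */
    	sqe = &fcport->sq[sqe_idx];
    	memset(sqe, 0, sizeof(struct fcoe_wqe));
    	io_req->task_params->sqe = sqe;

    	init_initiator_abort_fcoe_task(io_req->task_params);
    	qedf_ring_doorbell(fcport);

    	spin_unlock_irqrestore(&fcport->rport_lock, flags);
    }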
index 46debe5034af102710a574a80254433b94265271..1d7f90d0adc1a0c4f55ade57f0bf18f622372184 100644 (file)
@@ -96,7 +96,7 @@ void qedf_cmd_mgr_free(struct qedf_cmd_mgr *cmgr)
        if (!cmgr->io_bdt_pool)
                goto free_cmd_pool;
 
-       bd_tbl_sz = QEDF_MAX_BDS_PER_CMD * sizeof(struct fcoe_sge);
+       bd_tbl_sz = QEDF_MAX_BDS_PER_CMD * sizeof(struct scsi_sge);
        for (i = 0; i < num_ios; i++) {
                bdt_info = cmgr->io_bdt_pool[i];
                if (bdt_info->bd_tbl) {
@@ -119,6 +119,8 @@ free_cmd_pool:
 
        for (i = 0; i < num_ios; i++) {
                io_req = &cmgr->cmds[i];
+               kfree(io_req->sgl_task_params);
+               kfree(io_req->task_params);
                /* Make sure we free per command sense buffer */
                if (io_req->sense_buffer)
                        dma_free_coherent(&qedf->pdev->dev,
@@ -178,7 +180,7 @@ struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf)
        spin_lock_init(&cmgr->lock);
 
        /*
-        * Initialize list of qedf_ioreq.
+        * Initialize I/O request fields.
         */
        xid = QEDF_MIN_XID;
 
@@ -196,6 +198,29 @@ struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf)
                    GFP_KERNEL);
                if (!io_req->sense_buffer)
                        goto mem_err;
+
+               /* Allocate task parameters to pass to f/w init functions */
+               io_req->task_params = kzalloc(sizeof(*io_req->task_params),
+                                             GFP_KERNEL);
+               if (!io_req->task_params) {
+                       QEDF_ERR(&(qedf->dbg_ctx),
+                                "Failed to allocate task_params for xid=0x%x\n",
+                                i);
+                       goto mem_err;
+               }
+
+               /*
+                * Allocate scatter/gather list info to pass to f/w init
+                * functions.
+                */
+               io_req->sgl_task_params = kzalloc(
+                   sizeof(struct scsi_sgl_task_params), GFP_KERNEL);
+               if (!io_req->sgl_task_params) {
+                       QEDF_ERR(&(qedf->dbg_ctx),
+                                "Failed to allocate sgl_task_params for xid=0x%x\n",
+                                i);
+                       goto mem_err;
+               }
        }
 
        /* Allocate pool of io_bdts - one for each qedf_ioreq */
@@ -211,8 +236,8 @@ struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf)
                cmgr->io_bdt_pool[i] = kmalloc(sizeof(struct io_bdt),
                    GFP_KERNEL);
                if (!cmgr->io_bdt_pool[i]) {
-                       QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc "
-                                  "io_bdt_pool[%d].\n", i);
+                       QEDF_WARN(&(qedf->dbg_ctx),
+                                 "Failed to alloc io_bdt_pool[%d].\n", i);
                        goto mem_err;
                }
        }
@@ -220,11 +245,11 @@ struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf)
        for (i = 0; i < num_ios; i++) {
                bdt_info = cmgr->io_bdt_pool[i];
                bdt_info->bd_tbl = dma_alloc_coherent(&qedf->pdev->dev,
-                   QEDF_MAX_BDS_PER_CMD * sizeof(struct fcoe_sge),
+                   QEDF_MAX_BDS_PER_CMD * sizeof(struct scsi_sge),
                    &bdt_info->bd_tbl_dma, GFP_KERNEL);
                if (!bdt_info->bd_tbl) {
-                       QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc "
-                                  "bdt_tbl[%d].\n", i);
+                       QEDF_WARN(&(qedf->dbg_ctx),
+                                 "Failed to alloc bdt_tbl[%d].\n", i);
                        goto mem_err;
                }
        }
@@ -318,6 +343,7 @@ struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, u8 cmd_type)
        }
        bd_tbl->io_req = io_req;
        io_req->cmd_type = cmd_type;
+       io_req->tm_flags = 0;
 
        /* Reset sequence offset data */
        io_req->rx_buf_off = 0;
@@ -336,10 +362,9 @@ static void qedf_free_mp_resc(struct qedf_ioreq *io_req)
 {
        struct qedf_mp_req *mp_req = &(io_req->mp_req);
        struct qedf_ctx *qedf = io_req->fcport->qedf;
-       uint64_t sz = sizeof(struct fcoe_sge);
+       uint64_t sz = sizeof(struct scsi_sge);
 
        /* clear tm flags */
-       mp_req->tm_flags = 0;
        if (mp_req->mp_req_bd) {
                dma_free_coherent(&qedf->pdev->dev, sz,
                    mp_req->mp_req_bd, mp_req->mp_req_bd_dma);
@@ -387,7 +412,7 @@ void qedf_release_cmd(struct kref *ref)
 static int qedf_split_bd(struct qedf_ioreq *io_req, u64 addr, int sg_len,
        int bd_index)
 {
-       struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl;
+       struct scsi_sge *bd = io_req->bd_tbl->bd_tbl;
        int frag_size, sg_frags;
 
        sg_frags = 0;
@@ -398,7 +423,7 @@ static int qedf_split_bd(struct qedf_ioreq *io_req, u64 addr, int sg_len,
                        frag_size = sg_len;
                bd[bd_index + sg_frags].sge_addr.lo = U64_LO(addr);
                bd[bd_index + sg_frags].sge_addr.hi = U64_HI(addr);
-               bd[bd_index + sg_frags].size = (uint16_t)frag_size;
+               bd[bd_index + sg_frags].sge_len = (uint16_t)frag_size;
 
                addr += (u64)frag_size;
                sg_frags++;
@@ -413,7 +438,7 @@ static int qedf_map_sg(struct qedf_ioreq *io_req)
        struct Scsi_Host *host = sc->device->host;
        struct fc_lport *lport = shost_priv(host);
        struct qedf_ctx *qedf = lport_priv(lport);
-       struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl;
+       struct scsi_sge *bd = io_req->bd_tbl->bd_tbl;
        struct scatterlist *sg;
        int byte_count = 0;
        int sg_count = 0;
@@ -439,7 +464,7 @@ static int qedf_map_sg(struct qedf_ioreq *io_req)
 
                bd[bd_count].sge_addr.lo = (addr & 0xffffffff);
                bd[bd_count].sge_addr.hi = (addr >> 32);
-               bd[bd_count].size = (u16)sg_len;
+               bd[bd_count].sge_len = (u16)sg_len;
 
                return ++bd_count;
        }
@@ -480,7 +505,7 @@ static int qedf_map_sg(struct qedf_ioreq *io_req)
                        sg_frags = 1;
                        bd[bd_count].sge_addr.lo = U64_LO(addr);
                        bd[bd_count].sge_addr.hi  = U64_HI(addr);
-                       bd[bd_count].size = (uint16_t)sg_len;
+                       bd[bd_count].sge_len = (uint16_t)sg_len;
                }
 
                bd_count += sg_frags;
@@ -498,7 +523,7 @@ static int qedf_map_sg(struct qedf_ioreq *io_req)
 static int qedf_build_bd_list_from_sg(struct qedf_ioreq *io_req)
 {
        struct scsi_cmnd *sc = io_req->sc_cmd;
-       struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl;
+       struct scsi_sge *bd = io_req->bd_tbl->bd_tbl;
        int bd_count;
 
        if (scsi_sg_count(sc)) {
@@ -508,7 +533,7 @@ static int qedf_build_bd_list_from_sg(struct qedf_ioreq *io_req)
        } else {
                bd_count = 0;
                bd[0].sge_addr.lo = bd[0].sge_addr.hi = 0;
-               bd[0].size = 0;
+               bd[0].sge_len = 0;
        }
        io_req->bd_tbl->bd_valid = bd_count;
 
@@ -529,430 +554,223 @@ static void qedf_build_fcp_cmnd(struct qedf_ioreq *io_req,
 
        /* 4 bytes: flag info */
        fcp_cmnd->fc_pri_ta = 0;
-       fcp_cmnd->fc_tm_flags = io_req->mp_req.tm_flags;
+       fcp_cmnd->fc_tm_flags = io_req->tm_flags;
        fcp_cmnd->fc_flags = io_req->io_req_flags;
        fcp_cmnd->fc_cmdref = 0;
 
        /* Populate data direction */
-       if (sc_cmd->sc_data_direction == DMA_TO_DEVICE)
-               fcp_cmnd->fc_flags |= FCP_CFL_WRDATA;
-       else if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE)
+       if (io_req->cmd_type == QEDF_TASK_MGMT_CMD) {
                fcp_cmnd->fc_flags |= FCP_CFL_RDDATA;
+       } else {
+               if (sc_cmd->sc_data_direction == DMA_TO_DEVICE)
+                       fcp_cmnd->fc_flags |= FCP_CFL_WRDATA;
+               else if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE)
+                       fcp_cmnd->fc_flags |= FCP_CFL_RDDATA;
+       }
 
        fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
 
        /* 16 bytes: CDB information */
-       memcpy(fcp_cmnd->fc_cdb, sc_cmd->cmnd, sc_cmd->cmd_len);
+       if (io_req->cmd_type != QEDF_TASK_MGMT_CMD)
+               memcpy(fcp_cmnd->fc_cdb, sc_cmd->cmnd, sc_cmd->cmd_len);
 
        /* 4 bytes: FCP data length */
        fcp_cmnd->fc_dl = htonl(io_req->data_xfer_len);
-
 }
 
 static void  qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport,
-       struct qedf_ioreq *io_req, u32 *ptu_invalidate,
-       struct fcoe_task_context *task_ctx)
+       struct qedf_ioreq *io_req, struct fcoe_task_context *task_ctx,
+       struct fcoe_wqe *sqe)
 {
        enum fcoe_task_type task_type;
        struct scsi_cmnd *sc_cmd = io_req->sc_cmd;
        struct io_bdt *bd_tbl = io_req->bd_tbl;
-       union fcoe_data_desc_ctx *data_desc;
-       u32 *fcp_cmnd;
+       u8 fcp_cmnd[32];
        u32 tmp_fcp_cmnd[8];
-       int cnt, i;
-       int bd_count;
+       int bd_count = 0;
        struct qedf_ctx *qedf = fcport->qedf;
        uint16_t cq_idx = smp_processor_id() % qedf->num_queues;
-       u8 tmp_sgl_mode = 0;
-       u8 mst_sgl_mode = 0;
+       struct regpair sense_data_buffer_phys_addr;
+       u32 tx_io_size = 0;
+       u32 rx_io_size = 0;
+       int i, cnt;
 
-       memset(task_ctx, 0, sizeof(struct fcoe_task_context));
+       /* Note init_initiator_rw_fcoe_task memsets the task context */
        io_req->task = task_ctx;
+       memset(task_ctx, 0, sizeof(struct fcoe_task_context));
+       memset(io_req->task_params, 0, sizeof(struct fcoe_task_params));
+       memset(io_req->sgl_task_params, 0, sizeof(struct scsi_sgl_task_params));
 
-       if (sc_cmd->sc_data_direction == DMA_TO_DEVICE)
-               task_type = FCOE_TASK_TYPE_WRITE_INITIATOR;
-       else
+       /* Set task type based on the DMA direction of the command */
+       if (io_req->cmd_type == QEDF_TASK_MGMT_CMD) {
                task_type = FCOE_TASK_TYPE_READ_INITIATOR;
-
-       /* Y Storm context */
-       task_ctx->ystorm_st_context.expect_first_xfer = 1;
-       task_ctx->ystorm_st_context.data_2_trns_rem = io_req->data_xfer_len;
-       /* Check if this is required */
-       task_ctx->ystorm_st_context.ox_id = io_req->xid;
-       task_ctx->ystorm_st_context.task_rety_identifier =
-           io_req->task_retry_identifier;
-
-       /* T Storm ag context */
-       SET_FIELD(task_ctx->tstorm_ag_context.flags0,
-           TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE, PROTOCOLID_FCOE);
-       task_ctx->tstorm_ag_context.icid = (u16)fcport->fw_cid;
-
-       /* T Storm st context */
-       SET_FIELD(task_ctx->tstorm_st_context.read_write.flags,
-           FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME,
-           1);
-       task_ctx->tstorm_st_context.read_write.rx_id = 0xffff;
-
-       task_ctx->tstorm_st_context.read_only.dev_type =
-           FCOE_TASK_DEV_TYPE_DISK;
-       task_ctx->tstorm_st_context.read_only.conf_supported = 0;
-       task_ctx->tstorm_st_context.read_only.cid = fcport->fw_cid;
-
-       /* Completion queue for response. */
-       task_ctx->tstorm_st_context.read_only.glbl_q_num = cq_idx;
-       task_ctx->tstorm_st_context.read_only.fcp_cmd_trns_size =
-           io_req->data_xfer_len;
-       task_ctx->tstorm_st_context.read_write.e_d_tov_exp_timeout_val =
-           lport->e_d_tov;
-
-       task_ctx->ustorm_ag_context.global_cq_num = cq_idx;
-       io_req->fp_idx = cq_idx;
-
-       bd_count = bd_tbl->bd_valid;
-       if (task_type == FCOE_TASK_TYPE_WRITE_INITIATOR) {
-               /* Setup WRITE task */
-               struct fcoe_sge *fcoe_bd_tbl = bd_tbl->bd_tbl;
-
-               task_ctx->ystorm_st_context.task_type =
-                   FCOE_TASK_TYPE_WRITE_INITIATOR;
-               data_desc = &task_ctx->ystorm_st_context.data_desc;
-
-               if (io_req->use_slowpath) {
-                       SET_FIELD(task_ctx->ystorm_st_context.sgl_mode,
-                           YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE,
-                           FCOE_SLOW_SGL);
-                       data_desc->slow.base_sgl_addr.lo =
-                           U64_LO(bd_tbl->bd_tbl_dma);
-                       data_desc->slow.base_sgl_addr.hi =
-                           U64_HI(bd_tbl->bd_tbl_dma);
-                       data_desc->slow.remainder_num_sges = bd_count;
-                       data_desc->slow.curr_sge_off = 0;
-                       data_desc->slow.curr_sgl_index = 0;
-                       qedf->slow_sge_ios++;
-                       io_req->sge_type = QEDF_IOREQ_SLOW_SGE;
-               } else {
-                       SET_FIELD(task_ctx->ystorm_st_context.sgl_mode,
-                           YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE,
-                           (bd_count <= 4) ? (enum fcoe_sgl_mode)bd_count :
-                           FCOE_MUL_FAST_SGES);
-
-                       if (bd_count == 1) {
-                               data_desc->single_sge.sge_addr.lo =
-                                   fcoe_bd_tbl->sge_addr.lo;
-                               data_desc->single_sge.sge_addr.hi =
-                                   fcoe_bd_tbl->sge_addr.hi;
-                               data_desc->single_sge.size =
-                                   fcoe_bd_tbl->size;
-                               data_desc->single_sge.is_valid_sge = 0;
-                               qedf->single_sge_ios++;
-                               io_req->sge_type = QEDF_IOREQ_SINGLE_SGE;
-                       } else {
-                               data_desc->fast.sgl_start_addr.lo =
-                                   U64_LO(bd_tbl->bd_tbl_dma);
-                               data_desc->fast.sgl_start_addr.hi =
-                                   U64_HI(bd_tbl->bd_tbl_dma);
-                               data_desc->fast.sgl_byte_offset =
-                                   data_desc->fast.sgl_start_addr.lo &
-                                   (QEDF_PAGE_SIZE - 1);
-                               if (data_desc->fast.sgl_byte_offset > 0)
-                                       QEDF_ERR(&(qedf->dbg_ctx),
-                                           "byte_offset=%u for xid=0x%x.\n",
-                                           io_req->xid,
-                                           data_desc->fast.sgl_byte_offset);
-                               data_desc->fast.task_reuse_cnt =
-                                   io_req->reuse_count;
-                               io_req->reuse_count++;
-                               if (io_req->reuse_count == QEDF_MAX_REUSE) {
-                                       *ptu_invalidate = 1;
-                                       io_req->reuse_count = 0;
-                               }
-                               qedf->fast_sge_ios++;
-                               io_req->sge_type = QEDF_IOREQ_FAST_SGE;
-                       }
-               }
-
-               /* T Storm context */
-               task_ctx->tstorm_st_context.read_only.task_type =
-                   FCOE_TASK_TYPE_WRITE_INITIATOR;
-
-               /* M Storm context */
-               tmp_sgl_mode = GET_FIELD(task_ctx->ystorm_st_context.sgl_mode,
-                   YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE);
-               SET_FIELD(task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode,
-                   FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE,
-                   tmp_sgl_mode);
-
        } else {
-               /* Setup READ task */
-
-               /* M Storm context */
-               struct fcoe_sge *fcoe_bd_tbl = bd_tbl->bd_tbl;
-
-               data_desc = &task_ctx->mstorm_st_context.fp.data_desc;
-               task_ctx->mstorm_st_context.fp.data_2_trns_rem =
-                   io_req->data_xfer_len;
-
-               if (io_req->use_slowpath) {
-                       SET_FIELD(
-                           task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode,
-                           FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE,
-                           FCOE_SLOW_SGL);
-                       data_desc->slow.base_sgl_addr.lo =
-                           U64_LO(bd_tbl->bd_tbl_dma);
-                       data_desc->slow.base_sgl_addr.hi =
-                           U64_HI(bd_tbl->bd_tbl_dma);
-                       data_desc->slow.remainder_num_sges =
-                           bd_count;
-                       data_desc->slow.curr_sge_off = 0;
-                       data_desc->slow.curr_sgl_index = 0;
-                       qedf->slow_sge_ios++;
-                       io_req->sge_type = QEDF_IOREQ_SLOW_SGE;
+               if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) {
+                       task_type = FCOE_TASK_TYPE_WRITE_INITIATOR;
+                       tx_io_size = io_req->data_xfer_len;
                } else {
-                       SET_FIELD(
-                           task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode,
-                           FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE,
-                           (bd_count <= 4) ? (enum fcoe_sgl_mode)bd_count :
-                           FCOE_MUL_FAST_SGES);
-
-                       if (bd_count == 1) {
-                               data_desc->single_sge.sge_addr.lo =
-                                   fcoe_bd_tbl->sge_addr.lo;
-                               data_desc->single_sge.sge_addr.hi =
-                                   fcoe_bd_tbl->sge_addr.hi;
-                               data_desc->single_sge.size =
-                                   fcoe_bd_tbl->size;
-                               data_desc->single_sge.is_valid_sge = 0;
-                               qedf->single_sge_ios++;
-                               io_req->sge_type = QEDF_IOREQ_SINGLE_SGE;
-                       } else {
-                               data_desc->fast.sgl_start_addr.lo =
-                                   U64_LO(bd_tbl->bd_tbl_dma);
-                               data_desc->fast.sgl_start_addr.hi =
-                                   U64_HI(bd_tbl->bd_tbl_dma);
-                               data_desc->fast.sgl_byte_offset = 0;
-                               data_desc->fast.task_reuse_cnt =
-                                   io_req->reuse_count;
-                               io_req->reuse_count++;
-                               if (io_req->reuse_count == QEDF_MAX_REUSE) {
-                                       *ptu_invalidate = 1;
-                                       io_req->reuse_count = 0;
-                               }
-                               qedf->fast_sge_ios++;
-                               io_req->sge_type = QEDF_IOREQ_FAST_SGE;
-                       }
+                       task_type = FCOE_TASK_TYPE_READ_INITIATOR;
+                       rx_io_size = io_req->data_xfer_len;
                }
-
-               /* Y Storm context */
-               task_ctx->ystorm_st_context.expect_first_xfer = 0;
-               task_ctx->ystorm_st_context.task_type =
-                   FCOE_TASK_TYPE_READ_INITIATOR;
-
-               /* T Storm context */
-               task_ctx->tstorm_st_context.read_only.task_type =
-                   FCOE_TASK_TYPE_READ_INITIATOR;
-               mst_sgl_mode = GET_FIELD(
-                   task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode,
-                   FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE);
-               SET_FIELD(task_ctx->tstorm_st_context.read_write.flags,
-                   FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE,
-                   mst_sgl_mode);
        }
 
+       /* Setup the fields for fcoe_task_params */
+       io_req->task_params->context = task_ctx;
+       io_req->task_params->sqe = sqe;
+       io_req->task_params->task_type = task_type;
+       io_req->task_params->tx_io_size = tx_io_size;
+       io_req->task_params->rx_io_size = rx_io_size;
+       io_req->task_params->conn_cid = fcport->fw_cid;
+       io_req->task_params->itid = io_req->xid;
+       io_req->task_params->cq_rss_number = cq_idx;
+       io_req->task_params->is_tape_device = fcport->dev_type;
+
+       /* Fill in information for scatter/gather list */
+       if (io_req->cmd_type != QEDF_TASK_MGMT_CMD) {
+               bd_count = bd_tbl->bd_valid;
+               io_req->sgl_task_params->sgl = bd_tbl->bd_tbl;
+               io_req->sgl_task_params->sgl_phys_addr.lo =
+                       U64_LO(bd_tbl->bd_tbl_dma);
+               io_req->sgl_task_params->sgl_phys_addr.hi =
+                       U64_HI(bd_tbl->bd_tbl_dma);
+               io_req->sgl_task_params->num_sges = bd_count;
+               io_req->sgl_task_params->total_buffer_size =
+                   scsi_bufflen(io_req->sc_cmd);
+               io_req->sgl_task_params->small_mid_sge =
+                       io_req->use_slowpath;
+       }
+
+       /* Fill in physical address of sense buffer */
+       sense_data_buffer_phys_addr.lo = U64_LO(io_req->sense_buffer_dma);
+       sense_data_buffer_phys_addr.hi = U64_HI(io_req->sense_buffer_dma);
+
        /* fill FCP_CMND IU */
-       fcp_cmnd = (u32 *)task_ctx->ystorm_st_context.tx_info_union.fcp_cmd_payload.opaque;
-       qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)&tmp_fcp_cmnd);
+       qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)tmp_fcp_cmnd);
 
        /* Swap fcp_cmnd since FC is big endian */
        cnt = sizeof(struct fcp_cmnd) / sizeof(u32);
-
        for (i = 0; i < cnt; i++) {
-               *fcp_cmnd = cpu_to_be32(tmp_fcp_cmnd[i]);
-               fcp_cmnd++;
+               tmp_fcp_cmnd[i] = cpu_to_be32(tmp_fcp_cmnd[i]);
+       }
+       memcpy(fcp_cmnd, tmp_fcp_cmnd, sizeof(struct fcp_cmnd));
+
+       init_initiator_rw_fcoe_task(io_req->task_params,
+                                   io_req->sgl_task_params,
+                                   sense_data_buffer_phys_addr,
+                                   io_req->task_retry_identifier, fcp_cmnd);
+
+       /* Increment SGL type counters */
+       if (bd_count == 1) {
+               qedf->single_sge_ios++;
+               io_req->sge_type = QEDF_IOREQ_SINGLE_SGE;
+       } else if (io_req->use_slowpath) {
+               qedf->slow_sge_ios++;
+               io_req->sge_type = QEDF_IOREQ_SLOW_SGE;
+       } else {
+               qedf->fast_sge_ios++;
+               io_req->sge_type = QEDF_IOREQ_FAST_SGE;
        }
-
-       /* M Storm context - Sense buffer */
-       task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.lo =
-               U64_LO(io_req->sense_buffer_dma);
-       task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.hi =
-               U64_HI(io_req->sense_buffer_dma);
 }
 
 void qedf_init_mp_task(struct qedf_ioreq *io_req,
-       struct fcoe_task_context *task_ctx)
+       struct fcoe_task_context *task_ctx, struct fcoe_wqe *sqe)
 {
        struct qedf_mp_req *mp_req = &(io_req->mp_req);
        struct qedf_rport *fcport = io_req->fcport;
        struct qedf_ctx *qedf = io_req->fcport->qedf;
        struct fc_frame_header *fc_hdr;
-       enum fcoe_task_type task_type = 0;
-       union fcoe_data_desc_ctx *data_desc;
+       struct fcoe_tx_mid_path_params task_fc_hdr;
+       struct scsi_sgl_task_params tx_sgl_task_params;
+       struct scsi_sgl_task_params rx_sgl_task_params;
 
-       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Initializing MP task "
-                  "for cmd_type = %d\n", io_req->cmd_type);
+       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
+                 "Initializing MP task for cmd_type=%d\n",
+                 io_req->cmd_type);
 
        qedf->control_requests++;
 
-       /* Obtain task_type */
-       if ((io_req->cmd_type == QEDF_TASK_MGMT_CMD) ||
-           (io_req->cmd_type == QEDF_ELS)) {
-               task_type = FCOE_TASK_TYPE_MIDPATH;
-       } else if (io_req->cmd_type == QEDF_ABTS) {
-               task_type = FCOE_TASK_TYPE_ABTS;
-       }
-
+       memset(&tx_sgl_task_params, 0, sizeof(struct scsi_sgl_task_params));
+       memset(&rx_sgl_task_params, 0, sizeof(struct scsi_sgl_task_params));
        memset(task_ctx, 0, sizeof(struct fcoe_task_context));
+       memset(&task_fc_hdr, 0, sizeof(struct fcoe_tx_mid_path_params));
 
        /* Setup the task from io_req for easy reference */
        io_req->task = task_ctx;
 
-       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "task type = %d\n",
-                  task_type);
-
-       /* YSTORM only */
-       {
-               /* Initialize YSTORM task context */
-               struct fcoe_tx_mid_path_params *task_fc_hdr =
-                   &task_ctx->ystorm_st_context.tx_info_union.tx_params.mid_path;
-               memset(task_fc_hdr, 0, sizeof(struct fcoe_tx_mid_path_params));
-               task_ctx->ystorm_st_context.task_rety_identifier =
-                   io_req->task_retry_identifier;
-
-               /* Init SGL parameters */
-               if ((task_type == FCOE_TASK_TYPE_MIDPATH) ||
-                   (task_type == FCOE_TASK_TYPE_UNSOLICITED)) {
-                       data_desc = &task_ctx->ystorm_st_context.data_desc;
-                       data_desc->slow.base_sgl_addr.lo =
-                           U64_LO(mp_req->mp_req_bd_dma);
-                       data_desc->slow.base_sgl_addr.hi =
-                           U64_HI(mp_req->mp_req_bd_dma);
-                       data_desc->slow.remainder_num_sges = 1;
-                       data_desc->slow.curr_sge_off = 0;
-                       data_desc->slow.curr_sgl_index = 0;
-               }
-
-               fc_hdr = &(mp_req->req_fc_hdr);
-               if (task_type == FCOE_TASK_TYPE_MIDPATH) {
-                       fc_hdr->fh_ox_id = io_req->xid;
-                       fc_hdr->fh_rx_id = htons(0xffff);
-               } else if (task_type == FCOE_TASK_TYPE_UNSOLICITED) {
-                       fc_hdr->fh_rx_id = io_req->xid;
-               }
+       /* Setup the fields for fcoe_task_params */
+       io_req->task_params->context = task_ctx;
+       io_req->task_params->sqe = sqe;
+       io_req->task_params->task_type = FCOE_TASK_TYPE_MIDPATH;
+       io_req->task_params->tx_io_size = io_req->data_xfer_len;
+       /* rx_io_size tells the f/w how large a response buffer we have */
+       io_req->task_params->rx_io_size = PAGE_SIZE;
+       io_req->task_params->conn_cid = fcport->fw_cid;
+       io_req->task_params->itid = io_req->xid;
+       /* Return middle path commands on CQ 0 */
+       io_req->task_params->cq_rss_number = 0;
+       io_req->task_params->is_tape_device = fcport->dev_type;
+
+       fc_hdr = &(mp_req->req_fc_hdr);
+       /* Set OX_ID and RX_ID based on driver task id */
+       fc_hdr->fh_ox_id = io_req->xid;
+       fc_hdr->fh_rx_id = htons(0xffff);
+
+       /* Set up FC header information */
+       task_fc_hdr.parameter = fc_hdr->fh_parm_offset;
+       task_fc_hdr.r_ctl = fc_hdr->fh_r_ctl;
+       task_fc_hdr.type = fc_hdr->fh_type;
+       task_fc_hdr.cs_ctl = fc_hdr->fh_cs_ctl;
+       task_fc_hdr.df_ctl = fc_hdr->fh_df_ctl;
+       task_fc_hdr.rx_id = fc_hdr->fh_rx_id;
+       task_fc_hdr.ox_id = fc_hdr->fh_ox_id;
+
+       /* Set up s/g list parameters for request buffer */
+       tx_sgl_task_params.sgl = mp_req->mp_req_bd;
+       tx_sgl_task_params.sgl_phys_addr.lo = U64_LO(mp_req->mp_req_bd_dma);
+       tx_sgl_task_params.sgl_phys_addr.hi = U64_HI(mp_req->mp_req_bd_dma);
+       tx_sgl_task_params.num_sges = 1;
+       /* SGE is one page, but only the request payload length is sent */
+       tx_sgl_task_params.total_buffer_size = io_req->data_xfer_len;
+       tx_sgl_task_params.small_mid_sge = 0;
+
+       /* Set up s/g list parameters for response buffer */
+       rx_sgl_task_params.sgl = mp_req->mp_resp_bd;
+       rx_sgl_task_params.sgl_phys_addr.lo = U64_LO(mp_req->mp_resp_bd_dma);
+       rx_sgl_task_params.sgl_phys_addr.hi = U64_HI(mp_req->mp_resp_bd_dma);
+       rx_sgl_task_params.num_sges = 1;
+       /* Response buffer is a single page-sized SGE */
+       rx_sgl_task_params.total_buffer_size = PAGE_SIZE;
+       rx_sgl_task_params.small_mid_sge = 0;
 
-               /* Fill FC Header into middle path buffer */
-               task_fc_hdr->parameter = fc_hdr->fh_parm_offset;
-               task_fc_hdr->r_ctl = fc_hdr->fh_r_ctl;
-               task_fc_hdr->type = fc_hdr->fh_type;
-               task_fc_hdr->cs_ctl = fc_hdr->fh_cs_ctl;
-               task_fc_hdr->df_ctl = fc_hdr->fh_df_ctl;
-               task_fc_hdr->rx_id = fc_hdr->fh_rx_id;
-               task_fc_hdr->ox_id = fc_hdr->fh_ox_id;
-
-               task_ctx->ystorm_st_context.data_2_trns_rem =
-                   io_req->data_xfer_len;
-               task_ctx->ystorm_st_context.task_type = task_type;
-       }
-
-       /* TSTORM ONLY */
-       {
-               task_ctx->tstorm_ag_context.icid = (u16)fcport->fw_cid;
-               task_ctx->tstorm_st_context.read_only.cid = fcport->fw_cid;
-               /* Always send middle-path repsonses on CQ #0 */
-               task_ctx->tstorm_st_context.read_only.glbl_q_num = 0;
-               io_req->fp_idx = 0;
-               SET_FIELD(task_ctx->tstorm_ag_context.flags0,
-                   TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE,
-                   PROTOCOLID_FCOE);
-               task_ctx->tstorm_st_context.read_only.task_type = task_type;
-               SET_FIELD(task_ctx->tstorm_st_context.read_write.flags,
-                   FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME,
-                   1);
-               task_ctx->tstorm_st_context.read_write.rx_id = 0xffff;
-       }
-
-       /* MSTORM only */
-       {
-               if (task_type == FCOE_TASK_TYPE_MIDPATH) {
-                       /* Initialize task context */
-                       data_desc = &task_ctx->mstorm_st_context.fp.data_desc;
-
-                       /* Set cache sges address and length */
-                       data_desc->slow.base_sgl_addr.lo =
-                           U64_LO(mp_req->mp_resp_bd_dma);
-                       data_desc->slow.base_sgl_addr.hi =
-                           U64_HI(mp_req->mp_resp_bd_dma);
-                       data_desc->slow.remainder_num_sges = 1;
-                       data_desc->slow.curr_sge_off = 0;
-                       data_desc->slow.curr_sgl_index = 0;
 
-                       /*
-                        * Also need to fil in non-fastpath response address
-                        * for middle path commands.
-                        */
-                       task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.lo =
-                           U64_LO(mp_req->mp_resp_bd_dma);
-                       task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.hi =
-                           U64_HI(mp_req->mp_resp_bd_dma);
-               }
-       }
-
-       /* USTORM ONLY */
-       {
-               task_ctx->ustorm_ag_context.global_cq_num = 0;
-       }
+       /*
+        * Pass 0 as the last argument since the previous code did not
+        * request that the FW place the FC header with the data.
+        */
+       init_initiator_midpath_unsolicited_fcoe_task(io_req->task_params,
+                                                    &task_fc_hdr,
+                                                    &tx_sgl_task_params,
+                                                    &rx_sgl_task_params, 0);
 
-       /* I/O stats. Middle path commands always use slow SGEs */
-       qedf->slow_sge_ios++;
-       io_req->sge_type = QEDF_IOREQ_SLOW_SGE;
+       /* Midpath requests always consume 1 SGE */
+       qedf->single_sge_ios++;
 }
 
-void qedf_add_to_sq(struct qedf_rport *fcport, u16 xid, u32 ptu_invalidate,
-       enum fcoe_task_type req_type, u32 offset)
+/* Presumed that fcport->rport_lock is held */
+u16 qedf_get_sqe_idx(struct qedf_rport *fcport)
 {
-       struct fcoe_wqe *sqe;
        uint16_t total_sqe = (fcport->sq_mem_size)/(sizeof(struct fcoe_wqe));
+       u16 rval;
 
-       sqe = &fcport->sq[fcport->sq_prod_idx];
+       rval = fcport->sq_prod_idx;
 
+       /* Adjust ring index */
        fcport->sq_prod_idx++;
        fcport->fw_sq_prod_idx++;
        if (fcport->sq_prod_idx == total_sqe)
                fcport->sq_prod_idx = 0;
 
-       switch (req_type) {
-       case FCOE_TASK_TYPE_WRITE_INITIATOR:
-       case FCOE_TASK_TYPE_READ_INITIATOR:
-               SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, SEND_FCOE_CMD);
-               if (ptu_invalidate)
-                       SET_FIELD(sqe->flags, FCOE_WQE_INVALIDATE_PTU, 1);
-               break;
-       case FCOE_TASK_TYPE_MIDPATH:
-               SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, SEND_FCOE_MIDPATH);
-               break;
-       case FCOE_TASK_TYPE_ABTS:
-               SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE,
-                   SEND_FCOE_ABTS_REQUEST);
-               break;
-       case FCOE_TASK_TYPE_EXCHANGE_CLEANUP:
-               SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE,
-                    FCOE_EXCHANGE_CLEANUP);
-               break;
-       case FCOE_TASK_TYPE_SEQUENCE_CLEANUP:
-               SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE,
-                   FCOE_SEQUENCE_RECOVERY);
-               /* NOTE: offset param only used for sequence recovery */
-               sqe->additional_info_union.seq_rec_updated_offset = offset;
-               break;
-       case FCOE_TASK_TYPE_UNSOLICITED:
-               break;
-       default:
-               break;
-       }
-
-       sqe->task_id = xid;
-
-       /* Make sure SQ data is coherent */
-       wmb();
-
+       return rval;
 }
 
 void qedf_ring_doorbell(struct qedf_rport *fcport)
@@ -1029,7 +847,8 @@ int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req)
        struct fcoe_task_context *task_ctx;
        u16 xid;
        enum fcoe_task_type req_type = 0;
-       u32 ptu_invalidate = 0;
+       struct fcoe_wqe *sqe;
+       u16 sqe_idx;
 
        /* Initialize rest of io_req fields */
        io_req->data_xfer_len = scsi_bufflen(sc_cmd);
@@ -1061,6 +880,16 @@ int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req)
                return -EAGAIN;
        }
 
+       if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
+               QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n");
+               kref_put(&io_req->refcount, qedf_release_cmd);
+       }
+
+       /* Obtain free SQE */
+       sqe_idx = qedf_get_sqe_idx(fcport);
+       sqe = &fcport->sq[sqe_idx];
+       memset(sqe, 0, sizeof(struct fcoe_wqe));
+
        /* Get the task context */
        task_ctx = qedf_get_task_mem(&qedf->tasks, xid);
        if (!task_ctx) {
@@ -1070,15 +899,7 @@ int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req)
                return -EINVAL;
        }
 
-       qedf_init_task(fcport, lport, io_req, &ptu_invalidate, task_ctx);
-
-       if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
-               QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n");
-               kref_put(&io_req->refcount, qedf_release_cmd);
-       }
-
-       /* Obtain free SQ entry */
-       qedf_add_to_sq(fcport, xid, ptu_invalidate, req_type, 0);
+       qedf_init_task(fcport, lport, io_req, task_ctx, sqe);
 
        /* Ring doorbell */
        qedf_ring_doorbell(fcport);
@@ -1661,6 +1482,8 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts)
        u32 r_a_tov = 0;
        int rc = 0;
        unsigned long flags;
+       struct fcoe_wqe *sqe;
+       u16 sqe_idx;
 
        r_a_tov = rdata->r_a_tov;
        lport = qedf->lport;
@@ -1712,10 +1535,12 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts)
 
        spin_lock_irqsave(&fcport->rport_lock, flags);
 
-       /* Add ABTS to send queue */
-       qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_ABTS, 0);
+       sqe_idx = qedf_get_sqe_idx(fcport);
+       sqe = &fcport->sq[sqe_idx];
+       memset(sqe, 0, sizeof(struct fcoe_wqe));
+       io_req->task_params->sqe = sqe;
 
-       /* Ring doorbell */
+       init_initiator_abort_fcoe_task(io_req->task_params);
        qedf_ring_doorbell(fcport);
 
        spin_unlock_irqrestore(&fcport->rport_lock, flags);
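
The ABTS path above shows the submission sequence this series converges on:
under fcport->rport_lock, reserve a slot, zero it, point task_params->sqe at
it, let the firmware init_* helper fill it, then ring the doorbell. A
standalone sketch of that shape, with stand-in types in place of the
firmware structures:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct wqe { uint16_t task_id; uint16_t flags; };
struct task_params { struct wqe *sqe; uint16_t itid; };

static struct wqe sq[8];	/* the send queue ring */
static uint16_t prod_idx;

static uint16_t get_sqe_idx(void)
{
	uint16_t rval = prod_idx;

	if (++prod_idx == 8)
		prod_idx = 0;
	return rval;
}

/* Stand-in for init_initiator_abort_fcoe_task() and friends */
static void init_task(struct task_params *tp)
{
	tp->sqe->task_id = tp->itid;
}

static void ring_doorbell(void)
{
	printf("doorbell: prod=%u\n", (unsigned int)prod_idx);
}

int main(void)
{
	struct task_params tp = { .sqe = NULL, .itid = 0x42 };

	/* In the driver this sequence runs under fcport->rport_lock */
	tp.sqe = &sq[get_sqe_idx()];
	memset(tp.sqe, 0, sizeof(*tp.sqe));
	init_task(&tp);
	ring_doorbell();
	return 0;
}
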
@@ -1784,8 +1609,8 @@ void qedf_process_abts_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
 int qedf_init_mp_req(struct qedf_ioreq *io_req)
 {
        struct qedf_mp_req *mp_req;
-       struct fcoe_sge *mp_req_bd;
-       struct fcoe_sge *mp_resp_bd;
+       struct scsi_sge *mp_req_bd;
+       struct scsi_sge *mp_resp_bd;
        struct qedf_ctx *qedf = io_req->fcport->qedf;
        dma_addr_t addr;
        uint64_t sz;
@@ -1819,7 +1644,7 @@ int qedf_init_mp_req(struct qedf_ioreq *io_req)
        }
 
        /* Allocate and map mp_req_bd and mp_resp_bd */
-       sz = sizeof(struct fcoe_sge);
+       sz = sizeof(struct scsi_sge);
        mp_req->mp_req_bd = dma_alloc_coherent(&qedf->pdev->dev, sz,
            &mp_req->mp_req_bd_dma, GFP_KERNEL);
        if (!mp_req->mp_req_bd) {
@@ -1841,7 +1666,7 @@ int qedf_init_mp_req(struct qedf_ioreq *io_req)
        mp_req_bd = mp_req->mp_req_bd;
        mp_req_bd->sge_addr.lo = U64_LO(addr);
        mp_req_bd->sge_addr.hi = U64_HI(addr);
-       mp_req_bd->size = QEDF_PAGE_SIZE;
+       mp_req_bd->sge_len = QEDF_PAGE_SIZE;
 
        /*
         * MP buffer is either a task mgmt command or an ELS.
@@ -1852,7 +1677,7 @@ int qedf_init_mp_req(struct qedf_ioreq *io_req)
        addr = mp_req->resp_buf_dma;
        mp_resp_bd->sge_addr.lo = U64_LO(addr);
        mp_resp_bd->sge_addr.hi = U64_HI(addr);
-       mp_resp_bd->size = QEDF_PAGE_SIZE;
+       mp_resp_bd->sge_len = QEDF_PAGE_SIZE;
 
        return 0;
 }
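
The sge_addr.lo/sge_addr.hi writes above split a 64-bit DMA address into two
32-bit halves for the firmware; U64_LO()/U64_HI() are, on my reading of the
qed headers, plain mask-and-shift macros. A compilable sketch:

#include <stdint.h>
#include <stdio.h>

#define U64_LO(x) ((uint32_t)((uint64_t)(x) & 0xffffffff))
#define U64_HI(x) ((uint32_t)(((uint64_t)(x) >> 32) & 0xffffffff))

int main(void)
{
	uint64_t dma = 0x0000123456789abcULL;	/* example bus address */

	printf("lo=0x%08x hi=0x%08x\n",
	       (unsigned int)U64_LO(dma), (unsigned int)U64_HI(dma));
	return 0;
}
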
@@ -1895,6 +1720,8 @@ int qedf_initiate_cleanup(struct qedf_ioreq *io_req,
        int tmo = 0;
        int rc = SUCCESS;
        unsigned long flags;
+       struct fcoe_wqe *sqe;
+       u16 sqe_idx;
 
        fcport = io_req->fcport;
        if (!fcport) {
@@ -1940,12 +1767,16 @@ int qedf_initiate_cleanup(struct qedf_ioreq *io_req,
 
        init_completion(&io_req->tm_done);
 
-       /* Obtain free SQ entry */
        spin_lock_irqsave(&fcport->rport_lock, flags);
-       qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_EXCHANGE_CLEANUP, 0);
 
-       /* Ring doorbell */
+       sqe_idx = qedf_get_sqe_idx(fcport);
+       sqe = &fcport->sq[sqe_idx];
+       memset(sqe, 0, sizeof(struct fcoe_wqe));
+       io_req->task_params->sqe = sqe;
+
+       init_initiator_cleanup_fcoe_task(io_req->task_params);
        qedf_ring_doorbell(fcport);
+
        spin_unlock_irqrestore(&fcport->rport_lock, flags);
 
        tmo = wait_for_completion_timeout(&io_req->tm_done,
@@ -1991,16 +1822,15 @@ static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd,
        uint8_t tm_flags)
 {
        struct qedf_ioreq *io_req;
-       struct qedf_mp_req *tm_req;
        struct fcoe_task_context *task;
-       struct fc_frame_header *fc_hdr;
-       struct fcp_cmnd *fcp_cmnd;
        struct qedf_ctx *qedf = fcport->qedf;
+       struct fc_lport *lport = qedf->lport;
        int rc = 0;
        uint16_t xid;
-       uint32_t sid, did;
        int tmo = 0;
        unsigned long flags;
+       struct fcoe_wqe *sqe;
+       u16 sqe_idx;
 
        if (!sc_cmd) {
                QEDF_ERR(&(qedf->dbg_ctx), "invalid arg\n");
@@ -2031,36 +1861,14 @@ static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd,
        /* Set the return CPU to be the same as the request one */
        io_req->cpu = smp_processor_id();
 
-       tm_req = (struct qedf_mp_req *)&(io_req->mp_req);
-
-       rc = qedf_init_mp_req(io_req);
-       if (rc == FAILED) {
-               QEDF_ERR(&(qedf->dbg_ctx), "Task mgmt MP request init "
-                         "failed\n");
-               kref_put(&io_req->refcount, qedf_release_cmd);
-               goto reset_tmf_err;
-       }
-
        /* Set TM flags */
-       io_req->io_req_flags = 0;
-       tm_req->tm_flags = tm_flags;
+       io_req->io_req_flags = QEDF_READ;
+       io_req->data_xfer_len = 0;
+       io_req->tm_flags = tm_flags;
 
        /* Default is to return a SCSI command when an error occurs */
        io_req->return_scsi_cmd_on_abts = true;
 
-       /* Fill FCP_CMND */
-       qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)tm_req->req_buf);
-       fcp_cmnd = (struct fcp_cmnd *)tm_req->req_buf;
-       memset(fcp_cmnd->fc_cdb, 0, FCP_CMND_LEN);
-       fcp_cmnd->fc_dl = 0;
-
-       /* Fill FC header */
-       fc_hdr = &(tm_req->req_fc_hdr);
-       sid = fcport->sid;
-       did = fcport->rdata->ids.port_id;
-       __fc_fill_fc_hdr(fc_hdr, FC_RCTL_DD_UNSOL_CMD, sid, did,
-                          FC_TYPE_FCP, FC_FC_FIRST_SEQ | FC_FC_END_SEQ |
-                          FC_FC_SEQ_INIT, 0);
        /* Obtain exchange id */
        xid = io_req->xid;
 
@@ -2069,16 +1877,18 @@ static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd,
 
        /* Initialize task context for this IO request */
        task = qedf_get_task_mem(&qedf->tasks, xid);
-       qedf_init_mp_task(io_req, task);
 
        init_completion(&io_req->tm_done);
 
-       /* Obtain free SQ entry */
        spin_lock_irqsave(&fcport->rport_lock, flags);
-       qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_MIDPATH, 0);
 
-       /* Ring doorbell */
+       sqe_idx = qedf_get_sqe_idx(fcport);
+       sqe = &fcport->sq[sqe_idx];
+       memset(sqe, 0, sizeof(struct fcoe_wqe));
+
+       qedf_init_task(fcport, lport, io_req, task, sqe);
        qedf_ring_doorbell(fcport);
+
        spin_unlock_irqrestore(&fcport->rport_lock, flags);
 
        tmo = wait_for_completion_timeout(&io_req->tm_done,
@@ -2162,14 +1972,6 @@ void qedf_process_tmf_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
        struct qedf_ioreq *io_req)
 {
        struct fcoe_cqe_rsp_info *fcp_rsp;
-       struct fcoe_cqe_midpath_info *mp_info;
-
-
-       /* Get TMF response length from CQE */
-       mp_info = &cqe->cqe_info.midpath_info;
-       io_req->mp_req.resp_len = mp_info->data_placement_size;
-       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM,
-           "Response len is %d.\n", io_req->mp_req.resp_len);
 
        fcp_rsp = &cqe->cqe_info.rsp_info;
        qedf_parse_fcp_rsp(io_req, fcp_rsp);
index 2b3e16b24299ee94bf752d361d7b479c7958c385..90a6925577cca6d5ea147f3028f645af844b91b6 100644 (file)
@@ -1,5 +1,5 @@
 obj-$(CONFIG_QEDI) := qedi.o
 qedi-y := qedi_main.o qedi_iscsi.o qedi_fw.o qedi_sysfs.o \
-           qedi_dbg.o
+           qedi_dbg.o qedi_fw_api.o
 
 qedi-$(CONFIG_DEBUG_FS) += qedi_debugfs.o
index 2bce3efc66a4b4bda8bae40768ca3e961a6d33b0..d6978cbc56f0586aa8a075191433184c50c93b01 100644 (file)
@@ -14,6 +14,8 @@
 #include "qedi.h"
 #include "qedi_iscsi.h"
 #include "qedi_gbl.h"
+#include "qedi_fw_iscsi.h"
+#include "qedi_fw_scsi.h"
 
 static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn,
                               struct iscsi_task *mtask);
@@ -53,8 +55,8 @@ static void qedi_process_logout_resp(struct qedi_ctx *qedi,
        resp_hdr->exp_cmdsn = cpu_to_be32(cqe_logout_response->exp_cmd_sn);
        resp_hdr->max_cmdsn = cpu_to_be32(cqe_logout_response->max_cmd_sn);
 
-       resp_hdr->t2wait = cpu_to_be32(cqe_logout_response->time2wait);
-       resp_hdr->t2retain = cpu_to_be32(cqe_logout_response->time2retain);
+       resp_hdr->t2wait = cpu_to_be32(cqe_logout_response->time_2_wait);
+       resp_hdr->t2retain = cpu_to_be32(cqe_logout_response->time_2_retain);
 
        QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID,
                  "Freeing tid=0x%x for cid=0x%x\n",
@@ -975,81 +977,6 @@ exit_fp_process:
        return;
 }
 
-static void qedi_add_to_sq(struct qedi_conn *qedi_conn, struct iscsi_task *task,
-                          u16 tid, uint16_t ptu_invalidate, int is_cleanup)
-{
-       struct iscsi_wqe *wqe;
-       struct iscsi_wqe_field *cont_field;
-       struct qedi_endpoint *ep;
-       struct scsi_cmnd *sc = task->sc;
-       struct iscsi_login_req *login_hdr;
-       struct qedi_cmd *cmd = task->dd_data;
-
-       login_hdr = (struct iscsi_login_req *)task->hdr;
-       ep = qedi_conn->ep;
-       wqe = &ep->sq[ep->sq_prod_idx];
-
-       memset(wqe, 0, sizeof(*wqe));
-
-       ep->sq_prod_idx++;
-       ep->fw_sq_prod_idx++;
-       if (ep->sq_prod_idx == QEDI_SQ_SIZE)
-               ep->sq_prod_idx = 0;
-
-       if (is_cleanup) {
-               SET_FIELD(wqe->flags, ISCSI_WQE_WQE_TYPE,
-                         ISCSI_WQE_TYPE_TASK_CLEANUP);
-               wqe->task_id = tid;
-               return;
-       }
-
-       if (ptu_invalidate) {
-               SET_FIELD(wqe->flags, ISCSI_WQE_PTU_INVALIDATE,
-                         ISCSI_WQE_SET_PTU_INVALIDATE);
-       }
-
-       cont_field = &wqe->cont_prevtid_union.cont_field;
-
-       switch (task->hdr->opcode & ISCSI_OPCODE_MASK) {
-       case ISCSI_OP_LOGIN:
-       case ISCSI_OP_TEXT:
-               SET_FIELD(wqe->flags, ISCSI_WQE_WQE_TYPE,
-                         ISCSI_WQE_TYPE_MIDDLE_PATH);
-               SET_FIELD(wqe->flags, ISCSI_WQE_NUM_FAST_SGES,
-                         1);
-               cont_field->contlen_cdbsize_field = ntoh24(login_hdr->dlength);
-               break;
-       case ISCSI_OP_LOGOUT:
-       case ISCSI_OP_NOOP_OUT:
-       case ISCSI_OP_SCSI_TMFUNC:
-                SET_FIELD(wqe->flags, ISCSI_WQE_WQE_TYPE,
-                          ISCSI_WQE_TYPE_NORMAL);
-               break;
-       default:
-               if (!sc)
-                       break;
-
-               SET_FIELD(wqe->flags, ISCSI_WQE_WQE_TYPE,
-                         ISCSI_WQE_TYPE_NORMAL);
-               cont_field->contlen_cdbsize_field =
-                               (sc->sc_data_direction == DMA_TO_DEVICE) ?
-                               scsi_bufflen(sc) : 0;
-               if (cmd->use_slowpath)
-                       SET_FIELD(wqe->flags, ISCSI_WQE_NUM_FAST_SGES, 0);
-               else
-                       SET_FIELD(wqe->flags, ISCSI_WQE_NUM_FAST_SGES,
-                                 (sc->sc_data_direction ==
-                                  DMA_TO_DEVICE) ?
-                                 min((u16)QEDI_FAST_SGE_COUNT,
-                                     (u16)cmd->io_tbl.sge_valid) : 0);
-               break;
-       }
-
-       wqe->task_id = tid;
-       /* Make sure SQ data is coherent */
-       wmb();
-}
-
 static void qedi_ring_doorbell(struct qedi_conn *qedi_conn)
 {
        struct iscsi_db_data dbell = { 0 };
@@ -1076,96 +1003,116 @@ static void qedi_ring_doorbell(struct qedi_conn *qedi_conn)
                  qedi_conn->iscsi_conn_id);
 }
 
+static u16 qedi_get_wqe_idx(struct qedi_conn *qedi_conn)
+{
+       struct qedi_endpoint *ep;
+       u16 rval;
+
+       ep = qedi_conn->ep;
+       rval = ep->sq_prod_idx;
+
+       /* Increment SQ index */
+       ep->sq_prod_idx++;
+       ep->fw_sq_prod_idx++;
+       if (ep->sq_prod_idx == QEDI_SQ_SIZE)
+               ep->sq_prod_idx = 0;
+
+       return rval;
+}
+
 int qedi_send_iscsi_login(struct qedi_conn *qedi_conn,
                          struct iscsi_task *task)
 {
-       struct qedi_ctx *qedi = qedi_conn->qedi;
+       struct iscsi_login_req_hdr login_req_pdu_header;
+       struct scsi_sgl_task_params tx_sgl_task_params;
+       struct scsi_sgl_task_params rx_sgl_task_params;
+       struct iscsi_task_params task_params;
        struct iscsi_task_context *fw_task_ctx;
+       struct qedi_ctx *qedi = qedi_conn->qedi;
        struct iscsi_login_req *login_hdr;
-       struct iscsi_login_req_hdr *fw_login_req = NULL;
-       struct iscsi_cached_sge_ctx *cached_sge = NULL;
-       struct iscsi_sge *single_sge = NULL;
-       struct iscsi_sge *req_sge = NULL;
-       struct iscsi_sge *resp_sge = NULL;
+       struct scsi_sge *req_sge = NULL;
+       struct scsi_sge *resp_sge = NULL;
        struct qedi_cmd *qedi_cmd;
-       s16 ptu_invalidate = 0;
+       struct qedi_endpoint *ep;
        s16 tid = 0;
+       u16 sq_idx = 0;
+       int rval = 0;
 
-       req_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
-       resp_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
+       req_sge = (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+       resp_sge = (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
        qedi_cmd = (struct qedi_cmd *)task->dd_data;
+       ep = qedi_conn->ep;
        login_hdr = (struct iscsi_login_req *)task->hdr;
 
        tid = qedi_get_task_idx(qedi);
        if (tid == -1)
                return -ENOMEM;
 
-       fw_task_ctx = qedi_get_task_mem(&qedi->tasks, tid);
+       fw_task_ctx =
+            (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks, tid);
        memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
        qedi_cmd->task_id = tid;
 
-       /* Ystorm context */
-       fw_login_req = &fw_task_ctx->ystorm_st_context.pdu_hdr.login_req;
-       fw_login_req->opcode = login_hdr->opcode;
-       fw_login_req->version_min = login_hdr->min_version;
-       fw_login_req->version_max = login_hdr->max_version;
-       fw_login_req->flags_attr = login_hdr->flags;
-       fw_login_req->isid_tabc = *((u16 *)login_hdr->isid + 2);
-       fw_login_req->isid_d = *((u32 *)login_hdr->isid);
-       fw_login_req->tsih = login_hdr->tsih;
-       qedi_update_itt_map(qedi, tid, task->itt, qedi_cmd);
-       fw_login_req->itt = qedi_set_itt(tid, get_itt(task->itt));
-       fw_login_req->cid = qedi_conn->iscsi_conn_id;
-       fw_login_req->cmd_sn = be32_to_cpu(login_hdr->cmdsn);
-       fw_login_req->exp_stat_sn = be32_to_cpu(login_hdr->exp_statsn);
-       fw_login_req->exp_stat_sn = 0;
-
-       if (qedi->tid_reuse_count[tid] == QEDI_MAX_TASK_NUM) {
-               ptu_invalidate = 1;
-               qedi->tid_reuse_count[tid] = 0;
-       }
+       memset(&task_params, 0, sizeof(task_params));
+       memset(&login_req_pdu_header, 0, sizeof(login_req_pdu_header));
+       memset(&tx_sgl_task_params, 0, sizeof(tx_sgl_task_params));
+       memset(&rx_sgl_task_params, 0, sizeof(rx_sgl_task_params));
+       /* Update header info */
+       login_req_pdu_header.opcode = login_hdr->opcode;
+       login_req_pdu_header.version_min = login_hdr->min_version;
+       login_req_pdu_header.version_max = login_hdr->max_version;
+       login_req_pdu_header.flags_attr = login_hdr->flags;
+       login_req_pdu_header.isid_tabc = swab32p((u32 *)login_hdr->isid);
+       login_req_pdu_header.isid_d = swab16p((u16 *)&login_hdr->isid[4]);
+
+       login_req_pdu_header.tsih = login_hdr->tsih;
+       login_req_pdu_header.hdr_second_dword = ntoh24(login_hdr->dlength);
 
-       fw_task_ctx->ystorm_st_context.state.reuse_count =
-                                               qedi->tid_reuse_count[tid];
-       fw_task_ctx->mstorm_st_context.reuse_count =
-                                               qedi->tid_reuse_count[tid]++;
-       cached_sge =
-              &fw_task_ctx->ystorm_st_context.state.sgl_ctx_union.cached_sge;
-       cached_sge->sge.sge_len = req_sge->sge_len;
-       cached_sge->sge.sge_addr.lo = (u32)(qedi_conn->gen_pdu.req_dma_addr);
-       cached_sge->sge.sge_addr.hi =
-                            (u32)((u64)qedi_conn->gen_pdu.req_dma_addr >> 32);
-
-       /* Mstorm context */
-       single_sge = &fw_task_ctx->mstorm_st_context.sgl_union.single_sge;
-       fw_task_ctx->mstorm_st_context.task_type = 0x2;
-       fw_task_ctx->mstorm_ag_context.task_cid = (u16)qedi_conn->iscsi_conn_id;
-       single_sge->sge_addr.lo = resp_sge->sge_addr.lo;
-       single_sge->sge_addr.hi = resp_sge->sge_addr.hi;
-       single_sge->sge_len = resp_sge->sge_len;
-
-       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                 ISCSI_MFLAGS_SINGLE_SGE, 1);
-       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                 ISCSI_MFLAGS_SLOW_IO, 0);
-       fw_task_ctx->mstorm_st_context.sgl_size = 1;
-       fw_task_ctx->mstorm_st_context.rem_task_size = resp_sge->sge_len;
-
-       /* Ustorm context */
-       fw_task_ctx->ustorm_st_context.rem_rcv_len = resp_sge->sge_len;
-       fw_task_ctx->ustorm_st_context.exp_data_transfer_len =
-                                               ntoh24(login_hdr->dlength);
-       fw_task_ctx->ustorm_st_context.exp_data_sn = 0;
-       fw_task_ctx->ustorm_st_context.cq_rss_number = 0;
-       fw_task_ctx->ustorm_st_context.task_type = 0x2;
-       fw_task_ctx->ustorm_ag_context.icid = (u16)qedi_conn->iscsi_conn_id;
-       fw_task_ctx->ustorm_ag_context.exp_data_acked =
-                                                ntoh24(login_hdr->dlength);
-       SET_FIELD(fw_task_ctx->ustorm_ag_context.flags1,
-                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
-       SET_FIELD(fw_task_ctx->ustorm_st_context.flags,
-                 USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP, 0);
+       qedi_update_itt_map(qedi, tid, task->itt, qedi_cmd);
+       login_req_pdu_header.itt = qedi_set_itt(tid, get_itt(task->itt));
+       login_req_pdu_header.cid = qedi_conn->iscsi_conn_id;
+       login_req_pdu_header.cmd_sn = be32_to_cpu(login_hdr->cmdsn);
+       login_req_pdu_header.exp_stat_sn = be32_to_cpu(login_hdr->exp_statsn);
+       login_req_pdu_header.exp_stat_sn = 0;
+
+       /* Fill tx AHS and rx buffer */
+       tx_sgl_task_params.sgl =
+                              (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+       tx_sgl_task_params.sgl_phys_addr.lo =
+                                        (u32)(qedi_conn->gen_pdu.req_dma_addr);
+       tx_sgl_task_params.sgl_phys_addr.hi =
+                             (u32)((u64)qedi_conn->gen_pdu.req_dma_addr >> 32);
+       tx_sgl_task_params.total_buffer_size = ntoh24(login_hdr->dlength);
+       tx_sgl_task_params.num_sges = 1;
+
+       rx_sgl_task_params.sgl =
+                             (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
+       rx_sgl_task_params.sgl_phys_addr.lo =
+                                       (u32)(qedi_conn->gen_pdu.resp_dma_addr);
+       rx_sgl_task_params.sgl_phys_addr.hi =
+                            (u32)((u64)qedi_conn->gen_pdu.resp_dma_addr >> 32);
+       rx_sgl_task_params.total_buffer_size = resp_sge->sge_len;
+       rx_sgl_task_params.num_sges = 1;
+
+       /* Fill fw input params */
+       task_params.context = fw_task_ctx;
+       task_params.conn_icid = (u16)qedi_conn->iscsi_conn_id;
+       task_params.itid = tid;
+       task_params.cq_rss_number = 0;
+       task_params.tx_io_size = ntoh24(login_hdr->dlength);
+       task_params.rx_io_size = resp_sge->sge_len;
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+       task_params.sqe = &ep->sq[sq_idx];
+
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+       rval = init_initiator_login_request_task(&task_params,
+                                                &login_req_pdu_header,
+                                                &tx_sgl_task_params,
+                                                &rx_sgl_task_params);
+       if (rval)
+               return -1;
 
        spin_lock(&qedi_conn->list_lock);
        list_add_tail(&qedi_cmd->io_cmd, &qedi_conn->active_cmd_list);
@@ -1173,7 +1120,6 @@ int qedi_send_iscsi_login(struct qedi_conn *qedi_conn,
        qedi_conn->active_cmd_count++;
        spin_unlock(&qedi_conn->list_lock);
 
-       qedi_add_to_sq(qedi_conn, task, tid, ptu_invalidate, false);
        qedi_ring_doorbell(qedi_conn);
        return 0;
 }
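
Two conversions in the login path above are worth unpacking: ntoh24()
assembles the 3-byte big-endian dlength from the iSCSI BHS, and the
swab32p()/swab16p() calls on the 6-byte ISID amount to big-endian loads on a
little-endian host. A standalone sketch, with illustrative field names:

#include <stdint.h>
#include <stdio.h>

/* 3-byte big-endian length, as in the iSCSI BHS dlength field */
static uint32_t ntoh24(const uint8_t p[3])
{
	return ((uint32_t)p[0] << 16) | ((uint32_t)p[1] << 8) | p[2];
}

static uint32_t load_be32(const uint8_t p[4])
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

static uint16_t load_be16(const uint8_t p[2])
{
	return (uint16_t)(((uint16_t)p[0] << 8) | p[1]);
}

int main(void)
{
	uint8_t isid[6] = { 0x80, 0x12, 0x34, 0x56, 0x78, 0x9a };
	uint8_t dlength[3] = { 0x00, 0x01, 0x00 };	/* 256 bytes */

	printf("isid_tabc=0x%08x isid_d=0x%04x dlength=%u\n",
	       (unsigned int)load_be32(isid),
	       (unsigned int)load_be16(&isid[4]),
	       (unsigned int)ntoh24(dlength));
	return 0;
}
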
@@ -1181,65 +1127,64 @@ int qedi_send_iscsi_login(struct qedi_conn *qedi_conn,
 int qedi_send_iscsi_logout(struct qedi_conn *qedi_conn,
                           struct iscsi_task *task)
 {
-       struct qedi_ctx *qedi = qedi_conn->qedi;
-       struct iscsi_logout_req_hdr *fw_logout_req = NULL;
-       struct iscsi_task_context *fw_task_ctx = NULL;
+       struct iscsi_logout_req_hdr logout_pdu_header;
+       struct scsi_sgl_task_params tx_sgl_task_params;
+       struct scsi_sgl_task_params rx_sgl_task_params;
+       struct iscsi_task_params task_params;
+       struct iscsi_task_context *fw_task_ctx;
        struct iscsi_logout *logout_hdr = NULL;
-       struct qedi_cmd *qedi_cmd = NULL;
-       s16  tid = 0;
-       s16 ptu_invalidate = 0;
+       struct qedi_ctx *qedi = qedi_conn->qedi;
+       struct qedi_cmd *qedi_cmd;
+       struct qedi_endpoint *ep;
+       s16 tid = 0;
+       u16 sq_idx = 0;
+       int rval = 0;
 
        qedi_cmd = (struct qedi_cmd *)task->dd_data;
        logout_hdr = (struct iscsi_logout *)task->hdr;
+       ep = qedi_conn->ep;
 
        tid = qedi_get_task_idx(qedi);
        if (tid == -1)
                return -ENOMEM;
 
-       fw_task_ctx = qedi_get_task_mem(&qedi->tasks, tid);
-
+       fw_task_ctx =
+            (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks, tid);
        memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
+
        qedi_cmd->task_id = tid;
 
-       /* Ystorm context */
-       fw_logout_req = &fw_task_ctx->ystorm_st_context.pdu_hdr.logout_req;
-       fw_logout_req->opcode = ISCSI_OPCODE_LOGOUT_REQUEST;
-       fw_logout_req->reason_code = 0x80 | logout_hdr->flags;
-       qedi_update_itt_map(qedi, tid, task->itt, qedi_cmd);
-       fw_logout_req->itt = qedi_set_itt(tid, get_itt(task->itt));
-       fw_logout_req->exp_stat_sn = be32_to_cpu(logout_hdr->exp_statsn);
-       fw_logout_req->cmd_sn = be32_to_cpu(logout_hdr->cmdsn);
+       memset(&task_params, 0, sizeof(task_params));
+       memset(&logout_pdu_header, 0, sizeof(logout_pdu_header));
+       memset(&tx_sgl_task_params, 0, sizeof(tx_sgl_task_params));
+       memset(&rx_sgl_task_params, 0, sizeof(rx_sgl_task_params));
 
-       if (qedi->tid_reuse_count[tid] == QEDI_MAX_TASK_NUM) {
-               ptu_invalidate = 1;
-               qedi->tid_reuse_count[tid] = 0;
-       }
-       fw_task_ctx->ystorm_st_context.state.reuse_count =
-                                                 qedi->tid_reuse_count[tid];
-       fw_task_ctx->mstorm_st_context.reuse_count =
-                                               qedi->tid_reuse_count[tid]++;
-       fw_logout_req->cid = qedi_conn->iscsi_conn_id;
-       fw_task_ctx->ystorm_st_context.state.buffer_offset[0] = 0;
-
-       /* Mstorm context */
-       fw_task_ctx->mstorm_st_context.task_type = ISCSI_TASK_TYPE_MIDPATH;
-       fw_task_ctx->mstorm_ag_context.task_cid = (u16)qedi_conn->iscsi_conn_id;
-
-       /* Ustorm context */
-       fw_task_ctx->ustorm_st_context.rem_rcv_len = 0;
-       fw_task_ctx->ustorm_st_context.exp_data_transfer_len = 0;
-       fw_task_ctx->ustorm_st_context.exp_data_sn = 0;
-       fw_task_ctx->ustorm_st_context.task_type =  ISCSI_TASK_TYPE_MIDPATH;
-       fw_task_ctx->ustorm_st_context.cq_rss_number = 0;
-
-       SET_FIELD(fw_task_ctx->ustorm_st_context.flags,
-                 USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP, 0);
-       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                 ISCSI_REG1_NUM_FAST_SGES, 0);
-
-       fw_task_ctx->ustorm_ag_context.icid = (u16)qedi_conn->iscsi_conn_id;
-       SET_FIELD(fw_task_ctx->ustorm_ag_context.flags1,
-                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
+       /* Update header info */
+       logout_pdu_header.opcode = logout_hdr->opcode;
+       logout_pdu_header.reason_code = 0x80 | logout_hdr->flags;
+       qedi_update_itt_map(qedi, tid, task->itt, qedi_cmd);
+       logout_pdu_header.itt = qedi_set_itt(tid, get_itt(task->itt));
+       logout_pdu_header.exp_stat_sn = be32_to_cpu(logout_hdr->exp_statsn);
+       logout_pdu_header.cmd_sn = be32_to_cpu(logout_hdr->cmdsn);
+       logout_pdu_header.cid = qedi_conn->iscsi_conn_id;
+
+       /* Fill fw input params */
+       task_params.context = fw_task_ctx;
+       task_params.conn_icid = (u16)qedi_conn->iscsi_conn_id;
+       task_params.itid = tid;
+       task_params.cq_rss_number = 0;
+       task_params.tx_io_size = 0;
+       task_params.rx_io_size = 0;
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+       task_params.sqe = &ep->sq[sq_idx];
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+
+       rval = init_initiator_logout_request_task(&task_params,
+                                                 &logout_pdu_header,
+                                                 NULL, NULL);
+       if (rval)
+               return -1;
 
        spin_lock(&qedi_conn->list_lock);
        list_add_tail(&qedi_cmd->io_cmd, &qedi_conn->active_cmd_list);
@@ -1247,9 +1192,7 @@ int qedi_send_iscsi_logout(struct qedi_conn *qedi_conn,
        qedi_conn->active_cmd_count++;
        spin_unlock(&qedi_conn->list_lock);
 
-       qedi_add_to_sq(qedi_conn, task, tid, ptu_invalidate, false);
        qedi_ring_doorbell(qedi_conn);
-
        return 0;
 }
 
@@ -1533,47 +1476,46 @@ ldel_exit:
 static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn,
                               struct iscsi_task *mtask)
 {
-       struct iscsi_conn *conn = qedi_conn->cls_conn->dd_data;
+       struct iscsi_tmf_request_hdr tmf_pdu_header;
+       struct iscsi_task_params task_params;
        struct qedi_ctx *qedi = qedi_conn->qedi;
        struct iscsi_task_context *fw_task_ctx;
-       struct iscsi_tmf_request_hdr *fw_tmf_request;
-       struct iscsi_sge *single_sge;
-       struct qedi_cmd *qedi_cmd;
-       struct qedi_cmd *cmd;
+       struct iscsi_conn *conn = qedi_conn->cls_conn->dd_data;
        struct iscsi_task *ctask;
        struct iscsi_tm *tmf_hdr;
-       struct iscsi_sge *req_sge;
-       struct iscsi_sge *resp_sge;
-       u32 lun[2];
-       s16 tid = 0, ptu_invalidate = 0;
+       struct qedi_cmd *qedi_cmd;
+       struct qedi_cmd *cmd;
+       struct qedi_endpoint *ep;
+       u32 scsi_lun[2];
+       s16 tid = 0;
+       u16 sq_idx = 0;
+       int rval = 0;
 
-       req_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
-       resp_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
-       qedi_cmd = (struct qedi_cmd *)mtask->dd_data;
        tmf_hdr = (struct iscsi_tm *)mtask->hdr;
+       qedi_cmd = (struct qedi_cmd *)mtask->dd_data;
+       ep = qedi_conn->ep;
 
-       tid = qedi_cmd->task_id;
-       qedi_update_itt_map(qedi, tid, mtask->itt, qedi_cmd);
+       tid = qedi_get_task_idx(qedi);
+       if (tid == -1)
+               return -ENOMEM;
 
-       fw_task_ctx = qedi_get_task_mem(&qedi->tasks, tid);
+       fw_task_ctx =
+            (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks, tid);
        memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
-       fw_tmf_request = &fw_task_ctx->ystorm_st_context.pdu_hdr.tmf_request;
-       fw_tmf_request->itt = qedi_set_itt(tid, get_itt(mtask->itt));
-       fw_tmf_request->cmd_sn = be32_to_cpu(tmf_hdr->cmdsn);
+       qedi_cmd->task_id = tid;
 
-       memcpy(lun, &tmf_hdr->lun, sizeof(struct scsi_lun));
-       fw_tmf_request->lun.lo = be32_to_cpu(lun[0]);
-       fw_tmf_request->lun.hi = be32_to_cpu(lun[1]);
+       memset(&task_params, 0, sizeof(task_params));
+       memset(&tmf_pdu_header, 0, sizeof(tmf_pdu_header));
 
-       if (qedi->tid_reuse_count[tid] == QEDI_MAX_TASK_NUM) {
-               ptu_invalidate = 1;
-               qedi->tid_reuse_count[tid] = 0;
-       }
-       fw_task_ctx->ystorm_st_context.state.reuse_count =
-                                               qedi->tid_reuse_count[tid];
-       fw_task_ctx->mstorm_st_context.reuse_count =
-                                               qedi->tid_reuse_count[tid]++;
+       /* Update header info */
+       qedi_update_itt_map(qedi, tid, mtask->itt, qedi_cmd);
+       tmf_pdu_header.itt = qedi_set_itt(tid, get_itt(mtask->itt));
+       tmf_pdu_header.cmd_sn = be32_to_cpu(tmf_hdr->cmdsn);
+
+       memcpy(scsi_lun, &tmf_hdr->lun, sizeof(struct scsi_lun));
+       tmf_pdu_header.lun.lo = be32_to_cpu(scsi_lun[0]);
+       tmf_pdu_header.lun.hi = be32_to_cpu(scsi_lun[1]);
 
        if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
             ISCSI_TM_FUNC_ABORT_TASK) {
@@ -1584,53 +1526,34 @@ static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn,
                        return 0;
                }
                cmd = (struct qedi_cmd *)ctask->dd_data;
-               fw_tmf_request->rtt =
+               tmf_pdu_header.rtt =
                                qedi_set_itt(cmd->task_id,
                                             get_itt(tmf_hdr->rtt));
        } else {
-               fw_tmf_request->rtt = ISCSI_RESERVED_TAG;
+               tmf_pdu_header.rtt = ISCSI_RESERVED_TAG;
        }
 
-       fw_tmf_request->opcode = tmf_hdr->opcode;
-       fw_tmf_request->function = tmf_hdr->flags;
-       fw_tmf_request->hdr_second_dword = ntoh24(tmf_hdr->dlength);
-       fw_tmf_request->ref_cmd_sn = be32_to_cpu(tmf_hdr->refcmdsn);
-
-       single_sge = &fw_task_ctx->mstorm_st_context.sgl_union.single_sge;
-       fw_task_ctx->mstorm_st_context.task_type = ISCSI_TASK_TYPE_MIDPATH;
-       fw_task_ctx->mstorm_ag_context.task_cid = (u16)qedi_conn->iscsi_conn_id;
-       single_sge->sge_addr.lo = resp_sge->sge_addr.lo;
-       single_sge->sge_addr.hi = resp_sge->sge_addr.hi;
-       single_sge->sge_len = resp_sge->sge_len;
-
-       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                 ISCSI_MFLAGS_SINGLE_SGE, 1);
-       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                 ISCSI_MFLAGS_SLOW_IO, 0);
-       fw_task_ctx->mstorm_st_context.sgl_size = 1;
-       fw_task_ctx->mstorm_st_context.rem_task_size = resp_sge->sge_len;
-
-       /* Ustorm context */
-       fw_task_ctx->ustorm_st_context.rem_rcv_len = 0;
-       fw_task_ctx->ustorm_st_context.exp_data_transfer_len = 0;
-       fw_task_ctx->ustorm_st_context.exp_data_sn = 0;
-       fw_task_ctx->ustorm_st_context.task_type =  ISCSI_TASK_TYPE_MIDPATH;
-       fw_task_ctx->ustorm_st_context.cq_rss_number = 0;
-
-       SET_FIELD(fw_task_ctx->ustorm_st_context.flags,
-                 USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP, 0);
-       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                 ISCSI_REG1_NUM_FAST_SGES, 0);
-
-       fw_task_ctx->ustorm_ag_context.icid = (u16)qedi_conn->iscsi_conn_id;
-       SET_FIELD(fw_task_ctx->ustorm_ag_context.flags1,
-                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
-       fw_task_ctx->ustorm_st_context.lun.lo = be32_to_cpu(lun[0]);
-       fw_task_ctx->ustorm_st_context.lun.hi = be32_to_cpu(lun[1]);
+       tmf_pdu_header.opcode = tmf_hdr->opcode;
+       tmf_pdu_header.function = tmf_hdr->flags;
+       tmf_pdu_header.hdr_second_dword = ntoh24(tmf_hdr->dlength);
+       tmf_pdu_header.ref_cmd_sn = be32_to_cpu(tmf_hdr->refcmdsn);
 
-       QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
-                 "Add TMF to SQ, tmf tid=0x%x, itt=0x%x, cid=0x%x\n",
-                 tid,  mtask->itt, qedi_conn->iscsi_conn_id);
+       /* Fill fw input params */
+       task_params.context = fw_task_ctx;
+       task_params.conn_icid = (u16)qedi_conn->iscsi_conn_id;
+       task_params.itid = tid;
+       task_params.cq_rss_number = 0;
+       task_params.tx_io_size = 0;
+       task_params.rx_io_size = 0;
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+       task_params.sqe = &ep->sq[sq_idx];
+
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+       rval = init_initiator_tmf_request_task(&task_params,
+                                              &tmf_pdu_header);
+       if (rval)
+               return -1;
 
        spin_lock(&qedi_conn->list_lock);
        list_add_tail(&qedi_cmd->io_cmd, &qedi_conn->active_cmd_list);
@@ -1638,7 +1561,6 @@ static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn,
        qedi_conn->active_cmd_count++;
        spin_unlock(&qedi_conn->list_lock);
 
-       qedi_add_to_sq(qedi_conn, mtask, tid, ptu_invalidate, false);
        qedi_ring_doorbell(qedi_conn);
        return 0;
 }
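
The TMF path copies the 8-byte SCSI LUN into two 32-bit words and
byte-swaps each with be32_to_cpu() before handing it to the firmware. A
sketch of that packing; simple_int_to_scsilun() is a simplified stand-in
for the kernel's int_to_scsilun(), covering flat addressing only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for int_to_scsilun(): flat addressing only */
static void simple_int_to_scsilun(unsigned int lun, uint8_t out[8])
{
	memset(out, 0, 8);
	out[0] = (uint8_t)((lun >> 8) & 0x3f);
	out[1] = (uint8_t)(lun & 0xff);
}

static uint32_t load_be32(const uint8_t p[4])
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

int main(void)
{
	uint8_t lun8[8];

	simple_int_to_scsilun(5, lun8);
	/* The driver does memcpy() into u32[2], then be32_to_cpu() */
	printf("lun.lo=0x%08x lun.hi=0x%08x\n",
	       (unsigned int)load_be32(&lun8[0]),
	       (unsigned int)load_be32(&lun8[4]));
	return 0;
}
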
@@ -1689,101 +1611,98 @@ int qedi_iscsi_abort_work(struct qedi_conn *qedi_conn,
 int qedi_send_iscsi_text(struct qedi_conn *qedi_conn,
                         struct iscsi_task *task)
 {
-       struct qedi_ctx *qedi = qedi_conn->qedi;
+       struct iscsi_text_request_hdr text_request_pdu_header;
+       struct scsi_sgl_task_params tx_sgl_task_params;
+       struct scsi_sgl_task_params rx_sgl_task_params;
+       struct iscsi_task_params task_params;
        struct iscsi_task_context *fw_task_ctx;
-       struct iscsi_text_request_hdr *fw_text_request;
-       struct iscsi_cached_sge_ctx *cached_sge;
-       struct iscsi_sge *single_sge;
-       struct qedi_cmd *qedi_cmd;
-       /* For 6.5 hdr iscsi_hdr */
+       struct qedi_ctx *qedi = qedi_conn->qedi;
        struct iscsi_text *text_hdr;
-       struct iscsi_sge *req_sge;
-       struct iscsi_sge *resp_sge;
-       s16 ptu_invalidate = 0;
+       struct scsi_sge *req_sge = NULL;
+       struct scsi_sge *resp_sge = NULL;
+       struct qedi_cmd *qedi_cmd;
+       struct qedi_endpoint *ep;
        s16 tid = 0;
+       u16 sq_idx = 0;
+       int rval = 0;
 
-       req_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
-       resp_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
+       req_sge = (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+       resp_sge = (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
        qedi_cmd = (struct qedi_cmd *)task->dd_data;
        text_hdr = (struct iscsi_text *)task->hdr;
+       ep = qedi_conn->ep;
 
        tid = qedi_get_task_idx(qedi);
        if (tid == -1)
                return -ENOMEM;
 
-       fw_task_ctx = qedi_get_task_mem(&qedi->tasks, tid);
+       fw_task_ctx =
+            (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks, tid);
        memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
        qedi_cmd->task_id = tid;
 
-       /* Ystorm context */
-       fw_text_request =
-                       &fw_task_ctx->ystorm_st_context.pdu_hdr.text_request;
-       fw_text_request->opcode = text_hdr->opcode;
-       fw_text_request->flags_attr = text_hdr->flags;
+       memset(&task_params, 0, sizeof(task_params));
+       memset(&text_request_pdu_header, 0, sizeof(text_request_pdu_header));
+       memset(&tx_sgl_task_params, 0, sizeof(tx_sgl_task_params));
+       memset(&rx_sgl_task_params, 0, sizeof(rx_sgl_task_params));
+
+       /* Update header info */
+       text_request_pdu_header.opcode = text_hdr->opcode;
+       text_request_pdu_header.flags_attr = text_hdr->flags;
 
        qedi_update_itt_map(qedi, tid, task->itt, qedi_cmd);
-       fw_text_request->itt = qedi_set_itt(tid, get_itt(task->itt));
-       fw_text_request->ttt = text_hdr->ttt;
-       fw_text_request->cmd_sn = be32_to_cpu(text_hdr->cmdsn);
-       fw_text_request->exp_stat_sn = be32_to_cpu(text_hdr->exp_statsn);
-       fw_text_request->hdr_second_dword = ntoh24(text_hdr->dlength);
-
-       if (qedi->tid_reuse_count[tid] == QEDI_MAX_TASK_NUM) {
-               ptu_invalidate = 1;
-               qedi->tid_reuse_count[tid] = 0;
-       }
-       fw_task_ctx->ystorm_st_context.state.reuse_count =
-                                                    qedi->tid_reuse_count[tid];
-       fw_task_ctx->mstorm_st_context.reuse_count =
-                                                  qedi->tid_reuse_count[tid]++;
-
-       cached_sge =
-              &fw_task_ctx->ystorm_st_context.state.sgl_ctx_union.cached_sge;
-       cached_sge->sge.sge_len = req_sge->sge_len;
-       cached_sge->sge.sge_addr.lo = (u32)(qedi_conn->gen_pdu.req_dma_addr);
-       cached_sge->sge.sge_addr.hi =
+       text_request_pdu_header.itt = qedi_set_itt(tid, get_itt(task->itt));
+       text_request_pdu_header.ttt = text_hdr->ttt;
+       text_request_pdu_header.cmd_sn = be32_to_cpu(text_hdr->cmdsn);
+       text_request_pdu_header.exp_stat_sn = be32_to_cpu(text_hdr->exp_statsn);
+       text_request_pdu_header.hdr_second_dword = ntoh24(text_hdr->dlength);
+
+       /* Fill tx AHS and rx buffer */
+       tx_sgl_task_params.sgl =
+                              (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+       tx_sgl_task_params.sgl_phys_addr.lo =
+                                        (u32)(qedi_conn->gen_pdu.req_dma_addr);
+       tx_sgl_task_params.sgl_phys_addr.hi =
                              (u32)((u64)qedi_conn->gen_pdu.req_dma_addr >> 32);
+       tx_sgl_task_params.total_buffer_size = req_sge->sge_len;
+       tx_sgl_task_params.num_sges = 1;
+
+       rx_sgl_task_params.sgl =
+                             (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
+       rx_sgl_task_params.sgl_phys_addr.lo =
+                                       (u32)(qedi_conn->gen_pdu.resp_dma_addr);
+       rx_sgl_task_params.sgl_phys_addr.hi =
+                            (u32)((u64)qedi_conn->gen_pdu.resp_dma_addr >> 32);
+       rx_sgl_task_params.total_buffer_size = resp_sge->sge_len;
+       rx_sgl_task_params.num_sges = 1;
+
+       /* Fill fw input params */
+       task_params.context = fw_task_ctx;
+       task_params.conn_icid = (u16)qedi_conn->iscsi_conn_id;
+       task_params.itid = tid;
+       task_params.cq_rss_number = 0;
+       task_params.tx_io_size = ntoh24(text_hdr->dlength);
+       task_params.rx_io_size = resp_sge->sge_len;
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+       task_params.sqe = &ep->sq[sq_idx];
+
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+       rval = init_initiator_text_request_task(&task_params,
+                                               &text_request_pdu_header,
+                                               &tx_sgl_task_params,
+                                               &rx_sgl_task_params);
+       if (rval)
+               return -1;
 
-       /* Mstorm context */
-       single_sge = &fw_task_ctx->mstorm_st_context.sgl_union.single_sge;
-       fw_task_ctx->mstorm_st_context.task_type = 0x2;
-       fw_task_ctx->mstorm_ag_context.task_cid = (u16)qedi_conn->iscsi_conn_id;
-       single_sge->sge_addr.lo = resp_sge->sge_addr.lo;
-       single_sge->sge_addr.hi = resp_sge->sge_addr.hi;
-       single_sge->sge_len = resp_sge->sge_len;
-
-       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                 ISCSI_MFLAGS_SINGLE_SGE, 1);
-       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                 ISCSI_MFLAGS_SLOW_IO, 0);
-       fw_task_ctx->mstorm_st_context.sgl_size = 1;
-       fw_task_ctx->mstorm_st_context.rem_task_size = resp_sge->sge_len;
-
-       /* Ustorm context */
-       fw_task_ctx->ustorm_ag_context.exp_data_acked =
-                                                     ntoh24(text_hdr->dlength);
-       fw_task_ctx->ustorm_st_context.rem_rcv_len = resp_sge->sge_len;
-       fw_task_ctx->ustorm_st_context.exp_data_transfer_len =
-                                                     ntoh24(text_hdr->dlength);
-       fw_task_ctx->ustorm_st_context.exp_data_sn =
-                                             be32_to_cpu(text_hdr->exp_statsn);
-       fw_task_ctx->ustorm_st_context.cq_rss_number = 0;
-       fw_task_ctx->ustorm_st_context.task_type = 0x2;
-       fw_task_ctx->ustorm_ag_context.icid = (u16)qedi_conn->iscsi_conn_id;
-       SET_FIELD(fw_task_ctx->ustorm_ag_context.flags1,
-                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
-
-       /*  Add command in active command list */
        spin_lock(&qedi_conn->list_lock);
        list_add_tail(&qedi_cmd->io_cmd, &qedi_conn->active_cmd_list);
        qedi_cmd->io_cmd_in_list = true;
        qedi_conn->active_cmd_count++;
        spin_unlock(&qedi_conn->list_lock);
 
-       qedi_add_to_sq(qedi_conn, task, tid, ptu_invalidate, false);
        qedi_ring_doorbell(qedi_conn);
-
        return 0;
 }
 
@@ -1791,58 +1710,62 @@ int qedi_send_iscsi_nopout(struct qedi_conn *qedi_conn,
                           struct iscsi_task *task,
                           char *datap, int data_len, int unsol)
 {
+       struct iscsi_nop_out_hdr nop_out_pdu_header;
+       struct scsi_sgl_task_params tx_sgl_task_params;
+       struct scsi_sgl_task_params rx_sgl_task_params;
+       struct iscsi_task_params task_params;
        struct qedi_ctx *qedi = qedi_conn->qedi;
        struct iscsi_task_context *fw_task_ctx;
-       struct iscsi_nop_out_hdr *fw_nop_out;
-       struct qedi_cmd *qedi_cmd;
-       /* For 6.5 hdr iscsi_hdr */
        struct iscsi_nopout *nopout_hdr;
-       struct iscsi_cached_sge_ctx *cached_sge;
-       struct iscsi_sge *single_sge;
-       struct iscsi_sge *req_sge;
-       struct iscsi_sge *resp_sge;
-       u32 lun[2];
-       s16 ptu_invalidate = 0;
+       struct scsi_sge *req_sge = NULL;
+       struct scsi_sge *resp_sge = NULL;
+       struct qedi_cmd *qedi_cmd;
+       struct qedi_endpoint *ep;
+       u32 scsi_lun[2];
        s16 tid = 0;
+       u16 sq_idx = 0;
+       int rval = 0;
 
-       req_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
-       resp_sge = (struct iscsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
+       req_sge = (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+       resp_sge = (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
        qedi_cmd = (struct qedi_cmd *)task->dd_data;
        nopout_hdr = (struct iscsi_nopout *)task->hdr;
+       ep = qedi_conn->ep;
 
        tid = qedi_get_task_idx(qedi);
-       if (tid == -1) {
-               QEDI_WARN(&qedi->dbg_ctx, "Invalid tid\n");
+       if (tid == -1)
                return -ENOMEM;
-       }
-
-       fw_task_ctx = qedi_get_task_mem(&qedi->tasks, tid);
 
+       fw_task_ctx =
+            (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks, tid);
        memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
+
        qedi_cmd->task_id = tid;
 
-       /* Ystorm context */
-       fw_nop_out = &fw_task_ctx->ystorm_st_context.pdu_hdr.nop_out;
-       SET_FIELD(fw_nop_out->flags_attr, ISCSI_NOP_OUT_HDR_CONST1, 1);
-       SET_FIELD(fw_nop_out->flags_attr, ISCSI_NOP_OUT_HDR_RSRV, 0);
+       memset(&task_params, 0, sizeof(task_params));
+       memset(&nop_out_pdu_header, 0, sizeof(nop_out_pdu_header));
+       memset(&tx_sgl_task_params, 0, sizeof(tx_sgl_task_params));
+       memset(&rx_sgl_task_params, 0, sizeof(rx_sgl_task_params));
+
+       /* Update header info */
+       nop_out_pdu_header.opcode = nopout_hdr->opcode;
+       SET_FIELD(nop_out_pdu_header.flags_attr, ISCSI_NOP_OUT_HDR_CONST1, 1);
+       SET_FIELD(nop_out_pdu_header.flags_attr, ISCSI_NOP_OUT_HDR_RSRV, 0);
 
-       memcpy(lun, &nopout_hdr->lun, sizeof(struct scsi_lun));
-       fw_nop_out->lun.lo = be32_to_cpu(lun[0]);
-       fw_nop_out->lun.hi = be32_to_cpu(lun[1]);
+       memcpy(scsi_lun, &nopout_hdr->lun, sizeof(struct scsi_lun));
+       nop_out_pdu_header.lun.lo = be32_to_cpu(scsi_lun[0]);
+       nop_out_pdu_header.lun.hi = be32_to_cpu(scsi_lun[1]);
+       nop_out_pdu_header.cmd_sn = be32_to_cpu(nopout_hdr->cmdsn);
+       nop_out_pdu_header.exp_stat_sn = be32_to_cpu(nopout_hdr->exp_statsn);
 
        qedi_update_itt_map(qedi, tid, task->itt, qedi_cmd);
 
        if (nopout_hdr->ttt != ISCSI_TTT_ALL_ONES) {
-               fw_nop_out->itt = be32_to_cpu(nopout_hdr->itt);
-               fw_nop_out->ttt = be32_to_cpu(nopout_hdr->ttt);
-               fw_task_ctx->ystorm_st_context.state.buffer_offset[0] = 0;
-               fw_task_ctx->ystorm_st_context.state.local_comp = 1;
-               SET_FIELD(fw_task_ctx->ustorm_st_context.flags,
-                         USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP, 1);
+               nop_out_pdu_header.itt = be32_to_cpu(nopout_hdr->itt);
+               nop_out_pdu_header.ttt = be32_to_cpu(nopout_hdr->ttt);
        } else {
-               fw_nop_out->itt = qedi_set_itt(tid, get_itt(task->itt));
-               fw_nop_out->ttt = ISCSI_TTT_ALL_ONES;
-               fw_task_ctx->ystorm_st_context.state.buffer_offset[0] = 0;
+               nop_out_pdu_header.itt = qedi_set_itt(tid, get_itt(task->itt));
+               nop_out_pdu_header.ttt = ISCSI_TTT_ALL_ONES;
 
                spin_lock(&qedi_conn->list_lock);
                list_add_tail(&qedi_cmd->io_cmd, &qedi_conn->active_cmd_list);
@@ -1851,53 +1774,46 @@ int qedi_send_iscsi_nopout(struct qedi_conn *qedi_conn,
                spin_unlock(&qedi_conn->list_lock);
        }
 
-       fw_nop_out->opcode = ISCSI_OPCODE_NOP_OUT;
-       fw_nop_out->cmd_sn = be32_to_cpu(nopout_hdr->cmdsn);
-       fw_nop_out->exp_stat_sn = be32_to_cpu(nopout_hdr->exp_statsn);
-
-       cached_sge =
-              &fw_task_ctx->ystorm_st_context.state.sgl_ctx_union.cached_sge;
-       cached_sge->sge.sge_len = req_sge->sge_len;
-       cached_sge->sge.sge_addr.lo = (u32)(qedi_conn->gen_pdu.req_dma_addr);
-       cached_sge->sge.sge_addr.hi =
-                       (u32)((u64)qedi_conn->gen_pdu.req_dma_addr >> 32);
-
-       /* Mstorm context */
-       fw_task_ctx->mstorm_st_context.task_type = ISCSI_TASK_TYPE_MIDPATH;
-       fw_task_ctx->mstorm_ag_context.task_cid = (u16)qedi_conn->iscsi_conn_id;
-
-       single_sge = &fw_task_ctx->mstorm_st_context.sgl_union.single_sge;
-       single_sge->sge_addr.lo = resp_sge->sge_addr.lo;
-       single_sge->sge_addr.hi = resp_sge->sge_addr.hi;
-       single_sge->sge_len = resp_sge->sge_len;
-       fw_task_ctx->mstorm_st_context.rem_task_size = resp_sge->sge_len;
-
-       if (qedi->tid_reuse_count[tid] == QEDI_MAX_TASK_NUM) {
-               ptu_invalidate = 1;
-               qedi->tid_reuse_count[tid] = 0;
-       }
-       fw_task_ctx->ystorm_st_context.state.reuse_count =
-                                               qedi->tid_reuse_count[tid];
-       fw_task_ctx->mstorm_st_context.reuse_count =
-                                               qedi->tid_reuse_count[tid]++;
-       /* Ustorm context */
-       fw_task_ctx->ustorm_st_context.rem_rcv_len = resp_sge->sge_len;
-       fw_task_ctx->ustorm_st_context.exp_data_transfer_len = data_len;
-       fw_task_ctx->ustorm_st_context.exp_data_sn = 0;
-       fw_task_ctx->ustorm_st_context.task_type =  ISCSI_TASK_TYPE_MIDPATH;
-       fw_task_ctx->ustorm_st_context.cq_rss_number = 0;
-
-       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                 ISCSI_REG1_NUM_FAST_SGES, 0);
-
-       fw_task_ctx->ustorm_ag_context.icid = (u16)qedi_conn->iscsi_conn_id;
-       SET_FIELD(fw_task_ctx->ustorm_ag_context.flags1,
-                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
-
-       fw_task_ctx->ustorm_st_context.lun.lo = be32_to_cpu(lun[0]);
-       fw_task_ctx->ustorm_st_context.lun.hi = be32_to_cpu(lun[1]);
-
-       qedi_add_to_sq(qedi_conn, task, tid, ptu_invalidate, false);
+       /* Fill tx AHS and rx buffer */
+       if (data_len) {
+               tx_sgl_task_params.sgl =
+                              (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+               tx_sgl_task_params.sgl_phys_addr.lo =
+                                        (u32)(qedi_conn->gen_pdu.req_dma_addr);
+               tx_sgl_task_params.sgl_phys_addr.hi =
+                             (u32)((u64)qedi_conn->gen_pdu.req_dma_addr >> 32);
+               tx_sgl_task_params.total_buffer_size = data_len;
+               tx_sgl_task_params.num_sges = 1;
+
+               rx_sgl_task_params.sgl =
+                             (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
+               rx_sgl_task_params.sgl_phys_addr.lo =
+                                       (u32)(qedi_conn->gen_pdu.resp_dma_addr);
+               rx_sgl_task_params.sgl_phys_addr.hi =
+                            (u32)((u64)qedi_conn->gen_pdu.resp_dma_addr >> 32);
+               rx_sgl_task_params.total_buffer_size = resp_sge->sge_len;
+               rx_sgl_task_params.num_sges = 1;
+       }
+
+       /* Fill fw input params */
+       task_params.context = fw_task_ctx;
+       task_params.conn_icid = (u16)qedi_conn->iscsi_conn_id;
+       task_params.itid = tid;
+       task_params.cq_rss_number = 0;
+       task_params.tx_io_size = data_len;
+       task_params.rx_io_size = resp_sge->sge_len;
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+       task_params.sqe = &ep->sq[sq_idx];
+
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+       rval = init_initiator_nop_out_task(&task_params,
+                                          &nop_out_pdu_header,
+                                          &tx_sgl_task_params,
+                                          &rx_sgl_task_params);
+       if (rval)
+               return -1;
+
        qedi_ring_doorbell(qedi_conn);
        return 0;
 }
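
The NOP-Out path branches on the target transfer tag: anything other than
the all-ones sentinel means the PDU answers a target NOP-In (the target's
ITT/TTT are echoed and the command is not tracked), while the sentinel
marks an initiator ping that gets a fresh ITT and goes on the active list.
A minimal sketch of the sentinel check:

#include <stdint.h>
#include <stdio.h>

#define ISCSI_TTT_ALL_ONES 0xffffffffu

static const char *classify_nop_out(uint32_t ttt)
{
	return (ttt != ISCSI_TTT_ALL_ONES) ?
		"reply to a target NOP-In (echo the target's TTT)" :
		"initiator ping (fresh ITT, track on active list)";
}

int main(void)
{
	printf("%s\n", classify_nop_out(0x1234));
	printf("%s\n", classify_nop_out(ISCSI_TTT_ALL_ONES));
	return 0;
}
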
@@ -1905,7 +1821,7 @@ int qedi_send_iscsi_nopout(struct qedi_conn *qedi_conn,
 static int qedi_split_bd(struct qedi_cmd *cmd, u64 addr, int sg_len,
                         int bd_index)
 {
-       struct iscsi_sge *bd = cmd->io_tbl.sge_tbl;
+       struct scsi_sge *bd = cmd->io_tbl.sge_tbl;
        int frag_size, sg_frags;
 
        sg_frags = 0;
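
qedi_split_bd() (only partially visible in this hunk) carves one oversized
scatter-gather element into several fixed-size buffer descriptors. A
standalone sketch of that carving loop; MAX_BD_LEN is an illustrative
constant, not the driver's actual split size:

#include <stdint.h>
#include <stdio.h>

#define MAX_BD_LEN 4096u	/* illustrative split size */

struct sge { uint64_t addr; uint32_t len; };

static int split_bd(struct sge *tbl, uint64_t addr, uint32_t sg_len)
{
	int frags = 0;

	while (sg_len) {
		uint32_t frag = sg_len > MAX_BD_LEN ? MAX_BD_LEN : sg_len;

		tbl[frags].addr = addr;
		tbl[frags].len = frag;
		addr += frag;
		sg_len -= frag;
		frags++;
	}
	return frags;
}

int main(void)
{
	struct sge tbl[8];
	int i, n = split_bd(tbl, 0x1000, 10000);

	for (i = 0; i < n; i++)
		printf("bd[%d]: addr=0x%llx len=%u\n", i,
		       (unsigned long long)tbl[i].addr,
		       (unsigned int)tbl[i].len);
	return 0;
}
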
@@ -1938,7 +1854,7 @@ static int qedi_split_bd(struct qedi_cmd *cmd, u64 addr, int sg_len,
 static int qedi_map_scsi_sg(struct qedi_ctx *qedi, struct qedi_cmd *cmd)
 {
        struct scsi_cmnd *sc = cmd->scsi_cmd;
-       struct iscsi_sge *bd = cmd->io_tbl.sge_tbl;
+       struct scsi_sge *bd = cmd->io_tbl.sge_tbl;
        struct scatterlist *sg;
        int byte_count = 0;
        int bd_count = 0;
@@ -2040,7 +1956,7 @@ static void qedi_iscsi_map_sg_list(struct qedi_cmd *cmd)
                if (bd_count == 0)
                        return;
        } else {
-               struct iscsi_sge *bd = cmd->io_tbl.sge_tbl;
+               struct scsi_sge *bd = cmd->io_tbl.sge_tbl;
 
                bd[0].sge_addr.lo = 0;
                bd[0].sge_addr.hi = 0;
@@ -2136,244 +2052,182 @@ int qedi_iscsi_send_ioreq(struct iscsi_task *task)
        struct qedi_conn *qedi_conn = conn->dd_data;
        struct qedi_cmd *cmd = task->dd_data;
        struct scsi_cmnd *sc = task->sc;
+       struct iscsi_cmd_hdr cmd_pdu_header;
+       struct scsi_sgl_task_params tx_sgl_task_params;
+       struct scsi_sgl_task_params rx_sgl_task_params;
+       struct scsi_sgl_task_params *prx_sgl = NULL;
+       struct scsi_sgl_task_params *ptx_sgl = NULL;
+       struct iscsi_task_params task_params;
+       struct iscsi_conn_params conn_params;
+       struct scsi_initiator_cmd_params cmd_params;
        struct iscsi_task_context *fw_task_ctx;
-       struct iscsi_cached_sge_ctx *cached_sge;
-       struct iscsi_phys_sgl_ctx *phys_sgl;
-       struct iscsi_virt_sgl_ctx *virt_sgl;
-       struct ystorm_iscsi_task_st_ctx *yst_cxt;
-       struct mstorm_iscsi_task_st_ctx *mst_cxt;
-       struct iscsi_sgl *sgl_struct;
-       struct iscsi_sge *single_sge;
+       struct iscsi_cls_conn *cls_conn;
        struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
-       struct iscsi_sge *bd = cmd->io_tbl.sge_tbl;
-       enum iscsi_task_type task_type;
-       struct iscsi_cmd_hdr *fw_cmd;
-       u32 lun[2];
-       u32 exp_data;
-       u16 cq_idx = smp_processor_id() % qedi->num_queues;
-       s16 ptu_invalidate = 0;
+       enum iscsi_task_type task_type = MAX_ISCSI_TASK_TYPE;
+       struct qedi_endpoint *ep;
+       u32 scsi_lun[2];
        s16 tid = 0;
-       u8 num_fast_sgs;
+       u16 sq_idx = 0;
+       u16 cq_idx;
+       int rval = 0;
 
-       tid = qedi_get_task_idx(qedi);
-       if (tid == -1)
-               return -ENOMEM;
+       ep = qedi_conn->ep;
+       cls_conn = qedi_conn->cls_conn;
+       conn = cls_conn->dd_data;
 
        qedi_iscsi_map_sg_list(cmd);
+       int_to_scsilun(sc->device->lun, (struct scsi_lun *)scsi_lun);
 
-       int_to_scsilun(sc->device->lun, (struct scsi_lun *)lun);
-       fw_task_ctx = qedi_get_task_mem(&qedi->tasks, tid);
+       tid = qedi_get_task_idx(qedi);
+       if (tid == -1)
+               return -ENOMEM;
 
+       fw_task_ctx =
+            (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks, tid);
        memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
-       cmd->task_id = tid;
 
-       /* Ystorm context */
-       fw_cmd = &fw_task_ctx->ystorm_st_context.pdu_hdr.cmd;
-       SET_FIELD(fw_cmd->flags_attr, ISCSI_CMD_HDR_ATTR, ISCSI_ATTR_SIMPLE);
+       cmd->task_id = tid;
 
+       memset(&task_params, 0, sizeof(task_params));
+       memset(&cmd_pdu_header, 0, sizeof(cmd_pdu_header));
+       memset(&tx_sgl_task_params, 0, sizeof(tx_sgl_task_params));
+       memset(&rx_sgl_task_params, 0, sizeof(rx_sgl_task_params));
+       memset(&conn_params, 0, sizeof(conn_params));
+       memset(&cmd_params, 0, sizeof(cmd_params));
+
+       cq_idx = smp_processor_id() % qedi->num_queues;
+       /* Update header info */
+       SET_FIELD(cmd_pdu_header.flags_attr, ISCSI_CMD_HDR_ATTR,
+                 ISCSI_ATTR_SIMPLE);
        if (sc->sc_data_direction == DMA_TO_DEVICE) {
-               if (conn->session->initial_r2t_en) {
-                       exp_data = min((conn->session->imm_data_en *
-                                       conn->max_xmit_dlength),
-                                      conn->session->first_burst);
-                       exp_data = min(exp_data, scsi_bufflen(sc));
-                       fw_task_ctx->ustorm_ag_context.exp_data_acked =
-                                                         cpu_to_le32(exp_data);
-               } else {
-                       fw_task_ctx->ustorm_ag_context.exp_data_acked =
-                             min(conn->session->first_burst, scsi_bufflen(sc));
-               }
-
-               SET_FIELD(fw_cmd->flags_attr, ISCSI_CMD_HDR_WRITE, 1);
+               SET_FIELD(cmd_pdu_header.flags_attr,
+                         ISCSI_CMD_HDR_WRITE, 1);
                task_type = ISCSI_TASK_TYPE_INITIATOR_WRITE;
        } else {
-               if (scsi_bufflen(sc))
-                       SET_FIELD(fw_cmd->flags_attr, ISCSI_CMD_HDR_READ, 1);
+               SET_FIELD(cmd_pdu_header.flags_attr,
+                         ISCSI_CMD_HDR_READ, 1);
                task_type = ISCSI_TASK_TYPE_INITIATOR_READ;
        }
 
-       fw_cmd->lun.lo = be32_to_cpu(lun[0]);
-       fw_cmd->lun.hi = be32_to_cpu(lun[1]);
+       cmd_pdu_header.lun.lo = be32_to_cpu(scsi_lun[0]);
+       cmd_pdu_header.lun.hi = be32_to_cpu(scsi_lun[1]);
 
        qedi_update_itt_map(qedi, tid, task->itt, cmd);
-       fw_cmd->itt = qedi_set_itt(tid, get_itt(task->itt));
-       fw_cmd->expected_transfer_length = scsi_bufflen(sc);
-       fw_cmd->cmd_sn = be32_to_cpu(hdr->cmdsn);
-       fw_cmd->opcode = hdr->opcode;
-       qedi_cpy_scsi_cdb(sc, (u32 *)fw_cmd->cdb);
-
-       /* Mstorm context */
-       fw_task_ctx->mstorm_st_context.sense_db.lo = (u32)cmd->sense_buffer_dma;
-       fw_task_ctx->mstorm_st_context.sense_db.hi =
-                                       (u32)((u64)cmd->sense_buffer_dma >> 32);
-       fw_task_ctx->mstorm_ag_context.task_cid = qedi_conn->iscsi_conn_id;
-       fw_task_ctx->mstorm_st_context.task_type = task_type;
-
-       if (qedi->tid_reuse_count[tid] == QEDI_MAX_TASK_NUM) {
-               ptu_invalidate = 1;
-               qedi->tid_reuse_count[tid] = 0;
-       }
-       fw_task_ctx->ystorm_st_context.state.reuse_count =
-                                                    qedi->tid_reuse_count[tid];
-       fw_task_ctx->mstorm_st_context.reuse_count =
-                                                  qedi->tid_reuse_count[tid]++;
-
-       /* Ustorm context */
-       fw_task_ctx->ustorm_st_context.rem_rcv_len = scsi_bufflen(sc);
-       fw_task_ctx->ustorm_st_context.exp_data_transfer_len = scsi_bufflen(sc);
-       fw_task_ctx->ustorm_st_context.exp_data_sn =
-                                                  be32_to_cpu(hdr->exp_statsn);
-       fw_task_ctx->ustorm_st_context.task_type = task_type;
-       fw_task_ctx->ustorm_st_context.cq_rss_number = cq_idx;
-       fw_task_ctx->ustorm_ag_context.icid = (u16)qedi_conn->iscsi_conn_id;
-
-       SET_FIELD(fw_task_ctx->ustorm_ag_context.flags1,
-                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
-       SET_FIELD(fw_task_ctx->ustorm_st_context.flags,
-                 USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP, 0);
-
-       num_fast_sgs = (cmd->io_tbl.sge_valid ?
-                       min((u16)QEDI_FAST_SGE_COUNT,
-                           (u16)cmd->io_tbl.sge_valid) : 0);
-       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                 ISCSI_REG1_NUM_FAST_SGES, num_fast_sgs);
-
-       fw_task_ctx->ustorm_st_context.lun.lo = be32_to_cpu(lun[0]);
-       fw_task_ctx->ustorm_st_context.lun.hi = be32_to_cpu(lun[1]);
-
-       QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_IO, "Total sge count [%d]\n",
-                 cmd->io_tbl.sge_valid);
-
-       yst_cxt = &fw_task_ctx->ystorm_st_context;
-       mst_cxt = &fw_task_ctx->mstorm_st_context;
-       /* Tx path */
+       cmd_pdu_header.itt = qedi_set_itt(tid, get_itt(task->itt));
+       cmd_pdu_header.expected_transfer_length = cpu_to_be32(hdr->data_length);
+       cmd_pdu_header.hdr_second_dword = ntoh24(hdr->dlength);
+       cmd_pdu_header.cmd_sn = be32_to_cpu(hdr->cmdsn);
+       cmd_pdu_header.opcode = hdr->opcode;
+       qedi_cpy_scsi_cdb(sc, (u32 *)cmd_pdu_header.cdb);
+
+       /* Fill tx/rx SGL parameters for the data buffer */
        if (task_type == ISCSI_TASK_TYPE_INITIATOR_WRITE) {
-               /* not considering  superIO or FastIO */
-               if (cmd->io_tbl.sge_valid == 1) {
-                       cached_sge = &yst_cxt->state.sgl_ctx_union.cached_sge;
-                       cached_sge->sge.sge_addr.lo = bd[0].sge_addr.lo;
-                       cached_sge->sge.sge_addr.hi = bd[0].sge_addr.hi;
-                       cached_sge->sge.sge_len = bd[0].sge_len;
-                       qedi->cached_sgls++;
-               } else if ((cmd->io_tbl.sge_valid != 1) && cmd->use_slowpath) {
-                       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                                 ISCSI_MFLAGS_SLOW_IO, 1);
-                       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                                 ISCSI_REG1_NUM_FAST_SGES, 0);
-                       phys_sgl = &yst_cxt->state.sgl_ctx_union.phys_sgl;
-                       phys_sgl->sgl_base.lo = (u32)(cmd->io_tbl.sge_tbl_dma);
-                       phys_sgl->sgl_base.hi =
-                                    (u32)((u64)cmd->io_tbl.sge_tbl_dma >> 32);
-                       phys_sgl->sgl_size = cmd->io_tbl.sge_valid;
-                       qedi->slow_sgls++;
-               } else if ((cmd->io_tbl.sge_valid != 1) && !cmd->use_slowpath) {
-                       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                                 ISCSI_MFLAGS_SLOW_IO, 0);
-                       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                                 ISCSI_REG1_NUM_FAST_SGES,
-                                 min((u16)QEDI_FAST_SGE_COUNT,
-                                     (u16)cmd->io_tbl.sge_valid));
-                       virt_sgl = &yst_cxt->state.sgl_ctx_union.virt_sgl;
-                       virt_sgl->sgl_base.lo = (u32)(cmd->io_tbl.sge_tbl_dma);
-                       virt_sgl->sgl_base.hi =
+               tx_sgl_task_params.sgl = cmd->io_tbl.sge_tbl;
+               tx_sgl_task_params.sgl_phys_addr.lo =
+                                                (u32)(cmd->io_tbl.sge_tbl_dma);
+               tx_sgl_task_params.sgl_phys_addr.hi =
                                      (u32)((u64)cmd->io_tbl.sge_tbl_dma >> 32);
-                       virt_sgl->sgl_initial_offset =
-                                (u32)bd[0].sge_addr.lo & (QEDI_PAGE_SIZE - 1);
-                       qedi->fast_sgls++;
-               }
-               fw_task_ctx->mstorm_st_context.sgl_size = cmd->io_tbl.sge_valid;
-               fw_task_ctx->mstorm_st_context.rem_task_size = scsi_bufflen(sc);
-       } else {
-       /* Rx path */
-               if (cmd->io_tbl.sge_valid == 1) {
-                       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                                 ISCSI_MFLAGS_SLOW_IO, 0);
-                       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                                 ISCSI_MFLAGS_SINGLE_SGE, 1);
-                       single_sge = &mst_cxt->sgl_union.single_sge;
-                       single_sge->sge_addr.lo = bd[0].sge_addr.lo;
-                       single_sge->sge_addr.hi = bd[0].sge_addr.hi;
-                       single_sge->sge_len = bd[0].sge_len;
-                       qedi->cached_sgls++;
-               } else if ((cmd->io_tbl.sge_valid != 1) && cmd->use_slowpath) {
-                       sgl_struct = &mst_cxt->sgl_union.sgl_struct;
-                       sgl_struct->sgl_addr.lo =
-                                               (u32)(cmd->io_tbl.sge_tbl_dma);
-                       sgl_struct->sgl_addr.hi =
-                                    (u32)((u64)cmd->io_tbl.sge_tbl_dma >> 32);
-                       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                                 ISCSI_MFLAGS_SLOW_IO, 1);
-                       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                                 ISCSI_REG1_NUM_FAST_SGES, 0);
-                       sgl_struct->updated_sge_size = 0;
-                       sgl_struct->updated_sge_offset = 0;
-                       qedi->slow_sgls++;
-               } else if ((cmd->io_tbl.sge_valid != 1) && !cmd->use_slowpath) {
-                       sgl_struct = &mst_cxt->sgl_union.sgl_struct;
-                       sgl_struct->sgl_addr.lo =
-                                               (u32)(cmd->io_tbl.sge_tbl_dma);
-                       sgl_struct->sgl_addr.hi =
-                                    (u32)((u64)cmd->io_tbl.sge_tbl_dma >> 32);
-                       sgl_struct->byte_offset =
-                               (u32)bd[0].sge_addr.lo & (QEDI_PAGE_SIZE - 1);
-                       SET_FIELD(fw_task_ctx->mstorm_st_context.flags.mflags,
-                                 ISCSI_MFLAGS_SLOW_IO, 0);
-                       SET_FIELD(fw_task_ctx->ustorm_st_context.reg1.reg1_map,
-                                 ISCSI_REG1_NUM_FAST_SGES, 0);
-                       sgl_struct->updated_sge_size = 0;
-                       sgl_struct->updated_sge_offset = 0;
-                       qedi->fast_sgls++;
-               }
-               fw_task_ctx->mstorm_st_context.sgl_size = cmd->io_tbl.sge_valid;
-               fw_task_ctx->mstorm_st_context.rem_task_size = scsi_bufflen(sc);
-       }
-
-       if (cmd->io_tbl.sge_valid == 1)
-               /* Singel-SGL */
-               qedi->use_cached_sge = true;
-       else {
+               tx_sgl_task_params.total_buffer_size = scsi_bufflen(sc);
+               tx_sgl_task_params.num_sges = cmd->io_tbl.sge_valid;
                if (cmd->use_slowpath)
-                       qedi->use_slow_sge = true;
-               else
-                       qedi->use_fast_sge = true;
-       }
+                       tx_sgl_task_params.small_mid_sge = true;
+       } else if (task_type == ISCSI_TASK_TYPE_INITIATOR_READ) {
+               rx_sgl_task_params.sgl = cmd->io_tbl.sge_tbl;
+               rx_sgl_task_params.sgl_phys_addr.lo =
+                                                (u32)(cmd->io_tbl.sge_tbl_dma);
+               rx_sgl_task_params.sgl_phys_addr.hi =
+                                     (u32)((u64)cmd->io_tbl.sge_tbl_dma >> 32);
+               rx_sgl_task_params.total_buffer_size = scsi_bufflen(sc);
+               rx_sgl_task_params.num_sges = cmd->io_tbl.sge_valid;
+       }
+
+       /* Add conn param */
+       conn_params.first_burst_length = conn->session->first_burst;
+       conn_params.max_send_pdu_length = conn->max_xmit_dlength;
+       conn_params.max_burst_length = conn->session->max_burst;
+       if (conn->session->initial_r2t_en)
+               conn_params.initial_r2t = true;
+       if (conn->session->imm_data_en)
+               conn_params.immediate_data = true;
+
+       /* Add cmd params */
+       cmd_params.sense_data_buffer_phys_addr.lo = (u32)cmd->sense_buffer_dma;
+       cmd_params.sense_data_buffer_phys_addr.hi =
+                                       (u32)((u64)cmd->sense_buffer_dma >> 32);
+       /* Fill fw input params */
+       task_params.context = fw_task_ctx;
+       task_params.conn_icid = (u16)qedi_conn->iscsi_conn_id;
+       task_params.itid = tid;
+       task_params.cq_rss_number = cq_idx;
+       if (task_type == ISCSI_TASK_TYPE_INITIATOR_WRITE)
+               task_params.tx_io_size = scsi_bufflen(sc);
+       else if (task_type == ISCSI_TASK_TYPE_INITIATOR_READ)
+               task_params.rx_io_size = scsi_bufflen(sc);
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+       task_params.sqe = &ep->sq[sq_idx];
+
        QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_IO,
-                 "%s: %s-SGL: num_sges=0x%x first-sge-lo=0x%x first-sge-hi=0x%x",
+                 "%s: %s-SGL: sg_len=0x%x num_sges=0x%x first-sge-lo=0x%x first-sge-hi=0x%x\n",
                  (task_type == ISCSI_TASK_TYPE_INITIATOR_WRITE) ?
                  "Write " : "Read ", (cmd->io_tbl.sge_valid == 1) ?
                  "Single" : (cmd->use_slowpath ? "SLOW" : "FAST"),
-                 (u16)cmd->io_tbl.sge_valid, (u32)(cmd->io_tbl.sge_tbl_dma),
+                 scsi_bufflen(sc), (u16)cmd->io_tbl.sge_valid,
+                 (u32)(cmd->io_tbl.sge_tbl_dma),
                  (u32)((u64)cmd->io_tbl.sge_tbl_dma >> 32));
 
-       /*  Add command in active command list */
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+
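+       /* Pass only the SGL that matches the data direction of the command */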
+       if (task_params.tx_io_size != 0)
+               ptx_sgl = &tx_sgl_task_params;
+       if (task_params.rx_io_size != 0)
+               prx_sgl = &rx_sgl_task_params;
+
+       rval = init_initiator_rw_iscsi_task(&task_params, &conn_params,
+                                           &cmd_params, &cmd_pdu_header,
+                                           ptx_sgl, prx_sgl,
+                                           NULL);
+       if (rval)
+               return -1;
+
        spin_lock(&qedi_conn->list_lock);
        list_add_tail(&cmd->io_cmd, &qedi_conn->active_cmd_list);
        cmd->io_cmd_in_list = true;
        qedi_conn->active_cmd_count++;
        spin_unlock(&qedi_conn->list_lock);
 
-       qedi_add_to_sq(qedi_conn, task, tid, ptu_invalidate, false);
        qedi_ring_doorbell(qedi_conn);
-       if (qedi_io_tracing)
-               qedi_trace_io(qedi, task, tid, QEDI_IO_TRACE_REQ);
-
        return 0;
 }
 
 int qedi_iscsi_cleanup_task(struct iscsi_task *task, bool mark_cmd_node_deleted)
 {
+       struct iscsi_task_params task_params;
+       struct qedi_endpoint *ep;
        struct iscsi_conn *conn = task->conn;
        struct qedi_conn *qedi_conn = conn->dd_data;
        struct qedi_cmd *cmd = task->dd_data;
-       s16 ptu_invalidate = 0;
+       u16 sq_idx = 0;
+       int rval = 0;
 
        QEDI_INFO(&qedi_conn->qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
                  "issue cleanup tid=0x%x itt=0x%x task_state=%d cmd_state=0%x cid=0x%x\n",
                  cmd->task_id, get_itt(task->itt), task->state,
                  cmd->state, qedi_conn->iscsi_conn_id);
 
-       qedi_add_to_sq(qedi_conn, task, cmd->task_id, ptu_invalidate, true);
-       qedi_ring_doorbell(qedi_conn);
+       memset(&task_params, 0, sizeof(task_params));
+       ep = qedi_conn->ep;
+
+       sq_idx = qedi_get_wqe_idx(qedi_conn);
+
+       task_params.sqe = &ep->sq[sq_idx];
+       memset(task_params.sqe, 0, sizeof(struct iscsi_wqe));
+       task_params.itid = cmd->task_id;
 
+       rval = init_cleanup_task(&task_params);
+       if (rval)
+               return rval;
+
+       qedi_ring_doorbell(qedi_conn);
        return 0;
 }
diff --git a/drivers/scsi/qedi/qedi_fw_api.c b/drivers/scsi/qedi/qedi_fw_api.c
new file mode 100644 (file)
index 0000000..fd354d4
--- /dev/null
@@ -0,0 +1,781 @@
+/* QLogic iSCSI Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include "qedi_hsi.h"
+#include <linux/qed/qed_if.h>
+
+#include "qedi_fw_iscsi.h"
+#include "qedi_fw_scsi.h"
+
+#define SCSI_NUM_SGES_IN_CACHE 0x4
+
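+/* An SGL is treated as slow when it is longer than the fast-SGL threshold
+ * and contains small middle SGEs.
+ */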
+static bool scsi_is_slow_sgl(u16 num_sges, bool small_mid_sge)
+{
+       return (num_sges > SCSI_NUM_SGES_SLOW_SGL_THR && small_mid_sge);
+}
+
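+/* Program the SGL address/length into the context and mirror up to
+ * SCSI_NUM_SGES_IN_CACHE leading SGEs into the cached descriptor area.
+ */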
+static
+void init_scsi_sgl_context(struct scsi_sgl_params *ctx_sgl_params,
+                          struct scsi_cached_sges *ctx_data_desc,
+                          struct scsi_sgl_task_params *sgl_task_params)
+{
+       u8 sge_index;
+       u8 num_sges;
+       u32 val;
+
+       num_sges = (sgl_task_params->num_sges > SCSI_NUM_SGES_IN_CACHE) ?
+                            SCSI_NUM_SGES_IN_CACHE : sgl_task_params->num_sges;
+
+       /* sgl params */
+       val = cpu_to_le32(sgl_task_params->sgl_phys_addr.lo);
+       ctx_sgl_params->sgl_addr.lo = val;
+       val = cpu_to_le32(sgl_task_params->sgl_phys_addr.hi);
+       ctx_sgl_params->sgl_addr.hi = val;
+       val = cpu_to_le32(sgl_task_params->total_buffer_size);
+       ctx_sgl_params->sgl_total_length = val;
+       ctx_sgl_params->sgl_num_sges = cpu_to_le16(sgl_task_params->num_sges);
+
+       for (sge_index = 0; sge_index < num_sges; sge_index++) {
+               val = cpu_to_le32(sgl_task_params->sgl[sge_index].sge_addr.lo);
+               ctx_data_desc->sge[sge_index].sge_addr.lo = val;
+               val = cpu_to_le32(sgl_task_params->sgl[sge_index].sge_addr.hi);
+               ctx_data_desc->sge[sge_index].sge_addr.hi = val;
+               val = cpu_to_le32(sgl_task_params->sgl[sge_index].sge_len);
+               ctx_data_desc->sge[sge_index].sge_len = val;
+       }
+}
+
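+/* The task size is the tx I/O size for write-direction tasks and the rx
+ * I/O size otherwise; with DIF on the wire the full protected buffer
+ * length is used instead.
+ */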
+static u32 calc_rw_task_size(struct iscsi_task_params *task_params,
+                            enum iscsi_task_type task_type,
+                            struct scsi_sgl_task_params *sgl_task_params,
+                            struct scsi_dif_task_params *dif_task_params)
+{
+       u32 io_size;
+
+       if (task_type == ISCSI_TASK_TYPE_INITIATOR_WRITE ||
+           task_type == ISCSI_TASK_TYPE_TARGET_READ)
+               io_size = task_params->tx_io_size;
+       else
+               io_size = task_params->rx_io_size;
+
+       if (!io_size)
+               return 0;
+
+       if (!dif_task_params)
+               return io_size;
+
+       return !dif_task_params->dif_on_network ?
+              io_size : sgl_task_params->total_buffer_size;
+}
+
+static void
+init_dif_context_flags(struct iscsi_dif_flags *ctx_dif_flags,
+                      struct scsi_dif_task_params *dif_task_params)
+{
+       if (!dif_task_params)
+               return;
+
+       SET_FIELD(ctx_dif_flags->flags, ISCSI_DIF_FLAGS_PROT_INTERVAL_SIZE_LOG,
+                 dif_task_params->dif_block_size_log);
+       SET_FIELD(ctx_dif_flags->flags, ISCSI_DIF_FLAGS_DIF_TO_PEER,
+                 dif_task_params->dif_on_network ? 1 : 0);
+       SET_FIELD(ctx_dif_flags->flags, ISCSI_DIF_FLAGS_HOST_INTERFACE,
+                 dif_task_params->dif_on_host ? 1 : 0);
+}
+
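+/* Build the SQ entry: cleanup WQEs carry only the task id, while normal,
+ * login and middle-path WQEs also encode the SGE count, continuation
+ * length and extended-CDB size.
+ */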
+static void init_sqe(struct iscsi_task_params *task_params,
+                    struct scsi_sgl_task_params *sgl_task_params,
+                    struct scsi_dif_task_params *dif_task_params,
+                    struct iscsi_common_hdr *pdu_header,
+                    struct scsi_initiator_cmd_params *cmd_params,
+                    enum iscsi_task_type task_type,
+                    bool is_cleanup)
+{
+       if (!task_params->sqe)
+               return;
+
+       memset(task_params->sqe, 0, sizeof(*task_params->sqe));
+       task_params->sqe->task_id = cpu_to_le16(task_params->itid);
+       if (is_cleanup) {
+               SET_FIELD(task_params->sqe->flags, ISCSI_WQE_WQE_TYPE,
+                         ISCSI_WQE_TYPE_TASK_CLEANUP);
+               return;
+       }
+
+       switch (task_type) {
+       case ISCSI_TASK_TYPE_INITIATOR_WRITE:
+       {
+               u32 buf_size = 0;
+               u32 num_sges = 0;
+
+               init_dif_context_flags(&task_params->sqe->prot_flags,
+                                      dif_task_params);
+
+               SET_FIELD(task_params->sqe->flags, ISCSI_WQE_WQE_TYPE,
+                         ISCSI_WQE_TYPE_NORMAL);
+
+               if (task_params->tx_io_size) {
+                       buf_size = calc_rw_task_size(task_params, task_type,
+                                                    sgl_task_params,
+                                                    dif_task_params);
+
+                       if (scsi_is_slow_sgl(sgl_task_params->num_sges,
+                                            sgl_task_params->small_mid_sge))
+                               num_sges = ISCSI_WQE_NUM_SGES_SLOWIO;
+                       else
+                               num_sges = min(sgl_task_params->num_sges,
+                                              (u16)SCSI_NUM_SGES_SLOW_SGL_THR);
+               }
+
+               SET_FIELD(task_params->sqe->flags, ISCSI_WQE_NUM_SGES,
+                         num_sges);
+               SET_FIELD(task_params->sqe->contlen_cdbsize, ISCSI_WQE_CONT_LEN,
+                         buf_size);
+
+               if (GET_FIELD(pdu_header->hdr_second_dword,
+                             ISCSI_CMD_HDR_TOTAL_AHS_LEN))
+                       SET_FIELD(task_params->sqe->contlen_cdbsize,
+                                 ISCSI_WQE_CDB_SIZE,
+                                 cmd_params->extended_cdb_sge.sge_len);
+       }
+               break;
+       case ISCSI_TASK_TYPE_INITIATOR_READ:
+               SET_FIELD(task_params->sqe->flags, ISCSI_WQE_WQE_TYPE,
+                         ISCSI_WQE_TYPE_NORMAL);
+
+               if (GET_FIELD(pdu_header->hdr_second_dword,
+                             ISCSI_CMD_HDR_TOTAL_AHS_LEN))
+                       SET_FIELD(task_params->sqe->contlen_cdbsize,
+                                 ISCSI_WQE_CDB_SIZE,
+                                 cmd_params->extended_cdb_sge.sge_len);
+               break;
+       case ISCSI_TASK_TYPE_LOGIN_RESPONSE:
+       case ISCSI_TASK_TYPE_MIDPATH:
+       {
+               bool advance_statsn = true;
+
+               if (task_type == ISCSI_TASK_TYPE_LOGIN_RESPONSE)
+                       SET_FIELD(task_params->sqe->flags, ISCSI_WQE_WQE_TYPE,
+                                 ISCSI_WQE_TYPE_LOGIN);
+               else
+                       SET_FIELD(task_params->sqe->flags, ISCSI_WQE_WQE_TYPE,
+                                 ISCSI_WQE_TYPE_MIDDLE_PATH);
+
+               if (task_type == ISCSI_TASK_TYPE_MIDPATH) {
+                       u8 opcode = GET_FIELD(pdu_header->hdr_first_byte,
+                                             ISCSI_COMMON_HDR_OPCODE);
+
+                       if (opcode != ISCSI_OPCODE_TEXT_RESPONSE &&
+                           (opcode != ISCSI_OPCODE_NOP_IN ||
+                           pdu_header->itt == ISCSI_TTT_ALL_ONES))
+                               advance_statsn = false;
+               }
+
+               SET_FIELD(task_params->sqe->flags, ISCSI_WQE_RESPONSE,
+                         advance_statsn ? 1 : 0);
+
+               if (task_params->tx_io_size) {
+                       SET_FIELD(task_params->sqe->contlen_cdbsize,
+                                 ISCSI_WQE_CONT_LEN, task_params->tx_io_size);
+
+                       if (scsi_is_slow_sgl(sgl_task_params->num_sges,
+                                            sgl_task_params->small_mid_sge))
+                               SET_FIELD(task_params->sqe->flags,
+                                         ISCSI_WQE_NUM_SGES,
+                                         ISCSI_WQE_NUM_SGES_SLOWIO);
+                       else
+                               SET_FIELD(task_params->sqe->flags,
+                                         ISCSI_WQE_NUM_SGES,
+                                         min(sgl_task_params->num_sges,
+                                             (u16)SCSI_NUM_SGES_SLOW_SGL_THR));
+               }
+       }
+               break;
+       default:
+               break;
+       }
+}
+
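+/* Zero the task context, mirror the raw PDU header dwords into the ystorm
+ * section and set the fields common to every task type.
+ */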
+static void init_default_iscsi_task(struct iscsi_task_params *task_params,
+                                   struct data_hdr *pdu_header,
+                                   enum iscsi_task_type task_type)
+{
+       struct iscsi_task_context *context;
+       u16 index;
+       u32 val;
+
+       context = task_params->context;
+       memset(context, 0, sizeof(*context));
+
+       for (index = 0; index <
+            ARRAY_SIZE(context->ystorm_st_context.pdu_hdr.data.data);
+            index++) {
+               val = cpu_to_le32(pdu_header->data[index]);
+               context->ystorm_st_context.pdu_hdr.data.data[index] = val;
+       }
+
+       context->mstorm_st_context.task_type = task_type;
+       context->mstorm_ag_context.task_cid =
+                                           cpu_to_le16(task_params->conn_icid);
+
+       SET_FIELD(context->ustorm_ag_context.flags1,
+                 USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
+
+       context->ustorm_st_context.task_type = task_type;
+       context->ustorm_st_context.cq_rss_number = task_params->cq_rss_number;
+       context->ustorm_ag_context.icid = cpu_to_le16(task_params->conn_icid);
+}
+
+static
+void init_initiator_rw_cdb_ystorm_context(struct ystorm_iscsi_task_st_ctx *ystc,
+                                         struct scsi_initiator_cmd_params *cmd)
+{
+       union iscsi_task_hdr *ctx_pdu_hdr = &ystc->pdu_hdr;
+       u32 val;
+
+       if (!cmd->extended_cdb_sge.sge_len)
+               return;
+
+       SET_FIELD(ctx_pdu_hdr->ext_cdb_cmd.hdr_second_dword,
+                 ISCSI_EXT_CDB_CMD_HDR_CDB_SIZE,
+                 cmd->extended_cdb_sge.sge_len);
+       val = cpu_to_le32(cmd->extended_cdb_sge.sge_addr.lo);
+       ctx_pdu_hdr->ext_cdb_cmd.cdb_sge.sge_addr.lo = val;
+       val = cpu_to_le32(cmd->extended_cdb_sge.sge_addr.hi);
+       ctx_pdu_hdr->ext_cdb_cmd.cdb_sge.sge_addr.hi = val;
+       val = cpu_to_le32(cmd->extended_cdb_sge.sge_len);
+       ctx_pdu_hdr->ext_cdb_cmd.cdb_sge.sge_len  = val;
+}
+
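+/* Program the ustorm contexts: remaining receive length, expected data
+ * transfer length, SGE count and the DIF error CF enable.
+ */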
+static
+void init_ustorm_task_contexts(struct ustorm_iscsi_task_st_ctx *ustorm_st_cxt,
+                              struct ustorm_iscsi_task_ag_ctx *ustorm_ag_cxt,
+                              u32 remaining_recv_len,
+                              u32 expected_data_transfer_len,
+                              u8 num_sges, bool tx_dif_conn_err_en)
+{
+       u32 val;
+
+       ustorm_st_cxt->rem_rcv_len = cpu_to_le32(remaining_recv_len);
+       ustorm_ag_cxt->exp_data_acked = cpu_to_le32(expected_data_transfer_len);
+       val = cpu_to_le32(expected_data_transfer_len);
+       ustorm_st_cxt->exp_data_transfer_len = val;
+       SET_FIELD(ustorm_st_cxt->reg1.reg1_map, ISCSI_REG1_NUM_SGES, num_sges);
+       SET_FIELD(ustorm_ag_cxt->flags2,
+                 USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN,
+                 tx_dif_conn_err_en ? 1 : 0);
+}
+
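+/* Seed exp_data_acked/exp_cont_len per task type; for initiator writes
+ * this is the unsolicited data allowed before the first R2T, for
+ * initiator reads only the AHS bytes.
+ */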
+static
+void set_rw_exp_data_acked_and_cont_len(struct iscsi_task_context *context,
+                                       struct iscsi_conn_params  *conn_params,
+                                       enum iscsi_task_type task_type,
+                                       u32 task_size,
+                                       u32 exp_data_transfer_len,
+                                       u8 total_ahs_length)
+{
+       u32 max_unsolicited_data = 0, val;
+
+       if (total_ahs_length &&
+           (task_type == ISCSI_TASK_TYPE_INITIATOR_WRITE ||
+            task_type == ISCSI_TASK_TYPE_INITIATOR_READ))
+               SET_FIELD(context->ustorm_st_context.flags2,
+                         USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST, 1);
+
+       switch (task_type) {
+       case ISCSI_TASK_TYPE_INITIATOR_WRITE:
+               if (!conn_params->initial_r2t)
+                       max_unsolicited_data = conn_params->first_burst_length;
+               else if (conn_params->immediate_data)
+                       max_unsolicited_data =
+                                         min(conn_params->first_burst_length,
+                                             conn_params->max_send_pdu_length);
+
+               context->ustorm_ag_context.exp_data_acked =
+                                  cpu_to_le32(total_ahs_length == 0 ?
+                                               min(exp_data_transfer_len,
+                                                   max_unsolicited_data) :
+                                               ((u32)(total_ahs_length +
+                                                      ISCSI_AHS_CNTL_SIZE)));
+               break;
+       case ISCSI_TASK_TYPE_TARGET_READ:
+               val = cpu_to_le32(exp_data_transfer_len);
+               context->ustorm_ag_context.exp_data_acked = val;
+               break;
+       case ISCSI_TASK_TYPE_INITIATOR_READ:
+               context->ustorm_ag_context.exp_data_acked =
+                                       cpu_to_le32((total_ahs_length == 0 ? 0 :
+                                                    total_ahs_length +
+                                                    ISCSI_AHS_CNTL_SIZE));
+               break;
+       case ISCSI_TASK_TYPE_TARGET_WRITE:
+               val = cpu_to_le32(task_size);
+               context->ustorm_ag_context.exp_cont_len = val;
+               break;
+       default:
+               break;
+       }
+}
+
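+/* DIF contexts are programmed only when protection is enabled on both the
+ * host and network interfaces; RDIF covers the receive direction and TDIF
+ * the transmit direction of the task.
+ */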
+static
+void init_rtdif_task_context(struct rdif_task_context *rdif_context,
+                            struct tdif_task_context *tdif_context,
+                            struct scsi_dif_task_params *dif_task_params,
+                            enum iscsi_task_type task_type)
+{
+       u32 val;
+
+       if (!dif_task_params->dif_on_network || !dif_task_params->dif_on_host)
+               return;
+
+       if (task_type == ISCSI_TASK_TYPE_TARGET_WRITE ||
+           task_type == ISCSI_TASK_TYPE_INITIATOR_READ) {
+               rdif_context->app_tag_value =
+                                 cpu_to_le16(dif_task_params->application_tag);
+               rdif_context->partial_crc_value = cpu_to_le16(0xffff);
+               val = cpu_to_le32(dif_task_params->initial_ref_tag);
+               rdif_context->initial_ref_tag = val;
+               rdif_context->app_tag_mask =
+                            cpu_to_le16(dif_task_params->application_tag_mask);
+               SET_FIELD(rdif_context->flags0, RDIF_TASK_CONTEXT_CRC_SEED,
+                         dif_task_params->crc_seed ? 1 : 0);
+               SET_FIELD(rdif_context->flags0, RDIF_TASK_CONTEXT_HOSTGUARDTYPE,
+                         dif_task_params->host_guard_type);
+               SET_FIELD(rdif_context->flags0,
+                         RDIF_TASK_CONTEXT_PROTECTIONTYPE,
+                         dif_task_params->protection_type);
+               SET_FIELD(rdif_context->flags0,
+                         RDIF_TASK_CONTEXT_INITIALREFTAGVALID, 1);
+               SET_FIELD(rdif_context->flags0,
+                         RDIF_TASK_CONTEXT_KEEPREFTAGCONST,
+                         dif_task_params->keep_ref_tag_const ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_VALIDATEAPPTAG,
+                         (dif_task_params->validate_app_tag &&
+                         dif_task_params->dif_on_network) ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_VALIDATEGUARD,
+                         (dif_task_params->validate_guard &&
+                         dif_task_params->dif_on_network) ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_VALIDATEREFTAG,
+                         (dif_task_params->validate_ref_tag &&
+                         dif_task_params->dif_on_network) ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_HOSTINTERFACE,
+                         dif_task_params->dif_on_host ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_NETWORKINTERFACE,
+                         dif_task_params->dif_on_network ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_FORWARDGUARD,
+                         dif_task_params->forward_guard ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_FORWARDAPPTAG,
+                         dif_task_params->forward_app_tag ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_FORWARDREFTAG,
+                         dif_task_params->forward_ref_tag ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK,
+                         dif_task_params->forward_app_tag_with_mask ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK,
+                         dif_task_params->forward_ref_tag_with_mask ? 1 : 0);
+               SET_FIELD(rdif_context->flags1,
+                         RDIF_TASK_CONTEXT_INTERVALSIZE,
+                         dif_task_params->dif_block_size_log - 9);
+               SET_FIELD(rdif_context->state,
+                         RDIF_TASK_CONTEXT_REFTAGMASK,
+                         dif_task_params->ref_tag_mask);
+               SET_FIELD(rdif_context->state, RDIF_TASK_CONTEXT_IGNOREAPPTAG,
+                         dif_task_params->ignore_app_tag);
+       }
+
+       if (task_type == ISCSI_TASK_TYPE_TARGET_READ ||
+           task_type == ISCSI_TASK_TYPE_INITIATOR_WRITE) {
+               tdif_context->app_tag_value =
+                                 cpu_to_le16(dif_task_params->application_tag);
+               tdif_context->partial_crc_valueB =
+                      cpu_to_le16(dif_task_params->crc_seed ? 0xffff : 0x0000);
+               tdif_context->partial_crc_value_a =
+                      cpu_to_le16(dif_task_params->crc_seed ? 0xffff : 0x0000);
+               SET_FIELD(tdif_context->flags0, TDIF_TASK_CONTEXT_CRC_SEED,
+                         dif_task_params->crc_seed ? 1 : 0);
+
+               SET_FIELD(tdif_context->flags0,
+                         TDIF_TASK_CONTEXT_SETERRORWITHEOP,
+                         dif_task_params->tx_dif_conn_err_en ? 1 : 0);
+               SET_FIELD(tdif_context->flags1, TDIF_TASK_CONTEXT_FORWARDGUARD,
+                         dif_task_params->forward_guard   ? 1 : 0);
+               SET_FIELD(tdif_context->flags1, TDIF_TASK_CONTEXT_FORWARDAPPTAG,
+                         dif_task_params->forward_app_tag ? 1 : 0);
+               SET_FIELD(tdif_context->flags1, TDIF_TASK_CONTEXT_FORWARDREFTAG,
+                         dif_task_params->forward_ref_tag ? 1 : 0);
+               SET_FIELD(tdif_context->flags1, TDIF_TASK_CONTEXT_INTERVALSIZE,
+                         dif_task_params->dif_block_size_log - 9);
+               SET_FIELD(tdif_context->flags1, TDIF_TASK_CONTEXT_HOSTINTERFACE,
+                         dif_task_params->dif_on_host    ? 1 : 0);
+               SET_FIELD(tdif_context->flags1,
+                         TDIF_TASK_CONTEXT_NETWORKINTERFACE,
+                         dif_task_params->dif_on_network ? 1 : 0);
+               val = cpu_to_le32(dif_task_params->initial_ref_tag);
+               tdif_context->initial_ref_tag = val;
+               tdif_context->app_tag_mask =
+                            cpu_to_le16(dif_task_params->application_tag_mask);
+               SET_FIELD(tdif_context->flags0,
+                         TDIF_TASK_CONTEXT_HOSTGUARDTYPE,
+                         dif_task_params->host_guard_type);
+               SET_FIELD(tdif_context->flags0,
+                         TDIF_TASK_CONTEXT_PROTECTIONTYPE,
+                         dif_task_params->protection_type);
+               SET_FIELD(tdif_context->flags0,
+                         TDIF_TASK_CONTEXT_INITIALREFTAGVALID,
+                         dif_task_params->initial_ref_tag_is_valid ? 1 : 0);
+               SET_FIELD(tdif_context->flags0,
+                         TDIF_TASK_CONTEXT_KEEPREFTAGCONST,
+                         dif_task_params->keep_ref_tag_const ? 1 : 0);
+               SET_FIELD(tdif_context->flags1, TDIF_TASK_CONTEXT_VALIDATEGUARD,
+                         (dif_task_params->validate_guard &&
+                          dif_task_params->dif_on_host) ? 1 : 0);
+               SET_FIELD(tdif_context->flags1,
+                         TDIF_TASK_CONTEXT_VALIDATEAPPTAG,
+                         (dif_task_params->validate_app_tag &&
+                         dif_task_params->dif_on_host) ? 1 : 0);
+               SET_FIELD(tdif_context->flags1,
+                         TDIF_TASK_CONTEXT_VALIDATEREFTAG,
+                         (dif_task_params->validate_ref_tag &&
+                          dif_task_params->dif_on_host) ? 1 : 0);
+               SET_FIELD(tdif_context->flags1,
+                         TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK,
+                         dif_task_params->forward_app_tag_with_mask ? 1 : 0);
+               SET_FIELD(tdif_context->flags1,
+                         TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK,
+                         dif_task_params->forward_ref_tag_with_mask ? 1 : 0);
+               SET_FIELD(tdif_context->flags1,
+                         TDIF_TASK_CONTEXT_REFTAGMASK,
+                         dif_task_params->ref_tag_mask);
+               SET_FIELD(tdif_context->flags0,
+                         TDIF_TASK_CONTEXT_IGNOREAPPTAG,
+                         dif_task_params->ignore_app_tag ? 1 : 0);
+       }
+}
+
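+/* Flag the task for local completion, used for PDUs that expect no
+ * response from the target (e.g. a NOP-Out with ITT of all ones).
+ */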
+static void set_local_completion_context(struct iscsi_task_context *context)
+{
+       SET_FIELD(context->ystorm_st_context.state.flags,
+                 YSTORM_ISCSI_TASK_STATE_LOCAL_COMP, 1);
+       SET_FIELD(context->ustorm_st_context.flags,
+                 USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP, 1);
+}
+
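+/* Common read/write task initialization: fill the per-storm contexts,
+ * clamp the expected transfer length to the task size and finish with
+ * the DIF context and the SQE.
+ */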
+static int init_rw_iscsi_task(struct iscsi_task_params *task_params,
+                             enum iscsi_task_type task_type,
+                             struct iscsi_conn_params *conn_params,
+                             struct iscsi_common_hdr *pdu_header,
+                             struct scsi_sgl_task_params *sgl_task_params,
+                             struct scsi_initiator_cmd_params *cmd_params,
+                             struct scsi_dif_task_params *dif_task_params)
+{
+       u32 exp_data_transfer_len = conn_params->max_burst_length;
+       struct iscsi_task_context *cxt;
+       bool slow_io = false;
+       u32 task_size, val;
+       u8 num_sges = 0;
+
+       task_size = calc_rw_task_size(task_params, task_type, sgl_task_params,
+                                     dif_task_params);
+
+       init_default_iscsi_task(task_params, (struct data_hdr *)pdu_header,
+                               task_type);
+
+       cxt = task_params->context;
+
+       val = cpu_to_le32(task_size);
+       cxt->ystorm_st_context.pdu_hdr.cmd.expected_transfer_length = val;
+       init_initiator_rw_cdb_ystorm_context(&cxt->ystorm_st_context,
+                                            cmd_params);
+       val = cpu_to_le32(cmd_params->sense_data_buffer_phys_addr.lo);
+       cxt->mstorm_st_context.sense_db.lo = val;
+
+       val = cpu_to_le32(cmd_params->sense_data_buffer_phys_addr.hi);
+       cxt->mstorm_st_context.sense_db.hi = val;
+
+       if (task_params->tx_io_size) {
+               init_dif_context_flags(&cxt->ystorm_st_context.state.dif_flags,
+                                      dif_task_params);
+               init_scsi_sgl_context(&cxt->ystorm_st_context.state.sgl_params,
+                                     &cxt->ystorm_st_context.state.data_desc,
+                                     sgl_task_params);
+
+               slow_io = scsi_is_slow_sgl(sgl_task_params->num_sges,
+                                          sgl_task_params->small_mid_sge);
+
+               num_sges = !slow_io ? min_t(u16, sgl_task_params->num_sges,
+                                           (u16)SCSI_NUM_SGES_SLOW_SGL_THR) :
+                                     ISCSI_WQE_NUM_SGES_SLOWIO;
+
+               if (slow_io) {
+                       SET_FIELD(cxt->ystorm_st_context.state.flags,
+                                 YSTORM_ISCSI_TASK_STATE_SLOW_IO, 1);
+               }
+       } else if (task_params->rx_io_size) {
+               init_dif_context_flags(&cxt->mstorm_st_context.dif_flags,
+                                      dif_task_params);
+               init_scsi_sgl_context(&cxt->mstorm_st_context.sgl_params,
+                                     &cxt->mstorm_st_context.data_desc,
+                                     sgl_task_params);
+               num_sges = !scsi_is_slow_sgl(sgl_task_params->num_sges,
+                               sgl_task_params->small_mid_sge) ?
+                               min_t(u16, sgl_task_params->num_sges,
+                                     (u16)SCSI_NUM_SGES_SLOW_SGL_THR) :
+                               ISCSI_WQE_NUM_SGES_SLOWIO;
+               cxt->mstorm_st_context.rem_task_size = cpu_to_le32(task_size);
+       }
+
+       if (exp_data_transfer_len > task_size ||
+           task_type != ISCSI_TASK_TYPE_TARGET_WRITE)
+               exp_data_transfer_len = task_size;
+
+       init_ustorm_task_contexts(&task_params->context->ustorm_st_context,
+                                 &task_params->context->ustorm_ag_context,
+                                 task_size, exp_data_transfer_len, num_sges,
+                                 dif_task_params ?
+                                 dif_task_params->tx_dif_conn_err_en : false);
+
+       set_rw_exp_data_acked_and_cont_len(task_params->context, conn_params,
+                                          task_type, task_size,
+                                          exp_data_transfer_len,
+                                       GET_FIELD(pdu_header->hdr_second_dword,
+                                                 ISCSI_CMD_HDR_TOTAL_AHS_LEN));
+
+       if (dif_task_params)
+               init_rtdif_task_context(&task_params->context->rdif_context,
+                                       &task_params->context->tdif_context,
+                                       dif_task_params, task_type);
+
+       init_sqe(task_params, sgl_task_params, dif_task_params, pdu_header,
+                cmd_params, task_type, false);
+
+       return 0;
+}
+
+int init_initiator_rw_iscsi_task(struct iscsi_task_params *task_params,
+                                struct iscsi_conn_params *conn_params,
+                                struct scsi_initiator_cmd_params *cmd_params,
+                                struct iscsi_cmd_hdr *cmd_header,
+                                struct scsi_sgl_task_params *tx_sgl_params,
+                                struct scsi_sgl_task_params *rx_sgl_params,
+                                struct scsi_dif_task_params *dif_task_params)
+{
+       if (GET_FIELD(cmd_header->flags_attr, ISCSI_CMD_HDR_WRITE))
+               return init_rw_iscsi_task(task_params,
+                                         ISCSI_TASK_TYPE_INITIATOR_WRITE,
+                                         conn_params,
+                                         (struct iscsi_common_hdr *)cmd_header,
+                                         tx_sgl_params, cmd_params,
+                                         dif_task_params);
+       else if (GET_FIELD(cmd_header->flags_attr, ISCSI_CMD_HDR_READ))
+               return init_rw_iscsi_task(task_params,
+                                         ISCSI_TASK_TYPE_INITIATOR_READ,
+                                         conn_params,
+                                         (struct iscsi_common_hdr *)cmd_header,
+                                         rx_sgl_params, cmd_params,
+                                         dif_task_params);
+       else
+               return -1;
+}
+
+int init_initiator_login_request_task(struct iscsi_task_params *task_params,
+                                     struct iscsi_login_req_hdr  *login_header,
+                                     struct scsi_sgl_task_params *tx_params,
+                                     struct scsi_sgl_task_params *rx_params)
+{
+       struct iscsi_task_context *cxt;
+
+       cxt = task_params->context;
+
+       init_default_iscsi_task(task_params,
+                               (struct data_hdr *)login_header,
+                               ISCSI_TASK_TYPE_MIDPATH);
+
+       init_ustorm_task_contexts(&cxt->ustorm_st_context,
+                                 &cxt->ustorm_ag_context,
+                                 task_params->rx_io_size ?
+                                 rx_params->total_buffer_size : 0,
+                                 task_params->tx_io_size ?
+                                 tx_params->total_buffer_size : 0, 0,
+                                 0);
+
+       if (task_params->tx_io_size)
+               init_scsi_sgl_context(&cxt->ystorm_st_context.state.sgl_params,
+                                     &cxt->ystorm_st_context.state.data_desc,
+                                     tx_params);
+
+       if (task_params->rx_io_size)
+               init_scsi_sgl_context(&cxt->mstorm_st_context.sgl_params,
+                                     &cxt->mstorm_st_context.data_desc,
+                                     rx_params);
+
+       cxt->mstorm_st_context.rem_task_size =
+                       cpu_to_le32(task_params->rx_io_size ?
+                                   rx_params->total_buffer_size : 0);
+
+       init_sqe(task_params, tx_params, NULL,
+                (struct iscsi_common_hdr *)login_header, NULL,
+                ISCSI_TASK_TYPE_MIDPATH, false);
+
+       return 0;
+}
+
+int init_initiator_nop_out_task(struct iscsi_task_params *task_params,
+                               struct iscsi_nop_out_hdr *nop_out_pdu_header,
+                               struct scsi_sgl_task_params *tx_sgl_task_params,
+                               struct scsi_sgl_task_params *rx_sgl_task_params)
+{
+       struct iscsi_task_context *cxt;
+
+       cxt = task_params->context;
+
+       init_default_iscsi_task(task_params,
+                               (struct data_hdr *)nop_out_pdu_header,
+                               ISCSI_TASK_TYPE_MIDPATH);
+
+       if (nop_out_pdu_header->itt == ISCSI_ITT_ALL_ONES)
+               set_local_completion_context(task_params->context);
+
+       if (task_params->tx_io_size)
+               init_scsi_sgl_context(&cxt->ystorm_st_context.state.sgl_params,
+                                     &cxt->ystorm_st_context.state.data_desc,
+                                     tx_sgl_task_params);
+
+       if (task_params->rx_io_size)
+               init_scsi_sgl_context(&cxt->mstorm_st_context.sgl_params,
+                                     &cxt->mstorm_st_context.data_desc,
+                                     rx_sgl_task_params);
+
+       init_ustorm_task_contexts(&cxt->ustorm_st_context,
+                                 &cxt->ustorm_ag_context,
+                                 task_params->rx_io_size ?
+                                 rx_sgl_task_params->total_buffer_size : 0,
+                                 task_params->tx_io_size ?
+                                 tx_sgl_task_params->total_buffer_size : 0,
+                                 0, 0);
+
+       cxt->mstorm_st_context.rem_task_size =
+                               cpu_to_le32(task_params->rx_io_size ?
+                                       rx_sgl_task_params->total_buffer_size :
+                                       0);
+
+       init_sqe(task_params, tx_sgl_task_params, NULL,
+                (struct iscsi_common_hdr *)nop_out_pdu_header, NULL,
+                ISCSI_TASK_TYPE_MIDPATH, false);
+
+       return 0;
+}
+
+int init_initiator_logout_request_task(struct iscsi_task_params *task_params,
+                                      struct iscsi_logout_req_hdr *logout_hdr,
+                                      struct scsi_sgl_task_params *tx_params,
+                                      struct scsi_sgl_task_params *rx_params)
+{
+       struct iscsi_task_context *cxt;
+
+       cxt = task_params->context;
+
+       init_default_iscsi_task(task_params,
+                               (struct data_hdr *)logout_hdr,
+                               ISCSI_TASK_TYPE_MIDPATH);
+
+       if (task_params->tx_io_size)
+               init_scsi_sgl_context(&cxt->ystorm_st_context.state.sgl_params,
+                                     &cxt->ystorm_st_context.state.data_desc,
+                                     tx_params);
+
+       if (task_params->rx_io_size)
+               init_scsi_sgl_context(&cxt->mstorm_st_context.sgl_params,
+                                     &cxt->mstorm_st_context.data_desc,
+                                     rx_params);
+
+       init_ustorm_task_contexts(&cxt->ustorm_st_context,
+                                 &cxt->ustorm_ag_context,
+                                 task_params->rx_io_size ?
+                                 rx_params->total_buffer_size : 0,
+                                 task_params->tx_io_size ?
+                                 tx_params->total_buffer_size : 0,
+                                 0, 0);
+
+       cxt->mstorm_st_context.rem_task_size =
+                                       cpu_to_le32(task_params->rx_io_size ?
+                                       rx_params->total_buffer_size : 0);
+
+       init_sqe(task_params, tx_params, NULL,
+                (struct iscsi_common_hdr *)logout_hdr, NULL,
+                ISCSI_TASK_TYPE_MIDPATH, false);
+
+       return 0;
+}
+
+int init_initiator_tmf_request_task(struct iscsi_task_params *task_params,
+                                   struct iscsi_tmf_request_hdr *tmf_header)
+{
+       init_default_iscsi_task(task_params, (struct data_hdr *)tmf_header,
+                               ISCSI_TASK_TYPE_MIDPATH);
+
+       init_sqe(task_params, NULL, NULL,
+                (struct iscsi_common_hdr *)tmf_header, NULL,
+                ISCSI_TASK_TYPE_MIDPATH, false);
+
+       return 0;
+}
+
+int init_initiator_text_request_task(struct iscsi_task_params *task_params,
+                                    struct iscsi_text_request_hdr *text_header,
+                                    struct scsi_sgl_task_params *tx_params,
+                                    struct scsi_sgl_task_params *rx_params)
+{
+       struct iscsi_task_context *cxt;
+
+       cxt = task_params->context;
+
+       init_default_iscsi_task(task_params,
+                               (struct data_hdr *)text_header,
+                               ISCSI_TASK_TYPE_MIDPATH);
+
+       if (task_params->tx_io_size)
+               init_scsi_sgl_context(&cxt->ystorm_st_context.state.sgl_params,
+                                     &cxt->ystorm_st_context.state.data_desc,
+                                     tx_params);
+
+       if (task_params->rx_io_size)
+               init_scsi_sgl_context(&cxt->mstorm_st_context.sgl_params,
+                                     &cxt->mstorm_st_context.data_desc,
+                                     rx_params);
+
+       cxt->mstorm_st_context.rem_task_size =
+                               cpu_to_le32(task_params->rx_io_size ?
+                                       rx_params->total_buffer_size : 0);
+
+       init_ustorm_task_contexts(&cxt->ustorm_st_context,
+                                 &cxt->ustorm_ag_context,
+                                 task_params->rx_io_size ?
+                                 rx_params->total_buffer_size : 0,
+                                 task_params->tx_io_size ?
+                                 tx_params->total_buffer_size : 0, 0, 0);
+
+       init_sqe(task_params, tx_params, NULL,
+                (struct iscsi_common_hdr *)text_header, NULL,
+                ISCSI_TASK_TYPE_MIDPATH, false);
+
+       return 0;
+}
+
+int init_cleanup_task(struct iscsi_task_params *task_params)
+{
+       init_sqe(task_params, NULL, NULL, NULL, NULL, ISCSI_TASK_TYPE_MIDPATH,
+                true);
+       return 0;
+}
diff --git a/drivers/scsi/qedi/qedi_fw_iscsi.h b/drivers/scsi/qedi/qedi_fw_iscsi.h
new file mode 100644 (file)
index 0000000..b6f24f9
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * QLogic iSCSI Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QEDI_FW_ISCSI_H_
+#define _QEDI_FW_ISCSI_H_
+
+#include "qedi_fw_scsi.h"
+
+struct iscsi_task_params {
+       struct iscsi_task_context *context;
+       struct iscsi_wqe          *sqe;
+       u32                       tx_io_size;
+       u32                       rx_io_size;
+       u16                       conn_icid;
+       u16                       itid;
+       u8                        cq_rss_number;
+};
+
+struct iscsi_conn_params {
+       u32     first_burst_length;
+       u32     max_send_pdu_length;
+       u32     max_burst_length;
+       bool    initial_r2t;
+       bool    immediate_data;
+};
+
+/* @brief init_initiator_rw_iscsi_task - initializes iSCSI Initiator Read or
+ * Write task context.
+ *
+ * @param task_params     - Pointer to task parameters struct
+ * @param conn_params     - Connection Parameters
+ * @param cmd_params      - command specific parameters
+ * @param cmd_pdu_header  - PDU Header Parameters
+ * @param tx_sgl_params   - Pointer to Tx SGL task params
+ * @param rx_sgl_params   - Pointer to Rx SGL task params
+ * @param dif_task_params - Pointer to DIF parameters struct
+ */
+int init_initiator_rw_iscsi_task(struct iscsi_task_params *task_params,
+                                struct iscsi_conn_params *conn_params,
+                                struct scsi_initiator_cmd_params *cmd_params,
+                                struct iscsi_cmd_hdr *cmd_pdu_header,
+                                struct scsi_sgl_task_params *tx_sgl_params,
+                                struct scsi_sgl_task_params *rx_sgl_params,
+                                struct scsi_dif_task_params *dif_task_params);
+
+/* @brief init_initiator_login_request_task - initializes iSCSI Initiator Login
+ * Request task context.
+ *
+ * @param task_params  - Pointer to task parameters struct
+ * @param login_header - PDU Header Parameters
+ * @param tx_params    - Pointer to Tx SGL task params
+ * @param rx_params    - Pointer to Rx SGL task params
+ */
+int init_initiator_login_request_task(struct iscsi_task_params *task_params,
+                                     struct iscsi_login_req_hdr *login_header,
+                                     struct scsi_sgl_task_params *tx_params,
+                                     struct scsi_sgl_task_params *rx_params);
+
+/* @brief init_initiator_nop_out_task - initializes iSCSI Initiator NOP Out
+ * task context.
+ *
+ * @param task_params        - Pointer to task parameters struct
+ * @param nop_out_pdu_header - PDU Header Parameters
+ * @param tx_sgl_params      - Pointer to Tx SGL task params
+ * @param rx_sgl_params      - Pointer to Rx SGL task params
+ */
+int init_initiator_nop_out_task(struct iscsi_task_params *task_params,
+                               struct iscsi_nop_out_hdr *nop_out_pdu_header,
+                               struct scsi_sgl_task_params *tx_sgl_params,
+                               struct scsi_sgl_task_params *rx_sgl_params);
+
+/* @brief init_initiator_logout_request_task - initializes iSCSI Initiator
+ * Logout Request task context.
+ *
+ * @param task_params - Pointer to task parameters struct
+ * @param logout_hdr  - PDU Header Parameters
+ * @param tx_params   - Pointer to Tx SGL task params
+ * @param rx_params   - Pointer to Rx SGL task params
+ */
+int init_initiator_logout_request_task(struct iscsi_task_params *task_params,
+                                      struct iscsi_logout_req_hdr *logout_hdr,
+                                      struct scsi_sgl_task_params *tx_params,
+                                      struct scsi_sgl_task_params *rx_params);
+
+/* @brief init_initiator_tmf_request_task - initializes iSCSI Initiator TMF
+ * task context.
+ *
+ * @param task_params - Pointer to task parameters struct
+ * @param tmf_header  - PDU Header Parameters
+ */
+int init_initiator_tmf_request_task(struct iscsi_task_params *task_params,
+                                   struct iscsi_tmf_request_hdr *tmf_header);
+
+/* @brief init_initiator_text_request_task - initializes iSCSI Initiator Text
+ * Request task context.
+ *
+ * @param task_params - Pointer to task parameters struct
+ * @param text_header - PDU Header Parameters
+ * @param tx_params   - Pointer to Tx SGL task params
+ * @param rx_params   - Pointer to Rx SGL task params
+ */
+int init_initiator_text_request_task(struct iscsi_task_params *task_params,
+                                    struct iscsi_text_request_hdr *text_header,
+                                    struct scsi_sgl_task_params *tx_params,
+                                    struct scsi_sgl_task_params *rx_params);
+
+/* @brief init_cleanup_task - initializes a Cleanup task (SQE only)
+ *
+ * @param task_params - Pointer to task parameters struct
+ */
+int init_cleanup_task(struct iscsi_task_params *task_params);
+#endif
diff --git a/drivers/scsi/qedi/qedi_fw_scsi.h b/drivers/scsi/qedi/qedi_fw_scsi.h
new file mode 100644 (file)
index 0000000..cdaf918
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * QLogic iSCSI Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QEDI_FW_SCSI_H_
+#define _QEDI_FW_SCSI_H_
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include "qedi_hsi.h"
+#include <linux/qed/qed_if.h>
+
+struct scsi_sgl_task_params {
+       struct scsi_sge *sgl;
+       struct regpair  sgl_phys_addr;
+       u32             total_buffer_size;
+       u16             num_sges;
+       bool            small_mid_sge;
+};
+
+struct scsi_dif_task_params {
+       u32     initial_ref_tag;
+       bool    initial_ref_tag_is_valid;
+       u16     application_tag;
+       u16     application_tag_mask;
+       u16     dif_block_size_log;
+       bool    dif_on_network;
+       bool    dif_on_host;
+       u8      host_guard_type;
+       u8      protection_type;
+       u8      ref_tag_mask;
+       bool    crc_seed;
+       bool    tx_dif_conn_err_en;
+       bool    ignore_app_tag;
+       bool    keep_ref_tag_const;
+       bool    validate_guard;
+       bool    validate_app_tag;
+       bool    validate_ref_tag;
+       bool    forward_guard;
+       bool    forward_app_tag;
+       bool    forward_ref_tag;
+       bool    forward_app_tag_with_mask;
+       bool    forward_ref_tag_with_mask;
+};
+
+struct scsi_initiator_cmd_params {
+       struct scsi_sge extended_cdb_sge;
+       struct regpair  sense_data_buffer_phys_addr;
+};
+#endif
index 4cc474364c50568806b16520ddd66239b9f3ebfd..d1de172bebac626b9b61b2da4e1588a36519bb9d 100644 (file)
@@ -175,7 +175,7 @@ static void qedi_destroy_cmd_pool(struct qedi_ctx *qedi,
                if (cmd->io_tbl.sge_tbl)
                        dma_free_coherent(&qedi->pdev->dev,
                                          QEDI_ISCSI_MAX_BDS_PER_CMD *
-                                         sizeof(struct iscsi_sge),
+                                         sizeof(struct scsi_sge),
                                          cmd->io_tbl.sge_tbl,
                                          cmd->io_tbl.sge_tbl_dma);
 
@@ -191,7 +191,7 @@ static int qedi_alloc_sget(struct qedi_ctx *qedi, struct iscsi_session *session,
                           struct qedi_cmd *cmd)
 {
        struct qedi_io_bdt *io = &cmd->io_tbl;
-       struct iscsi_sge *sge;
+       struct scsi_sge *sge;
 
        io->sge_tbl = dma_alloc_coherent(&qedi->pdev->dev,
                                         QEDI_ISCSI_MAX_BDS_PER_CMD *
@@ -708,22 +708,20 @@ static void qedi_conn_get_stats(struct iscsi_cls_conn *cls_conn,
 
 static void qedi_iscsi_prep_generic_pdu_bd(struct qedi_conn *qedi_conn)
 {
-       struct iscsi_sge *bd_tbl;
+       struct scsi_sge *bd_tbl;
 
-       bd_tbl = (struct iscsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
+       bd_tbl = (struct scsi_sge *)qedi_conn->gen_pdu.req_bd_tbl;
 
        bd_tbl->sge_addr.hi =
                (u32)((u64)qedi_conn->gen_pdu.req_dma_addr >> 32);
        bd_tbl->sge_addr.lo = (u32)qedi_conn->gen_pdu.req_dma_addr;
        bd_tbl->sge_len = qedi_conn->gen_pdu.req_wr_ptr -
                                qedi_conn->gen_pdu.req_buf;
-       bd_tbl->reserved0 = 0;
-       bd_tbl = (struct iscsi_sge  *)qedi_conn->gen_pdu.resp_bd_tbl;
+       bd_tbl = (struct scsi_sge *)qedi_conn->gen_pdu.resp_bd_tbl;
        bd_tbl->sge_addr.hi =
                        (u32)((u64)qedi_conn->gen_pdu.resp_dma_addr >> 32);
        bd_tbl->sge_addr.lo = (u32)qedi_conn->gen_pdu.resp_dma_addr;
        bd_tbl->sge_len = ISCSI_DEF_MAX_RECV_SEG_LEN;
-       bd_tbl->reserved0 = 0;
 }
 
 static int qedi_iscsi_send_generic_request(struct iscsi_task *task)
index d3c06bbddb4e8195d0ba1809a1ed6ad671d402c1..3247287cb0e7e5c4b16a2ad043ec636754e77266 100644 (file)
@@ -102,7 +102,7 @@ struct qedi_endpoint {
 #define QEDI_SQ_WQES_MIN       16
 
 struct qedi_io_bdt {
-       struct iscsi_sge *sge_tbl;
+       struct scsi_sge *sge_tbl;
        dma_addr_t sge_tbl_dma;
        u16 sge_valid;
 };
index 9543a1b139d4e9a00fbac1c9c8474d6bec0e7653..d61e3ac22e675bd1145003c9064e885de116d108 100644 (file)
@@ -7,8 +7,8 @@
  * this source tree.
  */
 
-#define QEDI_MODULE_VERSION    "8.10.3.0"
+#define QEDI_MODULE_VERSION    "8.10.4.0"
 #define QEDI_DRIVER_MAJOR_VER          8
 #define QEDI_DRIVER_MINOR_VER          10
-#define QEDI_DRIVER_REV_VER            3
+#define QEDI_DRIVER_REV_VER            4
 #define QEDI_DRIVER_ENG_VER            0
index 67c0d5aa32125ca135ccb6cc2bd83af76b0ffd1b..de952935b5d2ca572d618e2a8802a1e035c0fbdb 100644 (file)
@@ -3,6 +3,7 @@ config SCSI_QLA_FC
        depends on PCI && SCSI
        depends on SCSI_FC_ATTRS
        select FW_LOADER
+       select BTREE
        ---help---
        This qla2xxx driver supports all QLogic Fibre Channel
        PCI and PCIe host adapters.
index f610103994afd4c53cbf439db646eb5b44851689..435ff7fd6384a0a4e941efb3d60411e0731d4c1b 100644 (file)
@@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
                    "Timer for the VP[%d] has stopped\n", vha->vp_idx);
        }
 
-       BUG_ON(atomic_read(&vha->vref_count));
-
        qla2x00_free_fcports(vha);
 
        mutex_lock(&ha->vport_lock);
@@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
        dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l,
            vha->gnl.ldma);
 
-       if (vha->qpair->vp_idx == vha->vp_idx) {
+       if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) {
                if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS)
                        ql_log(ql_log_warn, vha, 0x7087,
                            "Queue Pair delete failed.\n");
index e1fc4e66966aeab7b64bfd4ca9c75ca4da1a5be5..c6bffe929fe7dc54b83ac8d89087b4b0d7e0efca 100644 (file)
@@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...);
 #define ql_dbg_tgt     0x00004000 /* Target mode */
 #define ql_dbg_tgt_mgt 0x00002000 /* Target mode management */
 #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */
+#define ql_dbg_tgt_dif  0x00000800 /* Target mode dif */
 
 extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
        uint32_t, void **);
index 625d438e3cce01e39a57bfdd3d581ac24e6a5c55..ae119018dfaae9fe65c5cfe1869cdc655b27a3ea 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/firmware.h>
 #include <linux/aer.h>
 #include <linux/mutex.h>
+#include <linux/btree.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -395,11 +396,15 @@ struct srb_iocb {
                        struct completion comp;
                } abt;
                struct ct_arg ctarg;
+#define MAX_IOCB_MB_REG 28
+#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t))
                struct {
-                       __le16 in_mb[28];       /* fr fw */
-                       __le16 out_mb[28];      /* to fw */
+                       __le16 in_mb[MAX_IOCB_MB_REG];  /* from FW */
+                       __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */
                        void *out, *in;
                        dma_addr_t out_dma, in_dma;
+                       struct completion comp;
+                       int rc;
                } mbx;
                struct {
                        struct imm_ntfy_from_isp *ntfy;
@@ -437,7 +442,7 @@ typedef struct srb {
        uint32_t handle;
        uint16_t flags;
        uint16_t type;
-       char *name;
+       const char *name;
        int iocbs;
        struct qla_qpair *qpair;
        u32 gen1;       /* scratch */
@@ -2300,6 +2305,8 @@ typedef struct fc_port {
        struct ct_sns_desc ct_desc;
        enum discovery_state disc_state;
        enum login_state fw_login_state;
+       unsigned long plogi_nack_done_deadline;
+
        u32 login_gen, last_login_gen;
        u32 rscn_gen, last_rscn_gen;
        u32 chip_reset;
@@ -3106,6 +3113,16 @@ struct qla_chip_state_84xx {
        uint32_t gold_fw_version;
 };
 
+struct qla_dif_statistics {
+       uint64_t dif_input_bytes;
+       uint64_t dif_output_bytes;
+       uint64_t dif_input_requests;
+       uint64_t dif_output_requests;
+       uint32_t dif_guard_err;
+       uint32_t dif_ref_tag_err;
+       uint32_t dif_app_tag_err;
+};
+
 struct qla_statistics {
        uint32_t total_isp_aborts;
        uint64_t input_bytes;
@@ -3118,6 +3135,8 @@ struct qla_statistics {
        uint32_t stat_max_pend_cmds;
        uint32_t stat_max_qfull_cmds_alloc;
        uint32_t stat_max_qfull_cmds_dropped;
+
+       struct qla_dif_statistics qla_dif_stats;
 };
 
 struct bidi_statistics {
@@ -3125,6 +3144,16 @@ struct bidi_statistics {
        unsigned long long transfer_bytes;
 };
 
+struct qla_tc_param {
+       struct scsi_qla_host *vha;
+       uint32_t blk_sz;
+       uint32_t bufflen;
+       struct scatterlist *sg;
+       struct scatterlist *prot_sg;
+       struct crc_context *ctx;
+       uint8_t *ctx_dsd_alloced;
+};
+
 /* Multi queue support */
 #define MBC_INITIALIZE_MULTIQ 0x1f
 #define QLA_QUE_PAGE 0X1000
@@ -3272,6 +3301,8 @@ struct qlt_hw_data {
        uint8_t tgt_node_name[WWN_SIZE];
 
        struct dentry *dfs_tgt_sess;
+       struct dentry *dfs_tgt_port_database;
+
        struct list_head q_full_list;
        uint32_t num_pend_cmds;
        uint32_t num_qfull_cmds_alloc;
@@ -3281,6 +3312,7 @@ struct qlt_hw_data {
        spinlock_t sess_lock;
        int rspq_vector_cpuid;
        spinlock_t atio_lock ____cacheline_aligned;
+       struct btree_head32 host_map;
 };
 
 #define MAX_QFULL_CMDS_ALLOC   8192
@@ -3290,6 +3322,10 @@ struct qlt_hw_data {
 
 #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75      /* 75 percent */
 
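+/*
+ * Link-up arrived (N2N or LIP async event) after firmware start but before
+ * firmware init finished; see qla2x00_post_work(), which routes work
+ * through the iocb workqueue in this window.
+ */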
+#define QLA_EARLY_LINKUP(_ha) \
+       ((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \
+        _ha->flags.fw_started && !_ha->flags.fw_init_done)
+
 /*
  * Qlogic host adapter specific data structure.
 */
@@ -3339,7 +3375,11 @@ struct qla_hw_data {
                uint32_t        fawwpn_enabled:1;
                uint32_t        exlogins_enabled:1;
                uint32_t        exchoffld_enabled:1;
-               /* 35 bits */
+
+               uint32_t        lip_ae:1;
+               uint32_t        n2n_ae:1;
+               uint32_t        fw_started:1;
+               uint32_t        fw_init_done:1;
        } flags;
 
        /* This spinlock is used to protect "io transactions", you must
@@ -3432,7 +3472,6 @@ struct qla_hw_data {
 #define P2P_LOOP  3
        uint8_t         interrupts_on;
        uint32_t        isp_abort_cnt;
-
 #define PCI_DEVICE_ID_QLOGIC_ISP2532    0x2532
 #define PCI_DEVICE_ID_QLOGIC_ISP8432    0x8432
 #define PCI_DEVICE_ID_QLOGIC_ISP8001   0x8001
@@ -3913,6 +3952,7 @@ typedef struct scsi_qla_host {
        struct list_head vp_fcports;    /* list of fcports */
        struct list_head work_list;
        spinlock_t work_lock;
+       struct work_struct iocb_work;
 
        /* Commonly used flags and state information. */
        struct Scsi_Host *host;
@@ -4076,6 +4116,7 @@ typedef struct scsi_qla_host {
        /* Count of active session/fcport */
        int fcport_count;
        wait_queue_head_t fcport_waitQ;
+       wait_queue_head_t vref_waitq;
 } scsi_qla_host_t;
 
 struct qla27xx_image_status {
@@ -4131,14 +4172,17 @@ struct qla2_sgx {
        mb();                                           \
        if (__vha->flags.delete_progress) {             \
                atomic_dec(&__vha->vref_count);         \
+               wake_up(&__vha->vref_waitq);            \
                __bail = 1;                             \
        } else {                                        \
                __bail = 0;                             \
        }                                               \
 } while (0)
 
-#define QLA_VHA_MARK_NOT_BUSY(__vha)                   \
+#define QLA_VHA_MARK_NOT_BUSY(__vha) do {              \
        atomic_dec(&__vha->vref_count);                 \
+       wake_up(&__vha->vref_waitq);                    \
+} while (0)                                            \
 
 #define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do {      \
        atomic_inc(&__qpair->ref_count);                \
index b48cce696bac77e44f7c7579fd1829a40391da2d..989e17b0758cd51ec029204c48eddf37c55c180a 100644 (file)
@@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused)
        struct qla_hw_data *ha = vha->hw;
        unsigned long flags;
        struct fc_port *sess = NULL;
-       struct qla_tgt *tgt= vha->vha_tgt.qla_tgt;
+       struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
 
-       seq_printf(s, "%s\n",vha->host_str);
+       seq_printf(s, "%s\n", vha->host_str);
        if (tgt) {
-               seq_printf(s, "Port ID   Port Name                Handle\n");
+               seq_puts(s, "Port ID   Port Name                Handle\n");
 
                spin_lock_irqsave(&ha->tgt.sess_lock, flags);
                list_for_each_entry(sess, &vha->vp_fcports, list)
@@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file)
        return single_open(file, qla2x00_dfs_tgt_sess_show, vha);
 }
 
-
 static const struct file_operations dfs_tgt_sess_ops = {
        .open           = qla2x00_dfs_tgt_sess_open,
        .read           = seq_read,
@@ -52,6 +51,78 @@ static const struct file_operations dfs_tgt_sess_ops = {
        .release        = single_release,
 };
 
+static int
+qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
+{
+       scsi_qla_host_t *vha = s->private;
+       struct qla_hw_data *ha = vha->hw;
+       struct gid_list_info *gid_list;
+       dma_addr_t gid_list_dma;
+       fc_port_t fc_port;
+       char *id_iter;
+       int rc, i;
+       uint16_t entries, loop_id;
+       struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+
+       seq_printf(s, "%s\n", vha->host_str);
+       if (tgt) {
+               gid_list = dma_alloc_coherent(&ha->pdev->dev,
+                   qla2x00_gid_list_size(ha),
+                   &gid_list_dma, GFP_KERNEL);
+               if (!gid_list) {
+                       ql_dbg(ql_dbg_user, vha, 0x705c,
+                           "DMA allocation failed for %u\n",
+                           qla2x00_gid_list_size(ha));
+                       return 0;
+               }
+
+               rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma,
+                   &entries);
+               if (rc != QLA_SUCCESS)
+                       goto out_free_id_list;
+
+               id_iter = (char *)gid_list;
+
+               seq_puts(s, "Port Name  Port ID         Loop ID\n");
+
+               for (i = 0; i < entries; i++) {
+                       struct gid_list_info *gid =
+                           (struct gid_list_info *)id_iter;
+                       loop_id = le16_to_cpu(gid->loop_id);
+                       memset(&fc_port, 0, sizeof(fc_port_t));
+
+                       fc_port.loop_id = loop_id;
+
+                       rc = qla24xx_gpdb_wait(vha, &fc_port, 0);
+                       seq_printf(s, "%8phC  %02x%02x%02x  %d\n",
+                               fc_port.port_name, fc_port.d_id.b.domain,
+                               fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
+                               fc_port.loop_id);
+                       id_iter += ha->gid_list_info_size;
+               }
+out_free_id_list:
+               dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
+                   gid_list, gid_list_dma);
+       }
+
+       return 0;
+}
+
+static int
+qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file)
+{
+       scsi_qla_host_t *vha = inode->i_private;
+
+       return single_open(file, qla2x00_dfs_tgt_port_database_show, vha);
+}
+
+static const struct file_operations dfs_tgt_port_database_ops = {
+       .open           = qla2x00_dfs_tgt_port_database_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
 static int
 qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
 {
@@ -114,6 +185,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused)
        seq_printf(s, "num Q full sent = %lld\n",
                vha->tgt_counters.num_q_full_sent);
 
+       /* DIF stats */
+       seq_printf(s, "DIF Inp Bytes = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_input_bytes);
+       seq_printf(s, "DIF Outp Bytes = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_output_bytes);
+       seq_printf(s, "DIF Inp Req = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_input_requests);
+       seq_printf(s, "DIF Outp Req = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_output_requests);
+       seq_printf(s, "DIF Guard err = %d\n",
+               vha->qla_stats.qla_dif_stats.dif_guard_err);
+       seq_printf(s, "DIF Ref tag err = %d\n",
+               vha->qla_stats.qla_dif_stats.dif_ref_tag_err);
+       seq_printf(s, "DIF App tag err = %d\n",
+               vha->qla_stats.qla_dif_stats.dif_app_tag_err);
        return 0;
 }
 
@@ -281,6 +367,14 @@ create_nodes:
                goto out;
        }
 
+       ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database",
+           S_IRUSR,  ha->dfs_dir, vha, &dfs_tgt_port_database_ops);
+       if (!ha->tgt.dfs_tgt_port_database) {
+               ql_log(ql_log_warn, vha, 0xffff,
+                   "Unable to create debugFS tgt_port_database node.\n");
+               goto out;
+       }
+
        ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha,
            &dfs_fce_ops);
        if (!ha->dfs_fce) {
@@ -311,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha)
                ha->tgt.dfs_tgt_sess = NULL;
        }
 
+       if (ha->tgt.dfs_tgt_port_database) {
+               debugfs_remove(ha->tgt.dfs_tgt_port_database);
+               ha->tgt.dfs_tgt_port_database = NULL;
+       }
+
        if (ha->dfs_fw_resource_cnt) {
                debugfs_remove(ha->dfs_fw_resource_cnt);
                ha->dfs_fw_resource_cnt = NULL;
index b3d6441d1d90eb27f1908fa27ea1ec28f024b1d9..5b2451745e9f471988e8685d68f3423ec5d5811f 100644 (file)
@@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *);
 void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
        uint16_t *);
 int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *);
+int qla24xx_async_abort_cmd(srb_t *);
 
 /*
  * Global Functions in qla_mid.c source file.
@@ -256,11 +257,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *);
 extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *);
 extern int qla2x00_issue_marker(scsi_qla_host_t *, int);
 extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *,
-       uint32_t *, uint16_t, struct qla_tgt_cmd *);
+       uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *,
-       uint32_t *, uint16_t, struct qla_tgt_cmd *);
+       uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
-       uint32_t *, uint16_t, struct qla_tgt_cmd *);
+       uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
 extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
 extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *,
@@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
 
 extern int
 qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *,
-    dma_addr_t, uint);
+    dma_addr_t, uint16_t);
 
 extern int qla24xx_abort_command(srb_t *);
 extern int qla24xx_async_abort_command(srb_t *);
@@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t);
 extern int
 qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint);
 
+int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *);
+int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8);
+int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t,
+    uint16_t *);
+int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *,
+       struct port_database_24xx *);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
@@ -846,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *,
        uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **);
 void qla24xx_delete_sess_fn(struct work_struct *);
 void qlt_unknown_atio_work_fn(struct work_struct *);
+void qlt_update_host_map(struct scsi_qla_host *, port_id_t);
+void qlt_remove_target_resources(struct qla_hw_data *);
 
 #endif /* _QLA_GBL_H */
index 32fb9007f13770e4cd43650521b67e991a66d3e9..f9d2fe7b1adedf9349c11b7bfaf389c223a21ba8 100644 (file)
@@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
        struct srb *sp = s;
        struct scsi_qla_host *vha = sp->vha;
        struct qla_hw_data *ha = vha->hw;
-       uint64_t zero = 0;
        struct port_database_24xx *pd;
        fc_port_t *fcport = sp->fcport;
        u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb;
@@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
 
        pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in;
 
-       /* Check for logged in state. */
-       if (pd->current_login_state != PDS_PRLI_COMPLETE &&
-           pd->last_login_state != PDS_PRLI_COMPLETE) {
-               ql_dbg(ql_dbg_mbx, vha, 0xffff,
-                   "Unable to verify login-state (%x/%x) for "
-                   "loop_id %x.\n", pd->current_login_state,
-                   pd->last_login_state, fcport->loop_id);
-               rval = QLA_FUNCTION_FAILED;
-               goto gpd_error_out;
-       }
-
-       if (fcport->loop_id == FC_NO_LOOP_ID ||
-           (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
-               memcmp(fcport->port_name, pd->port_name, 8))) {
-               /* We lost the device mid way. */
-               rval = QLA_NOT_LOGGED_IN;
-               goto gpd_error_out;
-       }
-
-       /* Names are little-endian. */
-       memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
-
-       /* Get port_id of device. */
-       fcport->d_id.b.domain = pd->port_id[0];
-       fcport->d_id.b.area = pd->port_id[1];
-       fcport->d_id.b.al_pa = pd->port_id[2];
-       fcport->d_id.b.rsvd_1 = 0;
-
-       /* If not target must be initiator or unknown type. */
-       if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
-               fcport->port_type = FCT_INITIATOR;
-       else
-               fcport->port_type = FCT_TARGET;
-
-       /* Passback COS information. */
-       fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
-               FC_COS_CLASS2 : FC_COS_CLASS3;
-
-       if (pd->prli_svc_param_word_3[0] & BIT_7) {
-               fcport->flags |= FCF_CONF_COMP_SUPPORTED;
-               fcport->conf_compl_supported = 1;
-       }
+       rval = __qla24xx_parse_gpdb(vha, fcport, pd);
 
 gpd_error_out:
        memset(&ea, 0, sizeof(ea));
@@ -876,10 +834,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
        fcport->login_retry--;
 
        if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-           (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
            (fcport->fw_login_state == DSC_LS_PRLI_PEND))
                return 0;
 
+       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+               if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+                       return 0;
+       }
+
        /* for pure Target Mode. Login will not be initiated */
        if (vha->host->active_mode == MODE_TARGET)
                return 0;
@@ -1041,10 +1003,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
                fcport->flags);
 
        if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-           (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
            (fcport->fw_login_state == DSC_LS_PRLI_PEND))
                return;
 
+       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+               if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+                       return;
+       }
+
        if (fcport->flags & FCF_ASYNC_SENT) {
                fcport->login_retry++;
                set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
@@ -1258,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res)
        complete(&abt->u.abt.comp);
 }
 
-static int
+int
 qla24xx_async_abort_cmd(srb_t *cmd_sp)
 {
        scsi_qla_host_t *vha = cmd_sp->vha;
@@ -3212,6 +3178,7 @@ next_check:
        } else {
                ql_dbg(ql_dbg_init, vha, 0x00d3,
                    "Init Firmware -- success.\n");
+               ha->flags.fw_started = 1;
        }
 
        return (rval);
@@ -3374,8 +3341,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
        uint8_t       domain;
        char            connect_type[22];
        struct qla_hw_data *ha = vha->hw;
-       unsigned long flags;
        scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+       port_id_t id;
 
        /* Get host addresses. */
        rval = qla2x00_get_adapter_id(vha,
@@ -3453,13 +3420,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 
        /* Save Host port and loop ID. */
        /* byte order - Big Endian */
-       vha->d_id.b.domain = domain;
-       vha->d_id.b.area = area;
-       vha->d_id.b.al_pa = al_pa;
-
-       spin_lock_irqsave(&ha->vport_slock, flags);
-       qlt_update_vp_map(vha, SET_AL_PA);
-       spin_unlock_irqrestore(&ha->vport_slock, flags);
+       id.b.domain = domain;
+       id.b.area = area;
+       id.b.al_pa = al_pa;
+       id.b.rsvd_1 = 0;
+       qlt_update_host_map(vha, id);
 
        if (!vha->flags.init_done)
                ql_log(ql_log_info, vha, 0x2010,
@@ -4036,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
                        atomic_set(&vha->loop_state, LOOP_READY);
                        ql_dbg(ql_dbg_disc, vha, 0x2069,
                            "LOOP READY.\n");
+                       ha->flags.fw_init_done = 1;
 
                        /*
                         * Process any ATIO queue entries that came in
@@ -5148,6 +5114,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha)
                        }
                }
                atomic_dec(&vha->vref_count);
+               wake_up(&vha->vref_waitq);
        }
        spin_unlock_irqrestore(&ha->vport_slock, flags);
 }
@@ -5526,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
        if (!(IS_P3P_TYPE(ha)))
                ha->isp_ops->reset_chip(vha);
 
+       ha->flags.n2n_ae = 0;
+       ha->flags.lip_ae = 0;
+       ha->current_topology = 0;
+       ha->flags.fw_started = 0;
+       ha->flags.fw_init_done = 0;
        ha->chip_reset++;
 
        atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
@@ -6802,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
                return;
        if (!ha->fw_major_version)
                return;
+       if (!ha->flags.fw_started)
+               return;
 
        ret = qla2x00_stop_firmware(vha);
        for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT &&
@@ -6815,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
                    "Attempting retry of stop-firmware command.\n");
                ret = qla2x00_stop_firmware(vha);
        }
+
+       ha->flags.fw_started = 0;
+       ha->flags.fw_init_done = 0;
 }
 
 int
index 535079280288fbd6554a3ca28e620065b8b9fe98..ea027f6a7fd4e949c1a9a53aad0de00b0a7ee361 100644 (file)
@@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx,
 
 int
 qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
-       uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+       uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
 {
        void *next_dsd;
        uint8_t avail_dsds = 0;
@@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
        struct scatterlist *sg_prot;
        uint32_t *cur_dsd = dsd;
        uint16_t        used_dsds = tot_dsds;
-
        uint32_t        prot_int; /* protection interval */
        uint32_t        partial;
        struct qla2_sgx sgx;
@@ -966,7 +965,7 @@ alloc_and_fill:
                        } else {
                                list_add_tail(&dsd_ptr->list,
                                    &(tc->ctx->dsd_list));
-                               tc->ctx_dsd_alloced = 1;
+                               *tc->ctx_dsd_alloced = 1;
                        }
 
 
@@ -1005,7 +1004,7 @@ alloc_and_fill:
 
 int
 qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
-       uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+       uint16_t tot_dsds, struct qla_tc_param *tc)
 {
        void *next_dsd;
        uint8_t avail_dsds = 0;
@@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
                        } else {
                                list_add_tail(&dsd_ptr->list,
                                    &(tc->ctx->dsd_list));
-                               tc->ctx_dsd_alloced = 1;
+                               *tc->ctx_dsd_alloced = 1;
                        }
 
                        /* add new list to cmd iocb or last list */
@@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
 
 int
 qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
-       uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+       uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
 {
        void *next_dsd;
        uint8_t avail_dsds = 0;
@@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
                        } else {
                                list_add_tail(&dsd_ptr->list,
                                    &(tc->ctx->dsd_list));
-                               tc->ctx_dsd_alloced = 1;
+                               *tc->ctx_dsd_alloced = 1;
                        }
 
                        /* add new list to cmd iocb or last list */
index 3c66ea29de2704fcefc71e965c071aa05c7bca78..3203367a4f423608ab69d75882d5a3141a1465a1 100644 (file)
@@ -708,6 +708,8 @@ skip_rio:
                    "mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx);
 
                ha->isp_ops->fw_dump(vha, 1);
+               ha->flags.fw_init_done = 0;
+               ha->flags.fw_started = 0;
 
                if (IS_FWI2_CAPABLE(ha)) {
                        if (mb[1] == 0 && mb[2] == 0) {
@@ -761,6 +763,9 @@ skip_rio:
                break;
 
        case MBA_LIP_OCCURRED:          /* Loop Initialization Procedure */
+               ha->flags.lip_ae = 1;
+               ha->flags.n2n_ae = 0;
+
                ql_dbg(ql_dbg_async, vha, 0x5009,
                    "LIP occurred (%x).\n", mb[1]);
 
@@ -797,6 +802,10 @@ skip_rio:
                break;
 
        case MBA_LOOP_DOWN:             /* Loop Down Event */
+               ha->flags.n2n_ae = 0;
+               ha->flags.lip_ae = 0;
+               ha->current_topology = 0;
+
                mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha))
                        ? RD_REG_WORD(&reg24->mailbox4) : 0;
                mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(&reg82->mailbox_out[4])
@@ -866,6 +875,9 @@ skip_rio:
 
        /* case MBA_DCBX_COMPLETE: */
        case MBA_POINT_TO_POINT:        /* Point-to-Point */
+               ha->flags.lip_ae = 0;
+               ha->flags.n2n_ae = 1;
+
                if (IS_QLA2100(ha))
                        break;
 
@@ -1620,9 +1632,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
                QLA_LOGIO_LOGIN_RETRIED : 0;
        if (logio->entry_status) {
                ql_log(ql_log_warn, fcport->vha, 0x5034,
-                   "Async-%s error entry - hdl=%x"
+                   "Async-%s error entry - %8phC hdl=%x "
                    "portid=%02x%02x%02x entry-status=%x.\n",
-                   type, sp->handle, fcport->d_id.b.domain,
+                   type, fcport->port_name, sp->handle, fcport->d_id.b.domain,
                    fcport->d_id.b.area, fcport->d_id.b.al_pa,
                    logio->entry_status);
                ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d,
@@ -1633,8 +1645,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 
        if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) {
                ql_dbg(ql_dbg_async, fcport->vha, 0x5036,
-                   "Async-%s complete - hdl=%x portid=%02x%02x%02x "
-                   "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+                   "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x "
+                   "iop0=%x.\n", type, fcport->port_name, sp->handle,
+                   fcport->d_id.b.domain,
                    fcport->d_id.b.area, fcport->d_id.b.al_pa,
                    le32_to_cpu(logio->io_parameter[0]));
 
@@ -1674,6 +1687,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
        case LSC_SCODE_NPORT_USED:
                data[0] = MBS_LOOP_ID_USED;
                break;
+       case LSC_SCODE_CMD_FAILED:
+               if (iop[1] == 0x0606) {
+                       /*
+                        * PLOGI/PRLI completed: we must have received a
+                        * PLOGI/PRLI that the target side acked.
+                        */
+                       data[0] = MBS_COMMAND_COMPLETE;
+                       goto logio_done;
+               }
+               data[0] = MBS_COMMAND_ERROR;
+               break;
        case LSC_SCODE_NOXCB:
                vha->hw->exch_starvation++;
                if (vha->hw->exch_starvation > 5) {
@@ -1695,8 +1719,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
        }
 
        ql_dbg(ql_dbg_async, fcport->vha, 0x5037,
-           "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x "
-           "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+           "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x "
+           "iop0=%x iop1=%x.\n", type, fcport->port_name,
+               sp->handle, fcport->d_id.b.domain,
            fcport->d_id.b.area, fcport->d_id.b.al_pa,
            le16_to_cpu(logio->comp_status),
            le32_to_cpu(logio->io_parameter[0]),
@@ -2679,7 +2704,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
                return;
 
        abt = &sp->u.iocb_cmd;
-       abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle);
+       abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle);
        sp->done(sp, 0);
 }
 
@@ -2693,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
        struct sts_entry_24xx *pkt;
        struct qla_hw_data *ha = vha->hw;
 
-       if (!vha->flags.online)
+       if (!ha->flags.fw_started)
                return;
 
        while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
index 35079f4174179967d99568a4491713d82d96c7a3..a113ab3592a7f86eb16ce8f76d82337557cab029 100644 (file)
 #include <linux/delay.h>
 #include <linux/gfp.h>
 
+static struct mb_cmd_name {
+       uint16_t cmd;
+       const char *str;
+} mb_str[] = {
+       {MBC_GET_PORT_DATABASE,         "GPDB"},
+       {MBC_GET_ID_LIST,               "GIDList"},
+       {MBC_GET_LINK_PRIV_STATS,       "Stats"},
+};
+
+static const char *mb_to_str(uint16_t cmd)
+{
+       int i;
+       struct mb_cmd_name *e;
+
+       for (i = 0; i < ARRAY_SIZE(mb_str); i++) {
+               e = mb_str + i;
+               if (cmd == e->cmd)
+                       return e->str;
+       }
+       return "unknown";
+}
+
 static struct rom_cmd {
        uint16_t cmd;
 } rom_cmds[] = {
@@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id,
 
 int
 qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
-    dma_addr_t stats_dma, uint options)
+    dma_addr_t stats_dma, uint16_t options)
 {
        int rval;
        mbx_cmd_t mc;
@@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
        ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088,
            "Entered %s.\n", __func__);
 
-       mcp->mb[0] = MBC_GET_LINK_PRIV_STATS;
-       mcp->mb[2] = MSW(stats_dma);
-       mcp->mb[3] = LSW(stats_dma);
-       mcp->mb[6] = MSW(MSD(stats_dma));
-       mcp->mb[7] = LSW(MSD(stats_dma));
-       mcp->mb[8] = sizeof(struct link_statistics) / 4;
-       mcp->mb[9] = vha->vp_idx;
-       mcp->mb[10] = options;
-       mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0;
-       mcp->in_mb = MBX_2|MBX_1|MBX_0;
-       mcp->tov = MBX_TOV_SECONDS;
-       mcp->flags = IOCTL_CMD;
-       rval = qla2x00_mailbox_command(vha, mcp);
+       memset(&mc, 0, sizeof(mc));
+       mc.mb[0] = MBC_GET_LINK_PRIV_STATS;
+       mc.mb[2] = MSW(stats_dma);
+       mc.mb[3] = LSW(stats_dma);
+       mc.mb[6] = MSW(MSD(stats_dma));
+       mc.mb[7] = LSW(MSD(stats_dma));
+       mc.mb[8] = sizeof(struct link_statistics) / 4;
+       mc.mb[9] = cpu_to_le16(vha->vp_idx);
+       mc.mb[10] = cpu_to_le16(options);
+
+       rval = qla24xx_send_mb_cmd(vha, &mc);
 
        if (rval == QLA_SUCCESS) {
                if (mcp->mb[0] != MBS_COMMAND_COMPLETE) {
@@ -3603,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
        scsi_qla_host_t *vp = NULL;
        unsigned long   flags;
        int found;
+       port_id_t id;
 
        ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6,
            "Entered %s.\n", __func__);
@@ -3610,28 +3631,27 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
        if (rptid_entry->entry_status != 0)
                return;
 
+       id.b.domain = rptid_entry->port_id[2];
+       id.b.area   = rptid_entry->port_id[1];
+       id.b.al_pa  = rptid_entry->port_id[0];
+       id.b.rsvd_1 = 0;
+
        if (rptid_entry->format == 0) {
                /* loop */
-               ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7,
+               ql_dbg(ql_dbg_async, vha, 0x10b7,
                    "Format 0 : Number of VPs setup %d, number of "
                    "VPs acquired %d.\n", rptid_entry->vp_setup,
                    rptid_entry->vp_acquired);
-               ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8,
+               ql_dbg(ql_dbg_async, vha, 0x10b8,
                    "Primary port id %02x%02x%02x.\n",
                    rptid_entry->port_id[2], rptid_entry->port_id[1],
                    rptid_entry->port_id[0]);
 
-               vha->d_id.b.domain = rptid_entry->port_id[2];
-               vha->d_id.b.area = rptid_entry->port_id[1];
-               vha->d_id.b.al_pa = rptid_entry->port_id[0];
-
-               spin_lock_irqsave(&ha->vport_slock, flags);
-               qlt_update_vp_map(vha, SET_AL_PA);
-               spin_unlock_irqrestore(&ha->vport_slock, flags);
+               qlt_update_host_map(vha, id);
 
        } else if (rptid_entry->format == 1) {
                /* fabric */
-               ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9,
+               ql_dbg(ql_dbg_async, vha, 0x10b9,
                    "Format 1: VP[%d] enabled - status %d - with "
                    "port id %02x%02x%02x.\n", rptid_entry->vp_idx,
                        rptid_entry->vp_status,
@@ -3653,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                                            WWN_SIZE);
                                }
 
-                               vha->d_id.b.domain = rptid_entry->port_id[2];
-                               vha->d_id.b.area = rptid_entry->port_id[1];
-                               vha->d_id.b.al_pa = rptid_entry->port_id[0];
-                               spin_lock_irqsave(&ha->vport_slock, flags);
-                               qlt_update_vp_map(vha, SET_AL_PA);
-                               spin_unlock_irqrestore(&ha->vport_slock, flags);
+                               qlt_update_host_map(vha, id);
                        }
 
                        fc_host_port_name(vha->host) =
@@ -3694,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                        if (!found)
                                return;
 
-                       vp->d_id.b.domain = rptid_entry->port_id[2];
-                       vp->d_id.b.area =  rptid_entry->port_id[1];
-                       vp->d_id.b.al_pa = rptid_entry->port_id[0];
-                       spin_lock_irqsave(&ha->vport_slock, flags);
-                       qlt_update_vp_map(vp, SET_AL_PA);
-                       spin_unlock_irqrestore(&ha->vport_slock, flags);
+                       qlt_update_host_map(vp, id);
 
                        /*
                         * Cannot configure here as we are still sitting on the
@@ -5827,3 +5837,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha,
 
        return rval;
 }
+
+static void qla2x00_async_mb_sp_done(void *s, int res)
+{
+       struct srb *sp = s;
+
+       sp->u.iocb_cmd.u.mbx.rc = res;
+
+       complete(&sp->u.iocb_cmd.u.mbx.comp);
+       /* don't free sp here. Let the caller do the free */
+}
+
+/*
+ * This routine uses the IOCB interface to send a mailbox command.
+ * This allows non-critical (non chip-setup) commands to go
+ * out in parallel.
+ */
+int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp)
+{
+       int rval = QLA_FUNCTION_FAILED;
+       srb_t *sp;
+       struct srb_iocb *c;
+
+       if (!vha->hw->flags.fw_started)
+               goto done;
+
+       sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+       if (!sp)
+               goto done;
+
+       sp->type = SRB_MB_IOCB;
+       sp->name = mb_to_str(mcp->mb[0]);
+
+       qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+
+       memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG);
+
+       c = &sp->u.iocb_cmd;
+       c->timeout = qla2x00_async_iocb_timeout;
+       init_completion(&c->u.mbx.comp);
+
+       sp->done = qla2x00_async_mb_sp_done;
+
+       rval = qla2x00_start_sp(sp);
+       if (rval != QLA_SUCCESS) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                   "%s: %s Failed submission. %x.\n",
+                   __func__, sp->name, rval);
+               goto done_free_sp;
+       }
+
+       ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n",
+           sp->name, sp->handle);
+
+       wait_for_completion(&c->u.mbx.comp);
+       memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG);
+
+       rval = c->u.mbx.rc;
+       switch (rval) {
+       case QLA_FUNCTION_TIMEOUT:
+               ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n",
+                   __func__, sp->name, rval);
+               break;
+       case  QLA_SUCCESS:
+               ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n",
+                   __func__, sp->name);
+               sp->free(sp);
+               break;
+       default:
+               ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. %x.\n",
+                   __func__, sp->name, rval);
+               sp->free(sp);
+               break;
+       }
+
+       return rval;
+
+done_free_sp:
+       sp->free(sp);
+done:
+       return rval;
+}
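+
+/*
+ * Caller sketch (illustrative): build an mbx_cmd_t exactly as for
+ * qla2x00_mailbox_command() and hand it to qla24xx_send_mb_cmd() instead:
+ *
+ *     mbx_cmd_t mc;
+ *
+ *     memset(&mc, 0, sizeof(mc));
+ *     mc.mb[0] = MBC_GET_ID_LIST;
+ *     rval = qla24xx_send_mb_cmd(vha, &mc);
+ *
+ * qla24xx_gpdb_wait() and qla24xx_gidlist_wait() below are real users.
+ */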
+
+/*
+ * qla24xx_gpdb_wait
+ * NOTE: Do not call this routine from DPC thread
+ */
+int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+{
+       int rval = QLA_FUNCTION_FAILED;
+       dma_addr_t pd_dma;
+       struct port_database_24xx *pd;
+       struct qla_hw_data *ha = vha->hw;
+       mbx_cmd_t mc;
+
+       if (!vha->hw->flags.fw_started)
+               goto done;
+
+       pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
+       if (pd == NULL) {
+               ql_log(ql_log_warn, vha, 0xffff,
+                       "Failed to allocate port database structure.\n");
+               goto done_free_sp;
+       }
+       memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE));
+
+       memset(&mc, 0, sizeof(mc));
+       mc.mb[0] = MBC_GET_PORT_DATABASE;
+       mc.mb[1] = cpu_to_le16(fcport->loop_id);
+       mc.mb[2] = MSW(pd_dma);
+       mc.mb[3] = LSW(pd_dma);
+       mc.mb[6] = MSW(MSD(pd_dma));
+       mc.mb[7] = LSW(MSD(pd_dma));
+       mc.mb[9] = cpu_to_le16(vha->vp_idx);
+       mc.mb[10] = cpu_to_le16((uint16_t)opt);
+
+       rval = qla24xx_send_mb_cmd(vha, &mc);
+       if (rval != QLA_SUCCESS) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                   "%s: %8phC fail\n", __func__, fcport->port_name);
+               goto done_free_sp;
+       }
+
+       rval = __qla24xx_parse_gpdb(vha, fcport, pd);
+
+       ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n",
+           __func__, fcport->port_name);
+
+done_free_sp:
+       if (pd)
+               dma_pool_free(ha->s_dma_pool, pd, pd_dma);
+done:
+       return rval;
+}
+
+int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport,
+    struct port_database_24xx *pd)
+{
+       int rval = QLA_SUCCESS;
+       uint64_t zero = 0;
+
+       /* Check for logged in state. */
+       if (pd->current_login_state != PDS_PRLI_COMPLETE &&
+               pd->last_login_state != PDS_PRLI_COMPLETE) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                          "Unable to verify login-state (%x/%x) for "
+                          "loop_id %x.\n", pd->current_login_state,
+                          pd->last_login_state, fcport->loop_id);
+               rval = QLA_FUNCTION_FAILED;
+               goto gpd_error_out;
+       }
+
+       if (fcport->loop_id == FC_NO_LOOP_ID ||
+           (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
+            memcmp(fcport->port_name, pd->port_name, 8))) {
+               /* We lost the device mid way. */
+               rval = QLA_NOT_LOGGED_IN;
+               goto gpd_error_out;
+       }
+
+       /* Names are little-endian. */
+       memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
+       memcpy(fcport->port_name, pd->port_name, WWN_SIZE);
+
+       /* Get port_id of device. */
+       fcport->d_id.b.domain = pd->port_id[0];
+       fcport->d_id.b.area = pd->port_id[1];
+       fcport->d_id.b.al_pa = pd->port_id[2];
+       fcport->d_id.b.rsvd_1 = 0;
+
+       /* If not target must be initiator or unknown type. */
+       if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
+               fcport->port_type = FCT_INITIATOR;
+       else
+               fcport->port_type = FCT_TARGET;
+
+       /* Passback COS information. */
+       fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
+               FC_COS_CLASS2 : FC_COS_CLASS3;
+
+       if (pd->prli_svc_param_word_3[0] & BIT_7) {
+               fcport->flags |= FCF_CONF_COMP_SUPPORTED;
+               fcport->conf_compl_supported = 1;
+       }
+
+gpd_error_out:
+       return rval;
+}
+
+/*
+ * qla24xx_gidlist_wait
+ * NOTE: don't call this routine from DPC thread.
+ */
+int qla24xx_gidlist_wait(struct scsi_qla_host *vha,
+       void *id_list, dma_addr_t id_list_dma, uint16_t *entries)
+{
+       int rval = QLA_FUNCTION_FAILED;
+       mbx_cmd_t mc;
+
+       if (!vha->hw->flags.fw_started)
+               goto done;
+
+       memset(&mc, 0, sizeof(mc));
+       mc.mb[0] = MBC_GET_ID_LIST;
+       mc.mb[2] = MSW(id_list_dma);
+       mc.mb[3] = LSW(id_list_dma);
+       mc.mb[6] = MSW(MSD(id_list_dma));
+       mc.mb[7] = LSW(MSD(id_list_dma));
+       mc.mb[8] = 0;
+       mc.mb[9] = cpu_to_le16(vha->vp_idx);
+
+       rval = qla24xx_send_mb_cmd(vha, &mc);
+       if (rval != QLA_SUCCESS) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                       "%s: fail\n", __func__);
+       } else {
+               *entries = mc.mb[1];
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                       "%s: done\n", __func__);
+       }
+done:
+       return rval;
+}
index c6d6f0d912ff75ffaf9b9d810f81af735e39549b..09a490c98763a9406a6eafd3082df8f8ed50a149 100644 (file)
@@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
         * ensures no active vp_list traversal while the vport is removed
         * from the queue)
         */
-       spin_lock_irqsave(&ha->vport_slock, flags);
-       while (atomic_read(&vha->vref_count)) {
-               spin_unlock_irqrestore(&ha->vport_slock, flags);
-
-               msleep(500);
+       /* Wait (up to 10 seconds) for outstanding references to drain. */
+       wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count),
+           10*HZ);
 
-               spin_lock_irqsave(&ha->vport_slock, flags);
+       spin_lock_irqsave(&ha->vport_slock, flags);
+       if (atomic_read(&vha->vref_count)) {
+               ql_dbg(ql_dbg_vport, vha, 0xfffa,
+                   "vha->vref_count=%u timeout\n", vha->vref_count.counter);
+               vha->vref_count = (atomic_t)ATOMIC_INIT(0);
        }
        list_del(&vha->list);
        qlt_update_vp_map(vha, RESET_VP_IDX);
@@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
 
                        spin_lock_irqsave(&ha->vport_slock, flags);
                        atomic_dec(&vha->vref_count);
+                       wake_up(&vha->vref_waitq);
                }
                i++;
        }
index 1fed235a1b4a03172a4717a360a90f29ae383a4f..41d5b09f7326fb706f132fc64bcfe54023e37309 100644 (file)
@@ -2560,6 +2560,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
        return atomic_read(&vha->loop_state) == LOOP_READY;
 }
 
+static void qla2x00_iocb_work_fn(struct work_struct *work)
+{
+       struct scsi_qla_host *vha = container_of(work,
+               struct scsi_qla_host, iocb_work);
+       int cnt = 0;
+
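+       /* Drain pending work, but bound the passes so the worker yields. */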
+       while (!list_empty(&vha->work_list)) {
+               qla2x00_do_work(vha);
+               cnt++;
+               if (cnt > 10)
+                       break;
+       }
+}
+
 /*
  * PCI driver interface
  */
@@ -3078,6 +3092,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
         */
        qla2xxx_wake_dpc(base_vha);
 
+       INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
        INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error);
 
        if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) {
@@ -3469,6 +3484,7 @@ qla2x00_remove_one(struct pci_dev *pdev)
        qla2x00_free_sysfs_attr(base_vha, true);
 
        fc_remove_host(base_vha->host);
+       qlt_remove_target_resources(ha);
 
        scsi_remove_host(base_vha->host);
 
@@ -4268,6 +4284,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
        spin_lock_init(&vha->work_lock);
        spin_lock_init(&vha->cmd_list_lock);
        init_waitqueue_head(&vha->fcport_waitQ);
+       init_waitqueue_head(&vha->vref_waitq);
 
        vha->gnl.size = sizeof(struct get_name_list_extended) *
                        (ha->max_loop_id + 1);
@@ -4319,7 +4336,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
        spin_lock_irqsave(&vha->work_lock, flags);
        list_add_tail(&e->list, &vha->work_list);
        spin_unlock_irqrestore(&vha->work_lock, flags);
-       qla2xxx_wake_dpc(vha);
+
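+       /*
+        * During early link up the DPC thread may still be busy with
+        * firmware initialization, so run the work from the iocb
+        * workqueue instead.
+        */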
+       if (QLA_EARLY_LINKUP(vha->hw))
+               schedule_work(&vha->iocb_work);
+       else
+               qla2xxx_wake_dpc(vha);
 
        return QLA_SUCCESS;
 }
index 45f5077684f0a5b39c0645ddee831bf4071667d4..0e03ca2ab3e52358c817cdd2cdc667ba2bfb1ba3 100644 (file)
@@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha,
 static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha,
        fc_port_t *fcport, bool local);
 void qlt_unreg_sess(struct fc_port *sess);
+static void qlt_24xx_handle_abts(struct scsi_qla_host *,
+       struct abts_recv_from_24xx *);
+
 /*
  * Global Variables
  */
@@ -140,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq;
 static DEFINE_MUTEX(qla_tgt_mutex);
 static LIST_HEAD(qla_tgt_glist);
 
+static const char *prot_op_str(u32 prot_op)
+{
+       switch (prot_op) {
+       case TARGET_PROT_NORMAL:        return "NORMAL";
+       case TARGET_PROT_DIN_INSERT:    return "DIN_INSERT";
+       case TARGET_PROT_DOUT_INSERT:   return "DOUT_INSERT";
+       case TARGET_PROT_DIN_STRIP:     return "DIN_STRIP";
+       case TARGET_PROT_DOUT_STRIP:    return "DOUT_STRIP";
+       case TARGET_PROT_DIN_PASS:      return "DIN_PASS";
+       case TARGET_PROT_DOUT_PASS:     return "DOUT_PASS";
+       default:                        return "UNKNOWN";
+       }
+}
+
 /* This API intentionally takes dest as a parameter, rather than returning
  * int value to avoid caller forgetting to issue wmb() after the store */
 void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest)
@@ -170,21 +187,23 @@ static inline
 struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
        uint8_t *d_id)
 {
-       struct qla_hw_data *ha = vha->hw;
-       uint8_t vp_idx;
-
-       if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0]))
-               return NULL;
+       struct scsi_qla_host *host;
+       uint32_t key = 0;
 
-       if (vha->d_id.b.al_pa == d_id[2])
+       if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) &&
+           (vha->d_id.b.al_pa == d_id[2]))
                return vha;
 
-       BUG_ON(ha->tgt.tgt_vp_map == NULL);
-       vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx;
-       if (likely(test_bit(vp_idx, ha->vp_idx_map)))
-               return ha->tgt.tgt_vp_map[vp_idx].vha;
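+       /* Pack the 24-bit port ID (domain, area, al_pa) into a btree key. */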
+       key  = (uint32_t)d_id[0] << 16;
+       key |= (uint32_t)d_id[1] <<  8;
+       key |= (uint32_t)d_id[2];
 
-       return NULL;
+       host = btree_lookup32(&vha->hw->tgt.host_map, key);
+       if (!host)
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                          "Unable to find host %06x\n", key);
+
+       return host;
 }
 
 static inline
@@ -389,6 +408,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
                        (struct abts_recv_from_24xx *)atio;
                struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha,
                        entry->vp_index);
+               unsigned long flags;
+
                if (unlikely(!host)) {
                        ql_dbg(ql_dbg_tgt, vha, 0xffff,
                            "qla_target(%d): Response pkt (ABTS_RECV_24XX) "
@@ -396,9 +417,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
                            vha->vp_idx, entry->vp_index);
                        break;
                }
-               qlt_response_pkt(host, (response_t *)atio);
+               if (!ha_locked)
+                       spin_lock_irqsave(&host->hw->hardware_lock, flags);
+               qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio);
+               if (!ha_locked)
+                       spin_unlock_irqrestore(&host->hw->hardware_lock, flags);
                break;
-
        }
 
        /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */
@@ -554,6 +578,7 @@ void qla2x00_async_nack_sp_done(void *s, int res)
                sp->fcport->login_gen++;
                sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
                sp->fcport->logout_on_delete = 1;
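+               /* Gate fresh PLOGI attempts for ~1s while this login settles. */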
+               sp->fcport->plogi_nack_done_deadline = jiffies + HZ;
                break;
 
        case SRB_NACK_PRLI:
@@ -613,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
                break;
        case SRB_NACK_PRLI:
                fcport->fw_login_state = DSC_LS_PRLI_PEND;
+               fcport->deleted = 0;
                c = "PRLI";
                break;
        case SRB_NACK_LOGO:
@@ -1215,7 +1241,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
        }
 
        /* Get list of logged in devices */
-       rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries);
+       rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries);
        if (rc != QLA_SUCCESS) {
                ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045,
                    "qla_target(%d): get_id_list() failed: %x\n",
@@ -1551,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha,
        request_t *pkt;
        struct nack_to_isp *nack;
 
+       if (!ha->flags.fw_started)
+               return;
+
        ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK (ha=%p)\n", ha);
 
        /* Send marker if required */
@@ -2013,6 +2042,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd)
 }
 EXPORT_SYMBOL(qlt_free_mcmd);
 
+/*
+ * ha->hardware_lock supposed to be held on entry. Might drop it, then
+ * reacquire
+ */
+void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
+    uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq)
+{
+       struct atio_from_isp *atio = &cmd->atio;
+       struct ctio7_to_24xx *ctio;
+       uint16_t temp;
+
+       ql_dbg(ql_dbg_tgt_dif, vha, 0x3066,
+           "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, "
+           "sense_key=%02x, asc=%02x, ascq=%02x)\n",
+           vha, atio, scsi_status, sense_key, asc, ascq);
+
+       ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL);
+       if (!ctio) {
+               ql_dbg(ql_dbg_async, vha, 0x3067,
+                   "qla2x00t(%ld): %s failed: unable to allocate request packet",
+                   vha->host_no, __func__);
+               goto out;
+       }
+
+       ctio->entry_type = CTIO_TYPE7;
+       ctio->entry_count = 1;
+       ctio->handle = QLA_TGT_SKIP_HANDLE;
+       ctio->nport_handle = cmd->sess->loop_id;
+       ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
+       ctio->vp_index = vha->vp_idx;
+       ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
+       ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
+       ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       ctio->exchange_addr = atio->u.isp24.exchange_addr;
+       ctio->u.status1.flags = (atio->u.isp24.attr << 9) |
+           cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS);
+       temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id);
+       ctio->u.status1.ox_id = cpu_to_le16(temp);
+       ctio->u.status1.scsi_status =
+           cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status);
+       ctio->u.status1.response_len = cpu_to_le16(18);
+       ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio));
+
+       if (ctio->u.status1.residual != 0)
+               ctio->u.status1.scsi_status |=
+                   cpu_to_le16(SS_RESIDUAL_UNDER);
+
+       /* Fixed-format sense data is built as 32-bit words. */
+       /* Response code and sense key */
+       put_unaligned_le32(((0x70 << 24) | (sense_key << 8)),
+           &((uint32_t *)ctio->u.status1.sense_data)[0]);
+       /* Additional sense length */
+       put_unaligned_le32(0x0a,
+           &((uint32_t *)ctio->u.status1.sense_data)[1]);
+       /* ASC and ASCQ */
+       put_unaligned_le32(((asc << 24) | (ascq << 16)),
+           &((uint32_t *)ctio->u.status1.sense_data)[3]);
+
+       /* Memory Barrier */
+       wmb();
+
+       qla2x00_start_iocbs(vha, vha->req);
+out:
+       return;
+}
+
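For reference, the scsi_status/sense_key/asc/ascq values packed above describe an SPC-3 fixed-format sense buffer, which is why response_len is 18 and the additional-length word is 0x0a. A standalone sketch of that 18-byte layout follows; it is not driver code, only an illustration of what the CTIO sense_data words are meant to encode:

	static void build_fixed_sense(uint8_t *buf, uint8_t sense_key,
				      uint8_t asc, uint8_t ascq)
	{
		memset(buf, 0, 18);
		buf[0] = 0x70;		/* response code: current error, fixed format */
		buf[2] = sense_key & 0x0f;
		buf[7] = 0x0a;		/* additional sense length: 18 - 8 bytes */
		buf[12] = asc;		/* additional sense code */
		buf[13] = ascq;		/* additional sense code qualifier */
	}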
 /* callback from target fabric module code */
 void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd)
 {
@@ -2261,7 +2354,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm,
                 */
                return -EAGAIN;
        } else
-               ha->tgt.cmds[h-1] = prm->cmd;
+               ha->tgt.cmds[h - 1] = prm->cmd;
 
        pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK;
        pkt->nport_handle = prm->cmd->loop_id;
@@ -2391,6 +2484,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd)
        return cmd->bufflen > 0;
 }
 
+static void qlt_print_dif_err(struct qla_tgt_prm *prm)
+{
+       struct qla_tgt_cmd *cmd;
+       struct scsi_qla_host *vha;
+
+       /* ASC 0x10 = DIF error */
+       if (prm->sense_buffer && (prm->sense_buffer[12] == 0x10)) {
+               cmd = prm->cmd;
+               vha = cmd->vha;
+               /* ASCQ */
+               switch (prm->sense_buffer[13]) {
+               case 1:
+                       ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                           "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+                           "se_cmd=%p tag[%x]",
+                           cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                           cmd->atio.u.isp24.exchange_addr);
+                       break;
+               case 2:
+                       ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                           "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+                           "se_cmd=%p tag[%x]",
+                           cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                           cmd->atio.u.isp24.exchange_addr);
+                       break;
+               case 3:
+                       ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                           "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+                           "se_cmd=%p tag[%x]",
+                           cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                           cmd->atio.u.isp24.exchange_addr);
+                       break;
+               default:
+                       ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                           "BE detected Dif ERR: lba[%llx|%lld] len[%x] "
+                           "se_cmd=%p tag[%x]",
+                           cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                           cmd->atio.u.isp24.exchange_addr);
+                       break;
+               }
+               ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16);
+       }
+}
+
 /*
  * Called without ha->hardware_lock held
  */
@@ -2512,18 +2649,9 @@ skip_explict_conf:
                for (i = 0; i < prm->sense_buffer_len/4; i++)
                        ((uint32_t *)ctio->u.status1.sense_data)[i] =
                                cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]);
-#if 0
-               if (unlikely((prm->sense_buffer_len % 4) != 0)) {
-                       static int q;
-                       if (q < 10) {
-                               ql_dbg(ql_dbg_tgt, vha, 0xe04f,
-                                   "qla_target(%d): %d bytes of sense "
-                                   "lost", prm->tgt->ha->vp_idx,
-                                   prm->sense_buffer_len % 4);
-                               q++;
-                       }
-               }
-#endif
+
+               qlt_print_dif_err(prm);
+
        } else {
                ctio->u.status1.flags &=
                    ~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0);
@@ -2537,19 +2665,9 @@ skip_explict_conf:
        /* Sense with len > 24, is it possible ??? */
 }
 
-
-
-/* diff  */
 static inline int
 qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
 {
-       /*
-        * Uncomment when corresponding SCSI changes are done.
-        *
-        if (!sp->cmd->prot_chk)
-        return 0;
-        *
-        */
        switch (se_cmd->prot_op) {
        case TARGET_PROT_DOUT_INSERT:
        case TARGET_PROT_DIN_STRIP:
@@ -2570,16 +2688,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
        return 0;
 }
 
+static inline int
+qla_tgt_ref_mask_check(struct se_cmd *se_cmd)
+{
+       switch (se_cmd->prot_op) {
+       case TARGET_PROT_DIN_INSERT:
+       case TARGET_PROT_DOUT_INSERT:
+       case TARGET_PROT_DIN_STRIP:
+       case TARGET_PROT_DOUT_STRIP:
+       case TARGET_PROT_DIN_PASS:
+       case TARGET_PROT_DOUT_PASS:
+           return 1;
+       default:
+           return 0;
+       }
+       return 0;
+}
+
 /*
- * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command
- *
+ * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command
  */
-static inline void
-qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
+static void
+qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx,
+    uint16_t *pfw_prot_opts)
 {
+       struct se_cmd *se_cmd = &cmd->se_cmd;
        uint32_t lba = 0xffffffff & se_cmd->t_task_lba;
+       scsi_qla_host_t *vha = cmd->tgt->vha;
+       struct qla_hw_data *ha = vha->hw;
+       uint32_t t32 = 0;
 
-       /* wait til Mode Sense/Select cmd, modepage Ah, subpage 2
+       /*
+        * wait until Mode Sense/Select cmd, modepage Ah, subpage 2
         * have been implemented by TCM, before AppTag is available.
         * Look for modesense_handlers[]
         */
@@ -2587,65 +2727,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
        ctx->app_tag_mask[0] = 0x0;
        ctx->app_tag_mask[1] = 0x0;
 
+       if (IS_PI_UNINIT_CAPABLE(ha)) {
+               if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) ||
+                   (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT))
+                       *pfw_prot_opts |= PO_DIS_VALD_APP_ESC;
+               else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT)
+                       *pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC;
+       }
+
+       t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts);
+
        switch (se_cmd->prot_type) {
        case TARGET_DIF_TYPE0_PROT:
                /*
-                * No check for ql2xenablehba_err_chk, as it would be an
-                * I/O error if hba tag generation is not done.
+                * No check for ql2xenablehba_err_chk, as it
+                * would be an I/O error if hba tag generation
+                * is not done.
                 */
                ctx->ref_tag = cpu_to_le32(lba);
-
-               if (!qlt_hba_err_chk_enabled(se_cmd))
-                       break;
-
                /* enable ALL bytes of the ref tag */
                ctx->ref_tag_mask[0] = 0xff;
                ctx->ref_tag_mask[1] = 0xff;
                ctx->ref_tag_mask[2] = 0xff;
                ctx->ref_tag_mask[3] = 0xff;
                break;
-       /*
-        * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
-        * 16 bit app tag.
-        */
        case TARGET_DIF_TYPE1_PROT:
-               ctx->ref_tag = cpu_to_le32(lba);
-
-               if (!qlt_hba_err_chk_enabled(se_cmd))
-                       break;
-
-               /* enable ALL bytes of the ref tag */
-               ctx->ref_tag_mask[0] = 0xff;
-               ctx->ref_tag_mask[1] = 0xff;
-               ctx->ref_tag_mask[2] = 0xff;
-               ctx->ref_tag_mask[3] = 0xff;
-               break;
-       /*
-        * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to
-        * match LBA in CDB + N
-        */
+           /*
+            * For TYPE 1 protection: 16 bit GUARD tag, 32 bit
+            * REF tag, and 16 bit app tag.
+            */
+           ctx->ref_tag = cpu_to_le32(lba);
+           if (!qla_tgt_ref_mask_check(se_cmd) ||
+               !(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+                   *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+                   break;
+           }
+           /* enable ALL bytes of the ref tag */
+           ctx->ref_tag_mask[0] = 0xff;
+           ctx->ref_tag_mask[1] = 0xff;
+           ctx->ref_tag_mask[2] = 0xff;
+           ctx->ref_tag_mask[3] = 0xff;
+           break;
        case TARGET_DIF_TYPE2_PROT:
-               ctx->ref_tag = cpu_to_le32(lba);
-
-               if (!qlt_hba_err_chk_enabled(se_cmd))
-                       break;
-
-               /* enable ALL bytes of the ref tag */
-               ctx->ref_tag_mask[0] = 0xff;
-               ctx->ref_tag_mask[1] = 0xff;
-               ctx->ref_tag_mask[2] = 0xff;
-               ctx->ref_tag_mask[3] = 0xff;
-               break;
-
-       /* For Type 3 protection: 16 bit GUARD only */
+           /*
+            * For TYPE 2 protection: 16 bit GUARD + 32 bit REF
+            * tag has to match LBA in CDB + N
+            */
+           ctx->ref_tag = cpu_to_le32(lba);
+           if (!qla_tgt_ref_mask_check(se_cmd) ||
+               !(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+                   *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+                   break;
+           }
+           /* enable ALL bytes of the ref tag */
+           ctx->ref_tag_mask[0] = 0xff;
+           ctx->ref_tag_mask[1] = 0xff;
+           ctx->ref_tag_mask[2] = 0xff;
+           ctx->ref_tag_mask[3] = 0xff;
+           break;
        case TARGET_DIF_TYPE3_PROT:
-               ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
-                       ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
-               break;
+           /* For TYPE 3 protection: 16 bit GUARD only */
+           *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+           ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
+               ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
+           break;
        }
 }
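The ref_tag_mask bytes set above select which bytes of the reference tag the hardware verifies in each 8-byte T10 protection-information tuple appended to a logical block: all 0xff enables the full check, all 0x00 disables it (as for Type 3, whose ref tag carries no LBA). For reference, a sketch of the tuple layout, the same shape as the kernel's struct t10_pi_tuple:

	struct t10_pi_tuple_sketch {
		__be16 guard_tag;	/* CRC16 of the data block */
		__be16 app_tag;		/* application tag; 0xffff is the escape value */
		__be32 ref_tag;		/* low 32 bits of the LBA for Type 1/2 */
	};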
 
-
 static inline int
 qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 {
@@ -2664,6 +2812,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
        struct se_cmd           *se_cmd = &cmd->se_cmd;
        uint32_t h;
        struct atio_from_isp *atio = &prm->cmd->atio;
+       struct qla_tc_param     tc;
        uint16_t t16;
 
        ha = vha->hw;
@@ -2689,16 +2838,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
        case TARGET_PROT_DIN_INSERT:
        case TARGET_PROT_DOUT_STRIP:
                transfer_length = data_bytes;
-               data_bytes += dif_bytes;
+               if (cmd->prot_sg_cnt)
+                       data_bytes += dif_bytes;
                break;
-
        case TARGET_PROT_DIN_STRIP:
        case TARGET_PROT_DOUT_INSERT:
        case TARGET_PROT_DIN_PASS:
        case TARGET_PROT_DOUT_PASS:
                transfer_length = data_bytes + dif_bytes;
                break;
-
        default:
                BUG();
                break;
@@ -2734,7 +2882,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
                break;
        }
 
-
        /* ---- PKT ---- */
        /* Update entry type to indicate Command Type CRC_2 IOCB */
        pkt->entry_type  = CTIO_CRC2;
@@ -2752,9 +2899,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
        } else
                ha->tgt.cmds[h-1] = prm->cmd;
 
-
        pkt->handle  = h | CTIO_COMPLETION_HANDLE_MARK;
-       pkt->nport_handle = prm->cmd->loop_id;
+       pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id);
        pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
        pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
        pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
@@ -2775,12 +2921,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
        else if (cmd->dma_data_direction == DMA_FROM_DEVICE)
                pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT);
 
-
        pkt->dseg_count = prm->tot_dsds;
        /* Fibre channel byte count */
        pkt->transfer_length = cpu_to_le32(transfer_length);
 
-
        /* ----- CRC context -------- */
 
        /* Allocate CRC context from global pool */
@@ -2800,13 +2944,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
        /* Set handle */
        crc_ctx_pkt->handle = pkt->handle;
 
-       qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt);
+       qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts);
 
        pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
        pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma));
        pkt->crc_context_len = CRC_CONTEXT_LEN_FW;
 
-
        if (!bundling) {
                cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address;
        } else {
@@ -2827,16 +2970,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
        crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes);
        crc_ctx_pkt->guard_seed = cpu_to_le16(0);
 
+       memset((uint8_t *)&tc, 0 , sizeof(tc));
+       tc.vha = vha;
+       tc.blk_sz = cmd->blk_sz;
+       tc.bufflen = cmd->bufflen;
+       tc.sg = cmd->sg;
+       tc.prot_sg = cmd->prot_sg;
+       tc.ctx = crc_ctx_pkt;
+       tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced;
 
        /* Walks data segments */
        pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR);
 
        if (!bundling && prm->prot_seg_cnt) {
                if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd,
-                       prm->tot_dsds, cmd))
+                       prm->tot_dsds, &tc))
                        goto crc_queuing_error;
        } else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd,
-               (prm->tot_dsds - prm->prot_seg_cnt), cmd))
+               (prm->tot_dsds - prm->prot_seg_cnt), &tc))
                goto crc_queuing_error;
 
        if (bundling && prm->prot_seg_cnt) {
@@ -2845,18 +2996,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
 
                cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
                if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
-                       prm->prot_seg_cnt, cmd))
+                       prm->prot_seg_cnt, &tc))
                        goto crc_queuing_error;
        }
        return QLA_SUCCESS;
 
 crc_queuing_error:
        /* Cleanup will be performed by the caller */
+       vha->hw->tgt.cmds[h - 1] = NULL;
 
        return QLA_FUNCTION_FAILED;
 }
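The qla_tc_param block filled in above is the point of this hunk: the DSD walkers now receive a narrow context struct instead of the whole qla_tgt_cmd, presumably so the walkers in qla_iocb.c no longer depend on target-mode types. A sketch of the shape implied by the assignments (the actual struct is defined elsewhere in the series):

	struct qla_tc_param_sketch {
		struct scsi_qla_host	*vha;
		uint32_t		blk_sz;
		uint32_t		bufflen;
		struct scatterlist	*sg;
		struct scatterlist	*prot_sg;
		struct crc_context	*ctx;
		uint8_t			*ctx_dsd_alloced;
	};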
 
-
 /*
  * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and *
  * QLA_TGT_XMIT_STATUS for >= 24xx silicon
@@ -2906,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
        else
                vha->tgt_counters.core_qla_que_buf++;
 
-       if (!vha->flags.online || cmd->reset_count != ha->chip_reset) {
+       if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) {
                /*
                 * Either the port is not online or this request was from
                 * previous life, just abort the processing.
@@ -3047,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
 
        spin_lock_irqsave(&ha->hardware_lock, flags);
 
-       if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) ||
+       if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) ||
            (cmd->sess && cmd->sess->deleted)) {
                /*
                 * Either the port is not online or this request was from
@@ -3104,139 +3255,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer);
 
 
 /*
- * Checks the guard or meta-data for the type of error
- * detected by the HBA.
+ * It is assumed that either the hardware_lock or the qpair lock is held.
  */
-static inline int
+static void
 qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd,
-               struct ctio_crc_from_fw *sts)
+       struct ctio_crc_from_fw *sts)
 {
        uint8_t         *ap = &sts->actual_dif[0];
        uint8_t         *ep = &sts->expected_dif[0];
-       uint32_t        e_ref_tag, a_ref_tag;
-       uint16_t        e_app_tag, a_app_tag;
-       uint16_t        e_guard, a_guard;
        uint64_t        lba = cmd->se_cmd.t_task_lba;
+       uint8_t scsi_status, sense_key, asc, ascq;
+       unsigned long flags;
 
-       a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
-       a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
-       a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
-
-       e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
-       e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
-       e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
-
-       ql_dbg(ql_dbg_tgt, vha, 0xe075,
-           "iocb(s) %p Returned STATUS.\n", sts);
-
-       ql_dbg(ql_dbg_tgt, vha, 0xf075,
-           "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n",
-           cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-           a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard);
-
-       /*
-        * Ignore sector if:
-        * For type     3: ref & app tag is all 'f's
-        * For type 0,1,2: app tag is all 'f's
-        */
-       if ((a_app_tag == 0xffff) &&
-           ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) ||
-            (a_ref_tag == 0xffffffff))) {
-               uint32_t blocks_done;
-
-               /* 2TB boundary case covered automatically with this */
-               blocks_done = e_ref_tag - (uint32_t)lba + 1;
-               cmd->se_cmd.bad_sector = e_ref_tag;
-               cmd->se_cmd.pi_err = 0;
-               ql_dbg(ql_dbg_tgt, vha, 0xf074,
-                       "need to return scsi good\n");
-
-               /* Update protection tag */
-               if (cmd->prot_sg_cnt) {
-                       uint32_t i, k = 0, num_ent;
-                       struct scatterlist *sg, *sgl;
-
-
-                       sgl = cmd->prot_sg;
-
-                       /* Patch the corresponding protection tags */
-                       for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) {
-                               num_ent = sg_dma_len(sg) / 8;
-                               if (k + num_ent < blocks_done) {
-                                       k += num_ent;
-                                       continue;
-                               }
-                               k = blocks_done;
-                               break;
-                       }
+       cmd->trc_flags |= TRC_DIF_ERR;
 
-                       if (k != blocks_done) {
-                               ql_log(ql_log_warn, vha, 0xf076,
-                                   "unexpected tag values tag:lba=%u:%llu)\n",
-                                   e_ref_tag, (unsigned long long)lba);
-                               goto out;
-                       }
+       cmd->a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
+       cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
+       cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
 
-#if 0
-                       struct sd_dif_tuple *spt;
-                       /* TODO:
-                        * This section came from initiator. Is it valid here?
-                        * should ulp be override with actual val???
-                        */
-                       spt = page_address(sg_page(sg)) + sg->offset;
-                       spt += j;
+       cmd->e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
+       cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
+       cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
 
-                       spt->app_tag = 0xffff;
-                       if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3)
-                               spt->ref_tag = 0xffffffff;
-#endif
-               }
+       ql_dbg(ql_dbg_tgt_dif, vha, 0xf075,
+           "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state);
 
-               return 0;
-       }
+       scsi_status = sense_key = asc = ascq = 0;
 
-       /* check guard */
-       if (e_guard != a_guard) {
-               cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
-               cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
-               ql_log(ql_log_warn, vha, 0xe076,
-                   "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-                   cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-                   a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-                   a_guard, e_guard, cmd);
-               goto out;
+       /* check the application tag */
+       if (cmd->e_app_tag != cmd->a_app_tag) {
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                       "App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+                       "Ref[%x|%x], App[%x|%x], "
+                       "Guard [%x|%x] cmd=%p ox_id[%04x]",
+                       cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+                       cmd->a_ref_tag, cmd->e_ref_tag,
+                       cmd->a_app_tag, cmd->e_app_tag,
+                       cmd->a_guard, cmd->e_guard,
+                       cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+               cmd->dif_err_code = DIF_ERR_APP;
+               scsi_status = SAM_STAT_CHECK_CONDITION;
+               sense_key = ABORTED_COMMAND;
+               asc = 0x10;
+               ascq = 0x2;
        }
 
        /* check ref tag */
-       if (e_ref_tag != a_ref_tag) {
-               cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
-               cmd->se_cmd.bad_sector = e_ref_tag;
-
-               ql_log(ql_log_warn, vha, 0xe077,
-                       "Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-                       cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-                       a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-                       a_guard, e_guard, cmd);
+       if (cmd->e_ref_tag != cmd->a_ref_tag) {
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                       "Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+                       "Ref[%x|%x], App[%x|%x], "
+                       "Guard[%x|%x] cmd=%p ox_id[%04x] ",
+                       cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+                       cmd->a_ref_tag, cmd->e_ref_tag,
+                       cmd->a_app_tag, cmd->e_app_tag,
+                       cmd->a_guard, cmd->e_guard,
+                       cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+               cmd->dif_err_code = DIF_ERR_REF;
+               scsi_status = SAM_STAT_CHECK_CONDITION;
+               sense_key = ABORTED_COMMAND;
+               asc = 0x10;
+               ascq = 0x3;
                goto out;
        }
 
-       /* check appl tag */
-       if (e_app_tag != a_app_tag) {
-               cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
-               cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
-               ql_log(ql_log_warn, vha, 0xe078,
-                       "App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-                       cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-                       a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-                       a_guard, e_guard, cmd);
-               goto out;
+       /* check guard */
+       if (cmd->e_guard != cmd->a_guard) {
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                       "Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+                       "Ref[%x|%x], App[%x|%x], "
+                       "Guard [%x|%x] cmd=%p ox_id[%04x]",
+                       cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+                       cmd->a_ref_tag, cmd->e_ref_tag,
+                       cmd->a_app_tag, cmd->e_app_tag,
+                       cmd->a_guard, cmd->e_guard,
+                       cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+               cmd->dif_err_code = DIF_ERR_GRD;
+               scsi_status = SAM_STAT_CHECK_CONDITION;
+               sense_key = ABORTED_COMMAND;
+               asc = 0x10;
+               ascq = 0x1;
        }
 out:
-       return 1;
-}
+       switch (cmd->state) {
+       case QLA_TGT_STATE_NEED_DATA:
+               /* handle_data will load the DIF error code */
+               cmd->state = QLA_TGT_STATE_DATA_IN;
+               vha->hw->tgt.tgt_ops->handle_data(cmd);
+               break;
+       default:
+               spin_lock_irqsave(&cmd->cmd_lock, flags);
+               if (cmd->aborted) {
+                       spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+                       vha->hw->tgt.tgt_ops->free_cmd(cmd);
+                       break;
+               }
+               spin_unlock_irqrestore(&cmd->cmd_lock, flags);
 
+               qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq);
+               /*
+                * Assume the SCSI status made it out on the wire;
+                * do not wait for completion.
+                */
+               vha->hw->tgt.tgt_ops->free_cmd(cmd);
+               break;
+       }
+}
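The ASC/ASCQ values chosen above follow SBC: ASC 0x10 groups the protection-information failures, and the ASCQ selects the tag class. A sketch of that mapping (the DIF_ERR_* values are added to qla_target.h later in this patch):

	static void dif_err_to_sense_sketch(int dif_err_code,
					    uint8_t *asc, uint8_t *ascq)
	{
		*asc = 0x10;		/* protection information error */
		switch (dif_err_code) {
		case DIF_ERR_GRD:
			*ascq = 0x1;	/* LOGICAL BLOCK GUARD CHECK FAILED */
			break;
		case DIF_ERR_APP:
			*ascq = 0x2;	/* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */
			break;
		case DIF_ERR_REF:
			*ascq = 0x3;	/* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */
			break;
		}
	}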
 
 /* If hardware_lock held on entry, might drop it, then reacquire */
 /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
@@ -3251,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha,
        ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c,
            "Sending TERM ELS CTIO (ha=%p)\n", ha);
 
-       pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL);
+       pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL);
        if (pkt == NULL) {
                ql_dbg(ql_dbg_tgt, vha, 0xe080,
                    "qla_target(%d): %s failed: unable to allocate "
@@ -3543,6 +3668,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio,
 {
        int term = 0;
 
+       if (cmd->se_cmd.prot_op)
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                   "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] "
+                   "se_cmd=%p tag[%x] op %#x/%s",
+                    cmd->lba, cmd->lba,
+                    cmd->num_blks, &cmd->se_cmd,
+                    cmd->atio.u.isp24.exchange_addr,
+                    cmd->se_cmd.prot_op,
+                    prot_op_str(cmd->se_cmd.prot_op));
+
        if (ctio != NULL) {
                struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio;
                term = !(c->flags &
@@ -3760,32 +3895,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
                        struct ctio_crc_from_fw *crc =
                                (struct ctio_crc_from_fw *)ctio;
                        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073,
-                           "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n",
+                           "qla_target(%d): CTIO with DIF_ERROR status %x "
+                           "received (state %x, ulp_cmd %p) actual_dif[0x%llx] "
+                           "expect_dif[0x%llx]\n",
                            vha->vp_idx, status, cmd->state, se_cmd,
                            *((u64 *)&crc->actual_dif[0]),
                            *((u64 *)&crc->expected_dif[0]));
 
-                       if (qlt_handle_dif_error(vha, cmd, ctio)) {
-                               if (cmd->state == QLA_TGT_STATE_NEED_DATA) {
-                                       /* scsi Write/xfer rdy complete */
-                                       goto skip_term;
-                               } else {
-                                       /* scsi read/xmit respond complete
-                                        * call handle dif to send scsi status
-                                        * rather than terminate exchange.
-                                        */
-                                       cmd->state = QLA_TGT_STATE_PROCESSED;
-                                       ha->tgt.tgt_ops->handle_dif_err(cmd);
-                                       return;
-                               }
-                       } else {
-                               /* Need to generate a SCSI good completion.
-                                * because FW did not send scsi status.
-                                */
-                               status = 0;
-                               goto skip_term;
-                       }
-                       break;
+                       qlt_handle_dif_error(vha, cmd, ctio);
+                       return;
                }
                default:
                        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
@@ -3808,7 +3926,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
                                return;
                }
        }
-skip_term:
 
        if (cmd->state == QLA_TGT_STATE_PROCESSED) {
                cmd->trc_flags |= TRC_CTIO_DONE;
@@ -4584,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
                }
 
                if (sess != NULL) {
-                       if (sess->fw_login_state == DSC_LS_PLOGI_PEND) {
+                       if (sess->fw_login_state != DSC_LS_PLOGI_PEND &&
+                           sess->fw_login_state != DSC_LS_PLOGI_COMP) {
                                /*
                                 * Impatient initiator sent PRLI before last
                                 * PLOGI could finish. Will force him to re-try,
@@ -4623,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 
                /* Make session global (not used in fabric mode) */
                if (ha->current_topology != ISP_CFG_F) {
-                       set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
-                       set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
-                       qla2xxx_wake_dpc(vha);
+                       if (sess) {
+                               ql_dbg(ql_dbg_disc, vha, 0xffff,
+                                   "%s %d %8phC post nack\n",
+                                   __func__, __LINE__, sess->port_name);
+                               qla24xx_post_nack_work(vha, sess, iocb,
+                                       SRB_NACK_PRLI);
+                               res = 0;
+                       } else {
+                               set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+                               set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+                               qla2xxx_wake_dpc(vha);
+                       }
                } else {
                        if (sess) {
                                ql_dbg(ql_dbg_disc, vha, 0xffff,
-                                          "%s %d %8phC post nack\n",
-                                          __func__, __LINE__, sess->port_name);
-
+                                   "%s %d %8phC post nack\n",
+                                   __func__, __LINE__, sess->port_name);
                                qla24xx_post_nack_work(vha, sess, iocb,
                                        SRB_NACK_PRLI);
                                res = 0;
@@ -4639,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
                }
                break;
 
-
        case ELS_TPRLO:
                if (le16_to_cpu(iocb->u.isp24.flags) &
                        NOTIFY24XX_FLAGS_GLOBAL_TPRLO) {
@@ -5079,16 +5204,22 @@ qlt_send_busy(struct scsi_qla_host *vha,
 
 static int
 qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha,
-       struct atio_from_isp *atio)
+       struct atio_from_isp *atio, bool ha_locked)
 {
        struct qla_hw_data *ha = vha->hw;
        uint16_t status;
+       unsigned long flags;
 
        if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha))
                return 0;
 
+       if (!ha_locked)
+               spin_lock_irqsave(&ha->hardware_lock, flags);
        status = temp_sam_status;
        qlt_send_busy(vha, atio, status);
+       if (!ha_locked)
+               spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
        return 1;
 }
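qlt_chk_qfull_thresh_hold() gains an ha_locked flag because it is now reached both from the ATIO path (hardware_lock not held) and from the response path (lock held). A minimal sketch of this conditional-locking convention, with hypothetical names:

	static void send_busy_locked_sketch(spinlock_t *lock, bool already_locked)
	{
		unsigned long flags = 0;

		if (!already_locked)
			spin_lock_irqsave(lock, flags);
		/* ... emit the busy status while the lock is held ... */
		if (!already_locked)
			spin_unlock_irqrestore(lock, flags);
	}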
 
@@ -5103,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
        unsigned long flags;
 
        if (unlikely(tgt == NULL)) {
-               ql_dbg(ql_dbg_io, vha, 0x3064,
+               ql_dbg(ql_dbg_tgt, vha, 0x3064,
                    "ATIO pkt, but no tgt (ha %p)", ha);
                return;
        }
@@ -5133,7 +5264,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
 
 
                if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) {
-                       rc = qlt_chk_qfull_thresh_hold(vha, atio);
+                       rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked);
                        if (rc != 0) {
                                tgt->atio_irq_cmd_count--;
                                return;
@@ -5256,7 +5387,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt)
                        break;
                }
 
-               rc = qlt_chk_qfull_thresh_hold(vha, atio);
+               rc = qlt_chk_qfull_thresh_hold(vha, atio, true);
                if (rc != 0) {
                        tgt->irq_cmd_count--;
                        return;
@@ -5531,7 +5662,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha,
 
        fcport->loop_id = loop_id;
 
-       rc = qla2x00_get_port_database(vha, fcport, 0);
+       rc = qla24xx_gpdb_wait(vha, fcport, 0);
        if (rc != QLA_SUCCESS) {
                ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070,
                    "qla_target(%d): Failed to retrieve fcport "
@@ -5713,30 +5844,23 @@ static void qlt_abort_work(struct qla_tgt *tgt,
                }
        }
 
-       spin_lock_irqsave(&ha->hardware_lock, flags);
-
-       if (tgt->tgt_stop)
-               goto out_term;
-
        rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess);
+       ha->tgt.tgt_ops->put_sess(sess);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
+
        if (rc != 0)
                goto out_term;
-       spin_unlock_irqrestore(&ha->hardware_lock, flags);
-       if (sess)
-               ha->tgt.tgt_ops->put_sess(sess);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
        return;
 
 out_term2:
-       spin_lock_irqsave(&ha->hardware_lock, flags);
+       if (sess)
+               ha->tgt.tgt_ops->put_sess(sess);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 
 out_term:
+       spin_lock_irqsave(&ha->hardware_lock, flags);
        qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false);
        spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
-       if (sess)
-               ha->tgt.tgt_ops->put_sess(sess);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
 }
 
 static void qlt_tmr_work(struct qla_tgt *tgt,
@@ -5756,7 +5880,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
        spin_lock_irqsave(&ha->tgt.sess_lock, flags);
 
        if (tgt->tgt_stop)
-               goto out_term;
+               goto out_term2;
 
        s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id;
        sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
@@ -5768,11 +5892,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
 
                spin_lock_irqsave(&ha->tgt.sess_lock, flags);
                if (!sess)
-                       goto out_term;
+                       goto out_term2;
        } else {
                if (sess->deleted) {
                        sess = NULL;
-                       goto out_term;
+                       goto out_term2;
                }
 
                if (!kref_get_unless_zero(&sess->sess_kref)) {
@@ -5780,7 +5904,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
                            "%s: kref_get fail %8phC\n",
                             __func__, sess->port_name);
                        sess = NULL;
-                       goto out_term;
+                       goto out_term2;
                }
        }
 
@@ -5790,17 +5914,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
        unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
 
        rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0);
-       if (rc != 0)
-               goto out_term;
-
        ha->tgt.tgt_ops->put_sess(sess);
        spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+
+       if (rc != 0)
+               goto out_term;
        return;
 
+out_term2:
+       if (sess)
+               ha->tgt.tgt_ops->put_sess(sess);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 out_term:
        qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0);
-       ha->tgt.tgt_ops->put_sess(sess);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 }
 
 static void qlt_sess_work_fn(struct work_struct *work)
@@ -5893,13 +6019,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
        tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX;
        tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX;
 
-       if (base_vha->fc_vport)
-               return 0;
-
        mutex_lock(&qla_tgt_mutex);
        list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist);
        mutex_unlock(&qla_tgt_mutex);
 
+       if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target)
+               ha->tgt.tgt_ops->add_target(base_vha);
+
        return 0;
 }
 
@@ -5928,6 +6054,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha)
        return 0;
 }
 
+void qlt_remove_target_resources(struct qla_hw_data *ha)
+{
+       struct scsi_qla_host *node;
+       u32 key = 0;
+
+       btree_for_each_safe32(&ha->tgt.host_map, key, node)
+               btree_remove32(&ha->tgt.host_map, key);
+
+       btree_destroy32(&ha->tgt.host_map);
+}
+
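qlt_remove_target_resources() is the teardown half of the new host_map btree; the init and update halves appear in qlt_probe_one_stage1() and qlt_update_vp_map() below. A compressed sketch of the lib/btree calls the patch relies on (keys are 24-bit port IDs, values are vha pointers):

	#include <linux/btree.h>

	static int host_map_lifecycle_sketch(struct btree_head32 *head,
					     u32 key, void *vha)
	{
		int rc = btree_init32(head);			/* probe time */
		if (rc)
			return rc;
		rc = btree_insert32(head, key, vha, GFP_ATOMIC);	/* SET_AL_PA */
		if (!rc && btree_lookup32(head, key) == vha)
			btree_remove32(head, key);		/* RESET_AL_PA */
		btree_destroy32(head);				/* remove_target_resources */
		return rc;
	}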
 static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
        unsigned char *b)
 {
@@ -6234,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked)
        struct atio_from_isp *pkt;
        int cnt, i;
 
-       if (!vha->flags.online)
+       if (!ha->flags.fw_started)
                return;
 
        while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) ||
@@ -6581,6 +6718,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha,
 void
 qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 {
+       int rc;
+
        if (!QLA_TGT_MODE_ENABLED())
                return;
 
@@ -6600,6 +6739,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
            qlt_unknown_atio_work_fn);
 
        qlt_clear_mode(base_vha);
+
+       rc = btree_init32(&ha->tgt.host_map);
+       if (rc)
+               ql_log(ql_log_info, base_vha, 0xffff,
+                   "Unable to initialize ha->host_map btree\n");
+
+       qlt_update_vp_map(base_vha, SET_VP_IDX);
 }
 
 irqreturn_t
@@ -6642,6 +6788,8 @@ qlt_handle_abts_recv_work(struct work_struct *work)
        spin_lock_irqsave(&ha->hardware_lock, flags);
        qlt_response_pkt_all_vps(vha, (response_t *)&op->atio);
        spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+       kfree(op);
 }
 
 void
@@ -6706,25 +6854,69 @@ qlt_mem_free(struct qla_hw_data *ha)
 void
 qlt_update_vp_map(struct scsi_qla_host *vha, int cmd)
 {
+       void *slot;
+       u32 key;
+       int rc;
+
        if (!QLA_TGT_MODE_ENABLED())
                return;
 
+       key = vha->d_id.b24;
+
        switch (cmd) {
        case SET_VP_IDX:
                vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha;
                break;
        case SET_AL_PA:
-               vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx;
+               slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+               if (!slot) {
+                       ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                           "Save vha in host_map %p %06x\n", vha, key);
+                       rc = btree_insert32(&vha->hw->tgt.host_map,
+                               key, vha, GFP_ATOMIC);
+                       if (rc)
+                               ql_log(ql_log_info, vha, 0xffff,
+                                   "Unable to insert s_id into host_map: %06x\n",
+                                   key);
+                       return;
+               }
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                       "replace existing vha in host_map %p %06x\n", vha, key);
+               btree_update32(&vha->hw->tgt.host_map, key, vha);
                break;
        case RESET_VP_IDX:
                vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL;
                break;
        case RESET_AL_PA:
-               vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0;
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                  "clear vha in host_map %p %06x\n", vha, key);
+               slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+               if (slot)
+                       btree_remove32(&vha->hw->tgt.host_map, key);
+               vha->d_id.b24 = 0;
                break;
        }
 }
 
+void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id)
+{
+       unsigned long flags;
+       struct qla_hw_data *ha = vha->hw;
+
+       if (!vha->d_id.b24) {
+               spin_lock_irqsave(&ha->vport_slock, flags);
+               vha->d_id = id;
+               qlt_update_vp_map(vha, SET_AL_PA);
+               spin_unlock_irqrestore(&ha->vport_slock, flags);
+       } else if (vha->d_id.b24 != id.b24) {
+               spin_lock_irqsave(&ha->vport_slock, flags);
+               qlt_update_vp_map(vha, RESET_AL_PA);
+               vha->d_id = id;
+               qlt_update_vp_map(vha, SET_AL_PA);
+               spin_unlock_irqrestore(&ha->vport_slock, flags);
+       }
+}
+
 static int __init qlt_parse_ini_mode(void)
 {
        if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0)
index a7f90dcaae37d3eaad551544c6151785faf84cb9..d64420251194eb5fa634a36699ecf07c69e09edd 100644 (file)
@@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio)
        atio->u.isp24.fcp_cmnd.add_cdb_len = 0;
 }
 
+static inline int get_datalen_for_atio(struct atio_from_isp *atio)
+{
+       int len = atio->u.isp24.fcp_cmnd.add_cdb_len;
+
+       return (be32_to_cpu(get_unaligned((uint32_t *)
+           &atio->u.isp24.fcp_cmnd.add_cdb[len * 4])));
+}
+
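get_datalen_for_atio() relies on the FCP_CMND layout: add_cdb_len counts additional CDB bytes in 4-byte words, and the big-endian FCP_DL (expected transfer length) immediately follows them. The be32_to_cpu(get_unaligned(...)) pair is equivalent to the combined unaligned helper, as in this sketch:

	static inline u32 fcp_dl_sketch(const u8 *add_cdb, int add_cdb_len_words)
	{
		return get_unaligned_be32(add_cdb + add_cdb_len_words * 4);
	}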
 #define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */
 
 /*
@@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl {
        int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *,
                        unsigned char *, uint32_t, int, int, int);
        void (*handle_data)(struct qla_tgt_cmd *);
-       void (*handle_dif_err)(struct qla_tgt_cmd *);
        int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t,
                        uint32_t);
        void (*free_cmd)(struct qla_tgt_cmd *);
@@ -684,6 +691,9 @@ struct qla_tgt_func_tmpl {
        void (*clear_nacl_from_fcport_map)(struct fc_port *);
        void (*put_sess)(struct fc_port *);
        void (*shutdown_sess)(struct fc_port *);
+       int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts);
+       int (*chk_dif_tags)(uint32_t tag);
+       void (*add_target)(struct scsi_qla_host *);
 };
 
 int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
@@ -720,8 +730,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
 #define QLA_TGT_ABORT_ALL               0xFFFE
 #define QLA_TGT_NEXUS_LOSS_SESS         0xFFFD
 #define QLA_TGT_NEXUS_LOSS              0xFFFC
-#define QLA_TGT_ABTS                                   0xFFFB
-#define QLA_TGT_2G_ABORT_TASK                  0xFFFA
+#define QLA_TGT_ABTS                   0xFFFB
+#define QLA_TGT_2G_ABORT_TASK          0xFFFA
 
 /* Notify Acknowledge flags */
 #define NOTIFY_ACK_RES_COUNT        BIT_8
@@ -845,6 +855,7 @@ enum trace_flags {
        TRC_CMD_FREE = BIT_17,
        TRC_DATA_IN = BIT_18,
        TRC_ABORT = BIT_19,
+       TRC_DIF_ERR = BIT_20,
 };
 
 struct qla_tgt_cmd {
@@ -862,7 +873,6 @@ struct qla_tgt_cmd {
        unsigned int sg_mapped:1;
        unsigned int free_sg:1;
        unsigned int write_data_transferred:1;
-       unsigned int ctx_dsd_alloced:1;
        unsigned int q_full:1;
        unsigned int term_exchg:1;
        unsigned int cmd_sent_to_fw:1;
@@ -885,11 +895,25 @@ struct qla_tgt_cmd {
        struct list_head cmd_list;
 
        struct atio_from_isp atio;
-       /* t10dif */
+
+       uint8_t ctx_dsd_alloced;
+
+       /* T10-DIF */
+#define DIF_ERR_NONE 0
+#define DIF_ERR_GRD 1
+#define DIF_ERR_REF 2
+#define DIF_ERR_APP 3
+       int8_t dif_err_code;
        struct scatterlist *prot_sg;
        uint32_t prot_sg_cnt;
-       uint32_t blk_sz;
+       uint32_t blk_sz, num_blks;
+       uint8_t scsi_status, sense_key, asc, ascq;
+
        struct crc_context *ctx;
+       uint8_t         *cdb;
+       uint64_t        lba;
+       uint16_t        a_guard, e_guard, a_app_tag, e_app_tag;
+       uint32_t        a_ref_tag, e_ref_tag;
 
        uint64_t jiffies_at_alloc;
        uint64_t jiffies_at_free;
@@ -1053,4 +1077,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *);
 extern void qlt_logo_completion_handler(fc_port_t *, int);
 extern void qlt_do_generation_tick(struct scsi_qla_host *, int *);
 
+void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t,
+    uint8_t, uint8_t, uint8_t);
+
 #endif /* __QLA_TARGET_H */
index 3cb1964b7786e4e2add64d7c8f5788fd73b90134..45bc84e8e3bf50f798616de47a2f348d684222b4 100644 (file)
@@ -7,9 +7,9 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.07.00.38-k"
+#define QLA2XXX_VERSION      "9.00.00.00-k"
 
-#define QLA_DRIVER_MAJOR_VER   8
-#define QLA_DRIVER_MINOR_VER   7
+#define QLA_DRIVER_MAJOR_VER   9
+#define QLA_DRIVER_MINOR_VER   0
 #define QLA_DRIVER_PATCH_VER   0
 #define QLA_DRIVER_BETA_VER    0
index 8e8ab0fa9672a6674d3cc9556beeccc44dfc70b2..7443e4efa3aed461f225f6b04bae9223f615dd0b 100644 (file)
@@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
                        return;
                }
 
+               switch (cmd->dif_err_code) {
+               case DIF_ERR_GRD:
+                       cmd->se_cmd.pi_err =
+                           TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+                       break;
+               case DIF_ERR_REF:
+                       cmd->se_cmd.pi_err =
+                           TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+                       break;
+               case DIF_ERR_APP:
+                       cmd->se_cmd.pi_err =
+                           TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+                       break;
+               case DIF_ERR_NONE:
+               default:
+                       break;
+               }
+
                if (cmd->se_cmd.pi_err)
                        transport_generic_request_failure(&cmd->se_cmd,
                                cmd->se_cmd.pi_err);
@@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
        queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
 
-static void tcm_qla2xxx_handle_dif_work(struct work_struct *work)
+static int tcm_qla2xxx_chk_dif_tags(uint32_t tag)
 {
-       struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
-
-       /* take an extra kref to prevent cmd free too early.
-        * need to wait for SCSI status/check condition to
-        * finish responding generate by transport_generic_request_failure.
-        */
-       kref_get(&cmd->se_cmd.cmd_kref);
-       transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err);
+       return 0;
 }
 
-/*
- * Called from qla_target.c:qlt_do_ctio_completion()
- */
-static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd)
+static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
+    uint16_t *pfw_prot_opts)
 {
-       INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work);
-       queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+
+       if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
+               *pfw_prot_opts |= PO_DISABLE_GUARD_CHECK;
+
+       if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG))
+               *pfw_prot_opts |= PO_DIS_APP_TAG_VALD;
+
+       return 0;
 }
 
 /*
@@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id,
 static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
        .handle_cmd             = tcm_qla2xxx_handle_cmd,
        .handle_data            = tcm_qla2xxx_handle_data,
-       .handle_dif_err         = tcm_qla2xxx_handle_dif_err,
        .handle_tmr             = tcm_qla2xxx_handle_tmr,
        .free_cmd               = tcm_qla2xxx_free_cmd,
        .free_mcmd              = tcm_qla2xxx_free_mcmd,
@@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
        .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map,
        .put_sess               = tcm_qla2xxx_put_sess,
        .shutdown_sess          = tcm_qla2xxx_shutdown_sess,
+       .get_dif_tags           = tcm_qla2xxx_dif_tags,
+       .chk_dif_tags           = tcm_qla2xxx_chk_dif_tags,
 };
 
 static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport)
index 1359913bf840ce0522e09fe72b5c93bd210e4db7..e8c26e6e623726fe15f8e40ddafa089d68869c33 100644 (file)
@@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev,
        if (kstrtoul(buf, 0, &value))
                return -EINVAL;
 
-       if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX))
+       if (value >= UFS_PM_LVL_MAX)
                return -EINVAL;
 
        spin_lock_irqsave(hba->host->host_lock, flags);
index 78b1bb7bcf20ab1e3c39d7d3817b3ae3570825be..9fca977ef18d2fd4638132988215dd5b2d327967 100644 (file)
@@ -33,17 +33,10 @@ config QCOM_SMEM
          The driver provides an interface to items in a heap shared among all
          processors in a Qualcomm platform.
 
-config QCOM_SMD
-       tristate "Qualcomm Shared Memory Driver (SMD)"
-       depends on QCOM_SMEM
-       help
-         Say y here to enable support for the Qualcomm Shared Memory Driver
-         providing communication channels to remote processors in Qualcomm
-         platforms.
-
 config QCOM_SMD_RPM
        tristate "Qualcomm Resource Power Manager (RPM) over SMD"
-       depends on QCOM_SMD && OF
+       depends on ARCH_QCOM
+       depends on RPMSG && OF
        help
          If you say yes to this option, support will be included for the
          Resource Power Manager system found in the Qualcomm 8974 based
@@ -76,7 +69,8 @@ config QCOM_SMSM
 
 config QCOM_WCNSS_CTRL
        tristate "Qualcomm WCNSS control driver"
-       depends on QCOM_SMD
+       depends on ARCH_QCOM
+       depends on RPMSG
        help
          Client driver for the WCNSS_CTRL SMD channel, used to download nv
          firmware to a newly booted WCNSS chip.
index 1f30260b06b8f39ffa04d51e9b13b41c19c7b988..414f0de274fae462c78d188bca7369c1e4795f85 100644 (file)
@@ -1,7 +1,6 @@
 obj-$(CONFIG_QCOM_GSBI)        +=      qcom_gsbi.o
 obj-$(CONFIG_QCOM_MDT_LOADER)  += mdt_loader.o
 obj-$(CONFIG_QCOM_PM)  +=      spm.o
-obj-$(CONFIG_QCOM_SMD) +=      smd.o
 obj-$(CONFIG_QCOM_SMD_RPM)     += smd-rpm.o
 obj-$(CONFIG_QCOM_SMEM) +=     smem.o
 obj-$(CONFIG_QCOM_SMEM_STATE) += smem_state.o
index 6609d7e0edb045c3f26e159ee8938037de397dd2..c2346752b3eaacdc64fb30bfefd2a53bc05add83 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
 #include <linux/soc/qcom/smd-rpm.h>
 
 #define RPM_REQUEST_TIMEOUT     (5 * HZ)
@@ -32,7 +32,7 @@
  * @ack_status:                result of the rpm request
  */
 struct qcom_smd_rpm {
-       struct qcom_smd_channel *rpm_channel;
+       struct rpmsg_endpoint *rpm_channel;
        struct device *dev;
 
        struct completion ack;
@@ -133,7 +133,7 @@ int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm,
        pkt->req.data_len = cpu_to_le32(count);
        memcpy(pkt->payload, buf, count);
 
-       ret = qcom_smd_send(rpm->rpm_channel, pkt, size);
+       ret = rpmsg_send(rpm->rpm_channel, pkt, size);
        if (ret)
                goto out;
 
@@ -150,14 +150,16 @@ out:
 }
 EXPORT_SYMBOL(qcom_rpm_smd_write);
 
-static int qcom_smd_rpm_callback(struct qcom_smd_channel *channel,
-                                const void *data,
-                                size_t count)
+static int qcom_smd_rpm_callback(struct rpmsg_device *rpdev,
+                                void *data,
+                                int count,
+                                void *priv,
+                                u32 addr)
 {
        const struct qcom_rpm_header *hdr = data;
        size_t hdr_length = le32_to_cpu(hdr->length);
        const struct qcom_rpm_message *msg;
-       struct qcom_smd_rpm *rpm = qcom_smd_get_drvdata(channel);
+       struct qcom_smd_rpm *rpm = dev_get_drvdata(&rpdev->dev);
        const u8 *buf = data + sizeof(struct qcom_rpm_header);
        const u8 *end = buf + hdr_length;
        char msgbuf[32];
@@ -196,59 +198,57 @@ static int qcom_smd_rpm_callback(struct qcom_smd_channel *channel,
        return 0;
 }
 
-static int qcom_smd_rpm_probe(struct qcom_smd_device *sdev)
+static int qcom_smd_rpm_probe(struct rpmsg_device *rpdev)
 {
        struct qcom_smd_rpm *rpm;
 
-       rpm = devm_kzalloc(&sdev->dev, sizeof(*rpm), GFP_KERNEL);
+       rpm = devm_kzalloc(&rpdev->dev, sizeof(*rpm), GFP_KERNEL);
        if (!rpm)
                return -ENOMEM;
 
        mutex_init(&rpm->lock);
        init_completion(&rpm->ack);
 
-       rpm->dev = &sdev->dev;
-       rpm->rpm_channel = sdev->channel;
-       qcom_smd_set_drvdata(sdev->channel, rpm);
+       rpm->dev = &rpdev->dev;
+       rpm->rpm_channel = rpdev->ept;
+       dev_set_drvdata(&rpdev->dev, rpm);
 
-       dev_set_drvdata(&sdev->dev, rpm);
-
-       return of_platform_populate(sdev->dev.of_node, NULL, NULL, &sdev->dev);
+       return of_platform_populate(rpdev->dev.of_node, NULL, NULL, &rpdev->dev);
 }
 
-static void qcom_smd_rpm_remove(struct qcom_smd_device *sdev)
+static void qcom_smd_rpm_remove(struct rpmsg_device *rpdev)
 {
-       of_platform_depopulate(&sdev->dev);
+       of_platform_depopulate(&rpdev->dev);
 }
 
 static const struct of_device_id qcom_smd_rpm_of_match[] = {
        { .compatible = "qcom,rpm-apq8084" },
        { .compatible = "qcom,rpm-msm8916" },
        { .compatible = "qcom,rpm-msm8974" },
+       { .compatible = "qcom,rpm-msm8996" },
        {}
 };
 MODULE_DEVICE_TABLE(of, qcom_smd_rpm_of_match);
 
-static struct qcom_smd_driver qcom_smd_rpm_driver = {
+static struct rpmsg_driver qcom_smd_rpm_driver = {
        .probe = qcom_smd_rpm_probe,
        .remove = qcom_smd_rpm_remove,
        .callback = qcom_smd_rpm_callback,
-       .driver  = {
+       .drv  = {
                .name  = "qcom_smd_rpm",
-               .owner = THIS_MODULE,
                .of_match_table = qcom_smd_rpm_of_match,
        },
 };
 
 static int __init qcom_smd_rpm_init(void)
 {
-       return qcom_smd_driver_register(&qcom_smd_rpm_driver);
+       return register_rpmsg_driver(&qcom_smd_rpm_driver);
 }
 arch_initcall(qcom_smd_rpm_init);
 
 static void __exit qcom_smd_rpm_exit(void)
 {
-       qcom_smd_driver_unregister(&qcom_smd_rpm_driver);
+       unregister_rpmsg_driver(&qcom_smd_rpm_driver);
 }
 module_exit(qcom_smd_rpm_exit);
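The conversion above maps one-to-one onto the rpmsg driver model: probe receives a struct rpmsg_device whose ->ept replaces the SMD channel handle, the callback gains priv/addr arguments, and registration moves to register_rpmsg_driver(). A minimal skeleton of the resulting driver shape; every name here is hypothetical:

	#include <linux/module.h>
	#include <linux/rpmsg.h>

	static int demo_probe(struct rpmsg_device *rpdev)
	{
		/* rpdev->ept is the endpoint later passed to rpmsg_send() */
		dev_set_drvdata(&rpdev->dev, rpdev->ept);
		return 0;
	}

	static int demo_cb(struct rpmsg_device *rpdev, void *data, int len,
			   void *priv, u32 addr)
	{
		return 0;	/* message consumed */
	}

	static const struct of_device_id demo_of_match[] = {
		{ .compatible = "vendor,demo" },
		{ /* sentinel */ }
	};
	MODULE_DEVICE_TABLE(of, demo_of_match);

	static struct rpmsg_driver demo_driver = {
		.probe    = demo_probe,
		.callback = demo_cb,
		.drv = {
			.name = "demo_rpmsg",
			.of_match_table = demo_of_match,
		},
	};
	module_rpmsg_driver(demo_driver);
	MODULE_LICENSE("GPL v2");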
 
diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c
deleted file mode 100644 (file)
index 322034a..0000000
+++ /dev/null
@@ -1,1560 +0,0 @@
-/*
- * Copyright (c) 2015, Sony Mobile Communications AB.
- * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/mfd/syscon.h>
-#include <linux/module.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/soc/qcom/smd.h>
-#include <linux/soc/qcom/smem.h>
-#include <linux/wait.h>
-
-/*
- * The Qualcomm Shared Memory communication solution provides point-to-point
- * channels for clients to send and receive streaming or packet based data.
- *
- * Each channel consists of a control item (channel info) and a ring buffer
- * pair. The channel info carries information related to channel state, flow
- * control and the offsets within the ring buffer.
- *
- * All allocated channels are listed in an allocation table, identifying the
- * pair of items by name, type and remote processor.
- *
- * Upon creating a new channel the remote processor allocates channel info and
- * ring buffer items from the smem heap and populates the allocation table. An
- * interrupt is sent to the other end of the channel, prompting a scan for new
- * channels. A channel never goes away; it only changes state.
- *
- * The remote processor signals its intent to bring up the communication
- * channel by setting the state of its end of the channel to "opening" and
- * sending out an interrupt. We detect this change and register an smd device
- * to consume the channel. Upon finding a consumer we finish the handshake and
- * the channel is up.
- *
- * Upon closing a channel, the remote processor will update the state of its
- * end of the channel and signal us, we will then unregister any attached
- * device and close our end of the channel.
- *
- * Devices attached to a channel can use the qcom_smd_send function to push
- * data to the channel, this is done by copying the data into the tx ring
- * buffer, updating the pointers in the channel info and signaling the remote
- * processor.
- *
- * The remote processor does the equivalent when it transfers data, and upon
- * receiving the interrupt we check the channel info for new data and deliver
- * it to the attached device. If the device is not ready to receive the data,
- * we leave it in the ring buffer for now.
- */
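
A compressed, user-space rendering of the handshake narrated above (states
only; shared memory, the allocation table and interrupts are all elided) may
help when reading the state workers further down:

    #include <stdio.h>

    enum state { CLOSED, OPENING, OPENED };

    int main(void)
    {
            enum state local = CLOSED, remote = CLOSED;

            remote = OPENING;            /* remote signals intent, fires IRQ */
            if (remote == OPENING) {     /* we detect it and find a consumer */
                    local = OPENING;     /* ...then finish the handshake */
                    local = OPENED;
            }
            remote = OPENED;             /* remote observes us and follows */

            printf("channel %s\n",
                   local == OPENED && remote == OPENED ? "up" : "down");
            return 0;
    }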
-
-struct smd_channel_info;
-struct smd_channel_info_pair;
-struct smd_channel_info_word;
-struct smd_channel_info_word_pair;
-
-#define SMD_ALLOC_TBL_COUNT    2
-#define SMD_ALLOC_TBL_SIZE     64
-
-/*
- * This lists the various smem heap items relevant for the allocation table and
- * smd channel entries.
- */
-static const struct {
-       unsigned alloc_tbl_id;
-       unsigned info_base_id;
-       unsigned fifo_base_id;
-} smem_items[SMD_ALLOC_TBL_COUNT] = {
-       {
-               .alloc_tbl_id = 13,
-               .info_base_id = 14,
-               .fifo_base_id = 338
-       },
-       {
-               .alloc_tbl_id = 266,
-               .info_base_id = 138,
-               .fifo_base_id = 202,
-       },
-};
-
-/**
- * struct qcom_smd_edge - representing a remote processor
- * @dev:               device for this edge
- * @of_node:           of_node handle for information related to this edge
- * @edge_id:           identifier of this edge
- * @remote_pid:                identifier of remote processor
- * @irq:               interrupt for signals on this edge
- * @ipc_regmap:                regmap handle holding the outgoing ipc register
- * @ipc_offset:                offset within @ipc_regmap of the register for ipc
- * @ipc_bit:           bit in the register at @ipc_offset of @ipc_regmap
- * @channels:          list of all channels detected on this edge
- * @channels_lock:     guard for modifications of @channels
- * @allocated:         array of bitmaps representing already allocated channels
- * @smem_available:    last available amount of smem triggering a channel scan
- * @new_channel_event: wait queue signalled when new channels are discovered
- * @scan_work:         work item for discovering new channels
- * @state_work:                work item for edge state changes
- */
-struct qcom_smd_edge {
-       struct device dev;
-
-       struct device_node *of_node;
-       unsigned edge_id;
-       unsigned remote_pid;
-
-       int irq;
-
-       struct regmap *ipc_regmap;
-       int ipc_offset;
-       int ipc_bit;
-
-       struct list_head channels;
-       spinlock_t channels_lock;
-
-       DECLARE_BITMAP(allocated[SMD_ALLOC_TBL_COUNT], SMD_ALLOC_TBL_SIZE);
-
-       unsigned smem_available;
-
-       wait_queue_head_t new_channel_event;
-
-       struct work_struct scan_work;
-       struct work_struct state_work;
-};
-
-#define to_smd_edge(d) container_of(d, struct qcom_smd_edge, dev)
-
-/*
- * SMD channel states.
- */
-enum smd_channel_state {
-       SMD_CHANNEL_CLOSED,
-       SMD_CHANNEL_OPENING,
-       SMD_CHANNEL_OPENED,
-       SMD_CHANNEL_FLUSHING,
-       SMD_CHANNEL_CLOSING,
-       SMD_CHANNEL_RESET,
-       SMD_CHANNEL_RESET_OPENING
-};
-
-/**
- * struct qcom_smd_channel - smd channel struct
- * @edge:              qcom_smd_edge this channel is living on
- * @qsdev:             reference to an associated smd client device
- * @name:              name of the channel
- * @state:             local state of the channel
- * @remote_state:      remote state of the channel
- * @info:              byte aligned outgoing/incoming channel info
- * @info_word:         word aligned outgoing/incoming channel info
- * @tx_lock:           lock to make writes to the channel mutually exclusive
- * @fblockread_event:  wakeup event tied to tx fBLOCKREADINTR
- * @tx_fifo:           pointer to the outgoing ring buffer
- * @rx_fifo:           pointer to the incoming ring buffer
- * @fifo_size:         size of each ring buffer
- * @bounce_buffer:     bounce buffer for reading wrapped packets
- * @cb:                        callback function registered for this channel
- * @recv_lock:         guard for rx info modifications and cb pointer
- * @pkt_size:          size of the currently handled packet
- * @drvdata:           driver private data, set via qcom_smd_set_drvdata()
- * @list:              list entry for @channels in qcom_smd_edge
- */
-struct qcom_smd_channel {
-       struct qcom_smd_edge *edge;
-
-       struct qcom_smd_device *qsdev;
-
-       char *name;
-       enum smd_channel_state state;
-       enum smd_channel_state remote_state;
-
-       struct smd_channel_info_pair *info;
-       struct smd_channel_info_word_pair *info_word;
-
-       struct mutex tx_lock;
-       wait_queue_head_t fblockread_event;
-
-       void *tx_fifo;
-       void *rx_fifo;
-       int fifo_size;
-
-       void *bounce_buffer;
-       qcom_smd_cb_t cb;
-
-       spinlock_t recv_lock;
-
-       int pkt_size;
-
-       void *drvdata;
-
-       struct list_head list;
-};
-
-/*
- * Format of the smd_info smem items, for byte aligned channels.
- */
-struct smd_channel_info {
-       __le32 state;
-       u8  fDSR;
-       u8  fCTS;
-       u8  fCD;
-       u8  fRI;
-       u8  fHEAD;
-       u8  fTAIL;
-       u8  fSTATE;
-       u8  fBLOCKREADINTR;
-       __le32 tail;
-       __le32 head;
-};
-
-struct smd_channel_info_pair {
-       struct smd_channel_info tx;
-       struct smd_channel_info rx;
-};
-
-/*
- * Format of the smd_info smem items, for word aligned channels.
- */
-struct smd_channel_info_word {
-       __le32 state;
-       __le32 fDSR;
-       __le32 fCTS;
-       __le32 fCD;
-       __le32 fRI;
-       __le32 fHEAD;
-       __le32 fTAIL;
-       __le32 fSTATE;
-       __le32 fBLOCKREADINTR;
-       __le32 tail;
-       __le32 head;
-};
-
-struct smd_channel_info_word_pair {
-       struct smd_channel_info_word tx;
-       struct smd_channel_info_word rx;
-};
-
-#define GET_RX_CHANNEL_FLAG(channel, param)                                 \
-       ({                                                                   \
-               BUILD_BUG_ON(sizeof(channel->info->rx.param) != sizeof(u8)); \
-               channel->info_word ?                                         \
-                       le32_to_cpu(channel->info_word->rx.param) :          \
-                       channel->info->rx.param;                             \
-       })
-
-#define GET_RX_CHANNEL_INFO(channel, param)                                  \
-       ({                                                                    \
-               BUILD_BUG_ON(sizeof(channel->info->rx.param) != sizeof(u32)); \
-               le32_to_cpu(channel->info_word ?                              \
-                       channel->info_word->rx.param :                        \
-                       channel->info->rx.param);                             \
-       })
-
-#define SET_RX_CHANNEL_FLAG(channel, param, value)                          \
-       ({                                                                   \
-               BUILD_BUG_ON(sizeof(channel->info->rx.param) != sizeof(u8)); \
-               if (channel->info_word)                                      \
-                       channel->info_word->rx.param = cpu_to_le32(value);   \
-               else                                                         \
-                       channel->info->rx.param = value;                     \
-       })
-
-#define SET_RX_CHANNEL_INFO(channel, param, value)                           \
-       ({                                                                    \
-               BUILD_BUG_ON(sizeof(channel->info->rx.param) != sizeof(u32)); \
-               if (channel->info_word)                                       \
-                       channel->info_word->rx.param = cpu_to_le32(value);    \
-               else                                                          \
-                       channel->info->rx.param = cpu_to_le32(value);         \
-       })
-
-#define GET_TX_CHANNEL_FLAG(channel, param)                                 \
-       ({                                                                   \
-               BUILD_BUG_ON(sizeof(channel->info->tx.param) != sizeof(u8)); \
-               channel->info_word ?                                         \
-                       le32_to_cpu(channel->info_word->tx.param) :          \
-                       channel->info->tx.param;                             \
-       })
-
-#define GET_TX_CHANNEL_INFO(channel, param)                                  \
-       ({                                                                    \
-               BUILD_BUG_ON(sizeof(channel->info->tx.param) != sizeof(u32)); \
-               le32_to_cpu(channel->info_word ?                              \
-                       channel->info_word->tx.param :                        \
-                       channel->info->tx.param);                             \
-       })
-
-#define SET_TX_CHANNEL_FLAG(channel, param, value)                          \
-       ({                                                                   \
-               BUILD_BUG_ON(sizeof(channel->info->tx.param) != sizeof(u8)); \
-               if (channel->info_word)                                      \
-                       channel->info_word->tx.param = cpu_to_le32(value);   \
-               else                                                         \
-                       channel->info->tx.param = value;                     \
-       })
-
-#define SET_TX_CHANNEL_INFO(channel, param, value)                           \
-       ({                                                                    \
-               BUILD_BUG_ON(sizeof(channel->info->tx.param) != sizeof(u32)); \
-               if (channel->info_word)                                       \
-                       channel->info_word->tx.param = cpu_to_le32(value);   \
-               else                                                          \
-                       channel->info->tx.param = cpu_to_le32(value);         \
-       })
-
-/**
- * struct qcom_smd_alloc_entry - channel allocation entry
- * @name:      channel name
- * @cid:       channel index
- * @flags:     channel flags and edge id
- * @ref_count: reference count of the channel
- */
-struct qcom_smd_alloc_entry {
-       u8 name[20];
-       __le32 cid;
-       __le32 flags;
-       __le32 ref_count;
-} __packed;
-
-#define SMD_CHANNEL_FLAGS_EDGE_MASK    0xff
-#define SMD_CHANNEL_FLAGS_STREAM       BIT(8)
-#define SMD_CHANNEL_FLAGS_PACKET       BIT(9)
-
-/*
- * Each smd packet contains a 20-byte header, of which the first 4 bytes carry
- * the length of the packet.
- */
-#define SMD_PACKET_HEADER_LEN  20
-
-/*
- * Signal the remote processor associated with 'channel'.
- */
-static void qcom_smd_signal_channel(struct qcom_smd_channel *channel)
-{
-       struct qcom_smd_edge *edge = channel->edge;
-
-       regmap_write(edge->ipc_regmap, edge->ipc_offset, BIT(edge->ipc_bit));
-}
-
-/*
- * Initialize the tx channel info
- */
-static void qcom_smd_channel_reset(struct qcom_smd_channel *channel)
-{
-       SET_TX_CHANNEL_INFO(channel, state, SMD_CHANNEL_CLOSED);
-       SET_TX_CHANNEL_FLAG(channel, fDSR, 0);
-       SET_TX_CHANNEL_FLAG(channel, fCTS, 0);
-       SET_TX_CHANNEL_FLAG(channel, fCD, 0);
-       SET_TX_CHANNEL_FLAG(channel, fRI, 0);
-       SET_TX_CHANNEL_FLAG(channel, fHEAD, 0);
-       SET_TX_CHANNEL_FLAG(channel, fTAIL, 0);
-       SET_TX_CHANNEL_FLAG(channel, fSTATE, 1);
-       SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 1);
-       SET_TX_CHANNEL_INFO(channel, head, 0);
-       SET_RX_CHANNEL_INFO(channel, tail, 0);
-
-       qcom_smd_signal_channel(channel);
-
-       channel->state = SMD_CHANNEL_CLOSED;
-       channel->pkt_size = 0;
-}
-
-/*
- * Set the callback for a channel, with appropriate locking
- */
-static void qcom_smd_channel_set_callback(struct qcom_smd_channel *channel,
-                                         qcom_smd_cb_t cb)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&channel->recv_lock, flags);
-       channel->cb = cb;
-       spin_unlock_irqrestore(&channel->recv_lock, flags);
-}
-
-/*
- * Calculate the amount of data available in the rx fifo
- */
-static size_t qcom_smd_channel_get_rx_avail(struct qcom_smd_channel *channel)
-{
-       unsigned head;
-       unsigned tail;
-
-       head = GET_RX_CHANNEL_INFO(channel, head);
-       tail = GET_RX_CHANNEL_INFO(channel, tail);
-
-       return (head - tail) & (channel->fifo_size - 1);
-}
-
-/*
- * Set tx channel state and inform the remote processor
- */
-static void qcom_smd_channel_set_state(struct qcom_smd_channel *channel,
-                                      int state)
-{
-       struct qcom_smd_edge *edge = channel->edge;
-       bool is_open = state == SMD_CHANNEL_OPENED;
-
-       if (channel->state == state)
-               return;
-
-       dev_dbg(&edge->dev, "set_state(%s, %d)\n", channel->name, state);
-
-       SET_TX_CHANNEL_FLAG(channel, fDSR, is_open);
-       SET_TX_CHANNEL_FLAG(channel, fCTS, is_open);
-       SET_TX_CHANNEL_FLAG(channel, fCD, is_open);
-
-       SET_TX_CHANNEL_INFO(channel, state, state);
-       SET_TX_CHANNEL_FLAG(channel, fSTATE, 1);
-
-       channel->state = state;
-       qcom_smd_signal_channel(channel);
-}
-
-/*
- * Copy count bytes of data using 32bit accesses, if that's required.
- */
-static void smd_copy_to_fifo(void __iomem *dst,
-                            const void *src,
-                            size_t count,
-                            bool word_aligned)
-{
-       if (word_aligned) {
-               __iowrite32_copy(dst, src, count / sizeof(u32));
-       } else {
-               memcpy_toio(dst, src, count);
-       }
-}
-
-/*
- * Copy count bytes of data using 32bit accesses, if that is required.
- */
-static void smd_copy_from_fifo(void *dst,
-                              const void __iomem *src,
-                              size_t count,
-                              bool word_aligned)
-{
-       if (word_aligned) {
-               __ioread32_copy(dst, src, count / sizeof(u32));
-       } else {
-               memcpy_fromio(dst, src, count);
-       }
-}
-
-/*
- * Read count bytes of data from the rx fifo into buf, but don't advance the
- * tail.
- */
-static size_t qcom_smd_channel_peek(struct qcom_smd_channel *channel,
-                                   void *buf, size_t count)
-{
-       bool word_aligned;
-       unsigned tail;
-       size_t len;
-
-       word_aligned = channel->info_word;
-       tail = GET_RX_CHANNEL_INFO(channel, tail);
-
-       len = min_t(size_t, count, channel->fifo_size - tail);
-       if (len) {
-               smd_copy_from_fifo(buf,
-                                  channel->rx_fifo + tail,
-                                  len,
-                                  word_aligned);
-       }
-
-       if (len != count) {
-               smd_copy_from_fifo(buf + len,
-                                  channel->rx_fifo,
-                                  count - len,
-                                  word_aligned);
-       }
-
-       return count;
-}
-
-/*
- * Advance the rx tail by count bytes.
- */
-static void qcom_smd_channel_advance(struct qcom_smd_channel *channel,
-                                    size_t count)
-{
-       unsigned tail;
-
-       tail = GET_RX_CHANNEL_INFO(channel, tail);
-       tail += count;
-       tail &= (channel->fifo_size - 1);
-       SET_RX_CHANNEL_INFO(channel, tail, tail);
-}
-
-/*
- * Read out a single packet from the rx fifo and deliver it to the device
- */
-static int qcom_smd_channel_recv_single(struct qcom_smd_channel *channel)
-{
-       unsigned tail;
-       size_t len;
-       void *ptr;
-       int ret;
-
-       if (!channel->cb)
-               return 0;
-
-       tail = GET_RX_CHANNEL_INFO(channel, tail);
-
-       /* Use bounce buffer if the data wraps */
-       if (tail + channel->pkt_size >= channel->fifo_size) {
-               ptr = channel->bounce_buffer;
-               len = qcom_smd_channel_peek(channel, ptr, channel->pkt_size);
-       } else {
-               ptr = channel->rx_fifo + tail;
-               len = channel->pkt_size;
-       }
-
-       ret = channel->cb(channel, ptr, len);
-       if (ret < 0)
-               return ret;
-
-       /* Only forward the tail if the client consumed the data */
-       qcom_smd_channel_advance(channel, len);
-
-       channel->pkt_size = 0;
-
-       return 0;
-}
-
-/*
- * Per channel interrupt handling
- */
-static bool qcom_smd_channel_intr(struct qcom_smd_channel *channel)
-{
-       bool need_state_scan = false;
-       int remote_state;
-       __le32 pktlen;
-       int avail;
-       int ret;
-
-       /* Handle state changes */
-       remote_state = GET_RX_CHANNEL_INFO(channel, state);
-       if (remote_state != channel->remote_state) {
-               channel->remote_state = remote_state;
-               need_state_scan = true;
-       }
-       /* Indicate that we have seen any state change */
-       SET_RX_CHANNEL_FLAG(channel, fSTATE, 0);
-
-       /* Signal waiting qcom_smd_send() about the interrupt */
-       if (!GET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR))
-               wake_up_interruptible(&channel->fblockread_event);
-
-       /* Don't consume any data until we've opened the channel */
-       if (channel->state != SMD_CHANNEL_OPENED)
-               goto out;
-
-       /* Indicate that we've seen the new data */
-       SET_RX_CHANNEL_FLAG(channel, fHEAD, 0);
-
-       /* Consume data */
-       for (;;) {
-               avail = qcom_smd_channel_get_rx_avail(channel);
-
-               if (!channel->pkt_size && avail >= SMD_PACKET_HEADER_LEN) {
-                       qcom_smd_channel_peek(channel, &pktlen, sizeof(pktlen));
-                       qcom_smd_channel_advance(channel, SMD_PACKET_HEADER_LEN);
-                       channel->pkt_size = le32_to_cpu(pktlen);
-               } else if (channel->pkt_size && avail >= channel->pkt_size) {
-                       ret = qcom_smd_channel_recv_single(channel);
-                       if (ret)
-                               break;
-               } else {
-                       break;
-               }
-       }
-
-       /* Indicate that we have seen and updated tail */
-       SET_RX_CHANNEL_FLAG(channel, fTAIL, 1);
-
-       /* Signal the remote that we've consumed the data (if requested) */
-       if (!GET_RX_CHANNEL_FLAG(channel, fBLOCKREADINTR)) {
-               /* Ensure ordering of channel info updates */
-               wmb();
-
-               qcom_smd_signal_channel(channel);
-       }
-
-out:
-       return need_state_scan;
-}
-
-/*
- * The edge interrupts are triggered by the remote processor on state changes,
- * channel info updates or when new channels are created.
- */
-static irqreturn_t qcom_smd_edge_intr(int irq, void *data)
-{
-       struct qcom_smd_edge *edge = data;
-       struct qcom_smd_channel *channel;
-       unsigned available;
-       bool kick_scanner = false;
-       bool kick_state = false;
-
-       /*
-        * Handle state changes or data on each of the channels on this edge
-        */
-       spin_lock(&edge->channels_lock);
-       list_for_each_entry(channel, &edge->channels, list) {
-               spin_lock(&channel->recv_lock);
-               kick_state |= qcom_smd_channel_intr(channel);
-               spin_unlock(&channel->recv_lock);
-       }
-       spin_unlock(&edge->channels_lock);
-
-       /*
-        * Creating a new channel requires allocating an smem entry, so we only
-        * have to scan if the amount of available space in smem has changed
-        * since the last scan.
-        */
-       available = qcom_smem_get_free_space(edge->remote_pid);
-       if (available != edge->smem_available) {
-               edge->smem_available = available;
-               kick_scanner = true;
-       }
-
-       if (kick_scanner)
-               schedule_work(&edge->scan_work);
-       if (kick_state)
-               schedule_work(&edge->state_work);
-
-       return IRQ_HANDLED;
-}
-
-/*
- * Delivers any outstanding packets in the rx fifo; can be used after probe of
- * the clients to deliver any packets that weren't delivered before the client
- * was set up.
- */
-static void qcom_smd_channel_resume(struct qcom_smd_channel *channel)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&channel->recv_lock, flags);
-       qcom_smd_channel_intr(channel);
-       spin_unlock_irqrestore(&channel->recv_lock, flags);
-}
-
-/*
- * Calculate how much space is available in the tx fifo.
- */
-static size_t qcom_smd_get_tx_avail(struct qcom_smd_channel *channel)
-{
-       unsigned head;
-       unsigned tail;
-       unsigned mask = channel->fifo_size - 1;
-
-       head = GET_TX_CHANNEL_INFO(channel, head);
-       tail = GET_TX_CHANNEL_INFO(channel, tail);
-
-       return mask - ((head - tail) & mask);
-}
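
The arithmetic above (and its rx counterpart) relies on fifo_size being a
power of two: (head - tail) & mask yields the number of queued bytes even when
head has wrapped below tail, and one byte is kept free so a full fifo is
distinguishable from an empty one. A standalone check with made-up numbers:

    #include <assert.h>

    int main(void)
    {
            unsigned fifo_size = 1024, mask = fifo_size - 1;
            unsigned head = 16, tail = 992;        /* writer wrapped past reader */
            unsigned used = (head - tail) & mask;  /* (16 - 992) mod 1024 == 48 */

            assert(used == 48);
            assert(mask - used == 975);            /* writable; one byte reserved */
            return 0;
    }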
-
-/*
- * Write count bytes of data into channel, possibly wrapping in the ring buffer
- */
-static int qcom_smd_write_fifo(struct qcom_smd_channel *channel,
-                              const void *data,
-                              size_t count)
-{
-       bool word_aligned;
-       unsigned head;
-       size_t len;
-
-       word_aligned = channel->info_word;
-       head = GET_TX_CHANNEL_INFO(channel, head);
-
-       len = min_t(size_t, count, channel->fifo_size - head);
-       if (len) {
-               smd_copy_to_fifo(channel->tx_fifo + head,
-                                data,
-                                len,
-                                word_aligned);
-       }
-
-       if (len != count) {
-               smd_copy_to_fifo(channel->tx_fifo,
-                                data + len,
-                                count - len,
-                                word_aligned);
-       }
-
-       head += count;
-       head &= (channel->fifo_size - 1);
-       SET_TX_CHANNEL_INFO(channel, head, head);
-
-       return count;
-}
-
-/**
- * qcom_smd_send - write data to smd channel
- * @channel:   channel handle
- * @data:      buffer of data to write
- * @len:       number of bytes to write
- *
- * This is a blocking write of len bytes into the channel's tx ring buffer,
- * followed by a signal to the remote end. It will sleep until there is enough
- * space available in the tx buffer, utilizing the fBLOCKREADINTR signaling
- * mechanism to avoid polling.
- */
-int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len)
-{
-       __le32 hdr[5] = { cpu_to_le32(len), };
-       int tlen = sizeof(hdr) + len;
-       int ret;
-
-       /* Word aligned channels only accept word size aligned data */
-       if (channel->info_word && len % 4)
-               return -EINVAL;
-
-       /* Reject packets that are too big */
-       if (tlen >= channel->fifo_size)
-               return -EINVAL;
-
-       ret = mutex_lock_interruptible(&channel->tx_lock);
-       if (ret)
-               return ret;
-
-       while (qcom_smd_get_tx_avail(channel) < tlen) {
-               if (channel->state != SMD_CHANNEL_OPENED) {
-                       ret = -EPIPE;
-                       goto out;
-               }
-
-               SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 0);
-
-               ret = wait_event_interruptible(channel->fblockread_event,
-                                      qcom_smd_get_tx_avail(channel) >= tlen ||
-                                      channel->state != SMD_CHANNEL_OPENED);
-               if (ret)
-                       goto out;
-
-               SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 1);
-       }
-
-       SET_TX_CHANNEL_FLAG(channel, fTAIL, 0);
-
-       qcom_smd_write_fifo(channel, hdr, sizeof(hdr));
-       qcom_smd_write_fifo(channel, data, len);
-
-       SET_TX_CHANNEL_FLAG(channel, fHEAD, 1);
-
-       /* Ensure ordering of channel info updates */
-       wmb();
-
-       qcom_smd_signal_channel(channel);
-
-out:
-       mutex_unlock(&channel->tx_lock);
-
-       return ret;
-}
-EXPORT_SYMBOL(qcom_smd_send);
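
Taken together with the framing described earlier, one successful call queues
sizeof(hdr) + len bytes: a 20-byte header whose first 32-bit word carries the
payload length (the remaining four words stay zero), followed by the payload.
A hypothetical caller, minding the word-alignment rule:

    /* Hypothetical client helper; on word-aligned channels len % 4 must be 0. */
    static int demo_send_hello(struct qcom_smd_channel *channel)
    {
            static const char msg[8] = "hello";  /* zero-padded to 8 bytes */

            return qcom_smd_send(channel, msg, sizeof(msg)); /* queues 20 + 8 bytes */
    }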
-
-static struct qcom_smd_device *to_smd_device(struct device *dev)
-{
-       return container_of(dev, struct qcom_smd_device, dev);
-}
-
-static struct qcom_smd_driver *to_smd_driver(struct device *dev)
-{
-       struct qcom_smd_device *qsdev = to_smd_device(dev);
-
-       return container_of(qsdev->dev.driver, struct qcom_smd_driver, driver);
-}
-
-static int qcom_smd_dev_match(struct device *dev, struct device_driver *drv)
-{
-       struct qcom_smd_device *qsdev = to_smd_device(dev);
-       struct qcom_smd_driver *qsdrv = container_of(drv, struct qcom_smd_driver, driver);
-       const struct qcom_smd_id *match = qsdrv->smd_match_table;
-       const char *name = qsdev->channel->name;
-
-       if (match) {
-               while (match->name[0]) {
-                       if (!strcmp(match->name, name))
-                               return 1;
-                       match++;
-               }
-       }
-
-       return of_driver_match_device(dev, drv);
-}
-
-/*
- * Helper for opening a channel
- */
-static int qcom_smd_channel_open(struct qcom_smd_channel *channel,
-                                qcom_smd_cb_t cb)
-{
-       size_t bb_size;
-
-       /*
-        * Packets are at most 4k, but shrink the bounce buffer if the fifo is smaller
-        */
-       bb_size = min(channel->fifo_size, SZ_4K);
-       channel->bounce_buffer = kmalloc(bb_size, GFP_KERNEL);
-       if (!channel->bounce_buffer)
-               return -ENOMEM;
-
-       qcom_smd_channel_set_callback(channel, cb);
-       qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENING);
-       qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENED);
-
-       return 0;
-}
-
-/*
- * Helper for closing and resetting a channel
- */
-static void qcom_smd_channel_close(struct qcom_smd_channel *channel)
-{
-       qcom_smd_channel_set_callback(channel, NULL);
-
-       kfree(channel->bounce_buffer);
-       channel->bounce_buffer = NULL;
-
-       qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSED);
-       qcom_smd_channel_reset(channel);
-}
-
-/*
- * Probe the smd client.
- *
- * The remote side has indicated that it wants the channel to be opened, so
- * complete the state handshake and probe our client driver.
- */
-static int qcom_smd_dev_probe(struct device *dev)
-{
-       struct qcom_smd_device *qsdev = to_smd_device(dev);
-       struct qcom_smd_driver *qsdrv = to_smd_driver(dev);
-       struct qcom_smd_channel *channel = qsdev->channel;
-       int ret;
-
-       ret = qcom_smd_channel_open(channel, qsdrv->callback);
-       if (ret)
-               return ret;
-
-       ret = qsdrv->probe(qsdev);
-       if (ret)
-               goto err;
-
-       qcom_smd_channel_resume(channel);
-
-       return 0;
-
-err:
-       dev_err(&qsdev->dev, "probe failed\n");
-
-       qcom_smd_channel_close(channel);
-       return ret;
-}
-
-/*
- * Remove the smd client.
- *
- * The channel is going away, for some reason, so remove the smd client and
- * reset the channel state.
- */
-static int qcom_smd_dev_remove(struct device *dev)
-{
-       struct qcom_smd_device *qsdev = to_smd_device(dev);
-       struct qcom_smd_driver *qsdrv = to_smd_driver(dev);
-       struct qcom_smd_channel *channel = qsdev->channel;
-
-       qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSING);
-
-       /*
-        * Make sure we don't race with the code receiving data.
-        */
-       qcom_smd_channel_set_callback(channel, NULL);
-
-       /* Wake up any sleepers in qcom_smd_send() */
-       wake_up_interruptible(&channel->fblockread_event);
-
-       /*
-        * We expect that the client might block in remove() waiting for any
-        * outstanding calls to qcom_smd_send() to wake up and finish.
-        */
-       if (qsdrv->remove)
-               qsdrv->remove(qsdev);
-
-       /* The client is now gone, close the primary channel */
-       qcom_smd_channel_close(channel);
-       channel->qsdev = NULL;
-
-       return 0;
-}
-
-static struct bus_type qcom_smd_bus = {
-       .name = "qcom_smd",
-       .match = qcom_smd_dev_match,
-       .probe = qcom_smd_dev_probe,
-       .remove = qcom_smd_dev_remove,
-};
-
-/*
- * Release function for the qcom_smd_device object.
- */
-static void qcom_smd_release_device(struct device *dev)
-{
-       struct qcom_smd_device *qsdev = to_smd_device(dev);
-
-       kfree(qsdev);
-}
-
-/*
- * Finds the device_node for the smd child interested in this channel.
- */
-static struct device_node *qcom_smd_match_channel(struct device_node *edge_node,
-                                                 const char *channel)
-{
-       struct device_node *child;
-       const char *name;
-       const char *key;
-       int ret;
-
-       for_each_available_child_of_node(edge_node, child) {
-               key = "qcom,smd-channels";
-               ret = of_property_read_string(child, key, &name);
-               if (ret)
-                       continue;
-
-               if (strcmp(name, channel) == 0)
-                       return child;
-       }
-
-       return NULL;
-}
-
-/*
- * Create an smd client device for a channel that is being opened.
- */
-static int qcom_smd_create_device(struct qcom_smd_channel *channel)
-{
-       struct qcom_smd_device *qsdev;
-       struct qcom_smd_edge *edge = channel->edge;
-       struct device_node *node;
-       int ret;
-
-       if (channel->qsdev)
-               return -EEXIST;
-
-       dev_dbg(&edge->dev, "registering '%s'\n", channel->name);
-
-       qsdev = kzalloc(sizeof(*qsdev), GFP_KERNEL);
-       if (!qsdev)
-               return -ENOMEM;
-
-       node = qcom_smd_match_channel(edge->of_node, channel->name);
-       dev_set_name(&qsdev->dev, "%s.%s",
-                    edge->of_node->name,
-                    node ? node->name : channel->name);
-
-       qsdev->dev.parent = &edge->dev;
-       qsdev->dev.bus = &qcom_smd_bus;
-       qsdev->dev.release = qcom_smd_release_device;
-       qsdev->dev.of_node = node;
-
-       qsdev->channel = channel;
-
-       channel->qsdev = qsdev;
-
-       ret = device_register(&qsdev->dev);
-       if (ret) {
-               dev_err(&edge->dev, "device_register failed: %d\n", ret);
-               put_device(&qsdev->dev);
-       }
-
-       return ret;
-}
-
-/*
- * Destroy an smd client device for a channel that's going away.
- */
-static void qcom_smd_destroy_device(struct qcom_smd_channel *channel)
-{
-       struct device *dev;
-
-       BUG_ON(!channel->qsdev);
-
-       dev = &channel->qsdev->dev;
-
-       device_unregister(dev);
-       of_node_put(dev->of_node);
-       put_device(dev);
-}
-
-/**
- * qcom_smd_driver_register - register an smd driver
- * @qsdrv:     qcom_smd_driver struct
- */
-int qcom_smd_driver_register(struct qcom_smd_driver *qsdrv)
-{
-       qsdrv->driver.bus = &qcom_smd_bus;
-       return driver_register(&qsdrv->driver);
-}
-EXPORT_SYMBOL(qcom_smd_driver_register);
-
-void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel)
-{
-       return channel->drvdata;
-}
-EXPORT_SYMBOL(qcom_smd_get_drvdata);
-
-void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data)
-{
-       channel->drvdata = data;
-}
-EXPORT_SYMBOL(qcom_smd_set_drvdata);
-
-/**
- * qcom_smd_driver_unregister - unregister an smd driver
- * @qsdrv:     qcom_smd_driver struct
- */
-void qcom_smd_driver_unregister(struct qcom_smd_driver *qsdrv)
-{
-       driver_unregister(&qsdrv->driver);
-}
-EXPORT_SYMBOL(qcom_smd_driver_unregister);
-
-static struct qcom_smd_channel *
-qcom_smd_find_channel(struct qcom_smd_edge *edge, const char *name)
-{
-       struct qcom_smd_channel *channel;
-       struct qcom_smd_channel *ret = NULL;
-       unsigned long flags;
-       unsigned state;
-
-       spin_lock_irqsave(&edge->channels_lock, flags);
-       list_for_each_entry(channel, &edge->channels, list) {
-               if (strcmp(channel->name, name))
-                       continue;
-
-               state = GET_RX_CHANNEL_INFO(channel, state);
-               if (state != SMD_CHANNEL_OPENING &&
-                   state != SMD_CHANNEL_OPENED)
-                       continue;
-
-               ret = channel;
-               break;
-       }
-       spin_unlock_irqrestore(&edge->channels_lock, flags);
-
-       return ret;
-}
-
-/**
- * qcom_smd_open_channel() - claim additional channels on the same edge
- * @parent:    handle to the primary channel on the edge to claim from
- * @name:      channel name
- * @cb:                callback method to use for incoming data
- *
- * Returns a channel handle on success, or an ERR_PTR() on failure, e.g.
- * -ETIMEDOUT if the channel doesn't appear in time or -EBUSY if it is
- * already open.
- *
- * Any channels returned must be closed with a call to qcom_smd_close_channel()
- */
-struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *parent,
-                                              const char *name,
-                                              qcom_smd_cb_t cb)
-{
-       struct qcom_smd_channel *channel;
-       struct qcom_smd_device *sdev = parent->qsdev;
-       struct qcom_smd_edge *edge = parent->edge;
-       int ret;
-
-       /* Wait up to HZ for the channel to appear */
-       ret = wait_event_interruptible_timeout(edge->new_channel_event,
-                       (channel = qcom_smd_find_channel(edge, name)) != NULL,
-                       HZ);
-       if (!ret)
-               return ERR_PTR(-ETIMEDOUT);
-
-       if (channel->state != SMD_CHANNEL_CLOSED) {
-               dev_err(&sdev->dev, "channel %s is busy\n", channel->name);
-               return ERR_PTR(-EBUSY);
-       }
-
-       channel->qsdev = sdev;
-       ret = qcom_smd_channel_open(channel, cb);
-       if (ret) {
-               channel->qsdev = NULL;
-               return ERR_PTR(ret);
-       }
-
-       return channel;
-}
-EXPORT_SYMBOL(qcom_smd_open_channel);
-
-/**
- * qcom_smd_close_channel() - close an additionally opened channel
- * @channel:   channel handle, returned by qcom_smd_open_channel()
- */
-void qcom_smd_close_channel(struct qcom_smd_channel *channel)
-{
-       qcom_smd_channel_close(channel);
-       channel->qsdev = NULL;
-}
-EXPORT_SYMBOL(qcom_smd_close_channel);
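
The pair above is how a client claims secondary channels on its own edge. A
hypothetical claim-and-release sequence (channel name and callback are made
up):

    static int demo_aux_cb(struct qcom_smd_channel *channel,
                           const void *data, size_t count)
    {
            return 0;  /* returning < 0 would leave the data in the fifo */
    }

    static int demo_claim_aux(struct qcom_smd_channel *primary)
    {
            struct qcom_smd_channel *aux;

            aux = qcom_smd_open_channel(primary, "DEMO_AUX", demo_aux_cb);
            if (IS_ERR(aux))
                    return PTR_ERR(aux);  /* -ETIMEDOUT or -EBUSY */

            /* ... exchange data with qcom_smd_send(aux, ...) ... */

            qcom_smd_close_channel(aux);
            return 0;
    }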
-
-/*
- * Allocate the qcom_smd_channel object for a newly found smd channel,
- * retrieving and validating the smem items involved.
- */
-static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *edge,
-                                                       unsigned smem_info_item,
-                                                       unsigned smem_fifo_item,
-                                                       char *name)
-{
-       struct qcom_smd_channel *channel;
-       size_t fifo_size;
-       size_t info_size;
-       void *fifo_base;
-       void *info;
-       int ret;
-
-       channel = devm_kzalloc(&edge->dev, sizeof(*channel), GFP_KERNEL);
-       if (!channel)
-               return ERR_PTR(-ENOMEM);
-
-       channel->edge = edge;
-       channel->name = devm_kstrdup(&edge->dev, name, GFP_KERNEL);
-       if (!channel->name)
-               return ERR_PTR(-ENOMEM);
-
-       mutex_init(&channel->tx_lock);
-       spin_lock_init(&channel->recv_lock);
-       init_waitqueue_head(&channel->fblockread_event);
-
-       info = qcom_smem_get(edge->remote_pid, smem_info_item, &info_size);
-       if (IS_ERR(info)) {
-               ret = PTR_ERR(info);
-               goto free_name_and_channel;
-       }
-
-       /*
-        * Use the size of the item to figure out which channel info struct to
-        * use.
-        */
-       if (info_size == 2 * sizeof(struct smd_channel_info_word)) {
-               channel->info_word = info;
-       } else if (info_size == 2 * sizeof(struct smd_channel_info)) {
-               channel->info = info;
-       } else {
-               dev_err(&edge->dev,
-                       "channel info of size %zu not supported\n", info_size);
-               ret = -EINVAL;
-               goto free_name_and_channel;
-       }
-
-       fifo_base = qcom_smem_get(edge->remote_pid, smem_fifo_item, &fifo_size);
-       if (IS_ERR(fifo_base)) {
-               ret =  PTR_ERR(fifo_base);
-               goto free_name_and_channel;
-       }
-
-       /* The channel consists of rx and tx fifos of equal size */
-       fifo_size /= 2;
-
-       dev_dbg(&edge->dev, "new channel '%s' info-size: %zu fifo-size: %zu\n",
-                         name, info_size, fifo_size);
-
-       channel->tx_fifo = fifo_base;
-       channel->rx_fifo = fifo_base + fifo_size;
-       channel->fifo_size = fifo_size;
-
-       qcom_smd_channel_reset(channel);
-
-       return channel;
-
-free_name_and_channel:
-       devm_kfree(&edge->dev, channel->name);
-       devm_kfree(&edge->dev, channel);
-
-       return ERR_PTR(ret);
-}
-
-/*
- * Scans the allocation table for any newly allocated channels, calls
- * qcom_smd_create_channel() to create representations of these, and adds
- * them to the edge's list of channels.
- */
-static void qcom_channel_scan_worker(struct work_struct *work)
-{
-       struct qcom_smd_edge *edge = container_of(work, struct qcom_smd_edge, scan_work);
-       struct qcom_smd_alloc_entry *alloc_tbl;
-       struct qcom_smd_alloc_entry *entry;
-       struct qcom_smd_channel *channel;
-       unsigned long flags;
-       unsigned fifo_id;
-       unsigned info_id;
-       int tbl;
-       int i;
-       u32 eflags, cid;
-
-       for (tbl = 0; tbl < SMD_ALLOC_TBL_COUNT; tbl++) {
-               alloc_tbl = qcom_smem_get(edge->remote_pid,
-                                   smem_items[tbl].alloc_tbl_id, NULL);
-               if (IS_ERR(alloc_tbl))
-                       continue;
-
-               for (i = 0; i < SMD_ALLOC_TBL_SIZE; i++) {
-                       entry = &alloc_tbl[i];
-                       eflags = le32_to_cpu(entry->flags);
-                       if (test_bit(i, edge->allocated[tbl]))
-                               continue;
-
-                       if (entry->ref_count == 0)
-                               continue;
-
-                       if (!entry->name[0])
-                               continue;
-
-                       if (!(eflags & SMD_CHANNEL_FLAGS_PACKET))
-                               continue;
-
-                       if ((eflags & SMD_CHANNEL_FLAGS_EDGE_MASK) != edge->edge_id)
-                               continue;
-
-                       cid = le32_to_cpu(entry->cid);
-                       info_id = smem_items[tbl].info_base_id + cid;
-                       fifo_id = smem_items[tbl].fifo_base_id + cid;
-
-                       channel = qcom_smd_create_channel(edge, info_id, fifo_id, entry->name);
-                       if (IS_ERR(channel))
-                               continue;
-
-                       spin_lock_irqsave(&edge->channels_lock, flags);
-                       list_add(&channel->list, &edge->channels);
-                       spin_unlock_irqrestore(&edge->channels_lock, flags);
-
-                       dev_dbg(&edge->dev, "new channel found: '%s'\n", channel->name);
-                       set_bit(i, edge->allocated[tbl]);
-
-                       wake_up_interruptible(&edge->new_channel_event);
-               }
-       }
-
-       schedule_work(&edge->state_work);
-}
-
-/*
- * This per-edge worker scans smem for any new channels and registers them. It
- * then scans all registered channels for state changes that should be handled
- * by creating or destroying smd client devices for the registered channels.
- *
- * LOCKING: edge->channels_lock only needs to cover the list operations, as the
- * worker is killed before any channels are deallocated
- */
-static void qcom_channel_state_worker(struct work_struct *work)
-{
-       struct qcom_smd_channel *channel;
-       struct qcom_smd_edge *edge = container_of(work,
-                                                 struct qcom_smd_edge,
-                                                 state_work);
-       unsigned remote_state;
-       unsigned long flags;
-
-       /*
-        * Register a device for any closed channel where the remote processor
-        * is showing interest in opening the channel.
-        */
-       spin_lock_irqsave(&edge->channels_lock, flags);
-       list_for_each_entry(channel, &edge->channels, list) {
-               if (channel->state != SMD_CHANNEL_CLOSED)
-                       continue;
-
-               remote_state = GET_RX_CHANNEL_INFO(channel, state);
-               if (remote_state != SMD_CHANNEL_OPENING &&
-                   remote_state != SMD_CHANNEL_OPENED)
-                       continue;
-
-               spin_unlock_irqrestore(&edge->channels_lock, flags);
-               qcom_smd_create_device(channel);
-               spin_lock_irqsave(&edge->channels_lock, flags);
-       }
-
-       /*
-        * Unregister the device for any channel that is opened where the
-        * remote processor is closing the channel.
-        */
-       list_for_each_entry(channel, &edge->channels, list) {
-               if (channel->state != SMD_CHANNEL_OPENING &&
-                   channel->state != SMD_CHANNEL_OPENED)
-                       continue;
-
-               remote_state = GET_RX_CHANNEL_INFO(channel, state);
-               if (remote_state == SMD_CHANNEL_OPENING ||
-                   remote_state == SMD_CHANNEL_OPENED)
-                       continue;
-
-               spin_unlock_irqrestore(&edge->channels_lock, flags);
-               qcom_smd_destroy_device(channel);
-               spin_lock_irqsave(&edge->channels_lock, flags);
-       }
-       spin_unlock_irqrestore(&edge->channels_lock, flags);
-}
-
-/*
- * Parses an of_node describing an edge.
- */
-static int qcom_smd_parse_edge(struct device *dev,
-                              struct device_node *node,
-                              struct qcom_smd_edge *edge)
-{
-       struct device_node *syscon_np;
-       const char *key;
-       int irq;
-       int ret;
-
-       INIT_LIST_HEAD(&edge->channels);
-       spin_lock_init(&edge->channels_lock);
-
-       INIT_WORK(&edge->scan_work, qcom_channel_scan_worker);
-       INIT_WORK(&edge->state_work, qcom_channel_state_worker);
-
-       edge->of_node = of_node_get(node);
-
-       key = "qcom,smd-edge";
-       ret = of_property_read_u32(node, key, &edge->edge_id);
-       if (ret) {
-               dev_err(dev, "edge missing %s property\n", key);
-               return -EINVAL;
-       }
-
-       edge->remote_pid = QCOM_SMEM_HOST_ANY;
-       key = "qcom,remote-pid";
-       of_property_read_u32(node, key, &edge->remote_pid);
-
-       syscon_np = of_parse_phandle(node, "qcom,ipc", 0);
-       if (!syscon_np) {
-               dev_err(dev, "no qcom,ipc node\n");
-               return -ENODEV;
-       }
-
-       edge->ipc_regmap = syscon_node_to_regmap(syscon_np);
-       if (IS_ERR(edge->ipc_regmap))
-               return PTR_ERR(edge->ipc_regmap);
-
-       key = "qcom,ipc";
-       ret = of_property_read_u32_index(node, key, 1, &edge->ipc_offset);
-       if (ret < 0) {
-               dev_err(dev, "no offset in %s\n", key);
-               return -EINVAL;
-       }
-
-       ret = of_property_read_u32_index(node, key, 2, &edge->ipc_bit);
-       if (ret < 0) {
-               dev_err(dev, "no bit in %s\n", key);
-               return -EINVAL;
-       }
-
-       irq = irq_of_parse_and_map(node, 0);
-       if (irq < 0) {
-               dev_err(dev, "required smd interrupt missing\n");
-               return -EINVAL;
-       }
-
-       ret = devm_request_irq(dev, irq,
-                              qcom_smd_edge_intr, IRQF_TRIGGER_RISING,
-                              node->name, edge);
-       if (ret) {
-               dev_err(dev, "failed to request smd irq\n");
-               return ret;
-       }
-
-       edge->irq = irq;
-
-       return 0;
-}
-
-/*
- * Release function for an edge.
- * Reset the state of each associated channel and free the edge context.
- */
-static void qcom_smd_edge_release(struct device *dev)
-{
-       struct qcom_smd_channel *channel;
-       struct qcom_smd_edge *edge = to_smd_edge(dev);
-
-       list_for_each_entry(channel, &edge->channels, list) {
-               SET_RX_CHANNEL_INFO(channel, state, SMD_CHANNEL_CLOSED);
-               SET_RX_CHANNEL_INFO(channel, head, 0);
-               SET_RX_CHANNEL_INFO(channel, tail, 0);
-       }
-
-       kfree(edge);
-}
-
-/**
- * qcom_smd_register_edge() - register an edge based on a device_node
- * @parent:    parent device for the edge
- * @node:      device_node describing the edge
- *
- * Returns an edge reference, or negative ERR_PTR() on failure.
- */
-struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
-                                            struct device_node *node)
-{
-       struct qcom_smd_edge *edge;
-       int ret;
-
-       edge = kzalloc(sizeof(*edge), GFP_KERNEL);
-       if (!edge)
-               return ERR_PTR(-ENOMEM);
-
-       init_waitqueue_head(&edge->new_channel_event);
-
-       edge->dev.parent = parent;
-       edge->dev.release = qcom_smd_edge_release;
-       dev_set_name(&edge->dev, "%s:%s", dev_name(parent), node->name);
-       ret = device_register(&edge->dev);
-       if (ret) {
-               pr_err("failed to register smd edge\n");
-               return ERR_PTR(ret);
-       }
-
-       ret = qcom_smd_parse_edge(&edge->dev, node, edge);
-       if (ret) {
-               dev_err(&edge->dev, "failed to parse smd edge\n");
-               goto unregister_dev;
-       }
-
-       schedule_work(&edge->scan_work);
-
-       return edge;
-
-unregister_dev:
-       put_device(&edge->dev);
-       return ERR_PTR(ret);
-}
-EXPORT_SYMBOL(qcom_smd_register_edge);
-
-static int qcom_smd_remove_device(struct device *dev, void *data)
-{
-       device_unregister(dev);
-       of_node_put(dev->of_node);
-       put_device(dev);
-
-       return 0;
-}
-
-/**
- * qcom_smd_unregister_edge() - release an edge and its children
- * @edge:      edge reference acquired from qcom_smd_register_edge
- */
-int qcom_smd_unregister_edge(struct qcom_smd_edge *edge)
-{
-       int ret;
-
-       disable_irq(edge->irq);
-       cancel_work_sync(&edge->scan_work);
-       cancel_work_sync(&edge->state_work);
-
-       ret = device_for_each_child(&edge->dev, NULL, qcom_smd_remove_device);
-       if (ret)
-               dev_warn(&edge->dev, "can't remove smd device: %d\n", ret);
-
-       device_unregister(&edge->dev);
-
-       return 0;
-}
-EXPORT_SYMBOL(qcom_smd_unregister_edge);
-
-static int qcom_smd_probe(struct platform_device *pdev)
-{
-       struct device_node *node;
-       void *p;
-
-       /* Wait for smem */
-       p = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL);
-       if (PTR_ERR(p) == -EPROBE_DEFER)
-               return PTR_ERR(p);
-
-       for_each_available_child_of_node(pdev->dev.of_node, node)
-               qcom_smd_register_edge(&pdev->dev, node);
-
-       return 0;
-}
-
-static int qcom_smd_remove_edge(struct device *dev, void *data)
-{
-       struct qcom_smd_edge *edge = to_smd_edge(dev);
-
-       return qcom_smd_unregister_edge(edge);
-}
-
-/*
- * Shut down all smd clients by making sure that each edge stops processing
- * events and scanning for new channels, then call destroy on the devices.
- */
-static int qcom_smd_remove(struct platform_device *pdev)
-{
-       int ret;
-
-       ret = device_for_each_child(&pdev->dev, NULL, qcom_smd_remove_edge);
-       if (ret)
-               dev_warn(&pdev->dev, "can't remove smd device: %d\n", ret);
-
-       return ret;
-}
-
-static const struct of_device_id qcom_smd_of_match[] = {
-       { .compatible = "qcom,smd" },
-       {}
-};
-MODULE_DEVICE_TABLE(of, qcom_smd_of_match);
-
-static struct platform_driver qcom_smd_driver = {
-       .probe = qcom_smd_probe,
-       .remove = qcom_smd_remove,
-       .driver = {
-               .name = "qcom-smd",
-               .of_match_table = qcom_smd_of_match,
-       },
-};
-
-static int __init qcom_smd_init(void)
-{
-       int ret;
-
-       ret = bus_register(&qcom_smd_bus);
-       if (ret) {
-               pr_err("failed to register smd bus: %d\n", ret);
-               return ret;
-       }
-
-       return platform_driver_register(&qcom_smd_driver);
-}
-postcore_initcall(qcom_smd_init);
-
-static void __exit qcom_smd_exit(void)
-{
-       platform_driver_unregister(&qcom_smd_driver);
-       bus_unregister(&qcom_smd_bus);
-}
-module_exit(qcom_smd_exit);
-
-MODULE_AUTHOR("Bjorn Andersson <bjorn.andersson@sonymobile.com>");
-MODULE_DESCRIPTION("Qualcomm Shared Memory Driver");
-MODULE_LICENSE("GPL v2");
index 520aedd29965498295b3992e4c8c76d2ea6b8c04..b9069184df193f34aa6d9c8bfa4f6502d6764574 100644 (file)
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/soc/qcom/smd.h>
 #include <linux/io.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/rpmsg.h>
 #include <linux/soc/qcom/wcnss_ctrl.h>
 
 #define WCNSS_REQUEST_TIMEOUT  (5 * HZ)
@@ -40,7 +40,7 @@
  */
 struct wcnss_ctrl {
        struct device *dev;
-       struct qcom_smd_channel *channel;
+       struct rpmsg_endpoint *channel;
 
        struct completion ack;
        struct completion cbc;
@@ -122,11 +122,13 @@ struct wcnss_download_nv_resp {
  *
  * Handles any incoming packets from the remote WCNSS_CTRL service.
  */
-static int wcnss_ctrl_smd_callback(struct qcom_smd_channel *channel,
-                                  const void *data,
-                                  size_t count)
+static int wcnss_ctrl_smd_callback(struct rpmsg_device *rpdev,
+                                  void *data,
+                                  int count,
+                                  void *priv,
+                                  u32 addr)
 {
-       struct wcnss_ctrl *wcnss = qcom_smd_get_drvdata(channel);
+       struct wcnss_ctrl *wcnss = dev_get_drvdata(&rpdev->dev);
        const struct wcnss_download_nv_resp *nvresp;
        const struct wcnss_version_resp *version;
        const struct wcnss_msg_hdr *hdr = data;
@@ -180,7 +182,7 @@ static int wcnss_request_version(struct wcnss_ctrl *wcnss)
 
        msg.type = WCNSS_VERSION_REQ;
        msg.len = sizeof(msg);
-       ret = qcom_smd_send(wcnss->channel, &msg, sizeof(msg));
+       ret = rpmsg_send(wcnss->channel, &msg, sizeof(msg));
        if (ret < 0)
                return ret;
 
@@ -238,7 +240,7 @@ static int wcnss_download_nv(struct wcnss_ctrl *wcnss, bool *expect_cbc)
 
                memcpy(req->fragment, data, req->frag_size);
 
-               ret = qcom_smd_send(wcnss->channel, req, req->hdr.len);
+               ret = rpmsg_send(wcnss->channel, req, req->hdr.len);
                if (ret < 0) {
                        dev_err(wcnss->dev, "failed to send smd packet\n");
                        goto release_fw;
@@ -274,11 +276,16 @@ free_req:
  * @name:      SMD channel name
  * @cb:                callback to handle incoming data on the channel
  */
-struct qcom_smd_channel *qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb)
+struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, const char *name, rpmsg_rx_cb_t cb, void *priv)
 {
+       struct rpmsg_channel_info chinfo;
        struct wcnss_ctrl *_wcnss = wcnss;
 
-       return qcom_smd_open_channel(_wcnss->channel, name, cb);
+       strncpy(chinfo.name, name, sizeof(chinfo.name));
+       chinfo.src = RPMSG_ADDR_ANY;
+       chinfo.dst = RPMSG_ADDR_ANY;
+
+       return rpmsg_create_ept(_wcnss->channel->rpdev, cb, priv, chinfo);
 }
 EXPORT_SYMBOL(qcom_wcnss_open_channel);
 
@@ -306,35 +313,34 @@ static void wcnss_async_probe(struct work_struct *work)
        of_platform_populate(wcnss->dev->of_node, NULL, NULL, wcnss->dev);
 }
 
-static int wcnss_ctrl_probe(struct qcom_smd_device *sdev)
+static int wcnss_ctrl_probe(struct rpmsg_device *rpdev)
 {
        struct wcnss_ctrl *wcnss;
 
-       wcnss = devm_kzalloc(&sdev->dev, sizeof(*wcnss), GFP_KERNEL);
+       wcnss = devm_kzalloc(&rpdev->dev, sizeof(*wcnss), GFP_KERNEL);
        if (!wcnss)
                return -ENOMEM;
 
-       wcnss->dev = &sdev->dev;
-       wcnss->channel = sdev->channel;
+       wcnss->dev = &rpdev->dev;
+       wcnss->channel = rpdev->ept;
 
        init_completion(&wcnss->ack);
        init_completion(&wcnss->cbc);
        INIT_WORK(&wcnss->probe_work, wcnss_async_probe);
 
-       qcom_smd_set_drvdata(sdev->channel, wcnss);
-       dev_set_drvdata(&sdev->dev, wcnss);
+       dev_set_drvdata(&rpdev->dev, wcnss);
 
        schedule_work(&wcnss->probe_work);
 
        return 0;
 }
 
-static void wcnss_ctrl_remove(struct qcom_smd_device *sdev)
+static void wcnss_ctrl_remove(struct rpmsg_device *rpdev)
 {
-       struct wcnss_ctrl *wcnss = qcom_smd_get_drvdata(sdev->channel);
+       struct wcnss_ctrl *wcnss = dev_get_drvdata(&rpdev->dev);
 
        cancel_work_sync(&wcnss->probe_work);
-       of_platform_depopulate(&sdev->dev);
+       of_platform_depopulate(&rpdev->dev);
 }
 
 static const struct of_device_id wcnss_ctrl_of_match[] = {
@@ -342,18 +348,18 @@ static const struct of_device_id wcnss_ctrl_of_match[] = {
        {}
 };
 
-static struct qcom_smd_driver wcnss_ctrl_driver = {
+static struct rpmsg_driver wcnss_ctrl_driver = {
        .probe = wcnss_ctrl_probe,
        .remove = wcnss_ctrl_remove,
        .callback = wcnss_ctrl_smd_callback,
-       .driver  = {
+       .drv  = {
                .name  = "qcom_wcnss_ctrl",
                .owner = THIS_MODULE,
                .of_match_table = wcnss_ctrl_of_match,
        },
 };
 
-module_qcom_smd_driver(wcnss_ctrl_driver);
+module_rpmsg_driver(wcnss_ctrl_driver);
 
 MODULE_DESCRIPTION("Qualcomm WCNSS control driver");
 MODULE_LICENSE("GPL v2");
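
For callers of qcom_wcnss_open_channel() the visible change is the callback
type (rpmsg_rx_cb_t instead of qcom_smd_cb_t) plus the new priv argument, and
the error convention: rpmsg_create_ept() returns NULL rather than an ERR_PTR()
on failure. A hypothetical caller after the conversion (channel name is
illustrative):

    static int demo_open(void *wcnss, rpmsg_rx_cb_t cb, void *priv)
    {
            struct rpmsg_endpoint *ept;

            ept = qcom_wcnss_open_channel(wcnss, "WLAN_CTRL", cb, priv);
            if (!ept)  /* NULL, not an ERR_PTR(), on failure */
                    return -ENOMEM;

            /* ... rpmsg_send(ept, ...); later: rpmsg_destroy_ept(ept); */
            return 0;
    }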
index f5e330099bfca713f4cb12bd2dc77826fdad1b3b..fd7c16a7ca6e06ad53e6d6df54ab739550ae4a4a 100644 (file)
@@ -43,7 +43,7 @@
 #include "target_core_ua.h"
 
 static sense_reason_t core_alua_check_transition(int state, int valid,
-                                                int *primary);
+                                                int *primary, int explicit);
 static int core_alua_set_tg_pt_secondary_state(
                struct se_lun *lun, int explicit, int offline);
 
@@ -335,8 +335,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
                 * the state is a primary or secondary target port asymmetric
                 * access state.
                 */
-               rc = core_alua_check_transition(alua_access_state,
-                                               valid_states, &primary);
+               rc = core_alua_check_transition(alua_access_state, valid_states,
+                                               &primary, 1);
                if (rc) {
                        /*
                         * If the SET TARGET PORT GROUPS attempts to establish
@@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
        if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
                return 0;
-       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
                return 0;
 
        /*
@@ -762,7 +762,7 @@ target_alua_state_check(struct se_cmd *cmd)
  * Check implicit and explicit ALUA state change request.
  */
 static sense_reason_t
-core_alua_check_transition(int state, int valid, int *primary)
+core_alua_check_transition(int state, int valid, int *primary, int explicit)
 {
        /*
         * OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are
@@ -804,11 +804,14 @@ core_alua_check_transition(int state, int valid, int *primary)
                *primary = 0;
                break;
        case ALUA_ACCESS_STATE_TRANSITION:
-               /*
-                * Transitioning is set internally, and
-                * cannot be selected manually.
-                */
-               goto not_supported;
+               if (!(valid & ALUA_T_SUP) || explicit)
+                       /*
+                        * Transitioning is set internally and by the tcmu
+                        * daemon, and cannot be selected through an STPG.
+                        */
+                       goto not_supported;
+               *primary = 0;
+               break;
        default:
                pr_err("Unknown ALUA access state: 0x%02x\n", state);
                return TCM_INVALID_PARAMETER_LIST;
@@ -1013,7 +1016,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
 static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
 {
        struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-               struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work);
+               struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
        struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
        bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
                         ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
@@ -1070,32 +1073,19 @@ static int core_alua_do_transition_tg_pt(
        if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
                return 0;
 
-       if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+       if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
                return -EAGAIN;
 
        /*
         * Flush any pending transitions
         */
-       if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs &&
-           atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
-           ALUA_ACCESS_STATE_TRANSITION) {
-               /* Just in case */
-               tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-               tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-               flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
-               wait_for_completion(&wait);
-               tg_pt_gp->tg_pt_gp_transition_complete = NULL;
-               return 0;
-       }
+       if (!explicit)
+               flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
 
        /*
         * Save the old primary ALUA access state, and set the current state
         * to ALUA_ACCESS_STATE_TRANSITION.
         */
-       tg_pt_gp->tg_pt_gp_alua_previous_state =
-               atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-       tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-
        atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
                        ALUA_ACCESS_STATE_TRANSITION);
        tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
@@ -1104,6 +1094,13 @@ static int core_alua_do_transition_tg_pt(
 
        core_alua_queue_state_change_ua(tg_pt_gp);
 
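+       /* An implicit request for the TRANSITION state itself ends here;
+        * the group stays in transition until the daemon completes it. */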
+       if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+               return 0;
+
+       tg_pt_gp->tg_pt_gp_alua_previous_state =
+               atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+       tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
+
        /*
         * Check for the optional ALUA primary state transition delay
         */
@@ -1117,17 +1114,9 @@ static int core_alua_do_transition_tg_pt(
        atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
        spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-       if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
-               unsigned long transition_tmo;
-
-               transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
-               queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
-                                  &tg_pt_gp->tg_pt_gp_transition_work,
-                                  transition_tmo);
-       } else {
+       schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
+       if (explicit) {
                tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-               queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
-                                  &tg_pt_gp->tg_pt_gp_transition_work, 0);
                wait_for_completion(&wait);
                tg_pt_gp->tg_pt_gp_transition_complete = NULL;
        }
@@ -1149,8 +1138,12 @@ int core_alua_do_port_transition(
        struct t10_alua_tg_pt_gp *tg_pt_gp;
        int primary, valid_states, rc = 0;
 
+       if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
+               return -ENODEV;
+
        valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states;
-       if (core_alua_check_transition(new_state, valid_states, &primary) != 0)
+       if (core_alua_check_transition(new_state, valid_states, &primary,
+                                      explicit) != 0)
                return -EINVAL;
 
        local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem;
@@ -1695,8 +1688,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
        mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
        spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
        atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-       INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-                         core_alua_do_transition_tg_pt_work);
+       INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
+                 core_alua_do_transition_tg_pt_work);
        tg_pt_gp->tg_pt_gp_dev = dev;
        atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
                ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
@@ -1804,7 +1797,7 @@ void core_alua_free_tg_pt_gp(
        dev->t10_alua.alua_tg_pt_gps_counter--;
        spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-       flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
+       flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
 
        /*
         * Allow a struct t10_alua_tg_pt_gp_member * referenced by
@@ -1973,7 +1966,7 @@ ssize_t core_alua_store_tg_pt_gp_info(
        unsigned char buf[TG_PT_GROUP_NAME_BUF];
        int move = 0;
 
-       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
            (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                return -ENODEV;
 
@@ -2230,7 +2223,7 @@ ssize_t core_alua_store_offline_bit(
        unsigned long tmp;
        int ret;
 
-       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
            (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                return -ENODEV;
 
@@ -2316,7 +2309,8 @@ ssize_t core_alua_store_secondary_write_metadata(
 
 int core_setup_alua(struct se_device *dev)
 {
-       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+       if (!(dev->transport->transport_flags &
+            TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
            !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
                struct t10_alua_lu_gp_member *lu_gp_mem;
 
index 54b36c9835be3ae2127cb1f447321eba73b824ac..38b5025e4c7a877f9e5c0bcfa6995262b6330e32 100644 (file)
@@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
                pr_err("Missing tfo->aborted_task()\n");
                return -EINVAL;
        }
+       if (!tfo->check_stop_free) {
+               pr_err("Missing tfo->check_stop_free()\n");
+               return -EINVAL;
+       }
        /*
         * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn()
         * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in
index a8f8e53f2f574852de573a08a86ad1c25b4cf332..94cda7991e80abbffb32941c8d8f5cfcbd262e3f 100644 (file)
@@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
 
        buf = kzalloc(12, GFP_KERNEL);
        if (!buf)
-               return;
+               goto out_free;
 
        memset(cdb, 0, MAX_COMMAND_SIZE);
        cdb[0] = MODE_SENSE;
@@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
         * If MODE_SENSE still returns zero, set the default value to 1024.
         */
        sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]);
+out_free:
        if (!sdev->sector_size)
                sdev->sector_size = 1024;
-out_free:
+
        kfree(buf);
 }
 
@@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
                                sd->lun, sd->queue_depth);
        }
 
-       dev->dev_attrib.hw_block_size = sd->sector_size;
+       dev->dev_attrib.hw_block_size =
+               min_not_zero((int)sd->sector_size, 512);
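+       /* sd->sector_size may still be zero at this point (e.g. a tape
+        * whose blocksize is probed later); fall back to 512 instead of
+        * exporting a zero block size. */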
        dev->dev_attrib.hw_max_sectors =
-               min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q));
+               min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q));
        dev->dev_attrib.hw_queue_depth = sd->queue_depth;
 
        /*
@@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
        /*
         * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE.
         */
-       if (sd->type == TYPE_TAPE)
+       if (sd->type == TYPE_TAPE) {
                pscsi_tape_read_blocksize(dev, sd);
+               dev->dev_attrib.hw_block_size = sd->sector_size;
+       }
        return 0;
 }
 
@@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
 /*
  * Called with struct Scsi_Host->host_lock held.
  */
-static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
+static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd)
        __releases(sh->host_lock)
 {
        struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
@@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
        return 0;
 }
 
-/*
- * Called with struct Scsi_Host->host_lock called.
- */
-static int pscsi_create_type_other(struct se_device *dev,
-               struct scsi_device *sd)
-       __releases(sh->host_lock)
-{
-       struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
-       struct Scsi_Host *sh = sd->host;
-       int ret;
-
-       spin_unlock_irq(sh->host_lock);
-       ret = pscsi_add_device_to_list(dev, sd);
-       if (ret)
-               return ret;
-
-       pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n",
-               phv->phv_host_id, scsi_device_type(sd->type), sh->host_no,
-               sd->channel, sd->id, sd->lun);
-       return 0;
-}
-
 static int pscsi_configure_device(struct se_device *dev)
 {
        struct se_hba *hba = dev->se_hba;
@@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev)
                case TYPE_DISK:
                        ret = pscsi_create_type_disk(dev, sd);
                        break;
-               case TYPE_ROM:
-                       ret = pscsi_create_type_rom(dev, sd);
-                       break;
                default:
-                       ret = pscsi_create_type_other(dev, sd);
+                       ret = pscsi_create_type_nondisk(dev, sd);
                        break;
                }
 
@@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev)
                else if (pdv->pdv_lld_host)
                        scsi_host_put(pdv->pdv_lld_host);
 
-               if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM))
-                       scsi_device_put(sd);
+               scsi_device_put(sd);
 
                pdv->pdv_sd = NULL;
        }
@@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
        if (pdv->pdv_bd && pdv->pdv_bd->bd_part)
                return pdv->pdv_bd->bd_part->nr_sects;
 
-       dump_stack();
        return 0;
 }
 
@@ -1103,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate)
 static const struct target_backend_ops pscsi_ops = {
        .name                   = "pscsi",
        .owner                  = THIS_MODULE,
-       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
+       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH |
+                                 TRANSPORT_FLAG_PASSTHROUGH_ALUA,
        .attach_hba             = pscsi_attach_hba,
        .detach_hba             = pscsi_detach_hba,
        .pmode_enable_hba       = pscsi_pmode_enable_hba,
index 68d8aef7ab78d4084b57e6fd0fa0b0afce7251df..c194063f169b13ce44bf014894960693530e25d7 100644 (file)
@@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                        return ret;
                break;
        case VERIFY:
+       case VERIFY_16:
                size = 0;
-               sectors = transport_get_sectors_10(cdb);
-               cmd->t_task_lba = transport_lba_32(cdb);
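+               /* VERIFY keeps the 10-byte CDB decode (32-bit LBA);
+                * VERIFY_16 needs the 64-bit LBA variant. */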
+               if (cdb[0] == VERIFY) {
+                       sectors = transport_get_sectors_10(cdb);
+                       cmd->t_task_lba = transport_lba_32(cdb);
+               } else {
+                       sectors = transport_get_sectors_16(cdb);
+                       cmd->t_task_lba = transport_lba_64(cdb);
+               }
                cmd->execute_cmd = sbc_emulate_noop;
                goto check_lba;
        case REZERO_UNIT:
index c0dbfa0165750523e552b93fdbb0c64c94cdab2d..6fb191914f458f7889508652e19b860355387491 100644 (file)
@@ -602,7 +602,8 @@ int core_tpg_add_lun(
        if (ret)
                goto out_kill_ref;
 
-       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+       if (!(dev->transport->transport_flags &
+            TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
            !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp);
 
index 434d9d693989179f72abca120e01155d664d0c87..b1a3cdb29468cf84e7eb48d6c8c41934c0b5b4cb 100644 (file)
@@ -636,8 +636,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
         * Fabric modules are expected to return '1' here if the se_cmd being
         * passed is released at this point, or zero if not being released.
         */
-       return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd)
-               : 0;
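+       /* check_stop_free() is enforced at fabric registration time, so
+        * it is always non-NULL here. */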
+       return cmd->se_tfo->check_stop_free(cmd);
 }
 
 static void transport_lun_remove_cmd(struct se_cmd *cmd)
index c3adefe95e50f7f7054e272e15fc5e37663d11c9..c6874c38a10bc45e86beae58ddfed175664d51cf 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/stringify.h>
 #include <linux/bitops.h>
 #include <linux/highmem.h>
+#include <linux/configfs.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -112,6 +113,7 @@ struct tcmu_dev {
        spinlock_t commands_lock;
 
        struct timer_list timeout;
+       unsigned int cmd_time_out;
 
        char dev_config[TCMU_CONFIG_LEN];
 };
@@ -172,7 +174,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 
        tcmu_cmd->se_cmd = se_cmd;
        tcmu_cmd->tcmu_dev = udev;
-       tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
+       if (udev->cmd_time_out)
+               tcmu_cmd->deadline = jiffies +
+                                       msecs_to_jiffies(udev->cmd_time_out);
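+       /* A cmd_time_out of zero leaves the deadline unset; the expiry
+        * timer is never armed for such a device. */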
 
        idr_preload(GFP_KERNEL);
        spin_lock_irq(&udev->commands_lock);
@@ -451,7 +455,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
                pr_debug("sleeping for ring space\n");
                spin_unlock_irq(&udev->cmdr_lock);
-               ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
+               if (udev->cmd_time_out)
+                       ret = schedule_timeout(
+                                       msecs_to_jiffies(udev->cmd_time_out));
+               else
+                       ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
                finish_wait(&udev->wait_cmdr, &__wait);
                if (!ret) {
                        pr_warn("tcmu: command timed out\n");
@@ -526,8 +534,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
        /* TODO: only if FLUSH and FUA? */
        uio_event_notify(&udev->uio_info);
 
-       mod_timer(&udev->timeout,
-               round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
+       if (udev->cmd_time_out)
+               mod_timer(&udev->timeout, round_jiffies_up(jiffies +
+                         msecs_to_jiffies(udev->cmd_time_out)));
 
        return TCM_NO_SENSE;
 }
@@ -742,6 +751,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
        }
 
        udev->hba = hba;
+       udev->cmd_time_out = TCMU_TIME_OUT;
 
        init_waitqueue_head(&udev->wait_cmdr);
        spin_lock_init(&udev->cmdr_lock);
@@ -960,7 +970,8 @@ static int tcmu_configure_device(struct se_device *dev)
        if (dev->dev_attrib.hw_block_size == 0)
                dev->dev_attrib.hw_block_size = 512;
        /* Other attributes can be configured in userspace */
-       dev->dev_attrib.hw_max_sectors = 128;
+       if (!dev->dev_attrib.hw_max_sectors)
+               dev->dev_attrib.hw_max_sectors = 128;
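+       /* Respect a user-configured hw_max_sectors; 128 is only the
+        * fallback default. */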
        dev->dev_attrib.hw_queue_depth = 128;
 
        ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
@@ -997,6 +1008,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
        kfree(udev);
 }
 
+static bool tcmu_dev_configured(struct tcmu_dev *udev)
+{
+       return udev->uio_info.uio_dev ? true : false;
+}
+
 static void tcmu_free_device(struct se_device *dev)
 {
        struct tcmu_dev *udev = TCMU_DEV(dev);
@@ -1018,8 +1034,7 @@ static void tcmu_free_device(struct se_device *dev)
        spin_unlock_irq(&udev->commands_lock);
        WARN_ON(!all_expired);
 
-       /* Device was configured */
-       if (udev->uio_info.uio_dev) {
+       if (tcmu_dev_configured(udev)) {
                tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
                                   udev->uio_info.uio_dev->minor);
 
@@ -1031,16 +1046,42 @@ static void tcmu_free_device(struct se_device *dev)
 }
 
 enum {
-       Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err,
+       Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
+       Opt_err,
 };
 
 static match_table_t tokens = {
        {Opt_dev_config, "dev_config=%s"},
        {Opt_dev_size, "dev_size=%u"},
        {Opt_hw_block_size, "hw_block_size=%u"},
+       {Opt_hw_max_sectors, "hw_max_sectors=%u"},
        {Opt_err, NULL}
 };
 
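+/*
+ * Parse one numeric configfs token into a u32 device attribute,
+ * rejecting zero values.
+ */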
+static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib)
+{
+       unsigned long tmp_ul;
+       char *arg_p;
+       int ret;
+
+       arg_p = match_strdup(arg);
+       if (!arg_p)
+               return -ENOMEM;
+
+       ret = kstrtoul(arg_p, 0, &tmp_ul);
+       kfree(arg_p);
+       if (ret < 0) {
+               pr_err("kstrtoul() failed for dev attrib\n");
+               return ret;
+       }
+       if (!tmp_ul) {
+               pr_err("dev attrib must be nonzero\n");
+               return -EINVAL;
+       }
+       *dev_attrib = tmp_ul;
+       return 0;
+}
+
 static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
                const char *page, ssize_t count)
 {
@@ -1048,7 +1089,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
        char *orig, *ptr, *opts, *arg_p;
        substring_t args[MAX_OPT_ARGS];
        int ret = 0, token;
-       unsigned long tmp_ul;
 
        opts = kstrdup(page, GFP_KERNEL);
        if (!opts)
@@ -1082,26 +1122,19 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
                                pr_err("kstrtoul() failed for dev_size=\n");
                        break;
                case Opt_hw_block_size:
-                       arg_p = match_strdup(&args[0]);
-                       if (!arg_p) {
-                               ret = -ENOMEM;
-                               break;
-                       }
-                       ret = kstrtoul(arg_p, 0, &tmp_ul);
-                       kfree(arg_p);
-                       if (ret < 0) {
-                               pr_err("kstrtoul() failed for hw_block_size=\n");
-                               break;
-                       }
-                       if (!tmp_ul) {
-                               pr_err("hw_block_size must be nonzero\n");
-                               break;
-                       }
-                       dev->dev_attrib.hw_block_size = tmp_ul;
+                       ret = tcmu_set_dev_attrib(&args[0],
+                                       &(dev->dev_attrib.hw_block_size));
+                       break;
+               case Opt_hw_max_sectors:
+                       ret = tcmu_set_dev_attrib(&args[0],
+                                       &(dev->dev_attrib.hw_max_sectors));
                        break;
                default:
                        break;
                }
+
+               if (ret)
+                       break;
        }
 
        kfree(orig);
@@ -1134,7 +1167,48 @@ tcmu_parse_cdb(struct se_cmd *cmd)
        return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
 }
 
-static const struct target_backend_ops tcmu_ops = {
+static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
+{
+       struct se_dev_attrib *da = container_of(to_config_group(item),
+                                       struct se_dev_attrib, da_group);
+       struct tcmu_dev *udev = container_of(da->da_dev,
+                                       struct tcmu_dev, se_dev);
+
+       return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
+}
+
+static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
+                                      size_t count)
+{
+       struct se_dev_attrib *da = container_of(to_config_group(item),
+                                       struct se_dev_attrib, da_group);
+       struct tcmu_dev *udev = container_of(da->da_dev,
+                                       struct tcmu_dev, se_dev);
+       u32 val;
+       int ret;
+
+       if (da->da_dev->export_count) {
+               pr_err("Unable to set tcmu cmd_time_out while exports exist\n");
+               return -EINVAL;
+       }
+
+       ret = kstrtou32(page, 0, &val);
+       if (ret < 0)
+               return ret;
+
+       if (!val) {
+               pr_err("Illegal value for cmd_time_out\n");
+               return -EINVAL;
+       }
+
+       udev->cmd_time_out = val * MSEC_PER_SEC;
+       return count;
+}
+CONFIGFS_ATTR(tcmu_, cmd_time_out);
+
+static struct configfs_attribute **tcmu_attrs;
+
+static struct target_backend_ops tcmu_ops = {
        .name                   = "user",
        .owner                  = THIS_MODULE,
        .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
@@ -1148,12 +1222,12 @@ static const struct target_backend_ops tcmu_ops = {
        .show_configfs_dev_params = tcmu_show_configfs_dev_params,
        .get_device_type        = sbc_get_device_type,
        .get_blocks             = tcmu_get_blocks,
-       .tb_dev_attrib_attrs    = passthrough_attrib_attrs,
+       .tb_dev_attrib_attrs    = NULL,
 };
 
 static int __init tcmu_module_init(void)
 {
-       int ret;
+       int ret, i, len = 0;
 
        BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
 
@@ -1175,12 +1249,31 @@ static int __init tcmu_module_init(void)
                goto out_unreg_device;
        }
 
+       for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+               len += sizeof(struct configfs_attribute *);
+       }
+       len += sizeof(struct configfs_attribute *) * 2;
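+       /* Two extra slots: one for cmd_time_out and one left zeroed by
+        * kzalloc() as the NULL terminator. */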
+
+       tcmu_attrs = kzalloc(len, GFP_KERNEL);
+       if (!tcmu_attrs) {
+               ret = -ENOMEM;
+               goto out_unreg_genl;
+       }
+
+       for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+               tcmu_attrs[i] = passthrough_attrib_attrs[i];
+       }
+       tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
+       tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
+
        ret = transport_backend_register(&tcmu_ops);
        if (ret)
-               goto out_unreg_genl;
+               goto out_attrs;
 
        return 0;
 
+out_attrs:
+       kfree(tcmu_attrs);
 out_unreg_genl:
        genl_unregister_family(&tcmu_genl_family);
 out_unreg_device:
@@ -1194,6 +1287,7 @@ out_free_cache:
 static void __exit tcmu_module_exit(void)
 {
        target_backend_unregister(&tcmu_ops);
+       kfree(tcmu_attrs);
        genl_unregister_family(&tcmu_genl_family);
        root_device_unregister(tcmu_root_device);
        kmem_cache_destroy(tcmu_cmd_cache);
index bcf1d33e6ffe0b3cb9952e88046658df1358543b..c334bcc59c649eedc2933ac29c4dc1ef45ae21d2 100644 (file)
@@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios,
                        pinctrl_select_state(ascport->pinctrl,
                                             ascport->states[NO_HW_FLOWCTRL]);
 
-                       gpiod = devm_get_gpiod_from_child(port->dev, "rts",
-                                                         &np->fwnode);
-                       if (!IS_ERR(gpiod)) {
-                               gpiod_direction_output(gpiod, 0);
+                       gpiod = devm_fwnode_get_gpiod_from_child(port->dev,
+                                                                "rts",
+                                                                &np->fwnode,
+                                                                GPIOD_OUT_LOW,
+                                                                np->name);
+                       if (!IS_ERR(gpiod))
                                ascport->rts = gpiod;
-                       }
                }
        }
 
index 224717e63a5300970867a663cd030d8cd62068f6..864819ff9a7d362962eb837755886423b57906fb 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/etherdevice.h>
index ce5e63d2c66aac7d019c422ec294cab025e94e5e..44eed8eb0725b25e3c9765e19387e7c338ab9bbb 100644 (file)
@@ -223,6 +223,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
        return len;
 }
 
+static int
+vhost_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+       struct vhost_vsock *vsock;
+       struct virtio_vsock_pkt *pkt, *n;
+       int cnt = 0;
+       LIST_HEAD(freeme);
+
+       /* Find the vhost_vsock according to guest context id  */
+       vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
+       if (!vsock)
+               return -ENODEV;
+
+       spin_lock_bh(&vsock->send_pkt_list_lock);
+       list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+               if (pkt->vsk != vsk)
+                       continue;
+               list_move(&pkt->list, &freeme);
+       }
+       spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+       list_for_each_entry_safe(pkt, n, &freeme, list) {
+               if (pkt->reply)
+                       cnt++;
+               list_del(&pkt->list);
+               virtio_transport_free_pkt(pkt);
+       }
+
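+       /* Cancelled replies may have been throttling the TX virtqueue; if
+        * queued_replies just dropped back below tx_vq->num, kick it. */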
+       if (cnt) {
+               struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+               int new_cnt;
+
+               new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+               if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
+                       vhost_poll_queue(&tx_vq->poll);
+       }
+
+       return 0;
+}
+
 static struct virtio_vsock_pkt *
 vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
                      unsigned int out, unsigned int in)
@@ -675,6 +715,7 @@ static struct virtio_transport vhost_transport = {
                .release                  = virtio_transport_release,
                .connect                  = virtio_transport_connect,
                .shutdown                 = virtio_transport_shutdown,
+               .cancel_pkt               = vhost_transport_cancel_pkt,
 
                .dgram_enqueue            = virtio_transport_dgram_enqueue,
                .dgram_dequeue            = virtio_transport_dgram_dequeue,
index c77a0751a31173344de0c02c3f70d18ec259ca63..f3bf8f4e2d6cef09101b53aa9f1a69563b206287 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/refcount.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -86,7 +87,7 @@ struct grant_map {
        int index;
        int count;
        int flags;
-       atomic_t users;
+       refcount_t users;
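+       /* users is a lifetime refcount; refcount_t saturates rather than
+        * wrapping on overflow, unlike plain atomic_t. */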
        struct unmap_notify notify;
        struct ioctl_gntdev_grant_ref *grants;
        struct gnttab_map_grant_ref   *map_ops;
@@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 
        add->index = 0;
        add->count = count;
-       atomic_set(&add->users, 1);
+       refcount_set(&add->users, 1);
 
        return add;
 
@@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
        if (!map)
                return;
 
-       if (!atomic_dec_and_test(&map->users))
+       if (!refcount_dec_and_test(&map->users))
                return;
 
        atomic_sub(map->count, &pages_mapped);
@@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma)
        struct grant_map *map = vma->vm_private_data;
 
        pr_debug("gntdev_vma_open %p\n", vma);
-       atomic_inc(&map->users);
+       refcount_inc(&map->users);
 }
 
 static void gntdev_vma_close(struct vm_area_struct *vma)
@@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
                goto unlock_out;
        }
 
-       atomic_inc(&map->users);
+       refcount_inc(&map->users);
 
        vma->vm_ops = &gntdev_vmops;
 
index b29447e03ede0d638950fa0dd64d908004156ea6..25d404d22caebcfd6b6b60d6287e36258f1185eb 100644 (file)
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
 {
        struct afs_server *server;
        struct afs_vnode *vnode, *xvnode;
-       time_t now;
+       time64_t now;
        long timeout;
        int ret;
 
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
 
        _enter("");
 
-       now = get_seconds();
+       now = ktime_get_real_seconds();
 
        /* find the first vnode to update */
        spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
 
        /* and then reschedule */
        _debug("reschedule");
-       vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+       vnode->update_at = ktime_get_real_seconds() +
+                       afs_vnode_update_timeout;
 
        spin_lock(&server->cb_lock);
 
index 2edbdcbf6432add190464b5a5f414592953c944a..3062cceb5c2aebcc4a15e3c52d1b26ecea82f20d 100644 (file)
@@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
        struct afs_callback *cb;
        struct afs_server *server;
        __be32 *bp;
-       u32 tmp;
        int ret, loop;
 
        _enter("{%u}", call->unmarshall);
@@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                if (ret < 0)
                        return ret;
 
-               tmp = ntohl(call->tmp);
-               _debug("CB count: %u", tmp);
-               if (tmp != call->count && tmp != 0)
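+               /* The server is permitted to send an empty CB array; keep
+                * its count in count2 so the array extraction below matches
+                * what was actually sent. */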
+               call->count2 = ntohl(call->tmp);
+               _debug("CB count: %u", call->count2);
+               if (call->count2 != call->count && call->count2 != 0)
                        return -EBADMSG;
                call->offset = 0;
                call->unmarshall++;
@@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call)
        case 4:
                _debug("extract CB array");
                ret = afs_extract_data(call, call->buffer,
-                                      call->count * 3 * 4, false);
+                                      call->count2 * 3 * 4, false);
                if (ret < 0)
                        return ret;
 
                _debug("unmarshall CB array");
                cb = call->request;
                bp = call->buffer;
-               for (loop = call->count; loop > 0; loop--, cb++) {
+               for (loop = call->count2; loop > 0; loop--, cb++) {
                        cb->version     = ntohl(*bp++);
                        cb->expiry      = ntohl(*bp++);
                        cb->type        = ntohl(*bp++);
index ba7b71fba34bcc4cd5f8b8a305ace06a388ac607..0d5b8508869bf0642a88d4c87b3feb49c1fab433 100644 (file)
@@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
 
 const struct file_operations afs_file_operations = {
        .open           = afs_open,
+       .flush          = afs_flush,
        .release        = afs_release,
        .llseek         = generic_file_llseek,
        .read_iter      = generic_file_read_iter,
@@ -184,10 +185,13 @@ int afs_page_filler(void *data, struct page *page)
                if (!req)
                        goto enomem;
 
+               /* We request a full page.  If the page is a partial one at the
+                * end of the file, the server will return a short read and the
+                * unmarshalling code will clear the unfilled space.
+                */
                atomic_set(&req->usage, 1);
                req->pos = (loff_t)page->index << PAGE_SHIFT;
-               req->len = min_t(size_t, i_size_read(inode) - req->pos,
-                                PAGE_SIZE);
+               req->len = PAGE_SIZE;
                req->nr_pages = 1;
                req->pages[0] = page;
                get_page(page);
@@ -208,7 +212,13 @@ int afs_page_filler(void *data, struct page *page)
                        fscache_uncache_page(vnode->cache, page);
 #endif
                        BUG_ON(PageFsCache(page));
-                       goto error;
+
+                       if (ret == -EINTR ||
+                           ret == -ENOMEM ||
+                           ret == -ERESTARTSYS ||
+                           ret == -EAGAIN)
+                               goto error;
+                       goto io_error;
                }
 
                SetPageUptodate(page);
@@ -227,10 +237,12 @@ int afs_page_filler(void *data, struct page *page)
        _leave(" = 0");
        return 0;
 
+io_error:
+       SetPageError(page);
+       goto error;
 enomem:
        ret = -ENOMEM;
 error:
-       SetPageError(page);
        unlock_page(page);
        _leave(" = %d", ret);
        return ret;
index ac8e766978dc440e8690fbf44333d41f9894f92a..19f76ae36982df43be740c1bf73d396b1a81c77c 100644 (file)
 #include "internal.h"
 #include "afs_fs.h"
 
+/*
+ * We need somewhere to discard into in case the server helpfully returns more
+ * than we asked for in FS.FetchData{,64}.
+ */
+static u8 afs_discard_buffer[64];
+
 /*
  * decode an AFSFid block
  */
@@ -105,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
                        vnode->vfs_inode.i_mode = mode;
                }
 
-               vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+               vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
                vnode->vfs_inode.i_mtime        = vnode->vfs_inode.i_ctime;
                vnode->vfs_inode.i_atime        = vnode->vfs_inode.i_ctime;
                vnode->vfs_inode.i_version      = data_version;
@@ -139,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
        vnode->cb_version       = ntohl(*bp++);
        vnode->cb_expiry        = ntohl(*bp++);
        vnode->cb_type          = ntohl(*bp++);
-       vnode->cb_expires       = vnode->cb_expiry + get_seconds();
+       vnode->cb_expires       = vnode->cb_expiry + ktime_get_real_seconds();
        *_bp = bp;
 }
 
@@ -315,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
        void *buffer;
        int ret;
 
-       _enter("{%u,%zu/%u;%u/%llu}",
+       _enter("{%u,%zu/%u;%llu/%llu}",
               call->unmarshall, call->offset, call->count,
               req->remain, req->actual_len);
 
@@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
                req->actual_len |= ntohl(call->tmp);
                _debug("DATA length: %llu", req->actual_len);
-               /* Check that the server didn't want to send us extra.  We
-                * might want to just discard instead, but that requires
-                * cooperation from AF_RXRPC.
-                */
-               if (req->actual_len > req->len)
-                       return -EBADMSG;
 
                req->remain = req->actual_len;
                call->offset = req->pos & (PAGE_SIZE - 1);
@@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                call->unmarshall++;
 
        begin_page:
+               ASSERTCMP(req->index, <, req->nr_pages);
                if (req->remain > PAGE_SIZE - call->offset)
                        size = PAGE_SIZE - call->offset;
                else
@@ -378,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
                /* extract the returned data */
        case 3:
-               _debug("extract data %u/%llu %zu/%u",
+               _debug("extract data %llu/%llu %zu/%u",
                       req->remain, req->actual_len, call->offset, call->count);
 
                buffer = kmap(req->pages[req->index]);
@@ -389,19 +390,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                if (call->offset == PAGE_SIZE) {
                        if (req->page_done)
                                req->page_done(call, req);
+                       req->index++;
                        if (req->remain > 0) {
-                               req->index++;
                                call->offset = 0;
+                               if (req->index >= req->nr_pages) {
+                                       call->unmarshall = 4;
+                                       goto begin_discard;
+                               }
                                goto begin_page;
                        }
                }
+               goto no_more_data;
+
+               /* Discard any excess data the server gave us */
+       begin_discard:
+       case 4:
+               size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
+               call->count = size;
+               _debug("extract discard %llu/%llu %zu/%u",
+                      req->remain, req->actual_len, call->offset, call->count);
+
+               call->offset = 0;
+               ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
+               req->remain -= call->offset;
+               if (ret < 0)
+                       return ret;
+               if (req->remain > 0)
+                       goto begin_discard;
 
        no_more_data:
                call->offset = 0;
-               call->unmarshall++;
+               call->unmarshall = 5;
 
                /* extract the metadata */
-       case 4:
+       case 5:
                ret = afs_extract_data(call, call->buffer,
                                       (21 + 3 + 6) * 4, false);
                if (ret < 0)
@@ -416,16 +438,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                call->offset = 0;
                call->unmarshall++;
 
-       case 5:
+       case 6:
                break;
        }
 
-       if (call->count < PAGE_SIZE) {
-               buffer = kmap(req->pages[req->index]);
-               memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-               kunmap(req->pages[req->index]);
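+       /* A short read can leave trailing pages untouched; clear the
+        * unfilled space in each so no stale data is exposed. */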
+       for (; req->index < req->nr_pages; req->index++) {
+               if (call->count < PAGE_SIZE)
+                       zero_user_segment(req->pages[req->index],
+                                         call->count, PAGE_SIZE);
                if (req->page_done)
                        req->page_done(call, req);
+               call->count = 0;
        }
 
        _leave(" = 0 [done]");
@@ -711,8 +734,8 @@ int afs_fs_create(struct afs_server *server,
                memset(bp, 0, padsz);
                bp = (void *) bp + padsz;
        }
-       *bp++ = htonl(AFS_SET_MODE);
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
        *bp++ = 0; /* owner */
        *bp++ = 0; /* group */
        *bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -980,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server,
                memset(bp, 0, c_padsz);
                bp = (void *) bp + c_padsz;
        }
-       *bp++ = htonl(AFS_SET_MODE);
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
        *bp++ = 0; /* owner */
        *bp++ = 0; /* group */
        *bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1180,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server,
        *bp++ = htonl(vnode->fid.vnode);
        *bp++ = htonl(vnode->fid.unique);
 
-       *bp++ = 0; /* mask */
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MTIME); /* mask */
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
        *bp++ = 0; /* owner */
        *bp++ = 0; /* group */
        *bp++ = 0; /* unix mode */
@@ -1213,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
        _enter(",%x,{%x:%u},,",
               key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
 
-       size = to - offset;
+       size = (loff_t)to - (loff_t)offset;
        if (first != last)
                size += (loff_t)(last - first) << PAGE_SHIFT;
        pos = (loff_t)first << PAGE_SHIFT;
@@ -1257,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
        *bp++ = htonl(vnode->fid.vnode);
        *bp++ = htonl(vnode->fid.unique);
 
-       *bp++ = 0; /* mask */
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MTIME); /* mask */
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
        *bp++ = 0; /* owner */
        *bp++ = 0; /* group */
        *bp++ = 0; /* unix mode */
index 1e4897a048d2ee0dee49b613f22336b7118ff9f8..aae55dd151087e16f123adc0ebe51e47e393b297 100644 (file)
@@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
                inode->i_fop    = &afs_dir_file_operations;
                break;
        case AFS_FTYPE_SYMLINK:
-               inode->i_mode   = S_IFLNK | vnode->status.mode;
-               inode->i_op     = &page_symlink_inode_operations;
+               /* Symlinks with a mode of 0644 are actually mountpoints. */
+               if ((vnode->status.mode & 0777) == 0644) {
+                       inode->i_flags |= S_AUTOMOUNT;
+
+                       spin_lock(&vnode->lock);
+                       set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+                       spin_unlock(&vnode->lock);
+
+                       inode->i_mode   = S_IFDIR | 0555;
+                       inode->i_op     = &afs_mntpt_inode_operations;
+                       inode->i_fop    = &afs_mntpt_file_operations;
+               } else {
+                       inode->i_mode   = S_IFLNK | vnode->status.mode;
+                       inode->i_op     = &page_symlink_inode_operations;
+               }
                inode_nohighmem(inode);
                break;
        default:
@@ -70,27 +83,15 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 
        set_nlink(inode, vnode->status.nlink);
        inode->i_uid            = vnode->status.owner;
-       inode->i_gid            = GLOBAL_ROOT_GID;
+       inode->i_gid            = vnode->status.group;
        inode->i_size           = vnode->status.size;
-       inode->i_ctime.tv_sec   = vnode->status.mtime_server;
+       inode->i_ctime.tv_sec   = vnode->status.mtime_client;
        inode->i_ctime.tv_nsec  = 0;
        inode->i_atime          = inode->i_mtime = inode->i_ctime;
        inode->i_blocks         = 0;
        inode->i_generation     = vnode->fid.unique;
        inode->i_version        = vnode->status.data_version;
        inode->i_mapping->a_ops = &afs_fs_aops;
-
-       /* check to see whether a symbolic link is really a mountpoint */
-       if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-               afs_mntpt_check_symlink(vnode, key);
-
-               if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
-                       inode->i_mode   = S_IFDIR | vnode->status.mode;
-                       inode->i_op     = &afs_mntpt_inode_operations;
-                       inode->i_fop    = &afs_mntpt_file_operations;
-               }
-       }
-
        return 0;
 }
 
@@ -245,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
                        vnode->cb_version = 0;
                        vnode->cb_expiry = 0;
                        vnode->cb_type = 0;
-                       vnode->cb_expires = get_seconds();
+                       vnode->cb_expires = ktime_get_real_seconds();
                } else {
                        vnode->cb_version = cb->version;
                        vnode->cb_expiry = cb->expiry;
                        vnode->cb_type = cb->type;
-                       vnode->cb_expires = vnode->cb_expiry + get_seconds();
+                       vnode->cb_expires = vnode->cb_expiry +
+                               ktime_get_real_seconds();
                }
        }
 
@@ -323,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
            !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
            !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
            !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-               if (vnode->cb_expires < get_seconds() + 10) {
+               if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
                        _debug("callback expired");
                        set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
                } else {
@@ -444,7 +446,7 @@ void afs_evict_inode(struct inode *inode)
 
        mutex_lock(&vnode->permits_lock);
        permits = vnode->permits;
-       rcu_assign_pointer(vnode->permits, NULL);
+       RCU_INIT_POINTER(vnode->permits, NULL);
        mutex_unlock(&vnode->permits_lock);
        if (permits)
                call_rcu(&permits->rcu, afs_zap_permits);
index 5dfa56903a2d4b6ff058160ef973efaaa5e690d8..a6901360fb81d435bf47a85b781a89a1056fd900 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/rxrpc.h>
@@ -90,7 +91,10 @@ struct afs_call {
        unsigned                request_size;   /* size of request data */
        unsigned                reply_max;      /* maximum size of reply */
        unsigned                first_offset;   /* offset into mapping[first] */
-       unsigned                last_to;        /* amount of mapping[last] */
+       union {
+               unsigned        last_to;        /* amount of mapping[last] */
+               unsigned        count2;         /* count used in unmarshalling */
+       };
        unsigned char           unmarshall;     /* unmarshalling phase */
        bool                    incoming;       /* T if incoming call */
        bool                    send_pages;     /* T if data from mapping should be sent */
@@ -127,12 +131,11 @@ struct afs_call_type {
  */
 struct afs_read {
        loff_t                  pos;            /* Where to start reading */
-       loff_t                  len;            /* How much to read */
+       loff_t                  len;            /* How much we're asking for */
        loff_t                  actual_len;     /* How much we're actually getting */
+       loff_t                  remain;         /* Amount remaining */
        atomic_t                usage;
-       unsigned int            remain;         /* Amount remaining */
        unsigned int            index;          /* Which page we're reading into */
-       unsigned int            pg_offset;      /* Offset in page we're at */
        unsigned int            nr_pages;
        void (*page_done)(struct afs_call *, struct afs_read *);
        struct page             *pages[];
@@ -247,7 +250,7 @@ struct afs_cache_vhash {
  */
 struct afs_vlocation {
        atomic_t                usage;
-       time_t                  time_of_death;  /* time at which put reduced usage to 0 */
+       time64_t                time_of_death;  /* time at which put reduced usage to 0 */
        struct list_head        link;           /* link in cell volume location list */
        struct list_head        grave;          /* link in master graveyard list */
        struct list_head        update;         /* link in master update list */
@@ -258,7 +261,7 @@ struct afs_vlocation {
        struct afs_cache_vlocation vldb;        /* volume information DB record */
        struct afs_volume       *vols[3];       /* volume access record pointer (index by type) */
        wait_queue_head_t       waitq;          /* status change waitqueue */
-       time_t                  update_at;      /* time at which record should be updated */
+       time64_t                update_at;      /* time at which record should be updated */
        spinlock_t              lock;           /* access lock */
        afs_vlocation_state_t   state;          /* volume location state */
        unsigned short          upd_rej_cnt;    /* ENOMEDIUM count during update */
@@ -271,7 +274,7 @@ struct afs_vlocation {
  */
 struct afs_server {
        atomic_t                usage;
-       time_t                  time_of_death;  /* time at which put reduced usage to 0 */
+       time64_t                time_of_death;  /* time at which put reduced usage to 0 */
        struct in_addr          addr;           /* server address */
        struct afs_cell         *cell;          /* cell in which server resides */
        struct list_head        link;           /* link in cell's server list */
@@ -374,8 +377,8 @@ struct afs_vnode {
        struct rb_node          server_rb;      /* link in server->fs_vnodes */
        struct rb_node          cb_promise;     /* link in server->cb_promises */
        struct work_struct      cb_broken_work; /* work to be done on callback break */
-       time_t                  cb_expires;     /* time at which callback expires */
-       time_t                  cb_expires_at;  /* time used to order cb_promise */
+       time64_t                cb_expires;     /* time at which callback expires */
+       time64_t                cb_expires_at;  /* time used to order cb_promise */
        unsigned                cb_version;     /* callback version */
        unsigned                cb_expiry;      /* callback expiry time */
        afs_callback_type_t     cb_type;        /* type of callback */
@@ -557,7 +560,6 @@ extern const struct inode_operations afs_autocell_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
 
 extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
 extern void afs_mntpt_kill_timer(void);
 
 /*
@@ -718,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 
 
index 91ea1aa0d8b3ab0a817b525e9f9b3deec98f775f..100b207efc9eaddff4ed9f7e0e4415ed62ba2880 100644 (file)
@@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code)
        case RXKADDATALEN:      return -EKEYREJECTED;
        case RXKADILLEGALLEVEL: return -EKEYREJECTED;
 
+       case RXGEN_OPCODE:      return -ENOTSUPP;
+
        default:                return -EREMOTEIO;
        }
 }
index d4fb0afc0097d4947d3c2013cf27f521b055d423..bd3b65cde282a24769f7c549c9fe52c85b6c8e4e 100644 (file)
@@ -46,59 +46,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
 
 static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
 
-/*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
-       struct page *page;
-       size_t size;
-       char *buf;
-       int ret;
-
-       _enter("{%x:%u,%u}",
-              vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
-       /* read the contents of the symlink into the pagecache */
-       page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
-                              afs_page_filler, key);
-       if (IS_ERR(page)) {
-               ret = PTR_ERR(page);
-               goto out;
-       }
-
-       ret = -EIO;
-       if (PageError(page))
-               goto out_free;
-
-       buf = kmap(page);
-
-       /* examine the symlink's contents */
-       size = vnode->status.size;
-       _debug("symlink to %*.*s", (int) size, (int) size, buf);
-
-       if (size > 2 &&
-           (buf[0] == '%' || buf[0] == '#') &&
-           buf[size - 1] == '.'
-           ) {
-               _debug("symlink is a mountpoint");
-               spin_lock(&vnode->lock);
-               set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-               vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
-               spin_unlock(&vnode->lock);
-       }
-
-       ret = 0;
-
-       kunmap(page);
-out_free:
-       put_page(page);
-out:
-       _leave(" = %d", ret);
-       return ret;
-}
-
 /*
  * no valid lookup procedure on this sort of dir
  */
index 419ef05dcb5ec7149a3a0b5de657c75bbc6eabb4..8f76b13d55494bddec9e81203c0734a0f6d811d7 100644 (file)
@@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call)
        call->buffer = NULL;
 }
 
+#define AFS_BVEC_MAX 8
+
+/*
+ * Load the given bvec with the next few pages.
+ */
+static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
+                         struct bio_vec *bv, pgoff_t first, pgoff_t last,
+                         unsigned offset)
+{
+       struct page *pages[AFS_BVEC_MAX];
+       unsigned int nr, n, i, to, bytes = 0;
+
+       nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
+       n = find_get_pages_contig(call->mapping, first, nr, pages);
+       ASSERTCMP(n, ==, nr);
+
+       msg->msg_flags |= MSG_MORE;
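+       /* MSG_MORE is dropped on the final page, where call->last_to
+        * bounds how many bytes of that page are sent. */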
+       for (i = 0; i < nr; i++) {
+               to = PAGE_SIZE;
+               if (first + i >= last) {
+                       to = call->last_to;
+                       msg->msg_flags &= ~MSG_MORE;
+               }
+               bv[i].bv_page = pages[i];
+               bv[i].bv_len = to - offset;
+               bv[i].bv_offset = offset;
+               bytes += to - offset;
+               offset = 0;
+       }
+
+       iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+}
+
 /*
  * attach the data from a bunch of pages on an inode to a call
  */
 static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 {
-       struct page *pages[8];
-       unsigned count, n, loop, offset, to;
+       struct bio_vec bv[AFS_BVEC_MAX];
+       unsigned int bytes, nr, loop, offset;
        pgoff_t first = call->first, last = call->last;
        int ret;
 
-       _enter("");
-
        offset = call->first_offset;
        call->first_offset = 0;
 
        do {
-               _debug("attach %lx-%lx", first, last);
-
-               count = last - first + 1;
-               if (count > ARRAY_SIZE(pages))
-                       count = ARRAY_SIZE(pages);
-               n = find_get_pages_contig(call->mapping, first, count, pages);
-               ASSERTCMP(n, ==, count);
-
-               loop = 0;
-               do {
-                       struct bio_vec bvec = {.bv_page = pages[loop],
-                                              .bv_offset = offset};
-                       msg->msg_flags = 0;
-                       to = PAGE_SIZE;
-                       if (first + loop >= last)
-                               to = call->last_to;
-                       else
-                               msg->msg_flags = MSG_MORE;
-                       bvec.bv_len = to - offset;
-                       offset = 0;
-
-                       _debug("- range %u-%u%s",
-                              offset, to, msg->msg_flags ? " [more]" : "");
-                       iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
-                                     &bvec, 1, to - offset);
-
-                       /* have to change the state *before* sending the last
-                        * packet as RxRPC might give us the reply before it
-                        * returns from sending the request */
-                       if (first + loop >= last)
-                               call->state = AFS_CALL_AWAIT_REPLY;
-                       ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
-                                                    msg, to - offset);
-                       if (ret < 0)
-                               break;
-               } while (++loop < count);
-               first += count;
-
-               for (loop = 0; loop < count; loop++)
-                       put_page(pages[loop]);
+               afs_load_bvec(call, msg, bv, first, last, offset);
+               offset = 0;
+               bytes = msg->msg_iter.count;
+               nr = msg->msg_iter.nr_segs;
+
+               /* Have to change the state *before* sending the last
+                * packet as RxRPC might give us the reply before it
+                * returns from sending the request.
+                */
+               if (first + nr - 1 >= last)
+                       call->state = AFS_CALL_AWAIT_REPLY;
+               ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+                                            msg, bytes);
+               for (loop = 0; loop < nr; loop++)
+                       put_page(bv[loop].bv_page);
                if (ret < 0)
                        break;
+
+               first += nr;
        } while (first <= last);
 
-       _leave(" = %d", ret);
        return ret;
 }
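
The hunk above batches the transmission: instead of issuing one rxrpc_kernel_send_data() call per page, afs_load_bvec() gathers up to AFS_BVEC_MAX pages into a bio_vec array and afs_send_pages() sends each batch with a single call, dropping the page references afterwards. A rough userspace analogue of the same batching idea, assuming POSIX writev() and ignoring short-write handling (send_batched and BATCH_MAX are made-up names):

#include <sys/uio.h>
#include <unistd.h>

#define BATCH_MAX 8	/* mirrors AFS_BVEC_MAX above */

/* Gather up to BATCH_MAX buffers per writev() call instead of one
 * write() per buffer.  Short writes are not retried in this sketch. */
static ssize_t send_batched(int fd, char *bufs[], size_t lens[], size_t n)
{
	struct iovec iov[BATCH_MAX];
	ssize_t total = 0;

	while (n) {
		size_t batch = n < BATCH_MAX ? n : BATCH_MAX;
		size_t i;
		ssize_t ret;

		for (i = 0; i < batch; i++) {
			iov[i].iov_base = bufs[i];
			iov[i].iov_len = lens[i];
		}
		ret = writev(fd, iov, batch);
		if (ret < 0)
			return -1;
		total += ret;
		bufs += batch;
		lens += batch;
		n -= batch;
	}
	return total;
}
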
 
@@ -333,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
        struct rxrpc_call *rxcall;
        struct msghdr msg;
        struct kvec iov[1];
+       size_t offset;
+       u32 abort_code;
        int ret;
 
        _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -381,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
        msg.msg_controllen      = 0;
        msg.msg_flags           = (call->send_pages ? MSG_MORE : 0);
 
-       /* have to change the state *before* sending the last packet as RxRPC
-        * might give us the reply before it returns from sending the
-        * request */
+       /* We have to change the state *before* sending the last packet as
+        * rxrpc might give us the reply before it returns from sending the
+        * request.  Further, if the send fails, we may already have been given
+        * a notification and may have collected it.
+        */
        if (!call->send_pages)
                call->state = AFS_CALL_AWAIT_REPLY;
        ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -405,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
        return afs_wait_for_call_to_complete(call);
 
 error_do_abort:
-       rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+       call->state = AFS_CALL_COMPLETE;
+       if (ret != -ECONNABORTED) {
+               rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+                                       -ret, "KSD");
+       } else {
+               abort_code = 0;
+               offset = 0;
+               rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+                                      false, &abort_code);
+               ret = call->type->abort_to_error(abort_code);
+       }
 error_kill_call:
        afs_put_call(call);
        _leave(" = %d", ret);
@@ -452,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call)
                case -EINPROGRESS:
                case -EAGAIN:
                        goto out;
+               case -ECONNABORTED:
+                       goto call_complete;
                case -ENOTCONN:
                        abort_code = RX_CALL_DEAD;
                        rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                                abort_code, -ret, "KNC");
-                       goto do_abort;
+                       goto save_error;
                case -ENOTSUPP:
-                       abort_code = RX_INVALID_OPERATION;
+                       abort_code = RXGEN_OPCODE;
                        rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                                abort_code, -ret, "KIV");
-                       goto do_abort;
+                       goto save_error;
                case -ENODATA:
                case -EBADMSG:
                case -EMSGSIZE:
@@ -471,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                                abort_code = RXGEN_SS_UNMARSHAL;
                        rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                                abort_code, EBADMSG, "KUM");
-                       goto do_abort;
+                       goto save_error;
                }
        }
 
@@ -482,8 +505,9 @@ out:
        _leave("");
        return;
 
-do_abort:
+save_error:
        call->error = ret;
+call_complete:
        call->state = AFS_CALL_COMPLETE;
        goto done;
 }
@@ -493,7 +517,6 @@ do_abort:
  */
 static int afs_wait_for_call_to_complete(struct afs_call *call)
 {
-       const char *abort_why;
        int ret;
 
        DECLARE_WAITQUEUE(myself, current);
@@ -512,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
                        continue;
                }
 
-               abort_why = "KWC";
-               ret = call->error;
-               if (call->state == AFS_CALL_COMPLETE)
-                       break;
-               abort_why = "KWI";
-               ret = -EINTR;
-               if (signal_pending(current))
+               if (call->state == AFS_CALL_COMPLETE ||
+                   signal_pending(current))
                        break;
                schedule();
        }
@@ -526,13 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
        remove_wait_queue(&call->waitq, &myself);
        __set_current_state(TASK_RUNNING);
 
-       /* kill the call */
+       /* Kill off the call if it's still live. */
        if (call->state < AFS_CALL_COMPLETE) {
-               _debug("call incomplete");
+               _debug("call interrupted");
                rxrpc_kernel_abort_call(afs_socket, call->rxcall,
-                                       RX_CALL_DEAD, -ret, abort_why);
+                                       RX_USER_ABORT, -EINTR, "KWI");
        }
 
+       ret = call->error;
        _debug("call complete");
        afs_put_call(call);
        _leave(" = %d", ret);
index 8d010422dc8962b72fb3af64f75fdedb8e892cc0..ecb86a6701801cb74745bc99b74f9d8a367a2792 100644 (file)
@@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
 
        mutex_lock(&vnode->permits_lock);
        permits = vnode->permits;
-       rcu_assign_pointer(vnode->permits, NULL);
+       RCU_INIT_POINTER(vnode->permits, NULL);
        mutex_unlock(&vnode->permits_lock);
 
        if (permits)
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
        } else {
                if (!(access & AFS_ACE_LOOKUP))
                        goto permission_denied;
+               if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+                       goto permission_denied;
                if (mask & (MAY_EXEC | MAY_READ)) {
                        if (!(access & AFS_ACE_READ))
                                goto permission_denied;
+                       if (!(inode->i_mode & S_IRUSR))
+                               goto permission_denied;
                } else if (mask & MAY_WRITE) {
                        if (!(access & AFS_ACE_WRITE))
                                goto permission_denied;
+                       if (!(inode->i_mode & S_IWUSR))
+                               goto permission_denied;
                }
        }
 
        key_put(key);
-       ret = generic_permission(inode, mask);
        _leave(" = %d", ret);
        return ret;
 
index d4066ab7dd5505b364a6506a1a2d932274bb5d9d..c001b1f2455fbf6dee4c9635c95590ada3890483 100644 (file)
@@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server)
        spin_lock(&afs_server_graveyard_lock);
        if (atomic_read(&server->usage) == 0) {
                list_move_tail(&server->grave, &afs_server_graveyard);
-               server->time_of_death = get_seconds();
+               server->time_of_death = ktime_get_real_seconds();
                queue_delayed_work(afs_wq, &afs_server_reaper,
                                   afs_server_timeout * HZ);
        }
@@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work)
        LIST_HEAD(corpses);
        struct afs_server *server;
        unsigned long delay, expiry;
-       time_t now;
+       time64_t now;
 
-       now = get_seconds();
+       now = ktime_get_real_seconds();
        spin_lock(&afs_server_graveyard_lock);
 
        while (!list_empty(&afs_server_graveyard)) {
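
The get_seconds() to ktime_get_real_seconds() conversions in this file, and in the vlocation.c hunks that follow, are y2038 hardening: get_seconds() returns an unsigned long, which is 32 bits wide on 32-bit architectures, while ktime_get_real_seconds() returns a 64-bit time64_t. A tiny userspace illustration of the same idea (the typedef and helper are ours):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef int64_t time64_t;	/* mirrors the kernel's 64-bit seconds type */

/* On 32-bit systems a 'long' seconds counter overflows in January 2038;
 * a 64-bit counter does not, which is the point of the conversion. */
static time64_t now_seconds(void)
{
	return (time64_t)time(NULL);
}

int main(void)
{
	printf("seconds since the epoch: %lld\n", (long long)now_seconds());
	return 0;
}
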
index d7d8dd8c0b3187e6fe7eaed8e6300cb06826ff81..37b7c3b342a6b5a1f2f0cd06c0538e8e1d7f9073 100644 (file)
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
        struct afs_vlocation *xvl;
 
        /* wait at least 10 minutes before updating... */
-       vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+       vl->update_at = ktime_get_real_seconds() +
+                       afs_vlocation_update_timeout;
 
        spin_lock(&afs_vlocation_updates_lock);
 
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
        if (atomic_read(&vl->usage) == 0) {
                _debug("buried");
                list_move_tail(&vl->grave, &afs_vlocation_graveyard);
-               vl->time_of_death = get_seconds();
+               vl->time_of_death = ktime_get_real_seconds();
                queue_delayed_work(afs_wq, &afs_vlocation_reap,
                                   afs_vlocation_timeout * HZ);
 
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
        LIST_HEAD(corpses);
        struct afs_vlocation *vl;
        unsigned long delay, expiry;
-       time_t now;
+       time64_t now;
 
        _enter("");
 
-       now = get_seconds();
+       now = ktime_get_real_seconds();
        spin_lock(&afs_vlocation_graveyard_lock);
 
        while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
 {
        struct afs_cache_vlocation vldb;
        struct afs_vlocation *vl, *xvl;
-       time_t now;
+       time64_t now;
        long timeout;
        int ret;
 
        _enter("");
 
-       now = get_seconds();
+       now = ktime_get_real_seconds();
 
        /* find a record to update */
        spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
 
        /* and then reschedule */
        _debug("reschedule");
-       vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+       vl->update_at = ktime_get_real_seconds() +
+                       afs_vlocation_update_timeout;
 
        spin_lock(&afs_vlocation_updates_lock);
 
index c83c1a0e851fb34051c026bcea8e2a561299cf95..2d2fccd5044bcd9b02127246824c1221ec502484 100644 (file)
@@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb)
  * partly or wholly fill a page that's under preparation for writing
  */
 static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
-                        loff_t pos, struct page *page)
+                        loff_t pos, unsigned int len, struct page *page)
 {
        struct afs_read *req;
-       loff_t i_size;
        int ret;
 
        _enter(",,%llu", (unsigned long long)pos);
@@ -99,14 +98,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 
        atomic_set(&req->usage, 1);
        req->pos = pos;
+       req->len = len;
        req->nr_pages = 1;
        req->pages[0] = page;
-
-       i_size = i_size_read(&vnode->vfs_inode);
-       if (pos + PAGE_SIZE > i_size)
-               req->len = i_size - pos;
-       else
-               req->len = PAGE_SIZE;
+       get_page(page);
 
        ret = afs_vnode_fetch_data(vnode, key, req);
        afs_put_read(req);
@@ -159,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
                kfree(candidate);
                return -ENOMEM;
        }
-       *pagep = page;
-       /* page won't leak in error case: it eventually gets cleaned off LRU */
 
        if (!PageUptodate(page) && len != PAGE_SIZE) {
-               ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
+               ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
                if (ret < 0) {
+                       unlock_page(page);
+                       put_page(page);
                        kfree(candidate);
                        _leave(" = %d [prep]", ret);
                        return ret;
@@ -172,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
                SetPageUptodate(page);
        }
 
+       /* page won't leak in error case: it eventually gets cleaned off LRU */
+       *pagep = page;
+
 try_again:
        spin_lock(&vnode->writeback_lock);
 
@@ -233,7 +231,7 @@ flush_conflicting_wb:
        if (wb->state == AFS_WBACK_PENDING)
                wb->state = AFS_WBACK_CONFLICTING;
        spin_unlock(&vnode->writeback_lock);
-       if (PageDirty(page)) {
+       if (clear_page_dirty_for_io(page)) {
                ret = afs_write_back_from_locked_page(wb, page);
                if (ret < 0) {
                        afs_put_writeback(candidate);
@@ -257,7 +255,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                  struct page *page, void *fsdata)
 {
        struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+       struct key *key = file->private_data;
        loff_t i_size, maybe_i_size;
+       int ret;
 
        _enter("{%x:%u},{%lx}",
               vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -273,6 +273,20 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                spin_unlock(&vnode->writeback_lock);
        }
 
+       if (!PageUptodate(page)) {
+               if (copied < len) {
+                       /* Try to load any missing data from the server.  The
+                        * unmarshalling routine will take care of clearing any
+                        * bits that are beyond the EOF.
+                        */
+                       ret = afs_fill_page(vnode, key, pos + copied,
+                                           len - copied, page);
+                       if (ret < 0)
+                               return ret;
+               }
+               SetPageUptodate(page);
+       }
+
        set_page_dirty(page);
        if (PageDirty(page))
                _debug("dirtied");
@@ -307,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
                ASSERTCMP(pv.nr, ==, count);
 
                for (loop = 0; loop < count; loop++) {
-                       ClearPageUptodate(pv.pages[loop]);
+                       struct page *page = pv.pages[loop];
+                       ClearPageUptodate(page);
                        if (error)
-                               SetPageError(pv.pages[loop]);
-                       end_page_writeback(pv.pages[loop]);
+                               SetPageError(page);
+                       if (PageWriteback(page))
+                               end_page_writeback(page);
+                       if (page->index >= first)
+                               first = page->index + 1;
                }
 
                __pagevec_release(&pv);
@@ -335,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
        _enter(",%lx", primary_page->index);
 
        count = 1;
-       if (!clear_page_dirty_for_io(primary_page))
-               BUG();
        if (test_set_page_writeback(primary_page))
                BUG();
 
@@ -502,17 +518,17 @@ static int afs_writepages_region(struct address_space *mapping,
                 */
                lock_page(page);
 
-               if (page->mapping != mapping) {
+               if (page->mapping != mapping || !PageDirty(page)) {
                        unlock_page(page);
                        put_page(page);
                        continue;
                }
 
-               if (wbc->sync_mode != WB_SYNC_NONE)
-                       wait_on_page_writeback(page);
-
-               if (PageWriteback(page) || !PageDirty(page)) {
+               if (PageWriteback(page)) {
                        unlock_page(page);
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+                       put_page(page);
                        continue;
                }
 
@@ -523,6 +539,8 @@ static int afs_writepages_region(struct address_space *mapping,
                wb->state = AFS_WBACK_WRITING;
                spin_unlock(&wb->vnode->writeback_lock);
 
+               if (!clear_page_dirty_for_io(page))
+                       BUG();
                ret = afs_write_back_from_locked_page(wb, page);
                unlock_page(page);
                put_page(page);
@@ -745,6 +763,20 @@ out:
        return ret;
 }
 
+/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+       _enter("");
+
+       if ((file->f_mode & FMODE_WRITE) == 0)
+               return 0;
+
+       return vfs_fsync(file, 0);
+}
+
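
afs_flush() above hooks ->flush so that closing a file descriptor that was opened for writing synchronously pushes out any outstanding writes via vfs_fsync(). The same check-then-sync pattern in userspace terms (close_flushing is a hypothetical helper):

#include <fcntl.h>
#include <unistd.h>

/* Sync a descriptor before closing it, but only if it was opened with
 * write access - the same test afs_flush() applies to file->f_mode. */
static int close_flushing(int fd)
{
	int flags = fcntl(fd, F_GETFL);

	if (flags >= 0 && (flags & O_ACCMODE) != O_RDONLY) {
		if (fsync(fd) < 0) {
			close(fd);
			return -1;
		}
	}
	return close(fd);
}
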
 /*
  * notification that a previously read-only page is about to become writable
  * - if it returns an error, the caller will deliver a bus error signal
index 28e81922a21c1ecead950f50cf3e685ad03c57f6..8df797432740df92e9c35c67fa75ce0331c193ff 100644 (file)
@@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping,
                         * can we find nothing at @index.
                         */
                        ASSERT(page_ops & PAGE_LOCK);
-                       return ret;
+                       err = -EAGAIN;
+                       goto out;
                }
 
                for (i = 0; i < ret; i++) {
index c40060cc481f60440044d00ea4a76904cc4d9761..2315039356529fac0315a57733a035d86a00b3b1 100644 (file)
@@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path,
        max_size = min_t(unsigned long, PAGE_SIZE, max_size);
        ret = btrfs_decompress(compress_type, tmp, page,
                               extent_offset, inline_size, max_size);
+
+       /*
+        * decompression code contains a memset to fill in any space between the end
+        * of the uncompressed data and the end of max_size in case the decompressed
+        * data ends up shorter than ram_bytes.  That doesn't cover the hole between
+        * the end of an inline extent and the beginning of the next block, so we
+        * cover that region here.
+        */
+
+       if (max_size + pg_offset < PAGE_SIZE) {
+               char *map = kmap(page);
+               memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+               kunmap(page);
+       }
        kfree(tmp);
        return ret;
 }
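
The added memset closes a data-leak hole: btrfs_decompress() only zero-fills up to max_size, so any page space past pg_offset + max_size could otherwise expose stale memory. Reduced to a standalone sketch (zero_tail is a made-up name; PAGE_SIZE hardcoded for illustration):

#include <stddef.h>
#include <string.h>

#define PAGE_SIZE 4096

/* After placing max_size decompressed bytes at pg_offset in a page-sized
 * buffer, zero whatever is left so stale data cannot leak past the end
 * of the inline extent - the same guard as the hunk above. */
static void zero_tail(unsigned char page[PAGE_SIZE],
		      size_t pg_offset, size_t max_size)
{
	if (max_size + pg_offset < PAGE_SIZE)
		memset(page + pg_offset + max_size, 0,
		       PAGE_SIZE - max_size - pg_offset);
}
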
index 15e1db8738aecad0c8a86888c0fa1ada5f9b7623..8c91f37ac0ebd784e3eb9b964f57f9c73ea37a5d 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/freezer.h>
 #include <linux/namei.h>
 #include <linux/random.h>
+#include <linux/uuid.h>
 #include <linux/xattr.h>
 #include <net/ipv6.h>
 #include "cifsfs.h"
index 9ae695ae3ed7be3788db2a889e34cde8a3224c9c..858698dcde3cd0a44209164fa9226e1762c504a8 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/pagevec.h>
 #include <linux/freezer.h>
 #include <linux/namei.h>
+#include <linux/uuid.h>
 #include <linux/uaccess.h>
 #include <asm/processor.h>
 #include <linux/inet.h>
index 7446496850a3bd5f21fb36e12b65ba5c78532612..fb75fe908225d77572eaa0a962ab5cd2afa227dd 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/vfs.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/uaccess.h>
+#include <linux/uuid.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include "smb2pdu.h"
index 341251421ced00ab1be854c4145cf408cfc93c2f..5420767c9b686a7a9db30bbf932e85071c83c9f6 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/seq_file.h>
 #include <linux/compat.h>
 #include <linux/rculist.h>
+#include <net/busy_poll.h>
 
 /*
  * LOCKING:
@@ -224,6 +225,11 @@ struct eventpoll {
        /* used to optimize loop detection check */
        int visited;
        struct list_head visited_list_link;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       /* used to track busy poll napi_id */
+       unsigned int napi_id;
+#endif
 };
 
 /* Wait structure used by the poll hooks */
@@ -384,6 +390,77 @@ static inline int ep_events_available(struct eventpoll *ep)
        return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static bool ep_busy_loop_end(void *p, unsigned long start_time)
+{
+       struct eventpoll *ep = p;
+
+       return ep_events_available(ep) || busy_loop_timeout(start_time);
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+/*
+ * Busy poll if busy polling is globally enabled, supporting sockets were
+ * found and there are no events; the busy loop returns once need_resched()
+ * fires or events become available (ep_events_available()).
+ *
+ * We must do our busy polling with irqs enabled.
+ */
+static void ep_busy_loop(struct eventpoll *ep, int nonblock)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       unsigned int napi_id = READ_ONCE(ep->napi_id);
+
+       if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
+               napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep);
+#endif
+}
+
+static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       if (ep->napi_id)
+               ep->napi_id = 0;
+#endif
+}
+
+/*
+ * Set epoll busy poll NAPI ID from sk.
+ */
+static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       struct eventpoll *ep;
+       unsigned int napi_id;
+       struct socket *sock;
+       struct sock *sk;
+       int err;
+
+       if (!net_busy_loop_on())
+               return;
+
+       sock = sock_from_file(epi->ffd.file, &err);
+       if (!sock)
+               return;
+
+       sk = sock->sk;
+       if (!sk)
+               return;
+
+       napi_id = READ_ONCE(sk->sk_napi_id);
+       ep = epi->ep;
+
+       /* Reject non-NAPI IDs; there is also nothing to do if we
+        * already have this ID recorded.
+        */
+       if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id)
+               return;
+
+       /* record NAPI ID for use in next busy poll */
+       ep->napi_id = napi_id;
+#endif
+}
+
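
Taken together, the helpers above wire epoll into the networking busy-poll machinery: ep_set_busy_poll_napi_id() records which NAPI context last fed events onto the ready list, and ep_busy_loop() spins on that context until events appear or the busy-poll budget expires. The control flow of such a bounded spin, as a hedged userspace sketch (events_ready stands in for ep_events_available()):

#include <stdbool.h>
#include <time.h>

/* Spin on a cheap readiness check for at most budget_us microseconds
 * before giving up and (in the real code) falling back to sleeping. */
static bool busy_poll(bool (*events_ready)(void *), void *arg,
		      long budget_us)
{
	struct timespec start, now;
	long elapsed_us;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (events_ready(arg))
			return true;
		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_us = (now.tv_sec - start.tv_sec) * 1000000L +
			     (now.tv_nsec - start.tv_nsec) / 1000L;
		if (elapsed_us >= budget_us)
			return false;
	}
}
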
 /**
  * ep_call_nested - Perform a bound (possibly) nested call, by checking
  *                  that the recursion limit is not exceeded, and that
@@ -1022,6 +1099,8 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 
        spin_lock_irqsave(&ep->lock, flags);
 
+       ep_set_busy_poll_napi_id(epi);
+
        /*
         * If the event mask does not contain any poll(2) event, we consider the
         * descriptor to be disabled. This condition is likely the effect of the
@@ -1363,6 +1442,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
        /* We have to drop the new item inside our item list to keep track of it */
        spin_lock_irqsave(&ep->lock, flags);
 
+       /* record NAPI ID of new item if present */
+       ep_set_busy_poll_napi_id(epi);
+
        /* If the file is already "ready" we drop it inside the ready list */
        if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1637,9 +1719,20 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
        }
 
 fetch_events:
+
+       if (!ep_events_available(ep))
+               ep_busy_loop(ep, timed_out);
+
        spin_lock_irqsave(&ep->lock, flags);
 
        if (!ep_events_available(ep)) {
+               /*
+                * Busy poll timed out.  Drop the NAPI ID for now; we can
+                * add it back in when we have moved a socket with a valid
+                * NAPI ID onto the ready list.
+                */
+               ep_reset_busy_poll_napi_id(ep);
+
                /*
                 * We don't have any available event to return to the caller.
                 * We need to sleep here, and we will be woken up by
index a77df377e2e8197097912c9248948c7e729ce566..ee2d0a485fc3478fc5f93b5b85c6dad0431e8ea0 100644 (file)
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
        si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
        si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
        si->base_mem += NM_I(sbi)->nat_blocks / 8;
+       si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
 
 get_cache:
        si->cache_mem = 0;
index 4650c9b85de77679adaa275406512868671bb1bb..8d5c62b07b283f53e90ded2366c8bb9375409fa2 100644 (file)
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
        dentry_blk = page_address(page);
        bit_pos = dentry - dentry_blk->dentry;
        for (i = 0; i < slots; i++)
-               clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+               __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
 
        /* Let's check and deallocate this dentry page */
        bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
index e849f83d611407b8968bec904c10f1939c40b4f1..0a6e115562f62edca5b60ee4c833e889a904c202 100644 (file)
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
        struct mutex build_lock;        /* lock for build free nids */
        unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
        unsigned char *nat_block_bitmap;
+       unsigned short *free_nid_count; /* free nid count of NAT block */
+       spinlock_t free_nid_lock;       /* protect updating of nid count */
 
        /* for checkpoint */
        char *nat_bitmap;               /* NAT bitmap pointer */
index 94967171dee87a381655ede9190ff0f66b3ca4af..481aa8dc79f46f4c156cf67cca665e8160e36e6a 100644 (file)
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
                set_nat_flag(e, IS_CHECKPOINTED, false);
        __set_nat_cache_dirty(nm_i, e);
 
-       if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
-               clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
-
        /* update fsync_mark if its inode nat entry is still alive */
        if (ni->nid != ni->ino)
                e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
                kmem_cache_free(free_nid_slab, i);
 }
 
-void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+                       bool set, bool build, bool locked)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
                return;
 
        if (set)
-               set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+               __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
        else
-               clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+               __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+       if (!locked)
+               spin_lock(&nm_i->free_nid_lock);
+       if (set)
+               nm_i->free_nid_count[nat_ofs]++;
+       else if (!build)
+               nm_i->free_nid_count[nat_ofs]--;
+       if (!locked)
+               spin_unlock(&nm_i->free_nid_lock);
 }
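
update_free_nid_bitmap() now also maintains a per-NAT-block count of free nids under a new spinlock; the extra locked parameter lets a bulk caller (see load_free_nid_bitmap() further down) take the lock once around a whole loop rather than per update. The conditional-locking convention, sketched with pthreads (all names are ours):

#include <pthread.h>
#include <stdbool.h>

#define NAT_BLOCKS 1024	/* illustrative size */

static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned short free_count[NAT_BLOCKS];

/* Callers that already hold count_lock pass locked=true, exactly the
 * convention update_free_nid_bitmap() adopts above. */
static void adjust_free_count(unsigned int block, int delta, bool locked)
{
	if (!locked)
		pthread_mutex_lock(&count_lock);
	free_count[block] += delta;
	if (!locked)
		pthread_mutex_unlock(&count_lock);
}
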
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
        unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
        int i;
 
-       set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+       if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+               return;
+
+       __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
 
        i = start_nid % NAT_ENTRY_PER_BLOCK;
 
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
                f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
                if (blk_addr == NULL_ADDR)
                        freed = add_free_nid(sbi, start_nid, true);
-               update_free_nid_bitmap(sbi, start_nid, freed);
+               update_free_nid_bitmap(sbi, start_nid, freed, true, false);
        }
 }
 
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
        for (i = 0; i < nm_i->nat_blocks; i++) {
                if (!test_bit_le(i, nm_i->nat_block_bitmap))
                        continue;
+               if (!nm_i->free_nid_count[i])
+                       continue;
                for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
                        nid_t nid;
 
@@ -1907,58 +1919,6 @@ out:
        up_read(&nm_i->nat_tree_lock);
 }
 
-static int scan_nat_bits(struct f2fs_sb_info *sbi)
-{
-       struct f2fs_nm_info *nm_i = NM_I(sbi);
-       struct page *page;
-       unsigned int i = 0;
-       nid_t nid;
-
-       if (!enabled_nat_bits(sbi, NULL))
-               return -EAGAIN;
-
-       down_read(&nm_i->nat_tree_lock);
-check_empty:
-       i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
-       if (i >= nm_i->nat_blocks) {
-               i = 0;
-               goto check_partial;
-       }
-
-       for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
-                                                                       nid++) {
-               if (unlikely(nid >= nm_i->max_nid))
-                       break;
-               add_free_nid(sbi, nid, true);
-       }
-
-       if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
-               goto out;
-       i++;
-       goto check_empty;
-
-check_partial:
-       i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
-       if (i >= nm_i->nat_blocks) {
-               disable_nat_bits(sbi, true);
-               up_read(&nm_i->nat_tree_lock);
-               return -EINVAL;
-       }
-
-       nid = i * NAT_ENTRY_PER_BLOCK;
-       page = get_current_nat_page(sbi, nid);
-       scan_nat_page(sbi, page, nid);
-       f2fs_put_page(page, 1);
-
-       if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
-               i++;
-               goto check_partial;
-       }
-out:
-       up_read(&nm_i->nat_tree_lock);
-       return 0;
-}
-
 static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 
                if (nm_i->nid_cnt[FREE_NID_LIST])
                        return;
-
-               /* try to find free nids with nat_bits */
-               if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
-                       return;
-       }
-
-       /* find next valid candidate */
-       if (enabled_nat_bits(sbi, NULL)) {
-               int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
-                                       nm_i->nat_blocks, 0);
-
-               if (idx >= nm_i->nat_blocks)
-                       set_sbi_flag(sbi, SBI_NEED_FSCK);
-               else
-                       nid = idx * NAT_ENTRY_PER_BLOCK;
        }
 
        /* readahead nat pages to be scanned */
@@ -2081,7 +2026,7 @@ retry:
                __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
                nm_i->available_nids--;
 
-               update_free_nid_bitmap(sbi, *nid, false);
+               update_free_nid_bitmap(sbi, *nid, false, false, false);
 
                spin_unlock(&nm_i->nid_list_lock);
                return true;
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
        nm_i->available_nids++;
 
-       update_free_nid_bitmap(sbi, nid, true);
+       update_free_nid_bitmap(sbi, nid, true, false, false);
 
        spin_unlock(&nm_i->nid_list_lock);
 
@@ -2383,7 +2328,7 @@ add_out:
        list_add_tail(&nes->set_list, head);
 }
 
-void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
                                                struct page *page)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
                        valid++;
        }
        if (valid == 0) {
-               set_bit_le(nat_index, nm_i->empty_nat_bits);
-               clear_bit_le(nat_index, nm_i->full_nat_bits);
+               __set_bit_le(nat_index, nm_i->empty_nat_bits);
+               __clear_bit_le(nat_index, nm_i->full_nat_bits);
                return;
        }
 
-       clear_bit_le(nat_index, nm_i->empty_nat_bits);
+       __clear_bit_le(nat_index, nm_i->empty_nat_bits);
        if (valid == NAT_ENTRY_PER_BLOCK)
-               set_bit_le(nat_index, nm_i->full_nat_bits);
+               __set_bit_le(nat_index, nm_i->full_nat_bits);
        else
-               clear_bit_le(nat_index, nm_i->full_nat_bits);
+               __clear_bit_le(nat_index, nm_i->full_nat_bits);
 }
 
 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                        add_free_nid(sbi, nid, false);
                        spin_lock(&NM_I(sbi)->nid_list_lock);
                        NM_I(sbi)->available_nids++;
-                       update_free_nid_bitmap(sbi, nid, true);
+                       update_free_nid_bitmap(sbi, nid, true, false, false);
                        spin_unlock(&NM_I(sbi)->nid_list_lock);
                } else {
                        spin_lock(&NM_I(sbi)->nid_list_lock);
-                       update_free_nid_bitmap(sbi, nid, false);
+                       update_free_nid_bitmap(sbi, nid, false, false, false);
                        spin_unlock(&NM_I(sbi)->nid_list_lock);
                }
        }
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
        return 0;
 }
 
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int i = 0;
+       nid_t nid, last_nid;
+
+       if (!enabled_nat_bits(sbi, NULL))
+               return;
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+               if (i >= nm_i->nat_blocks)
+                       break;
+
+               __set_bit_le(i, nm_i->nat_block_bitmap);
+
+               nid = i * NAT_ENTRY_PER_BLOCK;
+               last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+               spin_lock(&nm_i->free_nid_lock);
+               for (; nid < last_nid; nid++)
+                       update_free_nid_bitmap(sbi, nid, true, true, true);
+               spin_unlock(&nm_i->free_nid_lock);
+       }
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+               if (i >= nm_i->nat_blocks)
+                       break;
+
+               __set_bit_le(i, nm_i->nat_block_bitmap);
+       }
+}
+
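
load_free_nid_bitmap() above replaces the removed scan_nat_bits() with a single mount-time pass over the empty/full nat_bits bitmaps. Note the loop idiom: the for-loop index is re-assigned from the bit search on every iteration. A plain-C rendering of that idiom, with find_next_set standing in for the kernel's find_next_bit_le():

#include <stddef.h>

#define BITS_PER_WORD (8 * sizeof(unsigned long))

/* Index of the next set bit at or after 'from', or 'nbits' if none - a
 * plain-C stand-in for the kernel's find_next_bit_le(). */
static size_t find_next_set(const unsigned long *map, size_t nbits,
			    size_t from)
{
	size_t i;

	for (i = from; i < nbits; i++)
		if (map[i / BITS_PER_WORD] & (1UL << (i % BITS_PER_WORD)))
			return i;
	return nbits;
}

/* Visit every set bit: the loop index is re-assigned from the search
 * result each iteration, mirroring load_free_nid_bitmap() above. */
static void walk_set_bits(const unsigned long *map, size_t nbits,
			  void (*visit)(size_t))
{
	size_t i;

	for (i = 0; i < nbits; i++) {
		i = find_next_set(map, nbits, i);
		if (i >= nbits)
			break;
		visit(i);
	}
}
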
 static int init_node_manager(struct f2fs_sb_info *sbi)
 {
        struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        return 0;
 }
 
-int init_free_nid_cache(struct f2fs_sb_info *sbi)
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
 
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi)
                                                                GFP_KERNEL);
        if (!nm_i->nat_block_bitmap)
                return -ENOMEM;
+
+       nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+                                       sizeof(unsigned short), GFP_KERNEL);
+       if (!nm_i->free_nid_count)
+               return -ENOMEM;
+
+       spin_lock_init(&nm_i->free_nid_lock);
+
        return 0;
 }
 
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
        if (err)
                return err;
 
+       /* load free nid status from nat_bits table */
+       load_free_nid_bitmap(sbi);
+
        build_free_nids(sbi, true, true);
        return 0;
 }
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 
        kvfree(nm_i->nat_block_bitmap);
        kvfree(nm_i->free_nid_bitmap);
+       kvfree(nm_i->free_nid_count);
 
        kfree(nm_i->nat_bitmap);
        kfree(nm_i->nat_bits);
index 4bd7a8b19332d176d78b0a40c24e7bb12bbe2f5e..29ef7088c5582a480b6a1f7965fbbcca4f07e24e 100644 (file)
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                if (f2fs_discard_en(sbi) &&
                        !f2fs_test_and_set_bit(offset, se->discard_map))
                        sbi->discard_blks--;
+
+               /* don't overwrite by SSR to keep node chain */
+               if (se->type == CURSEG_WARM_NODE) {
+                       if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
+                               se->ckpt_valid_blocks++;
+               }
        } else {
                if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
 #ifdef CONFIG_F2FS_CHECK_FS
index ef600591d96f9a42be98699025f4cf94ef8e7762..63ee2940775ce9c16daca5c2f7590e0c6e57bc07 100644 (file)
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
        spin_unlock_bh(&wb->work_lock);
 }
 
+static void finish_writeback_work(struct bdi_writeback *wb,
+                                 struct wb_writeback_work *work)
+{
+       struct wb_completion *done = work->done;
+
+       if (work->auto_free)
+               kfree(work);
+       if (done && atomic_dec_and_test(&done->cnt))
+               wake_up_all(&wb->bdi->wb_waitq);
+}
+
 static void wb_queue_work(struct bdi_writeback *wb,
                          struct wb_writeback_work *work)
 {
        trace_writeback_queue(wb, work);
 
-       spin_lock_bh(&wb->work_lock);
-       if (!test_bit(WB_registered, &wb->state))
-               goto out_unlock;
        if (work->done)
                atomic_inc(&work->done->cnt);
-       list_add_tail(&work->list, &wb->work_list);
-       mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+       spin_lock_bh(&wb->work_lock);
+
+       if (test_bit(WB_registered, &wb->state)) {
+               list_add_tail(&work->list, &wb->work_list);
+               mod_delayed_work(bdi_wq, &wb->dwork, 0);
+       } else
+               finish_writeback_work(wb, work);
+
        spin_unlock_bh(&wb->work_lock);
 }
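
The refactor above fixes a completion leak: previously a work item queued against an unregistered bdi_writeback was silently dropped, so auto_free items leaked and completion waiters were never woken. Factoring out finish_writeback_work() lets wb_queue_work() complete such items on the spot in the shutdown case. The pattern with C11 atomics (all names hypothetical):

#include <stdatomic.h>
#include <stdbool.h>

struct wb_done {
	atomic_int cnt;		/* pending works tracked by this waiter */
};

static void wake_waiters(void)
{
	/* wake_up_all(&wb->bdi->wb_waitq) in the real code */
}

/* Drop one reference on the completion; the last drop wakes waiters.
 * This is the role finish_writeback_work() is factored out to play. */
static void finish_work(struct wb_done *done)
{
	if (done && atomic_fetch_sub(&done->cnt, 1) == 1)
		wake_waiters();
}

/* Queue a work item: take the completion reference first, then either
 * hand the item to the worker or, if the queue is already shut down,
 * complete it immediately so the waiter is never left hanging. */
static void queue_item(bool registered, struct wb_done *done,
		       void (*enqueue)(struct wb_done *))
{
	if (done)
		atomic_fetch_add(&done->cnt, 1);
	if (registered)
		enqueue(done);	/* worker calls finish_work() later */
	else
		finish_work(done);
}
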
 
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
 
        set_bit(WB_writeback_running, &wb->state);
        while ((work = get_next_work_item(wb)) != NULL) {
-               struct wb_completion *done = work->done;
-
                trace_writeback_exec(wb, work);
-
                wrote += wb_writeback(wb, work);
-
-               if (work->auto_free)
-                       kfree(work);
-               if (done && atomic_dec_and_test(&done->cnt))
-                       wake_up_all(&wb->bdi->wb_waitq);
+               finish_writeback_work(wb, work);
        }
 
        /*
index bb79972dc638ba8bf27beef1930deeb186820af5..773774531aff5fc081610706ea39756b0e5a5c25 100644 (file)
@@ -232,12 +232,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = {
        .svo_module             = THIS_MODULE,
 };
 
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
        [0] = &nfs40_cb_sv_ops,
        [1] = &nfs41_cb_sv_ops,
 };
 #else
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
        [0] = &nfs40_cb_sv_ops,
        [1] = NULL,
 };
index 91a8d610ba0fa6db7cc76458ec2514aec9b124db..390ada8741bcbfd2e4aaecb3f759ec0707003674 100644 (file)
@@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
        return NULL;
 }
 
-static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+/*
+ * Return true if @clp is done initializing, false if still working on it.
+ *
+ * Use nfs_client_init_status to check if it was successful.
+ */
+bool nfs_client_init_is_complete(const struct nfs_client *clp)
 {
        return clp->cl_cons_state <= NFS_CS_READY;
 }
+EXPORT_SYMBOL_GPL(nfs_client_init_is_complete);
+
+/*
+ * Return 0 if @clp was successfully initialized, -errno otherwise.
+ *
+ * This must be called *after* nfs_client_init_is_complete() returns true,
+ * otherwise it will trigger a WARN_ON_ONCE() and return -EINVAL.
+ */
+int nfs_client_init_status(const struct nfs_client *clp)
+{
+       /* called without checking nfs_client_init_is_complete */
+       if (clp->cl_cons_state > NFS_CS_READY) {
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
+       return clp->cl_cons_state;
+}
+EXPORT_SYMBOL_GPL(nfs_client_init_status);
 
 int nfs_wait_client_init_complete(const struct nfs_client *clp)
 {
index f956ca20a8a3595e36e6cae0e913dc90a47b1e22..d913e818858f3fee8d7d5c199714d2d79b1bef39 100644 (file)
@@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
        struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
        struct nfs4_pnfs_ds *ret = ds;
        struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+       int status;
 
        if (ds == NULL) {
                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
@@ -277,9 +278,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
        if (ds->ds_clp)
                goto out_test_devid;
 
-       nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+       status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
                             dataserver_retrans, 4,
                             s->nfs_client->cl_minorversion);
+       if (status) {
+               nfs4_mark_deviceid_unavailable(devid);
+               ret = NULL;
+               goto out;
+       }
 
 out_test_devid:
        if (ret->ds_clp == NULL ||
index f4f39b0ab09b25170ed1f9f9a9a961ecadb9a5d2..98b34c9b0564b348615a0d560b863c11cd17ad5e 100644 (file)
@@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
 static inline bool
 ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
 {
-       return nfs4_test_deviceid_unavailable(node);
+       /*
+        * Flexfiles should never mark a DS unavailable, but if it does
+        * print a (ratelimited) warning as this can affect performance.
+        */
+       if (nfs4_test_deviceid_unavailable(node)) {
+               u32 *p = (u32 *)node->deviceid.data;
+
+               pr_warn_ratelimited("NFS: flexfiles layout referencing an "
+                               "unavailable device [%x%x%x%x]\n",
+                               p[0], p[1], p[2], p[3]);
+               return true;
+       }
+       return false;
 }
 
 static inline int
index e5a6f248697b369003e89ed526608d7cd2a296eb..85fde93dff774e7edf619bffe43657b9c2346034 100644 (file)
@@ -384,6 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
        struct inode *ino = lseg->pls_layout->plh_inode;
        struct nfs_server *s = NFS_SERVER(ino);
        unsigned int max_payload;
+       int status;
 
        if (!ff_layout_mirror_valid(lseg, mirror, true)) {
                pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -404,7 +405,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
        /* FIXME: For now we assume the server sent only one version of NFS
         * to use for the DS.
         */
-       nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+       status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
                             dataserver_retrans,
                             mirror->mirror_ds->ds_versions[0].version,
                             mirror->mirror_ds->ds_versions[0].minor_version);
@@ -420,11 +421,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                        mirror->mirror_ds->ds_versions[0].wsize = max_payload;
                goto out;
        }
+out_fail:
        ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                 mirror, lseg->pls_range.offset,
                                 lseg->pls_range.length, NFS4ERR_NXIO,
                                 OP_ILLEGAL, GFP_NOIO);
-out_fail:
        if (fail_return || !ff_layout_has_available_ds(lseg))
                pnfs_error_mark_layout_for_return(ino, lseg);
        ds = NULL;
index 09ca5095c04e427c881785170aefe7fdf58e7621..7b38fedb7e032824ec509edca5cf465a22147851 100644 (file)
@@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
                                           struct nfs_fh *,
                                           struct nfs_fattr *,
                                           rpc_authflavor_t);
+extern bool nfs_client_init_is_complete(const struct nfs_client *clp);
+extern int nfs_client_init_status(const struct nfs_client *clp);
 extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
 extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
 extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
index 5ae9d64ea08bc80c97c7c4c5b71ee73ef1a6ba8b..8346ccbf2d52e518b6fa61d0c8cbb3d033ec1f02 100644 (file)
@@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
        server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
        server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
 
-       if (server->rsize > server_resp_sz)
+       if (!server->rsize || server->rsize > server_resp_sz)
                server->rsize = server_resp_sz;
-       if (server->wsize > server_rqst_sz)
+       if (!server->wsize || server->wsize > server_rqst_sz)
                server->wsize = server_rqst_sz;
 #endif /* CONFIG_NFS_V4_1 */
 }
index 1b183686c6d4f06c3b1d4ed044c527bff6ba4a83..c780d98035ccf79573c47ac8fb46b8f06a17653e 100644 (file)
@@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
        if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
                return 0;
 
-       /* even though OPEN succeeded, access is denied. Close the file */
-       nfs4_close_state(state, fmode);
        return -EACCES;
 }
 
@@ -7427,11 +7425,11 @@ static void nfs4_exchange_id_release(void *data)
        struct nfs41_exchange_id_data *cdata =
                                        (struct nfs41_exchange_id_data *)data;
 
-       nfs_put_client(cdata->args.client);
        if (cdata->xprt) {
                xprt_put(cdata->xprt);
                rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
        }
+       nfs_put_client(cdata->args.client);
        kfree(cdata->res.impl_id);
        kfree(cdata->res.server_scope);
        kfree(cdata->res.server_owner);
@@ -7538,10 +7536,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
        task_setup_data.callback_data = calldata;
 
        task = rpc_run_task(&task_setup_data);
-       if (IS_ERR(task)) {
-       status = PTR_ERR(task);
-               goto out_impl_id;
-       }
+       if (IS_ERR(task))
+               return PTR_ERR(task);
 
        if (!xprt) {
                status = rpc_wait_for_completion_task(task);
@@ -7569,6 +7565,7 @@ out_server_owner:
        kfree(calldata->res.server_owner);
 out_calldata:
        kfree(calldata);
+       nfs_put_client(clp);
        goto out;
 }
 
index f0369e36275341404db0684aebb4e9bdba273205..80ce289eea05326336a7edecbe8a132ee4900d23 100644 (file)
@@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
                if (len <= 0)
                        goto out;
                dprintk("%s: name=%s\n", __func__, group_name->data);
-               return NFS_ATTR_FATTR_OWNER_NAME;
+               return NFS_ATTR_FATTR_GROUP_NAME;
        } else {
                len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
                                XDR_MAX_NETOBJ);
index 63f77b49a586a53a1abbcf7b517aa2a90f3ddb2e..590e1e35781f0b737b5b277d76ab56092f8e3f3b 100644 (file)
@@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
 struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
                                      gfp_t gfp_flags);
 void nfs4_pnfs_v3_ds_connect_unload(void);
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
                          struct nfs4_deviceid_node *devid, unsigned int timeo,
                          unsigned int retrans, u32 version, u32 minor_version);
 struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
index 9414b492439fbf0e70d32f9238ac29b8e9cf50be..7250b95549ecc73bd1dbdae9ec909aac64f93a49 100644 (file)
@@ -745,15 +745,17 @@ out:
 /*
  * Create an rpc connection to the nfs4_pnfs_ds data server.
  * Currently only supports IPv4 and IPv6 addresses.
- * If connection fails, make devid unavailable.
+ * If connection fails, make devid unavailable and return a -errno.
  */
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
                          struct nfs4_deviceid_node *devid, unsigned int timeo,
                          unsigned int retrans, u32 version, u32 minor_version)
 {
-       if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
-               int err = 0;
+       int err;
 
+again:
+       err = 0;
+       if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
                if (version == 3) {
                        err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
                                                       retrans);
@@ -766,12 +768,29 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
                        err = -EPROTONOSUPPORT;
                }
 
-               if (err)
-                       nfs4_mark_deviceid_unavailable(devid);
                nfs4_clear_ds_conn_bit(ds);
        } else {
                nfs4_wait_ds_connect(ds);
+
+               /* The attempt we waited on neither connected nor marked
+                * the devid unavailable, so try again ourselves.
+                */
+               if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
+                       goto again;
        }
+
+       /*
+        * At this point the ds->ds_clp should be ready, but it might have
+        * hit an error.
+        */
+       if (!err) {
+               if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+                       WARN_ON_ONCE(ds->ds_clp ||
+                               !nfs4_test_deviceid_unavailable(devid));
+                       return -EINVAL;
+               }
+               err = nfs_client_init_status(ds->ds_clp);
+       }
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
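
nfs4_pnfs_ds_connect() now reports failure to its callers (which mark the deviceid unavailable themselves, per the filelayout and flexfiles hunks earlier) and loops via the again: label when the attempt it waited on neither connected nor gave up. The connect-once-with-waiters shape, as a hedged C11 sketch (here the loser's retry condition is driven by two flags the winner sets; all names are ours):

#include <stdatomic.h>
#include <stdbool.h>

struct ds {
	atomic_flag connecting;		/* init with ATOMIC_FLAG_INIT */
	atomic_bool connected;		/* stands in for ds->ds_clp */
	atomic_bool unavailable;	/* stands in for the devid flag */
};

/* One caller wins the connecting flag and performs the connect; the rest
 * wait, re-check the outcome, and loop if the winner neither connected
 * nor gave up - the role of the 'again:' label above. */
static int ds_connect(struct ds *ds, int (*do_connect)(struct ds *),
		      void (*wait_done)(struct ds *))
{
	int err = 0;

again:
	if (!atomic_flag_test_and_set(&ds->connecting)) {
		err = do_connect(ds);
		if (!err)
			atomic_store(&ds->connected, true);
		else
			atomic_store(&ds->unavailable, true);
		atomic_flag_clear(&ds->connecting);
	} else {
		wait_done(ds);
		if (!atomic_load(&ds->connected) &&
		    !atomic_load(&ds->unavailable))
			goto again;
	}
	return err;
}
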
 
index e75b056f46f43583b84da4a423cbafedb850c630..abb2c8a3be42e4755f747c62a1cec5466f13ee77 100644 (file)
@@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                        (long long)req_offset(req));
                if (status < 0) {
                        nfs_context_set_write_error(req->wb_context, status);
-                       nfs_inode_remove_request(req);
+                       if (req->wb_page)
+                               nfs_inode_remove_request(req);
                        dprintk_cont(", error = %d\n", status);
                        goto next;
                }
@@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                 * returned by the server against all stored verfs. */
                if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
                        /* We have a match */
-                       nfs_inode_remove_request(req);
+                       if (req->wb_page)
+                               nfs_inode_remove_request(req);
                        dprintk_cont(" OK\n");
                        goto next;
                }
index e2112270d75a5f878e291bb5bb681474e3c4eeaf..9287d3a96e35582af28d880e7f1b9bc903ef205b 100644 (file)
@@ -409,7 +409,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
        int retval, i, timed_out = 0;
        u64 slack = 0;
        unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
-       unsigned long busy_end = 0;
+       unsigned long busy_start = 0;
 
        rcu_read_lock();
        retval = max_select_fd(n, fds);
@@ -512,11 +512,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
 
                /* only if found POLL_BUSY_LOOP sockets && not out of time */
                if (can_busy_loop && !need_resched()) {
-                       if (!busy_end) {
-                               busy_end = busy_loop_end_time();
+                       if (!busy_start) {
+                               busy_start = busy_loop_current_time();
                                continue;
                        }
-                       if (!busy_loop_timeout(busy_end))
+                       if (!busy_loop_timeout(busy_start))
                                continue;
                }
                busy_flag = 0;
@@ -800,7 +800,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
        int timed_out = 0, count = 0;
        u64 slack = 0;
        unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
-       unsigned long busy_end = 0;
+       unsigned long busy_start = 0;
 
        /* Optimise the no-wait case */
        if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
@@ -853,11 +853,11 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
 
                /* only if found POLL_BUSY_LOOP sockets && not out of time */
                if (can_busy_loop && !need_resched()) {
-                       if (!busy_end) {
-                               busy_end = busy_loop_end_time();
+                       if (!busy_start) {
+                               busy_start = busy_loop_current_time();
                                continue;
                        }
-                       if (!busy_loop_timeout(busy_end))
+                       if (!busy_loop_timeout(busy_start))
                                continue;
                }
                busy_flag = 0;
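
Both hunks in this file track an API change in the busy-poll helpers: a precomputed deadline from busy_loop_end_time() is replaced by a start stamp from busy_loop_current_time(), with busy_loop_timeout() now computing the elapsed time itself, hence the busy_end to busy_start rename. In sketch form (names approximate the kernel helpers; unsigned arithmetic keeps the subtraction safe across wraparound):

#include <stdbool.h>
#include <time.h>

static unsigned long now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000UL + ts.tv_nsec / 1000UL;
}

/* New-style check: the caller stashes the start time once and the
 * helper computes elapsed time itself, instead of comparing against a
 * precomputed deadline. */
static bool busy_loop_timed_out(unsigned long start_us,
				unsigned long budget_us)
{
	return now_us() - start_us >= budget_us;
}
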
index d04547fcf274af0eaee18096c94b22652551b9f7..eb00bc133bca673c556eb85a18385bbc3748dfcf 100644 (file)
@@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+               int size);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
index c6809ff41197d934c068e84b19eb77986bc7dccf..96b45cd6c63f0686d3c1cce5c41b232f0ab82080 100644 (file)
@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
 }
 #endif /* DEBUG */
 
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+       struct xfs_mount                *mp,
+       struct xfs_dir2_sf_hdr          *sfp,
+       int                             size)
+{
+       struct xfs_dir2_sf_entry        *sfep;
+       struct xfs_dir2_sf_entry        *next_sfep;
+       char                            *endp;
+       const struct xfs_dir_ops        *dops;
+       xfs_ino_t                       ino;
+       int                             i;
+       int                             i8count;
+       int                             offset;
+       __uint8_t                       filetype;
+
+       dops = xfs_dir_get_ops(mp, NULL);
+
+       /*
+        * Give up if the directory is way too short.
+        */
+       XFS_WANT_CORRUPTED_RETURN(mp, size >
+                       offsetof(struct xfs_dir2_sf_hdr, parent));
+       XFS_WANT_CORRUPTED_RETURN(mp, size >=
+                       xfs_dir2_sf_hdr_size(sfp->i8count));
+
+       endp = (char *)sfp + size;
+
+       /* Check .. entry */
+       ino = dops->sf_get_parent_ino(sfp);
+       i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+       XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+       offset = dops->data_first_offset;
+
+       /* Check all reported entries */
+       sfep = xfs_dir2_sf_firstentry(sfp);
+       for (i = 0; i < sfp->count; i++) {
+               /*
+                * struct xfs_dir2_sf_entry has a variable length.
+                * Check that the fixed-offset parts of the structure are
+                * within the data buffer.
+                */
+               XFS_WANT_CORRUPTED_RETURN(mp,
+                               ((char *)sfep + sizeof(*sfep)) < endp);
+
+               /* Don't allow names with known bad length. */
+               XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+               /*
+                * Check that the variable-length part of the structure is
+                * within the data buffer.  The next entry starts after the
+                * name component, so nextentry is an acceptable test.
+                */
+               next_sfep = dops->sf_nextentry(sfp, sfep);
+               XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+               /* Check that the offsets always increase. */
+               XFS_WANT_CORRUPTED_RETURN(mp,
+                               xfs_dir2_sf_get_offset(sfep) >= offset);
+
+               /* Check the inode number. */
+               ino = dops->sf_get_ino(sfp, sfep);
+               i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+               XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+               /* Check the file type. */
+               filetype = dops->sf_get_ftype(sfep);
+               XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+               offset = xfs_dir2_sf_get_offset(sfep) +
+                               dops->data_entsize(sfep->namelen);
+
+               sfep = next_sfep;
+       }
+       XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+       XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+       /* Make sure this whole thing ought to be in local format. */
+       XFS_WANT_CORRUPTED_RETURN(mp, offset +
+              (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+       return 0;
+}
+
 /*
  * Create a new (shortform) directory.
  */
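
xfs_dir2_sf_verify() above is a bounds-checked walk over variable-length records: it proves the fixed-size header of each entry is inside the buffer before reading it, rejects known-bad name lengths, proves the variable-length tail fits before advancing, and finally insists the walk lands exactly on the end of the buffer with the i8count bookkeeping intact. A generic userspace sketch of that shape, using an illustrative record layout rather than the on-disk XFS shortform format:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative record layout; not the XFS on-disk format. */
struct rec {
        uint8_t namelen;
        char    name[];                         /* namelen bytes follow */
};

static bool verify_records(const void *buf, size_t size, unsigned int count)
{
        const char *p = buf;
        const char *end = p + size;
        unsigned int i;

        for (i = 0; i < count; i++) {
                const struct rec *r = (const struct rec *)p;
                size_t need;

                /* Fixed part in bounds before any field is read? */
                if ((size_t)(end - p) < sizeof(*r))
                        return false;
                /* Reject names with known-bad length. */
                if (r->namelen == 0)
                        return false;
                /* Variable part in bounds before advancing? */
                need = sizeof(*r) + r->namelen;
                if (need > (size_t)(end - p))
                        return false;
                p += need;
        }
        return p == end;                        /* no trailing garbage */
}
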
index 25c1e078aef6a5925c12f2cc91b0d18b8b38711b..9653e964eda4f99ca611bb2cb6449a470be45d48 100644 (file)
@@ -33,6 +33,8 @@
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
@@ -320,6 +322,7 @@ xfs_iformat_local(
        int             whichfork,
        int             size)
 {
+       int             error;
 
        /*
         * If the size is unreasonable, then something
@@ -336,6 +339,14 @@ xfs_iformat_local(
                return -EFSCORRUPTED;
        }
 
+       if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+               error = xfs_dir2_sf_verify(ip->i_mount,
+                               (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+                               size);
+               if (error)
+                       return error;
+       }
+
        xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
        return 0;
 }
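
With the hunk above, xfs_iformat_local() refuses to instantiate an inline directory fork unless the raw on-disk bytes pass xfs_dir2_sf_verify(), so corruption is rejected at read time rather than cached and tripped over later; the xfs_iflush_fork() changes below apply the same gate on the write-back side. A hedged sketch of that verify-before-install pattern; every name here is illustrative, not an XFS API:

#include <stddef.h>

#define EFSCORRUPTED 117                        /* illustrative errno value */

struct fork_data {
        const unsigned char *data;
        size_t size;
};

static struct fork_data cached;                 /* stands in for the in-core fork */

/* Stand-in for xfs_dir2_sf_verify(); 0 means the bytes are consistent. */
static int verify_inline_dir(const struct fork_data *f)
{
        return f->size > 0 ? 0 : -EFSCORRUPTED;
}

static int load_inline_fork(const struct fork_data *f)
{
        int error = verify_inline_dir(f);

        if (error)
                return error;                   /* corrupt data is never cached */
        cached = *f;                            /* safe: contents verified */
        return 0;
}
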
@@ -856,7 +867,7 @@ xfs_iextents_copy(
  * In these cases, the format always takes precedence, because the
  * format indicates the current state of the fork.
  */
-void
+int
 xfs_iflush_fork(
        xfs_inode_t             *ip,
        xfs_dinode_t            *dip,
@@ -866,6 +877,7 @@ xfs_iflush_fork(
        char                    *cp;
        xfs_ifork_t             *ifp;
        xfs_mount_t             *mp;
+       int                     error;
        static const short      brootflag[2] =
                { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
        static const short      dataflag[2] =
@@ -874,7 +886,7 @@ xfs_iflush_fork(
                { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
 
        if (!iip)
-               return;
+               return 0;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        /*
         * This can happen if we gave up in iformat in an error path,
@@ -882,12 +894,19 @@ xfs_iflush_fork(
         */
        if (!ifp) {
                ASSERT(whichfork == XFS_ATTR_FORK);
-               return;
+               return 0;
        }
        cp = XFS_DFORK_PTR(dip, whichfork);
        mp = ip->i_mount;
        switch (XFS_IFORK_FORMAT(ip, whichfork)) {
        case XFS_DINODE_FMT_LOCAL:
+               if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+                       error = xfs_dir2_sf_verify(mp,
+                                       (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+                                       ifp->if_bytes);
+                       if (error)
+                               return error;
+               }
                if ((iip->ili_fields & dataflag[whichfork]) &&
                    (ifp->if_bytes > 0)) {
                        ASSERT(ifp->if_u1.if_data != NULL);
@@ -940,6 +959,7 @@ xfs_iflush_fork(
                ASSERT(0);
                break;
        }
+       return 0;
 }
 
 /*
index 7fb8365326d1a745583c4f133bc5a63668316b33..132dc59fdde6942cd22fca4ae11b8adbc193f051 100644 (file)
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
 
 int            xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void           xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int            xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
                                struct xfs_inode_log_item *, int);
 void           xfs_idestroy_fork(struct xfs_inode *, int);
 void           xfs_idata_realloc(struct xfs_inode *, int, int);
index 003a99b83bd8845e22d6311be1d474679521242d..ad9396e516f6e389b88bca5dc2dc41d3372ed714 100644 (file)
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
        struct xfs_da_geometry  *geo = args->geo;
 
        ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       /*
-        * Give up if the directory is way too short.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return -EIO;
-       }
-
        ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
 
        sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
 
-       if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
-               return -EFSCORRUPTED;
-
        /*
         * If the block number in the offset is out of range, we're done.
         */
index 7eaf1ef74e3c63ebb3c640e32d2db87864984a4a..c7fe2c2123ab8375caf0e0349a454ed8b2762095 100644 (file)
@@ -3475,6 +3475,7 @@ xfs_iflush_int(
        struct xfs_inode_log_item *iip = ip->i_itemp;
        struct xfs_dinode       *dip;
        struct xfs_mount        *mp = ip->i_mount;
+       int                     error;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(xfs_isiflocked(ip));
@@ -3557,9 +3558,14 @@ xfs_iflush_int(
        if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
                ip->i_d.di_flushiter = 0;
 
-       xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
-       if (XFS_IFORK_Q(ip))
-               xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+       error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+       if (error)
+               return error;
+       if (XFS_IFORK_Q(ip)) {
+               error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+               if (error)
+                       return error;
+       }
        xfs_inobp_check(mp, bp);
 
        /*
index 673acda012af44efe4fb5a7fc5279d08e416cc86..9b05886f9773cde8439a0c3e21b39ad29460c440 100644 (file)
@@ -287,18 +287,15 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
 }
 
 /* Validate the processor object's proc_id */
-bool acpi_processor_validate_proc_id(int proc_id);
+bool acpi_duplicate_processor_id(int proc_id);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
                 int *pcpu);
 int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-void acpi_set_processor_mapping(void);
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
index 909fc033173a7c893ffe7113f0e32568392b76ae..bbb513da5075724f4ed2054635bbefaa621107b2 100644 (file)
@@ -35,6 +35,7 @@ struct bpf_map_ops {
        void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
                                int fd);
        void (*map_fd_put_ptr)(void *ptr);
+       u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 };
 
 struct bpf_map {
@@ -49,6 +50,7 @@ struct bpf_map {
        const struct bpf_map_ops *ops;
        struct work_struct work;
        atomic_t usercnt;
+       struct bpf_map *inner_map_meta;
 };
 
 struct bpf_map_type_list {
@@ -167,6 +169,8 @@ struct bpf_verifier_ops {
                                  const struct bpf_insn *src,
                                  struct bpf_insn *dst,
                                  struct bpf_prog *prog);
+       int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
+                       union bpf_attr __user *uattr);
 };
 
 struct bpf_prog_type_list {
@@ -231,6 +235,11 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+                         union bpf_attr __user *uattr);
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+                         union bpf_attr __user *uattr);
+
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
@@ -275,6 +284,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
                                 void *key, void *value, u64 map_flags);
 void bpf_fd_array_map_clear(struct bpf_map *map);
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+                               void *key, void *value, u64 map_flags);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
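
Two features land in this header at once: map-in-map support (the inner_map_meta template, bpf_fd_htab_map_update_elem(), and the map_gen_lookup hook for inlining lookups into the program image) and a test-run facility backed by bpf_prog_test_run_xdp()/bpf_prog_test_run_skb(). A hedged userspace sketch of driving the latter through the bpf(2) syscall; it assumes the uapi test fields that accompany these hooks (attr.test.*) and a prog_fd referencing an already-loaded XDP or skb program:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int test_run_once(int prog_fd, void *pkt, unsigned int pkt_len,
                         unsigned int *retval)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = prog_fd;
        attr.test.data_in = (unsigned long)pkt;         /* input packet bytes */
        attr.test.data_size_in = pkt_len;
        attr.test.repeat = 1;                           /* single run */

        if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)) < 0)
                return -1;

        *retval = attr.test.retval;             /* e.g. XDP_PASS or XDP_DROP */
        return 0;
}
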
index a13b031dc6b807f38c0e7e687ba2cfbfe4c65fb7..5efb4db44e1ef3223d984296ccf1ca0737224d10 100644 (file)
@@ -66,7 +66,10 @@ struct bpf_verifier_state_list {
 };
 
 struct bpf_insn_aux_data {
-       enum bpf_reg_type ptr_type;     /* pointer type for load/store insns */
+       union {
+               enum bpf_reg_type ptr_type;     /* pointer type for load/store insns */
+               struct bpf_map *map_ptr;        /* pointer for call insn into lookup_elem */
+       };
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
index 55e517130311980b36ad628054043130f88626af..abcda9b458ab65143acb1fb28b50225adbec83fd 100644 (file)
@@ -25,6 +25,9 @@
 #define PHY_ID_BCM57780                        0x03625d90
 
 #define PHY_ID_BCM7250                 0xae025280
+#define PHY_ID_BCM7260                 0xae025190
+#define PHY_ID_BCM7268                 0xae025090
+#define PHY_ID_BCM7271                 0xae0253b0
 #define PHY_ID_BCM7278                 0xae0251a0
 #define PHY_ID_BCM7364                 0xae025260
 #define PHY_ID_BCM7366                 0x600d8490
index 30c4570e928dfe871bc84382f14eb49b5cac018e..9ef518af5515a01e202dee3cf4c27ffcd8c56441 100644 (file)
@@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev)
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
 extern int lock_device_hotplug_sysfs(void);
-void assert_held_device_hotplug(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
index 9ca23fcfb5d73131b564ad81d887929abc0e954b..6fdfc884fdeb3d3cf81dcbd40c52c0b8c8d203b1 100644 (file)
@@ -20,6 +20,8 @@ struct sock_exterr_skb {
        struct sock_extended_err        ee;
        u16                             addr_offset;
        __be16                          port;
+       u8                              opt_stats:1,
+                                       unused:7;
 };
 
 #endif
index c62b709b1ce087b7891f5d9c76aa2940b7f4a9a9..2d9f80848d4bd2b6e60dc06f1053ba91256c37ac 100644 (file)
@@ -446,21 +446,6 @@ static inline void eth_addr_dec(u8 *addr)
        u64_to_ether_addr(u, addr);
 }
 
-/**
- * ether_addr_greater - Compare two Ethernet addresses
- * @addr1: Pointer to a six-byte array containing the Ethernet address
- * @addr2: Pointer other six-byte array containing the Ethernet address
- *
- * Compare two Ethernet addresses, returns true addr1 is greater than addr2
- */
-static inline bool ether_addr_greater(const u8 *addr1, const u8 *addr2)
-{
-       u64 u1 = ether_addr_to_u64(addr1);
-       u64 u2 = ether_addr_to_u64(addr2);
-
-       return u1 > u2;
-}
-
 /**
  * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
  * @dev: Pointer to a device structure
index 9ded8c6d8176b909cf68da0e125eef4441b7c9a9..83cc9863444b078765255b9010b015578768be09 100644 (file)
@@ -60,6 +60,7 @@ enum ethtool_phys_id_state {
 enum {
        ETH_RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */
        ETH_RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */
+       ETH_RSS_HASH_CRC32_BIT, /* Configurable RSS hash function - Crc32 */
 
        /*
         * Add your fresh new hash function bits above and remember to update
@@ -73,6 +74,7 @@ enum {
 
 #define ETH_RSS_HASH_TOP       __ETH_RSS_HASH(TOP)
 #define ETH_RSS_HASH_XOR       __ETH_RSS_HASH(XOR)
+#define ETH_RSS_HASH_CRC32     __ETH_RSS_HASH(CRC32)
 
 #define ETH_RSS_HASH_UNKNOWN   0
 #define ETH_RSS_HASH_NO_CHANGE 0
index fbf7b39e81035506b73ddc860e3029119104b255..511fe910bf1d5225a017234015b3455d0a46c2b9 100644 (file)
@@ -7,6 +7,7 @@
 #include <stdarg.h>
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/compat.h>
 #include <linux/skbuff.h>
 #include <linux/linkage.h>
@@ -430,7 +431,7 @@ struct bpf_prog {
 };
 
 struct sk_filter {
-       atomic_t        refcnt;
+       refcount_t      refcnt;
        struct rcu_head rcu;
        struct bpf_prog *prog;
 };
@@ -693,6 +694,11 @@ static inline bool bpf_jit_is_ebpf(void)
 # endif
 }
 
+static inline bool ebpf_jit_enabled(void)
+{
+       return bpf_jit_enable && bpf_jit_is_ebpf();
+}
+
 static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
 {
        return fp->jited && bpf_jit_is_ebpf();
@@ -753,6 +759,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp);
 
 #else /* CONFIG_BPF_JIT */
 
+static inline bool ebpf_jit_enabled(void)
+{
+       return false;
+}
+
 static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
 {
        return false;
index 2484b2fcc6eb58d0139605359fe97b285df8e5f5..933d936566055de430f9db64ae152eb31785b7ff 100644 (file)
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
                                                struct fwnode_handle *child,
                                                enum gpiod_flags flags,
                                                const char *label);
-/* FIXME: delete this helper when users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-                         const char *con_id, struct fwnode_handle *child)
-{
-       return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
-                                                     0, child,
-                                                     GPIOD_ASIS,
-                                                     "?");
-}
 
 #else /* CONFIG_GPIOLIB */
 
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
        return ERR_PTR(-ENOSYS);
 }
 
-/* FIXME: delete this when all users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-                         const char *con_id, struct fwnode_handle *child)
-{
-       return ERR_PTR(-ENOSYS);
-}
-
 #endif /* CONFIG_GPIOLIB */
 
 static inline
index 62bbf3c1aa4a04409fac1a001b03a87cf0162fd1..36162485d66310e803884aee213d65aac6a8b10d 100644 (file)
@@ -1504,14 +1504,6 @@ static inline  void hv_signal_on_read(struct vmbus_channel *channel)
        return;
 }
 
-static inline void
-init_cached_read_index(struct vmbus_channel *channel)
-{
-       struct hv_ring_buffer_info *rbi = &channel->inbound;
-
-       rbi->cached_read_index = rbi->ring_buffer->read_index;
-}
-
 /*
  * Mask off host interrupt callback notifications
  */
@@ -1545,76 +1537,48 @@ static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi)
 /*
  * An API to support in-place processing of incoming VMBUS packets.
  */
-#define VMBUS_PKT_TRAILER      8
 
-static inline struct vmpacket_descriptor *
-get_next_pkt_raw(struct vmbus_channel *channel)
+/* Get data payload associated with descriptor */
+static inline void *hv_pkt_data(const struct vmpacket_descriptor *desc)
 {
-       struct hv_ring_buffer_info *ring_info = &channel->inbound;
-       u32 priv_read_loc = ring_info->priv_read_index;
-       void *ring_buffer = hv_get_ring_buffer(ring_info);
-       u32 dsize = ring_info->ring_datasize;
-       /*
-        * delta is the difference between what is available to read and
-        * what was already consumed in place. We commit read index after
-        * the whole batch is processed.
-        */
-       u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
-               priv_read_loc - ring_info->ring_buffer->read_index :
-               (dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
-       u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
-
-       if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
-               return NULL;
-
-       return ring_buffer + priv_read_loc;
+       return (void *)((unsigned long)desc + (desc->offset8 << 3));
 }
 
-/*
- * A helper function to step through packets "in-place"
- * This API is to be called after each successful call
- * get_next_pkt_raw().
- */
-static inline void put_pkt_raw(struct vmbus_channel *channel,
-                               struct vmpacket_descriptor *desc)
+/* Get data size associated with descriptor */
+static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc)
 {
-       struct hv_ring_buffer_info *ring_info = &channel->inbound;
-       u32 packetlen = desc->len8 << 3;
-       u32 dsize = ring_info->ring_datasize;
-
-       /*
-        * Include the packet trailer.
-        */
-       ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
-       ring_info->priv_read_index %= dsize;
+       return (desc->len8 << 3) - (desc->offset8 << 3);
 }
 
+
+struct vmpacket_descriptor *
+hv_pkt_iter_first(struct vmbus_channel *channel);
+
+struct vmpacket_descriptor *
+__hv_pkt_iter_next(struct vmbus_channel *channel,
+                  const struct vmpacket_descriptor *pkt);
+
+void hv_pkt_iter_close(struct vmbus_channel *channel);
+
 /*
- * This call commits the read index and potentially signals the host.
- * Here is the pattern for using the "in-place" consumption APIs:
- *
- * init_cached_read_index();
- *
- * while (get_next_pkt_raw() {
- *     process the packet "in-place";
- *     put_pkt_raw();
- * }
- * if (packets processed in place)
- *     commit_rd_index();
+ * Get the next packet descriptor from the iterator.
+ * If at the end of the list, return NULL and update the host.
  */
-static inline void commit_rd_index(struct vmbus_channel *channel)
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next(struct vmbus_channel *channel,
+                const struct vmpacket_descriptor *pkt)
 {
-       struct hv_ring_buffer_info *ring_info = &channel->inbound;
-       /*
-        * Make sure all reads are done before we update the read index since
-        * the writer may start writing to the read area once the read index
-        * is updated.
-        */
-       virt_rmb();
-       ring_info->ring_buffer->read_index = ring_info->priv_read_index;
+       struct vmpacket_descriptor *nxt;
+
+       nxt = __hv_pkt_iter_next(channel, pkt);
+       if (!nxt)
+               hv_pkt_iter_close(channel);
 
-       hv_signal_on_read(channel);
+       return nxt;
 }
 
+#define foreach_vmbus_pkt(pkt, channel) \
+       for (pkt = hv_pkt_iter_first(channel); pkt; \
+           pkt = hv_pkt_iter_next(channel, pkt))
 
 #endif /* _HYPERV_H */
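
The raw in-place consumption triple (get_next_pkt_raw()/put_pkt_raw()/commit_rd_index()) is replaced by an iterator whose termination does the bookkeeping: when __hv_pkt_iter_next() runs dry, hv_pkt_iter_next() calls hv_pkt_iter_close(), which commits the read index and signals the host. A sketch of a consumer built only from the helpers declared above; consume() is a hypothetical per-packet handler:

static void drain_channel(struct vmbus_channel *channel)
{
        struct vmpacket_descriptor *pkt;

        foreach_vmbus_pkt(pkt, channel) {
                void *payload = hv_pkt_data(pkt);       /* skip the descriptor */
                u32 len = hv_pkt_datalen(pkt);          /* payload bytes only */

                consume(payload, len);                  /* hypothetical handler */
        }
        /*
         * Nothing else to do: the iterator committed the read index and
         * signaled the host when it returned NULL.
         */
}
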
index ee971f335a8b659f04e5a3048ca70e5bc361ee5f..a2e9d6ea1349fb85418a9ebafc55dd08d16ca6b0 100644 (file)
@@ -153,8 +153,8 @@ struct in_ifaddr {
 int register_inetaddr_notifier(struct notifier_block *nb);
 int unregister_inetaddr_notifier(struct notifier_block *nb);
 
-void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
-                                struct ipv4_devconf *devconf);
+void inet_netconf_notify_devconf(struct net *net, int event, int type,
+                                int ifindex, struct ipv4_devconf *devconf);
 
 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
 static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
index 71be5b330d21305af23f8f7e1779988930755ed8..e1b442996f810529a755533270b6d21c350fbd5a 100644 (file)
@@ -37,6 +37,7 @@ struct ipv6_devconf {
        __s32           accept_ra_rtr_pref;
        __s32           rtr_probe_interval;
 #ifdef CONFIG_IPV6_ROUTE_INFO
+       __s32           accept_ra_rt_info_min_plen;
        __s32           accept_ra_rt_info_max_plen;
 #endif
 #endif
@@ -70,6 +71,7 @@ struct ipv6_devconf {
 #endif
        __u32           enhanced_dad;
        __u32           addr_gen_mode;
+       __s32           disable_policy;
 
        struct ctl_table_header *sysctl_header;
 };
index 1c823bef4c15105485bc0497a12708b8ee27ed9d..5734480c9590946412ebd16b7752c5341c4600be 100644 (file)
@@ -6,6 +6,7 @@
 struct kmem_cache;
 struct page;
 struct vm_struct;
+struct task_struct;
 
 #ifdef CONFIG_KASAN
 
index 7e66e4f62858f395cd000226e9580785b03a4cf1..1beb1ec2fbdf339b34affc69508a5f5462b409b0 100644 (file)
@@ -476,6 +476,7 @@ enum {
 enum {
        MLX4_INTERFACE_STATE_UP         = 1 << 0,
        MLX4_INTERFACE_STATE_DELETION   = 1 << 1,
+       MLX4_INTERFACE_STATE_NOWAIT     = 1 << 2,
 };
 
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
index 2fcff6b4503f6a4824bea50c189b072ef6c486cb..f508646262305afe315f4532200dc979b05fddae 100644 (file)
@@ -728,6 +728,7 @@ struct mlx5e_resources {
        u32                        pdn;
        struct mlx5_td             td;
        struct mlx5_core_mkey      mkey;
+       struct mlx5_sq_bfreg       bfreg;
 };
 
 struct mlx5_core_dev {
index 949b24b6c4794ce14909d779b7dbfd2534aa53db..ae91a4bda1a3063d3b2f6cef2d10c1266c1ef59f 100644 (file)
@@ -134,8 +134,13 @@ struct mlx5_flow_act {
        u32 action;
        u32 flow_tag;
        u32 encap_id;
+       u32 modify_id;
 };
 
+#define MLX5_DECLARE_FLOW_ACT(name) \
+       struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
+                                    MLX5_FS_DEFAULT_FLOW_TAG, 0, 0}
+
 /* Single destination per rule.
  * Group ID is implied by the match criteria.
  */
@@ -156,5 +161,4 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
                          u64 *bytes, u64 *packets, u64 *lastuse);
-
 #endif
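
MLX5_DECLARE_FLOW_ACT() declares a mlx5_flow_act pre-initialized to the common case: forward-to-destination, the default flow tag, and no encap or modify-header ids. A hedged usage sketch, assuming the mlx5_add_flow_rules() signature used elsewhere in this series:

static struct mlx5_flow_handle *
add_fwd_rule(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec,
             struct mlx5_flow_destination *dest)
{
        /* FWD_DEST action, default flow tag, encap_id = modify_id = 0 */
        MLX5_DECLARE_FLOW_ACT(flow_act);

        return mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
}
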
index 838242697541a28fdda4d90bf7b604e25f3bfba2..56bc842b062055aec966e9cff46a42af6bdcd095 100644 (file)
@@ -227,6 +227,8 @@ enum {
        MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
        MLX5_CMD_OP_ALLOC_ENCAP_HEADER            = 0x93d,
        MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
+       MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
+       MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
        MLX5_CMD_OP_MAX
 };
 
@@ -302,7 +304,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
-       u8         reserved_at_28[0x10];
+       u8         log_max_modify_header_context[0x8];
+       u8         max_modify_header_actions[0x8];
        u8         max_ft_level[0x8];
 
        u8         reserved_at_40[0x20];
@@ -2190,6 +2193,7 @@ enum {
        MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
        MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
        MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
+       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
 };
 
 struct mlx5_ifc_flow_context_bits {
@@ -2211,7 +2215,9 @@ struct mlx5_ifc_flow_context_bits {
 
        u8         encap_id[0x20];
 
-       u8         reserved_at_e0[0x120];
+       u8         modify_header_id[0x20];
+
+       u8         reserved_at_100[0x100];
 
        struct mlx5_ifc_fte_match_param_bits match_value;
 
@@ -4534,6 +4540,109 @@ struct mlx5_ifc_dealloc_encap_header_in_bits {
        u8         reserved_60[0x20];
 };
 
+struct mlx5_ifc_set_action_in_bits {
+       u8         action_type[0x4];
+       u8         field[0xc];
+       u8         reserved_at_10[0x3];
+       u8         offset[0x5];
+       u8         reserved_at_18[0x3];
+       u8         length[0x5];
+
+       u8         data[0x20];
+};
+
+struct mlx5_ifc_add_action_in_bits {
+       u8         action_type[0x4];
+       u8         field[0xc];
+       u8         reserved_at_10[0x10];
+
+       u8         data[0x20];
+};
+
+union mlx5_ifc_set_action_in_add_action_in_auto_bits {
+       struct mlx5_ifc_set_action_in_bits set_action_in;
+       struct mlx5_ifc_add_action_in_bits add_action_in;
+       u8         reserved_at_0[0x40];
+};
+
+enum {
+       MLX5_ACTION_TYPE_SET   = 0x1,
+       MLX5_ACTION_TYPE_ADD   = 0x2,
+};
+
+enum {
+       MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16    = 0x1,
+       MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0     = 0x2,
+       MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE     = 0x3,
+       MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16    = 0x4,
+       MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0     = 0x5,
+       MLX5_ACTION_IN_FIELD_OUT_IP_DSCP       = 0x6,
+       MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS     = 0x7,
+       MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT     = 0x8,
+       MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT     = 0x9,
+       MLX5_ACTION_IN_FIELD_OUT_IP_TTL        = 0xa,
+       MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT     = 0xb,
+       MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT     = 0xc,
+       MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96  = 0xd,
+       MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64   = 0xe,
+       MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32   = 0xf,
+       MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0    = 0x10,
+       MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96  = 0x11,
+       MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64   = 0x12,
+       MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32   = 0x13,
+       MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0    = 0x14,
+       MLX5_ACTION_IN_FIELD_OUT_SIPV4         = 0x15,
+       MLX5_ACTION_IN_FIELD_OUT_DIPV4         = 0x16,
+};
+
+struct mlx5_ifc_alloc_modify_header_context_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         modify_header_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_modify_header_context_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x20];
+
+       u8         table_type[0x8];
+       u8         reserved_at_68[0x10];
+       u8         num_of_actions[0x8];
+
+       union mlx5_ifc_set_action_in_add_action_in_auto_bits actions[0];
+};
+
+struct mlx5_ifc_dealloc_modify_header_context_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_modify_header_context_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         modify_header_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
 struct mlx5_ifc_query_dct_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
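
The new ALLOC_MODIFY_HEADER_CONTEXT command takes an array of the set/add actions defined above: a set action names a packet field, a bit offset and length within it, and the replacement data, and the returned modify_header_id is later referenced from the flow context under MLX5_FLOW_CONTEXT_ACTION_MOD_HDR. A hedged sketch of filling one action with the standard mlx5_ifc MLX5_SET() accessors; the exact offset/length encoding is firmware-defined, so treat the values as illustrative:

/* Build one set-action that overwrites the 8-bit IPv4 TTL field. */
static void build_set_ttl(void *action, u8 new_ttl)
{
        MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
        MLX5_SET(set_action_in, action, offset, 0);     /* start of the field */
        MLX5_SET(set_action_in, action, length, 8);     /* all 8 TTL bits */
        MLX5_SET(set_action_in, action, data, new_ttl);
}
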
index 97456b2539e46d6232dda804f6a434db6fd7134f..cc07c3be2705f1a3432117b1c687fb679813bb5a 100644 (file)
@@ -41,7 +41,6 @@
 
 #include <linux/ethtool.h>
 #include <net/net_namespace.h>
-#include <net/dsa.h>
 #ifdef CONFIG_DCB
 #include <net/dcbnl.h>
 #endif
@@ -57,6 +56,8 @@
 struct netpoll_info;
 struct device;
 struct phy_device;
+struct dsa_switch_tree;
+
 /* 802.11 specific */
 struct wireless_dev;
 /* 802.15.4 specific */
@@ -786,11 +787,11 @@ struct tc_cls_u32_offload;
 struct tc_to_netdev {
        unsigned int type;
        union {
-               u8 tc;
                struct tc_cls_u32_offload *cls_u32;
                struct tc_cls_flower_offload *cls_flower;
                struct tc_cls_matchall_offload *cls_mall;
                struct tc_cls_bpf_offload *cls_bpf;
+               struct tc_mqprio_qopt *mqprio;
        };
        bool egress_dev;
 };
@@ -2004,15 +2005,6 @@ void dev_net_set(struct net_device *dev, struct net *net)
        write_pnet(&dev->nd_net, net);
 }
 
-static inline bool netdev_uses_dsa(struct net_device *dev)
-{
-#if IS_ENABLED(CONFIG_NET_DSA)
-       if (dev->dsa_ptr != NULL)
-               return dsa_uses_tagged_protocol(dev->dsa_ptr);
-#endif
-       return false;
-}
-
 /**
  *     netdev_priv - access network device private data
  *     @dev: network device
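
In tc_to_netdev, the bare u8 traffic-class count is replaced by a struct tc_mqprio_qopt pointer, so an mqprio offload request can hand the driver the full table (per-TC queue counts and offsets, plus the hw flag) rather than just the number of classes. A hedged driver-side sketch, assuming the ndo_setup_tc() signature of this series; foo_set_num_tcs() is hypothetical:

static int foo_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
                        struct tc_to_netdev *tc)
{
        struct tc_mqprio_qopt *mqprio;

        if (tc->type != TC_SETUP_MQPRIO)
                return -EINVAL;

        mqprio = tc->mqprio;                    /* was: u8 count in tc->tc */
        return foo_set_num_tcs(dev, mqprio->num_tc);
}
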
index a58cca8bcb29d8c9bdcda71538cfa8f25c913916..ba35ba5204871a69b5d5522f170432d24aa32e96 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/phy.h>
 #include <linux/of.h>
 
-#ifdef CONFIG_OF
+#if IS_ENABLED(CONFIG_OF_MDIO)
 extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
 extern struct phy_device *of_phy_find_device(struct device_node *phy_np);
 extern struct phy_device *of_phy_connect(struct net_device *dev,
@@ -32,7 +32,7 @@ extern int of_phy_register_fixed_link(struct device_node *np);
 extern void of_phy_deregister_fixed_link(struct device_node *np);
 extern bool of_phy_is_fixed_link(struct device_node *np);
 
-#else /* CONFIG_OF */
+#else /* CONFIG_OF_MDIO */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 {
        /*
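
The stub guard changes from #ifdef CONFIG_OF to #if IS_ENABLED(CONFIG_OF_MDIO): the right symbol, since the stubs must track the MDIO helpers rather than DT support as a whole, and the right test, since IS_ENABLED() is true for both built-in and modular (=y or =m) configurations where #ifdef only sees built-in. The idiom, with CONFIG_FOO standing in for a real option:

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_FOO)                      /* true for CONFIG_FOO=y or =m */
int foo_do_thing(void);
#else
static inline int foo_do_thing(void)
{
        return -ENODEV;                         /* stub keeps callers building */
}
#endif
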
index 43a774873aa96d4af64d0cdebb579be572a6658a..624cecf69c28d39b148b6d0616842fcf40317473 100644 (file)
@@ -587,23 +587,29 @@ struct phy_driver {
         */
        void (*link_change_notify)(struct phy_device *dev);
 
-       /* A function provided by a phy specific driver to override the
-        * the PHY driver framework support for reading a MMD register
-        * from the PHY. If not supported, return -1. This function is
-        * optional for PHY specific drivers, if not provided then the
-        * default MMD read function is used by the PHY framework.
+       /*
+        * PHY-specific driver override for reading an MMD register.
+        * This function is optional for PHY specific drivers.  When
+        * not provided, the default MMD read function will be used
+        * by phy_read_mmd(), which will use either a direct read for
+        * Clause 45 PHYs or an indirect read for Clause 22 PHYs.
+        *  devnum is the MMD device number within the PHY device,
+        *  regnum is the register within the selected MMD device.
         */
-       int (*read_mmd_indirect)(struct phy_device *dev, int ptrad,
-                                int devnum, int regnum);
-
-       /* A function provided by a phy specific driver to override the
-        * the PHY driver framework support for writing a MMD register
-        * from the PHY. This function is optional for PHY specific drivers,
-        * if not provided then the default MMD read function is used by
-        * the PHY framework.
+       int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum);
+
+       /*
+        * PHY-specific driver override for writing an MMD register.
+        * This function is optional for PHY specific drivers.  When
+        * not provided, the default MMD write function will be used
+        * by phy_write_mmd(), which will use either a direct write for
+        * Clause 45 PHYs, or an indirect write for Clause 22 PHYs.
+        *  devnum is the MMD device number within the PHY device,
+        *  regnum is the register within the selected MMD device.
+        *  val is the value to be written.
         */
-       void (*write_mmd_indirect)(struct phy_device *dev, int ptrad,
-                                  int devnum, int regnum, u32 val);
+       int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum,
+                        u16 val);
 
        /* Get the size and type of the eeprom contained within a plug-in
         * module */
@@ -651,25 +657,7 @@ struct phy_fixup {
  *
  * Same rules as for phy_read();
  */
-static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
-{
-       if (!phydev->is_c45)
-               return -EOPNOTSUPP;
-
-       return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr,
-                           MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff));
-}
-
-/**
- * phy_read_mmd_indirect - reads data from the MMD registers
- * @phydev: The PHY device bus
- * @prtad: MMD Address
- * @addr: PHY address on the MII bus
- *
- * Description: it reads data from the MMD registers (clause 22 to access to
- * clause 45) of the specified phy address.
- */
-int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad);
+int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
 
 /**
  * phy_read - Convenience function for reading a given PHY register
@@ -752,35 +740,29 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev)
  *
  * Same rules as for phy_write();
  */
-static inline int phy_write_mmd(struct phy_device *phydev, int devad,
-                               u32 regnum, u16 val)
-{
-       if (!phydev->is_c45)
-               return -EOPNOTSUPP;
-
-       regnum = MII_ADDR_C45 | ((devad & 0x1f) << 16) | (regnum & 0xffff);
-
-       return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val);
-}
-
-/**
- * phy_write_mmd_indirect - writes data to the MMD registers
- * @phydev: The PHY device
- * @prtad: MMD Address
- * @devad: MMD DEVAD
- * @data: data to write in the MMD register
- *
- * Description: Write data from the MMD registers of the specified
- * phy address.
- */
-void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
-                           int devad, u32 data);
+int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
 
 struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
                                     bool is_c45,
                                     struct phy_c45_device_ids *c45_ids);
+#if IS_ENABLED(CONFIG_PHYLIB)
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
+void phy_device_free(struct phy_device *phydev);
+#else
+static inline
+struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
+{
+       return NULL;
+}
+
+static inline int phy_device_register(struct phy_device *phy)
+{
+       return 0;
+}
+
+static inline void phy_device_free(struct phy_device *phydev) { }
+#endif /* CONFIG_PHYLIB */
 void phy_device_remove(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
@@ -861,7 +843,6 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
 int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
-void phy_device_free(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
@@ -888,8 +869,10 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
                                   const struct ethtool_link_ksettings *cmd);
 int phy_ethtool_nway_reset(struct net_device *ndev);
 
+#if IS_ENABLED(CONFIG_PHYLIB)
 int __init mdio_bus_init(void);
 void mdio_bus_exit(void);
+#endif
 
 extern struct bus_type mdio_bus_type;
 
@@ -900,7 +883,7 @@ struct mdio_board_info {
        const void      *platform_data;
 };
 
-#if IS_ENABLED(CONFIG_PHYLIB)
+#if IS_ENABLED(CONFIG_MDIO_DEVICE)
 int mdiobus_register_board_info(const struct mdio_board_info *info,
                                unsigned int n);
 #else
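
The MMD plumbing is restructured: phy_read_mmd()/phy_write_mmd() become the only entry points (now out of line), the *_indirect variants are gone, and a PHY driver needing nonstandard access supplies int-returning read_mmd/write_mmd hooks while everyone else falls through to the framework's direct Clause 45 or indirect Clause 22 path. A hedged sketch of a driver override, kernel context assumed; the FOO_* registers model a hypothetical vendor indirection scheme, not a real device:

#define FOO_MMD_CTRL    0x0d                    /* hypothetical registers */
#define FOO_MMD_ADDR    0x0e
#define FOO_MMD_DATA    0x0f

static int foo_read_mmd(struct phy_device *dev, int devnum, u16 regnum)
{
        int ret;

        ret = phy_write(dev, FOO_MMD_CTRL, devnum);     /* select MMD device */
        if (ret < 0)
                return ret;
        ret = phy_write(dev, FOO_MMD_ADDR, regnum);     /* select register */
        if (ret < 0)
                return ret;
        return phy_read(dev, FOO_MMD_DATA);             /* value or -errno */
}

static struct phy_driver foo_driver = {
        .read_mmd = foo_read_mmd,
        /* .write_mmd left NULL: framework default direct/indirect path */
};

Callers stay the same either way: phy_read_mmd(phydev, devad, regnum) picks the driver hook when one is set.
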
index 52966b9bfde3740b506664bf2329596cf23695ef..fbab6e0514f07bf0f4a9ac481cb712c58d113b7f 100644 (file)
 #define MAX_NUM_LL2_TX_STATS_COUNTERS  32
 
 #define FW_MAJOR_VERSION       8
-#define FW_MINOR_VERSION       10
-#define FW_REVISION_VERSION    10
+#define FW_MINOR_VERSION       15
+#define FW_REVISION_VERSION    3
 #define FW_ENGINEERING_VERSION 0
 
 /***********************/
 
 /* DEMS */
 #define DQ_DEMS_LEGACY                 0
+#define DQ_DEMS_TOE_MORE_TO_SEND       3
+#define DQ_DEMS_TOE_LOCAL_ADV_WND      4
+#define DQ_DEMS_ROCE_CQ_CONS           7
 
 /* XCM agg val selection */
 #define DQ_XCM_AGG_VAL_SEL_WORD2  0
 #define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3
 #define DQ_XCM_ISCSI_EXP_STAT_SN_CMD   DQ_XCM_AGG_VAL_SEL_REG6
 #define DQ_XCM_ROCE_SQ_PROD_CMD        DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_TOE_TX_BD_PROD_CMD      DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_TOE_MORE_TO_SEND_SEQ_CMD        DQ_XCM_AGG_VAL_SEL_REG3
+#define DQ_XCM_TOE_LOCAL_ADV_WND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG4
 
 /* UCM agg val selection (HW) */
 #define        DQ_UCM_AGG_VAL_SEL_WORD0        0
 #define DQ_XCM_ISCSI_DQ_FLUSH_CMD      BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
 #define DQ_XCM_ISCSI_SLOW_PATH_CMD     BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
 #define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_TOE_DQ_FLUSH_CMD                BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_TOE_SLOW_PATH_CMD       BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
 
 /* UCM agg counter flag selection (HW) */
 #define        DQ_UCM_AGG_FLG_SHIFT_CF0        0
 #define DQ_UCM_ETH_PMD_RX_ARM_CMD      BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
 #define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD   BIT(DQ_UCM_AGG_FLG_SHIFT_CF4)
 #define DQ_UCM_ROCE_CQ_ARM_CF_CMD      BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
+#define DQ_UCM_TOE_TIMER_STOP_ALL_CMD  BIT(DQ_UCM_AGG_FLG_SHIFT_CF3)
+#define DQ_UCM_TOE_SLOW_PATH_CF_CMD    BIT(DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_TOE_DQ_CF_CMD           BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
 
 /* TCM agg counter flag selection (HW) */
 #define DQ_TCM_AGG_FLG_SHIFT_CF0       0
 #define DQ_TCM_FCOE_TIMER_STOP_ALL_CMD      BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
 #define DQ_TCM_ISCSI_FLUSH_Q0_CMD      BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
 #define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD        BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
+#define DQ_TCM_TOE_FLUSH_Q0_CMD                BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_TOE_TIMER_STOP_ALL_CMD  BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
+#define DQ_TCM_IWARP_POST_RQ_CF_CMD    BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
 
 /* PWM address mapping */
 #define DQ_PWM_OFFSET_DPM_BASE 0x0
@@ -689,6 +703,16 @@ struct iscsi_eqe_data {
 #define ISCSI_EQE_DATA_RESERVED0_SHIFT                 7
 };
 
+struct rdma_eqe_destroy_qp {
+       __le32 cid;
+       u8 reserved[4];
+};
+
+union rdma_eqe_data {
+       struct regpair async_handle;
+       struct rdma_eqe_destroy_qp rdma_destroy_qp_data;
+};
+
 struct malicious_vf_eqe_data {
        u8 vf_id;
        u8 err_id;
@@ -705,9 +729,9 @@ union event_ring_data {
        u8 bytes[8];
        struct vf_pf_channel_eqe_data vf_pf_channel;
        struct iscsi_eqe_data iscsi_info;
+       union rdma_eqe_data rdma_data;
        struct malicious_vf_eqe_data malicious_vf;
        struct initial_cleanup_eqe_data vf_init_cleanup;
-       struct regpair roce_handle;
 };
 
 /* Event Ring Entry */
index 4b402fb0eaad5fdf7bd29221d95596f092c0e945..34d93eb5bfba346019ba1d2c9014ab8a2fa5fd8f 100644 (file)
@@ -49,6 +49,9 @@
 #define ETH_RX_CQE_PAGE_SIZE_BYTES                      4096
 #define ETH_RX_NUM_NEXT_PAGE_BDS                        2
 
+#define ETH_MAX_TUNN_LSO_INNER_IPV4_OFFSET          253
+#define ETH_MAX_TUNN_LSO_INNER_IPV6_OFFSET          251
+
 #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT                          1
 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET                       18
 #define ETH_TX_MAX_BDS_PER_LSO_PACKET  255
index 2e417a45c5f7028ba1fc2b6201fc828f1fef4247..947a635d04bb57ff15a61f1ee82c41ae27c1d4e5 100644 (file)
@@ -109,13 +109,6 @@ struct fcoe_conn_terminate_ramrod_data {
        struct regpair terminate_params_addr;
 };
 
-struct fcoe_fast_sgl_ctx {
-       struct regpair sgl_start_addr;
-       __le32 sgl_byte_offset;
-       __le16 task_reuse_cnt;
-       __le16 init_offset_in_first_sge;
-};
-
 struct fcoe_slow_sgl_ctx {
        struct regpair base_sgl_addr;
        __le16 curr_sge_off;
@@ -124,23 +117,16 @@ struct fcoe_slow_sgl_ctx {
        __le16 reserved;
 };
 
-struct fcoe_sge {
-       struct regpair sge_addr;
-       __le16 size;
-       __le16 reserved0;
-       u8 reserved1[3];
-       u8 is_valid_sge;
-};
-
-union fcoe_data_desc_ctx {
-       struct fcoe_fast_sgl_ctx fast;
-       struct fcoe_slow_sgl_ctx slow;
-       struct fcoe_sge single_sge;
-};
-
 union fcoe_dix_desc_ctx {
        struct fcoe_slow_sgl_ctx dix_sgl;
-       struct fcoe_sge cached_dix_sge;
+       struct scsi_sge cached_dix_sge;
+};
+
+struct fcoe_fast_sgl_ctx {
+       struct regpair sgl_start_addr;
+       __le32 sgl_byte_offset;
+       __le16 task_reuse_cnt;
+       __le16 init_offset_in_first_sge;
 };
 
 struct fcoe_fcp_cmd_payload {
@@ -172,57 +158,6 @@ enum fcoe_mode_type {
        MAX_FCOE_MODE_TYPE
 };
 
-struct fcoe_mstorm_fcoe_task_st_ctx_fp {
-       __le16 flags;
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_MASK                 0x7FFF
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_SHIFT                0
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_MASK  0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_SHIFT 15
-       __le16 difDataResidue;
-       __le16 parent_id;
-       __le16 single_sge_saved_offset;
-       __le32 data_2_trns_rem;
-       __le32 offset_in_io;
-       union fcoe_dix_desc_ctx dix_desc;
-       union fcoe_data_desc_ctx data_desc;
-};
-
-struct fcoe_mstorm_fcoe_task_st_ctx_non_fp {
-       __le16 flags;
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_MASK            0x3
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_SHIFT           0
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_MASK               0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_SHIFT              2
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_MASK      0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_SHIFT     3
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_MASK         0xF
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_SHIFT        4
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_MASK            0x3
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_SHIFT           8
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_MASK                  0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_SHIFT                 10
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_MASK  0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_SHIFT 11
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_MASK      0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_SHIFT     12
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_MASK        0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_SHIFT       13
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_MASK        0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_SHIFT       14
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_MASK             0x1
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_SHIFT            15
-       u8 tx_rx_sgl_mode;
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_MASK               0x7
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_SHIFT              0
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_MASK               0x7
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_SHIFT              3
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_MASK                     0x3
-#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_SHIFT                    6
-       u8 rsrv2;
-       __le32 num_prm_zero_read;
-       struct regpair rsp_buf_addr;
-};
-
 struct fcoe_rx_stat {
        struct regpair fcoe_rx_byte_cnt;
        struct regpair fcoe_rx_data_pkt_cnt;
@@ -236,16 +171,6 @@ struct fcoe_rx_stat {
        __le32 rsrv;
 };
 
-enum fcoe_sgl_mode {
-       FCOE_SLOW_SGL,
-       FCOE_SINGLE_FAST_SGE,
-       FCOE_2_FAST_SGE,
-       FCOE_3_FAST_SGE,
-       FCOE_4_FAST_SGE,
-       FCOE_MUL_FAST_SGES,
-       MAX_FCOE_SGL_MODE
-};
-
 struct fcoe_stat_ramrod_data {
        struct regpair stat_params_addr;
 };
@@ -328,22 +253,24 @@ union fcoe_tx_info_union_ctx {
 struct ystorm_fcoe_task_st_ctx {
        u8 task_type;
        u8 sgl_mode;
-#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK  0x7
+#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK  0x1
 #define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 0
-#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK         0x1F
-#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT        3
+#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK         0x7F
+#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT        1
        u8 cached_dix_sge;
        u8 expect_first_xfer;
        __le32 num_pbf_zero_write;
        union protection_info_union_ctx protection_info_union;
        __le32 data_2_trns_rem;
+       struct scsi_sgl_params sgl_params;
+       u8 reserved1[12];
        union fcoe_tx_info_union_ctx tx_info_union;
        union fcoe_dix_desc_ctx dix_desc;
-       union fcoe_data_desc_ctx data_desc;
+       struct scsi_cached_sges data_desc;
        __le16 ox_id;
        __le16 rx_id;
        __le32 task_rety_identifier;
-       __le32 reserved1[2];
+       u8 reserved2[8];
 };
 
 struct ystorm_fcoe_task_ag_ctx {
@@ -484,22 +411,22 @@ struct tstorm_fcoe_task_ag_ctx {
 struct fcoe_tstorm_fcoe_task_st_ctx_read_write {
        union fcoe_cleanup_addr_exp_ro_union cleanup_addr_exp_ro_union;
        __le16 flags;
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK       0x7
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK       0x1
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_SHIFT      0
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_MASK   0x1
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT  3
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT  1
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_MASK        0x1
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT       4
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT       2
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_MASK       0x1
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT      5
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT      3
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_MASK  0x1
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 6
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 4
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_MASK   0x1
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT  7
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT  5
 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_MASK        0x3
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT       8
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK             0x3F
-#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT            10
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT       6
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK             0xFF
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT            8
        __le16 seq_cnt;
        u8 seq_id;
        u8 ooo_rx_seq_id;
@@ -582,8 +509,34 @@ struct mstorm_fcoe_task_ag_ctx {
 };
 
 struct mstorm_fcoe_task_st_ctx {
-       struct fcoe_mstorm_fcoe_task_st_ctx_non_fp non_fp;
-       struct fcoe_mstorm_fcoe_task_st_ctx_fp fp;
+       struct regpair rsp_buf_addr;
+       __le32 rsrv[2];
+       struct scsi_sgl_params sgl_params;
+       __le32 data_2_trns_rem;
+       __le32 data_buffer_offset;
+       __le16 parent_id;
+       __le16 flags;
+#define MSTORM_FCOE_TASK_ST_CTX_INTERVAL_SIZE_LOG_MASK     0xF
+#define MSTORM_FCOE_TASK_ST_CTX_INTERVAL_SIZE_LOG_SHIFT    0
+#define MSTORM_FCOE_TASK_ST_CTX_HOST_INTERFACE_MASK        0x3
+#define MSTORM_FCOE_TASK_ST_CTX_HOST_INTERFACE_SHIFT       4
+#define MSTORM_FCOE_TASK_ST_CTX_DIF_TO_PEER_MASK           0x1
+#define MSTORM_FCOE_TASK_ST_CTX_DIF_TO_PEER_SHIFT          6
+#define MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER_MASK  0x1
+#define MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER_SHIFT 7
+#define MSTORM_FCOE_TASK_ST_CTX_DIX_BLOCK_SIZE_MASK        0x3
+#define MSTORM_FCOE_TASK_ST_CTX_DIX_BLOCK_SIZE_SHIFT       8
+#define MSTORM_FCOE_TASK_ST_CTX_VALIDATE_DIX_REF_TAG_MASK  0x1
+#define MSTORM_FCOE_TASK_ST_CTX_VALIDATE_DIX_REF_TAG_SHIFT 10
+#define MSTORM_FCOE_TASK_ST_CTX_DIX_CACHED_SGE_FLG_MASK    0x1
+#define MSTORM_FCOE_TASK_ST_CTX_DIX_CACHED_SGE_FLG_SHIFT   11
+#define MSTORM_FCOE_TASK_ST_CTX_DIF_SUPPORTED_MASK         0x1
+#define MSTORM_FCOE_TASK_ST_CTX_DIF_SUPPORTED_SHIFT        12
+#define MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK           0x1
+#define MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT          13
+#define MSTORM_FCOE_TASK_ST_CTX_RESERVED_MASK              0x3
+#define MSTORM_FCOE_TASK_ST_CTX_RESERVED_SHIFT             14
+       struct scsi_cached_sges data_desc;
 };
 
 struct ustorm_fcoe_task_ag_ctx {
@@ -646,6 +599,7 @@ struct ustorm_fcoe_task_ag_ctx {
 
 struct fcoe_task_context {
        struct ystorm_fcoe_task_st_ctx ystorm_st_context;
+       struct regpair ystorm_st_padding[2];
        struct tdif_task_context tdif_context;
        struct ystorm_fcoe_task_ag_ctx ystorm_ag_context;
        struct tstorm_fcoe_task_ag_ctx tstorm_ag_context;
@@ -668,20 +622,20 @@ struct fcoe_tx_stat {
 struct fcoe_wqe {
        __le16 task_id;
        __le16 flags;
-#define FCOE_WQE_REQ_TYPE_MASK        0xF
-#define FCOE_WQE_REQ_TYPE_SHIFT       0
-#define FCOE_WQE_SGL_MODE_MASK        0x7
-#define FCOE_WQE_SGL_MODE_SHIFT       4
-#define FCOE_WQE_CONTINUATION_MASK    0x1
-#define FCOE_WQE_CONTINUATION_SHIFT   7
-#define FCOE_WQE_INVALIDATE_PTU_MASK  0x1
-#define FCOE_WQE_INVALIDATE_PTU_SHIFT 8
-#define FCOE_WQE_SUPER_IO_MASK        0x1
-#define FCOE_WQE_SUPER_IO_SHIFT       9
-#define FCOE_WQE_SEND_AUTO_RSP_MASK   0x1
-#define FCOE_WQE_SEND_AUTO_RSP_SHIFT  10
-#define FCOE_WQE_RESERVED0_MASK       0x1F
-#define FCOE_WQE_RESERVED0_SHIFT      11
+#define FCOE_WQE_REQ_TYPE_MASK       0xF
+#define FCOE_WQE_REQ_TYPE_SHIFT      0
+#define FCOE_WQE_SGL_MODE_MASK       0x1
+#define FCOE_WQE_SGL_MODE_SHIFT      4
+#define FCOE_WQE_CONTINUATION_MASK   0x1
+#define FCOE_WQE_CONTINUATION_SHIFT  5
+#define FCOE_WQE_SEND_AUTO_RSP_MASK  0x1
+#define FCOE_WQE_SEND_AUTO_RSP_SHIFT 6
+#define FCOE_WQE_RESERVED_MASK       0x1
+#define FCOE_WQE_RESERVED_SHIFT      7
+#define FCOE_WQE_NUM_SGES_MASK       0xF
+#define FCOE_WQE_NUM_SGES_SHIFT      8
+#define FCOE_WQE_RESERVED1_MASK      0xF
+#define FCOE_WQE_RESERVED1_SHIFT     12
        union fcoe_additional_info_union additional_info_union;
 };
 
index 4c5747babcf63ff32b8db664146df40545d986d2..69949f8e354b0447c7950884bd205622a4653ab2 100644 (file)
 /* iSCSI HSI constants */
 #define ISCSI_DEFAULT_MTU       (1500)
 
-/* Current iSCSI HSI version number composed of two fields (16 bit) */
-#define ISCSI_HSI_MAJOR_VERSION (0)
-#define ISCSI_HSI_MINOR_VERSION (0)
-
 /* KWQ (kernel work queue) layer codes */
 #define ISCSI_SLOW_PATH_LAYER_CODE   (6)
 
-/* CQE completion status */
-#define ISCSI_EQE_COMPLETION_SUCCESS (0x0)
-#define ISCSI_EQE_RST_CONN_RCVD (0x1)
-
 /* iSCSI parameter defaults */
 #define ISCSI_DEFAULT_HEADER_DIGEST         (0)
 #define ISCSI_DEFAULT_DATA_DIGEST           (0)
 #define ISCSI_MIN_VAL_MAX_OUTSTANDING_R2T   (1)
 #define ISCSI_MAX_VAL_MAX_OUTSTANDING_R2T   (0xff)
 
+#define ISCSI_AHS_CNTL_SIZE 4
+
+#define ISCSI_WQE_NUM_SGES_SLOWIO           (0xf)
+
 /* iSCSI reserved params */
 #define ISCSI_ITT_ALL_ONES     (0xffffffff)
 #define ISCSI_TTT_ALL_ONES     (0xffffffff)
@@ -173,19 +169,6 @@ struct iscsi_async_msg_hdr {
        __le32 reserved7;
 };
 
-struct iscsi_sge {
-       struct regpair sge_addr;
-       __le16 sge_len;
-       __le16 reserved0;
-       __le32 reserved1;
-};
-
-struct iscsi_cached_sge_ctx {
-       struct iscsi_sge sge;
-       struct regpair reserved;
-       __le32 dsgl_curr_offset[2];
-};
-
 struct iscsi_cmd_hdr {
        __le16 reserved1;
        u8 flags_attr;
@@ -229,8 +212,13 @@ struct iscsi_common_hdr {
 #define ISCSI_COMMON_HDR_DATA_SEG_LEN_SHIFT  0
 #define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_MASK  0xFF
 #define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_SHIFT 24
-       __le32 lun_reserved[4];
-       __le32 data[6];
+       struct regpair lun_reserved;
+       __le32 itt;
+       __le32 ttt;
+       __le32 cmdstat_sn;
+       __le32 exp_statcmd_sn;
+       __le32 max_cmd_sn;
+       __le32 data[3];
 };
 
 struct iscsi_conn_offload_params {
@@ -246,8 +234,10 @@ struct iscsi_conn_offload_params {
 #define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_SHIFT 0
 #define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_MASK     0x1
 #define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_SHIFT    1
-#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK       0x3F
-#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT      2
+#define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_MASK 0x1
+#define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_SHIFT        2
+#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK       0x1F
+#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT      3
        u8 pbl_page_size_log;
        u8 pbe_page_size_log;
        u8 default_cq;
@@ -278,8 +268,12 @@ struct iscsi_conn_update_ramrod_params {
 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_SHIFT    2
 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_MASK  0x1
 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_SHIFT 3
-#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK       0xF
-#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT      4
+#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_BLOCK_SIZE_MASK  0x1
+#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_BLOCK_SIZE_SHIFT 4
+#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_HOST_EN_MASK  0x1
+#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_HOST_EN_SHIFT 5
+#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK       0x3
+#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT      6
        u8 reserved0[3];
        __le32 max_seq_size;
        __le32 max_send_pdu_length;
@@ -312,7 +306,7 @@ struct iscsi_ext_cdb_cmd_hdr {
        __le32 expected_transfer_length;
        __le32 cmd_sn;
        __le32 exp_stat_sn;
-       struct iscsi_sge cdb_sge;
+       struct scsi_sge cdb_sge;
 };
 
 struct iscsi_login_req_hdr {
@@ -519,8 +513,8 @@ struct iscsi_logout_response_hdr {
        __le32 exp_cmd_sn;
        __le32 max_cmd_sn;
        __le32 reserved4;
-       __le16 time2retain;
-       __le16 time2wait;
+       __le16 time_2_retain;
+       __le16 time_2_wait;
        __le32 reserved5[1];
 };
 
@@ -602,7 +596,7 @@ struct iscsi_tmf_response_hdr {
 #define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24
        struct regpair reserved0;
        __le32 itt;
-       __le32 rtt;
+       __le32 reserved1;
        __le32 stat_sn;
        __le32 exp_cmd_sn;
        __le32 max_cmd_sn;
@@ -641,7 +635,7 @@ struct iscsi_reject_hdr {
 #define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_MASK  0xFF
 #define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_SHIFT 24
        struct regpair reserved0;
-       __le32 reserved1;
+       __le32 all_ones;
        __le32 reserved2;
        __le32 stat_sn;
        __le32 exp_cmd_sn;
@@ -688,7 +682,9 @@ struct iscsi_cqe_solicited {
        __le16 itid;
        u8 task_type;
        u8 fw_dbg_field;
-       __le32 reserved1[2];
+       u8 caused_conn_err;
+       u8 reserved0[3];
+       __le32 reserved1[1];
        union iscsi_task_hdr iscsi_hdr;
 };
 
@@ -727,35 +723,6 @@ enum iscsi_cqe_unsolicited_type {
        MAX_ISCSI_CQE_UNSOLICITED_TYPE
 };
 
-struct iscsi_virt_sgl_ctx {
-       struct regpair sgl_base;
-       struct regpair dsgl_base;
-       __le32 sgl_initial_offset;
-       __le32 dsgl_initial_offset;
-       __le32 dsgl_curr_offset[2];
-};
-
-struct iscsi_sgl_var_params {
-       u8 sgl_ptr;
-       u8 dsgl_ptr;
-       __le16 sge_offset;
-       __le16 dsge_offset;
-};
-
-struct iscsi_phys_sgl_ctx {
-       struct regpair sgl_base;
-       struct regpair dsgl_base;
-       u8 sgl_size;
-       u8 dsgl_size;
-       __le16 reserved;
-       struct iscsi_sgl_var_params var_params[2];
-};
-
-union iscsi_data_desc_ctx {
-       struct iscsi_virt_sgl_ctx virt_sgl;
-       struct iscsi_phys_sgl_ctx phys_sgl;
-       struct iscsi_cached_sge_ctx cached_sge;
-};
 
 struct iscsi_debug_modes {
        u8 flags;
@@ -771,8 +738,10 @@ struct iscsi_debug_modes {
 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_SHIFT 4
 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_MASK              0x1
 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_SHIFT             5
-#define ISCSI_DEBUG_MODES_RESERVED0_MASK                       0x3
-#define ISCSI_DEBUG_MODES_RESERVED0_SHIFT                      6
+#define ISCSI_DEBUG_MODES_ASSERT_IF_DATA_DIGEST_ERROR_MASK     0x1
+#define ISCSI_DEBUG_MODES_ASSERT_IF_DATA_DIGEST_ERROR_SHIFT    6
+#define ISCSI_DEBUG_MODES_ASSERT_IF_DIF_ERROR_MASK             0x1
+#define ISCSI_DEBUG_MODES_ASSERT_IF_DIF_ERROR_SHIFT            7
 };
 
 struct iscsi_dif_flags {
@@ -806,7 +775,6 @@ enum iscsi_eqe_opcode {
        ISCSI_EVENT_TYPE_ASYN_FIN_WAIT2,
        ISCSI_EVENT_TYPE_ISCSI_CONN_ERROR,
        ISCSI_EVENT_TYPE_TCP_CONN_ERROR,
-       ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES,
        MAX_ISCSI_EQE_OPCODE
 };
 
@@ -856,31 +824,11 @@ enum iscsi_error_types {
        ISCSI_CONN_ERROR_PROTOCOL_ERR_DIF_TX,
        ISCSI_CONN_ERROR_SENSE_DATA_LENGTH,
        ISCSI_CONN_ERROR_DATA_PLACEMENT_ERROR,
+       ISCSI_CONN_ERROR_INVALID_ITT,
        ISCSI_ERROR_UNKNOWN,
        MAX_ISCSI_ERROR_TYPES
 };
 
-struct iscsi_mflags {
-       u8 mflags;
-#define ISCSI_MFLAGS_SLOW_IO_MASK     0x1
-#define ISCSI_MFLAGS_SLOW_IO_SHIFT    0
-#define ISCSI_MFLAGS_SINGLE_SGE_MASK  0x1
-#define ISCSI_MFLAGS_SINGLE_SGE_SHIFT 1
-#define ISCSI_MFLAGS_RESERVED_MASK    0x3F
-#define ISCSI_MFLAGS_RESERVED_SHIFT   2
-};
-
-struct iscsi_sgl {
-       struct regpair sgl_addr;
-       __le16 updated_sge_size;
-       __le16 updated_sge_offset;
-       __le32 byte_offset;
-};
-
-union iscsi_mstorm_sgl {
-       struct iscsi_sgl sgl_struct;
-       struct iscsi_sge single_sge;
-};
 
 enum iscsi_ramrod_cmd_id {
        ISCSI_RAMROD_CMD_ID_UNUSED = 0,
@@ -896,10 +844,10 @@ enum iscsi_ramrod_cmd_id {
 
 struct iscsi_reg1 {
        __le32 reg1_map;
-#define ISCSI_REG1_NUM_FAST_SGES_MASK  0x7
-#define ISCSI_REG1_NUM_FAST_SGES_SHIFT 0
-#define ISCSI_REG1_RESERVED1_MASK      0x1FFFFFFF
-#define ISCSI_REG1_RESERVED1_SHIFT     3
+#define ISCSI_REG1_NUM_SGES_MASK   0xF
+#define ISCSI_REG1_NUM_SGES_SHIFT  0
+#define ISCSI_REG1_RESERVED1_MASK  0xFFFFFFF
+#define ISCSI_REG1_RESERVED1_SHIFT 4
 };
 
 union iscsi_seq_num {
@@ -967,22 +915,33 @@ struct iscsi_spe_func_init {
 };
 
 struct ystorm_iscsi_task_state {
-       union iscsi_data_desc_ctx sgl_ctx_union;
-       __le32 buffer_offset[2];
-       __le16 bytes_nxt_dif;
-       __le16 rxmit_bytes_nxt_dif;
-       union iscsi_seq_num seq_num_union;
-       u8 dif_bytes_leftover;
-       u8 rxmit_dif_bytes_leftover;
-       __le16 reuse_count;
-       struct iscsi_dif_flags dif_flags;
-       u8 local_comp;
+       struct scsi_cached_sges data_desc;
+       struct scsi_sgl_params sgl_params;
        __le32 exp_r2t_sn;
-       __le32 sgl_offset[2];
+       __le32 buffer_offset;
+       union iscsi_seq_num seq_num;
+       struct iscsi_dif_flags dif_flags;
+       u8 flags;
+#define YSTORM_ISCSI_TASK_STATE_LOCAL_COMP_MASK  0x1
+#define YSTORM_ISCSI_TASK_STATE_LOCAL_COMP_SHIFT 0
+#define YSTORM_ISCSI_TASK_STATE_SLOW_IO_MASK     0x1
+#define YSTORM_ISCSI_TASK_STATE_SLOW_IO_SHIFT    1
+#define YSTORM_ISCSI_TASK_STATE_RESERVED0_MASK   0x3F
+#define YSTORM_ISCSI_TASK_STATE_RESERVED0_SHIFT  2
+};
+
+struct ystorm_iscsi_task_rxmit_opt {
+       __le32 fast_rxmit_sge_offset;
+       __le32 scan_start_buffer_offset;
+       __le32 fast_rxmit_buffer_offset;
+       u8 scan_start_sgl_index;
+       u8 fast_rxmit_sgl_index;
+       __le16 reserved;
 };
 
 struct ystorm_iscsi_task_st_ctx {
        struct ystorm_iscsi_task_state state;
+       struct ystorm_iscsi_task_rxmit_opt rxmit_opt;
        union iscsi_task_hdr pdu_hdr;
 };
 
@@ -1152,25 +1111,16 @@ struct ustorm_iscsi_task_ag_ctx {
 };
 
 struct mstorm_iscsi_task_st_ctx {
-       union iscsi_mstorm_sgl sgl_union;
-       struct iscsi_dif_flags dif_flags;
-       struct iscsi_mflags flags;
-       u8 sgl_size;
-       u8 host_sge_index;
-       __le16 dix_cur_sge_offset;
-       __le16 dix_cur_sge_size;
-       __le32 data_offset_rtid;
-       u8 dif_offset;
-       u8 dix_sgl_size;
-       u8 dix_sge_index;
+       struct scsi_cached_sges data_desc;
+       struct scsi_sgl_params sgl_params;
+       __le32 rem_task_size;
+       __le32 data_buffer_offset;
        u8 task_type;
+       struct iscsi_dif_flags dif_flags;
+       u8 reserved0[2];
        struct regpair sense_db;
-       struct regpair dix_sgl_cur_sge;
-       __le32 rem_task_size;
-       __le16 reuse_count;
-       __le16 dif_data_residue;
-       u8 reserved0[4];
-       __le32 reserved1[1];
+       __le32 expected_itt;
+       __le32 reserved1;
 };
 
 struct ustorm_iscsi_task_st_ctx {
@@ -1184,7 +1134,7 @@ struct ustorm_iscsi_task_st_ctx {
 #define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_SHIFT            0
 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_MASK             0x7F
 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_SHIFT            1
-       u8 reserved2;
+       struct iscsi_dif_flags dif_flags;
        __le16 reserved3;
        __le32 reserved4;
        __le32 reserved5;
@@ -1207,10 +1157,10 @@ struct ustorm_iscsi_task_st_ctx {
 #define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_SHIFT           2
 #define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_MASK        0x1
 #define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_SHIFT       3
-#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_MASK   0x1
-#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_SHIFT  4
-#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_MASK        0x1
-#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_SHIFT       5
+#define USTORM_ISCSI_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_MASK  0x1
+#define USTORM_ISCSI_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_SHIFT 4
+#define USTORM_ISCSI_TASK_ST_CTX_HQ_SCANNED_DONE_MASK        0x1
+#define USTORM_ISCSI_TASK_ST_CTX_HQ_SCANNED_DONE_SHIFT       5
 #define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_MASK         0x1
 #define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_SHIFT        6
 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_MASK             0x1
@@ -1220,7 +1170,6 @@ struct ustorm_iscsi_task_st_ctx {
 
 struct iscsi_task_context {
        struct ystorm_iscsi_task_st_ctx ystorm_st_context;
-       struct regpair ystorm_st_padding[2];
        struct ystorm_iscsi_task_ag_ctx ystorm_ag_context;
        struct regpair ystorm_ag_padding[2];
        struct tdif_task_context tdif_context;
@@ -1272,32 +1221,22 @@ struct iscsi_uhqe {
 #define ISCSI_UHQE_TASK_ID_LO_SHIFT         24
 };
 
-struct iscsi_wqe_field {
-       __le32 contlen_cdbsize_field;
-#define ISCSI_WQE_FIELD_CONT_LEN_MASK  0xFFFFFF
-#define ISCSI_WQE_FIELD_CONT_LEN_SHIFT 0
-#define ISCSI_WQE_FIELD_CDB_SIZE_MASK  0xFF
-#define ISCSI_WQE_FIELD_CDB_SIZE_SHIFT 24
-};
-
-union iscsi_wqe_field_union {
-       struct iscsi_wqe_field cont_field;
-       __le32 prev_tid;
-};
 
 struct iscsi_wqe {
        __le16 task_id;
        u8 flags;
 #define ISCSI_WQE_WQE_TYPE_MASK        0x7
 #define ISCSI_WQE_WQE_TYPE_SHIFT       0
-#define ISCSI_WQE_NUM_FAST_SGES_MASK   0x7
-#define ISCSI_WQE_NUM_FAST_SGES_SHIFT  3
-#define ISCSI_WQE_PTU_INVALIDATE_MASK  0x1
-#define ISCSI_WQE_PTU_INVALIDATE_SHIFT 6
+#define ISCSI_WQE_NUM_SGES_MASK  0xF
+#define ISCSI_WQE_NUM_SGES_SHIFT 3
 #define ISCSI_WQE_RESPONSE_MASK        0x1
 #define ISCSI_WQE_RESPONSE_SHIFT       7
        struct iscsi_dif_flags prot_flags;
-       union iscsi_wqe_field_union cont_prevtid_union;
+       __le32 contlen_cdbsize;
+#define ISCSI_WQE_CONT_LEN_MASK  0xFFFFFF
+#define ISCSI_WQE_CONT_LEN_SHIFT 0
+#define ISCSI_WQE_CDB_SIZE_MASK  0xFF
+#define ISCSI_WQE_CDB_SIZE_SHIFT 24
 };
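
A hedged sketch of filling the merged contlen_cdbsize word that replaces the old iscsi_wqe_field union; SET_FIELD as sketched earlier, io_len and cdb_len are hypothetical caller values.

/* Build the length/CDB-size word in CPU order, store it little-endian. */
u32 val = 0;

SET_FIELD(val, ISCSI_WQE_CONT_LEN, io_len);	/* bits 0..23  */
SET_FIELD(val, ISCSI_WQE_CDB_SIZE, cdb_len);	/* bits 24..31 */
wqe->contlen_cdbsize = cpu_to_le32(val);
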
 
 enum iscsi_wqe_type {
@@ -1318,17 +1257,15 @@ struct iscsi_xhqe {
        u8 total_ahs_length;
        u8 opcode;
        u8 flags;
-#define ISCSI_XHQE_NUM_FAST_SGES_MASK  0x7
-#define ISCSI_XHQE_NUM_FAST_SGES_SHIFT 0
-#define ISCSI_XHQE_FINAL_MASK          0x1
-#define ISCSI_XHQE_FINAL_SHIFT         3
-#define ISCSI_XHQE_SUPER_IO_MASK       0x1
-#define ISCSI_XHQE_SUPER_IO_SHIFT      4
-#define ISCSI_XHQE_STATUS_BIT_MASK     0x1
-#define ISCSI_XHQE_STATUS_BIT_SHIFT    5
-#define ISCSI_XHQE_RESERVED_MASK       0x3
-#define ISCSI_XHQE_RESERVED_SHIFT      6
-       union iscsi_seq_num seq_num_union;
+#define ISCSI_XHQE_FINAL_MASK       0x1
+#define ISCSI_XHQE_FINAL_SHIFT      0
+#define ISCSI_XHQE_STATUS_BIT_MASK  0x1
+#define ISCSI_XHQE_STATUS_BIT_SHIFT 1
+#define ISCSI_XHQE_NUM_SGES_MASK    0xF
+#define ISCSI_XHQE_NUM_SGES_SHIFT   2
+#define ISCSI_XHQE_RESERVED0_MASK   0x3
+#define ISCSI_XHQE_RESERVED0_SHIFT  6
+       union iscsi_seq_num seq_num;
        __le16 reserved1;
 };
 
index fde56c436f7177e27173656f4fa4480752351c6d..625f80f08f91100b2cf0a7ffc2232868c82843a7 100644 (file)
@@ -263,7 +263,6 @@ struct qed_rdma_pf_params {
         * the doorbell BAR).
         */
        u32 min_dpis;           /* number of requested DPIs */
-       u32 num_mrs;            /* number of requested memory regions */
        u32 num_qps;            /* number of requested Queue Pairs */
        u32 num_srqs;           /* number of requested SRQ */
        u8 roce_edpm_mode;      /* see QED_ROCE_EDPM_MODE_ENABLE */
@@ -300,6 +299,11 @@ struct qed_sb_info {
        struct qed_dev          *cdev;
 };
 
+enum qed_dev_type {
+       QED_DEV_TYPE_BB,
+       QED_DEV_TYPE_AH,
+};
+
 struct qed_dev_info {
        unsigned long   pci_mem_start;
        unsigned long   pci_mem_end;
@@ -325,6 +329,8 @@ struct qed_dev_info {
        u16             mtu;
 
        bool wol_support;
+
+       enum qed_dev_type dev_type;
 };
 
 enum qed_sb_type {
@@ -752,7 +758,7 @@ enum qed_mf_mode {
        QED_MF_NPAR,
 };
 
-struct qed_eth_stats {
+struct qed_eth_stats_common {
        u64     no_buff_discards;
        u64     packet_too_big_discard;
        u64     ttl0_discard;
@@ -784,11 +790,6 @@ struct qed_eth_stats {
        u64     rx_256_to_511_byte_packets;
        u64     rx_512_to_1023_byte_packets;
        u64     rx_1024_to_1518_byte_packets;
-       u64     rx_1519_to_1522_byte_packets;
-       u64     rx_1519_to_2047_byte_packets;
-       u64     rx_2048_to_4095_byte_packets;
-       u64     rx_4096_to_9216_byte_packets;
-       u64     rx_9217_to_16383_byte_packets;
        u64     rx_crc_errors;
        u64     rx_mac_crtl_frames;
        u64     rx_pause_frames;
@@ -805,14 +806,8 @@ struct qed_eth_stats {
        u64     tx_256_to_511_byte_packets;
        u64     tx_512_to_1023_byte_packets;
        u64     tx_1024_to_1518_byte_packets;
-       u64     tx_1519_to_2047_byte_packets;
-       u64     tx_2048_to_4095_byte_packets;
-       u64     tx_4096_to_9216_byte_packets;
-       u64     tx_9217_to_16383_byte_packets;
        u64     tx_pause_frames;
        u64     tx_pfc_frames;
-       u64     tx_lpi_entry_count;
-       u64     tx_total_collisions;
        u64     brb_truncates;
        u64     brb_discards;
        u64     rx_mac_bytes;
@@ -827,6 +822,34 @@ struct qed_eth_stats {
        u64     tx_mac_ctrl_frames;
 };
 
+struct qed_eth_stats_bb {
+       u64 rx_1519_to_1522_byte_packets;
+       u64 rx_1519_to_2047_byte_packets;
+       u64 rx_2048_to_4095_byte_packets;
+       u64 rx_4096_to_9216_byte_packets;
+       u64 rx_9217_to_16383_byte_packets;
+       u64 tx_1519_to_2047_byte_packets;
+       u64 tx_2048_to_4095_byte_packets;
+       u64 tx_4096_to_9216_byte_packets;
+       u64 tx_9217_to_16383_byte_packets;
+       u64 tx_lpi_entry_count;
+       u64 tx_total_collisions;
+};
+
+struct qed_eth_stats_ah {
+       u64 rx_1519_to_max_byte_packets;
+       u64 tx_1519_to_max_byte_packets;
+};
+
+struct qed_eth_stats {
+       struct qed_eth_stats_common common;
+
+       union {
+               struct qed_eth_stats_bb bb;
+               struct qed_eth_stats_ah ah;
+       };
+};
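
Since readers of qed_eth_stats must now pick the union member matching the ASIC family, here is a minimal sketch of the expected pattern, keyed off the new dev_type field in qed_dev_info (the function name is invented):

/* Sketch: sum the >1518-byte RX buckets for either ASIC family. */
static u64 rx_1519_plus_packets(enum qed_dev_type type,
				const struct qed_eth_stats *stats)
{
	if (type == QED_DEV_TYPE_AH)
		return stats->ah.rx_1519_to_max_byte_packets;

	return stats->bb.rx_1519_to_2047_byte_packets +
	       stats->bb.rx_2048_to_4095_byte_packets +
	       stats->bb.rx_4096_to_9216_byte_packets +
	       stats->bb.rx_9217_to_16383_byte_packets;
}
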
+
 #define QED_SB_IDX              0x0002
 
 #define RX_PI           0
index f773aa5e746ff47bb886aa19f568a24108dd0d1e..72c770f9f6669a5169f1780f8cae524bc6c3e0b4 100644 (file)
@@ -52,7 +52,8 @@
 #define RDMA_MAX_PDS                            (64 * 1024)
 
 #define RDMA_NUM_STATISTIC_COUNTERS                     MAX_NUM_VPORTS
-#define RDMA_NUM_STATISTIC_COUNTERS_BB                 MAX_NUM_VPORTS_BB
+#define RDMA_NUM_STATISTIC_COUNTERS_K2                  MAX_NUM_VPORTS_K2
+#define RDMA_NUM_STATISTIC_COUNTERS_BB                  MAX_NUM_VPORTS_BB
 
 #define RDMA_TASK_TYPE (PROTOCOLID_ROCE)
 
index bad02df213dfccd11cd25fa1b1e0decaf17f92c6..866f063026dedc6540d87d595bcacf9071f14681 100644 (file)
 
 #define ROCE_MAX_QPS   (32 * 1024)
 
+enum roce_async_events_type {
+       ROCE_ASYNC_EVENT_NONE = 0,
+       ROCE_ASYNC_EVENT_COMM_EST = 1,
+       ROCE_ASYNC_EVENT_SQ_DRAINED,
+       ROCE_ASYNC_EVENT_SRQ_LIMIT,
+       ROCE_ASYNC_EVENT_LAST_WQE_REACHED,
+       ROCE_ASYNC_EVENT_CQ_ERR,
+       ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR,
+       ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR,
+       ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR,
+       ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR,
+       ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
+       ROCE_ASYNC_EVENT_SRQ_EMPTY,
+       ROCE_ASYNC_EVENT_DESTROY_QP_DONE,
+       MAX_ROCE_ASYNC_EVENTS_TYPE
+};
+
 #endif /* __ROCE_COMMON__ */
index 03f3e37ab059d5e4b48aa2b7f80366016b27fdf5..08df82a096b62de80e51f34122f4809f0eb6d27e 100644 (file)
@@ -40,6 +40,8 @@
 #define BDQ_ID_IMM_DATA          (1)
 #define BDQ_NUM_IDS          (2)
 
+#define SCSI_NUM_SGES_SLOW_SGL_THR      8
+
 #define BDQ_MAX_EXTERNAL_RING_SIZE (1 << 15)
 
 struct scsi_bd {
@@ -52,6 +54,16 @@ struct scsi_bdq_ram_drv_data {
        __le16 reserved0[3];
 };
 
+struct scsi_sge {
+       struct regpair sge_addr;
+       __le32 sge_len;
+       __le32 reserved;
+};
+
+struct scsi_cached_sges {
+       struct scsi_sge sge[4];
+};
+
 struct scsi_drv_cmdq {
        __le16 cmdq_cons;
        __le16 reserved0;
@@ -99,11 +111,19 @@ struct scsi_ram_per_bdq_resource_drv_data {
        struct scsi_bdq_ram_drv_data drv_data_per_bdq_id[BDQ_NUM_IDS];
 };
 
-struct scsi_sge {
-       struct regpair sge_addr;
-       __le16 sge_len;
-       __le16 reserved0;
-       __le32 reserved1;
+enum scsi_sgl_mode {
+       SCSI_TX_SLOW_SGL,
+       SCSI_FAST_SGL,
+       MAX_SCSI_SGL_MODE
+};
+
+struct scsi_sgl_params {
+       struct regpair sgl_addr;
+       __le32 sgl_total_length;
+       __le32 sge_offset;
+       __le16 sgl_num_sges;
+       u8 sgl_index;
+       u8 reserved;
 };
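
A hedged sketch of describing an SGL with the new scsi_sgl_params, including the slow/fast mode choice suggested by SCSI_NUM_SGES_SLOW_SGL_THR; DMA_REGPAIR_LE is the regpair helper from qed_chain.h, the remaining names and the threshold direction are assumptions.

static enum scsi_sgl_mode init_sgl_params(struct scsi_sgl_params *p,
					  dma_addr_t sgl_dma, u32 total_len,
					  u16 nents)
{
	DMA_REGPAIR_LE(p->sgl_addr, sgl_dma);
	p->sgl_total_length = cpu_to_le32(total_len);
	p->sge_offset = cpu_to_le32(0);
	p->sgl_num_sges = cpu_to_le16(nents);
	p->sgl_index = 0;

	/* Assumed direction of the threshold: large lists go slow-path. */
	return nents > SCSI_NUM_SGES_SLOW_SGL_THR ? SCSI_TX_SLOW_SGL
						  : SCSI_FAST_SGL;
}
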
 
 struct scsi_terminate_extra_params {
index 46fe7856f1b22c828474257ceedf03c182958ec5..a5e843268f0e9431eacd07ad5cc74e10be690b0d 100644 (file)
@@ -173,6 +173,7 @@ enum tcp_seg_placement_event {
        TCP_EVENT_ADD_ISLE_RIGHT,
        TCP_EVENT_ADD_ISLE_LEFT,
        TCP_EVENT_JOIN,
+       TCP_EVENT_DELETE_ISLES,
        TCP_EVENT_NOP,
        MAX_TCP_SEG_PLACEMENT_EVENT
 };
index 092292b6675e2cf08b1138410a8488bc87986495..e507290cd2c7736ff3e84527a87748dfdc504d9c 100644 (file)
@@ -915,6 +915,28 @@ static inline int rhashtable_lookup_insert_fast(
        return ret == NULL ? 0 : -EEXIST;
 }
 
+/**
+ * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
+ * @ht:                hash table
+ * @obj:       pointer to hash head inside object
+ * @params:    hash table parameters
+ *
+ * Just like rhashtable_lookup_insert_fast(), but this function returns the
+ * existing object if one is already present, NULL if the insertion was
+ * successful, and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_fast(
+       struct rhashtable *ht, struct rhash_head *obj,
+       const struct rhashtable_params params)
+{
+       const char *key = rht_obj(ht, obj);
+
+       BUG_ON(ht->p.obj_hashfn);
+
+       return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
+                                       false);
+}
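
The three-way return value collapses the usual lookup-then-insert dance into one call; a minimal usage sketch (the table ht, my_params, and the my_entry type are assumed to exist elsewhere):

struct my_entry *old;

old = rhashtable_lookup_get_insert_fast(&ht, &new->node, my_params);
if (IS_ERR(old))
	return PTR_ERR(old);	/* e.g. -ENOMEM or -EBUSY */
if (old) {
	kfree(new);		/* lost the race to an existing entry */
	new = old;
}
/* new now points at the entry that is actually in the table */
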
+
 /**
  * rhashtable_lookup_insert_key - search and insert object to hash table
  *                               with explicit key
index 8ec8b6439b25edb956bb06bcf116c190a46ce62c..f27917e0a10114f9c72e228c42b6f60efd51b541 100644 (file)
@@ -6,7 +6,7 @@
 
 struct qcom_smd_edge;
 
-#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD) || IS_ENABLED(CONFIG_QCOM_SMD)
+#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD)
 
 struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
                                             struct device_node *node);
diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h
deleted file mode 100644 (file)
index f148e0f..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-#ifndef __QCOM_SMD_H__
-#define __QCOM_SMD_H__
-
-#include <linux/device.h>
-#include <linux/mod_devicetable.h>
-
-struct qcom_smd;
-struct qcom_smd_channel;
-struct qcom_smd_lookup;
-
-/**
- * struct qcom_smd_id - struct used for matching a smd device
- * @name:      name of the channel
- */
-struct qcom_smd_id {
-       char name[20];
-};
-
-/**
- * struct qcom_smd_device - smd device struct
- * @dev:       the device struct
- * @channel:   handle to the smd channel for this device
- */
-struct qcom_smd_device {
-       struct device dev;
-       struct qcom_smd_channel *channel;
-};
-
-typedef int (*qcom_smd_cb_t)(struct qcom_smd_channel *, const void *, size_t);
-
-/**
- * struct qcom_smd_driver - smd driver struct
- * @driver:    underlying device driver
- * @smd_match_table: static channel match table
- * @probe:     invoked when the smd channel is found
- * @remove:    invoked when the smd channel is closed
- * @callback:  invoked when an inbound message is received on the channel,
- *             should return 0 on success or -EBUSY if the data cannot be
- *             consumed at this time
- */
-struct qcom_smd_driver {
-       struct device_driver driver;
-       const struct qcom_smd_id *smd_match_table;
-
-       int (*probe)(struct qcom_smd_device *dev);
-       void (*remove)(struct qcom_smd_device *dev);
-       qcom_smd_cb_t callback;
-};
-
-#if IS_ENABLED(CONFIG_QCOM_SMD)
-
-int qcom_smd_driver_register(struct qcom_smd_driver *drv);
-void qcom_smd_driver_unregister(struct qcom_smd_driver *drv);
-
-struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *channel,
-                                              const char *name,
-                                              qcom_smd_cb_t cb);
-void qcom_smd_close_channel(struct qcom_smd_channel *channel);
-void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel);
-void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data);
-int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len);
-
-
-struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
-                                            struct device_node *node);
-int qcom_smd_unregister_edge(struct qcom_smd_edge *edge);
-
-#else
-
-static inline int qcom_smd_driver_register(struct qcom_smd_driver *drv)
-{
-       return -ENXIO;
-}
-
-static inline void qcom_smd_driver_unregister(struct qcom_smd_driver *drv)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-}
-
-static inline struct qcom_smd_channel *
-qcom_smd_open_channel(struct qcom_smd_channel *channel,
-                     const char *name,
-                     qcom_smd_cb_t cb)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-       return NULL;
-}
-
-static inline void qcom_smd_close_channel(struct qcom_smd_channel *channel)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-}
-
-static inline void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-       return NULL;
-}
-
-static inline void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-}
-
-static inline int qcom_smd_send(struct qcom_smd_channel *channel,
-                               const void *data, int len)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-       return -ENXIO;
-}
-
-static inline struct qcom_smd_edge *
-qcom_smd_register_edge(struct device *parent,
-                      struct device_node *node)
-{
-       return ERR_PTR(-ENXIO);
-}
-
-static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge)
-{
-       /* This shouldn't be possible */
-       WARN_ON(1);
-       return -ENXIO;
-}
-
-#endif
-
-#define module_qcom_smd_driver(__smd_driver) \
-       module_driver(__smd_driver, qcom_smd_driver_register, \
-                     qcom_smd_driver_unregister)
-
-
-#endif
index eab64976a73b0e1aa2c15de6a06ae65e1333de99..a4dd4d7c711dc000fc3f1fe93a794dbff245b418 100644 (file)
@@ -1,16 +1,19 @@
 #ifndef __WCNSS_CTRL_H__
 #define __WCNSS_CTRL_H__
 
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
 
 #if IS_ENABLED(CONFIG_QCOM_WCNSS_CTRL)
 
-struct qcom_smd_channel *qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb);
+struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, const char *name,
+                                              rpmsg_rx_cb_t cb, void *priv);
 
 #else
 
-static inline struct qcom_smd_channel*
-qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb)
+static inline struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss,
+                                                     const char *name,
+                                                     rpmsg_rx_cb_t cb,
+                                                     void *priv)
 {
        WARN_ON(1);
        return ERR_PTR(-ENXIO);
index a0596ca0e80ac77aeb0afa29648532ef51a5deae..a2f8109bb215751427e99b3c026badfe7113b875 100644 (file)
@@ -24,6 +24,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
+u64 sock_gen_cookie(struct sock *sk);
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
index fc273e9d5f67625b9ddf611746d1a09ff555e4ab..3921cb9dfadb9b4a9bbe30854ccada75725b8005 100644 (file)
@@ -28,6 +28,9 @@
 
 #include <linux/platform_device.h>
 
+#define MTL_MAX_RX_QUEUES      8
+#define MTL_MAX_TX_QUEUES      8
+
 #define STMMAC_RX_COE_NONE     0
 #define STMMAC_RX_COE_TYPE1    1
 #define STMMAC_RX_COE_TYPE2    2
 #define        STMMAC_CSR_150_250M     0x4     /* MDC = clk_scr_i/102 */
 #define        STMMAC_CSR_250_300M     0x5     /* MDC = clk_scr_i/122 */
 
+/* MTL algorithms identifiers */
+#define MTL_TX_ALGORITHM_WRR   0x0
+#define MTL_TX_ALGORITHM_WFQ   0x1
+#define MTL_TX_ALGORITHM_DWRR  0x2
+#define MTL_TX_ALGORITHM_SP    0x3
+#define MTL_RX_ALGORITHM_SP    0x4
+#define MTL_RX_ALGORITHM_WSP   0x5
+
+/* RX/TX Queue Mode */
+#define MTL_QUEUE_AVB          0x0
+#define MTL_QUEUE_DCB          0x1
+
 /* The MDC clock could be set higher than the IEEE 802.3
  * specified frequency limit of 2.5 MHz, by programming a clock divider
  * with a value different from those defined above. The resultant MDIO
@@ -109,6 +124,26 @@ struct stmmac_axi {
        bool axi_rb;
 };
 
+struct stmmac_rxq_cfg {
+       u8 mode_to_use;
+       u8 chan;
+       u8 pkt_route;
+       bool use_prio;
+       u32 prio;
+};
+
+struct stmmac_txq_cfg {
+       u8 weight;
+       u8 mode_to_use;
+       /* Credit Base Shaper parameters */
+       u32 send_slope;
+       u32 idle_slope;
+       u32 high_credit;
+       u32 low_credit;
+       bool use_prio;
+       u32 prio;
+};
+
 struct plat_stmmacenet_data {
        int bus_id;
        int phy_addr;
@@ -133,6 +168,12 @@ struct plat_stmmacenet_data {
        int unicast_filter_entries;
        int tx_fifo_size;
        int rx_fifo_size;
+       u8 rx_queues_to_use;
+       u8 tx_queues_to_use;
+       u8 rx_sched_algorithm;
+       u8 tx_sched_algorithm;
+       struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
+       struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
        void (*fix_mac_speed)(void *priv, unsigned int speed);
        int (*init)(struct platform_device *pdev, void *priv);
        void (*exit)(struct platform_device *pdev, void *priv);
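
A hedged sketch of how platform code might populate the new multi-queue fields (queue counts, algorithms, and weights here are arbitrary example values; plat is an already-allocated plat_stmmacenet_data):

plat->rx_queues_to_use = 1;
plat->tx_queues_to_use = 2;
plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;	 /* strict-priority RX */
plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR; /* weighted round-robin TX */

plat->tx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB;
plat->tx_queues_cfg[0].weight = 0x10;
plat->tx_queues_cfg[1].mode_to_use = MTL_QUEUE_AVB;
plat->tx_queues_cfg[1].weight = 0x11;
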
index c0f530809d1f3db7323e51a52224eb49d8f97da0..6cb4061a720d2df5e5f9467de8269529195ce827 100644 (file)
@@ -115,6 +115,6 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
 #define udp_portaddr_for_each_entry_rcu(__sk, list) \
        hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
 
-#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
+#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
 
 #endif /* _LINUX_UDP_H */
index 6e0ce8c7b8cb5a9fcb985a5a5078f82267d03092..7dffa5624ea62bee992e547c44ed4a86a577b0a7 100644 (file)
@@ -64,6 +64,8 @@ struct usbnet {
        struct usb_anchor       deferred;
        struct tasklet_struct   bh;
 
+       struct pcpu_sw_netstats __percpu *stats64;
+
        struct work_struct      kevent;
        unsigned long           flags;
 #              define EVENT_TX_HALT    0
@@ -261,10 +263,10 @@ extern void usbnet_pause_rx(struct usbnet *);
 extern void usbnet_resume_rx(struct usbnet *);
 extern void usbnet_purge_paused_rxq(struct usbnet *);
 
-extern int usbnet_get_settings(struct net_device *net,
-                              struct ethtool_cmd *cmd);
-extern int usbnet_set_settings(struct net_device *net,
-                              struct ethtool_cmd *cmd);
+extern int usbnet_get_link_ksettings(struct net_device *net,
+                                    struct ethtool_link_ksettings *cmd);
+extern int usbnet_set_link_ksettings(struct net_device *net,
+                                    const struct ethtool_link_ksettings *cmd);
 extern u32 usbnet_get_link(struct net_device *net);
 extern u32 usbnet_get_msglevel(struct net_device *);
 extern void usbnet_set_msglevel(struct net_device *, u32);
@@ -278,5 +280,7 @@ extern int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags);
 extern void usbnet_status_stop(struct usbnet *dev);
 
 extern void usbnet_update_max_qlen(struct usbnet *dev);
+extern void usbnet_get_stats64(struct net_device *dev,
+                              struct rtnl_link_stats64 *stats);
 
 #endif /* __LINUX_USB_USBNET_H */
index 9638bfeb0d1f639ae310d1586b4e2fca567ba2f7..584f9a647ad4acca191ff6116a47c14da1385fa3 100644 (file)
@@ -48,6 +48,8 @@ struct virtio_vsock_pkt {
        struct virtio_vsock_hdr hdr;
        struct work_struct work;
        struct list_head list;
+       /* socket refcnt not held, only use for cancellation */
+       struct vsock_sock *vsk;
        void *buf;
        u32 len;
        u32 off;
@@ -56,6 +58,7 @@ struct virtio_vsock_pkt {
 
 struct virtio_vsock_pkt_info {
        u32 remote_cid, remote_port;
+       struct vsock_sock *vsk;
        struct msghdr *msg;
        u32 pkt_len;
        u16 type;
index 17c6fd84e287808eca08503b87a56a5d6fc0cc5c..1aeb25dd42a7d07282308f1995888ecdf01f99b8 100644 (file)
@@ -262,8 +262,8 @@ int register_inet6addr_notifier(struct notifier_block *nb);
 int unregister_inet6addr_notifier(struct notifier_block *nb);
 int inet6addr_notifier_call_chain(unsigned long val, void *v);
 
-void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
-                                 struct ipv6_devconf *devconf);
+void inet6_netconf_notify_devconf(struct net *net, int event, int type,
+                                 int ifindex, struct ipv6_devconf *devconf);
 
 /**
  * __in6_dev_get - get inet6_dev pointer from netdevice
index f2758964ce6f890e3b11df5ba5bf2eefe636abd1..f32ed9ac181a47c00757596fc3b8c5733426c468 100644 (file)
@@ -100,6 +100,9 @@ struct vsock_transport {
        void (*destruct)(struct vsock_sock *);
        void (*release)(struct vsock_sock *);
 
+       /* Cancel all pending packets sent on vsock. */
+       int (*cancel_pkt)(struct vsock_sock *vsk);
+
        /* Connections. */
        int (*connect)(struct vsock_sock *);
 
index 3c857778a6ca6870f7e7d5604adcd263380e4708..fb2dd97857c4f6ca773005f09e13ffb9e6264797 100644 (file)
@@ -153,7 +153,8 @@ struct slave {
        unsigned long last_link_up;
        unsigned long last_rx;
        unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
-       s8     link;    /* one of BOND_LINK_XXXX */
+       s8     link;            /* one of BOND_LINK_XXXX */
+       s8     link_new_state;  /* one of BOND_LINK_XXXX */
        s8     new_link;
        u8     backup:1,   /* indicates backup slave. Value corresponds with
                              BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
@@ -504,13 +505,17 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
        return slave->inactive;
 }
 
-static inline void bond_set_slave_link_state(struct slave *slave, int state,
-                                            bool notify)
+static inline void bond_propose_link_state(struct slave *slave, int state)
+{
+       slave->link_new_state = state;
+}
+
+static inline void bond_commit_link_state(struct slave *slave, bool notify)
 {
-       if (slave->link == state)
+       if (slave->link == slave->link_new_state)
                return;
 
-       slave->link = state;
+       slave->link = slave->link_new_state;
        if (notify) {
                bond_queue_slave_event(slave);
                bond_lower_state_changed(slave);
@@ -523,6 +528,13 @@ static inline void bond_set_slave_link_state(struct slave *slave, int state,
        }
 }
 
+static inline void bond_set_slave_link_state(struct slave *slave, int state,
+                                            bool notify)
+{
+       bond_propose_link_state(slave, state);
+       bond_commit_link_state(slave, notify);
+}
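
The propose/commit split lets a monitor stage link transitions for several slaves and notify only once a consistent decision has been made; a hedged sketch of the intended two-phase use (slave_link_is_up is an assumed helper):

static void my_link_monitor(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		if (!slave_link_is_up(slave))
			bond_propose_link_state(slave, BOND_LINK_DOWN);

	/* ...decide on failover here, with nothing published yet... */

	bond_for_each_slave(bond, slave, iter)
		bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_NOW);
}
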
+
 static inline void bond_slave_link_notify(struct bonding *bond)
 {
        struct list_head *iter;
index c0452de83086e51738a249bea5fa86d6fb121760..8ffd434676b7a270af73534f3dbcc26f370fe215 100644 (file)
@@ -35,83 +35,101 @@ struct napi_struct;
 extern unsigned int sysctl_net_busy_read __read_mostly;
 extern unsigned int sysctl_net_busy_poll __read_mostly;
 
+/*             0 - Reserved to indicate value not set
+ *     1..NR_CPUS - Reserved for sender_cpu
+ *  NR_CPUS+1..~0 - Region available for NAPI IDs
+ */
+#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
+
 static inline bool net_busy_loop_on(void)
 {
        return sysctl_net_busy_poll;
 }
 
-static inline u64 busy_loop_us_clock(void)
+static inline bool sk_can_busy_loop(const struct sock *sk)
 {
-       return local_clock() >> 10;
+       return sk->sk_ll_usec && !signal_pending(current);
 }
 
-static inline unsigned long sk_busy_loop_end_time(struct sock *sk)
-{
-       return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec);
-}
+bool sk_busy_loop_end(void *p, unsigned long start_time);
 
-/* in poll/select we use the global sysctl_net_ll_poll value */
-static inline unsigned long busy_loop_end_time(void)
+void napi_busy_loop(unsigned int napi_id,
+                   bool (*loop_end)(void *, unsigned long),
+                   void *loop_end_arg);
+
+#else /* CONFIG_NET_RX_BUSY_POLL */
+static inline unsigned long net_busy_loop_on(void)
 {
-       return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll);
+       return 0;
 }
 
-static inline bool sk_can_busy_loop(const struct sock *sk)
+static inline bool sk_can_busy_loop(struct sock *sk)
 {
-       return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current);
+       return false;
 }
 
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
-static inline bool busy_loop_timeout(unsigned long end_time)
+static inline unsigned long busy_loop_current_time(void)
 {
-       unsigned long now = busy_loop_us_clock();
-
-       return time_after(now, end_time);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       return (unsigned long)(local_clock() >> 10);
+#else
+       return 0;
+#endif
 }
 
-bool sk_busy_loop(struct sock *sk, int nonblock);
-
-/* used in the NIC receive handler to mark the skb */
-static inline void skb_mark_napi_id(struct sk_buff *skb,
-                                   struct napi_struct *napi)
+/* in poll/select we use the global sysctl_net_busy_poll value */
+static inline bool busy_loop_timeout(unsigned long start_time)
 {
-       skb->napi_id = napi->napi_id;
-}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll);
 
+       if (bp_usec) {
+               unsigned long end_time = start_time + bp_usec;
+               unsigned long now = busy_loop_current_time();
 
-#else /* CONFIG_NET_RX_BUSY_POLL */
-static inline unsigned long net_busy_loop_on(void)
-{
-       return 0;
+               return time_after(now, end_time);
+       }
+#endif
+       return true;
 }
 
-static inline unsigned long busy_loop_end_time(void)
+static inline bool sk_busy_loop_timeout(struct sock *sk,
+                                       unsigned long start_time)
 {
-       return 0;
-}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec);
 
-static inline bool sk_can_busy_loop(struct sock *sk)
-{
-       return false;
-}
+       if (bp_usec) {
+               unsigned long end_time = start_time + bp_usec;
+               unsigned long now = busy_loop_current_time();
 
-static inline void skb_mark_napi_id(struct sk_buff *skb,
-                                   struct napi_struct *napi)
-{
+               return time_after(now, end_time);
+       }
+#endif
+       return true;
 }
 
-static inline bool busy_loop_timeout(unsigned long end_time)
+static inline void sk_busy_loop(struct sock *sk, int nonblock)
 {
-       return true;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
+
+       if (napi_id >= MIN_NAPI_ID)
+               napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk);
+#endif
 }
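
With the socket side reduced to resolving a NAPI ID, other subsystems can busy-poll through the same primitive; a hedged sketch of a non-socket caller (my_state and its fields are invented):

static bool my_loop_end(void *arg, unsigned long start_time)
{
	struct my_state *s = arg;

	/* Stop when work arrived or the global busy-poll budget expired. */
	return s->done || busy_loop_timeout(start_time);
}

static void my_wait(struct my_state *s)
{
	if (net_busy_loop_on() && s->napi_id >= MIN_NAPI_ID)
		napi_busy_loop(s->napi_id, my_loop_end, s);
}
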
 
-static inline bool sk_busy_loop(struct sock *sk, int nonblock)
+/* used in the NIC receive handler to mark the skb */
+static inline void skb_mark_napi_id(struct sk_buff *skb,
+                                   struct napi_struct *napi)
 {
-       return false;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       skb->napi_id = napi->napi_id;
+#endif
 }
 
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
 /* used in the protocol handler to propagate the napi_id to the socket */
 static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 {
index d29e5fc8258216b9d79604bc99b69b66cce3e443..24de13f8c94f763bc63313fce444ee516acdef3a 100644 (file)
@@ -25,6 +25,8 @@ struct devlink {
        struct list_head list;
        struct list_head port_list;
        struct list_head sb_list;
+       struct list_head dpipe_table_list;
+       struct devlink_dpipe_headers *dpipe_headers;
        const struct devlink_ops *ops;
        struct device *dev;
        possible_net_t _net;
@@ -49,6 +51,178 @@ struct devlink_sb_pool_info {
        enum devlink_sb_threshold_type threshold_type;
 };
 
+/**
+ * struct devlink_dpipe_field - dpipe field object
+ * @name: field name
+ * @id: index inside the headers field array
+ * @bitwidth: bitwidth
+ * @mapping_type: mapping type
+ */
+struct devlink_dpipe_field {
+       const char *name;
+       unsigned int id;
+       unsigned int bitwidth;
+       enum devlink_dpipe_field_mapping_type mapping_type;
+};
+
+/**
+ * struct devlink_dpipe_header - dpipe header object
+ * @name: header name
+ * @id: index, global/local determined by the global bit
+ * @fields: fields
+ * @fields_count: number of fields
+ * @global: indicates whether the header is shared, like most protocol
+ *         headers, or driver specific
+ */
+struct devlink_dpipe_header {
+       const char *name;
+       unsigned int id;
+       struct devlink_dpipe_field *fields;
+       unsigned int fields_count;
+       bool global;
+};
+
+/**
+ * struct devlink_dpipe_match - represents match operation
+ * @type: type of match
+ * @header_index: header index (packets can have several headers of the
+ *               same type, as in the case of tunnels)
+ * @header: header
+ * @field_id: field index
+ */
+struct devlink_dpipe_match {
+       enum devlink_dpipe_match_type type;
+       unsigned int header_index;
+       struct devlink_dpipe_header *header;
+       unsigned int field_id;
+};
+
+/**
+ * struct devlink_dpipe_action - represents action operation
+ * @type: type of action
+ * @header_index: header index (packets can have several headers of same
+ *               type like in case of tunnels)
+ * @header: header
+ * @fieled_id: field index
+ */
+struct devlink_dpipe_action {
+       enum devlink_dpipe_action_type type;
+       unsigned int header_index;
+       struct devlink_dpipe_header *header;
+       unsigned int field_id;
+};
+
+/**
+ * struct devlink_dpipe_value - represents value of match/action
+ * @action: action
+ * @match: match
+ * @mapping_value: if the field has a mapping, this holds the mapped
+ *                 value
+ * @mapping_valid: specifies whether the mapping value is valid
+ * @value_size: value size
+ * @value: value
+ * @mask: bit mask
+ */
+struct devlink_dpipe_value {
+       union {
+               struct devlink_dpipe_action *action;
+               struct devlink_dpipe_match *match;
+       };
+       unsigned int mapping_value;
+       bool mapping_valid;
+       unsigned int value_size;
+       void *value;
+       void *mask;
+};
+
+/**
+ * struct devlink_dpipe_entry - table entry object
+ * @index: index of the entry in the table
+ * @match_values: match values
+ * @match_values_count: count of match value tuples
+ * @action_values: action values
+ * @action_values_count: count of action values
+ * @counter: value of counter
+ * @counter_valid: specifies whether the counter value read from
+ *                hardware is valid
+ */
+struct devlink_dpipe_entry {
+       u64 index;
+       struct devlink_dpipe_value *match_values;
+       unsigned int match_values_count;
+       struct devlink_dpipe_value *action_values;
+       unsigned int action_values_count;
+       u64 counter;
+       bool counter_valid;
+};
+
+/**
+ * struct devlink_dpipe_dump_ctx - context provided to the driver in
+ *                                order to dump entries
+ * @info: info
+ * @cmd: devlink command
+ * @skb: skb
+ * @nest: top attribute
+ * @hdr: hdr
+ */
+struct devlink_dpipe_dump_ctx {
+       struct genl_info *info;
+       enum devlink_command cmd;
+       struct sk_buff *skb;
+       struct nlattr *nest;
+       void *hdr;
+};
+
+struct devlink_dpipe_table_ops;
+
+/**
+ * struct devlink_dpipe_table - table object
+ * @priv: private
+ * @name: table name
+ * @size: maximum number of entries
+ * @counters_enabled: indicates if counters are active
+ * @counter_control_extern: indicates whether counters are controlled
+ *                         through dpipe or by an external tool
+ * @table_ops: table operations
+ * @rcu: rcu
+ */
+struct devlink_dpipe_table {
+       void *priv;
+       struct list_head list;
+       const char *name;
+       u64 size;
+       bool counters_enabled;
+       bool counter_control_extern;
+       struct devlink_dpipe_table_ops *table_ops;
+       struct rcu_head rcu;
+};
+
+/**
+ * struct devlink_dpipe_table_ops - dpipe_table ops
+ * @actions_dump - dumps all of the table's actions
+ * @matches_dump - dumps all of the table's matches
+ * @entries_dump - dumps all active entries in the table
+ * @counters_set_update - when changing the counter status, a hardware
+ *                       sync may be needed to allocate/free counter
+ *                       related resources
+ */
+struct devlink_dpipe_table_ops {
+       int (*actions_dump)(void *priv, struct sk_buff *skb);
+       int (*matches_dump)(void *priv, struct sk_buff *skb);
+       int (*entries_dump)(void *priv, bool counters_enabled,
+                           struct devlink_dpipe_dump_ctx *dump_ctx);
+       int (*counters_set_update)(void *priv, bool enable);
+};
+
+/**
+ * struct devlink_dpipe_headers - dpipe headers
+ * @headers - header array; entries can be shared (global bit) or
+ *           driver specific
+ * @headers_count - count of headers
+ */
+struct devlink_dpipe_headers {
+       struct devlink_dpipe_header **headers;
+       unsigned int headers_count;
+};
+
 struct devlink_ops {
        int (*port_type_set)(struct devlink_port *devlink_port,
                             enum devlink_port_type port_type);
@@ -132,6 +306,26 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
                        u16 egress_pools_count, u16 ingress_tc_count,
                        u16 egress_tc_count);
 void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
+int devlink_dpipe_table_register(struct devlink *devlink,
+                                const char *table_name,
+                                struct devlink_dpipe_table_ops *table_ops,
+                                void *priv, u64 size,
+                                bool counter_control_extern);
+void devlink_dpipe_table_unregister(struct devlink *devlink,
+                                   const char *table_name);
+int devlink_dpipe_headers_register(struct devlink *devlink,
+                                  struct devlink_dpipe_headers *dpipe_headers);
+void devlink_dpipe_headers_unregister(struct devlink *devlink);
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+                                        const char *table_name);
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx);
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+                                  struct devlink_dpipe_entry *entry);
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx);
+int devlink_dpipe_action_put(struct sk_buff *skb,
+                            struct devlink_dpipe_action *action);
+int devlink_dpipe_match_put(struct sk_buff *skb,
+                           struct devlink_dpipe_match *match);
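
A hedged sketch tying the pieces above together: register the shared headers once, then expose a table through ops whose callbacks dump via the ctx helpers declared above (all my_* names and MY_TABLE_SIZE are invented):

static struct devlink_dpipe_table_ops my_table_ops = {
	.actions_dump	     = my_actions_dump,
	.matches_dump	     = my_matches_dump,
	.entries_dump	     = my_entries_dump,
	.counters_set_update = my_counters_update,
};

static int my_dpipe_init(struct devlink *devlink)
{
	int err;

	err = devlink_dpipe_headers_register(devlink, &my_dpipe_headers);
	if (err)
		return err;

	err = devlink_dpipe_table_register(devlink, "my_table", &my_table_ops,
					   my_priv, MY_TABLE_SIZE, false);
	if (err)
		devlink_dpipe_headers_unregister(devlink);
	return err;
}
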
 
 #else
 
@@ -200,6 +394,71 @@ static inline void devlink_sb_unregister(struct devlink *devlink,
 {
 }
 
+static inline int
+devlink_dpipe_table_register(struct devlink *devlink,
+                            const char *table_name,
+                            struct devlink_dpipe_table_ops *table_ops,
+                            void *priv, u64 size,
+                            bool counter_control_extern)
+{
+       return 0;
+}
+
+static inline void devlink_dpipe_table_unregister(struct devlink *devlink,
+                                                 const char *table_name)
+{
+}
+
+static inline int devlink_dpipe_headers_register(struct devlink *devlink,
+                                                struct devlink_dpipe_headers *
+                                                dpipe_headers)
+{
+       return 0;
+}
+
+static inline void devlink_dpipe_headers_unregister(struct devlink *devlink)
+{
+}
+
+static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+                                                      const char *table_name)
+{
+       return false;
+}
+
+static inline int
+devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+       return 0;
+}
+
+static inline int
+devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+                              struct devlink_dpipe_entry *entry)
+{
+       return 0;
+}
+
+static inline int
+devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+       return 0;
+}
+
+static inline int
+devlink_dpipe_action_put(struct sk_buff *skb,
+                        struct devlink_dpipe_action *action)
+{
+       return 0;
+}
+
+static inline int
+devlink_dpipe_match_put(struct sk_buff *skb,
+                       struct devlink_dpipe_match *match)
+{
+       return 0;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
index 4e13e695f0251d5c762c3089065f4eeb429033eb..ffe56cc338feb9618191bdfcee44201d8139c72d 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/of.h>
 #include <linux/ethtool.h>
+#include <net/devlink.h>
 
 struct tc_action;
 struct phy_device;
@@ -182,6 +183,7 @@ struct dsa_port {
        unsigned int            ageing_time;
        u8                      stp_state;
        struct net_device       *bridge_dev;
+       struct devlink_port     devlink_port;
 };
 
 struct dsa_switch {
@@ -233,6 +235,13 @@ struct dsa_switch {
        u32                     phys_mii_mask;
        struct mii_bus          *slave_mii_bus;
 
+       /* Ageing Time limits in msecs */
+       unsigned int ageing_time_min;
+       unsigned int ageing_time_max;
+
+       /* devlink used to represent this switch device */
+       struct devlink          *devlink;
+
        /* Dynamically allocated ports, keep last */
        size_t num_ports;
        struct dsa_port ports[];
@@ -248,6 +257,11 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p)
        return !!((ds->dsa_port_mask) & (1 << p));
 }
 
+static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
+{
+       return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
+}
+
 static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
 {
        return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
@@ -442,6 +456,14 @@ struct dsa_switch_ops {
                                   bool ingress);
        void    (*port_mirror_del)(struct dsa_switch *ds, int port,
                                   struct dsa_mall_mirror_tc_entry *mirror);
+
+       /*
+        * Cross-chip operations
+        */
+       int     (*crosschip_bridge_join)(struct dsa_switch *ds, int sw_index,
+                                        int port, struct net_device *br);
+       void    (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
+                                         int port, struct net_device *br);
 };
 
 struct dsa_switch_driver {
@@ -459,6 +481,15 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
        return dst->rcv != NULL;
 }
 
+static inline bool netdev_uses_dsa(struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+       if (dev->dsa_ptr != NULL)
+               return dsa_uses_tagged_protocol(dev->dsa_ptr);
+#endif
+       return false;
+}
+
 struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n);
 void dsa_unregister_switch(struct dsa_switch *ds);
 int dsa_register_switch(struct dsa_switch *ds, struct device *dev);
index 8dbfdf728cd8ce901b3b05f0e58b4eeee25051fe..1243b9c7694e309f49dde4a5bf38cd1387dfecb7 100644 (file)
@@ -141,6 +141,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
                     struct fib_lookup_arg *);
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
                         u32 flags);
+bool fib_rule_matchall(const struct fib_rule *rule);
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh);
 int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh);
index 6984f1913dc124ab97627a4a5ecb2aba9f93e901..bae198b3039e6e66829c2717c1baf2baebbec6a2 100644 (file)
@@ -202,7 +202,7 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
 
 typedef unsigned long flow_compare_t;
 
-static inline size_t flow_key_size(u16 family)
+static inline unsigned int flow_key_size(u16 family)
 {
        switch (family) {
        case AF_INET:
index 9caf3bfc8d2dafcc89064ec54e78682244d71fb7..51eb971e897378e25329b06f80678f522eec2ea6 100644 (file)
@@ -8,7 +8,7 @@
 
 struct flow_cache_percpu {
        struct hlist_head               *hash_table;
-       int                             hash_count;
+       unsigned int                    hash_count;
        u32                             hash_rnd;
        int                             hash_rnd_recalc;
        struct tasklet_struct           flush_tasklet;
@@ -18,8 +18,8 @@ struct flow_cache {
        u32                             hash_shift;
        struct flow_cache_percpu __percpu *percpu;
        struct hlist_node               node;
-       int                             low_watermark;
-       int                             high_watermark;
+       unsigned int                    low_watermark;
+       unsigned int                    high_watermark;
        struct timer_list               rnd_timer;
 };
 #endif /* _NET_FLOWCACHE_H */
index 368bb4024b78c411d02a842f340f9fa11a9b5f7e..6692c5758b332d468f1e0611ecc4f3e03ae03b2b 100644 (file)
@@ -213,6 +213,11 @@ struct fib_entry_notifier_info {
        u32 tb_id;
 };
 
+struct fib_rule_notifier_info {
+       struct fib_notifier_info info; /* must be first */
+       struct fib_rule *rule;
+};
+
 struct fib_nh_notifier_info {
        struct fib_notifier_info info; /* must be first */
        struct fib_nh *fib_nh;
@@ -232,9 +237,21 @@ enum fib_event_type {
 int register_fib_notifier(struct notifier_block *nb,
                          void (*cb)(struct notifier_block *nb));
 int unregister_fib_notifier(struct notifier_block *nb);
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+                     enum fib_event_type event_type,
+                     struct fib_notifier_info *info);
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
                       struct fib_notifier_info *info);
 
+void fib_notify(struct net *net, struct notifier_block *nb);
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+void fib_rules_notify(struct net *net, struct notifier_block *nb);
+#else
+static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
+{
+}
+#endif
+
 struct fib_table {
        struct hlist_node       tb_hlist;
        u32                     tb_id;
@@ -299,6 +316,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
        return err;
 }
 
+static inline bool fib4_rule_default(const struct fib_rule *rule)
+{
+       return true;
+}
+
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -343,6 +365,8 @@ out:
        return err;
 }
 
+bool fib4_rule_default(const struct fib_rule *rule);
+
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
@@ -371,17 +395,13 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
-extern u32 fib_multipath_secret __read_mostly;
-
-static inline int fib_multipath_hash(__be32 saddr, __be32 daddr)
-{
-       return jhash_2words((__force u32)saddr, (__force u32)daddr,
-                           fib_multipath_secret) >> 1;
-}
-
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+                      const struct sk_buff *skb);
+#endif
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
-                    struct flowi4 *fl4, int mp_hash);
+                    struct flowi4 *fl4, const struct sk_buff *skb);
 
 /* Exported by fib_trie.c */
 void fib_trie_init(void);
index 7bdfa7d783639d8b65c18bd7f5a6ea5fa4fbb7da..8a4a57b887fb508c732b7e24cd4f9dd888a6941c 100644 (file)
@@ -12,6 +12,8 @@
 #include <linux/list.h>                 /* for struct list_head */
 #include <linux/spinlock.h>             /* for struct rwlock_t */
 #include <linux/atomic.h>               /* for struct atomic_t */
+#include <linux/refcount.h>             /* for struct refcount_t */
+
 #include <linux/compiler.h>
 #include <linux/timer.h>
 #include <linux/bug.h>
@@ -525,7 +527,7 @@ struct ip_vs_conn {
        struct netns_ipvs       *ipvs;
 
        /* counter and timer */
-       atomic_t                refcnt;         /* reference count */
+       refcount_t              refcnt;         /* reference count */
        struct timer_list       timer;          /* Expiration timer */
        volatile unsigned long  timeout;        /* timeout */
 
@@ -667,7 +669,7 @@ struct ip_vs_dest {
        atomic_t                conn_flags;     /* flags to copy to conn */
        atomic_t                weight;         /* server weight */
 
-       atomic_t                refcnt;         /* reference counter */
+       refcount_t              refcnt;         /* reference counter */
        struct ip_vs_stats      stats;          /* statistics */
        unsigned long           idle_start;     /* start time, jiffies */
 
@@ -1211,14 +1213,14 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
  */
 static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
 {
-       return atomic_inc_not_zero(&cp->refcnt);
+       return refcount_inc_not_zero(&cp->refcnt);
 }
 
 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 {
        smp_mb__before_atomic();
-       atomic_dec(&cp->refcnt);
+       refcount_dec(&cp->refcnt);
 }
 void ip_vs_conn_put(struct ip_vs_conn *cp);
 void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
@@ -1410,18 +1412,18 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
 {
-       atomic_inc(&dest->refcnt);
+       refcount_inc(&dest->refcnt);
 }
 
 static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
 {
        smp_mb__before_atomic();
-       atomic_dec(&dest->refcnt);
+       refcount_dec(&dest->refcnt);
 }
 
 static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
 {
-       if (atomic_dec_and_test(&dest->refcnt))
+       if (refcount_dec_and_test(&dest->refcnt))
                kfree(dest);
 }
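
The atomic_t to refcount_t switch in this hunk is a pattern repeated across netfilter in this series: refcount_t saturates rather than wrapping and warns on use after hitting zero. A generic sketch of the substitution (obj and o are placeholders):

struct obj {
	refcount_t refcnt;			/* was: atomic_t */
};

refcount_set(&o->refcnt, 1);			/* was: atomic_set(..., 1) */
refcount_inc(&o->refcnt);			/* was: atomic_inc() */
if (!refcount_inc_not_zero(&o->refcnt))		/* was: atomic_inc_not_zero */
	return NULL;				/* object already dying */
if (refcount_dec_and_test(&o->refcnt))		/* was: atomic_dec_and_test */
	kfree(o);
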
 
index 179253f9dcfd986ef806331044bc4973f1cc7d6e..9d22bf67ac86623eaaea2c49fbe1140747ce67e8 100644 (file)
 #ifndef _NET_MPLS_IPTUNNEL_H
 #define _NET_MPLS_IPTUNNEL_H 1
 
-#define MAX_NEW_LABELS 2
-
 struct mpls_iptunnel_encap {
-       u32     label[MAX_NEW_LABELS];
        u8      labels;
+       u8      ttl_propagate;
+       u8      default_ttl;
+       u8      reserved1;
+       u32     label[0];
 };
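
With label[] now a flexible array member, each route allocates exactly the label stack it needs; a hedged allocation sketch (n comes from netlink parsing, MPLS_TTL_PROP_DEFAULT is assumed from the companion uapi change in this series):

struct mpls_iptunnel_encap *en;

en = kzalloc(sizeof(*en) + n * sizeof(en->label[0]), GFP_KERNEL);
if (!en)
	return -ENOMEM;
en->labels = n;
en->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
/* en->label[0..n-1] filled from the parsed label stack */
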
 
 static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
index 8a0214654b6b10bc480d7e6dc8195555ca58dc9a..1036c902d2c9904ed084fcd5a4d8dc70b7902cbe 100644 (file)
@@ -439,8 +439,10 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
  *     IGMP
  */
 int igmp6_init(void);
+int igmp6_late_init(void);
 
 void igmp6_cleanup(void);
+void igmp6_late_cleanup(void);
 
 int igmp6_event_query(struct sk_buff *skb);
 
index 5ebf6949116097f60e668b0c2c4c48dd1639e5e8..9496179c7b4ea883a6b0fe7361d2965c991cca40 100644 (file)
@@ -314,7 +314,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl,
 }
 void neigh_destroy(struct neighbour *neigh);
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
-int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags);
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags,
+                u32 nlmsg_pid);
 void __neigh_set_probe_once(struct neighbour *neigh);
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
index 5ed33ea4718ef5a101689432825ebb1a48fcc0d9..65cc2cb005d937d610a2f072cb4f0f9c61570e60 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef _NF_CONNTRACK_EXPECT_H
 #define _NF_CONNTRACK_EXPECT_H
 
+#include <linux/refcount.h>
+
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
@@ -37,7 +39,7 @@ struct nf_conntrack_expect {
        struct timer_list timeout;
 
        /* Usage count. */
-       atomic_t use;
+       refcount_t use;
 
        /* Flags */
        unsigned int flags;
index 5cc5e9e6171a03db471407c708578b4794c22b92..d40b89355fdd345617cf21593922753613190a11 100644 (file)
@@ -4,6 +4,7 @@
 #include <net/net_namespace.h>
 #include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <linux/refcount.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
@@ -12,7 +13,7 @@
 struct ctnl_timeout {
        struct list_head        head;
        struct rcu_head         rcu_head;
-       atomic_t                refcnt;
+       refcount_t              refcnt;
        char                    name[CTNL_TIMEOUT_NAME_MAX];
        __u16                   l3num;
        struct nf_conntrack_l4proto *l4proto;
index 0136028652bdb8b3c20813b01b2fa8cfb16ba012..f713a053f89d4627d3166a5c2bb14f7a0f67b574 100644 (file)
@@ -413,10 +413,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv)
        return (void *)priv - offsetof(struct nft_set, data);
 }
 
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
-                                    const struct nlattr *nla, u8 genmask);
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
-                                         const struct nlattr *nla, u8 genmask);
+struct nft_set *nft_set_lookup(const struct net *net,
+                              const struct nft_table *table,
+                              const struct nlattr *nla_set_name,
+                              const struct nlattr *nla_set_id,
+                              u8 genmask);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
@@ -1044,7 +1045,8 @@ struct nft_object_type {
        unsigned int                    maxattr;
        struct module                   *owner;
        const struct nla_policy         *policy;
-       int                             (*init)(const struct nlattr * const tb[],
+       int                             (*init)(const struct nft_ctx *ctx,
+                                               const struct nlattr *const tb[],
                                                struct nft_object *obj);
        void                            (*destroy)(struct nft_object *obj);
        int                             (*dump)(struct sk_buff *skb,
index 5ceb2205e4e3ed93461ed4a3956b227f99ac9494..381af9469e6ada01e4acffd4efc3ebc88e66a019 100644 (file)
@@ -32,6 +32,6 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
                   const struct nft_pktinfo *pkt);
 
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
                          const struct nft_pktinfo *pkt, int index);
 #endif
index 622d2da27135586d164c228b81e71afb922d5d8c..cd686c4fb32dc5409a08f818d48228bffa6f6778 100644 (file)
@@ -33,7 +33,6 @@ struct inet_timewait_death_row {
        atomic_t                tw_count;
 
        struct inet_hashinfo    *hashinfo ____cacheline_aligned_in_smp;
-       int                     sysctl_tw_recycle;
        int                     sysctl_max_tw_buckets;
 };
 
@@ -96,6 +95,8 @@ struct netns_ipv4 {
        /* Shall we try to damage output packets if routing dev changes? */
        int sysctl_ip_dynaddr;
        int sysctl_ip_early_demux;
+       int sysctl_tcp_early_demux;
+       int sysctl_udp_early_demux;
 
        int sysctl_fwmark_reflect;
        int sysctl_tcp_fwmark_accept;
@@ -152,6 +153,7 @@ struct netns_ipv4 {
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        int sysctl_fib_multipath_use_neigh;
+       int sysctl_fib_multipath_hash_policy;
 #endif
 
        unsigned int    fib_seq;        /* protected by rtnl_mutex */
index d29203651c01700d9406157ef8dd016ea55a4cbb..6608b3693385e771147f78da13afa87cc6355d83 100644 (file)
@@ -9,8 +9,11 @@ struct mpls_route;
 struct ctl_table_header;
 
 struct netns_mpls {
+       int ip_ttl_propagate;
+       int default_ttl;
        size_t platform_labels;
        struct mpls_route __rcu * __rcu *platform_label;
+
        struct ctl_table_header *ctl;
 };
 
index f1b76b8e6d2d296177116d0ef0f254d175551cbe..bec46f63f10ced844f8aec2b19bebf8b3dc01167 100644 (file)
@@ -92,7 +92,7 @@ int unregister_qdisc(struct Qdisc_ops *qops);
 void qdisc_get_default(char *id, size_t len);
 int qdisc_set_default(const char *id);
 
-void qdisc_hash_add(struct Qdisc *q);
+void qdisc_hash_add(struct Qdisc *q, bool invisible);
 void qdisc_hash_del(struct Qdisc *q);
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
 struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
index bf36ca34af7ad255b9eb821cbed0a70abad993f5..65ba335b0e7e66bb7f1b4bd279d31e616e0dd31e 100644 (file)
@@ -40,6 +40,7 @@
 /* This is used to register protocols. */
 struct net_protocol {
        void                    (*early_demux)(struct sk_buff *skb);
+       void                    (*early_demux_handler)(struct sk_buff *skb);
        int                     (*handler)(struct sk_buff *skb);
        void                    (*err_handler)(struct sk_buff *skb, u32 info);
        unsigned int            no_policy:1,
@@ -54,7 +55,7 @@ struct net_protocol {
 #if IS_ENABLED(CONFIG_IPV6)
 struct inet6_protocol {
        void    (*early_demux)(struct sk_buff *skb);
-
+       void    (*early_demux_handler)(struct sk_buff *skb);
        int     (*handler)(struct sk_buff *skb);
 
        void    (*err_handler)(struct sk_buff *skb,
@@ -92,12 +93,12 @@ struct inet_protosw {
 #define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
 #define INET_PROTOSW_ICSK      0x04  /* Is this an inet_connection_sock? */
 
-extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
+extern struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
 extern const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS];
 extern const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS];
 
 #if IS_ENABLED(CONFIG_IPV6)
-extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
+extern struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
 #endif
 
 int inet_add_protocol(const struct net_protocol *prot, unsigned char num);
index c0874c87c173717f2c13c8af06d2482a76190243..2cc0e14c63598ce3d3be88bb04d2fd433d676129 100644 (file)
@@ -113,13 +113,13 @@ struct in_device;
 int ip_rt_init(void);
 void rt_cache_flush(struct net *net);
 void rt_flush_dev(struct net_device *dev);
-struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp,
-                                         int mp_hash);
+struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
+                                         const struct sk_buff *skb);
 
 static inline struct rtable *__ip_route_output_key(struct net *net,
                                                   struct flowi4 *flp)
 {
-       return __ip_route_output_key_hash(net, flp, -1);
+       return __ip_route_output_key_hash(net, flp, NULL);
 }
 
 struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
index aeec4086afb2446dadb1fb8c54ad54a909634380..65d50261031473d33c27f6bce1020048a697481d 100644 (file)
@@ -66,6 +66,7 @@ struct Qdisc {
 #define TCQ_F_NOPARENT         0x40 /* root of its hierarchy :
                                      * qdisc_tree_decrease_qlen() should stop.
                                      */
+#define TCQ_F_INVISIBLE                0x80 /* invisible by default in dump */
        u32                     limit;
        const struct Qdisc_ops  *ops;
        struct qdisc_size_table __rcu *stab;
index b6f682ec184a62a1e7b9d2a0ea159293cca12a76..47113f2c4b0a2b6c596d2f28018a1e1941cb6ede 100644 (file)
@@ -293,6 +293,22 @@ struct sctp_chunk *sctp_process_strreset_inreq(
                                struct sctp_association *asoc,
                                union sctp_params param,
                                struct sctp_ulpevent **evp);
+struct sctp_chunk *sctp_process_strreset_tsnreq(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp);
+struct sctp_chunk *sctp_process_strreset_addstrm_out(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp);
+struct sctp_chunk *sctp_process_strreset_addstrm_in(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp);
+struct sctp_chunk *sctp_process_strreset_resp(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp);
 
 /* Prototypes for statetable processing. */
 
index 07a0b128625a4e24f9aa83019eff6eb17308eda3..3e61a54424a177281b3338c67051580fa9a1610d 100644 (file)
@@ -83,6 +83,7 @@ struct sctp_bind_addr;
 struct sctp_ulpq;
 struct sctp_ep_common;
 struct crypto_shash;
+struct sctp_stream;
 
 
 #include <net/sctp/tsnmap.h>
@@ -753,6 +754,8 @@ struct sctp_transport {
                /* Is the Path MTU update pending on this transport */
                pmtu_pending:1,
 
+               dst_pending_confirm:1,  /* need to confirm neighbour */
+
                /* Has this transport moved the ctsn since we last sacked */
                sack_generation:1;
        u32 dst_cookie;
@@ -806,8 +809,6 @@ struct sctp_transport {
 
        __u32 burst_limited;    /* Holds old cwnd when max.burst is applied */
 
-       __u32 dst_pending_confirm;      /* need to confirm neighbour */
-
        /* Destination */
        struct dst_entry *dst;
        /* Source address. */
@@ -1314,6 +1315,8 @@ struct sctp_inithdr_host {
 struct sctp_stream_out {
        __u16   ssn;
        __u8    state;
+       __u64   abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+       __u64   abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
 };
 
 struct sctp_stream_in {
index 324b5965fc4de505ca98fbbb9aedc0d3a0039742..1060494ac230b80caca57f6963dca2692ae10b9d 100644 (file)
@@ -132,6 +132,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
        const struct sctp_association *asoc, __u16 flags,
        __u16 stream_num, __u16 *stream_list, gfp_t gfp);
 
+struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event(
+       const struct sctp_association *asoc, __u16 flags,
+       __u32 local_tsn, __u32 remote_tsn, gfp_t gfp);
+
+struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
+       const struct sctp_association *asoc, __u16 flags,
+       __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp);
+
 void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
                                   struct msghdr *);
 void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
index 0caee631a8364fe6e49ab8cacba864d019be8b47..fe236b3429f0d8caeb1adc367b5b4a20591c848b 100644 (file)
@@ -6,10 +6,10 @@
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
                               __be16 dport);
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
+                            __be16 sport, __be16 dport, u32 *tsoff);
+u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
                               __be16 sport, __be16 dport, u32 *tsoff);
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
-                                __be16 sport, __be16 dport, u32 *tsoff);
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
                                __be16 sport, __be16 dport);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
index 03252d53975de7ad0da66d35802738830b0e3367..66349e49d468646ce724485bb8e74952825f0d6c 100644 (file)
@@ -1783,11 +1783,8 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 
        sk_tx_queue_clear(sk);
        sk->sk_dst_pending_confirm = 0;
-       /*
-        * This can be called while sk is owned by the caller only,
-        * with no state that can be checked in a rcu_dereference_check() cond
-        */
-       old_dst = rcu_dereference_raw(sk->sk_dst_cache);
+       old_dst = rcu_dereference_protected(sk->sk_dst_cache,
+                                           lockdep_sock_is_held(sk));
        rcu_assign_pointer(sk->sk_dst_cache, dst);
        dst_release(old_dst);
 }
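
rcu_dereference_raw() silences lockdep entirely, while rcu_dereference_protected() asserts the stated writer-side condition, here lockdep_sock_is_held(sk). The general shape of the update-side pattern, sketched with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cfg {
	int val;
	struct rcu_head rcu;
};

static struct cfg __rcu *active_cfg;
static DEFINE_SPINLOCK(cfg_lock);

static void cfg_replace(struct cfg *new)
{
	struct cfg *old;

	spin_lock(&cfg_lock);
	/* writer side: lockdep splats if cfg_lock is not actually held */
	old = rcu_dereference_protected(active_cfg,
					lockdep_is_held(&cfg_lock));
	rcu_assign_pointer(active_cfg, new);
	spin_unlock(&cfg_lock);
	if (old)
		kfree_rcu(old, rcu);	/* readers may still hold old */
}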
@@ -2242,6 +2239,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
                              struct sk_buff *skb);
 
+#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
 static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
                                          struct sk_buff *skb)
 {
@@ -2252,8 +2250,10 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 
        if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
                __sock_recv_ts_and_drops(msg, sk, skb);
-       else
+       else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
                sk->sk_stamp = skb->tstamp;
+       else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
+               sk->sk_stamp = 0;
 }
 
 void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
@@ -2365,6 +2365,8 @@ bool sk_ns_capable(const struct sock *sk,
 bool sk_capable(const struct sock *sk, int cap);
 bool sk_net_capable(const struct sock *sk, int cap);
 
+void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
+
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
index dfbd6ee0bc7cd196c052e700da43deddb8d1dfef..a46c3f2ace702932dc95c3021e7b13d72a2a4777 100644 (file)
@@ -2,6 +2,7 @@
 #define __NET_TC_PED_H
 
 #include <net/act_api.h>
+#include <linux/tc_act/tc_pedit.h>
 
 struct tcf_pedit_key_ex {
        enum pedit_header_type htype;
@@ -17,4 +18,48 @@ struct tcf_pedit {
 };
 #define to_pedit(a) ((struct tcf_pedit *)a)
 
+static inline bool is_tcf_pedit(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       if (a->ops && a->ops->type == TCA_ACT_PEDIT)
+               return true;
+#endif
+       return false;
+}
+
+static inline int tcf_pedit_nkeys(const struct tc_action *a)
+{
+       return to_pedit(a)->tcfp_nkeys;
+}
+
+static inline u32 tcf_pedit_htype(const struct tc_action *a, int index)
+{
+       if (to_pedit(a)->tcfp_keys_ex)
+               return to_pedit(a)->tcfp_keys_ex[index].htype;
+
+       return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+}
+
+static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index)
+{
+       if (to_pedit(a)->tcfp_keys_ex)
+               return to_pedit(a)->tcfp_keys_ex[index].cmd;
+
+       return __PEDIT_CMD_MAX;
+}
+
+static inline u32 tcf_pedit_mask(const struct tc_action *a, int index)
+{
+       return to_pedit(a)->tcfp_keys[index].mask;
+}
+
+static inline u32 tcf_pedit_val(const struct tc_action *a, int index)
+{
+       return to_pedit(a)->tcfp_keys[index].val;
+}
+
+static inline u32 tcf_pedit_offset(const struct tc_action *a, int index)
+{
+       return to_pedit(a)->tcfp_keys[index].off;
+}
 #endif /* __NET_TC_PED_H */
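
These accessors give hardware-offload code a stable way to walk a pedit action's rewrite keys without poking at struct tcf_pedit internals. A sketch of a consumer (offload_pedit is hypothetical):

static int offload_pedit(const struct tc_action *a)
{
	int i;

	if (!is_tcf_pedit(a))
		return -EOPNOTSUPP;

	for (i = 0; i < tcf_pedit_nkeys(a); i++) {
		u32 htype = tcf_pedit_htype(a, i); /* which header layer */
		u32 cmd   = tcf_pedit_cmd(a, i);   /* rewrite command    */
		u32 off   = tcf_pedit_offset(a, i);/* byte offset in hdr */
		u32 mask  = tcf_pedit_mask(a, i);  /* mask for the word  */
		u32 val   = tcf_pedit_val(a, i);   /* value merged in    */

		/* translate (htype, cmd, off, mask, val) into hw rules */
	}
	return 0;
}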
index 48cca321ee6c4f3e44f8f546d05d5f32b470ccc1..9690c047b6cf8adb2c65142f32473c44e39cf0a8 100644 (file)
@@ -49,4 +49,9 @@ static inline __be16 tcf_vlan_push_proto(const struct tc_action *a)
        return to_vlan(a)->tcfv_push_proto;
 }
 
+static inline u8 tcf_vlan_push_prio(const struct tc_action *a)
+{
+       return to_vlan(a)->tcfv_push_prio;
+}
+
 #endif /* __NET_TC_VLAN_H */
index 6ec4ea652f3f55e53675dbe09f29599af179c41a..582e3772c0d9c54a2dc7c757c0b6452b78725525 100644 (file)
@@ -406,11 +406,7 @@ void tcp_clear_retrans(struct tcp_sock *tp);
 void tcp_update_metrics(struct sock *sk);
 void tcp_init_metrics(struct sock *sk);
 void tcp_metrics_init(void);
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
-                       bool paws_check, bool timestamps);
-bool tcp_remember_stamp(struct sock *sk);
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_disable_fack(struct tcp_sock *tp);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
@@ -1252,9 +1248,11 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
 
 static inline int tcp_win_from_space(int space)
 {
-       return sysctl_tcp_adv_win_scale<=0 ?
-               (space>>(-sysctl_tcp_adv_win_scale)) :
-               space - (space>>sysctl_tcp_adv_win_scale);
+       int tcp_adv_win_scale = sysctl_tcp_adv_win_scale;
+
+       return tcp_adv_win_scale <= 0 ?
+               (space>>(-tcp_adv_win_scale)) :
+               space - (space>>tcp_adv_win_scale);
 }
 
 /* Note: caller must be prepared to deal with negative returns */
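
The local copy matters because sysctl_tcp_adv_win_scale can change between the sign test and the shift; with two independent reads, a racing sysctl write can turn the expression into a shift by a negative count, which is undefined behavior. The hazard being removed, sketched:

/* BROKEN (sketch): two reads of a value writable via /proc */
if (sysctl_tcp_adv_win_scale <= 0)		/* read #1 sees -1 ... */
	space >>= -sysctl_tcp_adv_win_scale;	/* read #2 sees +1: UB */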
@@ -1814,9 +1812,8 @@ struct tcp_request_sock_ops {
                                 __u16 *mss);
 #endif
        struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
-                                      const struct request_sock *req,
-                                      bool *strict);
-       __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff);
+                                      const struct request_sock *req);
+       __u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff);
        int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
                           struct flowi *fl, struct request_sock *req,
                           struct tcp_fastopen_cookie *foc,
index c9d8b8e848e05c2e7228f287f88ccdb57b2e10c2..3391dbd739595a76150453c28468ce8bb55530f8 100644 (file)
@@ -372,4 +372,5 @@ void udp_encap_enable(void);
 #if IS_ENABLED(CONFIG_IPV6)
 void udpv6_encap_enable(void);
 #endif
+
 #endif /* _UDP_H */
index b54b98dc2d4a77681dd3ecf883d75e062589ee8c..1b0f447ce850f015e64dd27e47751fe945cbb2ec 100644 (file)
@@ -4,7 +4,12 @@
 #include <linux/types.h>
 #include <target/target_core_base.h>
 
-#define TRANSPORT_FLAG_PASSTHROUGH             1
+#define TRANSPORT_FLAG_PASSTHROUGH             0x1
+/*
+ * ALUA commands, state checks and setup operations are handled by the
+ * backend module.
+ */
+#define TRANSPORT_FLAG_PASSTHROUGH_ALUA                0x2
 
 struct request_queue;
 struct scatterlist;
index 37c274e61acceee74d792a240b8f3695f0d78085..4b784b6e21c0d9cb533b31997883d7dd447343bf 100644 (file)
@@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp {
        struct list_head tg_pt_gp_lun_list;
        struct se_lun *tg_pt_gp_alua_lun;
        struct se_node_acl *tg_pt_gp_alua_nacl;
-       struct delayed_work tg_pt_gp_transition_work;
+       struct work_struct tg_pt_gp_transition_work;
        struct completion *tg_pt_gp_transition_complete;
 };
 
index 2c748ddad5f875711ed66f91eae9bc69b9a41fe0..c98a52fb572a4e9ad59410f43931aa81205212ec 100644 (file)
@@ -94,4 +94,8 @@
 
 #define SCM_TIMESTAMPING_OPT_STATS     54
 
+#define SO_MEMINFO             55
+
+#define SO_INCOMING_NAPI_ID    56
+
 #endif /* __ASM_GENERIC_SOCKET_H */
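
Both new options are plain getsockopt() reads. A userspace sketch, assuming fd is an open socket (the SK_MEMINFO_* indices come from linux/sock_diag.h):

#include <linux/sock_diag.h>	/* SK_MEMINFO_VARS, SK_MEMINFO_RMEM_ALLOC */
#include <linux/types.h>
#include <sys/socket.h>

static void query_sock_info(int fd)
{
	unsigned int napi_id = 0;
	__u32 meminfo[SK_MEMINFO_VARS];
	socklen_t len;

	len = sizeof(napi_id);
	getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
	/* napi_id: NAPI context that delivered the most recent packet,
	 * useful for pinning a thread near its RX queue */

	len = sizeof(meminfo);
	getsockopt(fd, SOL_SOCKET, SO_MEMINFO, meminfo, &len);
	/* meminfo[] mirrors the sock_diag counters, e.g.
	 * meminfo[SK_MEMINFO_RMEM_ALLOC] for receive buffer usage */
}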
index 407cb55df6ac178e11620fd9554cc913e30b401c..7fb97863c94577d7b9f583abe8a41fe14f54b734 100644 (file)
@@ -33,8 +33,8 @@ extern "C" {
 #define OMAP_PARAM_CHIPSET_ID  1       /* ie. 0x3430, 0x4430, etc */
 
 struct drm_omap_param {
-       uint64_t param;                 /* in */
-       uint64_t value;                 /* in (set_param), out (get_param) */
+       __u64 param;                    /* in */
+       __u64 value;                    /* in (set_param), out (get_param) */
 };
 
 #define OMAP_BO_SCANOUT                0x00000001      /* scanout capable (phys contiguous) */
@@ -53,18 +53,18 @@ struct drm_omap_param {
 #define OMAP_BO_TILED          (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32)
 
 union omap_gem_size {
-       uint32_t bytes;         /* (for non-tiled formats) */
+       __u32 bytes;            /* (for non-tiled formats) */
        struct {
-               uint16_t width;
-               uint16_t height;
+               __u16 width;
+               __u16 height;
        } tiled;                /* (for tiled formats) */
 };
 
 struct drm_omap_gem_new {
        union omap_gem_size size;       /* in */
-       uint32_t flags;                 /* in */
-       uint32_t handle;                /* out */
-       uint32_t __pad;
+       __u32 flags;                    /* in */
+       __u32 handle;                   /* out */
+       __u32 __pad;
 };
 
 /* mask of operations: */
@@ -74,33 +74,33 @@ enum omap_gem_op {
 };
 
 struct drm_omap_gem_cpu_prep {
-       uint32_t handle;                /* buffer handle (in) */
-       uint32_t op;                    /* mask of omap_gem_op (in) */
+       __u32 handle;                   /* buffer handle (in) */
+       __u32 op;                       /* mask of omap_gem_op (in) */
 };
 
 struct drm_omap_gem_cpu_fini {
-       uint32_t handle;                /* buffer handle (in) */
-       uint32_t op;                    /* mask of omap_gem_op (in) */
+       __u32 handle;                   /* buffer handle (in) */
+       __u32 op;                       /* mask of omap_gem_op (in) */
        /* TODO maybe here we pass down info about what regions are touched
         * by sw so we can be clever about cache ops?  For now a placeholder,
         * set to zero and we just do full buffer flush..
         */
-       uint32_t nregions;
-       uint32_t __pad;
+       __u32 nregions;
+       __u32 __pad;
 };
 
 struct drm_omap_gem_info {
-       uint32_t handle;                /* buffer handle (in) */
-       uint32_t pad;
-       uint64_t offset;                /* mmap offset (out) */
+       __u32 handle;                   /* buffer handle (in) */
+       __u32 pad;
+       __u64 offset;                   /* mmap offset (out) */
        /* note: in case of tiled buffers, the user virtual size can be
         * different from the physical size (ie. how many pages are needed
         * to back the object) which is returned in DRM_IOCTL_GEM_OPEN..
         * This size here is the one that should be used if you want to
         * mmap() the buffer:
         */
-       uint32_t size;                  /* virtual size for mmap'ing (out) */
-       uint32_t __pad;
+       __u32 size;                     /* virtual size for mmap'ing (out) */
+       __u32 __pad;
 };
 
 #define DRM_OMAP_GET_PARAM             0x00
index 0539a0ceef38155835552360667070552ebce641..a1d95386f562fe7ec7e5a2783346f55c8a1cfbd9 100644 (file)
@@ -81,6 +81,7 @@ enum bpf_cmd {
        BPF_OBJ_GET,
        BPF_PROG_ATTACH,
        BPF_PROG_DETACH,
+       BPF_PROG_TEST_RUN,
 };
 
 enum bpf_map_type {
@@ -96,6 +97,8 @@ enum bpf_map_type {
        BPF_MAP_TYPE_LRU_HASH,
        BPF_MAP_TYPE_LRU_PERCPU_HASH,
        BPF_MAP_TYPE_LPM_TRIE,
+       BPF_MAP_TYPE_ARRAY_OF_MAPS,
+       BPF_MAP_TYPE_HASH_OF_MAPS,
 };
 
 enum bpf_prog_type {
@@ -152,6 +155,7 @@ union bpf_attr {
                __u32   value_size;     /* size of value in bytes */
                __u32   max_entries;    /* max number of entries in a map */
                __u32   map_flags;      /* prealloc or not */
+               __u32   inner_map_fd;   /* fd pointing to the inner map */
        };
 
        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -186,6 +190,17 @@ union bpf_attr {
                __u32           attach_type;
                __u32           attach_flags;
        };
+
+       struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+               __u32           prog_fd;
+               __u32           retval;
+               __u32           data_size_in;
+               __u32           data_size_out;
+               __aligned_u64   data_in;
+               __aligned_u64   data_out;
+               __u32           repeat;
+               __u32           duration;
+       } test;
 } __attribute__((aligned(8)));
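
The new test struct drives a program over caller-supplied packet data. A minimal userspace wrapper, sketched (the function name is hypothetical and error handling is elided):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int prog_test_run(int prog_fd, const void *data, __u32 size,
			 void *out, __u32 *out_size, __u32 *retval)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;
	attr.test.data_in = (__u64)(unsigned long)data;
	attr.test.data_size_in = size;
	attr.test.data_out = (__u64)(unsigned long)out;
	attr.test.repeat = 1;	/* run once; >1 averages the duration */

	if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)) < 0)
		return -1;

	*retval = attr.test.retval;		/* program's return code */
	*out_size = attr.test.data_size_out;	/* possibly rewritten data */
	return 0;
}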
 
 /* BPF helper function descriptions:
@@ -456,6 +471,18 @@ union bpf_attr {
  *     Return:
  *       > 0 length of the string including the trailing NUL on success
  *       < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ *     Get the cookie for the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: an 8-byte non-decreasing number on success, or 0 if the socket
+ *     field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ *     Get the owner uid of the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: uid of the socket owner on success or 0 if the socket pointer
+ *     inside sk_buff is NULL
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -503,7 +530,9 @@ union bpf_attr {
        FN(get_numa_node_id),           \
        FN(skb_change_head),            \
        FN(xdp_adjust_head),            \
-       FN(probe_read_str),
+       FN(probe_read_str),             \
+       FN(get_socket_cookie),          \
+       FN(get_socket_uid),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index db4c253f8011b2f483ddd1ffc09f4f04a93fdc0a..dcfc3a5a9cb1d20f29bbac00c6ef315006e9d208 100644 (file)
@@ -713,33 +713,6 @@ enum btrfs_err_code {
        BTRFS_ERROR_DEV_ONLY_WRITABLE,
        BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
 };
-/* An error code to error string mapping for the kernel
-*  error codes
-*/
-static inline char *btrfs_err_str(enum btrfs_err_code err_code)
-{
-       switch (err_code) {
-               case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET:
-                       return "unable to go below two devices on raid1";
-               case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
-                       return "unable to go below four devices on raid10";
-               case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
-                       return "unable to go below two devices on raid5";
-               case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:
-                       return "unable to go below three devices on raid6";
-               case BTRFS_ERROR_DEV_TGT_REPLACE:
-                       return "unable to remove the dev_replace target dev";
-               case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
-                       return "no missing devices found to remove";
-               case BTRFS_ERROR_DEV_ONLY_WRITABLE:
-                       return "unable to remove the only writeable device";
-               case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS:
-                       return "add/delete/balance/replace/resize operation "\
-                               "in progress";
-               default:
-                       return NULL;
-       }
-}
 
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
                                   struct btrfs_ioctl_vol_args)
index 0f1f3a12e23c30e511cdb332059e30ee8d3d5efb..b47bee2773477b1504ebaee3b1e88f806f35bfc2 100644 (file)
@@ -65,8 +65,12 @@ enum devlink_command {
 #define DEVLINK_CMD_ESWITCH_MODE_SET /* obsolete, never use this! */ \
        DEVLINK_CMD_ESWITCH_SET
 
-       /* add new commands above here */
+       DEVLINK_CMD_DPIPE_TABLE_GET,
+       DEVLINK_CMD_DPIPE_ENTRIES_GET,
+       DEVLINK_CMD_DPIPE_HEADERS_GET,
+       DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
 
+       /* add new commands above here */
        __DEVLINK_CMD_MAX,
        DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
 };
@@ -148,10 +152,71 @@ enum devlink_attr {
        DEVLINK_ATTR_ESWITCH_MODE,              /* u16 */
        DEVLINK_ATTR_ESWITCH_INLINE_MODE,       /* u8 */
 
+       DEVLINK_ATTR_DPIPE_TABLES,              /* nested */
+       DEVLINK_ATTR_DPIPE_TABLE,               /* nested */
+       DEVLINK_ATTR_DPIPE_TABLE_NAME,          /* string */
+       DEVLINK_ATTR_DPIPE_TABLE_SIZE,          /* u64 */
+       DEVLINK_ATTR_DPIPE_TABLE_MATCHES,       /* nested */
+       DEVLINK_ATTR_DPIPE_TABLE_ACTIONS,       /* nested */
+       DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,      /* u8 */
+
+       DEVLINK_ATTR_DPIPE_ENTRIES,             /* nested */
+       DEVLINK_ATTR_DPIPE_ENTRY,               /* nested */
+       DEVLINK_ATTR_DPIPE_ENTRY_INDEX,         /* u64 */
+       DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES,  /* nested */
+       DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES, /* nested */
+       DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,       /* u64 */
+
+       DEVLINK_ATTR_DPIPE_MATCH,               /* nested */
+       DEVLINK_ATTR_DPIPE_MATCH_VALUE,         /* nested */
+       DEVLINK_ATTR_DPIPE_MATCH_TYPE,          /* u32 */
+
+       DEVLINK_ATTR_DPIPE_ACTION,              /* nested */
+       DEVLINK_ATTR_DPIPE_ACTION_VALUE,        /* nested */
+       DEVLINK_ATTR_DPIPE_ACTION_TYPE,         /* u32 */
+
+       DEVLINK_ATTR_DPIPE_VALUE,
+       DEVLINK_ATTR_DPIPE_VALUE_MASK,
+       DEVLINK_ATTR_DPIPE_VALUE_MAPPING,       /* u32 */
+
+       DEVLINK_ATTR_DPIPE_HEADERS,             /* nested */
+       DEVLINK_ATTR_DPIPE_HEADER,              /* nested */
+       DEVLINK_ATTR_DPIPE_HEADER_NAME,         /* string */
+       DEVLINK_ATTR_DPIPE_HEADER_ID,           /* u32 */
+       DEVLINK_ATTR_DPIPE_HEADER_FIELDS,       /* nested */
+       DEVLINK_ATTR_DPIPE_HEADER_GLOBAL,       /* u8 */
+       DEVLINK_ATTR_DPIPE_HEADER_INDEX,        /* u32 */
+
+       DEVLINK_ATTR_DPIPE_FIELD,               /* nested */
+       DEVLINK_ATTR_DPIPE_FIELD_NAME,          /* string */
+       DEVLINK_ATTR_DPIPE_FIELD_ID,            /* u32 */
+       DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH,      /* u32 */
+       DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE,  /* u32 */
+
+       DEVLINK_ATTR_PAD,
+
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
        DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1
 };
 
+/* Mapping between internal resource described by the field and system
+ * structure
+ */
+enum devlink_dpipe_field_mapping_type {
+       DEVLINK_DPIPE_FIELD_MAPPING_TYPE_NONE,
+       DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
+};
+
+/* Match type - specify the type of the match */
+enum devlink_dpipe_match_type {
+       DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT,
+};
+
+/* Action type - specify the action type */
+enum devlink_dpipe_action_type {
+       DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY,
+};
+
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
index 72a04a0e8ccef1e5560378af9177a6c47954daaf..57d1edb8efd9bae86d0b11779f59de9d18f35f1a 100644 (file)
@@ -19,7 +19,8 @@ enum gtp_attrs {
        GTPA_LINK,
        GTPA_VERSION,
        GTPA_TID,       /* for GTPv0 only */
-       GTPA_SGSN_ADDRESS,
+       GTPA_PEER_ADDRESS,      /* Remote GSN peer, either SGSN or GGSN */
+#define GTPA_SGSN_ADDRESS GTPA_PEER_ADDRESS /* maintain legacy attr name */
        GTPA_MS_ADDRESS,
        GTPA_FLOW,
        GTPA_NET_NS_FD,
index 320fc1e747ee9623db56fbaf26b2a514b5d5a3d1..8b405afb23763498bf0b879ab319dcc9bf94f4b9 100644 (file)
@@ -538,11 +538,18 @@ enum {
 #define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
 
 /* GTP section */
+
+enum ifla_gtp_role {
+       GTP_ROLE_GGSN = 0,
+       GTP_ROLE_SGSN,
+};
+
 enum {
        IFLA_GTP_UNSPEC,
        IFLA_GTP_FD0,
        IFLA_GTP_FD1,
        IFLA_GTP_PDP_HASHSIZE,
+       IFLA_GTP_ROLE,
        __IFLA_GTP_MAX,
 };
 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
index 8ef9e75e004ebbf951cb50b4b4b17f2c5b907dd4..2ae59178189d7983fb9ed99ae1cd5f40197cfb04 100644 (file)
@@ -183,6 +183,8 @@ enum {
        DEVCONF_SEG6_REQUIRE_HMAC,
        DEVCONF_ENHANCED_DAD,
        DEVCONF_ADDR_GEN_MODE,
+       DEVCONF_DISABLE_POLICY,
+       DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN,
        DEVCONF_MAX
 };
 
index d80a0498f77ed2d4dac3b61a6eef1906eb8b0626..f5e45095b0bb5c17af6515012587d6d805a7d79c 100644 (file)
 /* MPLS tunnel attributes
  * [RTA_ENCAP] = {
  *     [MPLS_IPTUNNEL_DST]
+ *     [MPLS_IPTUNNEL_TTL]
  * }
  */
 enum {
        MPLS_IPTUNNEL_UNSPEC,
        MPLS_IPTUNNEL_DST,
+       MPLS_IPTUNNEL_TTL,
        __MPLS_IPTUNNEL_MAX,
 };
 #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
index 05215d30fe5c9853b7871e799ccdce4878a04ef1..8f3842690d176bb6d847e4a157dfc404d0e6f0f1 100644 (file)
@@ -815,6 +815,17 @@ enum nft_rt_keys {
        NFT_RT_NEXTHOP6,
 };
 
+/**
+ * enum nft_hash_types - nf_tables hash expression types
+ *
+ * @NFT_HASH_JENKINS: Jenkins Hash
+ * @NFT_HASH_SYM: Symmetric Hash
+ */
+enum nft_hash_types {
+       NFT_HASH_JENKINS,
+       NFT_HASH_SYM,
+};
+
 /**
  * enum nft_hash_attributes - nf_tables hash expression netlink attributes
  *
@@ -824,6 +835,7 @@ enum nft_rt_keys {
  * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
  * @NFTA_HASH_SEED: seed value (NLA_U32)
  * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
+ * @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types)
  */
 enum nft_hash_attributes {
        NFTA_HASH_UNSPEC,
@@ -833,6 +845,7 @@ enum nft_hash_attributes {
        NFTA_HASH_MODULUS,
        NFTA_HASH_SEED,
        NFTA_HASH_OFFSET,
+       NFTA_HASH_TYPE,
        __NFTA_HASH_MAX,
 };
 #define NFTA_HASH_MAX  (__NFTA_HASH_MAX - 1)
@@ -1244,12 +1257,23 @@ enum nft_fib_flags {
        NFTA_FIB_F_MARK         = 1 << 2,       /* use skb->mark */
        NFTA_FIB_F_IIF          = 1 << 3,       /* restrict to iif */
        NFTA_FIB_F_OIF          = 1 << 4,       /* restrict to oif */
+       NFTA_FIB_F_PRESENT      = 1 << 5,       /* check existence only */
+};
+
+enum nft_ct_helper_attributes {
+       NFTA_CT_HELPER_UNSPEC,
+       NFTA_CT_HELPER_NAME,
+       NFTA_CT_HELPER_L3PROTO,
+       NFTA_CT_HELPER_L4PROTO,
+       __NFTA_CT_HELPER_MAX,
 };
+#define NFTA_CT_HELPER_MAX     (__NFTA_CT_HELPER_MAX - 1)
 
 #define NFT_OBJECT_UNSPEC      0
 #define NFT_OBJECT_COUNTER     1
 #define NFT_OBJECT_QUOTA       2
-#define __NFT_OBJECT_MAX       3
+#define NFT_OBJECT_CT_HELPER   3
+#define __NFT_OBJECT_MAX       4
 #define NFT_OBJECT_MAX         (__NFT_OBJECT_MAX - 1)
 
 /**
index 7f41f7d0000f9f0ee36c274d88ad0d330fa8f5d6..66d1c3ccfd8e26087644d247fe281c2a037c3864 100644 (file)
@@ -578,10 +578,25 @@ enum ovs_sample_attr {
        OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
        OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
        __OVS_SAMPLE_ATTR_MAX,
+
+#ifdef __KERNEL__
+       OVS_SAMPLE_ATTR_ARG          /* struct sample_arg  */
+#endif
 };
 
 #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
 
+#ifdef __KERNEL__
+struct sample_arg {
+       bool exec;                   /* When true, actions in sample will not
+                                     * change flow keys. False otherwise.
+                                     */
+       u32  probability;            /* Same value as
+                                     * 'OVS_SAMPLE_ATTR_PROBABILITY'.
+                                     */
+};
+#endif
+
 /**
  * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
  * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
index df7451d351311cd915fc96752fcaeb0a43b5d112..099bf5528fed30008bfbde3529315be35c0411f9 100644 (file)
@@ -617,6 +617,14 @@ struct tc_drr_stats {
 #define TC_QOPT_BITMASK 15
 #define TC_QOPT_MAX_QUEUE 16
 
+enum {
+       TC_MQPRIO_HW_OFFLOAD_NONE,      /* no offload requested */
+       TC_MQPRIO_HW_OFFLOAD_TCS,       /* offload TCs, no queue counts */
+       __TC_MQPRIO_HW_OFFLOAD_MAX
+};
+
+#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+
 struct tc_mqprio_qopt {
        __u8    num_tc;
        __u8    prio_tc_map[TC_QOPT_BITMASK + 1];
index 6546917d605a916bfd5a905e30eb05d68fd6ad6b..cce061382e4073d4fe8296a00f22eba42cf13818 100644 (file)
@@ -122,6 +122,8 @@ enum {
 
        RTM_NEWNETCONF = 80,
 #define RTM_NEWNETCONF RTM_NEWNETCONF
+       RTM_DELNETCONF,
+#define RTM_DELNETCONF RTM_DELNETCONF
        RTM_GETNETCONF = 82,
 #define RTM_GETNETCONF RTM_GETNETCONF
 
@@ -319,6 +321,7 @@ enum rtattr_type_t {
        RTA_EXPIRES,
        RTA_PAD,
        RTA_UID,
+       RTA_TTL_PROPAGATE,
        __RTA_MAX
 };
 
@@ -545,6 +548,7 @@ enum {
        TCA_STATS2,
        TCA_STAB,
        TCA_PAD,
+       TCA_DUMP_INVISIBLE,
        __TCA_MAX
 };
 
index d3ae381fcf3327489c82e2f47eb39a363ec030c7..ced9d8b974268ed270661c3e2da77165e3a24784 100644 (file)
@@ -115,6 +115,8 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_PR_SUPPORTED      113
 #define SCTP_DEFAULT_PRINFO    114
 #define SCTP_PR_ASSOC_STATUS   115
+#define SCTP_PR_STREAM_STATUS  116
+#define SCTP_RECONFIG_SUPPORTED        117
 #define SCTP_ENABLE_STREAM_RESET       118
 #define SCTP_RESET_STREAMS     119
 #define SCTP_RESET_ASSOC       120
@@ -502,6 +504,28 @@ struct sctp_stream_reset_event {
        __u16 strreset_stream_list[];
 };
 
+#define SCTP_ASSOC_RESET_DENIED                0x0004
+#define SCTP_ASSOC_RESET_FAILED                0x0008
+struct sctp_assoc_reset_event {
+       __u16 assocreset_type;
+       __u16 assocreset_flags;
+       __u32 assocreset_length;
+       sctp_assoc_t assocreset_assoc_id;
+       __u32 assocreset_local_tsn;
+       __u32 assocreset_remote_tsn;
+};
+
+#define SCTP_ASSOC_CHANGE_DENIED       0x0004
+#define SCTP_ASSOC_CHANGE_FAILED       0x0008
+struct sctp_stream_change_event {
+       __u16 strchange_type;
+       __u16 strchange_flags;
+       __u32 strchange_length;
+       sctp_assoc_t strchange_assoc_id;
+       __u16 strchange_instrms;
+       __u16 strchange_outstrms;
+};
+
 /*
  * Described in Section 7.3
  *   Ancillary Data and Notification Interest Options
@@ -518,6 +542,8 @@ struct sctp_event_subscribe {
        __u8 sctp_authentication_event;
        __u8 sctp_sender_dry_event;
        __u8 sctp_stream_reset_event;
+       __u8 sctp_assoc_reset_event;
+       __u8 sctp_stream_change_event;
 };
 
 /*
@@ -543,6 +569,8 @@ union sctp_notification {
        struct sctp_authkey_event sn_authkey_event;
        struct sctp_sender_dry_event sn_sender_dry_event;
        struct sctp_stream_reset_event sn_strreset_event;
+       struct sctp_assoc_reset_event sn_assocreset_event;
+       struct sctp_stream_change_event sn_strchange_event;
 };
 
 /* Section 5.3.1
@@ -572,6 +600,10 @@ enum sctp_sn_type {
 #define SCTP_SENDER_DRY_EVENT          SCTP_SENDER_DRY_EVENT
        SCTP_STREAM_RESET_EVENT,
 #define SCTP_STREAM_RESET_EVENT                SCTP_STREAM_RESET_EVENT
+       SCTP_ASSOC_RESET_EVENT,
+#define SCTP_ASSOC_RESET_EVENT         SCTP_ASSOC_RESET_EVENT
+       SCTP_STREAM_CHANGE_EVENT,
+#define SCTP_STREAM_CHANGE_EVENT       SCTP_STREAM_CHANGE_EVENT
 };
 
 /* Notification error codes used to fill up the error fields in some
index 3b2bed7ca9a4d92c5671e614f2bc598668805f75..cec0e171d20caea2f188c06a9924f886b0daaa85 100644 (file)
@@ -177,7 +177,6 @@ enum
        LINUX_MIB_TIMEWAITED,                   /* TimeWaited */
        LINUX_MIB_TIMEWAITRECYCLED,             /* TimeWaitRecycled */
        LINUX_MIB_TIMEWAITKILLED,               /* TimeWaitKilled */
-       LINUX_MIB_PAWSPASSIVEREJECTED,          /* PAWSPassiveRejected */
        LINUX_MIB_PAWSACTIVEREJECTED,           /* PAWSActiveRejected */
        LINUX_MIB_PAWSESTABREJECTED,            /* PAWSEstabRejected */
        LINUX_MIB_DELAYEDACKS,                  /* DelayedACKs */
index d2b12152e358f14e791ef3e842cb6eac7cc8ceec..e13d48058b8d0e5cf36e458e68e257d73a9a1e8f 100644 (file)
@@ -568,6 +568,7 @@ enum {
        NET_IPV6_PROXY_NDP=23,
        NET_IPV6_ACCEPT_SOURCE_ROUTE=25,
        NET_IPV6_ACCEPT_RA_FROM_LOCAL=26,
+       NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27,
        __NET_IPV6_MAX
 };
 
index e1ce4f4fd7fd47fda2c18776573c0f65479c6728..e1e5e658f2dbf887be70fbfc9492d4365aef5064 100644 (file)
@@ -1,7 +1,7 @@
 obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
index 6b6f41f0b21164a3cc2c26bb71be2b89098bbdbd..bc9da93db403813589034ca71337750bea0bd8ec 100644 (file)
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016,2017 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -16,6 +17,8 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 
+#include "map_in_map.h"
+
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
        int i;
@@ -113,6 +116,30 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
        return array->value + array->elem_size * index;
 }
 
+/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
+static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+       struct bpf_insn *insn = insn_buf;
+       u32 elem_size = round_up(map->value_size, 8);
+       const int ret = BPF_REG_0;
+       const int map_ptr = BPF_REG_1;
+       const int index = BPF_REG_2;
+
+       *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
+       *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+       *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+
+       if (is_power_of_2(elem_size)) {
+               *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
+       } else {
+               *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
+       }
+       *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
+       *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+       *insn++ = BPF_MOV64_IMM(ret, 0);
+       return insn - insn_buf;
+}
+
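
In C terms, the emitted instruction sequence is the array lookup with the helper call flattened away. What it computes, written as plain C (a sketch; the function name is hypothetical):

static void *array_gen_lookup_equiv(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	u32 index = *(u32 *)key;		/* BPF_LDX_MEM(BPF_W, ...) */

	if (index >= map->max_entries)		/* BPF_JGE, then MOV64_IMM */
		return NULL;
	/* LSH by ilog2() when elem_size is a power of two, MUL otherwise */
	return array->value + index * elem_size;
}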
 /* Called from eBPF program */
 static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 {
@@ -267,6 +294,7 @@ static const struct bpf_map_ops array_ops = {
        .map_lookup_elem = array_map_lookup_elem,
        .map_update_elem = array_map_update_elem,
        .map_delete_elem = array_map_delete_elem,
+       .map_gen_lookup = array_map_gen_lookup,
 };
 
 static struct bpf_map_type_list array_type __ro_after_init = {
@@ -576,3 +604,64 @@ static int __init register_cgroup_array_map(void)
 }
 late_initcall(register_cgroup_array_map);
 #endif
+
+static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
+{
+       struct bpf_map *map, *inner_map_meta;
+
+       inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+       if (IS_ERR(inner_map_meta))
+               return inner_map_meta;
+
+       map = fd_array_map_alloc(attr);
+       if (IS_ERR(map)) {
+               bpf_map_meta_free(inner_map_meta);
+               return map;
+       }
+
+       map->inner_map_meta = inner_map_meta;
+
+       return map;
+}
+
+static void array_of_map_free(struct bpf_map *map)
+{
+       /* map->inner_map_meta is only accessed by syscall which
+        * is protected by fdget/fdput.
+        */
+       bpf_map_meta_free(map->inner_map_meta);
+       bpf_fd_array_map_clear(map);
+       fd_array_map_free(map);
+}
+
+static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_map **inner_map = array_map_lookup_elem(map, key);
+
+       if (!inner_map)
+               return NULL;
+
+       return READ_ONCE(*inner_map);
+}
+
+static const struct bpf_map_ops array_of_map_ops = {
+       .map_alloc = array_of_map_alloc,
+       .map_free = array_of_map_free,
+       .map_get_next_key = array_map_get_next_key,
+       .map_lookup_elem = array_of_map_lookup_elem,
+       .map_delete_elem = fd_array_map_delete_elem,
+       .map_fd_get_ptr = bpf_map_fd_get_ptr,
+       .map_fd_put_ptr = bpf_map_fd_put_ptr,
+};
+
+static struct bpf_map_type_list array_of_map_type __ro_after_init = {
+       .ops = &array_of_map_ops,
+       .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+};
+
+static int __init register_array_of_map(void)
+{
+       bpf_register_map_type(&array_of_map_type);
+       return 0;
+}
+late_initcall(register_array_of_map);
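
Creating one of the new *_OF_MAPS types requires passing a template inner map through the new inner_map_fd attribute; every inner map inserted later must match that template. A userspace sketch:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_array_of_maps(int inner_fd, __u32 max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u32);	/* values are map fds */
	attr.max_entries = max_entries;
	attr.inner_map_fd = inner_fd;		/* template for inner maps */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}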
index afe5bab376c9811c7b82f56bc0b93ce69b6a579b..d5b0623ce87d3697b72eb3711e866db0caa95812 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
+#include "map_in_map.h"
 
 struct bucket {
        struct hlist_nulls_head head;
@@ -30,18 +31,12 @@ struct bpf_htab {
                struct pcpu_freelist freelist;
                struct bpf_lru lru;
        };
-       void __percpu *extra_elems;
+       struct htab_elem *__percpu *extra_elems;
        atomic_t count; /* number of elements in this hashtable */
        u32 n_buckets;  /* number of hash buckets */
        u32 elem_size;  /* size of each element in bytes */
 };
 
-enum extra_elem_state {
-       HTAB_NOT_AN_EXTRA_ELEM = 0,
-       HTAB_EXTRA_ELEM_FREE,
-       HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
        union {
@@ -56,7 +51,6 @@ struct htab_elem {
        };
        union {
                struct rcu_head rcu;
-               enum extra_elem_state state;
                struct bpf_lru_node lru_node;
        };
        u32 hash;
@@ -77,6 +71,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
                htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+       return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
                                     void __percpu *pptr)
 {
@@ -88,6 +87,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size
        return *(void __percpu **)(l->key + key_size);
 }
 
+static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
+{
+       return *(void **)(l->key + roundup(map->key_size, 8));
+}
+
 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
 {
        return (struct htab_elem *) (htab->elems + i * htab->elem_size);
@@ -128,17 +132,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+       u32 num_entries = htab->map.max_entries;
        int err = -ENOMEM, i;
 
-       htab->elems = bpf_map_area_alloc(htab->elem_size *
-                                        htab->map.max_entries);
+       if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+               num_entries += num_possible_cpus();
+
+       htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
        if (!htab->elems)
                return -ENOMEM;
 
        if (!htab_is_percpu(htab))
                goto skip_percpu_elems;
 
-       for (i = 0; i < htab->map.max_entries; i++) {
+       for (i = 0; i < num_entries; i++) {
                u32 size = round_up(htab->map.value_size, 8);
                void __percpu *pptr;
 
@@ -166,11 +173,11 @@ skip_percpu_elems:
        if (htab_is_lru(htab))
                bpf_lru_populate(&htab->lru, htab->elems,
                                 offsetof(struct htab_elem, lru_node),
-                                htab->elem_size, htab->map.max_entries);
+                                htab->elem_size, num_entries);
        else
                pcpu_freelist_populate(&htab->freelist,
                                       htab->elems + offsetof(struct htab_elem, fnode),
-                                      htab->elem_size, htab->map.max_entries);
+                                      htab->elem_size, num_entries);
 
        return 0;
 
@@ -191,16 +198,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-       void __percpu *pptr;
+       struct htab_elem *__percpu *pptr, *l_new;
+       struct pcpu_freelist_node *l;
        int cpu;
 
-       pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+       pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+                                 GFP_USER | __GFP_NOWARN);
        if (!pptr)
                return -ENOMEM;
 
        for_each_possible_cpu(cpu) {
-               ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-                       HTAB_EXTRA_ELEM_FREE;
+               l = pcpu_freelist_pop(&htab->freelist);
+               /* pop will succeed, since prealloc_init()
+                * preallocated extra num_possible_cpus elements
+                */
+               l_new = container_of(l, struct htab_elem, fnode);
+               *per_cpu_ptr(pptr, cpu) = l_new;
        }
        htab->extra_elems = pptr;
        return 0;
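
Each CPU now parks one preallocated htab_elem in extra_elems; replacing an existing key swaps the parked element with the one being unlinked, so a full prealloc'd map can still be updated without touching the shared freelist. The swap, as it appears later in alloc_htab_elem():

	pl_new = this_cpu_ptr(htab->extra_elems);
	l_new  = *pl_new;	/* parked element receives the new value  */
	*pl_new = old_elem;	/* element being replaced is parked in turn */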
@@ -342,25 +355,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                raw_spin_lock_init(&htab->buckets[i].lock);
        }
 
-       if (!percpu && !lru) {
-               /* lru itself can remove the least used element, so
-                * there is no need for an extra elem during map_update.
-                */
-               err = alloc_extra_elems(htab);
-               if (err)
-                       goto free_buckets;
-       }
-
        if (prealloc) {
                err = prealloc_init(htab);
                if (err)
-                       goto free_extra_elems;
+                       goto free_buckets;
+
+               if (!percpu && !lru) {
+                       /* lru itself can remove the least used element, so
+                        * there is no need for an extra elem during map_update.
+                        */
+                       err = alloc_extra_elems(htab);
+                       if (err)
+                               goto free_prealloc;
+               }
        }
 
        return &htab->map;
 
-free_extra_elems:
-       free_percpu(htab->extra_elems);
+free_prealloc:
+       prealloc_destroy(htab);
 free_buckets:
        bpf_map_area_free(htab->buckets);
 free_htab:
@@ -419,7 +432,11 @@ again:
        return NULL;
 }
 
-/* Called from syscall or from eBPF program */
+/* Called from syscall or from eBPF program directly, so
+ * arguments have to match bpf_map_lookup_elem() exactly.
+ * The return value is adjusted by BPF instructions
+ * in htab_map_gen_lookup().
+ */
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
@@ -451,6 +468,30 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
        return NULL;
 }
 
+/* inline bpf_map_lookup_elem() call.
+ * Instead of:
+ * bpf_prog
+ *   bpf_map_lookup_elem
+ *     map->ops->map_lookup_elem
+ *       htab_map_lookup_elem
+ *         __htab_map_lookup_elem
+ * do:
+ * bpf_prog
+ *   __htab_map_lookup_elem
+ */
+static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+       struct bpf_insn *insn = insn_buf;
+       const int ret = BPF_REG_0;
+
+       *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+       *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
+       *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
+                               offsetof(struct htab_elem, key) +
+                               round_up(map->key_size, 8));
+       return insn - insn_buf;
+}
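
The three emitted instructions are equivalent to the following C, where the jump offset 1 means "skip the next instruction" (a sketch; the function name is hypothetical):

static void *htab_gen_lookup_equiv(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (!l)			/* BPF_JMP_IMM(BPF_JEQ, ret, 0, 1) */
		return NULL;
	/* BPF_ALU64_IMM(BPF_ADD, ...): step from the elem to its value */
	return (void *)l + offsetof(struct htab_elem, key) +
	       round_up(map->key_size, 8);
}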
+
 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
 {
        struct htab_elem *l = __htab_map_lookup_elem(map, key);
@@ -575,12 +616,15 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-       if (l->state == HTAB_EXTRA_ELEM_USED) {
-               l->state = HTAB_EXTRA_ELEM_FREE;
-               return;
+       struct bpf_map *map = &htab->map;
+
+       if (map->ops->map_fd_put_ptr) {
+               void *ptr = fd_htab_map_get_ptr(map, l);
+
+               map->ops->map_fd_put_ptr(ptr);
        }
 
-       if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+       if (htab_is_prealloc(htab)) {
                pcpu_freelist_push(&htab->freelist, &l->fnode);
        } else {
                atomic_dec(&htab->count);
@@ -610,47 +654,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                         void *value, u32 key_size, u32 hash,
                                         bool percpu, bool onallcpus,
-                                        bool old_elem_exists)
+                                        struct htab_elem *old_elem)
 {
        u32 size = htab->map.value_size;
-       bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-       struct htab_elem *l_new;
+       bool prealloc = htab_is_prealloc(htab);
+       struct htab_elem *l_new, **pl_new;
        void __percpu *pptr;
-       int err = 0;
 
        if (prealloc) {
-               struct pcpu_freelist_node *l;
+               if (old_elem) {
+                       /* if we're updating the existing element,
+                        * use per-cpu extra elems to avoid freelist_pop/push
+                        */
+                       pl_new = this_cpu_ptr(htab->extra_elems);
+                       l_new = *pl_new;
+                       *pl_new = old_elem;
+               } else {
+                       struct pcpu_freelist_node *l;
 
-               l = pcpu_freelist_pop(&htab->freelist);
-               if (!l)
-                       err = -E2BIG;
-               else
+                       l = pcpu_freelist_pop(&htab->freelist);
+                       if (!l)
+                               return ERR_PTR(-E2BIG);
                        l_new = container_of(l, struct htab_elem, fnode);
-       } else {
-               if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-                       atomic_dec(&htab->count);
-                       err = -E2BIG;
-               } else {
-                       l_new = kmalloc(htab->elem_size,
-                                       GFP_ATOMIC | __GFP_NOWARN);
-                       if (!l_new)
-                               return ERR_PTR(-ENOMEM);
                }
-       }
-
-       if (err) {
-               if (!old_elem_exists)
-                       return ERR_PTR(err);
-
-               /* if we're updating the existing element and the hash table
-                * is full, use per-cpu extra elems
-                */
-               l_new = this_cpu_ptr(htab->extra_elems);
-               if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-                       return ERR_PTR(-E2BIG);
-               l_new->state = HTAB_EXTRA_ELEM_USED;
        } else {
-               l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+               if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+                       if (!old_elem) {
+                               /* when map is full and update() is replacing
+                                * old element, it's ok to allocate, since
+                                * old element will be freed immediately.
+                                * Otherwise return an error
+                                */
+                               atomic_dec(&htab->count);
+                               return ERR_PTR(-E2BIG);
+                       }
+               l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+               if (!l_new)
+                       return ERR_PTR(-ENOMEM);
        }
 
        memcpy(l_new->key, key, key_size);
@@ -731,7 +771,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                goto err;
 
        l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-                               !!l_old);
+                               l_old);
        if (IS_ERR(l_new)) {
                /* all pre-allocated elements are in use or memory exhausted */
                ret = PTR_ERR(l_new);
@@ -744,7 +784,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
        hlist_nulls_add_head_rcu(&l_new->hash_node, head);
        if (l_old) {
                hlist_nulls_del_rcu(&l_old->hash_node);
-               free_htab_elem(htab, l_old);
+               if (!htab_is_prealloc(htab))
+                       free_htab_elem(htab, l_old);
        }
        ret = 0;
 err:
@@ -856,7 +897,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                                value, onallcpus);
        } else {
                l_new = alloc_htab_elem(htab, key, value, key_size,
-                                       hash, true, onallcpus, false);
+                                       hash, true, onallcpus, NULL);
                if (IS_ERR(l_new)) {
                        ret = PTR_ERR(l_new);
                        goto err;
@@ -1024,11 +1065,11 @@ static void delete_all_elements(struct bpf_htab *htab)
 
                hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
                        hlist_nulls_del_rcu(&l->hash_node);
-                       if (l->state != HTAB_EXTRA_ELEM_USED)
-                               htab_elem_free(htab, l);
+                       htab_elem_free(htab, l);
                }
        }
 }
+
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void htab_map_free(struct bpf_map *map)
 {
@@ -1045,7 +1086,7 @@ static void htab_map_free(struct bpf_map *map)
         * not have executed. Wait for them.
         */
        rcu_barrier();
-       if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+       if (!htab_is_prealloc(htab))
                delete_all_elements(htab);
        else
                prealloc_destroy(htab);
@@ -1062,6 +1103,7 @@ static const struct bpf_map_ops htab_ops = {
        .map_lookup_elem = htab_map_lookup_elem,
        .map_update_elem = htab_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
+       .map_gen_lookup = htab_map_gen_lookup,
 };
 
 static struct bpf_map_type_list htab_type __ro_after_init = {
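The ops update above wires in htab_map_gen_lookup(), whose body falls outside the hunks shown here. A sketch of what such a callback emits, modeled on the companion upstream patch (the exact instruction sequence, __htab_map_lookup_elem and the offset math are assumptions):

static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	/* call the internal lookup, which returns a struct htab_elem * */
	*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64))__htab_map_lookup_elem);
	/* on a miss, R0 is already 0 (NULL); skip the pointer math */
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	/* on a hit, advance R0 past the key to the element's value area */
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

fixup_bpf_calls() (later in this diff) patches this sequence in place of the bpf_map_lookup_elem helper call whenever the JIT is enabled and the map pointer is known and not poisoned.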
@@ -1184,12 +1226,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = {
        .type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
 };
 
+static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+{
+       struct bpf_map *map;
+
+       if (attr->value_size != sizeof(u32))
+               return ERR_PTR(-EINVAL);
+
+       /* pointer is stored internally */
+       attr->value_size = sizeof(void *);
+       map = htab_map_alloc(attr);
+       attr->value_size = sizeof(u32);
+
+       return map;
+}
+
+static void fd_htab_map_free(struct bpf_map *map)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct hlist_nulls_node *n;
+       struct hlist_nulls_head *head;
+       struct htab_elem *l;
+       int i;
+
+       for (i = 0; i < htab->n_buckets; i++) {
+               head = select_bucket(htab, i);
+
+               hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+                       void *ptr = fd_htab_map_get_ptr(map, l);
+
+                       map->ops->map_fd_put_ptr(ptr);
+               }
+       }
+
+       htab_map_free(map);
+}
+
+/* only called from syscall */
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+                               void *key, void *value, u64 map_flags)
+{
+       void *ptr;
+       int ret;
+       u32 ufd = *(u32 *)value;
+
+       ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       ret = htab_map_update_elem(map, key, &ptr, map_flags);
+       if (ret)
+               map->ops->map_fd_put_ptr(ptr);
+
+       return ret;
+}
+
+static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
+{
+       struct bpf_map *map, *inner_map_meta;
+
+       inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+       if (IS_ERR(inner_map_meta))
+               return inner_map_meta;
+
+       map = fd_htab_map_alloc(attr);
+       if (IS_ERR(map)) {
+               bpf_map_meta_free(inner_map_meta);
+               return map;
+       }
+
+       map->inner_map_meta = inner_map_meta;
+
+       return map;
+}
+
+static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_map **inner_map  = htab_map_lookup_elem(map, key);
+
+       if (!inner_map)
+               return NULL;
+
+       return READ_ONCE(*inner_map);
+}
+
+static void htab_of_map_free(struct bpf_map *map)
+{
+       bpf_map_meta_free(map->inner_map_meta);
+       fd_htab_map_free(map);
+}
+
+static const struct bpf_map_ops htab_of_map_ops = {
+       .map_alloc = htab_of_map_alloc,
+       .map_free = htab_of_map_free,
+       .map_get_next_key = htab_map_get_next_key,
+       .map_lookup_elem = htab_of_map_lookup_elem,
+       .map_delete_elem = htab_map_delete_elem,
+       .map_fd_get_ptr = bpf_map_fd_get_ptr,
+       .map_fd_put_ptr = bpf_map_fd_put_ptr,
+};
+
+static struct bpf_map_type_list htab_of_map_type __ro_after_init = {
+       .ops = &htab_of_map_ops,
+       .type = BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
 static int __init register_htab_map(void)
 {
        bpf_register_map_type(&htab_type);
        bpf_register_map_type(&htab_percpu_type);
        bpf_register_map_type(&htab_lru_type);
        bpf_register_map_type(&htab_lru_percpu_type);
+       bpf_register_map_type(&htab_of_map_type);
        return 0;
 }
 late_initcall(register_htab_map);
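Putting fd_htab_map_alloc() and bpf_fd_htab_map_update_elem() together, a hash-of-maps is driven from userspace roughly as below (a hedged sketch: sys_bpf() is a local wrapper, and BPF_MAP_TYPE_HASH_OF_MAPS plus inner_map_fd are the uapi additions from this series):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	union bpf_attr attr;
	__u32 key = 1;
	int inner_fd, outer_fd;

	/* inner map: an ordinary array; it doubles as the template */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u32);
	attr.max_entries = 4;
	inner_fd = sys_bpf(BPF_MAP_CREATE, &attr);

	/* outer map: value_size must be sizeof(u32) (an fd); inner_map_fd
	 * feeds bpf_map_meta_alloc() with the inner-map template */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH_OF_MAPS;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u32);
	attr.max_entries = 8;
	attr.inner_map_fd = inner_fd;
	outer_fd = sys_bpf(BPF_MAP_CREATE, &attr);

	/* the update value is the inner map's fd; the kernel resolves it
	 * to a struct bpf_map * via bpf_map_fd_get_ptr() */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = outer_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&inner_fd;
	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr);
}

An fd whose map disagrees with the template in map type, key size, value size, flags or max_entries is rejected with EINVAL by bpf_map_meta_equal() in map_in_map.c below.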
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
new file mode 100644 (file)
index 0000000..59bcdf8
--- /dev/null
@@ -0,0 +1,97 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/slab.h>
+#include <linux/bpf.h>
+
+#include "map_in_map.h"
+
+struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
+{
+       struct bpf_map *inner_map, *inner_map_meta;
+       struct fd f;
+
+       f = fdget(inner_map_ufd);
+       inner_map = __bpf_map_get(f);
+       if (IS_ERR(inner_map))
+               return inner_map;
+
+       /* prog_array->owner_prog_type and owner_jited
+        * are runtime bindings.  A static check alone
+        * in the verifier is not enough.
+        */
+       if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+               fdput(f);
+               return ERR_PTR(-ENOTSUPP);
+       }
+
+       /* Does not support >1 level map-in-map */
+       if (inner_map->inner_map_meta) {
+               fdput(f);
+               return ERR_PTR(-EINVAL);
+       }
+
+       inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+       if (!inner_map_meta) {
+               fdput(f);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       inner_map_meta->map_type = inner_map->map_type;
+       inner_map_meta->key_size = inner_map->key_size;
+       inner_map_meta->value_size = inner_map->value_size;
+       inner_map_meta->map_flags = inner_map->map_flags;
+       inner_map_meta->ops = inner_map->ops;
+       inner_map_meta->max_entries = inner_map->max_entries;
+
+       fdput(f);
+       return inner_map_meta;
+}
+
+void bpf_map_meta_free(struct bpf_map *map_meta)
+{
+       kfree(map_meta);
+}
+
+bool bpf_map_meta_equal(const struct bpf_map *meta0,
+                       const struct bpf_map *meta1)
+{
+       /* No need to compare ops because it is covered by map_type */
+       return meta0->map_type == meta1->map_type &&
+               meta0->key_size == meta1->key_size &&
+               meta0->value_size == meta1->value_size &&
+               meta0->map_flags == meta1->map_flags &&
+               meta0->max_entries == meta1->max_entries;
+}
+
+void *bpf_map_fd_get_ptr(struct bpf_map *map,
+                        struct file *map_file /* not used */,
+                        int ufd)
+{
+       struct bpf_map *inner_map;
+       struct fd f;
+
+       f = fdget(ufd);
+       inner_map = __bpf_map_get(f);
+       if (IS_ERR(inner_map))
+               return inner_map;
+
+       if (bpf_map_meta_equal(map->inner_map_meta, inner_map))
+               inner_map = bpf_map_inc(inner_map, false);
+       else
+               inner_map = ERR_PTR(-EINVAL);
+
+       fdput(f);
+       return inner_map;
+}
+
+void bpf_map_fd_put_ptr(void *ptr)
+{
+       /* ptr->ops->map_free() has to go through one
+        * rcu grace period by itself.
+        */
+       bpf_map_put(ptr);
+}
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
new file mode 100644 (file)
index 0000000..177fadb
--- /dev/null
@@ -0,0 +1,23 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __MAP_IN_MAP_H__
+#define __MAP_IN_MAP_H__
+
+#include <linux/types.h>
+
+struct file;
+struct bpf_map;
+
+struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd);
+void bpf_map_meta_free(struct bpf_map *map_meta);
+bool bpf_map_meta_equal(const struct bpf_map *meta0,
+                       const struct bpf_map *meta1);
+void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
+                        int ufd);
+void bpf_map_fd_put_ptr(void *ptr);
+
+#endif
index 7af0dcc5d7555679cea6c08395ab54710e7066e6..ab0cf4c43690e4241ef639e9e684f0347cb49161 100644 (file)
@@ -215,7 +215,7 @@ int bpf_map_new_fd(struct bpf_map *map)
                   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
                   sizeof(attr->CMD##_LAST_FIELD)) != NULL
 
-#define BPF_MAP_CREATE_LAST_FIELD map_flags
+#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -352,6 +352,9 @@ static int map_lookup_elem(union bpf_attr *attr)
                err = bpf_percpu_array_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
                err = bpf_stackmap_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+                  map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+               err = -ENOTSUPP;
        } else {
                rcu_read_lock();
                ptr = map->ops->map_lookup_elem(map, key);
@@ -438,11 +441,17 @@ static int map_update_elem(union bpf_attr *attr)
                err = bpf_percpu_array_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
-                  map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
+                  map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
+                  map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
                rcu_read_lock();
                err = bpf_fd_array_map_update_elem(map, f.file, key, value,
                                                   attr->flags);
                rcu_read_unlock();
+       } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+               rcu_read_lock();
+               err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+                                                 attr->flags);
+               rcu_read_unlock();
        } else {
                rcu_read_lock();
                err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -586,59 +595,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl)
        list_add(&tl->list_node, &bpf_prog_types);
 }
 
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- *      insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
- *
- * this function is called after eBPF program passed verification
- */
-static void fixup_bpf_calls(struct bpf_prog *prog)
-{
-       const struct bpf_func_proto *fn;
-       int i;
-
-       for (i = 0; i < prog->len; i++) {
-               struct bpf_insn *insn = &prog->insnsi[i];
-
-               if (insn->code == (BPF_JMP | BPF_CALL)) {
-                       /* we reach here when program has bpf_call instructions
-                        * and it passed bpf_check(), means that
-                        * ops->get_func_proto must have been supplied, check it
-                        */
-                       BUG_ON(!prog->aux->ops->get_func_proto);
-
-                       if (insn->imm == BPF_FUNC_get_route_realm)
-                               prog->dst_needed = 1;
-                       if (insn->imm == BPF_FUNC_get_prandom_u32)
-                               bpf_user_rnd_init_once();
-                       if (insn->imm == BPF_FUNC_xdp_adjust_head)
-                               prog->xdp_adjust_head = 1;
-                       if (insn->imm == BPF_FUNC_tail_call) {
-                               /* mark bpf_tail_call as different opcode
-                                * to avoid conditional branch in
-                                * interpeter for every normal call
-                                * and to prevent accidental JITing by
-                                * JIT compiler that doesn't support
-                                * bpf_tail_call yet
-                                */
-                               insn->imm = 0;
-                               insn->code |= BPF_X;
-                               continue;
-                       }
-
-                       fn = prog->aux->ops->get_func_proto(insn->imm);
-                       /* all functions that have prototype and verifier allowed
-                        * programs to call them, must be real in-kernel functions
-                        */
-                       BUG_ON(!fn->func);
-                       insn->imm = fn->func - __bpf_call_base;
-               }
-       }
-}
-
 /* drop refcnt on maps used by eBPF program and free auxiliary data */
 static void free_used_maps(struct bpf_prog_aux *aux)
 {
@@ -892,9 +848,6 @@ static int bpf_prog_load(union bpf_attr *attr)
        if (err < 0)
                goto free_used_maps;
 
-       /* fixup BPF_CALL->imm field */
-       fixup_bpf_calls(prog);
-
        /* eBPF program is ready to be JITed */
        prog = bpf_prog_select_runtime(prog, &err);
        if (err < 0)
@@ -1020,6 +973,28 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 }
 #endif /* CONFIG_CGROUP_BPF */
 
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
+
+static int bpf_prog_test_run(const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       struct bpf_prog *prog;
+       int ret = -ENOTSUPP;
+
+       if (CHECK_ATTR(BPF_PROG_TEST_RUN))
+               return -EINVAL;
+
+       prog = bpf_prog_get(attr->test.prog_fd);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       if (prog->aux->ops->test_run)
+               ret = prog->aux->ops->test_run(prog, attr, uattr);
+
+       bpf_prog_put(prog);
+       return ret;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -1086,7 +1061,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
-
 #ifdef CONFIG_CGROUP_BPF
        case BPF_PROG_ATTACH:
                err = bpf_prog_attach(&attr);
@@ -1095,7 +1069,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
                err = bpf_prog_detach(&attr);
                break;
 #endif
-
+       case BPF_PROG_TEST_RUN:
+               err = bpf_prog_test_run(&attr, uattr);
+               break;
        default:
                err = -EINVAL;
                break;
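As a usage sketch of the new command (hedged: field names follow the test anonymous struct this series adds to union bpf_attr, and prog_fd is assumed to be an already-loaded program whose type implements test_run):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int test_run(int prog_fd, void *pkt_in, __u32 len_in)
{
	char pkt_out[1500];
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;
	attr.test.data_in = (__u64)(unsigned long)pkt_in;
	attr.test.data_size_in = len_in;
	attr.test.data_out = (__u64)(unsigned long)pkt_out;	/* rewritten packet */
	attr.test.data_size_out = sizeof(pkt_out);
	attr.test.repeat = 1000;	/* average the runtime over 1000 runs */

	if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)))
		return -1;

	/* attr.test.retval is the program's return code,
	 * attr.test.duration the mean runtime in nanoseconds */
	return attr.test.retval;
}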
index 796b68d001198a39186cba850fe8161476a17bfa..09923cc5c7c7ed6fc65b0bbaaa473a61646ff649 100644 (file)
@@ -143,6 +143,8 @@ struct bpf_verifier_stack_elem {
 #define BPF_COMPLEXITY_LIMIT_INSNS     65536
 #define BPF_COMPLEXITY_LIMIT_STACK     1024
 
+#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
+
 struct bpf_call_arg_meta {
        struct bpf_map *map_ptr;
        bool raw_mode;
@@ -1197,6 +1199,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                    func_id != BPF_FUNC_current_task_under_cgroup)
                        goto error;
                break;
+       case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+       case BPF_MAP_TYPE_HASH_OF_MAPS:
+               if (func_id != BPF_FUNC_map_lookup_elem)
+                       goto error;
        default:
                break;
        }
@@ -1273,7 +1279,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
        }
 }
 
-static int check_call(struct bpf_verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 {
        struct bpf_verifier_state *state = &env->cur_state;
        const struct bpf_func_proto *fn = NULL;
@@ -1357,6 +1363,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
        } else if (fn->ret_type == RET_VOID) {
                regs[BPF_REG_0].type = NOT_INIT;
        } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
+               struct bpf_insn_aux_data *insn_aux;
+
                regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
                regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
                /* remember map_ptr, so that check_map_access()
@@ -1369,6 +1377,11 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
                }
                regs[BPF_REG_0].map_ptr = meta.map_ptr;
                regs[BPF_REG_0].id = ++env->id_gen;
+               insn_aux = &env->insn_aux_data[insn_idx];
+               if (!insn_aux->map_ptr)
+                       insn_aux->map_ptr = meta.map_ptr;
+               else if (insn_aux->map_ptr != meta.map_ptr)
+                       insn_aux->map_ptr = BPF_MAP_PTR_POISON;
        } else {
                verbose("unknown return type %d of func %s#%d\n",
                        fn->ret_type, func_id_name(func_id), func_id);
@@ -2092,14 +2105,19 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
        struct bpf_reg_state *reg = &regs[regno];
 
        if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
-               reg->type = type;
+               if (type == UNKNOWN_VALUE) {
+                       __mark_reg_unknown_value(regs, regno);
+               } else if (reg->map_ptr->inner_map_meta) {
+                       reg->type = CONST_PTR_TO_MAP;
+                       reg->map_ptr = reg->map_ptr->inner_map_meta;
+               } else {
+                       reg->type = type;
+               }
                /* We don't need id from this point onwards anymore, thus we
                 * should better reset it, so that state pruning has chances
                 * to take effect.
                 */
                reg->id = 0;
-               if (type == UNKNOWN_VALUE)
-                       __mark_reg_unknown_value(regs, regno);
        }
 }
 
@@ -2940,7 +2958,7 @@ static int do_check(struct bpf_verifier_env *env)
                                        return -EINVAL;
                                }
 
-                               err = check_call(env, insn->imm);
+                               err = check_call(env, insn->imm, insn_idx);
                                if (err)
                                        return err;
 
@@ -3024,16 +3042,33 @@ process_bpf_exit:
        return 0;
 }
 
+static int check_map_prealloc(struct bpf_map *map)
+{
+       return (map->map_type != BPF_MAP_TYPE_HASH &&
+               map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+               map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
+               !(map->map_flags & BPF_F_NO_PREALLOC);
+}
+
 static int check_map_prog_compatibility(struct bpf_map *map,
                                        struct bpf_prog *prog)
 
 {
-       if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
-           (map->map_type == BPF_MAP_TYPE_HASH ||
-            map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
-           (map->map_flags & BPF_F_NO_PREALLOC)) {
-               verbose("perf_event programs can only use preallocated hash map\n");
-               return -EINVAL;
+       /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
+        * preallocated hash maps, since doing memory allocation
+        * in overflow_handler can crash depending on where the NMI was
+        * triggered.
+        */
+       if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
+               if (!check_map_prealloc(map)) {
+                       verbose("perf_event programs can only use preallocated hash map\n");
+                       return -EINVAL;
+               }
+               if (map->inner_map_meta &&
+                   !check_map_prealloc(map->inner_map_meta)) {
+                       verbose("perf_event programs can only use preallocated inner hash map\n");
+                       return -EINVAL;
+               }
        }
        return 0;
 }
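Concretely, the map below is the kind a BPF_PROG_TYPE_PERF_EVENT program may not reference: creating it succeeds, but loading a perf_event program that uses it (directly, or as an inner map) fails with the verbose messages above (hedged sketch):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* BPF_F_NO_PREALLOC makes updates allocate at runtime, which is unsafe
 * from the NMI context a perf_event program can run in */
static int create_no_prealloc_hash(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = sizeof(int);
	attr.value_size = sizeof(long);
	attr.max_entries = 64;
	attr.map_flags = BPF_F_NO_PREALLOC;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}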
@@ -3162,6 +3197,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
                        insn->src_reg = 0;
 }
 
+/* single env->prog->insnsi[off] instruction was replaced with the range
+ * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
+                               u32 off, u32 cnt)
+{
+       struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+
+       if (cnt == 1)
+               return 0;
+       new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
+       if (!new_data)
+               return -ENOMEM;
+       memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+       memcpy(new_data + off + cnt - 1, old_data + off,
+              sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+       env->insn_aux_data = new_data;
+       vfree(old_data);
+       return 0;
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+                                           const struct bpf_insn *patch, u32 len)
+{
+       struct bpf_prog *new_prog;
+
+       new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+       if (!new_prog)
+               return NULL;
+       if (adjust_insn_aux_data(env, new_prog->len, off, len))
+               return NULL;
+       return new_prog;
+}
+
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
@@ -3181,10 +3251,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                        verbose("bpf verifier is misconfigured\n");
                        return -EINVAL;
                } else if (cnt) {
-                       new_prog = bpf_patch_insn_single(env->prog, 0,
-                                                        insn_buf, cnt);
+                       new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
                        if (!new_prog)
                                return -ENOMEM;
+
                        env->prog = new_prog;
                        delta += cnt - 1;
                }
@@ -3209,7 +3279,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                else
                        continue;
 
-               if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
+               if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
                        continue;
 
                cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
@@ -3218,8 +3288,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                        return -EINVAL;
                }
 
-               new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
-                                                cnt);
+               new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
                if (!new_prog)
                        return -ENOMEM;
 
@@ -3233,6 +3302,84 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
        return 0;
 }
 
+/* fixup insn->imm field of bpf_call instructions
+ * and inline eligible helpers as an explicit sequence of BPF instructions
+ *
+ * this function is called after the eBPF program has passed verification
+ */
+static int fixup_bpf_calls(struct bpf_verifier_env *env)
+{
+       struct bpf_prog *prog = env->prog;
+       struct bpf_insn *insn = prog->insnsi;
+       const struct bpf_func_proto *fn;
+       const int insn_cnt = prog->len;
+       struct bpf_insn insn_buf[16];
+       struct bpf_prog *new_prog;
+       struct bpf_map *map_ptr;
+       int i, cnt, delta = 0;
+
+       for (i = 0; i < insn_cnt; i++, insn++) {
+               if (insn->code != (BPF_JMP | BPF_CALL))
+                       continue;
+
+               if (insn->imm == BPF_FUNC_get_route_realm)
+                       prog->dst_needed = 1;
+               if (insn->imm == BPF_FUNC_get_prandom_u32)
+                       bpf_user_rnd_init_once();
+               if (insn->imm == BPF_FUNC_xdp_adjust_head)
+                       prog->xdp_adjust_head = 1;
+               if (insn->imm == BPF_FUNC_tail_call) {
+                       /* mark bpf_tail_call as a different opcode to avoid a
+                        * conditional branch in the interpreter for every normal
+                        * call and to prevent accidental JITing by JIT compiler
+                        * that doesn't support bpf_tail_call yet
+                        */
+                       insn->imm = 0;
+                       insn->code |= BPF_X;
+                       continue;
+               }
+
+               if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
+                       map_ptr = env->insn_aux_data[i + delta].map_ptr;
+                       if (map_ptr == BPF_MAP_PTR_POISON ||
+                           !map_ptr->ops->map_gen_lookup)
+                               goto patch_call_imm;
+
+                       cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
+                       if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+                               verbose("bpf verifier is misconfigured\n");
+                               return -EINVAL;
+                       }
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+                                                      cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta += cnt - 1;
+
+                       /* keep walking the new program and skip insns we just inserted */
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       continue;
+               }
+
+patch_call_imm:
+               fn = prog->aux->ops->get_func_proto(insn->imm);
+               /* all functions that have a prototype and that the verifier
+                * allowed programs to call must be real in-kernel functions
+                */
+               if (!fn->func) {
+                       verbose("kernel subsystem misconfigured func %s#%d\n",
+                               func_id_name(insn->imm), insn->imm);
+                       return -EFAULT;
+               }
+               insn->imm = fn->func - __bpf_call_base;
+       }
+
+       return 0;
+}
+
 static void free_states(struct bpf_verifier_env *env)
 {
        struct bpf_verifier_state_list *sl, *sln;
@@ -3328,6 +3475,9 @@ skip_full_check:
                /* program is valid, convert *(u32*)(ctx + off) accesses */
                ret = convert_ctx_accesses(env);
 
+       if (ret == 0)
+               ret = fixup_bpf_calls(env);
+
        if (log_level && log_len >= log_size - 1) {
                BUG_ON(log_len >= log_size);
                /* verifier log exceeded user supplied buffer */
index f7c063239fa5c74636922743ddb094052b9044c9..37b223e4fc05b74fc50aa51df0c307d65da026c3 100644 (file)
@@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
        struct cpuhp_step *sp;
        int ret = 0;
 
-       mutex_lock(&cpuhp_state_mutex);
-
        if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
                ret = cpuhp_reserve_state(state);
                if (ret < 0)
-                       goto out;
+                       return ret;
                state = ret;
        }
        sp = cpuhp_get_step(state);
-       if (name && sp->name) {
-               ret = -EBUSY;
-               goto out;
-       }
+       if (name && sp->name)
+               return -EBUSY;
+
        sp->startup.single = startup;
        sp->teardown.single = teardown;
        sp->name = name;
        sp->multi_instance = multi_instance;
        INIT_HLIST_HEAD(&sp->list);
-out:
-       mutex_unlock(&cpuhp_state_mutex);
        return ret;
 }
 
@@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
                return -EINVAL;
 
        get_online_cpus();
+       mutex_lock(&cpuhp_state_mutex);
 
        if (!invoke || !sp->startup.multi)
                goto add_node;
@@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
                if (ret) {
                        if (sp->teardown.multi)
                                cpuhp_rollback_install(cpu, state, node);
-                       goto err;
+                       goto unlock;
                }
        }
 add_node:
        ret = 0;
-       mutex_lock(&cpuhp_state_mutex);
        hlist_add_head(node, &sp->list);
+unlock:
        mutex_unlock(&cpuhp_state_mutex);
-
-err:
        put_online_cpus();
        return ret;
 }
@@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
                return -EINVAL;
 
        get_online_cpus();
+       mutex_lock(&cpuhp_state_mutex);
 
        ret = cpuhp_store_callbacks(state, name, startup, teardown,
                                    multi_instance);
@@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
                }
        }
 out:
+       mutex_unlock(&cpuhp_state_mutex);
        put_online_cpus();
        /*
         * If the requested state is CPUHP_AP_ONLINE_DYN, return the
@@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
                return -EINVAL;
 
        get_online_cpus();
+       mutex_lock(&cpuhp_state_mutex);
+
        if (!invoke || !cpuhp_get_teardown_cb(state))
                goto remove;
        /*
@@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
        }
 
 remove:
-       mutex_lock(&cpuhp_state_mutex);
        hlist_del(node);
        mutex_unlock(&cpuhp_state_mutex);
        put_online_cpus();
@@ -1571,6 +1568,7 @@ remove:
        return 0;
 }
 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
+
 /**
  * __cpuhp_remove_state - Remove the callbacks for a hotplug machine state
  * @state:     The state to remove
@@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 
        get_online_cpus();
 
+       mutex_lock(&cpuhp_state_mutex);
        if (sp->multi_instance) {
                WARN(!hlist_empty(&sp->list),
                     "Error: Removing state %d which has instances left.\n",
@@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
        }
 remove:
        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
+       mutex_unlock(&cpuhp_state_mutex);
        put_online_cpus();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);
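Callers are unchanged by this reshuffle; cpuhp_state_mutex is simply taken once per public entry point instead of inside cpuhp_store_callbacks(). A typical dynamic-state registration (the demo_* names are hypothetical) still looks like:

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int demo_cpu_online(unsigned int cpu)
{
	/* bring up per-CPU state; runs on each CPU as it comes online */
	return 0;
}

static int demo_cpu_offline(unsigned int cpu)
{
	/* tear down what demo_cpu_online() set up */
	return 0;
}

static int __init demo_init(void)
{
	int state;

	/* reserves a dynamic state and installs the callbacks; after this
	 * patch the whole reserve-and-store sequence runs under
	 * cpuhp_state_mutex inside __cpuhp_setup_state() */
	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
				  demo_cpu_online, demo_cpu_offline);
	return state < 0 ? state : 0;
}
late_initcall(demo_init);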
index a17ed56c8ce1f918519cfbf96ee3c938734ecb08..ff01cba86f430fd29916ab73c755698bf81feff0 100644 (file)
@@ -4256,7 +4256,7 @@ int perf_event_release_kernel(struct perf_event *event)
 
        raw_spin_lock_irq(&ctx->lock);
        /*
-        * Mark this even as STATE_DEAD, there is no external reference to it
+        * Mark this event as STATE_DEAD, there is no external reference to it
         * anymore.
         *
         * Anybody acquiring event->child_mutex after the below loop _must_
@@ -10417,21 +10417,22 @@ void perf_event_free_task(struct task_struct *task)
                        continue;
 
                mutex_lock(&ctx->mutex);
-again:
-               list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
-                               group_entry)
-                       perf_free_event(event, ctx);
+               raw_spin_lock_irq(&ctx->lock);
+               /*
+                * Destroy the task <-> ctx relation and mark the context dead.
+                *
+                * This is important because even though the task hasn't been
+                * exposed yet the context has been (through child_list).
+                */
+               RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
+               WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+               put_task_struct(task); /* cannot be last */
+               raw_spin_unlock_irq(&ctx->lock);
 
-               list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
-                               group_entry)
+               list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
                        perf_free_event(event, ctx);
 
-               if (!list_empty(&ctx->pinned_groups) ||
-                               !list_empty(&ctx->flexible_groups))
-                       goto again;
-
                mutex_unlock(&ctx->mutex);
-
                put_ctx(ctx);
        }
 }
@@ -10469,7 +10470,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 }
 
 /*
- * inherit a event from parent task to child task:
+ * Inherit an event from parent task to child task.
+ *
+ * Returns:
+ *  - valid pointer on success
+ *  - NULL for orphaned events
+ *  - IS_ERR() on error
  */
 static struct perf_event *
 inherit_event(struct perf_event *parent_event,
@@ -10563,6 +10569,16 @@ inherit_event(struct perf_event *parent_event,
        return child_event;
 }
 
+/*
+ * Inherits an event group.
+ *
+ * This will quietly suppress orphaned events; !inherit_event() is not an error.
+ * This matches with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
 static int inherit_group(struct perf_event *parent_event,
              struct task_struct *parent,
              struct perf_event_context *parent_ctx,
@@ -10577,6 +10593,11 @@ static int inherit_group(struct perf_event *parent_event,
                                 child, NULL, child_ctx);
        if (IS_ERR(leader))
                return PTR_ERR(leader);
+       /*
+        * @leader can be NULL here because of is_orphaned_event(). In this
+        * case inherit_event() will create individual events, similar to what
+        * perf_group_detach() would do anyway.
+        */
        list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
                child_ctr = inherit_event(sub, parent, parent_ctx,
                                            child, leader, child_ctx);
@@ -10586,6 +10607,17 @@ static int inherit_group(struct perf_event *parent_event,
        return 0;
 }
 
+/*
+ * Creates the child task context and tries to inherit the event-group.
+ *
+ * Clears @inherited_all on !attr.inherited or error. Note that we'll leave
+ * inherited_all set when we 'fail' to inherit an orphaned event; this is
+ * consistent with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
 static int
 inherit_task_group(struct perf_event *event, struct task_struct *parent,
                   struct perf_event_context *parent_ctx,
@@ -10608,7 +10640,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
                 * First allocate and initialize a context for the
                 * child.
                 */
-
                child_ctx = alloc_perf_context(parent_ctx->pmu, child);
                if (!child_ctx)
                        return -ENOMEM;
@@ -10670,7 +10701,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
                if (ret)
-                       break;
+                       goto out_unlock;
        }
 
        /*
@@ -10686,7 +10717,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
                if (ret)
-                       break;
+                       goto out_unlock;
        }
 
        raw_spin_lock_irqsave(&parent_ctx->lock, flags);
@@ -10714,6 +10745,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
        }
 
        raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+out_unlock:
        mutex_unlock(&parent_ctx->mutex);
 
        perf_unpin_context(parent_ctx);
index 229a744b1781be2e4fccc1b5c290bd246d8b8694..45858ec739411f5741667e560552757697441e6b 100644 (file)
@@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct rt_mutex_waiter rt_waiter;
-       struct rt_mutex *pi_mutex = NULL;
        struct futex_hash_bucket *hb;
        union futex_key key2 = FUTEX_KEY_INIT;
        struct futex_q q = futex_q_init;
@@ -2899,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                if (q.pi_state && (q.pi_state->owner != current)) {
                        spin_lock(q.lock_ptr);
                        ret = fixup_pi_state_owner(uaddr2, &q, current);
+                       if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
+                               rt_mutex_unlock(&q.pi_state->pi_mutex);
                        /*
                         * Drop the reference to the pi state which
                         * the requeue_pi() code acquired for us.
@@ -2907,6 +2908,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                        spin_unlock(q.lock_ptr);
                }
        } else {
+               struct rt_mutex *pi_mutex;
+
                /*
                 * We have been woken up by futex_unlock_pi(), a timeout, or a
                 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2930,18 +2933,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                if (res)
                        ret = (res < 0) ? res : 0;
 
+               /*
+                * If fixup_pi_state_owner() faulted and was unable to handle
+                * the fault, unlock the rt_mutex and return the fault to
+                * userspace.
+                */
+               if (ret && rt_mutex_owner(pi_mutex) == current)
+                       rt_mutex_unlock(pi_mutex);
+
                /* Unqueue and drop the lock. */
                unqueue_me_pi(&q);
        }
 
-       /*
-        * If fixup_pi_state_owner() faulted and was unable to handle the
-        * fault, unlock the rt_mutex and return the fault to userspace.
-        */
-       if (ret == -EFAULT) {
-               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
-                       rt_mutex_unlock(pi_mutex);
-       } else if (ret == -EINTR) {
+       if (ret == -EINTR) {
                /*
                 * We've already been requeued, but cannot restart by calling
                 * futex_lock_pi() directly. We could restart this syscall, but
index 7bc24d477805d868b932aab7acc6997120931fc5..c65f7989f850d12508045896a2cb98d5b691c068 100644 (file)
@@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
                 */
                if (sem->count == 0)
                        break;
-               if (signal_pending_state(state, current)) {
-                       ret = -EINTR;
-                       goto out;
-               }
+               if (signal_pending_state(state, current))
+                       goto out_nolock;
+
                set_current_state(state);
                raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
                schedule();
@@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
        }
        /* got the lock */
        sem->count = -1;
-out:
        list_del(&waiter.list);
 
        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 
        return ret;
+
+out_nolock:
+       list_del(&waiter.list);
+       if (!list_empty(&sem->wait_list))
+               __rwsem_do_wake(sem, 1);
+       raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+       return -EINTR;
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
index 06123234f1189c86ee42dffdc2d14873b6b16895..07e85e5229da849d33391f97234c1e1fff2c5ce1 100644 (file)
@@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(resource_size(res), SECTION_SIZE);
 
-       lock_device_hotplug();
        mem_hotplug_begin();
        arch_remove_memory(align_start, align_size);
        mem_hotplug_done();
-       unlock_device_hotplug();
 
        untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
        pgmap_radix_release(res);
@@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (error)
                goto err_pfn_remap;
 
-       lock_device_hotplug();
        mem_hotplug_begin();
        error = arch_add_memory(nid, align_start, align_size, true);
        mem_hotplug_done();
-       unlock_device_hotplug();
        if (error)
                goto err_add_memory;
 
index 99b2c33a9fbcb4411fd7b75d6dbaff36bf07f803..a2ce59015642c3ccc753006837a9485b2d9fbcd3 100644 (file)
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  *
  * This function returns true if:
  *
- *   runtime / (deadline - t) > dl_runtime / dl_period ,
+ *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
  *
  * IOW we can't recycle current parameters.
  *
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
  * task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
  */
 static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
                               struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
         * of anything below microseconds resolution is actually fiction
         * (but still we want to give the user that illusion >;).
         */
-       left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+       left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
        right = ((dl_se->deadline - t) >> DL_SCALE) *
                (pi_se->dl_runtime >> DL_SCALE);
 
@@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
        }
 }
 
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+       return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
 /*
  * If the entity depleted all its runtime, and if we want it to sleep
  * while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
  * and try to activate it.
  *
  * Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
         * that it is actually coming from rq->clock and not from
         * hrtimer's time base reading.
         */
-       act = ns_to_ktime(dl_se->deadline);
+       act = ns_to_ktime(dl_next_period(dl_se));
        now = hrtimer_cb_get_time(timer);
        delta = ktime_to_ns(now) - rq_clock(rq);
        act = ktime_add_ns(act, delta);
@@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
                lockdep_unpin_lock(&rq->lock, rf.cookie);
                rq = dl_task_offline_migration(rq, p);
                rf.cookie = lockdep_pin_lock(&rq->lock);
+               update_rq_clock(rq);
 
                /*
                 * Now that the task has been migrated to the new RQ and we
@@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
        timer->function = dl_task_timer;
 }
 
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadline < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. Since in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishment timer to the beginning of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+       struct task_struct *p = dl_task_of(dl_se);
+       struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+       if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+           dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+               if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+                       return;
+               dl_se->dl_throttled = 1;
+       }
+}
+
 static
 int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 {
@@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
        __dequeue_dl_entity(dl_se);
 }
 
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+       return dl_se->dl_deadline < dl_se->dl_period;
+}
+
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
        struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -947,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
                return;
        }
 
+       /*
+        * Check if a constrained deadline task was activated
+        * after the deadline but before the next period.
+        * If that is the case, the task will be throttled and
+        * the replenishment timer will be set to the next period.
+        */
+       if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+               dl_check_constrained_dl(&p->dl);
+
        /*
         * If p is throttled, we do nothing. In fact, if it exhausted
         * its budget it needs a replenishment and, since it now is on
index 7296b7308ecaebb6cca949e1a9e2d4361750f7c6..f15fb2bdbc0dee60d770da951424f8cf0635f5f6 100644 (file)
@@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
         * If the folding window started, make sure we start writing in the
         * next idle-delta.
         */
-       if (!time_before(jiffies, calc_load_update))
+       if (!time_before(jiffies, READ_ONCE(calc_load_update)))
                idx++;
 
        return idx & 1;
@@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
        struct rq *this_rq = this_rq();
 
        /*
-        * If we're still before the sample window, we're done.
+        * If we're still before the pending sample window, we're done.
         */
+       this_rq->calc_load_update = READ_ONCE(calc_load_update);
        if (time_before(jiffies, this_rq->calc_load_update))
                return;
 
@@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
         * accounted through the nohz accounting, so skip the entire deal and
         * sync up for the next window.
         */
-       this_rq->calc_load_update = calc_load_update;
        if (time_before(jiffies, this_rq->calc_load_update + 10))
                this_rq->calc_load_update += LOAD_FREQ;
 }
@@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
  */
 static void calc_global_nohz(void)
 {
+       unsigned long sample_window;
        long delta, active, n;
 
-       if (!time_before(jiffies, calc_load_update + 10)) {
+       sample_window = READ_ONCE(calc_load_update);
+       if (!time_before(jiffies, sample_window + 10)) {
                /*
                 * Catch-up, fold however many we are behind still
                 */
-               delta = jiffies - calc_load_update - 10;
+               delta = jiffies - sample_window - 10;
                n = 1 + (delta / LOAD_FREQ);
 
                active = atomic_long_read(&calc_load_tasks);
@@ -324,7 +326,7 @@ static void calc_global_nohz(void)
                avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
                avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-               calc_load_update += n * LOAD_FREQ;
+               WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
        }
 
        /*
@@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
  */
 void calc_global_load(unsigned long ticks)
 {
+       unsigned long sample_window;
        long active, delta;
 
-       if (time_before(jiffies, calc_load_update + 10))
+       sample_window = READ_ONCE(calc_load_update);
+       if (time_before(jiffies, sample_window + 10))
                return;
 
        /*
@@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
        avenrun[1] = calc_load(avenrun[1], EXP_5, active);
        avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
-       calc_load_update += LOAD_FREQ;
+       WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
        /*
         * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
index 295479b792ec488b6d984ef98e7e715f6ac162b4..6fa7208bcd564ec8fb6bcf25e206aef9bd724ecb 100644 (file)
@@ -125,9 +125,12 @@ void put_online_mems(void)
 
 }
 
+/* Serializes write accesses to mem_hotplug.active_writer. */
+static DEFINE_MUTEX(memory_add_remove_lock);
+
 void mem_hotplug_begin(void)
 {
-       assert_held_device_hotplug();
+       mutex_lock(&memory_add_remove_lock);
 
        mem_hotplug.active_writer = current;
 
@@ -147,6 +150,7 @@ void mem_hotplug_done(void)
        mem_hotplug.active_writer = NULL;
        mutex_unlock(&mem_hotplug.lock);
        memhp_lock_release();
+       mutex_unlock(&memory_add_remove_lock);
 }
 
 /* add this memory to iomem resource */
index 9b5bc86f96ad731269e2051719583f168a74bc51..b1ccb58ad397403214a220e4a0ac7901a6b6ae1e 100644 (file)
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry)
 {
        struct swap_slots_cache *cache;
 
-       BUG_ON(!swap_slot_cache_initialized);
-
        cache = &get_cpu_var(swp_slots);
        if (use_swap_slot_cache && cache->slots_ret) {
                spin_lock_irq(&cache->free_lock);
index 0dd80222b20bbd6ab3c6235134e5f8f37b57815a..0b057628a7ba5c45d722710082ce32df3f7e8e13 100644 (file)
@@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
                if (fatal_signal_pending(current)) {
                        area->nr_pages = i;
-                       goto fail;
+                       goto fail_no_warn;
                }
 
                if (node == NUMA_NO_NODE)
@@ -1709,6 +1709,7 @@ fail:
        warn_alloc(gfp_mask, NULL,
                          "vmalloc: allocation failure, allocated %ld of %ld bytes",
                          (area->nr_pages*PAGE_SIZE), area->size);
+fail_no_warn:
        vfree(area->addr);
        return NULL;
 }
index 8970a2fd3b1a5354fb4bc843292a1c7358eed51c..f9492bccfd794a1983eabbc4bff32df35b31cea8 100644 (file)
@@ -667,6 +667,7 @@ next:
                        z3fold_page_unlock(zhdr);
                        spin_lock(&pool->lock);
                        if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+                               spin_unlock(&pool->lock);
                                atomic64_dec(&pool->pages_nr);
                                return 0;
                        }
index e97ab824e368cc16f9609acd70d5337866eb2936..9ee5787634e59690d67cb8fa148e03b18d455c99 100644 (file)
@@ -562,8 +562,7 @@ static int vlan_dev_init(struct net_device *dev)
                           NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
                           NETIF_F_ALL_FCOE;
 
-       dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
-                        NETIF_F_GSO_SOFTWARE;
+       dev->features |= dev->hw_features | NETIF_F_LLTX;
        dev->gso_max_size = real_dev->gso_max_size;
        dev->gso_max_segs = real_dev->gso_max_segs;
        if (dev->features & NETIF_F_VLAN_FEATURES)
index 9b681550e3a3ea3c6146ac67572b6c97a28c9d2c..9086ffbb508514c1e4fb1a5d2d04d6c6b1cf5bea 100644 (file)
@@ -12,7 +12,7 @@ obj-$(CONFIG_NET)             += $(tmp-y)
 
 # LLC has to be linked before the files in net/802/
 obj-$(CONFIG_LLC)              += llc/
-obj-$(CONFIG_NET)              += ethernet/ 802/ sched/ netlink/
+obj-$(CONFIG_NET)              += ethernet/ 802/ sched/ netlink/ bpf/
 obj-$(CONFIG_NETFILTER)                += netfilter/
 obj-$(CONFIG_INET)             += ipv4/
 obj-$(CONFIG_XFRM)             += xfrm/
index 53b4ac09e7b7d5d6a57f049dc1653c961b052622..ec527b62f79db1a4712d6fd654d2f899255de8bc 100644 (file)
@@ -106,7 +106,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
                        entry->expires = jiffies - 1;
                        /* force resolution or expiration */
                        error = neigh_update(entry->neigh, NULL, NUD_NONE,
-                                            NEIGH_UPDATE_F_ADMIN);
+                                            NEIGH_UPDATE_F_ADMIN, 0);
                        if (error)
                                pr_crit("neigh_update failed with %d\n", error);
                        goto out;
@@ -481,7 +481,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
                link_vcc(clip_vcc, entry);
        }
        error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
-                            NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
+                            NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
        neigh_release(neigh);
        return error;
 }
index 9613381f5db04e28ff66749706d1d61d82f77b88..f06422f4108d209fde356457c453164f2f4d7289 100644 (file)
@@ -62,21 +62,16 @@ static void vcc_remove_socket(struct sock *sk)
        write_unlock_irq(&vcc_sklist_lock);
 }
 
-static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size)
+static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size)
 {
-       struct sk_buff *skb;
        struct sock *sk = sk_atm(vcc);
 
        if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {
                pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n",
                         sk_wmem_alloc_get(sk), size, sk->sk_sndbuf);
-               return NULL;
+               return false;
        }
-       while (!(skb = alloc_skb(size, GFP_KERNEL)))
-               schedule();
-       pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
-       atomic_add(skb->truesize, &sk->sk_wmem_alloc);
-       return skb;
+       return true;
 }
 
 static void vcc_sock_destruct(struct sock *sk)
@@ -606,7 +601,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
        eff = (size+3) & ~3; /* align to word boundary */
        prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        error = 0;
-       while (!(skb = alloc_tx(vcc, eff))) {
+       while (!vcc_tx_ready(vcc, eff)) {
                if (m->msg_flags & MSG_DONTWAIT) {
                        error = -EAGAIN;
                        break;
@@ -628,6 +623,15 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
        finish_wait(sk_sleep(sk), &wait);
        if (error)
                goto out;
+
+       skb = alloc_skb(eff, GFP_KERNEL);
+       if (!skb) {
+               error = -ENOMEM;
+               goto out;
+       }
+       pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
+       atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+
        skb->dev = NULL; /* for paths shared with net_device interfaces */
        ATM_SKB(skb)->atm_options = vcc->atm_options;
        if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
index 7c3d994e90d87b868f2b1614cc5d26e2413e70ee..71343d0fec94b55f7318ec8578abc956148f7791 100644 (file)
@@ -2477,6 +2477,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
        batadv_iv_ogm_schedule(hard_iface);
 }
 
+/**
+ * batadv_iv_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv)
+{
+       /* set default TQ difference threshold to 20 */
+       atomic_set(&bat_priv->gw.sel_class, 20);
+}
+
 static struct batadv_gw_node *
 batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 {
@@ -2823,6 +2833,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
                .del_if = batadv_iv_ogm_orig_del_if,
        },
        .gw = {
+               .init_sel_class = batadv_iv_init_sel_class,
                .get_best_gw_node = batadv_iv_gw_get_best_gw_node,
                .is_eligible = batadv_iv_gw_is_eligible,
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
index 0acd081dd286996444d121b526f4530c4c1c0845..a36c8e7291d61f171cdb128dee865739d22cb00e 100644 (file)
@@ -668,6 +668,16 @@ err_ifinfo1:
        return ret;
 }
 
+/**
+ * batadv_v_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
+{
+       /* set default throughput difference threshold to 5 Mbit/s
+        * (stored in 0.1 Mbit/s units, hence 50)
+        */
+       atomic_set(&bat_priv->gw.sel_class, 50);
+}
+
 static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
                                        char *buff, size_t count)
 {
@@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
                .dump = batadv_v_orig_dump,
        },
        .gw = {
+               .init_sel_class = batadv_v_init_sel_class,
                .store_sel_class = batadv_v_store_sel_class,
                .show_sel_class = batadv_v_show_sel_class,
                .get_best_gw_node = batadv_v_gw_get_best_gw_node,
@@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv)
        if (ret < 0)
                return ret;
 
-       /* set default throughput difference threshold to 5Mbps */
-       atomic_set(&bat_priv->gw.sel_class, 50);
-
        return 0;
 }
 
index 11a23fd6e1a07fa0c541fa3ea0a13775f9933893..8f964beaac284905c487ecfc5babaf2dd72d822c 100644 (file)
@@ -404,7 +404,7 @@ out:
  * batadv_frag_create - create a fragment from skb
  * @skb: skb to create fragment from
  * @frag_head: header to use in new fragment
- * @mtu: size of new fragment
+ * @fragment_size: size of new fragment
  *
  * Split the passed skb into two fragments: A new one with size matching the
  * passed fragment_size and the old one with the rest. The new skb contains data from the
@@ -414,11 +414,11 @@ out:
  */
 static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
                                          struct batadv_frag_packet *frag_head,
-                                         unsigned int mtu)
+                                         unsigned int fragment_size)
 {
        struct sk_buff *skb_fragment;
        unsigned int header_size = sizeof(*frag_head);
-       unsigned int fragment_size = mtu - header_size;
+       unsigned int mtu = fragment_size + header_size;
 
        skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
        if (!skb_fragment)
@@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
        struct sk_buff *skb_fragment;
        unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
        unsigned int header_size = sizeof(frag_header);
-       unsigned int max_fragment_size, max_packet_size;
+       unsigned int max_fragment_size, num_fragments;
        int ret;
 
        /* To avoid merge and refragmentation at next-hops we never send
@@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb,
         */
        mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
        max_fragment_size = mtu - header_size;
-       max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+       if (skb->len == 0 || max_fragment_size == 0)
+               return -EINVAL;
+
+       num_fragments = (skb->len - 1) / max_fragment_size + 1;
+       max_fragment_size = (skb->len - 1) / num_fragments + 1;
 
        /* Don't even try to fragment, if we need more than 16 fragments */
-       if (skb->len > max_packet_size) {
+       if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) {
                ret = -EAGAIN;
                goto free_skb;
        }
@@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                        goto put_primary_if;
                }
 
-               skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
+               skb_fragment = batadv_frag_create(skb, &frag_header,
+                                                 max_fragment_size);
                if (!skb_fragment) {
                        ret = -ENOMEM;
                        goto put_primary_if;
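
The even-split arithmetic above is easiest to check with numbers; a minimal worked example (values illustrative only):

    /* skb->len = 3000, mtu-derived max_fragment_size = 1400 */
    num_fragments     = (3000 - 1) / 1400 + 1;   /* = 3    */
    max_fragment_size = (3000 - 1) / 3 + 1;      /* = 1000 */

so the payload goes out as 1000 + 1000 + 1000 instead of the old 1400 + 1400 + 200, keeping all fragments equally sized.
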
index 5db2e43e3775ef40fc3832984c93411c7f0dbb08..33940c5c74a8730c4ed3e06f7246e022cfb798da 100644 (file)
@@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
  */
 void batadv_gw_init(struct batadv_priv *bat_priv)
 {
+       if (bat_priv->algo_ops->gw.init_sel_class)
+               bat_priv->algo_ops->gw.init_sel_class(bat_priv);
+       else
+               atomic_set(&bat_priv->gw.sel_class, 1);
+
        batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
                                     NULL, BATADV_TVLV_GW, 1,
                                     BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
index 5d099b2e6cfccb8a436d98a10a6d513d89e31dc1..d042c99af028e2083307de1ba8978f2061fee45d 100644 (file)
@@ -819,7 +819,6 @@ static int batadv_softif_init_late(struct net_device *dev)
        atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
 #endif
        atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
-       atomic_set(&bat_priv->gw.sel_class, 20);
        atomic_set(&bat_priv->gw.bandwidth_down, 100);
        atomic_set(&bat_priv->gw.bandwidth_up, 20);
        atomic_set(&bat_priv->orig_interval, 1000);
index 66b25e410a41375e5c70bd7400a5b353bdff4520..246f21b4973bc39d0678273ad831da1f5b7e0df3 100644 (file)
@@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops {
 
 /**
  * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @init_sel_class: initialize GW selection class (optional)
  * @store_sel_class: parse and stores a new GW selection class (optional)
  * @show_sel_class: prints the current GW selection class (optional)
  * @get_best_gw_node: select the best GW from the list of available nodes
@@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops {
  * @dump: dump gateways to a netlink socket (optional)
  */
 struct batadv_algo_gw_ops {
+       void (*init_sel_class)(struct batadv_priv *bat_priv);
        ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
                                   size_t count);
        ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
new file mode 100644 (file)
index 0000000..27b2992
--- /dev/null
@@ -0,0 +1 @@
+obj-y  := test_run.o
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
new file mode 100644 (file)
index 0000000..8a6d0a3
--- /dev/null
@@ -0,0 +1,172 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <linux/sched/signal.h>
+
+static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
+{
+       u32 ret;
+
+       preempt_disable();
+       rcu_read_lock();
+       ret = BPF_PROG_RUN(prog, ctx);
+       rcu_read_unlock();
+       preempt_enable();
+
+       return ret;
+}
+
+static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+{
+       u64 time_start, time_spent = 0;
+       u32 ret = 0, i;
+
+       if (!repeat)
+               repeat = 1;
+       time_start = ktime_get_ns();
+       for (i = 0; i < repeat; i++) {
+               ret = bpf_test_run_one(prog, ctx);
+               if (need_resched()) {
+                       if (signal_pending(current))
+                               break;
+                       time_spent += ktime_get_ns() - time_start;
+                       cond_resched();
+                       time_start = ktime_get_ns();
+               }
+       }
+       time_spent += ktime_get_ns() - time_start;
+       do_div(time_spent, repeat);
+       *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+       return ret;
+}
+
+static int bpf_test_finish(union bpf_attr __user *uattr, const void *data,
+                          u32 size, u32 retval, u32 duration)
+{
+       void __user *data_out = u64_to_user_ptr(uattr->test.data_out);
+       int err = -EFAULT;
+
+       if (data_out && copy_to_user(data_out, data, size))
+               goto out;
+       if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
+               goto out;
+       if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+               goto out;
+       if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
+               goto out;
+       err = 0;
+out:
+       return err;
+}
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
+                          u32 headroom, u32 tailroom)
+{
+       void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+       void *data;
+
+       if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
+               return ERR_PTR(-EINVAL);
+
+       data = kzalloc(size + headroom + tailroom, GFP_USER);
+       if (!data)
+               return ERR_PTR(-ENOMEM);
+
+       if (copy_from_user(data + headroom, data_in, size)) {
+               kfree(data);
+               return ERR_PTR(-EFAULT);
+       }
+       return data;
+}
+
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+                         union bpf_attr __user *uattr)
+{
+       bool is_l2 = false, is_direct_pkt_access = false;
+       u32 size = kattr->test.data_size_in;
+       u32 repeat = kattr->test.repeat;
+       u32 retval, duration;
+       struct sk_buff *skb;
+       void *data;
+       int ret;
+
+       data = bpf_test_init(kattr, size, NET_SKB_PAD,
+                            SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       switch (prog->type) {
+       case BPF_PROG_TYPE_SCHED_CLS:
+       case BPF_PROG_TYPE_SCHED_ACT:
+               is_l2 = true;
+               /* fall through */
+       case BPF_PROG_TYPE_LWT_IN:
+       case BPF_PROG_TYPE_LWT_OUT:
+       case BPF_PROG_TYPE_LWT_XMIT:
+               is_direct_pkt_access = true;
+               break;
+       default:
+               break;
+       }
+
+       skb = build_skb(data, 0);
+       if (!skb) {
+               kfree(data);
+               return -ENOMEM;
+       }
+
+       skb_reserve(skb, NET_SKB_PAD);
+       __skb_put(skb, size);
+       skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+       skb_reset_network_header(skb);
+
+       if (is_l2)
+               __skb_push(skb, ETH_HLEN);
+       if (is_direct_pkt_access)
+               bpf_compute_data_end(skb);
+       retval = bpf_test_run(prog, skb, repeat, &duration);
+       if (!is_l2)
+               __skb_push(skb, ETH_HLEN);
+       size = skb->len;
+       /* bpf program can never convert linear skb to non-linear */
+       if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
+               size = skb_headlen(skb);
+       ret = bpf_test_finish(uattr, skb->data, size, retval, duration);
+       kfree_skb(skb);
+       return ret;
+}
+
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+                         union bpf_attr __user *uattr)
+{
+       u32 size = kattr->test.data_size_in;
+       u32 repeat = kattr->test.repeat;
+       struct xdp_buff xdp = {};
+       u32 retval, duration;
+       void *data;
+       int ret;
+
+       data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0);
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       xdp.data_hard_start = data;
+       xdp.data = data + XDP_PACKET_HEADROOM;
+       xdp.data_end = xdp.data + size;
+
+       retval = bpf_test_run(prog, &xdp, repeat, &duration);
+       if (xdp.data != data + XDP_PACKET_HEADROOM)
+               size = xdp.data_end - xdp.data;
+       ret = bpf_test_finish(uattr, xdp.data, size, retval, duration);
+       kfree(data);
+       return ret;
+}
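
For context, the new command can be driven from userspace with a plain bpf(2) syscall. A minimal sketch, assuming the matching BPF_PROG_TEST_RUN uapi additions (the prog_fd/repeat attr fields are not shown in this hunk, and the wrapper name and repeat count are illustrative):

    #include <linux/bpf.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    static int prog_test_run(int prog_fd, void *pkt, __u32 pkt_len,
                             __u32 *retval, __u32 *duration)
    {
            union bpf_attr attr;
            int err;

            memset(&attr, 0, sizeof(attr));
            attr.test.prog_fd = prog_fd;
            attr.test.data_in = (__u64)(unsigned long)pkt;
            attr.test.data_size_in = pkt_len;   /* >= ETH_HLEN, bounded by PAGE_SIZE */
            attr.test.repeat = 1000;            /* averaged by bpf_test_run() */

            err = syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
            if (!err) {
                    *retval = attr.test.retval;
                    *duration = attr.test.duration;  /* mean ns per run */
            }
            return err;
    }
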
index 4f598dc2d9168cd323a3027d77d601854aa35f04..5a40a87c4f4fff9cdfa8e54ba54fe47587dc97b0 100644 (file)
@@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
        struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
        struct net_bridge_fdb_entry *fdb;
 
-       WARN_ON_ONCE(!br_hash_lock_held(br));
+       lockdep_assert_held_once(&br->hash_lock);
 
        rcu_read_lock();
        fdb = fdb_find_rcu(head, addr, vid);
@@ -594,6 +594,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                                fdb->updated = now;
                        if (unlikely(added_by_user))
                                fdb->added_by_user = 1;
+                       /* Take over HW learned entry */
+                       if (unlikely(fdb->added_by_external_learn))
+                               fdb->added_by_external_learn = 0;
                        if (unlikely(fdb_modified))
                                fdb_notify(br, fdb, RTM_NEWNEIGH);
                }
@@ -854,6 +857,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                br_fdb_update(br, p, addr, vid, true);
                rcu_read_unlock();
                local_bh_enable();
+       } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
+               err = br_fdb_external_learn_add(br, p, addr, vid);
        } else {
                spin_lock_bh(&br->hash_lock);
                err = fdb_add_entry(br, p, addr, ndm->ndm_state,
index 8ac1770aa222f21f89027d303a218c49be9dc650..6eb52d422dd9c871dc4a54304fbc707ef68b90ba 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/if_ether.h>
 #include <linux/slab.h>
+#include <net/dsa.h>
 #include <net/sock.h>
 #include <linux/if_vlan.h>
 #include <net/switchdev.h>
index 1f1e62095464f99eaca8de49a772289f057bd943..067cf03134492a33f10982755105e103666cd1ef 100644 (file)
@@ -997,13 +997,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
        if (!elem)
                return okfn(net, sk, skb);
 
-       /* We may already have this, but read-locks nest anyway */
-       rcu_read_lock();
        nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
                           sk, net, okfn);
 
        ret = nf_hook_slow(skb, &state, elem);
-       rcu_read_unlock();
        if (ret == 1)
                ret = okfn(net, sk, skb);
 
index 2288fca7756c5103fc4e8420ad61a2f9e633c097..61368186edea53841b1f00b37ddaa0d26461aee3 100644 (file)
@@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              const unsigned char *addr, u16 vid);
 
-static inline bool br_hash_lock_held(struct net_bridge *br)
-{
-#ifdef CONFIG_LOCKDEP
-       return lockdep_is_held(&br->hash_lock);
-#else
-       return true;
-#endif
-}
-
 /* br_forward.c */
 enum br_pkt_type {
        BR_PKT_UNICAST,
index 98b9c8e8615ebc6e2ddefd1885a01af3ef58781b..707caea397433b66c887e527f0e4532eaf260880 100644 (file)
@@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
                pptr = skb_header_pointer(skb, offset,
                                          sizeof(_ports), &_ports);
                if (pptr == NULL) {
-                       printk(" INCOMPLETE TCP/UDP header");
+                       pr_cont(" INCOMPLETE TCP/UDP header");
                        return;
                }
-               printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+               pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
        }
 }
 
@@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
 
                ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
                if (ih == NULL) {
-                       printk(" INCOMPLETE IP header");
+                       pr_cont(" INCOMPLETE IP header");
                        goto out;
                }
-               printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
-                      &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
+               pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
+                       &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
                print_ports(skb, ih->protocol, ih->ihl*4);
                goto out;
        }
@@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
 
                ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
                if (ih == NULL) {
-                       printk(" INCOMPLETE IPv6 header");
+                       pr_cont(" INCOMPLETE IPv6 header");
                        goto out;
                }
-               printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
-                      &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
+               pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
+                       &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
                nexthdr = ih->nexthdr;
                offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off);
                if (offset_ph == -1)
@@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
 
                ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
                if (ah == NULL) {
-                       printk(" INCOMPLETE ARP header");
+                       pr_cont(" INCOMPLETE ARP header");
                        goto out;
                }
-               printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
-                      ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
-                      ntohs(ah->ar_op));
+               pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
+                       ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+                       ntohs(ah->ar_op));
 
                /* If it's for Ethernet and the lengths are OK,
                 * then log the ARP payload
@@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
                        ap = skb_header_pointer(skb, sizeof(_arph),
                                                sizeof(_arpp), &_arpp);
                        if (ap == NULL) {
-                               printk(" INCOMPLETE ARP payload");
+                               pr_cont(" INCOMPLETE ARP payload");
                                goto out;
                        }
-                       printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
-                                       ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+                       pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
+                               ap->mac_src, ap->ip_src,
+                               ap->mac_dst, ap->ip_dst);
                }
        }
 out:
-       printk("\n");
+       pr_cont("\n");
        spin_unlock_bh(&ebt_log_lock);
-
 }
 
 static unsigned int
index 206dc266ecd237c2874d25352dd631e3bc31b002..346ef6b00b8f05b62edc911d06c01692624596d9 100644 (file)
@@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx,
                                  const struct nlattr * const tb[])
 {
        struct nft_reject *priv = nft_expr_priv(expr);
-       int icmp_code, err;
-
-       err = nft_reject_bridge_validate(ctx, expr, NULL);
-       if (err < 0)
-               return err;
+       int icmp_code;
 
        if (tb[NFTA_REJECT_TYPE] == NULL)
                return -EINVAL;
index ea633342ab0d046cbc49e55b679440ef9e015c2d..4608aa245410ccdbcb3510c8e8c6dec2beac8a8d 100644 (file)
@@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
                }
 
                spin_unlock_irqrestore(&queue->lock, cpu_flags);
-       } while (sk_can_busy_loop(sk) &&
-                sk_busy_loop(sk, flags & MSG_DONTWAIT));
+
+               if (!sk_can_busy_loop(sk))
+                       break;
+
+               sk_busy_loop(sk, flags & MSG_DONTWAIT);
+       } while (!skb_queue_empty(&sk->sk_receive_queue));
 
        error = -EAGAIN;
 
index 7869ae3837ca741e344b1731dc50d8408d8bcb6c..ef9fe60ee294b0e2503456f68136440646c86344 100644 (file)
@@ -5060,27 +5060,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
                do_softirq();
 }
 
-bool sk_busy_loop(struct sock *sk, int nonblock)
+void napi_busy_loop(unsigned int napi_id,
+                   bool (*loop_end)(void *, unsigned long),
+                   void *loop_end_arg)
 {
-       unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+       unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
        int (*napi_poll)(struct napi_struct *napi, int budget);
        void *have_poll_lock = NULL;
        struct napi_struct *napi;
-       int rc;
 
 restart:
-       rc = false;
        napi_poll = NULL;
 
        rcu_read_lock();
 
-       napi = napi_by_id(sk->sk_napi_id);
+       napi = napi_by_id(napi_id);
        if (!napi)
                goto out;
 
        preempt_disable();
        for (;;) {
-               rc = 0;
+               int work = 0;
+
                local_bh_disable();
                if (!napi_poll) {
                        unsigned long val = READ_ONCE(napi->state);
@@ -5098,16 +5099,15 @@ restart:
                        have_poll_lock = netpoll_poll_lock(napi);
                        napi_poll = napi->poll;
                }
-               rc = napi_poll(napi, BUSY_POLL_BUDGET);
-               trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+               work = napi_poll(napi, BUSY_POLL_BUDGET);
+               trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
 count:
-               if (rc > 0)
-                       __NET_ADD_STATS(sock_net(sk),
-                                       LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+               if (work > 0)
+                       __NET_ADD_STATS(dev_net(napi->dev),
+                                       LINUX_MIB_BUSYPOLLRXPACKETS, work);
                local_bh_enable();
 
-               if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
-                   busy_loop_timeout(end_time))
+               if (!loop_end || loop_end(loop_end_arg, start_time))
                        break;
 
                if (unlikely(need_resched())) {
@@ -5116,9 +5116,8 @@ count:
                        preempt_enable();
                        rcu_read_unlock();
                        cond_resched();
-                       rc = !skb_queue_empty(&sk->sk_receive_queue);
-                       if (rc || busy_loop_timeout(end_time))
-                               return rc;
+                       if (loop_end(loop_end_arg, start_time))
+                               return;
                        goto restart;
                }
                cpu_relax();
@@ -5126,12 +5125,10 @@ count:
        if (napi_poll)
                busy_poll_stop(napi, have_poll_lock);
        preempt_enable();
-       rc = !skb_queue_empty(&sk->sk_receive_queue);
 out:
        rcu_read_unlock();
-       return rc;
 }
-EXPORT_SYMBOL(sk_busy_loop);
+EXPORT_SYMBOL(napi_busy_loop);
 
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
@@ -5143,10 +5140,10 @@ static void napi_hash_add(struct napi_struct *napi)
 
        spin_lock(&napi_hash_lock);
 
-       /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+       /* 0..NR_CPUS range is reserved for sender_cpu use */
        do {
-               if (unlikely(++napi_gen_id < NR_CPUS + 1))
-                       napi_gen_id = NR_CPUS + 1;
+               if (unlikely(++napi_gen_id < MIN_NAPI_ID))
+                       napi_gen_id = MIN_NAPI_ID;
        } while (napi_by_id(napi_gen_id));
        napi->napi_id = napi_gen_id;
 
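
With the refactor above, the socket side becomes a thin wrapper that supplies a loop_end predicate. A sketch, where sk_loop_end() and sk_busy_loop_timeout() are illustrative names; only the napi_busy_loop() signature is taken from this hunk:

    static bool sk_loop_end(void *p, unsigned long start_time)
    {
            struct sock *sk = p;

            /* stop once data has arrived or the busy-poll budget is spent */
            return !skb_queue_empty(&sk->sk_receive_queue) ||
                   sk_busy_loop_timeout(sk, start_time);
    }

    void sk_busy_loop(struct sock *sk, int nonblock)
    {
            /* a NULL loop_end makes napi_busy_loop() poll exactly once */
            napi_busy_loop(sk->sk_napi_id,
                           nonblock ? NULL : sk_loop_end, sk);
    }
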
index e9c1e6acfb6d196d4373dcc36bcf76577952dd32..24b766003a610368a4e4b23afadd49f651b5a2cc 100644 (file)
@@ -1493,8 +1493,686 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
                if (err)
                        return err;
        }
+       return 0;
+}
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+                           struct devlink_dpipe_match *match)
+{
+       struct devlink_dpipe_header *header = match->header;
+       struct devlink_dpipe_field *field = &header->fields[match->field_id];
+       struct nlattr *match_attr;
+
+       match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH);
+       if (!match_attr)
+               return -EMSGSIZE;
+
+       if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+           nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, match_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, match_attr);
+       return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+                                    struct sk_buff *skb)
+{
+       struct nlattr *matches_attr;
+
+       matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+       if (!matches_attr)
+               return -EMSGSIZE;
+
+       if (table->table_ops->matches_dump(table->priv, skb))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, matches_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, matches_attr);
+       return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+                            struct devlink_dpipe_action *action)
+{
+       struct devlink_dpipe_header *header = action->header;
+       struct devlink_dpipe_field *field = &header->fields[action->field_id];
+       struct nlattr *action_attr;
+
+       action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION);
+       if (!action_attr)
+               return -EMSGSIZE;
+
+       if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+           nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, action_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, action_attr);
+       return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+                                    struct sk_buff *skb)
+{
+       struct nlattr *actions_attr;
+
+       actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+       if (!actions_attr)
+               return -EMSGSIZE;
+
+       if (table->table_ops->actions_dump(table->priv, skb))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, actions_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, actions_attr);
+       return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+                                  struct devlink_dpipe_table *table)
+{
+       struct nlattr *table_attr;
+
+       table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
+       if (!table_attr)
+               return -EMSGSIZE;
+
+       if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+           nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+                             DEVLINK_ATTR_PAD))
+               goto nla_put_failure;
+       if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+                      table->counters_enabled))
+               goto nla_put_failure;
+
+       if (devlink_dpipe_matches_put(table, skb))
+               goto nla_put_failure;
+
+       if (devlink_dpipe_actions_put(table, skb))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, table_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, table_attr);
+       return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+                                           struct genl_info *info)
+{
+       int err;
+
+       if (*pskb) {
+               err = genlmsg_reply(*pskb, info);
+               if (err)
+                       return err;
+       }
+       *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!*pskb)
+               return -ENOMEM;
+       return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+                                    enum devlink_command cmd, int flags,
+                                    struct list_head *dpipe_tables,
+                                    const char *table_name)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       struct devlink_dpipe_table *table;
+       struct nlattr *tables_attr;
+       struct sk_buff *skb = NULL;
+       struct nlmsghdr *nlh;
+       bool incomplete;
+       void *hdr;
+       int i;
+       int err;
+
+       table = list_first_entry(dpipe_tables,
+                                struct devlink_dpipe_table, list);
+start_again:
+       err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+       if (err)
+               return err;
+
+       hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+                         &devlink_nl_family, NLM_F_MULTI, cmd);
+       if (!hdr) {
+               nlmsg_free(skb);
+               return -EMSGSIZE;
+       }
+
+       if (devlink_nl_put_handle(skb, devlink))
+               goto nla_put_failure;
+       tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES);
+       if (!tables_attr)
+               goto nla_put_failure;
+
+       i = 0;
+       incomplete = false;
+       list_for_each_entry_from(table, dpipe_tables, list) {
+               if (!table_name) {
+                       err = devlink_dpipe_table_put(skb, table);
+                       if (err) {
+                               if (!i)
+                                       goto err_table_put;
+                               incomplete = true;
+                               break;
+                       }
+               } else {
+                       if (!strcmp(table->name, table_name)) {
+                               err = devlink_dpipe_table_put(skb, table);
+                               if (err)
+                                       break;
+                       }
+               }
+               i++;
+       }
+
+       nla_nest_end(skb, tables_attr);
+       genlmsg_end(skb, hdr);
+       if (incomplete)
+               goto start_again;
+
+send_done:
+       nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+                       NLMSG_DONE, 0, flags | NLM_F_MULTI);
+       if (!nlh) {
+               err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+               if (err)
+                       goto err_skb_send_alloc;
+               goto send_done;
+       }
+
+       return genlmsg_reply(skb, info);
+
+nla_put_failure:
+       err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+       genlmsg_cancel(skb, hdr);
+       nlmsg_free(skb);
+       return err;
+}
+
+static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
+                                         struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       const char *table_name = NULL;
+
+       if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+               table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+       return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+                                        &devlink->dpipe_table_list,
+                                        table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+                                  struct devlink_dpipe_value *value)
+{
+       if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+                   value->value_size, value->value))
+               return -EMSGSIZE;
+       if (value->mask)
+               if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+                           value->value_size, value->mask))
+                       return -EMSGSIZE;
+       if (value->mapping_valid)
+               if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+                               value->mapping_value))
+                       return -EMSGSIZE;
+       return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+                                         struct devlink_dpipe_value *value)
+{
+       if (!value->action)
+               return -EINVAL;
+       if (devlink_dpipe_action_put(skb, value->action))
+               return -EMSGSIZE;
+       if (devlink_dpipe_value_put(skb, value))
+               return -EMSGSIZE;
+       return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+                                          struct devlink_dpipe_value *values,
+                                          unsigned int values_count)
+{
+       struct nlattr *action_attr;
+       int i;
+       int err;
+
+       for (i = 0; i < values_count; i++) {
+               action_attr = nla_nest_start(skb,
+                                            DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+               if (!action_attr)
+                       return -EMSGSIZE;
+               err = devlink_dpipe_action_value_put(skb, &values[i]);
+               if (err)
+                       goto err_action_value_put;
+               nla_nest_end(skb, action_attr);
+       }
+       return 0;
+
+err_action_value_put:
+       nla_nest_cancel(skb, action_attr);
+       return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+                                        struct devlink_dpipe_value *value)
+{
+       if (!value->match)
+               return -EINVAL;
+       if (devlink_dpipe_match_put(skb, value->match))
+               return -EMSGSIZE;
+       if (devlink_dpipe_value_put(skb, value))
+               return -EMSGSIZE;
+       return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+                                         struct devlink_dpipe_value *values,
+                                         unsigned int values_count)
+{
+       struct nlattr *match_attr;
+       int i;
+       int err;
+
+       for (i = 0; i < values_count; i++) {
+               match_attr = nla_nest_start(skb,
+                                           DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+               if (!match_attr)
+                       return -EMSGSIZE;
+               err = devlink_dpipe_match_value_put(skb, &values[i]);
+               if (err)
+                       goto err_match_value_put;
+               nla_nest_end(skb, match_attr);
+       }
+       return 0;
+
+err_match_value_put:
+       nla_nest_cancel(skb, match_attr);
+       return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+                                  struct devlink_dpipe_entry *entry)
+{
+       struct nlattr *entry_attr, *matches_attr, *actions_attr;
+       int err;
+
+       entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+       if (!entry_attr)
+               return -EMSGSIZE;
+
+       if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+                             DEVLINK_ATTR_PAD))
+               goto nla_put_failure;
+       if (entry->counter_valid)
+               if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+                                     entry->counter, DEVLINK_ATTR_PAD))
+                       goto nla_put_failure;
+
+       matches_attr = nla_nest_start(skb,
+                                     DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+       if (!matches_attr)
+               goto nla_put_failure;
+
+       err = devlink_dpipe_match_values_put(skb, entry->match_values,
+                                            entry->match_values_count);
+       if (err) {
+               nla_nest_cancel(skb, matches_attr);
+               goto err_match_values_put;
+       }
+       nla_nest_end(skb, matches_attr);
+
+       actions_attr = nla_nest_start(skb,
+                                     DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+       if (!actions_attr)
+               goto nla_put_failure;
+
+       err = devlink_dpipe_action_values_put(skb, entry->action_values,
+                                             entry->action_values_count);
+       if (err) {
+               nla_nest_cancel(skb, actions_attr);
+               goto err_action_values_put;
+       }
+       nla_nest_end(skb, actions_attr);
 
+       nla_nest_end(skb, entry_attr);
        return 0;
+
+nla_put_failure:
+       err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+       nla_nest_cancel(skb, entry_attr);
+       return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+                        const char *table_name)
+{
+       struct devlink_dpipe_table *table;
+
+       list_for_each_entry_rcu(table, dpipe_tables, list) {
+               if (!strcmp(table->name, table_name))
+                       return table;
+       }
+       return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+       struct devlink *devlink;
+       int err;
+
+       err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+                                              dump_ctx->info);
+       if (err)
+               return err;
+
+       dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+                                   dump_ctx->info->snd_portid,
+                                   dump_ctx->info->snd_seq,
+                                   &devlink_nl_family, NLM_F_MULTI,
+                                   dump_ctx->cmd);
+       if (!dump_ctx->hdr)
+               goto nla_put_failure;
+
+       devlink = dump_ctx->info->user_ptr[0];
+       if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+               goto nla_put_failure;
+       dump_ctx->nest = nla_nest_start(dump_ctx->skb,
+                                       DEVLINK_ATTR_DPIPE_ENTRIES);
+       if (!dump_ctx->nest)
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
+       nlmsg_free(dump_ctx->skb);
+       return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+                                  struct devlink_dpipe_entry *entry)
+{
+       return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+       nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+       genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+                                     enum devlink_command cmd, int flags,
+                                     struct devlink_dpipe_table *table)
+{
+       struct devlink_dpipe_dump_ctx dump_ctx;
+       struct nlmsghdr *nlh;
+       int err;
+
+       dump_ctx.skb = NULL;
+       dump_ctx.cmd = cmd;
+       dump_ctx.info = info;
+
+       err = table->table_ops->entries_dump(table->priv,
+                                            table->counters_enabled,
+                                            &dump_ctx);
+       if (err)
+               goto err_entries_dump;
+
+send_done:
+       nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+                       NLMSG_DONE, 0, flags | NLM_F_MULTI);
+       if (!nlh) {
+               err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+               if (err)
+                       goto err_skb_send_alloc;
+               goto send_done;
+       }
+       return genlmsg_reply(dump_ctx.skb, info);
+
+err_entries_dump:
+err_skb_send_alloc:
+       genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
+       nlmsg_free(dump_ctx.skb);
+       return err;
+}
+
+static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+                                           struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       struct devlink_dpipe_table *table;
+       const char *table_name;
+
+       if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+               return -EINVAL;
+
+       table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+       table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+                                        table_name);
+       if (!table)
+               return -EINVAL;
+
+       if (!table->table_ops->entries_dump)
+               return -EINVAL;
+
+       return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+                                         0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+                                   const struct devlink_dpipe_header *header)
+{
+       struct devlink_dpipe_field *field;
+       struct nlattr *field_attr;
+       int i;
+
+       for (i = 0; i < header->fields_count; i++) {
+               field = &header->fields[i];
+               field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD);
+               if (!field_attr)
+                       return -EMSGSIZE;
+               if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+                   nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+                   nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+                   nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+                       goto nla_put_failure;
+               nla_nest_end(skb, field_attr);
+       }
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, field_attr);
+       return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+                                   struct devlink_dpipe_header *header)
+{
+       struct nlattr *fields_attr, *header_attr;
+       int err;
+
+       header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER);
+       if (!header_attr)
+               return -EMSGSIZE;
+
+       if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+           nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+           nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+               goto nla_put_failure;
+
+       fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+       if (!fields_attr)
+               goto nla_put_failure;
+
+       err = devlink_dpipe_fields_put(skb, header);
+       if (err) {
+               nla_nest_cancel(skb, fields_attr);
+               goto nla_put_failure;
+       }
+       nla_nest_end(skb, fields_attr);
+       nla_nest_end(skb, header_attr);
+       return 0;
+
+nla_put_failure:
+       err = -EMSGSIZE;
+       nla_nest_cancel(skb, header_attr);
+       return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+                                     enum devlink_command cmd, int flags,
+                                     struct devlink_dpipe_headers *dpipe_headers)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       struct nlattr *headers_attr;
+       struct sk_buff *skb = NULL;
+       struct nlmsghdr *nlh;
+       void *hdr;
+       int i, j;
+       int err;
+
+       i = 0;
+start_again:
+       err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+       if (err)
+               return err;
+
+       hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+                         &devlink_nl_family, NLM_F_MULTI, cmd);
+       if (!hdr) {
+               nlmsg_free(skb);
+               return -EMSGSIZE;
+       }
+
+       if (devlink_nl_put_handle(skb, devlink))
+               goto nla_put_failure;
+       headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+       if (!headers_attr)
+               goto nla_put_failure;
+
+       j = 0;
+       for (; i < dpipe_headers->headers_count; i++) {
+               err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+               if (err) {
+                       if (!j)
+                               goto err_table_put;
+                       break;
+               }
+               j++;
+       }
+       nla_nest_end(skb, headers_attr);
+       genlmsg_end(skb, hdr);
+       if (i != dpipe_headers->headers_count)
+               goto start_again;
+
+send_done:
+       nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+                       NLMSG_DONE, 0, flags | NLM_F_MULTI);
+       if (!nlh) {
+               err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+               if (err)
+                       goto err_skb_send_alloc;
+               goto send_done;
+       }
+       return genlmsg_reply(skb, info);
+
+nla_put_failure:
+       err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+       genlmsg_cancel(skb, hdr);
+       nlmsg_free(skb);
+       return err;
+}
+
+static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+                                           struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+
+       if (!devlink->dpipe_headers)
+               return -EOPNOTSUPP;
+       return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+                                         0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+                                           const char *table_name,
+                                           bool enable)
+{
+       struct devlink_dpipe_table *table;
+
+       table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+                                        table_name);
+       if (!table)
+               return -EINVAL;
+
+       if (table->counter_control_extern)
+               return -EOPNOTSUPP;
+
+       if (table->counters_enabled == enable)
+               return 0;
+
+       table->counters_enabled = enable;
+       if (table->table_ops->counters_set_update)
+               table->table_ops->counters_set_update(table->priv, enable);
+       return 0;
+}
+
+static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+                                                  struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       const char *table_name;
+       bool counters_enable;
+
+       if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] ||
+           !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED])
+               return -EINVAL;
+
+       table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+       counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+       return devlink_dpipe_table_counters_set(devlink, table_name,
+                                               counters_enable);
 }
 
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1512,6 +2190,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
        [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
        [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -1644,6 +2324,34 @@ static const struct genl_ops devlink_nl_ops[] = {
                .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
        },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+               .doit = devlink_nl_cmd_dpipe_table_get,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+               .doit = devlink_nl_cmd_dpipe_entries_get,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+               .doit = devlink_nl_cmd_dpipe_headers_get,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+               .doit = devlink_nl_cmd_dpipe_table_counters_set,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -1680,6 +2388,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
        devlink_net_set(devlink, &init_net);
        INIT_LIST_HEAD(&devlink->port_list);
        INIT_LIST_HEAD(&devlink->sb_list);
+       INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
        return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -1880,6 +2589,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
 }
 EXPORT_SYMBOL_GPL(devlink_sb_unregister);
 
+/**
+ *     devlink_dpipe_headers_register - register dpipe headers
+ *
+ *     @devlink: devlink
+ *     @dpipe_headers: dpipe header array
+ *
+ *     Register the headers supported by hardware.
+ */
+int devlink_dpipe_headers_register(struct devlink *devlink,
+                                  struct devlink_dpipe_headers *dpipe_headers)
+{
+       mutex_lock(&devlink_mutex);
+       devlink->dpipe_headers = dpipe_headers;
+       mutex_unlock(&devlink_mutex);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+
+/**
+ *     devlink_dpipe_headers_unregister - unregister dpipe headers
+ *
+ *     @devlink: devlink
+ *
+ *     Unregister the headers supported by hardware.
+ */
+void devlink_dpipe_headers_unregister(struct devlink *devlink)
+{
+       mutex_lock(&devlink_mutex);
+       devlink->dpipe_headers = NULL;
+       mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+
+/**
+ *     devlink_dpipe_table_counter_enabled - check if counter allocation
+ *                                           required
+ *     @devlink: devlink
+ *     @table_name: tables name
+ *
+ *     Used by driver to check if counter allocation is required.
+ *     After counter allocation is turned on the table entries
+ *     are updated to include counter statistics.
+ *
+ *     After that point on the driver must respect the counter
+ *     state so that each entry added to the table is added
+ *     with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+                                        const char *table_name)
+{
+       struct devlink_dpipe_table *table;
+       bool enabled;
+
+       rcu_read_lock();
+       table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+                                        table_name);
+       enabled = false;
+       if (table)
+               enabled = table->counters_enabled;
+       rcu_read_unlock();
+       return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ *     devlink_dpipe_table_register - register dpipe table
+ *
+ *     @devlink: devlink
+ *     @table_name: table name
+ *     @table_ops: table ops
+ *     @priv: driver private data, passed back through the table ops
+ *     @size: size of the table (maximum number of entries)
+ *     @counter_control_extern: external control for counters
+ */
+int devlink_dpipe_table_register(struct devlink *devlink,
+                                const char *table_name,
+                                struct devlink_dpipe_table_ops *table_ops,
+                                void *priv, u64 size,
+                                bool counter_control_extern)
+{
+       struct devlink_dpipe_table *table;
+
+       if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
+               return -EEXIST;
+
+       table = kzalloc(sizeof(*table), GFP_KERNEL);
+       if (!table)
+               return -ENOMEM;
+
+       table->name = table_name;
+       table->table_ops = table_ops;
+       table->priv = priv;
+       table->size = size;
+       table->counter_control_extern = counter_control_extern;
+
+       mutex_lock(&devlink_mutex);
+       list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+       mutex_unlock(&devlink_mutex);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+
+/**
+ *     devlink_dpipe_table_unregister - unregister dpipe table
+ *
+ *     @devlink: devlink
+ *     @table_name: table name
+ */
+void devlink_dpipe_table_unregister(struct devlink *devlink,
+                                   const char *table_name)
+{
+       struct devlink_dpipe_table *table;
+
+       mutex_lock(&devlink_mutex);
+       table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+                                        table_name);
+       if (!table)
+               goto unlock;
+       list_del_rcu(&table->list);
+       mutex_unlock(&devlink_mutex);
+       kfree_rcu(table, rcu);
+       return;
+unlock:
+       mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+
 static int __init devlink_module_init(void)
 {
        return genl_register_family(&devlink_nl_family);
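
Taken together, a driver would typically wire up the dpipe hooks as below; a sketch in which all my_* identifiers and MY_ERIF_SIZE are invented for illustration, and only the devlink_dpipe_* calls come from the code above:

    static struct devlink_dpipe_table_ops my_erif_ops = {
            .matches_dump = my_erif_matches_dump, /* fills TABLE_MATCHES nest */
            .actions_dump = my_erif_actions_dump, /* fills TABLE_ACTIONS nest */
            .entries_dump = my_erif_entries_dump, /* uses the entry_ctx helpers */
            .counters_set_update = my_erif_counters_update,
    };

    static int my_dpipe_init(struct devlink *devlink, void *priv)
    {
            int err;

            err = devlink_dpipe_headers_register(devlink, &my_dpipe_headers);
            if (err)
                    return err;

            err = devlink_dpipe_table_register(devlink, "my_erif",
                                               &my_erif_ops, priv,
                                               MY_ERIF_SIZE, false);
            if (err)
                    devlink_dpipe_headers_unregister(devlink);
            return err;
    }

On the fast path the driver then consults devlink_dpipe_table_counter_enabled() before allocating a hardware counter for a new entry, as the kernel-doc above requires.
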
index fb55327dcfeabdaf3eeecc3a8d176ae215612649..70ccda233bd1f1aab18535e6d9d0419bb9a1a23b 100644 (file)
@@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void)
        for_each_possible_cpu(cpu) {
                data = &per_cpu(dm_cpu_data, cpu);
                INIT_WORK(&data->dm_alert_work, send_dm_alert);
-               init_timer(&data->send_timer);
-               data->send_timer.data = (unsigned long)data;
-               data->send_timer.function = sched_send_work;
+               setup_timer(&data->send_timer, sched_send_work,
+                           (unsigned long)data);
                spin_lock_init(&data->lock);
                reset_per_cpu_data(data);
        }
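The hunk above is a mechanical conversion: setup_timer() bundles the three
open-coded initialization steps into a single call. Schematically, for this
era of the timer API (before timer_setup() changed the callback signature):

    /* open-coded form */
    init_timer(&t);
    t.data = (unsigned long)arg;
    t.function = callback;          /* void callback(unsigned long data) */

    /* equivalent one-liner */
    setup_timer(&t, callback, (unsigned long)arg);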
index aecb2c7241b697e79628fdb79467f5087b2bbf9f..905a88ad28e096d57289eba7f966629336382032 100644 (file)
@@ -109,6 +109,7 @@ static const char
 rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
        [ETH_RSS_HASH_TOP_BIT] =        "toeplitz",
        [ETH_RSS_HASH_XOR_BIT] =        "xor",
+       [ETH_RSS_HASH_CRC32_BIT] =      "crc32",
 };
 
 static const char
index b6791d94841d56cf8b1027d3ba2d71dd21302caf..816e3ccb0ec9ffc41442300f64cc63c97d1af879 100644 (file)
@@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
        KUIDT_INIT(~0),
 };
 
+bool fib_rule_matchall(const struct fib_rule *rule)
+{
+       if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
+           rule->flags)
+               return false;
+       if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
+               return false;
+       if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
+           !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
+               return false;
+       return true;
+}
+EXPORT_SYMBOL_GPL(fib_rule_matchall);
+
 int fib_default_rule_add(struct fib_rules_ops *ops,
                         u32 pref, u32 table, u32 flags)
 {
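fib_rule_matchall() gives callers a single predicate for "this rule carries no
match conditions beyond the defaults". The expected consumers are switch
offload drivers that can only honour the default match-all rules in hardware;
a hedged sketch of such a caller (the my_* name is illustrative, not from this
patch):

    static int my_fib_rule_event(const struct fib_rule *rule)
    {
            /* Any rule with real match conditions cannot be offloaded;
             * fall back to the software FIB.
             */
            if (!fib_rule_matchall(rule))
                    return -EOPNOTSUPP;
            return 0;
    }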
index ebaeaf2e46e8bd0171379604930d232f205afd07..15e9a81ffebe61e3cdb4eea9f14473288f7a3167 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/fcntl.h>
 #include <linux/socket.h>
+#include <linux/sock_diag.h>
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -928,7 +929,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
  */
 static void sk_filter_release(struct sk_filter *fp)
 {
-       if (atomic_dec_and_test(&fp->refcnt))
+       if (refcount_dec_and_test(&fp->refcnt))
                call_rcu(&fp->rcu, sk_filter_release_rcu);
 }
 
@@ -943,20 +944,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
 /* try to charge the socket memory if there is space available
  * return true on success
  */
-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
        u32 filter_size = bpf_prog_size(fp->prog->len);
 
        /* same check as in sock_kmalloc() */
        if (filter_size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
-               atomic_inc(&fp->refcnt);
                atomic_add(filter_size, &sk->sk_omem_alloc);
                return true;
        }
        return false;
 }
 
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+       bool ret = __sk_filter_charge(sk, fp);
+       if (ret)
+               refcount_inc(&fp->refcnt);
+       return ret;
+}
+
 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 {
        struct sock_filter *old_prog;
@@ -1179,12 +1187,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
                return -ENOMEM;
 
        fp->prog = prog;
-       atomic_set(&fp->refcnt, 0);
 
-       if (!sk_filter_charge(sk, fp)) {
+       if (!__sk_filter_charge(sk, fp)) {
                kfree(fp);
                return -ENOMEM;
        }
+       refcount_set(&fp->refcnt, 1);
 
        old_fp = rcu_dereference_protected(sk->sk_filter,
                                           lockdep_sock_is_held(sk));
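The split between __sk_filter_charge() and sk_filter_charge() exists because
refcount_t, unlike atomic_t, refuses to increment from zero: the first
reference must be installed with refcount_set() at creation time, and
refcount_inc() is reserved for objects already known to be live. The
resulting pattern, abstracted from the hunks above:

    refcount_set(&fp->refcnt, 1);           /* attach: first reference    */

    refcount_inc(&fp->refcnt);              /* clone: object already live */

    if (refcount_dec_and_test(&fp->refcnt)) /* release: last put frees    */
            call_rcu(&fp->rcu, sk_filter_release_rcu);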
@@ -2599,6 +2607,36 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
        .arg5_type      = ARG_CONST_SIZE,
 };
 
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+       return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+       .func           = bpf_get_socket_cookie,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+       struct sock *sk = sk_to_full_sk(skb->sk);
+       kuid_t kuid;
+
+       if (!sk || !sk_fullsock(sk))
+               return overflowuid;
+       kuid = sock_net_uid(sock_net(sk), sk);
+       return from_kuid_munged(sock_net(sk)->user_ns, kuid);
+}
+
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+       .func           = bpf_get_socket_uid,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -2633,6 +2671,10 @@ sk_filter_func_proto(enum bpf_func_id func_id)
        switch (func_id) {
        case BPF_FUNC_skb_load_bytes:
                return &bpf_skb_load_bytes_proto;
+       case BPF_FUNC_get_socket_cookie:
+               return &bpf_get_socket_cookie_proto;
+       case BPF_FUNC_get_socket_uid:
+               return &bpf_get_socket_uid_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -2692,6 +2734,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_get_smp_processor_id_proto;
        case BPF_FUNC_skb_under_cgroup:
                return &bpf_skb_under_cgroup_proto;
+       case BPF_FUNC_get_socket_cookie:
+               return &bpf_get_socket_cookie_proto;
+       case BPF_FUNC_get_socket_uid:
+               return &bpf_get_socket_uid_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
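With the two helpers wired into the socket-filter and tc program types, a BPF
program can key accounting or policy on the owning socket. A hedged sketch of
a socket filter using them, in the style of samples/bpf (the helper stubs and
program below are illustrative, assuming the matching uapi helper IDs from
this series):

    #include <linux/types.h>
    #include <linux/bpf.h>

    static __u64 (*bpf_get_socket_cookie)(void *ctx) =
            (void *) BPF_FUNC_get_socket_cookie;
    static __u32 (*bpf_get_socket_uid)(void *ctx) =
            (void *) BPF_FUNC_get_socket_uid;

    /* Socket filter: drop traffic on sockets owned by UID 0, keep the
     * rest; the cookie could key a per-socket accounting map.
     */
    int socket_owner_filter(struct __sk_buff *skb)
    {
            __u64 cookie = bpf_get_socket_cookie(skb);
            __u32 uid = bpf_get_socket_uid(skb);

            if (uid == 0 && cookie)
                    return 0;       /* truncate to zero bytes: drop */
            return -1;              /* keep the full packet */
    }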
@@ -3263,24 +3309,28 @@ static const struct bpf_verifier_ops tc_cls_act_ops = {
        .is_valid_access        = tc_cls_act_is_valid_access,
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
+       .test_run               = bpf_prog_test_run_skb,
 };
 
 static const struct bpf_verifier_ops xdp_ops = {
        .get_func_proto         = xdp_func_proto,
        .is_valid_access        = xdp_is_valid_access,
        .convert_ctx_access     = xdp_convert_ctx_access,
+       .test_run               = bpf_prog_test_run_xdp,
 };
 
 static const struct bpf_verifier_ops cg_skb_ops = {
        .get_func_proto         = cg_skb_func_proto,
        .is_valid_access        = sk_filter_is_valid_access,
        .convert_ctx_access     = bpf_convert_ctx_access,
+       .test_run               = bpf_prog_test_run_skb,
 };
 
 static const struct bpf_verifier_ops lwt_inout_ops = {
        .get_func_proto         = lwt_inout_func_proto,
        .is_valid_access        = lwt_is_valid_access,
        .convert_ctx_access     = bpf_convert_ctx_access,
+       .test_run               = bpf_prog_test_run_skb,
 };
 
 static const struct bpf_verifier_ops lwt_xmit_ops = {
@@ -3288,6 +3338,7 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
        .is_valid_access        = lwt_is_valid_access,
        .convert_ctx_access     = bpf_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
+       .test_run               = bpf_prog_test_run_skb,
 };
 
 static const struct bpf_verifier_ops cg_sock_ops = {
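The .test_run hooks added above are the kernel side of the BPF_PROG_TEST_RUN
command: userspace hands in a canned packet and the kernel runs the program
against it without touching a real interface. A sketch of the userspace side,
assuming the 4.12-era layout of the anonymous test member in union bpf_attr:

    #include <linux/bpf.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>

    static int prog_test_run(int prog_fd, void *pkt, int size)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.test.prog_fd = prog_fd;
            attr.test.data_in = (unsigned long)pkt;
            attr.test.data_size_in = size;
            attr.test.repeat = 1;

            /* On success, attr.test.retval and attr.test.duration are
             * filled in by the kernel.
             */
            return syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
    }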
index f765c11d8df567d704998185482c3d220280c148..f7f5d1932a2720767dd31f4033f196815ff08447 100644 (file)
@@ -47,7 +47,7 @@ struct flow_flush_info {
 
 static struct kmem_cache *flow_cachep __read_mostly;
 
-#define flow_cache_hash_size(cache)    (1 << (cache)->hash_shift)
+#define flow_cache_hash_size(cache)    (1U << (cache)->hash_shift)
 #define FLOW_HASH_RND_PERIOD           (10 * 60 * HZ)
 
 static void flow_cache_new_hashrnd(unsigned long arg)
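The 1 -> 1U change is not merely cosmetic: for a 32-bit int, 1 << 31 overflows
the signed type and is undefined behaviour, while the unsigned literal is well
defined for the full 0..31 shift range and keeps the surrounding size
arithmetic consistently unsigned. In miniature:

    unsigned int ok  = 1U << 31;    /* 2147483648u, well defined          */
    /* int      bad  = 1  << 31;       UB: signed shift overflows int     */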
@@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work)
 }
 
 static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-                                    int deleted, struct list_head *gc_list,
+                                    unsigned int deleted,
+                                    struct list_head *gc_list,
                                     struct netns_xfrm *xfrm)
 {
        if (deleted) {
@@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
 
 static void __flow_cache_shrink(struct flow_cache *fc,
                                struct flow_cache_percpu *fcp,
-                               int shrink_to)
+                               unsigned int shrink_to)
 {
        struct flow_cache_entry *fle;
        struct hlist_node *tmp;
        LIST_HEAD(gc_list);
-       int i, deleted = 0;
+       unsigned int deleted = 0;
        struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
                                                flow_cache_global);
+       unsigned int i;
 
        for (i = 0; i < flow_cache_hash_size(fc); i++) {
-               int saved = 0;
+               unsigned int saved = 0;
 
                hlist_for_each_entry_safe(fle, tmp,
                                          &fcp->hash_table[i], u.hlist) {
@@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 static void flow_cache_shrink(struct flow_cache *fc,
                              struct flow_cache_percpu *fcp)
 {
-       int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+       unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
 
        __flow_cache_shrink(fc, fcp, shrink_to);
 }
@@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
 static u32 flow_hash_code(struct flow_cache *fc,
                          struct flow_cache_percpu *fcp,
                          const struct flowi *key,
-                         size_t keysize)
+                         unsigned int keysize)
 {
        const u32 *k = (const u32 *) key;
        const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
@@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc,
  * important assumptions that we can here, such as alignment.
  */
 static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
-                           size_t keysize)
+                           unsigned int keysize)
 {
        const flow_compare_t *k1, *k1_lim, *k2;
 
@@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
        struct flow_cache_percpu *fcp;
        struct flow_cache_entry *fle, *tfle;
        struct flow_cache_object *flo;
-       size_t keysize;
+       unsigned int keysize;
        unsigned int hash;
 
        local_bh_disable();
@@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data)
        struct flow_cache_entry *fle;
        struct hlist_node *tmp;
        LIST_HEAD(gc_list);
-       int i, deleted = 0;
+       unsigned int deleted = 0;
        struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
                                                flow_cache_global);
+       unsigned int i;
 
        fcp = this_cpu_ptr(fc->percpu);
        for (i = 0; i < flow_cache_hash_size(fc); i++) {
@@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
 static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
 {
        struct flow_cache_percpu *fcp;
-       int i;
+       unsigned int i;
 
        fcp = per_cpu_ptr(fc->percpu, cpu);
        for (i = 0; i < flow_cache_hash_size(fc); i++)
@@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net)
 static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 {
        struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-       size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+       unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
 
        if (!fcp->hash_table) {
                fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
                if (!fcp->hash_table) {
-                       pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+                       pr_err("NET: failed to allocate flow cache sz %u\n", sz);
                        return -ENOMEM;
                }
                fcp->hash_rnd_recalc = 1;
index c35aae13c8d22680cb07222cbd9f1ee976f0bd64..5f3ae922fcd1d31580e1c3735d9b139d40212090 100644 (file)
@@ -113,6 +113,216 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
+enum flow_dissect_ret {
+       FLOW_DISSECT_RET_OUT_GOOD,
+       FLOW_DISSECT_RET_OUT_BAD,
+       FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
+};
+
+static enum flow_dissect_ret
+__skb_flow_dissect_mpls(const struct sk_buff *skb,
+                       struct flow_dissector *flow_dissector,
+                       void *target_container, void *data, int nhoff, int hlen)
+{
+       struct flow_dissector_key_keyid *key_keyid;
+       struct mpls_label *hdr, _hdr[2];
+
+       if (!dissector_uses_key(flow_dissector,
+                               FLOW_DISSECTOR_KEY_MPLS_ENTROPY))
+               return FLOW_DISSECT_RET_OUT_GOOD;
+
+       hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+                                  hlen, &_hdr);
+       if (!hdr)
+               return FLOW_DISSECT_RET_OUT_BAD;
+
+       if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
+           MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
+               key_keyid = skb_flow_dissector_target(flow_dissector,
+                                                     FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+                                                     target_container);
+               key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+       }
+       return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_arp(const struct sk_buff *skb,
+                      struct flow_dissector *flow_dissector,
+                      void *target_container, void *data, int nhoff, int hlen)
+{
+       struct flow_dissector_key_arp *key_arp;
+       struct {
+               unsigned char ar_sha[ETH_ALEN];
+               unsigned char ar_sip[4];
+               unsigned char ar_tha[ETH_ALEN];
+               unsigned char ar_tip[4];
+       } *arp_eth, _arp_eth;
+       const struct arphdr *arp;
+       struct arphdr *_arp;
+
+       if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
+               return FLOW_DISSECT_RET_OUT_GOOD;
+
+       arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+                                  hlen, &_arp);
+       if (!arp)
+               return FLOW_DISSECT_RET_OUT_BAD;
+
+       if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+           arp->ar_pro != htons(ETH_P_IP) ||
+           arp->ar_hln != ETH_ALEN ||
+           arp->ar_pln != 4 ||
+           (arp->ar_op != htons(ARPOP_REPLY) &&
+            arp->ar_op != htons(ARPOP_REQUEST)))
+               return FLOW_DISSECT_RET_OUT_BAD;
+
+       arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+                                      sizeof(_arp_eth), data,
+                                      hlen, &_arp_eth);
+       if (!arp_eth)
+               return FLOW_DISSECT_RET_OUT_BAD;
+
+       key_arp = skb_flow_dissector_target(flow_dissector,
+                                           FLOW_DISSECTOR_KEY_ARP,
+                                           target_container);
+
+       memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
+       memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
+
+       /* Only store the lower byte of the opcode;
+        * this covers ARPOP_REPLY and ARPOP_REQUEST.
+        */
+       key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+       ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+       ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+
+       return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_gre(const struct sk_buff *skb,
+                      struct flow_dissector_key_control *key_control,
+                      struct flow_dissector *flow_dissector,
+                      void *target_container, void *data,
+                      __be16 *p_proto, int *p_nhoff, int *p_hlen,
+                      unsigned int flags)
+{
+       struct flow_dissector_key_keyid *key_keyid;
+       struct gre_base_hdr *hdr, _hdr;
+       int offset = 0;
+       u16 gre_ver;
+
+       hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
+                                  data, *p_hlen, &_hdr);
+       if (!hdr)
+               return FLOW_DISSECT_RET_OUT_BAD;
+
+       /* Only look inside GRE without routing */
+       if (hdr->flags & GRE_ROUTING)
+               return FLOW_DISSECT_RET_OUT_GOOD;
+
+       /* Only look inside GRE for version 0 and 1 */
+       gre_ver = ntohs(hdr->flags & GRE_VERSION);
+       if (gre_ver > 1)
+               return FLOW_DISSECT_RET_OUT_GOOD;
+
+       *p_proto = hdr->protocol;
+       if (gre_ver) {
+               /* Version 1 must be PPTP: PPP protocol with GRE_KEY set */

+               if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+                       return FLOW_DISSECT_RET_OUT_GOOD;
+       }
+
+       offset += sizeof(struct gre_base_hdr);
+
+       if (hdr->flags & GRE_CSUM)
+               offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
+                         sizeof(((struct gre_full_hdr *) 0)->reserved1);
+
+       if (hdr->flags & GRE_KEY) {
+               const __be32 *keyid;
+               __be32 _keyid;
+
+               keyid = __skb_header_pointer(skb, *p_nhoff + offset,
+                                            sizeof(_keyid),
+                                            data, *p_hlen, &_keyid);
+               if (!keyid)
+                       return FLOW_DISSECT_RET_OUT_BAD;
+
+               if (dissector_uses_key(flow_dissector,
+                                      FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+                       key_keyid = skb_flow_dissector_target(flow_dissector,
+                                                             FLOW_DISSECTOR_KEY_GRE_KEYID,
+                                                             target_container);
+                       if (gre_ver == 0)
+                               key_keyid->keyid = *keyid;
+                       else
+                               key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
+               }
+               offset += sizeof(((struct gre_full_hdr *) 0)->key);
+       }
+
+       if (hdr->flags & GRE_SEQ)
+               offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+
+       if (gre_ver == 0) {
+               if (*p_proto == htons(ETH_P_TEB)) {
+                       const struct ethhdr *eth;
+                       struct ethhdr _eth;
+
+                       eth = __skb_header_pointer(skb, *p_nhoff + offset,
+                                                  sizeof(_eth),
+                                                  data, *p_hlen, &_eth);
+                       if (!eth)
+                               return FLOW_DISSECT_RET_OUT_BAD;
+                       *p_proto = eth->h_proto;
+                       offset += sizeof(*eth);
+
+                       /* Cap headers that we access via pointers at the
+                        * end of the Ethernet header as our maximum alignment
+                        * at that point is only 2 bytes.
+                        */
+                       if (NET_IP_ALIGN)
+                               *p_hlen = *p_nhoff + offset;
+               }
+       } else { /* version 1, must be PPTP */
+               u8 _ppp_hdr[PPP_HDRLEN];
+               u8 *ppp_hdr;
+
+               if (hdr->flags & GRE_ACK)
+                       offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+
+               ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
+                                              sizeof(_ppp_hdr),
+                                              data, *p_hlen, _ppp_hdr);
+               if (!ppp_hdr)
+                       return FLOW_DISSECT_RET_OUT_BAD;
+
+               switch (PPP_PROTOCOL(ppp_hdr)) {
+               case PPP_IP:
+                       *p_proto = htons(ETH_P_IP);
+                       break;
+               case PPP_IPV6:
+                       *p_proto = htons(ETH_P_IPV6);
+                       break;
+               default:
+                       /* Could probably catch some more like MPLS */
+                       break;
+               }
+
+               offset += PPP_HDRLEN;
+       }
+
+       *p_nhoff += offset;
+       key_control->flags |= FLOW_DIS_ENCAPSULATION;
+       if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+               return FLOW_DISSECT_RET_OUT_GOOD;
+
+       return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -138,12 +348,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
        struct flow_dissector_key_control *key_control;
        struct flow_dissector_key_basic *key_basic;
        struct flow_dissector_key_addrs *key_addrs;
-       struct flow_dissector_key_arp *key_arp;
        struct flow_dissector_key_ports *key_ports;
        struct flow_dissector_key_icmp *key_icmp;
        struct flow_dissector_key_tags *key_tags;
        struct flow_dissector_key_vlan *key_vlan;
-       struct flow_dissector_key_keyid *key_keyid;
        bool skip_vlan = false;
        u8 ip_proto = 0;
        bool ret;
@@ -181,7 +389,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
                memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
        }
 
-again:
+proto_again:
        switch (proto) {
        case htons(ETH_P_IP): {
                const struct iphdr *iph;
@@ -284,7 +492,7 @@ ipv6:
                        proto = vlan->h_vlan_encapsulated_proto;
                        nhoff += sizeof(*vlan);
                        if (skip_vlan)
-                               goto again;
+                               goto proto_again;
                }
 
                skip_vlan = true;
@@ -307,7 +515,7 @@ ipv6:
                        }
                }
 
-               goto again;
+               goto proto_again;
        }
        case htons(ETH_P_PPP_SES): {
                struct {
@@ -349,31 +557,17 @@ ipv6:
        }
 
        case htons(ETH_P_MPLS_UC):
-       case htons(ETH_P_MPLS_MC): {
-               struct mpls_label *hdr, _hdr[2];
+       case htons(ETH_P_MPLS_MC):
 mpls:
-               hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
-                                          hlen, &_hdr);
-               if (!hdr)
-                       goto out_bad;
-
-               if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
-                    MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
-                       if (dissector_uses_key(flow_dissector,
-                                              FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
-                               key_keyid = skb_flow_dissector_target(flow_dissector,
-                                                                     FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
-                                                                     target_container);
-                               key_keyid->keyid = hdr[1].entry &
-                                       htonl(MPLS_LS_LABEL_MASK);
-                       }
-
+               switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+                                               target_container, data,
+                                               nhoff, hlen)) {
+               case FLOW_DISSECT_RET_OUT_GOOD:
                        goto out_good;
+               case FLOW_DISSECT_RET_OUT_BAD:
+               default:
+                       goto out_bad;
                }
-
-               goto out_good;
-       }
-
        case htons(ETH_P_FCOE):
                if ((hlen - nhoff) < FCOE_HEADER_LEN)
                        goto out_bad;
@@ -382,177 +576,33 @@ mpls:
                goto out_good;
 
        case htons(ETH_P_ARP):
-       case htons(ETH_P_RARP): {
-               struct {
-                       unsigned char ar_sha[ETH_ALEN];
-                       unsigned char ar_sip[4];
-                       unsigned char ar_tha[ETH_ALEN];
-                       unsigned char ar_tip[4];
-               } *arp_eth, _arp_eth;
-               const struct arphdr *arp;
-               struct arphdr *_arp;
-
-               arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
-                                          hlen, &_arp);
-               if (!arp)
-                       goto out_bad;
-
-               if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
-                   arp->ar_pro != htons(ETH_P_IP) ||
-                   arp->ar_hln != ETH_ALEN ||
-                   arp->ar_pln != 4 ||
-                   (arp->ar_op != htons(ARPOP_REPLY) &&
-                    arp->ar_op != htons(ARPOP_REQUEST)))
-                       goto out_bad;
-
-               arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
-                                              sizeof(_arp_eth), data,
-                                              hlen,
-                                              &_arp_eth);
-               if (!arp_eth)
+       case htons(ETH_P_RARP):
+               switch (__skb_flow_dissect_arp(skb, flow_dissector,
+                                              target_container, data,
+                                              nhoff, hlen)) {
+               case FLOW_DISSECT_RET_OUT_GOOD:
+                       goto out_good;
+               case FLOW_DISSECT_RET_OUT_BAD:
+               default:
                        goto out_bad;
-
-               if (dissector_uses_key(flow_dissector,
-                                      FLOW_DISSECTOR_KEY_ARP)) {
-
-                       key_arp = skb_flow_dissector_target(flow_dissector,
-                                                           FLOW_DISSECTOR_KEY_ARP,
-                                                           target_container);
-
-                       memcpy(&key_arp->sip, arp_eth->ar_sip,
-                              sizeof(key_arp->sip));
-                       memcpy(&key_arp->tip, arp_eth->ar_tip,
-                              sizeof(key_arp->tip));
-
-                       /* Only store the lower byte of the opcode;
-                        * this covers ARPOP_REPLY and ARPOP_REQUEST.
-                        */
-                       key_arp->op = ntohs(arp->ar_op) & 0xff;
-
-                       ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
-                       ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
                }
-
-               goto out_good;
-       }
-
        default:
                goto out_bad;
        }
 
 ip_proto_again:
        switch (ip_proto) {
-       case IPPROTO_GRE: {
-               struct gre_base_hdr *hdr, _hdr;
-               u16 gre_ver;
-               int offset = 0;
-
-               hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-               if (!hdr)
+       case IPPROTO_GRE:
+               switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+                                              target_container, data,
+                                              &proto, &nhoff, &hlen, flags)) {
+               case FLOW_DISSECT_RET_OUT_GOOD:
+                       goto out_good;
+               case FLOW_DISSECT_RET_OUT_BAD:
                        goto out_bad;
-
-               /* Only look inside GRE without routing */
-               if (hdr->flags & GRE_ROUTING)
-                       break;
-
-               /* Only look inside GRE for version 0 and 1 */
-               gre_ver = ntohs(hdr->flags & GRE_VERSION);
-               if (gre_ver > 1)
-                       break;
-
-               proto = hdr->protocol;
-               if (gre_ver) {
-                       /* Version1 must be PPTP, and check the flags */
-                       if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
-                               break;
-               }
-
-               offset += sizeof(struct gre_base_hdr);
-
-               if (hdr->flags & GRE_CSUM)
-                       offset += sizeof(((struct gre_full_hdr *)0)->csum) +
-                                 sizeof(((struct gre_full_hdr *)0)->reserved1);
-
-               if (hdr->flags & GRE_KEY) {
-                       const __be32 *keyid;
-                       __be32 _keyid;
-
-                       keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
-                                                    data, hlen, &_keyid);
-                       if (!keyid)
-                               goto out_bad;
-
-                       if (dissector_uses_key(flow_dissector,
-                                              FLOW_DISSECTOR_KEY_GRE_KEYID)) {
-                               key_keyid = skb_flow_dissector_target(flow_dissector,
-                                                                     FLOW_DISSECTOR_KEY_GRE_KEYID,
-                                                                     target_container);
-                               if (gre_ver == 0)
-                                       key_keyid->keyid = *keyid;
-                               else
-                                       key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
-                       }
-                       offset += sizeof(((struct gre_full_hdr *)0)->key);
+               case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
+                       goto proto_again;
                }
-
-               if (hdr->flags & GRE_SEQ)
-                       offset += sizeof(((struct pptp_gre_header *)0)->seq);
-
-               if (gre_ver == 0) {
-                       if (proto == htons(ETH_P_TEB)) {
-                               const struct ethhdr *eth;
-                               struct ethhdr _eth;
-
-                               eth = __skb_header_pointer(skb, nhoff + offset,
-                                                          sizeof(_eth),
-                                                          data, hlen, &_eth);
-                               if (!eth)
-                                       goto out_bad;
-                               proto = eth->h_proto;
-                               offset += sizeof(*eth);
-
-                               /* Cap headers that we access via pointers at the
-                                * end of the Ethernet header as our maximum alignment
-                                * at that point is only 2 bytes.
-                                */
-                               if (NET_IP_ALIGN)
-                                       hlen = (nhoff + offset);
-                       }
-               } else { /* version 1, must be PPTP */
-                       u8 _ppp_hdr[PPP_HDRLEN];
-                       u8 *ppp_hdr;
-
-                       if (hdr->flags & GRE_ACK)
-                               offset += sizeof(((struct pptp_gre_header *)0)->ack);
-
-                       ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
-                                                    sizeof(_ppp_hdr),
-                                                    data, hlen, _ppp_hdr);
-                       if (!ppp_hdr)
-                               goto out_bad;
-
-                       switch (PPP_PROTOCOL(ppp_hdr)) {
-                       case PPP_IP:
-                               proto = htons(ETH_P_IP);
-                               break;
-                       case PPP_IPV6:
-                               proto = htons(ETH_P_IPV6);
-                               break;
-                       default:
-                               /* Could probably catch some more like MPLS */
-                               break;
-                       }
-
-                       offset += PPP_HDRLEN;
-               }
-
-               nhoff += offset;
-               key_control->flags |= FLOW_DIS_ENCAPSULATION;
-               if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-                       goto out_good;
-
-               goto again;
-       }
        case NEXTHDR_HOP:
        case NEXTHDR_ROUTING:
        case NEXTHDR_DEST: {
index 6df9f8fabf0ca5d2ced3070406900b7ec28a7924..b5888190223c4d3978639c81fdec3a19dd6ee8f2 100644 (file)
@@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
        struct rtnexthop *rtnh = (struct rtnexthop *)attr;
        struct nlattr *nla_entype;
        struct nlattr *attrs;
-       struct nlattr *nla;
        u16 encap_type;
        int attrlen;
 
@@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
                attrlen = rtnh_attrlen(rtnh);
                if (attrlen > 0) {
                        attrs = rtnh_attrs(rtnh);
-                       nla = nla_find(attrs, attrlen, RTA_ENCAP);
                        nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
 
                        if (nla_entype) {
index e7c12caa20c88acc9a5dd86f07d11644fb58341d..7069f5e4a361d11825e9170853ea9ab060d18ec0 100644 (file)
@@ -52,8 +52,9 @@ do {                                          \
 #define PNEIGH_HASHMASK                0xF
 
 static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+                          u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
 
 #ifdef CONFIG_PROC_FS
@@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
        if (neigh->parms->neigh_cleanup)
                neigh->parms->neigh_cleanup(neigh);
 
-       __neigh_notify(neigh, RTM_DELNEIGH, 0);
+       __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        neigh_release(neigh);
 }
@@ -948,7 +949,7 @@ out:
        }
 
        if (notify)
-               neigh_update_notify(neigh);
+               neigh_update_notify(neigh, 0);
 
        neigh_release(neigh);
 }
@@ -1072,7 +1073,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
  */
 
 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
-                u32 flags)
+                u32 flags, u32 nlmsg_pid)
 {
        u8 old;
        int err;
@@ -1229,7 +1230,7 @@ out:
        write_unlock_bh(&neigh->lock);
 
        if (notify)
-               neigh_update_notify(neigh);
+               neigh_update_notify(neigh, nlmsg_pid);
 
        return err;
 }
@@ -1260,7 +1261,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
                                                 lladdr || !dev->addr_len);
        if (neigh)
                neigh_update(neigh, lladdr, NUD_STALE,
-                            NEIGH_UPDATE_F_OVERRIDE);
+                            NEIGH_UPDATE_F_OVERRIDE, 0);
        return neigh;
 }
 EXPORT_SYMBOL(neigh_event_ns);
@@ -1638,7 +1639,8 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
 
        err = neigh_update(neigh, NULL, NUD_FAILED,
                           NEIGH_UPDATE_F_OVERRIDE |
-                          NEIGH_UPDATE_F_ADMIN);
+                          NEIGH_UPDATE_F_ADMIN,
+                          NETLINK_CB(skb).portid);
        neigh_release(neigh);
 
 out:
@@ -1729,7 +1731,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
                neigh_event_send(neigh, NULL);
                err = 0;
        } else
-               err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+               err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+                                  NETLINK_CB(skb).portid);
        neigh_release(neigh);
 
 out:
@@ -2229,10 +2232,10 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
 {
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
-       __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+       __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
 }
 
 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -2830,7 +2833,8 @@ static inline size_t neigh_nlmsg_size(void)
               + nla_total_size(4); /* NDA_PROBES */
 }
 
-static void __neigh_notify(struct neighbour *n, int type, int flags)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+                          u32 pid)
 {
        struct net *net = dev_net(n->dev);
        struct sk_buff *skb;
@@ -2840,7 +2844,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
        if (skb == NULL)
                goto errout;
 
-       err = neigh_fill_info(skb, n, 0, 0, type, flags);
+       err = neigh_fill_info(skb, n, pid, 0, type, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
@@ -2856,7 +2860,7 @@ errout:
 
 void neigh_app_ns(struct neighbour *n)
 {
-       __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+       __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
 }
 EXPORT_SYMBOL(neigh_app_ns);
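Threading the requester's netlink portid through to the notification means an
RTM_NEWNEIGH or RTM_DELNEIGH event now carries the nlmsg_pid of the process
whose request caused it, instead of 0. A monitoring daemon can then tell its
own updates apart from foreign ones; a hedged userspace sketch:

    #include <linux/netlink.h>

    /* Inside the receive loop; my_portid is the netlink socket's bound
     * portid.
     */
    static int is_own_event(const struct nlmsghdr *nlh, __u32 my_portid)
    {
            return nlh->nlmsg_pid == my_portid;
    }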
 
index 6ae56037bb1336d9cb6b6fc36043a203f3978202..029a61ac6cdd8a0b4dd54d2be3c5bdf047a82cb0 100644 (file)
@@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
        return 0;
 }
 
-static void update_classid(struct cgroup_subsys_state *css, void *v)
+static void cgrp_attach(struct cgroup_taskset *tset)
 {
-       struct css_task_iter it;
+       struct cgroup_subsys_state *css;
        struct task_struct *p;
 
-       css_task_iter_start(css, &it);
-       while ((p = css_task_iter_next(&it))) {
+       cgroup_taskset_for_each(p, css, tset) {
                task_lock(p);
-               iterate_fd(p->files, 0, update_classid_sock, v);
+               iterate_fd(p->files, 0, update_classid_sock,
+                          (void *)(unsigned long)css_cls_state(css)->classid);
                task_unlock(p);
        }
-       css_task_iter_end(&it);
-}
-
-static void cgrp_attach(struct cgroup_taskset *tset)
-{
-       struct cgroup_subsys_state *css;
-
-       cgroup_taskset_first(tset, &css);
-       update_classid(css,
-                      (void *)(unsigned long)css_cls_state(css)->classid);
 }
 
 static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
                         u64 value)
 {
        struct cgroup_cls_state *cs = css_cls_state(css);
+       struct css_task_iter it;
+       struct task_struct *p;
 
        cgroup_sk_alloc_disable();
 
        cs->classid = (u32)value;
 
-       update_classid(css, (void *)(unsigned long)cs->classid);
+       css_task_iter_start(css, &it);
+       while ((p = css_task_iter_next(&it))) {
+               task_lock(p);
+               iterate_fd(p->files, 0, update_classid_sock,
+                          (void *)(unsigned long)cs->classid);
+               task_unlock(p);
+       }
+       css_task_iter_end(&it);
+
        return 0;
 }
 
index 0f9275ee55958156a6cbac3f0d2b1ff54c3c89a5..1c4810919a0a35900d45a659de0cd780b7e500d3 100644 (file)
@@ -11,6 +11,6 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/module.h>
index c4e84c55824085b343679cc295a81f7b35e3acaf..9c3947a43eff8b299e66255cec0e1718d630d316 100644 (file)
@@ -4185,6 +4185,7 @@ void __init rtnetlink_init(void)
 
        rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
+       rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
 
        rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
        rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
index 758f140b6bedc51669fed973b39ee317c2bf1570..fb87e78a2cc732ff5c75e5b2b0415c2fe805d990 100644 (file)
@@ -45,8 +45,8 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
-                                __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
+                              __be16 sport, __be16 dport, u32 *tsoff)
 {
        const struct {
                struct in6_addr saddr;
@@ -66,7 +66,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
        *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
        return seq_scale(hash);
 }
-EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
 
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
                               __be16 dport)
@@ -89,14 +89,13 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 
 #ifdef CONFIG_INET
 
-/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
  * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
  * it would be easy enough to have the former function use siphash_4u32, passing
  * the arguments as separate u32.
  */
-
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
-                              __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
+                            __be16 sport, __be16 dport, u32 *tsoff)
 {
        u64 hash;
        net_secret_init();
index cd4ba8c6b6091651403cf74de8c60ccf69aa3e7b..9f781092fda9cb8cac22b0743b4bc7666a3bd91a 100644 (file)
@@ -3694,6 +3694,15 @@ static void sock_rmem_free(struct sk_buff *skb)
        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 }
 
+static void skb_set_err_queue(struct sk_buff *skb)
+{
+       /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+        * So, it is safe to (mis)use it to mark skbs on the error queue.
+        */
+       skb->pkt_type = PACKET_OUTGOING;
+       BUILD_BUG_ON(PACKET_OUTGOING == 0);
+}
+
 /*
  * Note: We dont mem charge error packets (no sk_forward_alloc changes)
  */
@@ -3707,6 +3716,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
        skb->sk = sk;
        skb->destructor = sock_rmem_free;
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+       skb_set_err_queue(skb);
 
        /* before exiting rcu section, make sure dst is refcounted */
        skb_dst_force(skb);
@@ -3783,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk);
 
 static void __skb_complete_tx_timestamp(struct sk_buff *skb,
                                        struct sock *sk,
-                                       int tstype)
+                                       int tstype,
+                                       bool opt_stats)
 {
        struct sock_exterr_skb *serr;
        int err;
 
+       BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
        serr = SKB_EXT_ERR(skb);
        memset(serr, 0, sizeof(*serr));
        serr->ee.ee_errno = ENOMSG;
        serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
        serr->ee.ee_info = tstype;
+       serr->opt_stats = opt_stats;
        if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
                serr->ee.ee_data = skb_shinfo(skb)->tskey;
                if (sk->sk_protocol == IPPROTO_TCP &&
@@ -3833,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
         */
        if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
                *skb_hwtstamps(skb) = *hwtstamps;
-               __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+               __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
                sock_put(sk);
        }
 }
@@ -3844,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                     struct sock *sk, int tstype)
 {
        struct sk_buff *skb;
-       bool tsonly;
+       bool tsonly, opt_stats = false;
 
        if (!sk)
                return;
@@ -3857,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 #ifdef CONFIG_INET
                if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                    sk->sk_protocol == IPPROTO_TCP &&
-                   sk->sk_type == SOCK_STREAM)
+                   sk->sk_type == SOCK_STREAM) {
                        skb = tcp_get_timestamping_opt_stats(sk);
-               else
+                       opt_stats = true;
+               } else
 #endif
                        skb = alloc_skb(0, GFP_ATOMIC);
        } else {
@@ -3878,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
        else
                skb->tstamp = ktime_get_real();
 
-       __skb_complete_tx_timestamp(skb, sk, tstype);
+       __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
 }
 EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
 
index acb0d413749968f24ffc7df3e366b095f80e10f4..392f9b6f96e26b89751ec97f26d08a52e1c7cf57 100644 (file)
@@ -247,12 +247,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
        _sock_locks("k-clock-")
 };
+static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
+  "rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
+  "rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
+  "rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
+  "rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
+  "rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
+  "rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
+  "rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
+  "rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
+  "rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
+  "rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
+  "rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
+  "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
+  "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
+  "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
+  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
+};
+static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
+  "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
+  "wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
+  "wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
+  "wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
+  "wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
+  "wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
+  "wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
+  "wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
+  "wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
+  "wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
+  "wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
+  "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
+  "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
+  "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
+  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
+};
+static const char *const af_family_elock_key_strings[AF_MAX+1] = {
+  "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
+  "elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
+  "elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
+  "elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
+  "elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
+  "elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
+  "elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
+  "elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
+  "elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
+  "elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
+  "elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
+  "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
+  "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
+  "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
+  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
+};
 
 /*
- * sk_callback_lock locking rules are per-address-family,
+ * sk_callback_lock and sk queues locking rules are per-address-family,
  * so split the lock classes by using a per-AF key:
  */
 static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_rlock_keys[AF_MAX];
+static struct lock_class_key af_wlock_keys[AF_MAX];
+static struct lock_class_key af_elock_keys[AF_MAX];
 static struct lock_class_key af_kern_callback_keys[AF_MAX];
 
 /* Take into consideration the size of the struct sk_buff overhead in the
@@ -1259,6 +1313,33 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sk->sk_incoming_cpu;
                break;
 
+       case SO_MEMINFO:
+       {
+               u32 meminfo[SK_MEMINFO_VARS];
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               sk_get_meminfo(sk, meminfo);
+
+               len = min_t(unsigned int, len, sizeof(meminfo));
+               if (copy_to_user(optval, &meminfo, len))
+                       return -EFAULT;
+
+               goto lenout;
+       }
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       case SO_INCOMING_NAPI_ID:
+               v.val = READ_ONCE(sk->sk_napi_id);
+
+               /* aggregate non-NAPI IDs down to 0 */
+               if (v.val < MIN_NAPI_ID)
+                       v.val = 0;
+
+               break;
+#endif
+
        default:
                /* We implement the SO_SNDLOWAT etc to not be settable
                 * (1003.1g 7).
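Both new options are read-only views: SO_MEMINFO returns the same
SK_MEMINFO_VARS array that sock_diag exposes over netlink, but for one's own
socket without a netlink round trip, and SO_INCOMING_NAPI_ID reports which
NAPI context last delivered to the socket. A userspace sketch for the former
(SO_MEMINFO's value, 55 on asm-generic, is an assumption for older libc
headers):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <linux/sock_diag.h>

    #ifndef SO_MEMINFO
    #define SO_MEMINFO 55   /* asm-generic value; verify for your arch */
    #endif

    int print_sock_meminfo(int fd)
    {
            unsigned int mem[SK_MEMINFO_VARS];
            socklen_t len = sizeof(mem);

            if (getsockopt(fd, SOL_SOCKET, SO_MEMINFO, mem, &len) < 0)
                    return -1;
            printf("rmem_alloc=%u rcvbuf=%u drops=%u\n",
                   mem[SK_MEMINFO_RMEM_ALLOC], mem[SK_MEMINFO_RCVBUF],
                   mem[SK_MEMINFO_DROPS]);
            return 0;
    }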
@@ -1483,6 +1564,27 @@ void sk_free(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_free);
 
+static void sk_init_common(struct sock *sk)
+{
+       skb_queue_head_init(&sk->sk_receive_queue);
+       skb_queue_head_init(&sk->sk_write_queue);
+       skb_queue_head_init(&sk->sk_error_queue);
+
+       rwlock_init(&sk->sk_callback_lock);
+       lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
+                       af_rlock_keys + sk->sk_family,
+                       af_family_rlock_key_strings[sk->sk_family]);
+       lockdep_set_class_and_name(&sk->sk_write_queue.lock,
+                       af_wlock_keys + sk->sk_family,
+                       af_family_wlock_key_strings[sk->sk_family]);
+       lockdep_set_class_and_name(&sk->sk_error_queue.lock,
+                       af_elock_keys + sk->sk_family,
+                       af_family_elock_key_strings[sk->sk_family]);
+       lockdep_set_class_and_name(&sk->sk_callback_lock,
+                       af_callback_keys + sk->sk_family,
+                       af_family_clock_key_strings[sk->sk_family]);
+}
+
 /**
  *     sk_clone_lock - clone a socket, and lock its clone
  *     @sk: the socket to clone
@@ -1516,13 +1618,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                 */
                atomic_set(&newsk->sk_wmem_alloc, 1);
                atomic_set(&newsk->sk_omem_alloc, 0);
-               skb_queue_head_init(&newsk->sk_receive_queue);
-               skb_queue_head_init(&newsk->sk_write_queue);
-
-               rwlock_init(&newsk->sk_callback_lock);
-               lockdep_set_class_and_name(&newsk->sk_callback_lock,
-                               af_callback_keys + newsk->sk_family,
-                               af_family_clock_key_strings[newsk->sk_family]);
+               sk_init_common(newsk);
 
                newsk->sk_dst_cache     = NULL;
                newsk->sk_dst_pending_confirm = 0;
@@ -1533,7 +1629,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 
                sock_reset_flag(newsk, SOCK_DONE);
-               skb_queue_head_init(&newsk->sk_error_queue);
 
                filter = rcu_dereference_protected(newsk->sk_filter, 1);
                if (filter != NULL)
@@ -1544,6 +1639,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                        is_charged = sk_filter_charge(newsk, filter);
 
                if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+                       /* Make sure we don't uncharge the new socket if we
+                        * couldn't charge it in the first place, as otherwise
+                        * we would uncharge the parent's filter.
+                        */
+                       if (!is_charged)
+                               RCU_INIT_POINTER(newsk->sk_filter, NULL);
                        sk_free_unlock_clone(newsk);
                        newsk = NULL;
                        goto out;
@@ -2460,10 +2561,7 @@ EXPORT_SYMBOL(sk_stop_timer);
 
 void sock_init_data(struct socket *sock, struct sock *sk)
 {
-       skb_queue_head_init(&sk->sk_receive_queue);
-       skb_queue_head_init(&sk->sk_write_queue);
-       skb_queue_head_init(&sk->sk_error_queue);
-
+       sk_init_common(sk);
        sk->sk_send_head        =       NULL;
 
        init_timer(&sk->sk_timer);
@@ -2515,7 +2613,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
 
-       sk->sk_stamp = ktime_set(-1L, 0);
+       sk->sk_stamp = SK_DEFAULT_STAMP;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
        sk->sk_napi_id          =       0;
@@ -2796,6 +2894,21 @@ void sk_common_release(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_common_release);
 
+void sk_get_meminfo(const struct sock *sk, u32 *mem)
+{
+       memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
+
+       mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+       mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+       mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+       mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+       mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+       mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+       mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+       mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+       mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+}
+
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR 64      /* should be enough for the first time */
 struct prot_inuse {
@@ -3136,3 +3249,14 @@ static int __init proto_init(void)
 subsys_initcall(proto_init);
 
 #endif /* PROC_FS */
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+bool sk_busy_loop_end(void *p, unsigned long start_time)
+{
+       struct sock *sk = p;
+
+       return !skb_queue_empty(&sk->sk_receive_queue) ||
+              sk_busy_loop_timeout(sk, start_time);
+}
+EXPORT_SYMBOL(sk_busy_loop_end);
+#endif /* CONFIG_NET_RX_BUSY_POLL */
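sk_busy_loop_end() is a termination predicate: busy polling should stop as
soon as receive data is queued or the socket's poll budget elapses. The
generic loop it is meant to plug into has roughly this shape (a
simplification, not the actual net/core implementation):

    for (;;) {
            /* drive the NAPI context directly, bypassing interrupts */
            poll_napi_once(napi);                    /* hypothetical */
            if (sk_busy_loop_end(sk, start_time))
                    break;                           /* data or timeout */
            cpu_relax();
    }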
index 6b10573cc9faa790fe261b452b85f3b774c3ec21..fb9d0e2fd148aa78fa9c33e27e341af5a47c530f 100644 (file)
@@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
 
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
 {
        while (1) {
                u64 res = atomic64_read(&sk->sk_cookie);
@@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 {
        u32 mem[SK_MEMINFO_VARS];
 
-       mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-       mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
-       mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
-       mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
-       mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
-       mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
-       mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-       mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
-       mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+       sk_get_meminfo(sk, mem);
 
        return nla_put(skb, attrtype, sizeof(mem), &mem);
 }
index 9a1a352fd1ebe598e4925bcda037dc0e4a2288bc..eed1ebf7f29d0fac552074b127e5636fecede65f 100644 (file)
@@ -13,9 +13,9 @@
 
 static DEFINE_SPINLOCK(reuseport_lock);
 
-static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
-       size_t size = sizeof(struct sock_reuseport) +
+       unsigned int size = sizeof(struct sock_reuseport) +
                      sizeof(struct sock *) * max_socks;
        struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
 
index 6592d7bbed394086a8ba8efcb370fb1d75db4449..d758880c09a73ee07fe406f42fd934eae0b300d0 100644 (file)
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(net_ratelimit);
 
 __be32 in_aton(const char *str)
 {
-       unsigned long l;
+       unsigned int l;
        unsigned int val;
        int i;
 
index 7de5b40a5d0d1245ad995877f779e0d87d1cf398..9afa2a5030b2570c89de8decc3b20aad3a224e5c 100644 (file)
@@ -132,6 +132,7 @@ Version 0.0.6    2.1.110   07-aug-98   Eduardo Marcelo Serrat
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/fib_rules.h>
+#include <net/tcp.h>
 #include <net/dn.h>
 #include <net/dn_nsp.h>
 #include <net/dn_dev.h>
@@ -1469,18 +1470,18 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
        case DSO_NODELAY:
                if (optlen != sizeof(int))
                        return -EINVAL;
-               if (scp->nonagle == 2)
+               if (scp->nonagle == TCP_NAGLE_CORK)
                        return -EINVAL;
-               scp->nonagle = (u.val == 0) ? 0 : 1;
+               scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
                /* if (scp->nonagle == 1) { Push pending frames } */
                break;
 
        case DSO_CORK:
                if (optlen != sizeof(int))
                        return -EINVAL;
-               if (scp->nonagle == 1)
+               if (scp->nonagle == TCP_NAGLE_OFF)
                        return -EINVAL;
-               scp->nonagle = (u.val == 0) ? 0 : 2;
+               scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
                /* if (scp->nonagle == 0) { Push pending frames } */
                break;
 
@@ -1608,14 +1609,14 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
        case DSO_NODELAY:
                if (r_len > sizeof(int))
                        r_len = sizeof(int);
-               val = (scp->nonagle == 1);
+               val = (scp->nonagle == TCP_NAGLE_OFF);
                r_data = &val;
                break;
 
        case DSO_CORK:
                if (r_len > sizeof(int))
                        r_len = sizeof(int);
-               val = (scp->nonagle == 2);
+               val = (scp->nonagle == TCP_NAGLE_CORK);
                r_data = &val;
                break;
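Replacing the bare 1 and 2 with the nagle constants from <net/tcp.h> (TCP_NAGLE_OFF == 1, TCP_NAGLE_CORK == 2) makes the mutual exclusion between DSO_NODELAY and DSO_CORK read directly from the code. A small sketch of the invariant the setsockopt handlers above enforce:

/* scp->nonagle may only ever hold one of these three states; the
 * handlers reject an attempt to cork a NODELAY socket and vice versa.
 */
static bool dn_nonagle_state_valid(unsigned char nonagle)
{
        return nonagle == 0 ||
               nonagle == TCP_NAGLE_OFF ||
               nonagle == TCP_NAGLE_CORK;
}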
 
index 9649238eef404095a89d34a006dc0b504bc47038..da4d64f432db8ae4407990801f07786da4f1acc5 100644 (file)
@@ -6,7 +6,7 @@ config HAVE_NET_DSA
 
 config NET_DSA
        tristate "Distributed Switch Architecture"
-       depends on HAVE_NET_DSA
+       depends on HAVE_NET_DSA && MAY_USE_DEVLINK
        select NET_SWITCHDEV
        select PHYLIB
        ---help---
index b6d4f6a23f06c9d794a5eedc4c9f79810d5b06e5..95d1a756202c45b6b1332163fd83d2c3e0c123c4 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <net/dsa.h>
 #include <linux/of.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
 #include <linux/of_gpio.h>
+#include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
 #include <linux/gpio/consumer.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
index 737be6470c7f27ba032d01667e039f3c03c17ae8..033b3bfb63dc1887b15b3e08f00a00f70b706ec4 100644 (file)
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/list.h>
+#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include <linux/rtnetlink.h>
-#include <net/dsa.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 static LIST_HEAD(dsa_switch_trees);
 static DEFINE_MUTEX(dsa2_mutex);
 
+static const struct devlink_ops dsa_devlink_ops = {
+};
+
 static struct dsa_switch_tree *dsa_get_dst(u32 tree)
 {
        struct dsa_switch_tree *dst;
@@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index,
                return err;
        }
 
-       return 0;
+       memset(&ds->ports[index].devlink_port, 0,
+              sizeof(ds->ports[index].devlink_port));
+
+       return devlink_port_register(ds->devlink,
+                                    &ds->ports[index].devlink_port,
+                                    index);
 }
 
 static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index,
                                 struct dsa_switch *ds)
 {
+       devlink_port_unregister(&ds->ports[index].devlink_port);
        dsa_cpu_dsa_destroy(port);
 }
 
@@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index,
 
        ds->cpu_port_mask |= BIT(index);
 
-       return 0;
+       memset(&ds->ports[index].devlink_port, 0,
+              sizeof(ds->ports[index].devlink_port));
+       err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+                                   index);
+       return err;
 }
 
 static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index,
                                 struct dsa_switch *ds)
 {
+       devlink_port_unregister(&ds->ports[index].devlink_port);
        dsa_cpu_dsa_destroy(port);
        ds->cpu_port_mask &= ~BIT(index);
 
@@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index,
                return err;
        }
 
+       memset(&ds->ports[index].devlink_port, 0,
+              sizeof(ds->ports[index].devlink_port));
+       err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+                                   index);
+       if (err)
+               return err;
+
+       devlink_port_type_eth_set(&ds->ports[index].devlink_port,
+                                 ds->ports[index].netdev);
+
        return 0;
 }
 
 static void dsa_user_port_unapply(struct dsa_port *port, u32 index,
                                  struct dsa_switch *ds)
 {
+       devlink_port_unregister(&ds->ports[index].devlink_port);
        if (ds->ports[index].netdev) {
                dsa_slave_destroy(ds->ports[index].netdev);
                ds->ports[index].netdev = NULL;
@@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
         */
        ds->phys_mii_mask = ds->enabled_port_mask;
 
+       /* Add the switch to devlink before calling setup, so that setup can
+        * add dpipe tables
+        */
+       ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+       if (!ds->devlink)
+               return -ENOMEM;
+
+       err = devlink_register(ds->devlink, ds->dev);
+       if (err)
+               return err;
+
        err = ds->ops->setup(ds);
        if (err < 0)
                return err;
@@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
                mdiobus_unregister(ds->slave_mii_bus);
 
        dsa_switch_unregister_notifier(ds);
+
+       if (ds->devlink) {
+               devlink_unregister(ds->devlink);
+               devlink_free(ds->devlink);
+               ds->devlink = NULL;
+       }
+
 }
 
 static int dsa_dst_apply(struct dsa_switch_tree *dst)
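Taken together, the dsa2.c hunks register one devlink instance per switch, before ->setup() runs so that setup may add dpipe tables, and one devlink_port per DSA, CPU and user port, with user ports additionally typed as Ethernet. A condensed sketch of the resulting lifecycle, error unwinding trimmed:

static int dsa_devlink_lifecycle_sketch(struct dsa_switch *ds, int port)
{
        int err;

        ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
        if (!ds->devlink)
                return -ENOMEM;

        err = devlink_register(ds->devlink, ds->dev);
        if (err)
                return err;

        err = devlink_port_register(ds->devlink,
                                    &ds->ports[port].devlink_port, port);
        if (err)
                return err;

        /* Only user ports carry a netdev to advertise. */
        devlink_port_type_eth_set(&ds->ports[port].devlink_port,
                                  ds->ports[port].netdev);
        return 0;
}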
index c34872e1febc4b75d1b69b18a8a1189405ca30fa..7693182df81e61d14540cd43be3ae7e134eef576 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/of_mdio.h>
 #include <linux/mdio.h>
 #include <linux/list.h>
+#include <net/dsa.h>
 #include <net/rtnetlink.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
@@ -419,8 +420,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
        return 0;
 }
 
-static int dsa_fastest_ageing_time(struct dsa_switch *ds,
-                                  unsigned int ageing_time)
+static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
+                                           unsigned int ageing_time)
 {
        int i;
 
@@ -443,9 +444,13 @@ static int dsa_slave_ageing_time(struct net_device *dev,
        unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
        unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
 
-       /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
-       if (switchdev_trans_ph_prepare(trans))
+       if (switchdev_trans_ph_prepare(trans)) {
+               if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+                       return -ERANGE;
+               if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+                       return -ERANGE;
                return 0;
+       }
 
        /* Keep the fastest ageing time in case of multiple bridges */
        p->dp->ageing_time = ageing_time;
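Rather than skipping the switchdev prepare phase, the ageing-time handler now uses it to validate the request against the switch's advertised window, so an unsupported value fails the transaction before any state changes. The conversion and range check, condensed (zero bounds meaning "unconstrained" follows from the code above):

static int ageing_time_check_sketch(struct dsa_switch *ds,
                                    unsigned long ageing_clock_t)
{
        unsigned long j = clock_t_to_jiffies(ageing_clock_t);
        unsigned int msecs = jiffies_to_msecs(j);

        if (ds->ageing_time_min && msecs < ds->ageing_time_min)
                return -ERANGE;
        if (ds->ageing_time_max && msecs > ds->ageing_time_max)
                return -ERANGE;
        return 0;
}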
index 6456dacf9ae9e0b88585defc026179423a80e4e4..ca6e26e514f089cfa8991ef6a7bd790bd8db14fe 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Handling of a single switch chip, part of a switch fabric
  *
- * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ *     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
        if (ds->index == info->sw_index && ds->ops->port_bridge_join)
                return ds->ops->port_bridge_join(ds, info->port, info->br);
 
-       if (ds->index != info->sw_index)
-               dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n",
-                       info->sw_index, info->port, netdev_name(info->br));
+       if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join)
+               return ds->ops->crosschip_bridge_join(ds, info->sw_index,
+                                                     info->port, info->br);
 
        return 0;
 }
@@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
        if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
                ds->ops->port_bridge_leave(ds, info->port, info->br);
 
-       if (ds->index != info->sw_index)
-               dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n",
-                       info->sw_index, info->port, netdev_name(info->br));
+       if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave)
+               ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
+                                               info->br);
 
        return 0;
 }
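Cross-chip bridge membership changes are now forwarded to drivers through dedicated ops instead of being logged at debug level. A hypothetical driver stub showing the expected shape (the body is illustrative; a real implementation programs cross-chip forwarding state):

static int foo_crosschip_bridge_join(struct dsa_switch *ds, int sw_index,
                                     int port, struct net_device *br)
{
        /* Program this chip so traffic for the bridged port on the
         * remote chip (sw_index, port) is forwarded appropriately.
         */
        dev_dbg(ds->dev, "remote port %d.%d joined %s\n",
                sw_index, port, netdev_name(br));
        return 0;
}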
index 5d925b6b2bb14f78f84a06b84b4fa19bd6846e82..e2ed6cf68261f8b655bde8cc8fff4ea240194952 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 /* This tag length is 4 bytes, older ones were 6 bytes, we do not
index 72579ceea381b7e2bce99a28208810b707434f09..e42ba906100cbc14beec08fb98f283785c3183e0 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 #define DSA_HLEN       4
index 648c051817a1b4a4e64cda67bab8c81288027a4e..6a9b7a9e4e15b9d75571f4f4cc18b776e420610b 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 #define DSA_HLEN       4
index 30240f343aea8450b13936159b4b9a76126a8977..4e0dad759d047a3122ab0ed5005a37f1f3fa69da 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 #include <linux/etherdevice.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 #define QCA_HDR_LEN    2
index 26f977176978085af9c034319c754a1ac7501d4c..74c948512550f71649d62446b0c2e60bd88004f9 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <net/dsa.h>
 #include "dsa_priv.h"
 
 static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
index c6d4238ff94a8a329bf44bf59b30fb09fb07f707..f83de23a30e7e77303d011dee35eb92342adab5c 100644 (file)
@@ -11,7 +11,7 @@ obj-y     := route.o inetpeer.o protocol.o \
             tcp_rate.o tcp_recovery.o \
             tcp_offload.o datagram.o raw.o udp.o udplite.o \
             udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
-            fib_frontend.o fib_semantics.o fib_trie.o \
+            fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
             inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
index 6b1fc6e4278ef4f1cba58412977918af31d73e62..d1a11707a12682fcd70f22f6df77087b779a5826 100644 (file)
@@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = {
 };
 #endif
 
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
        .early_demux    =       tcp_v4_early_demux,
+       .early_demux_handler =  tcp_v4_early_demux,
        .handler        =       tcp_v4_rcv,
        .err_handler    =       tcp_v4_err,
        .no_policy      =       1,
@@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = {
        .icmp_strict_tag_validation = 1,
 };
 
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
        .early_demux =  udp_v4_early_demux,
+       .early_demux_handler =  udp_v4_early_demux,
        .handler =      udp_rcv,
        .err_handler =  udp_err,
        .no_policy =    1,
@@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net)
        net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
        net->ipv4.sysctl_ip_dynaddr = 0;
        net->ipv4.sysctl_ip_early_demux = 1;
+       net->ipv4.sysctl_udp_early_demux = 1;
+       net->ipv4.sysctl_tcp_early_demux = 1;
 #ifdef CONFIG_SYSCTL
        net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
 #endif
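Dropping const from the tcp/udp net_protocol entries and recording the demux function a second time in early_demux_handler is what lets the new sysctls (defaulted on above) flip early_demux between NULL and the handler at runtime. The receive path then has to read the pointer exactly once, as the ip_input.c hunk further down does; sketched:

static void early_demux_sketch(struct sk_buff *skb,
                               struct net_protocol *ipprot)
{
        void (*edemux)(struct sk_buff *skb);

        /* One READ_ONCE() so a concurrent sysctl toggle cannot be
         * observed halfway between the NULL check and the call.
         */
        edemux = READ_ONCE(ipprot->early_demux);
        if (edemux)
                edemux(skb);
}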
index 51b27ae09fbd725bcd8030982e5850215ac4ce5c..0937b34c27cacb2dec73a67a76ff11fe26722500 100644 (file)
@@ -872,7 +872,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
                    skb->pkt_type != PACKET_HOST)
                        state = NUD_STALE;
                neigh_update(n, sha, state,
-                            override ? NEIGH_UPDATE_F_OVERRIDE : 0);
+                            override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0);
                neigh_release(n);
        }
 
@@ -1033,7 +1033,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
                err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
                                   r->arp_ha.sa_data : NULL, state,
                                   NEIGH_UPDATE_F_OVERRIDE |
-                                  NEIGH_UPDATE_F_ADMIN);
+                                  NEIGH_UPDATE_F_ADMIN, 0);
                neigh_release(neigh);
        }
        return err;
@@ -1084,7 +1084,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip)
                if (neigh->nud_state & ~NUD_NOARP)
                        err = neigh_update(neigh, NULL, NUD_FAILED,
                                           NEIGH_UPDATE_F_OVERRIDE|
-                                          NEIGH_UPDATE_F_ADMIN);
+                                          NEIGH_UPDATE_F_ADMIN, 0);
                neigh_release(neigh);
        }
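All three arp.c call sites gain a trailing 0 because neigh_update() takes an extra argument in this series; judging from the call sites it carries the netlink portid of the requester for change notifications, with 0 meaning kernel-originated, though that reading is an inference rather than something this hunk states. A hedged sketch of a netlink-driven caller under that assumption:

static int admin_neigh_update_sketch(struct neighbour *n, const u8 *lladdr,
                                     struct nlmsghdr *nlh)
{
        /* Forward the requesting message's portid so notification
         * listeners can attribute the change to its originator.
         */
        return neigh_update(n, lladdr, NUD_PERMANENT,
                            NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
                            nlh->nlmsg_pid);
}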
 
index cebedd545e5e2863afcfe116309725e2cd57206c..6d3602ec640c7ae620692527000f7b03556b0ef7 100644 (file)
@@ -1192,6 +1192,18 @@ out:
        return done;
 }
 
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+                                int scope)
+{
+       for_primary_ifa(in_dev) {
+               if (ifa->ifa_scope != RT_SCOPE_LINK &&
+                   ifa->ifa_scope <= scope)
+                       return ifa->ifa_local;
+       } endfor_ifa(in_dev);
+
+       return 0;
+}
+
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
        __be32 addr = 0;
@@ -1228,13 +1240,9 @@ no_in_dev:
        if (master_idx &&
            (dev = dev_get_by_index_rcu(net, master_idx)) &&
            (in_dev = __in_dev_get_rcu(dev))) {
-               for_primary_ifa(in_dev) {
-                       if (ifa->ifa_scope != RT_SCOPE_LINK &&
-                           ifa->ifa_scope <= scope) {
-                               addr = ifa->ifa_local;
-                               goto out_unlock;
-                       }
-               } endfor_ifa(in_dev);
+               addr = in_dev_select_addr(in_dev, scope);
+               if (addr)
+                       goto out_unlock;
        }
 
        /* Not loopback addresses on loopback should be preferred
@@ -1249,13 +1257,9 @@ no_in_dev:
                if (!in_dev)
                        continue;
 
-               for_primary_ifa(in_dev) {
-                       if (ifa->ifa_scope != RT_SCOPE_LINK &&
-                           ifa->ifa_scope <= scope) {
-                               addr = ifa->ifa_local;
-                               goto out_unlock;
-                       }
-               } endfor_ifa(in_dev);
+               addr = in_dev_select_addr(in_dev, scope);
+               if (addr)
+                       goto out_unlock;
        }
 out_unlock:
        rcu_read_unlock();
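The two identical primary-address scans collapse into in_dev_select_addr(), which returns the first primary address whose scope is acceptable (link scope excluded) or 0. Callers of the public entry point are unaffected; for reference, a typical use:

/* Pick a source address on 'dev' usable toward 'dst' at universe
 * scope; returns 0 if no primary address qualifies. Address lists
 * are walked under RCU internally.
 */
static __be32 pick_src_sketch(const struct net_device *dev, __be32 dst)
{
        return inet_select_addr(dev, dst, RT_SCOPE_UNIVERSE);
}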
@@ -1798,6 +1802,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
                goto nla_put_failure;
 
+       if (!devconf)
+               goto out;
+
        if ((all || type == NETCONFA_FORWARDING) &&
            nla_put_s32(skb, NETCONFA_FORWARDING,
                        IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
@@ -1819,6 +1826,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
                        IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
                goto nla_put_failure;
 
+out:
        nlmsg_end(skb, nlh);
        return 0;
 
@@ -1827,8 +1835,8 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
-                                struct ipv4_devconf *devconf)
+void inet_netconf_notify_devconf(struct net *net, int event, int type,
+                                int ifindex, struct ipv4_devconf *devconf)
 {
        struct sk_buff *skb;
        int err = -ENOBUFS;
@@ -1838,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
                goto errout;
 
        err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
-                                       RTM_NEWNETCONF, 0, type);
+                                       event, 0, type);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
                WARN_ON(err == -EMSGSIZE);
@@ -2017,10 +2025,12 @@ static void inet_forward_change(struct net *net)
 
        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
-       inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                   NETCONFA_FORWARDING,
                                    NETCONFA_IFINDEX_ALL,
                                    net->ipv4.devconf_all);
-       inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                   NETCONFA_FORWARDING,
                                    NETCONFA_IFINDEX_DEFAULT,
                                    net->ipv4.devconf_dflt);
 
@@ -2033,7 +2043,8 @@ static void inet_forward_change(struct net *net)
                in_dev = __in_dev_get_rtnl(dev);
                if (in_dev) {
                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
-                       inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_FORWARDING,
                                                    dev->ifindex, &in_dev->cnf);
                }
        }
@@ -2078,19 +2089,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
                    new_value != old_value) {
                        ifindex = devinet_conf_ifindex(net, cnf);
-                       inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_RP_FILTER,
                                                    ifindex, cnf);
                }
                if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
                    new_value != old_value) {
                        ifindex = devinet_conf_ifindex(net, cnf);
-                       inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_PROXY_NEIGH,
                                                    ifindex, cnf);
                }
                if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
                    new_value != old_value) {
                        ifindex = devinet_conf_ifindex(net, cnf);
-                       inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
                                                    ifindex, cnf);
                }
        }
@@ -2125,7 +2139,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
                                        container_of(cnf, struct in_device, cnf);
                                if (*valp)
                                        dev_disable_lro(idev->dev);
-                               inet_netconf_notify_devconf(net,
+                               inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
                                                            NETCONFA_FORWARDING,
                                                            idev->dev->ifindex,
                                                            cnf);
@@ -2133,7 +2147,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
                        rtnl_unlock();
                        rt_cache_flush(net);
                } else
-                       inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_FORWARDING,
                                                    NETCONFA_IFINDEX_DEFAULT,
                                                    net->ipv4.devconf_dflt);
        }
@@ -2255,7 +2270,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 
        p->sysctl = t;
 
-       inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+       inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+                                   ifindex, p);
        return 0;
 
 free:
@@ -2264,16 +2280,18 @@ out:
        return -ENOBUFS;
 }
 
-static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
+static void __devinet_sysctl_unregister(struct net *net,
+                                       struct ipv4_devconf *cnf, int ifindex)
 {
        struct devinet_sysctl_table *t = cnf->sysctl;
 
-       if (!t)
-               return;
+       if (t) {
+               cnf->sysctl = NULL;
+               unregister_net_sysctl_table(t->sysctl_header);
+               kfree(t);
+       }
 
-       cnf->sysctl = NULL;
-       unregister_net_sysctl_table(t->sysctl_header);
-       kfree(t);
+       inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
 }
 
 static int devinet_sysctl_register(struct in_device *idev)
@@ -2295,7 +2313,9 @@ static int devinet_sysctl_register(struct in_device *idev)
 
 static void devinet_sysctl_unregister(struct in_device *idev)
 {
-       __devinet_sysctl_unregister(&idev->cnf);
+       struct net *net = dev_net(idev->dev);
+
+       __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
        neigh_sysctl_unregister(idev->arp_parms);
 }
 
@@ -2370,9 +2390,9 @@ static __net_init int devinet_init_net(struct net *net)
 
 #ifdef CONFIG_SYSCTL
 err_reg_ctl:
-       __devinet_sysctl_unregister(dflt);
+       __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
 err_reg_dflt:
-       __devinet_sysctl_unregister(all);
+       __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
 err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
@@ -2394,8 +2414,10 @@ static __net_exit void devinet_exit_net(struct net *net)
 
        tbl = net->ipv4.forw_hdr->ctl_table_arg;
        unregister_net_sysctl_table(net->ipv4.forw_hdr);
-       __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
-       __devinet_sysctl_unregister(net->ipv4.devconf_all);
+       __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
+                                   NETCONFA_IFINDEX_DEFAULT);
+       __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
+                                   NETCONFA_IFINDEX_ALL);
        kfree(tbl);
 #endif
        kfree(net->ipv4.devconf_dflt);
index 42bfd08109dd78ab509493e8d2205d72845bb3eb..8f2133ffc2ff1b94871408a5f934cb938d3462b5 100644 (file)
@@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb)
 
        net = sock_net(skb->sk);
        nlh = nlmsg_hdr(skb);
-       if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+       if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+           skb->len < nlh->nlmsg_len ||
            nlmsg_len(nlh) < sizeof(*frn))
                return;
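The first test now demands the full aligned header-plus-payload size instead of just a header, so a message too short to carry a fib_result_nl never reaches the later length fields; nlmsg_total_size(p) expands to NLMSG_ALIGN(NLMSG_HDRLEN + p). The minimum the new bound enforces, as a helper:

static bool fib_req_len_ok_sketch(const struct sk_buff *skb)
{
        /* Header plus payload, netlink-aligned, before any header
         * field is trusted.
         */
        return skb->len >= nlmsg_total_size(sizeof(struct fib_result_nl));
}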
 
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
new file mode 100644 (file)
index 0000000..e0714d9
--- /dev/null
@@ -0,0 +1,86 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/netns/ipv4.h>
+#include <net/ip_fib.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+                     enum fib_event_type event_type,
+                     struct fib_notifier_info *info)
+{
+       info->net = net;
+       return nb->notifier_call(nb, event_type, info);
+}
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+                      struct fib_notifier_info *info)
+{
+       net->ipv4.fib_seq++;
+       info->net = net;
+       return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+
+static unsigned int fib_seq_sum(void)
+{
+       unsigned int fib_seq = 0;
+       struct net *net;
+
+       rtnl_lock();
+       for_each_net(net)
+               fib_seq += net->ipv4.fib_seq;
+       rtnl_unlock();
+
+       return fib_seq;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+                                  void (*cb)(struct notifier_block *nb),
+                                  unsigned int fib_seq)
+{
+       atomic_notifier_chain_register(&fib_chain, nb);
+       if (fib_seq == fib_seq_sum())
+               return true;
+       atomic_notifier_chain_unregister(&fib_chain, nb);
+       if (cb)
+               cb(nb);
+       return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+                         void (*cb)(struct notifier_block *nb))
+{
+       int retries = 0;
+
+       do {
+               unsigned int fib_seq = fib_seq_sum();
+               struct net *net;
+
+               /* Mutex semantics guarantee that every change done to
+                * FIB tries before we read the change sequence counter
+                * is now visible to us.
+                */
+               rcu_read_lock();
+               for_each_net_rcu(net) {
+                       fib_rules_notify(net, nb);
+                       fib_notify(net, nb);
+               }
+               rcu_read_unlock();
+
+               if (fib_dump_is_consistent(nb, cb, fib_seq))
+                       return 0;
+       } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+       return -EBUSY;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
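The dump-and-verify registration protocol now lives in its own file: registration replays all current rules and entries to the new notifier, then checks the sequence sum to detect a racing FIB change, retrying up to FIB_DUMP_MAX_RETRIES times. A hypothetical consumer (names my_fib_event and my_fib_abort are illustrative):

static int my_fib_event(struct notifier_block *nb, unsigned long event,
                        void *ptr)
{
        struct fib_notifier_info *info = ptr;

        /* React to FIB_EVENT_ENTRY_ADD, FIB_EVENT_RULE_ADD, ... */
        (void)info;
        return NOTIFY_DONE;
}

static struct notifier_block my_fib_nb = {
        .notifier_call = my_fib_event,
};

static void my_fib_abort(struct notifier_block *nb)
{
        /* Drop any state learned from an inconsistent dump. */
}

static int my_init(void)
{
        return register_fib_notifier(&my_fib_nb, my_fib_abort);
}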
index 2e50062f642d61bdc0c0893de4e4ff84b5be6c8d..778ecf977eb2bd7b7b3808691aaf818fc4bd680d 100644 (file)
@@ -47,6 +47,27 @@ struct fib4_rule {
 #endif
 };
 
+static bool fib4_rule_matchall(const struct fib_rule *rule)
+{
+       struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
+
+       if (r->dst_len || r->src_len || r->tos)
+               return false;
+       return fib_rule_matchall(rule);
+}
+
+bool fib4_rule_default(const struct fib_rule *rule)
+{
+       if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+           rule->l3mdev)
+               return false;
+       if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN &&
+           rule->table != RT_TABLE_DEFAULT)
+               return false;
+       return true;
+}
+EXPORT_SYMBOL_GPL(fib4_rule_default);
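fib4_rule_default() gives other code a way to distinguish the stock local/main/default rule set from custom policy routing. A hedged sketch of the typical gate, assuming an offloading driver that cannot mirror custom rules to hardware:

static int offload_rule_check_sketch(const struct fib_rule *rule)
{
        /* Anything beyond the stock rules means routing decisions
         * the hardware pipeline cannot reproduce.
         */
        if (!fib4_rule_default(rule))
                return -EOPNOTSUPP;
        return 0;
}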
+
 int __fib_lookup(struct net *net, struct flowi4 *flp,
                 struct fib_result *res, unsigned int flags)
 {
@@ -164,12 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net)
        return NULL;
 }
 
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+                                 enum fib_event_type event_type,
+                                 struct fib_rule *rule)
+{
+       struct fib_rule_notifier_info info = {
+               .rule = rule,
+       };
+
+       return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
 static int call_fib_rule_notifiers(struct net *net,
-                                  enum fib_event_type event_type)
+                                  enum fib_event_type event_type,
+                                  struct fib_rule *rule)
+{
+       struct fib_rule_notifier_info info = {
+               .rule = rule,
+       };
+
+       return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+void fib_rules_notify(struct net *net, struct notifier_block *nb)
 {
-       struct fib_notifier_info info;
+       struct fib_rules_ops *ops = net->ipv4.rules_ops;
+       struct fib_rule *rule;
 
-       return call_fib_notifiers(net, event_type, &info);
+       list_for_each_entry_rcu(rule, &ops->rules_list, list)
+               call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
 }
 
 static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
@@ -228,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        rule4->tos = frh->tos;
 
        net->ipv4.fib_has_custom_rules = true;
-       call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD);
+       call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
 
        err = 0;
 errout:
@@ -250,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
                net->ipv4.fib_num_tclassid_users--;
 #endif
        net->ipv4.fib_has_custom_rules = true;
-       call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL);
+       call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
 errout:
        return err;
 }
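Rule notifications now carry the rule itself in fib_rule_notifier_info, which embeds the generic info as a member, so a listener recovers it with container_of():

static struct fib_rule *rule_from_info_sketch(struct fib_notifier_info *info)
{
        /* The generic info is embedded in the rule-specific wrapper,
         * matching the &info.info passed by the senders above.
         */
        struct fib_rule_notifier_info *ri =
                container_of(info, struct fib_rule_notifier_info, info);

        return ri->rule;
}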
index 317026a39cfa2b49bf06182d89a11af0fa2688af..da449ddb8cc172bd9091c00057a69a095f98b56d 100644 (file)
@@ -57,7 +57,6 @@ static unsigned int fib_info_cnt;
 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-u32 fib_multipath_secret __read_mostly;
 
 #define for_nexthops(fi) {                                             \
        int nhsel; const struct fib_nh *nh;                             \
@@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi)
 
                atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
        } endfor_nexthops(fi);
-
-       net_get_random_once(&fib_multipath_secret,
-                           sizeof(fib_multipath_secret));
 }
 
 static inline void fib_add_weight(struct fib_info *fi,
@@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
 #endif
 
 void fib_select_path(struct net *net, struct fib_result *res,
-                    struct flowi4 *fl4, int mp_hash)
+                    struct flowi4 *fl4, const struct sk_buff *skb)
 {
        bool oif_check;
 
@@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi->fib_nhs > 1 && oif_check) {
-               if (mp_hash < 0)
-                       mp_hash = get_hash_from_flowi4(fl4) >> 1;
+               int h = fib_multipath_hash(res->fi, fl4, skb);
 
-               fib_select_multipath(res, mp_hash);
+               fib_select_multipath(res, h);
        }
        else
 #endif
index 2f0d8233950faeac91f287644bc9476f19f74578..1201409ba1dcb18ee028003b065410b87bf4a602 100644 (file)
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
-static unsigned int fib_seq_sum(void)
-{
-       unsigned int fib_seq = 0;
-       struct net *net;
-
-       rtnl_lock();
-       for_each_net(net)
-               fib_seq += net->ipv4.fib_seq;
-       rtnl_unlock();
-
-       return fib_seq;
-}
-
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-static int call_fib_notifier(struct notifier_block *nb, struct net *net,
-                            enum fib_event_type event_type,
-                            struct fib_notifier_info *info)
-{
-       info->net = net;
-       return nb->notifier_call(nb, event_type, info);
-}
-
-static void fib_rules_notify(struct net *net, struct notifier_block *nb,
-                            enum fib_event_type event_type)
-{
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-       struct fib_notifier_info info;
-
-       if (net->ipv4.fib_has_custom_rules)
-               call_fib_notifier(nb, net, event_type, &info);
-#endif
-}
-
-static void fib_notify(struct net *net, struct notifier_block *nb,
-                      enum fib_event_type event_type);
-
 static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
                                   enum fib_event_type event_type, u32 dst,
                                   int dst_len, struct fib_info *fi,
@@ -137,62 +100,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
        return call_fib_notifier(nb, net, event_type, &info.info);
 }
 
-static bool fib_dump_is_consistent(struct notifier_block *nb,
-                                  void (*cb)(struct notifier_block *nb),
-                                  unsigned int fib_seq)
-{
-       atomic_notifier_chain_register(&fib_chain, nb);
-       if (fib_seq == fib_seq_sum())
-               return true;
-       atomic_notifier_chain_unregister(&fib_chain, nb);
-       if (cb)
-               cb(nb);
-       return false;
-}
-
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
-                         void (*cb)(struct notifier_block *nb))
-{
-       int retries = 0;
-
-       do {
-               unsigned int fib_seq = fib_seq_sum();
-               struct net *net;
-
-               /* Mutex semantics guarantee that every change done to
-                * FIB tries before we read the change sequence counter
-                * is now visible to us.
-                */
-               rcu_read_lock();
-               for_each_net_rcu(net) {
-                       fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
-                       fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
-               }
-               rcu_read_unlock();
-
-               if (fib_dump_is_consistent(nb, cb, fib_seq))
-                       return 0;
-       } while (++retries < FIB_DUMP_MAX_RETRIES);
-
-       return -EBUSY;
-}
-EXPORT_SYMBOL(register_fib_notifier);
-
-int unregister_fib_notifier(struct notifier_block *nb)
-{
-       return atomic_notifier_chain_unregister(&fib_chain, nb);
-}
-EXPORT_SYMBOL(unregister_fib_notifier);
-
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
-                      struct fib_notifier_info *info)
-{
-       net->ipv4.fib_seq++;
-       info->net = net;
-       return atomic_notifier_call_chain(&fib_chain, event_type, info);
-}
-
 static int call_fib_entry_notifiers(struct net *net,
                                    enum fib_event_type event_type, u32 dst,
                                    int dst_len, struct fib_info *fi,
@@ -1995,8 +1902,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
 }
 
 static void fib_leaf_notify(struct net *net, struct key_vector *l,
-                           struct fib_table *tb, struct notifier_block *nb,
-                           enum fib_event_type event_type)
+                           struct fib_table *tb, struct notifier_block *nb)
 {
        struct fib_alias *fa;
 
@@ -2012,22 +1918,21 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
                if (tb->tb_id != fa->tb_id)
                        continue;
 
-               call_fib_entry_notifier(nb, net, event_type, l->key,
+               call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
                                        KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
                                        fa->fa_type, fa->tb_id);
        }
 }
 
 static void fib_table_notify(struct net *net, struct fib_table *tb,
-                            struct notifier_block *nb,
-                            enum fib_event_type event_type)
+                            struct notifier_block *nb)
 {
        struct trie *t = (struct trie *)tb->tb_data;
        struct key_vector *l, *tp = t->kv;
        t_key key = 0;
 
        while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
-               fib_leaf_notify(net, l, tb, nb, event_type);
+               fib_leaf_notify(net, l, tb, nb);
 
                key = l->key + 1;
                /* stop in case of wrap around */
@@ -2036,8 +1941,7 @@ static void fib_table_notify(struct net *net, struct fib_table *tb,
        }
 }
 
-static void fib_notify(struct net *net, struct notifier_block *nb,
-                      enum fib_event_type event_type)
+void fib_notify(struct net *net, struct notifier_block *nb)
 {
        unsigned int h;
 
@@ -2046,7 +1950,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb,
                struct fib_table *tb;
 
                hlist_for_each_entry_rcu(tb, head, tb_hlist)
-                       fib_table_notify(net, tb, nb, event_type);
+                       fib_table_notify(net, tb, nb);
        }
 }
 
index fc310db2708bf6c9e96befe413e89ac931818f74..43318b5f56474bc15253e74e156962dd2c8df01f 100644 (file)
@@ -464,22 +464,6 @@ out_bh_enable:
        local_bh_enable();
 }
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-/* Source and destination is swapped. See ip_multipath_icmp_hash */
-static int icmp_multipath_hash_skb(const struct sk_buff *skb)
-{
-       const struct iphdr *iph = ip_hdr(skb);
-
-       return fib_multipath_hash(iph->daddr, iph->saddr);
-}
-
-#else
-
-#define icmp_multipath_hash_skb(skb) (-1)
-
-#endif
-
 static struct rtable *icmp_route_lookup(struct net *net,
                                        struct flowi4 *fl4,
                                        struct sk_buff *skb_in,
@@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
        fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
 
        security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
-       rt = __ip_route_output_key_hash(net, fl4,
-                                       icmp_multipath_hash_skb(skb_in));
+       rt = __ip_route_output_key_hash(net, fl4, skb_in);
        if (IS_ERR(rt))
                return rt;
 
index bbe7f72db9c157ba2d6c5292637c2f58ad39a123..b3cdeec85f1f2c612c362590e828f50596a5c247 100644 (file)
@@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg)
        qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
        net = container_of(qp->q.net, struct net, ipv4.frags);
 
+       rcu_read_lock();
        spin_lock(&qp->q.lock);
 
        if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg)
        __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 
        if (!inet_frag_evicting(&qp->q)) {
-               struct sk_buff *head = qp->q.fragments;
+               struct sk_buff *clone, *head = qp->q.fragments;
                const struct iphdr *iph;
                int err;
 
@@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg)
                if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
                        goto out;
 
-               rcu_read_lock();
                head->dev = dev_get_by_index_rcu(net, qp->iif);
                if (!head->dev)
-                       goto out_rcu_unlock;
+                       goto out;
+
 
                /* skb has no dst, perform route lookup again */
                iph = ip_hdr(head);
                err = ip_route_input_noref(head, iph->daddr, iph->saddr,
                                           iph->tos, head->dev);
                if (err)
-                       goto out_rcu_unlock;
+                       goto out;
 
                /* Only an end host needs to send an ICMP
                 * "Fragment Reassembly Timeout" message, per RFC792.
                 */
                if (frag_expire_skip_icmp(qp->user) &&
                    (skb_rtable(head)->rt_type != RTN_LOCAL))
-                       goto out_rcu_unlock;
+                       goto out;
+
+               clone = skb_clone(head, GFP_ATOMIC);
 
                /* Send an ICMP "Fragment Reassembly Timeout" message. */
-               icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
-               rcu_read_unlock();
+               if (clone) {
+                       spin_unlock(&qp->q.lock);
+                       icmp_send(clone, ICMP_TIME_EXCEEDED,
+                                 ICMP_EXC_FRAGTIME, 0);
+                       consume_skb(clone);
+                       goto out_rcu_unlock;
+               }
        }
 out:
        spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+       rcu_read_unlock();
        ipq_put(qp);
 }
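The reassembly-timeout ICMP is now emitted from a clone with qp->q.lock already dropped, keeping the per-queue spinlock's hold time short and avoiding route/ICMP work under it, while the rcu_read_lock() moved up so the device reference stays valid across the whole path. Condensed, the shape of the fix:

static void expire_icmp_sketch(struct ipq *qp, struct sk_buff *head)
{
        struct sk_buff *clone = skb_clone(head, GFP_ATOMIC);

        /* On allocation failure the ICMP is simply skipped, as in
         * the hunk above; the lock is then released by the caller.
         */
        if (clone) {
                spin_unlock(&qp->q.lock);
                icmp_send(clone, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
                consume_skb(clone);
        }
}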
 
index d6feabb0351607f282e1f78f159c0ccb88bcec96..fa2dc8f692c631f1ff7fe814c3ee27f0de2a41d8 100644 (file)
@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        const struct iphdr *iph = ip_hdr(skb);
        struct rtable *rt;
        struct net_device *dev = skb->dev;
+       void (*edemux)(struct sk_buff *skb);
 
        /* if ingress device is enslaved to an L3 master device pass the
         * skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
                int protocol = iph->protocol;
 
                ipprot = rcu_dereference(inet_protos[protocol]);
-               if (ipprot && ipprot->early_demux) {
-                       ipprot->early_demux(skb);
+               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+                       edemux(skb);
                        /* must reload iph, skb->head might have changed */
                        iph = ip_hdr(skb);
                }
index fd9f34bbd7408a0e9b0342ec6512c69cc30edc39..9def8ed31c760fa1e3b593484a286bbb9ceb3918 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/export.h>
 #include <net/net_namespace.h>
 #include <net/arp.h>
+#include <net/dsa.h>
 #include <net/ip.h>
 #include <net/ipconfig.h>
 #include <net/route.h>
index c0317c940bcdc303015f500b52198e0862440e17..5bca64fc71b717b95f196866adc3c6d951304eed 100644 (file)
@@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
-               inet_netconf_notify_devconf(dev_net(dev),
+               inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
                                            NETCONFA_MC_FORWARDING,
                                            dev->ifindex, &in_dev->cnf);
                ip_rt_multicast_event(in_dev);
@@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
-       inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
-                                   &in_dev->cnf);
+       inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
+                                   dev->ifindex, &in_dev->cnf);
        ip_rt_multicast_event(in_dev);
 
        /* Fill in the VIF structures */
@@ -1282,7 +1282,8 @@ static void mrtsock_destruct(struct sock *sk)
        ipmr_for_each_table(mrt, net) {
                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
-                       inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_MC_FORWARDING,
                                                    NETCONFA_IFINDEX_ALL,
                                                    net->ipv4.devconf_all);
                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
@@ -1344,7 +1345,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
                if (ret == 0) {
                        rcu_assign_pointer(mrt->mroute_sk, sk);
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
-                       inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+                       inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_MC_FORWARDING,
                                                    NETCONFA_IFINDEX_ALL,
                                                    net->ipv4.devconf_all);
                }
index 6241a81fd7f5a3df8fb3cf251bfdd407dda6a1f6..f17dab1dee6e171148a386302081188c0e83ee5d 100644 (file)
@@ -562,8 +562,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
                    XT_ERROR_TARGET) == 0)
                        ++newinfo->stacksize;
        }
-       if (ret != 0)
-               goto out_free;
 
        ret = -EINVAL;
        if (i != repl->num_entries)
index 52f26459efc345a8a0c00d356306fb5fd398547e..fcbdc0c49b0e514d338bb48a84910b0ebf7ba6bd 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/icmp.h>
 #include <linux/if_arp.h>
 #include <linux/seq_file.h>
+#include <linux/refcount.h>
 #include <linux/netfilter_arp.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
 
 struct clusterip_config {
        struct list_head list;                  /* list of all configs */
-       atomic_t refcount;                      /* reference count */
-       atomic_t entries;                       /* number of entries/rules
+       refcount_t refcount;                    /* reference count */
+       refcount_t entries;                     /* number of entries/rules
                                                 * referencing us */
 
        __be32 clusterip;                       /* the IP address */
@@ -77,7 +78,7 @@ struct clusterip_net {
 static inline void
 clusterip_config_get(struct clusterip_config *c)
 {
-       atomic_inc(&c->refcount);
+       refcount_inc(&c->refcount);
 }
 
 
@@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head)
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
-       if (atomic_dec_and_test(&c->refcount))
+       if (refcount_dec_and_test(&c->refcount))
                call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
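CLUSTERIP's two atomic_t reference counts become refcount_t, which saturates on overflow and traps increment-from-zero instead of silently wrapping; the API maps one-to-one (atomic_inc to refcount_inc, atomic_dec_and_test to refcount_dec_and_test, atomic_dec_and_lock to refcount_dec_and_lock, atomic_inc_not_zero to refcount_inc_not_zero). A minimal lifecycle sketch matching the conversion:

/* Requires <linux/refcount.h>, added at the top of the file above. */
struct obj {
        refcount_t ref;
};

static void obj_init(struct obj *o) { refcount_set(&o->ref, 1); }
static void obj_get(struct obj *o)  { refcount_inc(&o->ref); }
/* Returns true when the last reference dropped and 'o' may be freed. */
static bool obj_put(struct obj *o)  { return refcount_dec_and_test(&o->ref); }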
 
@@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
        struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 
        local_bh_disable();
-       if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+       if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
                list_del_rcu(&c->list);
                spin_unlock(&cn->lock);
                local_bh_enable();
@@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
                        c = NULL;
                else
 #endif
-               if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+               if (unlikely(!refcount_inc_not_zero(&c->refcount)))
                        c = NULL;
                else if (entry)
-                       atomic_inc(&c->entries);
+                       refcount_inc(&c->entries);
        }
        rcu_read_unlock_bh();
 
@@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
        clusterip_config_init_nodelist(c, i);
        c->hash_mode = i->hash_mode;
        c->hash_initval = i->hash_initval;
-       atomic_set(&c->refcount, 1);
-       atomic_set(&c->entries, 1);
+       refcount_set(&c->refcount, 1);
+       refcount_set(&c->entries, 1);
 
        spin_lock_bh(&cn->lock);
        if (__clusterip_config_find(net, ip)) {
index c9b52c361da2e6acc746c2de86d8c7f3af0a9b39..ef49989c93b1918ab35b1b752981e8c22357c118 100644 (file)
@@ -998,18 +998,6 @@ err_id_free:
  *
  *****************************************************************************/
 
-static void hex_dump(const unsigned char *buf, size_t len)
-{
-       size_t i;
-
-       for (i = 0; i < len; i++) {
-               if (i && !(i % 16))
-                       printk("\n");
-               printk("%02x ", *(buf + i));
-       }
-       printk("\n");
-}
-
 /*
  * Parse and mangle SNMP message according to mapping.
  * (And this is the fucking 'basic' method).
@@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg,
        struct snmp_object *obj;
 
        if (debug > 1)
-               hex_dump(msg, len);
+               print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
+                              msg, len, 0);
 
        asn1_open(&ctx, msg, len);
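The hand-rolled hex_dump() is replaced with the kernel's print_hex_dump(); the trailing arguments in the call above select 16 bytes per row, single-byte grouping, no prefix column and no ASCII sidebar. For comparison, an equivalent call with an identifying prefix and offset column:

static void dump_msg_sketch(const unsigned char *msg, size_t len)
{
        /* 16 bytes per row, 1-byte groups, offset prefix, no ASCII. */
        print_hex_dump(KERN_DEBUG, "snmp: ", DUMP_PREFIX_OFFSET, 16, 1,
                       msg, len, false);
}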
 
index 146d86105183e1a456a0f17ed6bb5371aa1e8f76..7cd8d0d918f82e275e0ecd31c1cea8ec8fcf345d 100644 (file)
@@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
 void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
 {
        struct sk_buff *nskb;
-       const struct iphdr *oiph;
        struct iphdr *niph;
        const struct tcphdr *oth;
        struct tcphdr _oth;
@@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
        if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                return;
 
-       oiph = ip_hdr(oldskb);
-
        nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
                         LL_MAX_HEADER, GFP_ATOMIC);
        if (!nskb)
index 2981291910dd2cac2d508fcde89083afc22affd4..f4e4462cb5bb1b877fac32d7718ead86e97e91f2 100644 (file)
@@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
        if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
            nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-               nft_fib_store_result(dest, priv->result, pkt,
+               nft_fib_store_result(dest, priv, pkt,
                                     nft_in(pkt)->ifindex);
                return;
        }
@@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
        if (ipv4_is_zeronet(iph->saddr)) {
                if (ipv4_is_lbcast(iph->daddr) ||
                    ipv4_is_local_multicast(iph->daddr)) {
-                       nft_fib_store_result(dest, priv->result, pkt,
+                       nft_fib_store_result(dest, priv, pkt,
                                             get_ifindex(pkt->skb->dev));
                        return;
                }
index 69cf49e8356d0184f774840c9dc96560f2ae2f2b..4ccbf464d1acf5f433dd2a0768691f5d22e3033d 100644 (file)
@@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = {
        SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
        SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
        SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
-       SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
        SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
        SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
        SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
index 4b7c0ec65251ef40577a2d5e360fcbaed391a566..32a691b7ce2c7e79eab6491b52457a11e666f7d3 100644 (file)
@@ -28,7 +28,7 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 
-const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
 const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
 EXPORT_SYMBOL(inet_offloads);
 
index 8471dd116771462d149e1da2807e446b69b74bcc..5dda1ef81c7e10bbc41e610aacf6bad15ba05b45 100644 (file)
@@ -1734,45 +1734,97 @@ out:
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-
 /* To make ICMP packets follow the right flow, the multipath hash is
- * calculated from the inner IP addresses in reverse order.
+ * calculated from the inner IP addresses.
  */
-static int ip_multipath_icmp_hash(struct sk_buff *skb)
+static void ip_multipath_l3_keys(const struct sk_buff *skb,
+                                struct flow_keys *hash_keys)
 {
        const struct iphdr *outer_iph = ip_hdr(skb);
-       struct icmphdr _icmph;
+       const struct iphdr *inner_iph;
        const struct icmphdr *icmph;
        struct iphdr _inner_iph;
-       const struct iphdr *inner_iph;
+       struct icmphdr _icmph;
+
+       hash_keys->addrs.v4addrs.src = outer_iph->saddr;
+       hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
+       if (likely(outer_iph->protocol != IPPROTO_ICMP))
+               return;
 
        if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
-               goto standard_hash;
+               return;
 
        icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
                                   &_icmph);
        if (!icmph)
-               goto standard_hash;
+               return;
 
        if (icmph->type != ICMP_DEST_UNREACH &&
            icmph->type != ICMP_REDIRECT &&
            icmph->type != ICMP_TIME_EXCEEDED &&
-           icmph->type != ICMP_PARAMETERPROB) {
-               goto standard_hash;
-       }
+           icmph->type != ICMP_PARAMETERPROB)
+               return;
 
        inner_iph = skb_header_pointer(skb,
                                       outer_iph->ihl * 4 + sizeof(_icmph),
                                       sizeof(_inner_iph), &_inner_iph);
        if (!inner_iph)
-               goto standard_hash;
+               return;
+       hash_keys->addrs.v4addrs.src = inner_iph->saddr;
+       hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+}
 
-       return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
+/* If skb is set it will be used and fl4 can then be NULL */
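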
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+                      const struct sk_buff *skb)
+{
+       struct net *net = fi->fib_net;
+       struct flow_keys hash_keys;
+       u32 mhash;
 
-standard_hash:
-       return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
-}
+       switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+       case 0:
+               memset(&hash_keys, 0, sizeof(hash_keys));
+               hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+               if (skb) {
+                       ip_multipath_l3_keys(skb, &hash_keys);
+               } else {
+                       hash_keys.addrs.v4addrs.src = fl4->saddr;
+                       hash_keys.addrs.v4addrs.dst = fl4->daddr;
+               }
+               break;
+       case 1:
+               /* skb is currently provided only when forwarding */
+               if (skb) {
+                       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+                       struct flow_keys keys;
+
+                       /* short-circuit if we already have L4 hash present */
+                       if (skb->l4_hash)
+                               return skb_get_hash_raw(skb) >> 1;
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+                       skb_flow_dissect_flow_keys(skb, &keys, flag);
+                       hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+                       hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+                       hash_keys.ports.src = keys.ports.src;
+                       hash_keys.ports.dst = keys.ports.dst;
+                       hash_keys.basic.ip_proto = keys.basic.ip_proto;
+               } else {
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+                       hash_keys.addrs.v4addrs.src = fl4->saddr;
+                       hash_keys.addrs.v4addrs.dst = fl4->daddr;
+                       hash_keys.ports.src = fl4->fl4_sport;
+                       hash_keys.ports.dst = fl4->fl4_dport;
+                       hash_keys.basic.ip_proto = fl4->flowi4_proto;
+               }
+               break;
+       }
+       mhash = flow_hash_from_keys(&hash_keys);
 
+       return mhash >> 1;
+}
+EXPORT_SYMBOL_GPL(fib_multipath_hash);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
 static int ip_mkroute_input(struct sk_buff *skb,
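
For reference, the 31-bit hash returned above feeds next-hop selection. A toy user-space model (illustrative names, not kernel code) of how such a hash can map onto N equal-weight next hops; the real fib_select_multipath() compares against per-nexthop upper bounds rather than multiplying:

#include <stdint.h>
#include <stdio.h>

/* Toy model: map a 31-bit multipath hash (flow hash >> 1) onto one of
 * num_nexthops equal-weight next hops. */
static int pick_nexthop(uint32_t mhash, int num_nexthops)
{
	return (int)(((uint64_t)mhash * num_nexthops) >> 31);
}

int main(void)
{
	for (uint32_t h = 0; h < (1u << 31); h += 1u << 29)
		printf("hash %#010x -> nexthop %d of 4\n",
		       h, pick_nexthop(h, 4));
	return 0;
}
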
@@ -1782,12 +1834,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi && res->fi->fib_nhs > 1) {
-               int h;
+               int h = fib_multipath_hash(res->fi, NULL, skb);
 
-               if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
-                       h = ip_multipath_icmp_hash(skb);
-               else
-                       h = fib_multipath_hash(saddr, daddr);
                fib_select_multipath(res, h);
        }
 #endif
@@ -2203,7 +2251,7 @@ add:
  */
 
 struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
-                                         int mp_hash)
+                                         const struct sk_buff *skb)
 {
        struct net_device *dev_out = NULL;
        __u8 tos = RT_FL_TOS(fl4);
@@ -2365,7 +2413,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                goto make_route;
        }
 
-       fib_select_path(net, &res, fl4, mp_hash);
+       fib_select_path(net, &res, fl4, skb);
 
        dev_out = FIB_RES_DEV(res);
        fl4->flowi4_oif = dev_out->ifindex;
index d6880a6149ee80c6c75f4fe75b46a9d18d204d5d..6fb25693c00b92cbf881a13b06f2276b288853b1 100644 (file)
@@ -24,6 +24,7 @@
 #include <net/cipso_ipv4.h>
 #include <net/inet_frag.h>
 #include <net/ping.h>
+#include <net/protocol.h>
 
 static int zero;
 static int one = 1;
@@ -294,6 +295,58 @@ bad_key:
        return ret;
 }
 
+static void proc_configure_early_demux(int enabled, int protocol)
+{
+       struct net_protocol *ipprot;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct inet6_protocol *ip6prot;
+#endif
+
+       ipprot = rcu_dereference(inet_protos[protocol]);
+       if (ipprot)
+               ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+                                               NULL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+       ip6prot = rcu_dereference(inet6_protos[protocol]);
+       if (ip6prot)
+               ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+                                                NULL;
+#endif
+}
+
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret = 0;
+
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+       if (write && !ret) {
+               int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+               proc_configure_early_demux(enabled, IPPROTO_TCP);
+       }
+
+       return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret = 0;
+
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+       if (write && !ret) {
+               int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+               proc_configure_early_demux(enabled, IPPROTO_UDP);
+       }
+
+       return ret;
+}
+
 static struct ctl_table ipv4_table[] = {
        {
                .procname       = "tcp_timestamps",
@@ -749,6 +802,20 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "udp_early_demux",
+               .data           = &init_net.ipv4.sysctl_udp_early_demux,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_udp_early_demux
+       },
+       {
+               .procname       = "tcp_early_demux",
+               .data           = &init_net.ipv4.sysctl_tcp_early_demux,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_tcp_early_demux
+       },
        {
                .procname       = "ip_default_ttl",
                .data           = &init_net.ipv4.sysctl_ip_default_ttl,
@@ -980,13 +1047,6 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
-       {
-               .procname       = "tcp_tw_recycle",
-               .data           = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
        {
                .procname       = "tcp_max_syn_backlog",
                .data           = &init_net.ipv4.sysctl_max_syn_backlog,
@@ -1004,6 +1064,15 @@ static struct ctl_table ipv4_net_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
+       {
+               .procname       = "fib_multipath_hash_policy",
+               .data           = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
 #endif
        {
                .procname       = "ip_unprivileged_port_start",
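
The two handlers above publish or clear a function pointer that the receive path then loads exactly once before calling (see the READ_ONCE() change to ip6_rcv_finish() later in this series). A minimal user-space sketch of that single-load toggle, with C11 atomics standing in for the kernel's RCU primitives (all names hypothetical):

#include <stdatomic.h>
#include <stdio.h>

static void demux(void *skb) { printf("early demux on %p\n", skb); }

static _Atomic(void (*)(void *)) early_demux = demux;

/* The "sysctl write" side: publish or clear the handler. */
static void set_early_demux(int enabled)
{
	atomic_store(&early_demux, enabled ? demux : NULL);
}

/* The fast path: one load, then test and call that same value, so a
 * concurrent toggle cannot slip in between the check and the call. */
static void rx_path(void *skb)
{
	void (*edemux)(void *) = atomic_load(&early_demux);

	if (edemux)
		edemux(skb);
}

int main(void)
{
	int pkt;

	rx_path(&pkt);
	set_early_demux(0);
	rx_path(&pkt);		/* handler disabled, nothing printed */
	return 0;
}
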
index cf4555581282c608f920254078264e36e18584c6..1665948dff8c6ce1a14dbe01e3133550179c33bb 100644 (file)
@@ -2470,7 +2470,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                /* Values greater than interface MTU won't take effect. However
                 * at the point when this call is done we typically don't yet
                 * know which interface is going to be used */
-               if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+               if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
                        err = -EINVAL;
                        break;
                }
@@ -2770,7 +2770,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
        const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
        const struct inet_connection_sock *icsk = inet_csk(sk);
-       u32 now = tcp_time_stamp, intv;
+       u32 now, intv;
        u64 rate64;
        bool slow;
        u32 rate;
@@ -2839,6 +2839,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_retrans = tp->retrans_out;
        info->tcpi_fackets = tp->fackets_out;
 
+       now = tcp_time_stamp;
        info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
        info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
        info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
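
With the relaxed check above, a TCP_MAXSEG value of 0 is accepted and simply clears the stored user MSS instead of failing with -EINVAL. A small user-space illustration, assuming a kernel with this change:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int val = 0;	/* 0: clear the user MSS; previously rejected */

	if (fd < 0)
		return 1;
	if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, sizeof(val)))
		perror("setsockopt(TCP_MAXSEG)");
	close(fd);
	return 0;
}
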
index 39c393cc0fd3c17130cd5d8d8b37f31ad3aeafd9..a75c48f62e272e34a852ce1ec2523a3828a43247 100644 (file)
@@ -5541,6 +5541,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
        struct inet_connection_sock *icsk = inet_csk(sk);
 
        tcp_set_state(sk, TCP_ESTABLISHED);
+       icsk->icsk_ack.lrcvtime = tcp_time_stamp;
 
        if (skb) {
                icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5759,7 +5760,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                         * to stand against the temptation 8)     --ANK
                         */
                        inet_csk_schedule_ack(sk);
-                       icsk->icsk_ack.lrcvtime = tcp_time_stamp;
                        tcp_enter_quickack_mode(sk);
                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
                                                  TCP_DELACK_MAX, TCP_RTO_MAX);
@@ -6324,36 +6324,14 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                goto drop_and_free;
 
        if (isn && tmp_opt.tstamp_ok)
-               af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+               af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
 
        if (!want_cookie && !isn) {
-               /* VJ's idea. We save last timestamp seen
-                * from the destination in peer table, when entering
-                * state TIME-WAIT, and check against it before
-                * accepting new connection request.
-                *
-                * If "isn" is not zero, this request hit alive
-                * timewait bucket, so that all the necessary checks
-                * are made in the function processing timewait state.
-                */
-               if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
-                       bool strict;
-
-                       dst = af_ops->route_req(sk, &fl, req, &strict);
-
-                       if (dst && strict &&
-                           !tcp_peer_is_proven(req, dst, true,
-                                               tmp_opt.saw_tstamp)) {
-                               NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-                               goto drop_and_release;
-                       }
-               }
                /* Kill the following clause, if you dislike this way. */
-               else if (!net->ipv4.sysctl_tcp_syncookies &&
-                        (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-                         (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
-                        !tcp_peer_is_proven(req, dst, false,
-                                            tmp_opt.saw_tstamp)) {
+               if (!net->ipv4.sysctl_tcp_syncookies &&
+                   (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+                    (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+                   !tcp_peer_is_proven(req, dst)) {
                        /* Without syncookies last quarter of
                         * backlog is filled with destinations,
                         * proven to be alive.
@@ -6366,10 +6344,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                        goto drop_and_release;
                }
 
-               isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+               isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
        }
        if (!dst) {
-               dst = af_ops->route_req(sk, &fl, req, NULL);
+               dst = af_ops->route_req(sk, &fl, req);
                if (!dst)
                        goto drop_and_free;
        }
index 575e19dcc01763ef3fa938dea3ea51995b573163..20cbd2f07f281717c1cb4e901c4c4e22f7c46bd6 100644 (file)
@@ -94,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
 {
-       return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
-                                         ip_hdr(skb)->saddr,
-                                         tcp_hdr(skb)->dest,
-                                         tcp_hdr(skb)->source, tsoff);
+       return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr,
+                                       ip_hdr(skb)->saddr,
+                                       tcp_hdr(skb)->dest,
+                                       tcp_hdr(skb)->source, tsoff);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                        tp->write_seq      = 0;
        }
 
-       if (tcp_death_row->sysctl_tw_recycle &&
-           !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
-               tcp_fetch_timewait_stamp(sk, &rt->dst);
-
        inet->inet_dport = usin->sin_port;
        sk_daddr_set(sk, daddr);
 
@@ -236,11 +232,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        rt = NULL;
 
        if (likely(!tp->repair)) {
-               seq = secure_tcp_sequence_number(inet->inet_saddr,
-                                                inet->inet_daddr,
-                                                inet->inet_sport,
-                                                usin->sin_port,
-                                                &tp->tsoffset);
+               seq = secure_tcp_seq_and_tsoff(inet->inet_saddr,
+                                              inet->inet_daddr,
+                                              inet->inet_sport,
+                                              usin->sin_port,
+                                              &tp->tsoffset);
                if (!tp->write_seq)
                        tp->write_seq = seq;
        }
@@ -1217,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req,
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
                                          struct flowi *fl,
-                                         const struct request_sock *req,
-                                         bool *strict)
+                                         const struct request_sock *req)
 {
-       struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
-
-       if (strict) {
-               if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
-                       *strict = true;
-               else
-                       *strict = false;
-       }
-
-       return dst;
+       return inet_csk_route_req(sk, &fl->u.ip4, req);
 }
 
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
@@ -1253,7 +1239,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .cookie_init_seq =      cookie_v4_init_sequence,
 #endif
        .route_req      =       tcp_v4_route_req,
-       .init_seq       =       tcp_v4_init_sequence,
+       .init_seq_tsoff =       tcp_v4_init_seq_and_tsoff,
        .send_synack    =       tcp_v4_send_synack,
 };
 
@@ -1423,8 +1409,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
                if (!nsk)
                        goto discard;
                if (nsk != sk) {
-                       sock_rps_save_rxhash(nsk, skb);
-                       sk_mark_napi_id(nsk, skb);
                        if (tcp_child_process(sk, nsk, skb)) {
                                rsk = nsk;
                                goto reset;
@@ -2466,7 +2450,6 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_tw_reuse = 0;
 
        cnt = tcp_hashinfo.ehash_mask + 1;
-       net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
        net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
        net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
 
index 0f46e5fe31ad1b6809ada1f70bce7b63df4f8c9c..9d0d4f39e42be15d8ad7389bc7562449b92ea5fa 100644 (file)
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
        struct inetpeer_addr            tcpm_saddr;
        struct inetpeer_addr            tcpm_daddr;
        unsigned long                   tcpm_stamp;
-       u32                             tcpm_ts;
-       u32                             tcpm_ts_stamp;
        u32                             tcpm_lock;
        u32                             tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
        struct tcp_fastopen_metrics     tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
        tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
        tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
        tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
-       tm->tcpm_ts = 0;
-       tm->tcpm_ts_stamp = 0;
        if (fastopen_clear) {
                tm->tcpm_fastopen.mss = 0;
                tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
        return tm;
 }
 
-static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
-{
-       struct tcp_metrics_block *tm;
-       struct inetpeer_addr saddr, daddr;
-       unsigned int hash;
-       struct net *net;
-
-       if (tw->tw_family == AF_INET) {
-               inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
-               inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
-               hash = ipv4_addr_hash(tw->tw_daddr);
-       }
-#if IS_ENABLED(CONFIG_IPV6)
-       else if (tw->tw_family == AF_INET6) {
-               if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
-                       inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
-                       inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
-                       hash = ipv4_addr_hash(tw->tw_daddr);
-               } else {
-                       inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
-                       inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
-                       hash = ipv6_addr_hash(&tw->tw_v6_daddr);
-               }
-       }
-#endif
-       else
-               return NULL;
-
-       net = twsk_net(tw);
-       hash ^= net_hash_mix(net);
-       hash = hash_32(hash, tcp_metrics_hash_log);
-
-       for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
-            tm = rcu_dereference(tm->tcpm_next)) {
-               if (addr_same(&tm->tcpm_saddr, &saddr) &&
-                   addr_same(&tm->tcpm_daddr, &daddr) &&
-                   net_eq(tm_net(tm), net))
-                       break;
-       }
-       return tm;
-}
-
 static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
                                                 struct dst_entry *dst,
                                                 bool create)
@@ -573,8 +527,7 @@ reset:
        tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
-                       bool paws_check, bool timestamps)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
 {
        struct tcp_metrics_block *tm;
        bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
 
        rcu_read_lock();
        tm = __tcp_get_metrics_req(req, dst);
-       if (paws_check) {
-               if (tm &&
-                   (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
-                   ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
-                    !timestamps))
-                       ret = false;
-               else
-                       ret = true;
-       } else {
-               if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
-                       ret = true;
-               else
-                       ret = false;
-       }
-       rcu_read_unlock();
-
-       return ret;
-}
-
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
-{
-       struct tcp_metrics_block *tm;
-
-       rcu_read_lock();
-       tm = tcp_get_metrics(sk, dst, true);
-       if (tm) {
-               struct tcp_sock *tp = tcp_sk(sk);
-
-               if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
-                       tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
-                       tp->rx_opt.ts_recent = tm->tcpm_ts;
-               }
-       }
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
-
-/* VJ's idea. Save last timestamp seen from this destination and hold
- * it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter
- * synchronized state.
- */
-bool tcp_remember_stamp(struct sock *sk)
-{
-       struct dst_entry *dst = __sk_dst_get(sk);
-       bool ret = false;
-
-       if (dst) {
-               struct tcp_metrics_block *tm;
-
-               rcu_read_lock();
-               tm = tcp_get_metrics(sk, dst, true);
-               if (tm) {
-                       struct tcp_sock *tp = tcp_sk(sk);
-
-                       if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
-                           ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
-                            tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
-                               tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
-                               tm->tcpm_ts = tp->rx_opt.ts_recent;
-                       }
-                       ret = true;
-               }
-               rcu_read_unlock();
-       }
-       return ret;
-}
-
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
-{
-       struct tcp_metrics_block *tm;
-       bool ret = false;
-
-       rcu_read_lock();
-       tm = __tcp_get_metrics_tw(tw);
-       if (tm) {
-               const struct tcp_timewait_sock *tcptw;
-               struct sock *sk = (struct sock *) tw;
-
-               tcptw = tcp_twsk(sk);
-               if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
-                   ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
-                    tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
-                       tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
-                       tm->tcpm_ts        = tcptw->tw_ts_recent;
-               }
+       if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
                ret = true;
-       }
+       else
+               ret = false;
        rcu_read_unlock();
 
        return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
                          jiffies - tm->tcpm_stamp,
                          TCP_METRICS_ATTR_PAD) < 0)
                goto nla_put_failure;
-       if (tm->tcpm_ts_stamp) {
-               if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
-                               (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
-                       goto nla_put_failure;
-               if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
-                               tm->tcpm_ts) < 0)
-                       goto nla_put_failure;
-       }
 
        {
                int n = 0;
index 7e16243cdb58c830f869fe483730e86400e2eb00..8f6373b0cd7729e7afde1b733879058197e9c5ca 100644 (file)
@@ -26,6 +26,7 @@
 #include <net/tcp.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_abort_on_overflow __read_mostly;
 
@@ -94,7 +95,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
        struct tcp_options_received tmp_opt;
        struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
        bool paws_reject = false;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
 
        tmp_opt.saw_tstamp = 0;
        if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -149,12 +149,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
                        tcptw->tw_ts_recent       = tmp_opt.rcv_tsval;
                }
 
-               if (tcp_death_row->sysctl_tw_recycle &&
-                   tcptw->tw_ts_recent_stamp &&
-                   tcp_tw_remember_stamp(tw))
-                       inet_twsk_reschedule(tw, tw->tw_timeout);
-               else
-                       inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+               inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
                return TCP_TW_ACK;
        }
 
@@ -259,12 +254,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_sock *tp = tcp_sk(sk);
        struct inet_timewait_sock *tw;
-       bool recycle_ok = false;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
-       if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-               recycle_ok = tcp_remember_stamp(sk);
-
        tw = inet_twsk_alloc(sk, tcp_death_row, state);
 
        if (tw) {
@@ -317,13 +308,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                if (timeo < rto)
                        timeo = rto;
 
-               if (recycle_ok) {
-                       tw->tw_timeout = rto;
-               } else {
-                       tw->tw_timeout = TCP_TIMEWAIT_LEN;
-                       if (state == TCP_TIME_WAIT)
-                               timeo = TCP_TIMEWAIT_LEN;
-               }
+               tw->tw_timeout = TCP_TIMEWAIT_LEN;
+               if (state == TCP_TIME_WAIT)
+                       timeo = TCP_TIMEWAIT_LEN;
 
                inet_twsk_schedule(tw, timeo);
                /* Linkage updates. */
@@ -460,6 +447,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
                minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
                newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+               newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
 
                newtp->packets_out = 0;
                newtp->retrans_out = 0;
@@ -812,6 +800,9 @@ int tcp_child_process(struct sock *parent, struct sock *child,
        int ret = 0;
        int state = child->sk_state;
 
+       /* record NAPI ID of child */
+       sk_mark_napi_id(child, skb);
+
        tcp_segs_in(tcp_sk(child), skb);
        if (!sock_owned_by_user(child)) {
                ret = tcp_rcv_state_process(child, skb);
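
Recording the NAPI ID on the child socket here means it is already set by the time the socket reaches the application. Assuming the kernel exposes the ID through the SO_INCOMING_NAPI_ID socket option (the numeric value below is the asm-generic one and may differ per architecture), a consumer could read it back like this:

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_INCOMING_NAPI_ID
#define SO_INCOMING_NAPI_ID 56	/* asm-generic value; may differ */
#endif

/* Print the NAPI ID of the queue that delivered traffic to fd. */
static void print_napi_id(int fd)
{
	unsigned int napi_id = 0;
	socklen_t len = sizeof(napi_id);

	if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID,
		       &napi_id, &len) == 0)
		printf("fd %d: napi_id %u\n", fd, napi_id);
	else
		perror("getsockopt(SO_INCOMING_NAPI_ID)");
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	print_napi_id(fd);	/* 0 until traffic has arrived */
	return 0;
}
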
index 22548b5f05cbe5a655e0c53df2d31c5cc2e8a702..13971942211b214dc2bc26e1e442c4b7ba305e3d 100644 (file)
@@ -2561,7 +2561,6 @@ u32 __tcp_select_window(struct sock *sk)
        /* Don't do rounding if we are using window scaling, since the
         * scaled window will not line up with the MSS boundary anyway.
         */
-       window = tp->rcv_wnd;
        if (tp->rx_opt.rcv_wscale) {
                window = free_space;
 
@@ -2569,10 +2568,9 @@ u32 __tcp_select_window(struct sock *sk)
                 * Important case: prevent zero window announcement if
                 * 1<<rcv_wscale > mss.
                 */
-               if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
-                       window = (((window >> tp->rx_opt.rcv_wscale) + 1)
-                                 << tp->rx_opt.rcv_wscale);
+               window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
        } else {
+               window = tp->rcv_wnd;
                /* Get the largest window that is a nice multiple of mss.
                 * Window clamp already applied above.
                 * If our current window offering is within 1 mss of the
@@ -2582,7 +2580,7 @@ u32 __tcp_select_window(struct sock *sk)
                 * is too small.
                 */
                if (window <= free_space - mss || window > free_space)
-                       window = (free_space / mss) * mss;
+                       window = rounddown(free_space, mss);
                else if (mss == full_space &&
                         free_space > window + (full_space >> 1))
                        window = free_space;
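
ALIGN() rounds the window up to a multiple of 1 << rcv_wscale, and rounddown() keeps the largest multiple of the MSS that fits, the same arithmetic as the open-coded versions they replace. Spelled out with portable stand-in macros:

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))
#define ROUNDDOWN(x, a)	((x) / (a) * (a))

int main(void)
{
	unsigned int window = 70000, wscale = 7, mss = 1460;
	unsigned int free_space = 60000;

	/* scaled case: round up to a multiple of 1 << rcv_wscale */
	printf("ALIGN(%u, %u) = %u\n", window, 1u << wscale,
	       ALIGN_UP(window, 1u << wscale));	/* 70016 */
	/* unscaled case: largest multiple of mss within free_space */
	printf("rounddown(%u, %u) = %u\n", free_space, mss,
	       ROUNDDOWN(free_space, mss));	/* 59860 */
	return 0;
}
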
index fed66dc0e0f5f242cf0af25434fa9cfa89998958..9775453b8d174c848dc09df83d1fa185422cd8cc 100644 (file)
@@ -265,8 +265,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
        if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
                info->vegas.tcpv_enabled = 1;
                info->vegas.tcpv_rttcnt = 0;
-               info->vegas.tcpv_rtt    = jiffies_to_usecs(ca->rtt),
-               info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+               info->vegas.tcpv_rtt    = jiffies_to_usecs(ca->rtt);
+               info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
 
                *attr = INET_DIAG_VEGASINFO;
                return sizeof(struct tcpvegas_info);
index e2afe677a9d944a2c6c27a2e7b2d06227712cf89..48c452959d2c2fe687472c3732fe40246a8c863a 100644 (file)
@@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL
        bool "IPv6: Segment Routing Header encapsulation support"
        depends on IPV6
        select LWTUNNEL
+       select DST_CACHE
        ---help---
          Support for encapsulation of packets within an outer IPv6
          header and a Segment Routing Header using the lightweight
index 363172527e433e321cfa9fe8e96cfe32e4a78043..67ec87ea5fb699eb8ba4634c91815f97d750ddf1 100644 (file)
@@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .accept_ra_rtr_pref     = 1,
        .rtr_probe_interval     = 60 * HZ,
 #ifdef CONFIG_IPV6_ROUTE_INFO
+       .accept_ra_rt_info_min_plen = 0,
        .accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
@@ -245,6 +246,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 #endif
        .enhanced_dad           = 1,
        .addr_gen_mode          = IN6_ADDR_GEN_MODE_EUI64,
+       .disable_policy         = 0,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -276,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .accept_ra_rtr_pref     = 1,
        .rtr_probe_interval     = 60 * HZ,
 #ifdef CONFIG_IPV6_ROUTE_INFO
+       .accept_ra_rt_info_min_plen = 0,
        .accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
@@ -297,6 +300,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 #endif
        .enhanced_dad           = 1,
        .addr_gen_mode          = IN6_ADDR_GEN_MODE_EUI64,
+       .disable_policy         = 0,
 };
 
 /* Check if a valid qdisc is available */
@@ -545,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
                goto nla_put_failure;
 
+       if (!devconf)
+               goto out;
+
        if ((all || type == NETCONFA_FORWARDING) &&
            nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
                goto nla_put_failure;
@@ -563,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
                        devconf->ignore_routes_with_linkdown) < 0)
                goto nla_put_failure;
 
+out:
        nlmsg_end(skb, nlh);
        return 0;
 
@@ -571,8 +579,8 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
-                                 struct ipv6_devconf *devconf)
+void inet6_netconf_notify_devconf(struct net *net, int event, int type,
+                                 int ifindex, struct ipv6_devconf *devconf)
 {
        struct sk_buff *skb;
        int err = -ENOBUFS;
@@ -582,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
                goto errout;
 
        err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
-                                        RTM_NEWNETCONF, 0, type);
+                                        event, 0, type);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
                WARN_ON(err == -EMSGSIZE);
@@ -765,7 +773,8 @@ static void dev_forward_change(struct inet6_dev *idev)
                else
                        addrconf_leave_anycast(ifa);
        }
-       inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+       inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+                                    NETCONFA_FORWARDING,
                                     dev->ifindex, &idev->cnf);
 }
 
@@ -800,7 +809,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
 
        if (p == &net->ipv6.devconf_dflt->forwarding) {
                if ((!newf) ^ (!old))
-                       inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                    NETCONFA_FORWARDING,
                                                     NETCONFA_IFINDEX_DEFAULT,
                                                     net->ipv6.devconf_dflt);
                rtnl_unlock();
@@ -812,13 +822,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
 
                net->ipv6.devconf_dflt->forwarding = newf;
                if ((!newf) ^ (!old_dflt))
-                       inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                    NETCONFA_FORWARDING,
                                                     NETCONFA_IFINDEX_DEFAULT,
                                                     net->ipv6.devconf_dflt);
 
                addrconf_forward_change(net, newf);
                if ((!newf) ^ (!old))
-                       inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                    NETCONFA_FORWARDING,
                                                     NETCONFA_IFINDEX_ALL,
                                                     net->ipv6.devconf_all);
        } else if ((!newf) ^ (!old))
@@ -843,6 +855,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
                        idev->cnf.ignore_routes_with_linkdown = newf;
                        if (changed)
                                inet6_netconf_notify_devconf(dev_net(dev),
+                                                            RTM_NEWNETCONF,
                                                             NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
                                                             dev->ifindex,
                                                             &idev->cnf);
@@ -865,6 +878,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
        if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
                if ((!newf) ^ (!old))
                        inet6_netconf_notify_devconf(net,
+                                                    RTM_NEWNETCONF,
                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
                                                     NETCONFA_IFINDEX_DEFAULT,
                                                     net->ipv6.devconf_dflt);
@@ -877,6 +891,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
                addrconf_linkdown_change(net, newf);
                if ((!newf) ^ (!old))
                        inet6_netconf_notify_devconf(net,
+                                                    RTM_NEWNETCONF,
                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
                                                     NETCONFA_IFINDEX_ALL,
                                                     net->ipv6.devconf_all);
@@ -944,6 +959,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
              const struct in6_addr *peer_addr, int pfxlen,
              int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
 {
+       struct net *net = dev_net(idev->dev);
        struct inet6_ifaddr *ifa = NULL;
        struct rt6_info *rt;
        unsigned int hash;
@@ -990,6 +1006,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
                goto out;
        }
 
+       if (net->ipv6.devconf_all->disable_policy ||
+           idev->cnf.disable_policy)
+               rt->dst.flags |= DST_NOPOLICY;
+
        neigh_parms_data_state_setall(idev->nd_parms);
 
        ifa->addr = *addr;
@@ -4972,6 +4992,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_RTR_PROBE_INTERVAL] =
                jiffies_to_msecs(cnf->rtr_probe_interval);
 #ifdef CONFIG_IPV6_ROUTE_INFO
+       array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
        array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
 #endif
@@ -5003,6 +5024,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #endif
        array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
        array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
+       array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5664,17 +5686,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
                        return restart_syscall();
 
                if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
-                       inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                    NETCONFA_PROXY_NEIGH,
                                                     NETCONFA_IFINDEX_DEFAULT,
                                                     net->ipv6.devconf_dflt);
                else if (valp == &net->ipv6.devconf_all->proxy_ndp)
-                       inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                    NETCONFA_PROXY_NEIGH,
                                                     NETCONFA_IFINDEX_ALL,
                                                     net->ipv6.devconf_all);
                else {
                        struct inet6_dev *idev = ctl->extra1;
 
-                       inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                    NETCONFA_PROXY_NEIGH,
                                                     idev->dev->ifindex,
                                                     &idev->cnf);
                }
@@ -5827,6 +5852,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
        return ret;
 }
 
+static
+void addrconf_set_nopolicy(struct rt6_info *rt, int action)
+{
+       if (rt) {
+               if (action)
+                       rt->dst.flags |= DST_NOPOLICY;
+               else
+                       rt->dst.flags &= ~DST_NOPOLICY;
+       }
+}
+
+static
+void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
+{
+       struct inet6_ifaddr *ifa;
+
+       read_lock_bh(&idev->lock);
+       list_for_each_entry(ifa, &idev->addr_list, if_list) {
+               spin_lock(&ifa->lock);
+               if (ifa->rt) {
+                       struct rt6_info *rt = ifa->rt;
+                       struct fib6_table *table = rt->rt6i_table;
+                       int cpu;
+
+                       read_lock(&table->tb6_lock);
+                       addrconf_set_nopolicy(ifa->rt, val);
+                       if (rt->rt6i_pcpu) {
+                               for_each_possible_cpu(cpu) {
+                                       struct rt6_info **rtp;
+
+                                       rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+                                       addrconf_set_nopolicy(*rtp, val);
+                               }
+                       }
+                       read_unlock(&table->tb6_lock);
+               }
+               spin_unlock(&ifa->lock);
+       }
+       read_unlock_bh(&idev->lock);
+}
+
+static
+int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+{
+       struct inet6_dev *idev;
+       struct net *net;
+
+       if (!rtnl_trylock())
+               return restart_syscall();
+
+       *valp = val;
+
+       net = (struct net *)ctl->extra2;
+       if (valp == &net->ipv6.devconf_dflt->disable_policy) {
+               rtnl_unlock();
+               return 0;
+       }
+
+       if (valp == &net->ipv6.devconf_all->disable_policy)  {
+               struct net_device *dev;
+
+               for_each_netdev(net, dev) {
+                       idev = __in6_dev_get(dev);
+                       if (idev)
+                               addrconf_disable_policy_idev(idev, val);
+               }
+       } else {
+               idev = (struct inet6_dev *)ctl->extra1;
+               addrconf_disable_policy_idev(idev, val);
+       }
+
+       rtnl_unlock();
+       return 0;
+}
+
+static
+int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+                                  void __user *buffer, size_t *lenp,
+                                  loff_t *ppos)
+{
+       int *valp = ctl->data;
+       int val = *valp;
+       loff_t pos = *ppos;
+       struct ctl_table lctl;
+       int ret;
+
+       lctl = *ctl;
+       lctl.data = &val;
+       ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+       if (write && (*valp != val))
+               ret = addrconf_disable_policy(ctl, valp, val);
+
+       if (ret)
+               *ppos = pos;
+
+       return ret;
+}
+
 static int minus_one = -1;
 static const int one = 1;
 static const int two_five_five = 255;
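
addrconf_sysctl_disable_policy() above follows the usual shadow-table idiom: proc_dointvec() parses into a local copy, and the stored value is committed (together with its side effects) only when it actually changed. A stripped-down user-space sketch of the idiom, with hypothetical stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct ctl { int *data; };

static int parse_int(struct ctl *c, const char *buf)
{
	*c->data = atoi(buf);	/* proc_dointvec() stand-in */
	return 0;
}

static int handler(struct ctl *ctl, const char *buf)
{
	int *valp = ctl->data;
	int val = *valp;
	struct ctl lctl = *ctl;	/* shadow copy of the table entry... */
	int ret;

	lctl.data = &val;	/* ...parses into the local value */
	ret = parse_int(&lctl, buf);
	if (!ret && val != *valp) {
		printf("committing %d -> %d\n", *valp, val);
		*valp = val;	/* side-effect step happens only here */
	}
	return ret;
}

int main(void)
{
	int stored = 0;
	struct ctl c = { .data = &stored };

	handler(&c, "1");
	handler(&c, "1");	/* unchanged: nothing committed */
	return 0;
}
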
@@ -6014,6 +6138,13 @@ static const struct ctl_table addrconf_sysctl[] = {
                .proc_handler   = proc_dointvec_jiffies,
        },
 #ifdef CONFIG_IPV6_ROUTE_INFO
+       {
+               .procname       = "accept_ra_rt_info_min_plen",
+               .data           = &ipv6_devconf.accept_ra_rt_info_min_plen,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "accept_ra_rt_info_max_plen",
                .data           = &ipv6_devconf.accept_ra_rt_info_max_plen,
@@ -6184,6 +6315,13 @@ static const struct ctl_table addrconf_sysctl[] = {
                .mode                   = 0644,
                .proc_handler   = addrconf_sysctl_addr_gen_mode,
        },
+       {
+               .procname       = "disable_policy",
+               .data           = &ipv6_devconf.disable_policy,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = addrconf_sysctl_disable_policy,
+       },
        {
                /* sentinel */
        }
@@ -6224,7 +6362,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
                ifindex = NETCONFA_IFINDEX_DEFAULT;
        else
                ifindex = idev->dev->ifindex;
-       inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+                                    ifindex, p);
        return 0;
 
 free:
@@ -6233,7 +6372,8 @@ out:
        return -ENOBUFS;
 }
 
-static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void __addrconf_sysctl_unregister(struct net *net,
+                                        struct ipv6_devconf *p, int ifindex)
 {
        struct ctl_table *table;
 
@@ -6244,6 +6384,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
        unregister_net_sysctl_table(p->sysctl_header);
        p->sysctl_header = NULL;
        kfree(table);
+
+       inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
 }
 
 static int addrconf_sysctl_register(struct inet6_dev *idev)
@@ -6267,7 +6409,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev)
 
 static void addrconf_sysctl_unregister(struct inet6_dev *idev)
 {
-       __addrconf_sysctl_unregister(&idev->cnf);
+       __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf,
+                                    idev->dev->ifindex);
        neigh_sysctl_unregister(idev->nd_parms);
 }
 
@@ -6310,7 +6453,7 @@ static int __net_init addrconf_init_net(struct net *net)
 
 #ifdef CONFIG_SYSCTL
 err_reg_dflt:
-       __addrconf_sysctl_unregister(all);
+       __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
 err_reg_all:
        kfree(dflt);
 #endif
@@ -6323,8 +6466,10 @@ err_alloc_all:
 static void __net_exit addrconf_exit_net(struct net *net)
 {
 #ifdef CONFIG_SYSCTL
-       __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
-       __addrconf_sysctl_unregister(net->ipv6.devconf_all);
+       __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
+                                    NETCONFA_IFINDEX_DEFAULT);
+       __addrconf_sysctl_unregister(net, net->ipv6.devconf_all,
+                                    NETCONFA_IFINDEX_ALL);
 #endif
        kfree(net->ipv6.devconf_dflt);
        kfree(net->ipv6.devconf_all);
index a9a9553ee63df8eb6e16e00d5da8c29406435350..1635d218735e48f3d90d049db95fbbee82b54e81 100644 (file)
@@ -1005,6 +1005,10 @@ static int __init inet6_init(void)
        if (err)
                goto seg6_fail;
 
+       err = igmp6_late_init();
+       if (err)
+               goto igmp6_late_err;
+
 #ifdef CONFIG_SYSCTL
        err = ipv6_sysctl_register();
        if (err)
@@ -1015,8 +1019,10 @@ out:
 
 #ifdef CONFIG_SYSCTL
 sysctl_fail:
-       seg6_exit();
+       igmp6_late_cleanup();
 #endif
+igmp6_late_err:
+       seg6_exit();
 seg6_fail:
        calipso_exit();
 calipso_fail:
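
The new igmp6_late_err label keeps inet6_init()'s unwind ladder intact: a failing step jumps to the label that undoes everything initialised before it, and the labels fall through in reverse init order. The same shape in miniature:

#include <stdio.h>

static int step(const char *name, int fail)
{
	printf("init %s\n", name);
	return fail ? -1 : 0;
}

static int demo_init(void)
{
	int err;

	err = step("seg6", 0);
	if (err)
		goto seg6_fail;
	err = step("igmp6_late", 1);	/* simulate a failure here */
	if (err)
		goto igmp6_late_err;
	return 0;

igmp6_late_err:
	printf("cleanup seg6\n");	/* seg6_exit() stand-in */
seg6_fail:
	return err;
}

int main(void) { return demo_init() ? 1 : 0; }
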
index aacfb4bce1533b3f3b38e1173c18cb1bb6b33099..b04539dd4629d2b71b5db27c4a64a89151b2d5d7 100644 (file)
@@ -49,6 +49,8 @@
 
 int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+       void (*edemux)(struct sk_buff *skb);
+
        /* if ingress device is enslaved to an L3 master device pass the
         * skb to its handler for processing
         */
@@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
                const struct inet6_protocol *ipprot;
 
                ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
-               if (ipprot && ipprot->early_demux)
-                       ipprot->early_demux(skb);
+               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+                       edemux(skb);
        }
        if (!skb_valid_dst(skb))
                ip6_route_input(skb);
index 6ba6c900ebcf430cf313a2bef55ff69c114af218..fb4546e80c8282cdf17e7429506d5f4630809cc4 100644 (file)
@@ -815,7 +815,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
        in6_dev = __in6_dev_get(dev);
        if (in6_dev) {
                in6_dev->cnf.mc_forwarding--;
-               inet6_netconf_notify_devconf(dev_net(dev),
+               inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
                                             NETCONFA_MC_FORWARDING,
                                             dev->ifindex, &in6_dev->cnf);
        }
@@ -974,7 +974,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
        in6_dev = __in6_dev_get(dev);
        if (in6_dev) {
                in6_dev->cnf.mc_forwarding++;
-               inet6_netconf_notify_devconf(dev_net(dev),
+               inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
                                             NETCONFA_MC_FORWARDING,
                                             dev->ifindex, &in6_dev->cnf);
        }
@@ -1599,7 +1599,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
        write_unlock_bh(&mrt_lock);
 
        if (!err)
-               inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+               inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                            NETCONFA_MC_FORWARDING,
                                             NETCONFA_IFINDEX_ALL,
                                             net->ipv6.devconf_all);
        rtnl_unlock();
@@ -1620,7 +1621,7 @@ int ip6mr_sk_done(struct sock *sk)
                        mrt->mroute6_sk = NULL;
                        net->ipv6.devconf_all->mc_forwarding--;
                        write_unlock_bh(&mrt_lock);
-                       inet6_netconf_notify_devconf(net,
+                       inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
                                                     NETCONFA_MC_FORWARDING,
                                                     NETCONFA_IFINDEX_ALL,
                                                     net->ipv6.devconf_all);
index 1bdc703cb9668bd77690c3d8f1ec0062d7b88c43..07403fa164e18aac704e3b77d1e2a094ad53c04c 100644 (file)
@@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev)
        mld_ifc_start_timer(idev, 1);
 }
 
-
 static void igmp6_timer_handler(unsigned long data)
 {
        struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
@@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
        write_unlock_bh(&idev->lock);
 }
 
+static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
+{
+       struct ifmcaddr6 *pmc;
+
+       ASSERT_RTNL();
+
+       if (mld_in_v1_mode(idev)) {
+               read_lock_bh(&idev->lock);
+               for (pmc = idev->mc_list; pmc; pmc = pmc->next)
+                       igmp6_join_group(pmc);
+               read_unlock_bh(&idev->lock);
+       } else
+               mld_send_report(idev, NULL);
+}
+
+static int ipv6_mc_netdev_event(struct notifier_block *this,
+                               unsigned long event,
+                               void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct inet6_dev *idev = __in6_dev_get(dev);
+
+       switch (event) {
+       case NETDEV_RESEND_IGMP:
+               if (idev)
+                       ipv6_mc_rejoin_groups(idev);
+               break;
+       default:
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block igmp6_netdev_notifier = {
+       .notifier_call = ipv6_mc_netdev_event,
+};
+
 #ifdef CONFIG_PROC_FS
 struct igmp6_mc_iter_state {
        struct seq_net_private p;
@@ -2970,7 +3007,17 @@ int __init igmp6_init(void)
        return register_pernet_subsys(&igmp6_net_ops);
 }
 
+int __init igmp6_late_init(void)
+{
+       return register_netdevice_notifier(&igmp6_netdev_notifier);
+}
+
 void igmp6_cleanup(void)
 {
        unregister_pernet_subsys(&igmp6_net_ops);
 }
+
+void igmp6_late_cleanup(void)
+{
+       unregister_netdevice_notifier(&igmp6_netdev_notifier);
+}
index 7ebac630d3c603186be2fc0dcbaac7d7e74bfde6..b5812b3f75399df98ec3b487dc69f07ff01bc35f 100644 (file)
@@ -732,7 +732,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
                  const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
                  struct ndisc_options *ndopts)
 {
-       neigh_update(neigh, lladdr, new, flags);
+       neigh_update(neigh, lladdr, new, flags, 0);
        /* report ndisc ops about neighbour update */
        ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
 }
@@ -1418,6 +1418,8 @@ skip_linkparms:
                        if (ri->prefix_len == 0 &&
                            !in6_dev->cnf.accept_ra_defrtr)
                                continue;
+                       if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+                               continue;
                        if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
                                continue;
                        rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
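
Together with the existing max_plen check, the new sysctl brackets the accepted prefix lengths: a route-information option is honoured only when accept_ra_rt_info_min_plen <= prefix_len <= accept_ra_rt_info_max_plen (prefix_len 0 is additionally gated by accept_ra_defrtr). As a plain predicate:

#include <stdbool.h>
#include <stdio.h>

static bool accept_rt_info(int plen, int min_plen, int max_plen)
{
	return plen >= min_plen && plen <= max_plen;
}

int main(void)
{
	printf("%d\n", accept_rt_info(48, 32, 64));	/* 1: accepted */
	printf("%d\n", accept_rt_info(16, 32, 64));	/* 0: too short */
	return 0;
}
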
index 765facf03d45c47b9913b1adcdaf59b6fe09383c..e8d88d82636b759c68f348455ec17b526408b926 100644 (file)
@@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
        if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
            nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-               nft_fib_store_result(dest, priv->result, pkt,
+               nft_fib_store_result(dest, priv, pkt,
                                     nft_in(pkt)->ifindex);
                return;
        }
index e3770abe688a3a9059456fe9195adbfcdfb73157..b5d54d4f995c0f4bade2e3f1c4def9616252ca55 100644 (file)
@@ -26,7 +26,7 @@
 #include <net/protocol.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
-const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
 EXPORT_SYMBOL(inet6_protos);
 
 int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
index 35c58b669ebdfd73db32035bf4d6ccfdd01bbaaf..9db1418993f2b8a5b4194895f243441033d4729a 100644 (file)
@@ -3423,6 +3423,8 @@ static int rt6_fill_node(struct net *net,
        }
        else if (rt->rt6i_flags & RTF_LOCAL)
                rtm->rtm_type = RTN_LOCAL;
+       else if (rt->rt6i_flags & RTF_ANYCAST)
+               rtm->rtm_type = RTN_ANYCAST;
        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
index 85582257d3af88146d435ef6c2e98f0bbef94a41..a644aaecdfd30cf629625127e422a4fe150821b7 100644 (file)
 #include <linux/seg6_iptunnel.h>
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
-#ifdef CONFIG_DST_CACHE
 #include <net/dst_cache.h>
-#endif
 #ifdef CONFIG_IPV6_SEG6_HMAC
 #include <net/seg6_hmac.h>
 #endif
 
 struct seg6_lwt {
-#ifdef CONFIG_DST_CACHE
        struct dst_cache cache;
-#endif
        struct seg6_iptunnel_encap tuninfo[0];
 };
 
@@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
        hdrlen = (osrh->hdrlen + 1) << 3;
        tot_len = hdrlen + sizeof(*hdr);
 
-       err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+       err = skb_cow_head(skb, tot_len);
        if (unlikely(err))
                return err;
 
@@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 
        hdrlen = (osrh->hdrlen + 1) << 3;
 
-       err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+       err = skb_cow_head(skb, hdrlen);
        if (unlikely(err))
                return err;
 
@@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb)
 
 static int seg6_input(struct sk_buff *skb)
 {
+       struct dst_entry *orig_dst = skb_dst(skb);
+       struct dst_entry *dst = NULL;
+       struct seg6_lwt *slwt;
        int err;
 
        err = seg6_do_srh(skb);
@@ -245,8 +244,26 @@ static int seg6_input(struct sk_buff *skb)
                return err;
        }
 
+       slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+       preempt_disable();
+       dst = dst_cache_get(&slwt->cache);
+       preempt_enable();
+
        skb_dst_drop(skb);
-       ip6_route_input(skb);
+
+       if (!dst) {
+               ip6_route_input(skb);
+               dst = skb_dst(skb);
+               if (!dst->error) {
+                       preempt_disable();
+                       dst_cache_set_ip6(&slwt->cache, dst,
+                                         &ipv6_hdr(skb)->saddr);
+                       preempt_enable();
+               }
+       } else {
+               skb_dst_set(skb, dst);
+       }
 
        return dst_input(skb);
 }
@@ -264,11 +281,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 
-#ifdef CONFIG_DST_CACHE
        preempt_disable();
        dst = dst_cache_get(&slwt->cache);
        preempt_enable();
-#endif
 
        if (unlikely(!dst)) {
                struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -287,11 +302,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                        goto drop;
                }
 
-#ifdef CONFIG_DST_CACHE
                preempt_disable();
                dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
                preempt_enable();
-#endif
        }
 
        skb_dst_drop(skb);
@@ -355,13 +368,11 @@ static int seg6_build_state(struct nlattr *nla,
 
        slwt = seg6_lwt_lwtunnel(newts);
 
-#ifdef CONFIG_DST_CACHE
        err = dst_cache_init(&slwt->cache, GFP_KERNEL);
        if (err) {
                kfree(newts);
                return err;
        }
-#endif
 
        memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 
@@ -375,12 +386,10 @@ static int seg6_build_state(struct nlattr *nla,
        return 0;
 }
 
-#ifdef CONFIG_DST_CACHE
 static void seg6_destroy_state(struct lwtunnel_state *lwt)
 {
        dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
 }
-#endif
 
 static int seg6_fill_encap_info(struct sk_buff *skb,
                                struct lwtunnel_state *lwtstate)
@@ -414,9 +423,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 
 static const struct lwtunnel_encap_ops seg6_iptun_ops = {
        .build_state = seg6_build_state,
-#ifdef CONFIG_DST_CACHE
        .destroy_state = seg6_destroy_state,
-#endif
        .output = seg6_output,
        .input = seg6_input,
        .fill_encap = seg6_fill_encap_info,
index 49fa2e8c3fa9212eef1198a1077a6726f0f1b6fc..8e42e8f54b705ed8780890c7434feeff1055599a 100644
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
        }
 }
 
-static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
 {
-       return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
-                                           ipv6_hdr(skb)->saddr.s6_addr32,
-                                           tcp_hdr(skb)->dest,
-                                           tcp_hdr(skb)->source, tsoff);
+       return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
+                                         ipv6_hdr(skb)->saddr.s6_addr32,
+                                         tcp_hdr(skb)->dest,
+                                         tcp_hdr(skb)->source, tsoff);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);
 
-       if (tcp_death_row->sysctl_tw_recycle &&
-           !tp->rx_opt.ts_recent_stamp &&
-           ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
-               tcp_fetch_timewait_stamp(sk, dst);
-
        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
@@ -287,11 +282,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        sk_set_txhash(sk);
 
        if (likely(!tp->repair)) {
-               seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
-                                                  sk->sk_v6_daddr.s6_addr32,
-                                                  inet->inet_sport,
-                                                  inet->inet_dport,
-                                                  &tp->tsoffset);
+               seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
+                                                sk->sk_v6_daddr.s6_addr32,
+                                                inet->inet_sport,
+                                                inet->inet_dport,
+                                                &tp->tsoffset);
                if (!tp->write_seq)
                        tp->write_seq = seq;
        }
@@ -727,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req,
 
 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct flowi *fl,
-                                         const struct request_sock *req,
-                                         bool *strict)
+                                         const struct request_sock *req)
 {
-       if (strict)
-               *strict = true;
        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
 }
 
@@ -757,7 +749,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .cookie_init_seq =      cookie_v6_init_sequence,
 #endif
        .route_req      =       tcp_v6_route_req,
-       .init_seq       =       tcp_v6_init_sequence,
+       .init_seq_tsoff =       tcp_v6_init_seq_and_tsoff,
        .send_synack    =       tcp_v6_send_synack,
 };
 
@@ -1301,8 +1293,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
                        goto discard;
 
                if (nsk != sk) {
-                       sock_rps_save_rxhash(nsk, skb);
-                       sk_mark_napi_id(nsk, skb);
                        if (tcp_child_process(sk, nsk, skb))
                                goto reset;
                        if (opt_skb)
@@ -1933,8 +1923,9 @@ struct proto tcpv6_prot = {
        .diag_destroy           = tcp_abort,
 };
 
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
        .early_demux    =       tcp_v6_early_demux,
+       .early_demux_handler =  tcp_v6_early_demux,
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
index 4e4c401e3bc69020deaa4af1c10633288faedf13..fd4b1c98a47230b94641c31fe3213b3dff6ac915 100644
@@ -864,6 +864,64 @@ discard:
        return 0;
 }
 
+static struct sock *__udp6_lib_demux_lookup(struct net *net,
+                       __be16 loc_port, const struct in6_addr *loc_addr,
+                       __be16 rmt_port, const struct in6_addr *rmt_addr,
+                       int dif)
+{
+       struct sock *sk;
+
+       rcu_read_lock();
+       sk = __udp6_lib_lookup(net, rmt_addr, rmt_port, loc_addr, loc_port,
+                              dif, &udp_table, NULL);
+       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+               sk = NULL;
+       rcu_read_unlock();
+
+       return sk;
+}
+
+static void udp_v6_early_demux(struct sk_buff *skb)
+{
+       struct net *net = dev_net(skb->dev);
+       const struct udphdr *uh;
+       struct sock *sk;
+       struct dst_entry *dst;
+       int dif = skb->dev->ifindex;
+
+       if (!pskb_may_pull(skb, skb_transport_offset(skb) +
+           sizeof(struct udphdr)))
+               return;
+
+       uh = udp_hdr(skb);
+
+       if (skb->pkt_type == PACKET_HOST)
+               sk = __udp6_lib_demux_lookup(net, uh->dest,
+                                            &ipv6_hdr(skb)->daddr,
+                                            uh->source, &ipv6_hdr(skb)->saddr,
+                                            dif);
+       else
+               return;
+
+       if (!sk)
+               return;
+
+       skb->sk = sk;
+       skb->destructor = sock_efree;
+       dst = READ_ONCE(sk->sk_rx_dst);
+
+       if (dst)
+               dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+       if (dst) {
+               if (dst->flags & DST_NOCACHE) {
+                       if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+                               skb_dst_set(skb, dst);
+               } else {
+                       skb_dst_set_noref(skb, dst);
+               }
+       }
+}
+
 static __inline__ int udpv6_rcv(struct sk_buff *skb)
 {
        return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
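
__udp6_lib_demux_lookup() above uses the standard RCU-plus-refcount idiom: a socket found under rcu_read_lock() may be freed as soon as the read section ends, so it is kept only if its reference count can be raised from a non-zero value. The shape of the pattern, with the lookup call as a stand-in:

    rcu_read_lock();
    sk = lookup_socket_rcu(net, skb);       /* stand-in for the real lookup */
    if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
            sk = NULL;                      /* lost the race with sk_free */
    rcu_read_unlock();
    /* a non-NULL sk now holds a reference valid outside the RCU section */
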
@@ -1035,6 +1093,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        ipc6.hlimit = -1;
        ipc6.tclass = -1;
        ipc6.dontfrag = -1;
+       sockc.tsflags = sk->sk_tsflags;
 
        /* destination address check */
        if (sin6) {
@@ -1159,7 +1218,6 @@ do_udp_sendmsg:
 
        fl6.flowi6_mark = sk->sk_mark;
        fl6.flowi6_uid = sk->sk_uid;
-       sockc.tsflags = sk->sk_tsflags;
 
        if (msg->msg_controllen) {
                opt = &opt_space;
@@ -1378,7 +1436,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
 }
 #endif
 
-static const struct inet6_protocol udpv6_protocol = {
+static struct inet6_protocol udpv6_protocol = {
+       .early_demux    =       udp_v6_early_demux,
+       .early_demux_handler =  udp_v6_early_demux,
        .handler        =       udpv6_rcv,
        .err_handler    =       udpv6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
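
Like tcpv6_protocol earlier in this section, udpv6_protocol loses its const qualifier while gaining an early_demux_handler member. The likely reason (an assumption here, since the sysctl plumbing is outside this excerpt) is that a runtime switch toggles demux by rewriting the early_demux pointer, which a const object would forbid:

    /* hypothetical toggle, not part of this diff */
    static void set_early_demux(struct inet6_protocol *p, bool enabled)
    {
            p->early_demux = enabled ? p->early_demux_handler : NULL;
    }
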
index 56ccffa3f2bfc7731adfaabb1026ef7e8af68d32..62141dcec2d66020fb4eb7bb698e880809878028 100644
@@ -19,6 +19,7 @@
 #ifndef __IEEE802154_I_H
 #define __IEEE802154_I_H
 
+#include <linux/interrupt.h>
 #include <linux/mutex.h>
 #include <linux/hrtimer.h>
 #include <net/cfg802154.h>
index 33211f9a265608c378848c97b4be36a1cec9736d..5928d22ba9c86813a13621fec6ebdcc62fd7aeba 100644
@@ -24,6 +24,9 @@
 #include <net/nexthop.h>
 #include "internal.h"
 
+/* max memory we will use for mpls_route */
+#define MAX_MPLS_ROUTE_MEM     4096
+
 /* Maximum number of labels to look ahead at when selecting a path of
  * a multipath route
  */
@@ -32,7 +35,9 @@
 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
 
 static int zero = 0;
+static int one = 1;
 static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
 
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
                       struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -58,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible);
 
 static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
 {
-       u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
-       int nh_index = nh - rt->rt_nh;
-
-       return nh0_via + rt->rt_max_alen * nh_index;
+       return (u8 *)nh + rt->rt_via_offset;
 }
 
 static const u8 *mpls_nh_via(const struct mpls_route *rt,
@@ -187,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
        return hash;
 }
 
+static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
+{
+       return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
+}
+
+/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
+ * a next hop (nh->nh_flags) are modified by netdev event handlers.
+ * Since those fields can change at any moment, use READ_ONCE to
+ * access both.
+ */
 static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
                                             struct sk_buff *skb)
 {
-       int alive = ACCESS_ONCE(rt->rt_nhn_alive);
        u32 hash = 0;
        int nh_index = 0;
        int n = 0;
+       u8 alive;
 
        /* No need to look further into packet if there's only
         * one path
         */
        if (rt->rt_nhn == 1)
-               goto out;
+               return rt->rt_nh;
 
-       if (alive <= 0)
+       alive = READ_ONCE(rt->rt_nhn_alive);
+       if (alive == 0)
                return NULL;
 
        hash = mpls_multipath_hash(rt, skb);
@@ -209,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
        if (alive == rt->rt_nhn)
                goto out;
        for_nexthops(rt) {
-               if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+               unsigned int nh_flags = READ_ONCE(nh->nh_flags);
+
+               if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
                        continue;
                if (n == nh_index)
                        return nh;
@@ -217,11 +232,11 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
        } endfor_nexthops(rt);
 
 out:
-       return &rt->rt_nh[nh_index];
+       return mpls_get_nexthop(rt, nh_index);
 }
 
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
-                       struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+                       struct sk_buff *skb, struct mpls_entry_decoded dec)
 {
        enum mpls_payload_type payload_type;
        bool success = false;
@@ -246,22 +261,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
        switch (payload_type) {
        case MPT_IPV4: {
                struct iphdr *hdr4 = ip_hdr(skb);
+               u8 new_ttl;
                skb->protocol = htons(ETH_P_IP);
+
+               /* If propagating TTL, take the decremented TTL from
+                * the incoming MPLS header, otherwise decrement the
+                * TTL, but only if not 0 to avoid underflow.
+                */
+               if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+                   (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+                    net->mpls.ip_ttl_propagate))
+                       new_ttl = dec.ttl;
+               else
+                       new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
+
                csum_replace2(&hdr4->check,
                              htons(hdr4->ttl << 8),
-                             htons(dec.ttl << 8));
-               hdr4->ttl = dec.ttl;
+                             htons(new_ttl << 8));
+               hdr4->ttl = new_ttl;
                success = true;
                break;
        }
        case MPT_IPV6: {
                struct ipv6hdr *hdr6 = ipv6_hdr(skb);
                skb->protocol = htons(ETH_P_IPV6);
-               hdr6->hop_limit = dec.ttl;
+
+               /* If propagating TTL, take the decremented TTL from
+                * the incoming MPLS header, otherwise decrement the
+                * hop limit, but only if not 0 to avoid underflow.
+                */
+               if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+                   (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+                    net->mpls.ip_ttl_propagate))
+                       hdr6->hop_limit = dec.ttl;
+               else if (hdr6->hop_limit)
+                       hdr6->hop_limit = hdr6->hop_limit - 1;
                success = true;
                break;
        }
        case MPT_UNSPEC:
+               /* Should have decided which protocol it is by now */
                break;
        }
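
The IPv4 and IPv6 arms above apply one three-way policy: an explicit per-route setting wins, and MPLS_TTL_PROP_DEFAULT defers to the per-namespace ip_ttl_propagate sysctl. Folded into a single hypothetical predicate for clarity (not part of the patch):

    static bool mpls_should_propagate_ttl(const struct mpls_route *rt,
                                          const struct net *net)
    {
            if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED)
                    return true;
            if (rt->rt_ttl_propagate == MPLS_TTL_PROP_DISABLED)
                    return false;
            /* MPLS_TTL_PROP_DEFAULT: defer to the netns sysctl */
            return net->mpls.ip_ttl_propagate;
    }
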
 
@@ -361,7 +400,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 
        if (unlikely(!new_header_size && dec.bos)) {
                /* Penultimate hop popping */
-               if (!mpls_egress(rt, skb, dec))
+               if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
                        goto err;
        } else {
                bool bos;
@@ -412,6 +451,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
        [RTA_DST]               = { .type = NLA_U32 },
        [RTA_OIF]               = { .type = NLA_U32 },
+       [RTA_TTL_PROPAGATE]     = { .type = NLA_U8 },
 };
 
 struct mpls_route_config {
@@ -421,6 +461,7 @@ struct mpls_route_config {
        u8                      rc_via_alen;
        u8                      rc_via[MAX_VIA_ALEN];
        u32                     rc_label;
+       u8                      rc_ttl_propagate;
        u8                      rc_output_labels;
        u32                     rc_output_label[MAX_NEW_LABELS];
        u32                     rc_nlflags;
@@ -430,20 +471,27 @@ struct mpls_route_config {
        int                     rc_mp_len;
 };
 
-static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
+/* all nexthops within a route have the same size based on max
+ * number of labels and max via length for a hop
+ */
+static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
 {
-       u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
+       u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
        struct mpls_route *rt;
+       size_t size;
 
-       rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
-                          VIA_ALEN_ALIGN) +
-                    num_nh * max_alen_aligned,
-                    GFP_KERNEL);
-       if (rt) {
-               rt->rt_nhn = num_nh;
-               rt->rt_nhn_alive = num_nh;
-               rt->rt_max_alen = max_alen_aligned;
-       }
+       size = sizeof(*rt) + num_nh * nh_size;
+       if (size > MAX_MPLS_ROUTE_MEM)
+               return ERR_PTR(-EINVAL);
+
+       rt = kzalloc(size, GFP_KERNEL);
+       if (!rt)
+               return ERR_PTR(-ENOMEM);
+
+       rt->rt_nhn = num_nh;
+       rt->rt_nhn_alive = num_nh;
+       rt->rt_nh_size = nh_size;
+       rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);
 
        return rt;
 }
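
Since every nexthop is now padded out to the same rt_nh_size, the tail of the route is a plain array of fixed-stride records, and both a nexthop and its via address are reachable with constant-time arithmetic; this is exactly what mpls_get_nexthop() and __mpls_nh_via() compute:

    /* illustrative address arithmetic for nexthop i of rt */
    struct mpls_nh *nh  = (struct mpls_nh *)((u8 *)rt->rt_nh +
                                             i * rt->rt_nh_size);
    u8             *via = (u8 *)nh + rt->rt_via_offset;
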
@@ -648,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
                return -ENOMEM;
 
        err = -EINVAL;
-       /* Ensure only a supported number of labels are present */
-       if (cfg->rc_output_labels > MAX_NEW_LABELS)
-               goto errout;
 
        nh->nh_labels = cfg->rc_output_labels;
        for (i = 0; i < nh->nh_labels; i++)
@@ -675,7 +720,7 @@ errout:
 
 static int mpls_nh_build(struct net *net, struct mpls_route *rt,
                         struct mpls_nh *nh, int oif, struct nlattr *via,
-                        struct nlattr *newdst)
+                        struct nlattr *newdst, u8 max_labels)
 {
        int err = -ENOMEM;
 
@@ -683,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
                goto errout;
 
        if (newdst) {
-               err = nla_get_labels(newdst, MAX_NEW_LABELS,
+               err = nla_get_labels(newdst, max_labels,
                                     &nh->nh_labels, nh->nh_label);
                if (err)
                        goto errout;
@@ -708,22 +753,20 @@ errout:
        return err;
 }
 
-static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
-                              u8 cfg_via_alen, u8 *max_via_alen)
+static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
+                             u8 cfg_via_alen, u8 *max_via_alen,
+                             u8 *max_labels)
 {
-       int nhs = 0;
        int remaining = len;
-
-       if (!rtnh) {
-               *max_via_alen = cfg_via_alen;
-               return 1;
-       }
+       u8 nhs = 0;
 
        *max_via_alen = 0;
+       *max_labels = 0;
 
        while (rtnh_ok(rtnh, remaining)) {
                struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
                int attrlen;
+               u8 n_labels = 0;
 
                attrlen = rtnh_attrlen(rtnh);
                nla = nla_find(attrs, attrlen, RTA_VIA);
@@ -737,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
                                                      via_alen);
                }
 
+               nla = nla_find(attrs, attrlen, RTA_NEWDST);
+               if (nla &&
+                   nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+                       return 0;
+
+               *max_labels = max_t(u8, *max_labels, n_labels);
+
+               /* number of nexthops is tracked by a u8.
+                * Check for overflow.
+                */
+               if (nhs == 255)
+                       return 0;
                nhs++;
+
                rtnh = rtnh_next(rtnh, &remaining);
        }
 
@@ -746,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
 }
 
 static int mpls_nh_build_multi(struct mpls_route_config *cfg,
-                              struct mpls_route *rt)
+                              struct mpls_route *rt, u8 max_labels)
 {
        struct rtnexthop *rtnh = cfg->rc_mp;
        struct nlattr *nla_via, *nla_newdst;
        int remaining = cfg->rc_mp_len;
-       int nhs = 0;
        int err = 0;
+       u8 nhs = 0;
 
        change_nexthops(rt) {
                int attrlen;
@@ -779,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
                }
 
                err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
-                                   rtnh->rtnh_ifindex, nla_via, nla_newdst);
+                                   rtnh->rtnh_ifindex, nla_via, nla_newdst,
+                                   max_labels);
                if (err)
                        goto errout;
 
@@ -806,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg)
        int err = -EINVAL;
        u8 max_via_alen;
        unsigned index;
-       int nhs;
+       u8 max_labels;
+       u8 nhs;
 
        index = cfg->rc_label;
 
@@ -844,21 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg)
                goto errout;
 
        err = -EINVAL;
-       nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
-                                 cfg->rc_via_alen, &max_via_alen);
+       if (cfg->rc_mp) {
+               nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
+                                         cfg->rc_via_alen, &max_via_alen,
+                                         &max_labels);
+       } else {
+               max_via_alen = cfg->rc_via_alen;
+               max_labels = cfg->rc_output_labels;
+               nhs = 1;
+       }
+
        if (nhs == 0)
                goto errout;
 
        err = -ENOMEM;
-       rt = mpls_rt_alloc(nhs, max_via_alen);
-       if (!rt)
+       rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
+       if (IS_ERR(rt)) {
+               err = PTR_ERR(rt);
                goto errout;
+       }
 
        rt->rt_protocol = cfg->rc_protocol;
        rt->rt_payload_type = cfg->rc_payload_type;
+       rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
 
        if (cfg->rc_mp)
-               err = mpls_nh_build_multi(cfg, rt);
+               err = mpls_nh_build_multi(cfg, rt, max_labels);
        else
                err = mpls_nh_build_from_cfg(cfg, rt);
        if (err)
@@ -1011,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type)
        return size;
 }
 
-static void mpls_netconf_notify_devconf(struct net *net, int type,
-                                       struct mpls_dev *mdev)
+static void mpls_netconf_notify_devconf(struct net *net, int event,
+                                       int type, struct mpls_dev *mdev)
 {
        struct sk_buff *skb;
        int err = -ENOBUFS;
@@ -1021,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type,
        if (!skb)
                goto errout;
 
-       err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF,
-                                       0, type);
+       err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
                WARN_ON(err == -EMSGSIZE);
@@ -1155,9 +1223,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write,
 
                if (i == offsetof(struct mpls_dev, input_enabled) &&
                    val != oval) {
-                       mpls_netconf_notify_devconf(net,
-                                                   NETCONFA_INPUT,
-                                                   mdev);
+                       mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
+                                                   NETCONFA_INPUT, mdev);
                }
        }
 
@@ -1198,10 +1265,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
 
        snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
 
-       mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
+       mdev->sysctl = register_net_sysctl(net, path, table);
        if (!mdev->sysctl)
                goto free;
 
+       mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
        return 0;
 
 free:
@@ -1210,13 +1278,17 @@ out:
        return -ENOBUFS;
 }
 
-static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
+static void mpls_dev_sysctl_unregister(struct net_device *dev,
+                                      struct mpls_dev *mdev)
 {
+       struct net *net = dev_net(dev);
        struct ctl_table *table;
 
        table = mdev->sysctl->ctl_table_arg;
        unregister_net_sysctl_table(mdev->sysctl);
        kfree(table);
+
+       mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
 }
 
 static struct mpls_dev *mpls_add_dev(struct net_device *dev)
@@ -1242,11 +1314,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev)
                u64_stats_init(&mpls_stats->syncp);
        }
 
+       mdev->dev = dev;
+
        err = mpls_dev_sysctl_register(dev, mdev);
        if (err)
                goto free;
 
-       mdev->dev = dev;
        rcu_assign_pointer(dev->mpls_ptr, mdev);
 
        return mdev;
@@ -1269,6 +1342,7 @@ static void mpls_ifdown(struct net_device *dev, int event)
 {
        struct mpls_route __rcu **platform_label;
        struct net *net = dev_net(dev);
+       u8 alive, deleted;
        unsigned index;
 
        platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -1278,32 +1352,49 @@ static void mpls_ifdown(struct net_device *dev, int event)
                if (!rt)
                        continue;
 
+               alive = 0;
+               deleted = 0;
                change_nexthops(rt) {
+                       unsigned int nh_flags = nh->nh_flags;
+
                        if (rtnl_dereference(nh->nh_dev) != dev)
-                               continue;
+                               goto next;
+
                        switch (event) {
                        case NETDEV_DOWN:
                        case NETDEV_UNREGISTER:
-                               nh->nh_flags |= RTNH_F_DEAD;
+                               nh_flags |= RTNH_F_DEAD;
                                /* fall through */
                        case NETDEV_CHANGE:
-                               nh->nh_flags |= RTNH_F_LINKDOWN;
-                               if (event != NETDEV_UNREGISTER)
-                                       ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+                               nh_flags |= RTNH_F_LINKDOWN;
                                break;
                        }
                        if (event == NETDEV_UNREGISTER)
                                RCU_INIT_POINTER(nh->nh_dev, NULL);
+
+                       if (nh->nh_flags != nh_flags)
+                               WRITE_ONCE(nh->nh_flags, nh_flags);
+next:
+                       if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
+                               alive++;
+                       if (!rtnl_dereference(nh->nh_dev))
+                               deleted++;
                } endfor_nexthops(rt);
+
+               WRITE_ONCE(rt->rt_nhn_alive, alive);
+
+               /* if there are no more nexthops, delete the route */
+               if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
+                       mpls_route_update(net, index, NULL, NULL);
        }
 }
 
-static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
+static void mpls_ifup(struct net_device *dev, unsigned int flags)
 {
        struct mpls_route __rcu **platform_label;
        struct net *net = dev_net(dev);
        unsigned index;
-       int alive;
+       u8 alive;
 
        platform_label = rtnl_dereference(net->mpls.platform_label);
        for (index = 0; index < net->mpls.platform_labels; index++) {
@@ -1314,20 +1405,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
 
                alive = 0;
                change_nexthops(rt) {
+                       unsigned int nh_flags = nh->nh_flags;
                        struct net_device *nh_dev =
                                rtnl_dereference(nh->nh_dev);
 
-                       if (!(nh->nh_flags & nh_flags)) {
+                       if (!(nh_flags & flags)) {
                                alive++;
                                continue;
                        }
                        if (nh_dev != dev)
                                continue;
                        alive++;
-                       nh->nh_flags &= ~nh_flags;
+                       nh_flags &= ~flags;
+                       WRITE_ONCE(nh->nh_flags, nh_flags);
                } endfor_nexthops(rt);
 
-               ACCESS_ONCE(rt->rt_nhn_alive) = alive;
+               WRITE_ONCE(rt->rt_nhn_alive, alive);
        }
 }
 
@@ -1378,7 +1471,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
                mpls_ifdown(dev, event);
                mdev = mpls_dev_get(dev);
                if (mdev) {
-                       mpls_dev_sysctl_unregister(mdev);
+                       mpls_dev_sysctl_unregister(dev, mdev);
                        RCU_INIT_POINTER(dev->mpls_ptr, NULL);
                        call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
                }
@@ -1388,7 +1481,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
                if (mdev) {
                        int err;
 
-                       mpls_dev_sysctl_unregister(mdev);
+                       mpls_dev_sysctl_unregister(dev, mdev);
                        err = mpls_dev_sysctl_register(dev, mdev);
                        if (err)
                                return notifier_from_errno(err);
@@ -1448,16 +1541,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
 EXPORT_SYMBOL_GPL(nla_put_labels);
 
 int nla_get_labels(const struct nlattr *nla,
-                  u32 max_labels, u8 *labels, u32 label[])
+                  u8 max_labels, u8 *labels, u32 label[])
 {
        unsigned len = nla_len(nla);
-       unsigned nla_labels;
        struct mpls_shim_hdr *nla_label;
+       u8 nla_labels;
        bool bos;
        int i;
 
-       /* len needs to be an even multiple of 4 (the label size) */
-       if (len & 3)
+       /* len needs to be an even multiple of 4 (the label size). Number
+        * of labels is a u8 so check for overflow.
+        */
+       if (len & 3 || len / 4 > 255)
                return -EINVAL;
 
        /* Limit the number of new labels allowed */
@@ -1465,6 +1560,10 @@ int nla_get_labels(const struct nlattr *nla,
        if (nla_labels > max_labels)
                return -EINVAL;
 
+       /* when label == NULL, caller wants number of labels */
+       if (!label)
+               goto out;
+
        nla_label = nla_data(nla);
        bos = true;
        for (i = nla_labels - 1; i >= 0; i--, bos = false) {
@@ -1488,6 +1587,7 @@ int nla_get_labels(const struct nlattr *nla,
 
                label[i] = dec.label;
        }
+out:
        *labels = nla_labels;
        return 0;
 }
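
Accepting a NULL label[] turns nla_get_labels() into a cheap validate-and-count pass, enabling a two-step decode: size the allocation first, then decode into it (mpls_build_state() in mpls_iptunnel.c, further down, does exactly this). Sketch:

    u8 n_labels;

    /* pass 1: validate the attribute and count the labels */
    if (nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL))
            return -EINVAL;

    /* ... allocate room for n_labels u32 entries ... */

    /* pass 2: decode for real into the sized buffer */
    if (nla_get_labels(nla, n_labels, &n_labels, labels))
            return -EINVAL;
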
@@ -1549,7 +1649,6 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 
        err = -EINVAL;
        rtm = nlmsg_data(nlh);
-       memset(cfg, 0, sizeof(*cfg));
 
        if (rtm->rtm_family != AF_MPLS)
                goto errout;
@@ -1577,6 +1676,7 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
        cfg->rc_label           = LABEL_NOT_SPECIFIED;
        cfg->rc_protocol        = rtm->rtm_protocol;
        cfg->rc_via_table       = MPLS_NEIGH_TABLE_UNSPEC;
+       cfg->rc_ttl_propagate   = MPLS_TTL_PROP_DEFAULT;
        cfg->rc_nlflags         = nlh->nlmsg_flags;
        cfg->rc_nlinfo.portid   = NETLINK_CB(skb).portid;
        cfg->rc_nlinfo.nlh      = nlh;
@@ -1623,6 +1723,17 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
                        cfg->rc_mp_len = nla_len(nla);
                        break;
                }
+               case RTA_TTL_PROPAGATE:
+               {
+                       u8 ttl_propagate = nla_get_u8(nla);
+
+                       if (ttl_propagate > 1)
+                               goto errout;
+                       cfg->rc_ttl_propagate = ttl_propagate ?
+                               MPLS_TTL_PROP_ENABLED :
+                               MPLS_TTL_PROP_DISABLED;
+                       break;
+               }
                default:
                        /* Unsupported attribute */
                        goto errout;
@@ -1636,27 +1747,43 @@ errout:
 
 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-       struct mpls_route_config cfg;
+       struct mpls_route_config *cfg;
        int err;
 
-       err = rtm_to_route_config(skb, nlh, &cfg);
+       cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+       if (!cfg)
+               return -ENOMEM;
+
+       err = rtm_to_route_config(skb, nlh, cfg);
        if (err < 0)
-               return err;
+               goto out;
 
-       return mpls_route_del(&cfg);
+       err = mpls_route_del(cfg);
+out:
+       kfree(cfg);
+
+       return err;
 }
 
 
 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-       struct mpls_route_config cfg;
+       struct mpls_route_config *cfg;
        int err;
 
-       err = rtm_to_route_config(skb, nlh, &cfg);
+       cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+       if (!cfg)
+               return -ENOMEM;
+
+       err = rtm_to_route_config(skb, nlh, cfg);
        if (err < 0)
-               return err;
+               goto out;
 
-       return mpls_route_add(&cfg);
+       err = mpls_route_add(cfg);
+out:
+       kfree(cfg);
+
+       return err;
 }
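
Moving the config off the stack in both handlers follows from the larger MAX_NEW_LABELS elsewhere in this series: rc_output_label alone is now 30 * sizeof(u32) = 120 bytes, so struct mpls_route_config has grown past what is comfortable as an on-stack variable.
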
 
 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
@@ -1683,6 +1810,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
 
        if (nla_put_labels(skb, RTA_DST, 1, &label))
                goto nla_put_failure;
+
+       if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+               bool ttl_propagate =
+                       rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+               if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+                              ttl_propagate))
+                       goto nla_put_failure;
+       }
        if (rt->rt_nhn == 1) {
                const struct mpls_nh *nh = rt->rt_nh;
 
@@ -1704,21 +1840,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
        } else {
                struct rtnexthop *rtnh;
                struct nlattr *mp;
-               int dead = 0;
-               int linkdown = 0;
+               u8 linkdown = 0;
+               u8 dead = 0;
 
                mp = nla_nest_start(skb, RTA_MULTIPATH);
                if (!mp)
                        goto nla_put_failure;
 
                for_nexthops(rt) {
+                       dev = rtnl_dereference(nh->nh_dev);
+                       if (!dev)
+                               continue;
+
                        rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
                        if (!rtnh)
                                goto nla_put_failure;
 
-                       dev = rtnl_dereference(nh->nh_dev);
-                       if (dev)
-                               rtnh->rtnh_ifindex = dev->ifindex;
+                       rtnh->rtnh_ifindex = dev->ifindex;
                        if (nh->nh_flags & RTNH_F_LINKDOWN) {
                                rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
                                linkdown++;
@@ -1793,7 +1931,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
 {
        size_t payload =
                NLMSG_ALIGN(sizeof(struct rtmsg))
-               + nla_total_size(4);                    /* RTA_DST */
+               + nla_total_size(4)                     /* RTA_DST */
+               + nla_total_size(1);                    /* RTA_TTL_PROPAGATE */
 
        if (rt->rt_nhn == 1) {
                struct mpls_nh *nh = rt->rt_nh;
@@ -1809,6 +1948,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
                size_t nhsize = 0;
 
                for_nexthops(rt) {
+                       if (!rtnl_dereference(nh->nh_dev))
+                               continue;
                        nhsize += nla_total_size(sizeof(struct rtnexthop));
                        /* RTA_VIA */
                        if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
@@ -1871,12 +2012,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
        /* In case the predefined labels need to be populated */
        if (limit > MPLS_LABEL_IPV4NULL) {
                struct net_device *lo = net->loopback_dev;
-               rt0 = mpls_rt_alloc(1, lo->addr_len);
-               if (!rt0)
+               rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
+               if (IS_ERR(rt0))
                        goto nort0;
                RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
                rt0->rt_protocol = RTPROT_KERNEL;
                rt0->rt_payload_type = MPT_IPV4;
+               rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
                rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
                rt0->rt_nh->nh_via_alen = lo->addr_len;
                memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1884,12 +2026,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
        }
        if (limit > MPLS_LABEL_IPV6NULL) {
                struct net_device *lo = net->loopback_dev;
-               rt2 = mpls_rt_alloc(1, lo->addr_len);
-               if (!rt2)
+               rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
+               if (IS_ERR(rt2))
                        goto nort2;
                RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
                rt2->rt_protocol = RTPROT_KERNEL;
                rt2->rt_payload_type = MPT_IPV6;
+               rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
                rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
                rt2->rt_nh->nh_via_alen = lo->addr_len;
                memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1971,6 +2114,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
        return ret;
 }
 
+#define MPLS_NS_SYSCTL_OFFSET(field)           \
+       (&((struct net *)0)->field)
+
 static const struct ctl_table mpls_table[] = {
        {
                .procname       = "platform_labels",
@@ -1979,21 +2125,47 @@ static const struct ctl_table mpls_table[] = {
                .mode           = 0644,
                .proc_handler   = mpls_platform_labels,
        },
+       {
+               .procname       = "ip_ttl_propagate",
+               .data           = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .procname       = "default_ttl",
+               .data           = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &ttl_max,
+       },
        { }
 };
 
 static int mpls_net_init(struct net *net)
 {
        struct ctl_table *table;
+       int i;
 
        net->mpls.platform_labels = 0;
        net->mpls.platform_label = NULL;
+       net->mpls.ip_ttl_propagate = 1;
+       net->mpls.default_ttl = 255;
 
        table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
        if (table == NULL)
                return -ENOMEM;
 
-       table[0].data = net;
+       /* Table data contains only offsets relative to the base of
+        * the netns at this point, so make them absolute.
+        */
+       for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+               table[i].data = (char *)net + (uintptr_t)table[i].data;
+
        net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
        if (net->mpls.ctl == NULL) {
                kfree(table);
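
MPLS_NS_SYSCTL_OFFSET() is offsetof() in disguise: it stores the field's offset within struct net as a fake pointer, and the fixup loop above rebases each entry onto the live namespace. Equivalent spellings, for illustration:

    /* with table[i].data == MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
     * these two assignments produce the same address:
     */
    table[i].data = (char *)net + (uintptr_t)table[i].data;
    table[i].data = &net->mpls.default_ttl;
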
index 76360d8b95798e148c5d6a48ce3375eeadcad5a5..4db6a59713220dcefac8be970ab06b1ae25bcd5c 100644
@@ -2,6 +2,11 @@
 #define MPLS_INTERNAL_H
 #include <net/mpls.h>
 
+/* put a reasonable limit on the number of labels
+ * we will accept from userspace
+ */
+#define MAX_NEW_LABELS 30
+
 struct mpls_entry_decoded {
        u32 label;
        u8 ttl;
@@ -64,7 +69,6 @@ struct mpls_dev {
 struct sk_buff;
 
 #define LABEL_NOT_SPECIFIED (1 << 20)
-#define MAX_NEW_LABELS 2
 
 /* This maximum ha length copied from the definition of struct neighbour */
 #define VIA_ALEN_ALIGN sizeof(unsigned long)
@@ -83,11 +87,35 @@ enum mpls_payload_type {
 
 struct mpls_nh { /* next hop label forwarding entry */
        struct net_device __rcu *nh_dev;
+
+       /* nh_flags is accessed under RCU in the packet path; it is
+        * modified handling netdev events with rtnl lock held
+        */
        unsigned int            nh_flags;
-       u32                     nh_label[MAX_NEW_LABELS];
        u8                      nh_labels;
        u8                      nh_via_alen;
        u8                      nh_via_table;
+       u8                      nh_reserved1;
+
+       u32                     nh_label[0];
+};
+
+/* offset of via from beginning of mpls_nh */
+#define MPLS_NH_VIA_OFF(num_labels) \
+               ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \
+                     VIA_ALEN_ALIGN)
+
+/* all nexthops within a route have the same size based on the
+ * max number of labels and max via length across all nexthops
+ */
+#define MPLS_NH_SIZE(num_labels, max_via_alen)         \
+               (MPLS_NH_VIA_OFF((num_labels)) +        \
+               ALIGN((max_via_alen), VIA_ALEN_ALIGN))
+
+enum mpls_ttl_propagation {
+       MPLS_TTL_PROP_DEFAULT,
+       MPLS_TTL_PROP_ENABLED,
+       MPLS_TTL_PROP_DISABLED,
 };
 
 /* The route, nexthops and vias are stored together in the same memory
@@ -98,16 +126,16 @@ struct mpls_nh { /* next hop label forwarding entry */
  * +----------------------+
  * | mpls_nh 0            |
  * +----------------------+
- * | ...                  |
- * +----------------------+
- * | mpls_nh n-1          |
- * +----------------------+
- * | alignment padding    |
+ * | alignment padding    |   4 bytes for odd number of labels
  * +----------------------+
  * | via[rt_max_alen] 0   |
  * +----------------------+
+ * | alignment padding    |   via's aligned on sizeof(unsigned long)
+ * +----------------------+
  * | ...                  |
  * +----------------------+
+ * | mpls_nh n-1          |
+ * +----------------------+
  * | via[rt_max_alen] n-1 |
  * +----------------------+
  */
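
A worked example of the sizing macros above, assuming a 64-bit build where struct mpls_nh packs to 16 bytes (one pointer, one unsigned int, four u8 fields) and VIA_ALEN_ALIGN is 8:

    /* num_labels = 3, max_via_alen = 6 (an Ethernet address):
     *
     *   MPLS_NH_VIA_OFF(3) = ALIGN(16 + 3 * 4, 8) = ALIGN(28, 8) = 32
     *   MPLS_NH_SIZE(3, 6) = 32 + ALIGN(6, 8)     = 32 + 8       = 40
     *
     * so each nexthop record occupies 40 bytes, labels and via
     * included, and rt_via_offset is 32.
     */
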
@@ -116,22 +144,30 @@ struct mpls_route { /* next hop label forwarding entry */
        u8                      rt_protocol;
        u8                      rt_payload_type;
        u8                      rt_max_alen;
-       unsigned int            rt_nhn;
-       unsigned int            rt_nhn_alive;
+       u8                      rt_ttl_propagate;
+       u8                      rt_nhn;
+       /* rt_nhn_alive is accessed under RCU in the packet path; it
+        * is modified handling netdev events with rtnl lock held
+        */
+       u8                      rt_nhn_alive;
+       u8                      rt_nh_size;
+       u8                      rt_via_offset;
+       u8                      rt_reserved1;
        struct mpls_nh          rt_nh[0];
 };
 
 #define for_nexthops(rt) {                                             \
-       int nhsel; struct mpls_nh *nh;                  \
-       for (nhsel = 0, nh = (rt)->rt_nh;                               \
+       int nhsel; struct mpls_nh *nh;  u8 *__nh;                       \
+       for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh);   \
             nhsel < (rt)->rt_nhn;                                      \
-            nh++, nhsel++)
+            __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
 
 #define change_nexthops(rt) {                                          \
-       int nhsel; struct mpls_nh *nh;                          \
-       for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh);   \
+       int nhsel; struct mpls_nh *nh; u8 *__nh;                        \
+       for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh),           \
+                       __nh = (u8 *)((rt)->rt_nh);                     \
             nhsel < (rt)->rt_nhn;                                      \
-            nh++, nhsel++)
+            __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
 
 #define endfor_nexthops(rt) }
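
The rewritten iterators advance by rt_nh_size through the __nh byte cursor instead of relying on sizeof(struct mpls_nh), so existing callers keep working unchanged. Usage, for reference (note the macros open a scope that endfor_nexthops closes):

    for_nexthops(rt) {
            /* nh walks the variable-size records; nhsel is the index */
            pr_debug("nh %d: %u labels\n", nhsel, nh->nh_labels);
    } endfor_nexthops(rt);
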
 
@@ -166,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
 
 int nla_put_labels(struct sk_buff *skb, int attrtype,  u8 labels,
                   const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
                   u32 label[]);
 int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
                u8 via[]);
index e4e4424f9eb1f5531d22463687d74c2e2ca971a6..fe00e98667cf603c5f01150fb4899f98c86c8496 100644
@@ -29,6 +29,7 @@
 
 static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
        [MPLS_IPTUNNEL_DST]     = { .type = NLA_U32 },
+       [MPLS_IPTUNNEL_TTL]     = { .type = NLA_U8 },
 };
 
 static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
        struct rtable *rt = NULL;
        struct rt6_info *rt6 = NULL;
        struct mpls_dev *out_mdev;
+       struct net *net;
        int err = 0;
        bool bos;
        int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
 
        /* Find the output device */
        out_dev = dst->dev;
-
-       /* Obtain the ttl */
-       if (dst->ops->family == AF_INET) {
-               ttl = ip_hdr(skb)->ttl;
-               rt = (struct rtable *)dst;
-       } else if (dst->ops->family == AF_INET6) {
-               ttl = ipv6_hdr(skb)->hop_limit;
-               rt6 = (struct rt6_info *)dst;
-       } else {
-               goto drop;
-       }
+       net = dev_net(out_dev);
 
        skb_orphan(skb);
 
@@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb)
 
        tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
 
+       /* Obtain the ttl using the following set of rules.
+        *
+        * LWT ttl propagation setting:
+        *  - disabled => use default TTL value from LWT
+        *  - enabled  => use TTL value from IPv4/IPv6 header
+        *  - default  =>
+        *   Global ttl propagation setting:
+        *    - disabled => use default TTL value from global setting
+        *    - enabled => use TTL value from IPv4/IPv6 header
+        */
+       if (dst->ops->family == AF_INET) {
+               if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+                       ttl = tun_encap_info->default_ttl;
+               else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+                        !net->mpls.ip_ttl_propagate)
+                       ttl = net->mpls.default_ttl;
+               else
+                       ttl = ip_hdr(skb)->ttl;
+               rt = (struct rtable *)dst;
+       } else if (dst->ops->family == AF_INET6) {
+               if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+                       ttl = tun_encap_info->default_ttl;
+               else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+                        !net->mpls.ip_ttl_propagate)
+                       ttl = net->mpls.default_ttl;
+               else
+                       ttl = ipv6_hdr(skb)->hop_limit;
+               rt6 = (struct rt6_info *)dst;
+       } else {
+               goto drop;
+       }
+
        /* Verify the destination can hold the packet */
        new_header_size = mpls_encap_size(tun_encap_info);
        mtu = mpls_dev_mtu(out_dev);
@@ -140,6 +164,7 @@ static int mpls_build_state(struct nlattr *nla,
        struct mpls_iptunnel_encap *tun_encap_info;
        struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
        struct lwtunnel_state *newts;
+       u8 n_labels;
        int ret;
 
        ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
@@ -151,15 +176,32 @@ static int mpls_build_state(struct nlattr *nla,
                return -EINVAL;
 
 
-       newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
+       /* determine number of labels */
+       if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
+                          MAX_NEW_LABELS, &n_labels, NULL))
+               return -EINVAL;
+
+       newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
+                                    n_labels * sizeof(u32));
        if (!newts)
                return -ENOMEM;
 
        tun_encap_info = mpls_lwtunnel_encap(newts);
-       ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+       ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
                             &tun_encap_info->labels, tun_encap_info->label);
        if (ret)
                goto errout;
+
+       tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+       if (tb[MPLS_IPTUNNEL_TTL]) {
+               tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+               /* TTL 0 implies propagate from IP header */
+               tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+                       MPLS_TTL_PROP_DISABLED :
+                       MPLS_TTL_PROP_ENABLED;
+       }
+
        newts->type = LWTUNNEL_ENCAP_MPLS;
        newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
        newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +228,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
                           tun_encap_info->label))
                goto nla_put_failure;
 
+       if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+           nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+               goto nla_put_failure;
+
        return 0;
 
 nla_put_failure:
@@ -195,10 +241,16 @@ nla_put_failure:
 static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
 {
        struct mpls_iptunnel_encap *tun_encap_info;
+       int nlsize;
 
        tun_encap_info = mpls_lwtunnel_encap(lwtstate);
 
-       return nla_total_size(tun_encap_info->labels * 4);
+       nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+       if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+               nlsize += nla_total_size(1);
+
+       return nlsize;
 }
 
 static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,10 +259,12 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
        struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
        int l;
 
-       if (a_hdr->labels != b_hdr->labels)
+       if (a_hdr->labels != b_hdr->labels ||
+           a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+           a_hdr->default_ttl != b_hdr->default_ttl)
                return 1;
 
-       for (l = 0; l < MAX_NEW_LABELS; l++)
+       for (l = 0; l < a_hdr->labels; l++)
                if (a_hdr->label[l] != b_hdr->label[l])
                        return 1;
        return 0;
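
Bounding the comparison by a_hdr->labels rather than MAX_NEW_LABELS follows from label[] becoming a flexible array earlier in this series: entries past ->labels no longer exist to compare, and the label counts themselves are already checked for equality first.
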
index e6a2753dff9e91dac406e657ad0d49875f052503..3d2ac71a83ec411294361037e7b1d77b6d4bb7e2 100644
@@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 
        if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
                cp->flags |= IP_VS_CONN_F_HASHED;
-               atomic_inc(&cp->refcnt);
+               refcount_inc(&cp->refcnt);
                hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
                ret = 1;
        } else {
@@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
        if (cp->flags & IP_VS_CONN_F_HASHED) {
                hlist_del_rcu(&cp->c_list);
                cp->flags &= ~IP_VS_CONN_F_HASHED;
-               atomic_dec(&cp->refcnt);
+               refcount_dec(&cp->refcnt);
                ret = 1;
        } else
                ret = 0;
@@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
        if (cp->flags & IP_VS_CONN_F_HASHED) {
                ret = false;
                /* Decrease refcnt and unlink conn only if we are last user */
-               if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+               if (refcount_dec_if_one(&cp->refcnt)) {
                        hlist_del_rcu(&cp->c_list);
                        cp->flags &= ~IP_VS_CONN_F_HASHED;
                        ret = true;
                }
        } else
-               ret = atomic_read(&cp->refcnt) ? false : true;
+               ret = refcount_read(&cp->refcnt) ? false : true;
 
        spin_unlock(&cp->lock);
        ct_write_unlock_bh(hash);
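
This file is part of the IPVS conversion from atomic_t to refcount_t. refcount_dec_if_one() is the saturation-checked equivalent of the old atomic_cmpxchg(&refcnt, 1, 0) == 1: it drops the count to zero only when the caller holds the last reference, and reports whether it did. In sketch form:

    #include <linux/refcount.h>

    /* old idiom:  if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) ...
     * new idiom, with under/overflow warnings built in:
     */
    if (refcount_dec_if_one(&cp->refcnt)) {
            /* we were the last user; safe to unhash and free */
    }
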
@@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
 void ip_vs_conn_put(struct ip_vs_conn *cp)
 {
        if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
-           (atomic_read(&cp->refcnt) == 1) &&
+           (refcount_read(&cp->refcnt) == 1) &&
            !timer_pending(&cp->timer))
                /* expire connection immediately */
                __ip_vs_conn_put_notimer(cp);
@@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
                      IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
                      ip_vs_fwd_tag(cp), cp->state,
-                     cp->flags, atomic_read(&cp->refcnt),
-                     atomic_read(&dest->refcnt));
+                     cp->flags, refcount_read(&cp->refcnt),
+                     refcount_read(&dest->refcnt));
 
        /* Update the connection counters */
        if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
                      IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
                      ip_vs_fwd_tag(cp), cp->state,
-                     cp->flags, atomic_read(&cp->refcnt),
-                     atomic_read(&dest->refcnt));
+                     cp->flags, refcount_read(&cp->refcnt),
+                     refcount_read(&dest->refcnt));
 
        /* Update the connection counters */
        if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data)
 
   expire_later:
        IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
-                 atomic_read(&cp->refcnt),
+                 refcount_read(&cp->refcnt),
                  atomic_read(&cp->n_control));
 
-       atomic_inc(&cp->refcnt);
+       refcount_inc(&cp->refcnt);
        cp->timeout = 60*HZ;
 
        if (ipvs->sync_state & IP_VS_STATE_MASTER)
@@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
         * it in the table, so that other thread run ip_vs_random_dropentry
         * but cannot drop this entry.
         */
-       atomic_set(&cp->refcnt, 1);
+       refcount_set(&cp->refcnt, 1);
 
        cp->control = NULL;
        atomic_set(&cp->n_control, 0);
index db40050f8785eb9205a7bf493a71c9b956b93ab8..b4a746d0e39bcc4b7418e53c2623a057f119a39d 100644
@@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
                      IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
-                     cp->flags, atomic_read(&cp->refcnt));
+                     cp->flags, refcount_read(&cp->refcnt));
 
        ip_vs_conn_stats(cp, svc);
        return cp;
@@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
                      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
                      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
-                     cp->flags, atomic_read(&cp->refcnt));
+                     cp->flags, refcount_read(&cp->refcnt));
        LeaveFunction(12);
        return cp;
 }
@@ -2231,8 +2231,6 @@ static int __net_init __ip_vs_init(struct net *net)
        if (ip_vs_sync_net_init(ipvs) < 0)
                goto sync_fail;
 
-       printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
-                        sizeof(struct netns_ipvs), ipvs->gen);
        return 0;
 /*
  * Error handling
index 5aeb0dde6ccc5e525e740ca5fde3ba2c58c50070..541aa76947755e1228deb8d3ad40e576b182a83f 100644
@@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
                              dest->vfwmark,
                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
                              ntohs(dest->port),
-                             atomic_read(&dest->refcnt));
+                             refcount_read(&dest->refcnt));
                if (dest->af == dest_af &&
                    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
                    dest->port == dport &&
@@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
        atomic_set(&dest->activeconns, 0);
        atomic_set(&dest->inactconns, 0);
        atomic_set(&dest->persistconns, 0);
-       atomic_set(&dest->refcnt, 1);
+       refcount_set(&dest->refcnt, 1);
 
        INIT_HLIST_NODE(&dest->d_list);
        spin_lock_init(&dest->dst_lock);
@@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
                IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
                              "dest->refcnt=%d, service %u/%s:%u\n",
                              IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
-                             atomic_read(&dest->refcnt),
+                             refcount_read(&dest->refcnt),
                              dest->vfwmark,
                              IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
                              ntohs(dest->vport));
@@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
        spin_lock_bh(&ipvs->dest_trash_lock);
        IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
                      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
-                     atomic_read(&dest->refcnt));
+                     refcount_read(&dest->refcnt));
        if (list_empty(&ipvs->dest_trash) && !cleanup)
                mod_timer(&ipvs->dest_trash_timer,
                          jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
@@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data)
 
        spin_lock(&ipvs->dest_trash_lock);
        list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
-               if (atomic_read(&dest->refcnt) > 1)
+               if (refcount_read(&dest->refcnt) > 1)
                        continue;
                if (dest->idle_start) {
                        if (time_before(now, dest->idle_start +
@@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
                              dev->name,
                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
                              ntohs(dest->port),
-                             atomic_read(&dest->refcnt));
+                             refcount_read(&dest->refcnt));
                __ip_vs_dst_cache_reset(dest);
        }
        spin_unlock_bh(&dest->dst_lock);
index 5824927cf8e02b7fa02f319177d96219c9427033..b6aa4a970c6e97678e8c88e8fe0e0e0b4e4d476d 100644 (file)
@@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
-                     atomic_read(&least->refcnt),
+                     refcount_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
 
        return least;
index 703f11877beece84cb56ec62d4bd13e87c0d67c3..c13ff575f9f73ab9fb53837ff1b01cd279156c9f 100644 (file)
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
-                     atomic_read(&least->refcnt),
+                     refcount_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
        return least;
 }
@@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
                      __func__,
                      IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
                      atomic_read(&most->activeconns),
-                     atomic_read(&most->refcnt),
+                     refcount_read(&most->refcnt),
                      atomic_read(&most->weight), moh);
        return most;
 }
@@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
-                     atomic_read(&least->refcnt),
+                     refcount_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
 
        return least;
index a8b63401e7731e6c8fef37b62c43425e2f96b43c..7d9d4ac596ca5809a9322aa3c1276f6dc08f146c 100644 (file)
@@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
-                     atomic_read(&least->refcnt),
+                     refcount_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
 
        return least;
index d952d67f904d1124ed0c5adfa20a51f82207181c..56f8e4b204ffcc4840a1042097a0f7d0f004df18 100644 (file)
@@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                                ntohs(cp->cport),
                                sctp_state_name(cp->state),
                                sctp_state_name(next_state),
-                               atomic_read(&cp->refcnt));
+                               refcount_read(&cp->refcnt));
                if (dest) {
                        if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
                                (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
index 5117bcb7d2f00604d5ab73f0610246a7d4fef755..12dc8d5bc37d7ea03ba8448514a6d3caf03a62b0 100644 (file)
@@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                              ntohs(cp->cport),
                              tcp_state_name(cp->state),
                              tcp_state_name(new_state),
-                             atomic_read(&cp->refcnt));
+                             refcount_read(&cp->refcnt));
 
                if (dest) {
                        if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
index 58bacfc461ee6a1d6df4e6e024032fb52a044e35..ee0530d14c5f9d468e199bcb7cd53aad022b748b 100644 (file)
@@ -97,7 +97,7 @@ stop:
                      "activeconns %d refcnt %d weight %d\n",
                      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
                      atomic_read(&dest->activeconns),
-                     atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+                     refcount_read(&dest->refcnt), atomic_read(&dest->weight));
 
        return dest;
 }
index f8e2d00f528b945e774564854fc66f53dbc61970..ab23cf203437772407f800861ea68687d78f8ff6 100644 (file)
@@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
-                     atomic_read(&least->refcnt),
+                     refcount_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
 
        return least;
index 6b366fd905542ff086a36da4111bd11646d274d8..6add39e0ec20d61d21883082e5079d16cab6942c 100644 (file)
@@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
-                     atomic_read(&least->refcnt),
+                     refcount_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
 
        return least;
index 17e6d4406ca7c32657eff5e103d0aa1b9317e813..62258dd457ac9825aa14f26bfebb08bb2f2f1755 100644 (file)
@@ -218,7 +218,7 @@ found:
                      "activeconns %d refcnt %d weight %d\n",
                      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
                      atomic_read(&dest->activeconns),
-                     atomic_read(&dest->refcnt),
+                     refcount_read(&dest->refcnt),
                      atomic_read(&dest->weight));
        mark->cl = dest;
 
index ffb78e5f7b70912a2bba608a7f32f0a2bc486adc..3d621b8d7b8a7ba6318ac1359e1efa795c63a479 100644 (file)
@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
-static struct nf_conntrack_tuple_hash *
+static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
               const struct nf_conntrack_tuple *tuple,
               struct nf_conntrack_l3proto *l3proto,
@@ -1241,21 +1241,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
 }
 
-/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
-static inline struct nf_conn *
+/* On success, returns 0, sets skb->_nfct | ctinfo */
+static int
 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                  struct sk_buff *skb,
                  unsigned int dataoff,
                  u_int16_t l3num,
                  u_int8_t protonum,
                  struct nf_conntrack_l3proto *l3proto,
-                 struct nf_conntrack_l4proto *l4proto,
-                 int *set_reply,
-                 enum ip_conntrack_info *ctinfo)
+                 struct nf_conntrack_l4proto *l4proto)
 {
        const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
+       enum ip_conntrack_info ctinfo;
        struct nf_conntrack_zone tmp;
        struct nf_conn *ct;
        u32 hash;
@@ -1264,7 +1263,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                             dataoff, l3num, protonum, net, &tuple, l3proto,
                             l4proto)) {
                pr_debug("Can't get tuple\n");
-               return NULL;
+               return 0;
        }
 
        /* look for tuple match */
@@ -1275,33 +1274,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
                                   skb, dataoff, hash);
                if (!h)
-                       return NULL;
+                       return 0;
                if (IS_ERR(h))
-                       return (void *)h;
+                       return PTR_ERR(h);
        }
        ct = nf_ct_tuplehash_to_ctrack(h);
 
        /* It exists; we have (non-exclusive) reference. */
        if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
-               *ctinfo = IP_CT_ESTABLISHED_REPLY;
-               /* Please set reply bit if this packet OK */
-               *set_reply = 1;
+               ctinfo = IP_CT_ESTABLISHED_REPLY;
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                        pr_debug("normal packet for %p\n", ct);
-                       *ctinfo = IP_CT_ESTABLISHED;
+                       ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                        pr_debug("related packet for %p\n", ct);
-                       *ctinfo = IP_CT_RELATED;
+                       ctinfo = IP_CT_RELATED;
                } else {
                        pr_debug("new packet for %p\n", ct);
-                       *ctinfo = IP_CT_NEW;
+                       ctinfo = IP_CT_NEW;
                }
-               *set_reply = 0;
        }
-       nf_ct_set(skb, ct, *ctinfo);
-       return ct;
+       nf_ct_set(skb, ct, ctinfo);
+       return 0;
 }
 
 unsigned int
@@ -1315,7 +1311,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        unsigned int *timeouts;
        unsigned int dataoff;
        u_int8_t protonum;
-       int set_reply = 0;
        int ret;
 
        tmpl = nf_ct_get(skb, &ctinfo);
@@ -1358,23 +1353,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                        goto out;
        }
 repeat:
-       ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
-                              l3proto, l4proto, &set_reply, &ctinfo);
-       if (!ct) {
-               /* Not valid part of a connection */
-               NF_CT_STAT_INC_ATOMIC(net, invalid);
-               ret = NF_ACCEPT;
-               goto out;
-       }
-
-       if (IS_ERR(ct)) {
+       ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
+                               l3proto, l4proto);
+       if (ret < 0) {
                /* Too stressed to deal. */
                NF_CT_STAT_INC_ATOMIC(net, drop);
                ret = NF_DROP;
                goto out;
        }
 
-       NF_CT_ASSERT(skb_nfct(skb));
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct) {
+               /* Not valid part of a connection */
+               NF_CT_STAT_INC_ATOMIC(net, invalid);
+               ret = NF_ACCEPT;
+               goto out;
+       }
 
        /* Decide what timeout policy we want to apply to this flow. */
        timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
@@ -1399,7 +1393,8 @@ repeat:
                goto out;
        }
 
-       if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+       if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
+           !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
                nf_conntrack_event_cache(IPCT_REPLY, ct);
 out:
        if (tmpl)
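
The rework above changes resolve_normal_ct() from returning a conntrack pointer (errors encoded with ERR_PTR, ctinfo and set_reply passed back through out-parameters) to returning an int, with the conntrack and ctinfo stashed in the skb and re-read by the caller via nf_ct_get(). The set_reply flag disappears: the caller now derives it from ctinfo == IP_CT_ESTABLISHED_REPLY. A toy model of the new control flow (names illustrative, not the kernel API):

#include <stdio.h>
#include <stddef.h>

struct conn { int status; };
struct pkt  { struct conn *ct; int ctinfo; };

enum { CT_NEW, CT_ESTABLISHED, CT_ESTABLISHED_REPLY };

/* Returns <0 only on a hard failure (allocation under stress).
 * Returns 0 otherwise -- but pkt->ct may still be NULL, meaning
 * "not a valid part of any connection". */
static int resolve(struct pkt *p, int classifiable, int stressed)
{
	static struct conn c;

	if (!classifiable)
		return 0;		/* ct stays NULL */
	if (stressed)
		return -1;		/* caller drops the packet */
	p->ct = &c;
	p->ctinfo = CT_ESTABLISHED_REPLY;
	return 0;
}

int main(void)
{
	struct pkt p = { NULL, CT_NEW };

	if (resolve(&p, 1, 0) < 0)
		puts("NF_DROP");		/* too stressed to deal */
	else if (!p.ct)
		puts("NF_ACCEPT, invalid");	/* untracked packet */
	else if (p.ctinfo == CT_ESTABLISHED_REPLY)
		puts("first reply seen: cache IPCT_REPLY event");
	return 0;
}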
index 4b2e1fb28bb438d695715fc492f52bf7809ade5d..cb29e598605f5e6c22961c007bf23291fab0bbaa 100644 (file)
@@ -133,7 +133,7 @@ nf_ct_expect_find_get(struct net *net,
 
        rcu_read_lock();
        i = __nf_ct_expect_find(net, zone, tuple);
-       if (i && !atomic_inc_not_zero(&i->use))
+       if (i && !refcount_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();
 
@@ -186,7 +186,7 @@ nf_ct_find_expectation(struct net *net,
                return NULL;
 
        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
-               atomic_inc(&exp->use);
+               refcount_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
@@ -275,7 +275,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
                return NULL;
 
        new->master = me;
-       atomic_set(&new->use, 1);
+       refcount_set(&new->use, 1);
        return new;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
@@ -348,7 +348,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head)
 
 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
 {
-       if (atomic_dec_and_test(&exp->use))
+       if (refcount_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
@@ -361,7 +361,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
        unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
 
        /* two references : one for hash insert, one for the timer */
-       atomic_add(2, &exp->use);
+       refcount_add(2, &exp->use);
 
        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;
index 6806b5e73567bb0220b248682abed3e5e34f780e..d49cc1e03c5bb072497a2baa97e6f8d0d31f3ee4 100644 (file)
@@ -2693,7 +2693,7 @@ restart:
                                                    cb->nlh->nlmsg_seq,
                                                    IPCTNL_MSG_EXP_NEW,
                                                    exp) < 0) {
-                               if (!atomic_inc_not_zero(&exp->use))
+                               if (!refcount_inc_not_zero(&exp->use))
                                        continue;
                                cb->args[1] = (unsigned long)exp;
                                goto out;
@@ -2739,7 +2739,7 @@ restart:
                                            cb->nlh->nlmsg_seq,
                                            IPCTNL_MSG_EXP_NEW,
                                            exp) < 0) {
-                       if (!atomic_inc_not_zero(&exp->use))
+                       if (!refcount_inc_not_zero(&exp->use))
                                continue;
                        cb->args[1] = (unsigned long)exp;
                        goto out;
index 434c739dfecaa8727dc193f8ad86e18133932660..2d822d2fd83062d28f54737a9e7aae652f608d1b 100644 (file)
@@ -1772,8 +1772,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
                        goto err1;
        }
 
+       if (ops->validate) {
+               const struct nft_data *data = NULL;
+
+               err = ops->validate(ctx, expr, &data);
+               if (err < 0)
+                       goto err2;
+       }
+
        return 0;
 
+err2:
+       if (ops->destroy)
+               ops->destroy(ctx, expr);
 err1:
        expr->ops = NULL;
        return err;
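
This hunk centralises expression validation: nf_tables_newexpr() now invokes ops->validate() right after a successful init and unwinds through ops->destroy() on failure. That is why later hunks in this section delete the init-time validate calls from nft_compat, nft_masq, nft_meta, nft_nat, nft_redir, nft_reject and nft_fib. A self-contained sketch of the pattern, with the validate signature simplified (the real one also takes a ctx and data pointer):

#include <stdio.h>

struct expr;
struct expr_ops {
	int  (*init)(struct expr *e);
	int  (*validate)(struct expr *e);
	void (*destroy)(struct expr *e);
};
struct expr { const struct expr_ops *ops; };

/* Init, then validate centrally; on validation failure, undo the
 * init so individual expressions need no error path of their own. */
static int expr_init(struct expr *e)
{
	int err;

	if (e->ops->init) {
		err = e->ops->init(e);
		if (err < 0)
			return err;
	}
	if (e->ops->validate) {
		err = e->ops->validate(e);
		if (err < 0) {
			if (e->ops->destroy)
				e->ops->destroy(e);
			return err;
		}
	}
	return 0;
}

static int bad_validate(struct expr *e) { (void)e; return -1; }
static void destroy(struct expr *e) { (void)e; puts("destroyed"); }

int main(void)
{
	const struct expr_ops ops = { NULL, bad_validate, destroy };
	struct expr e = { &ops };

	printf("err=%d\n", expr_init(&e));	/* prints "destroyed", err=-1 */
	return 0;
}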
@@ -2523,8 +2534,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
        return 0;
 }
 
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
-                                    const struct nlattr *nla, u8 genmask)
+static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+                                           const struct nlattr *nla, u8 genmask)
 {
        struct nft_set *set;
 
@@ -2538,11 +2549,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
        }
        return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
 
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
-                                         const struct nlattr *nla,
-                                         u8 genmask)
+static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+                                                const struct nlattr *nla,
+                                                u8 genmask)
 {
        struct nft_trans *trans;
        u32 id = ntohl(nla_get_be32(nla));
@@ -2557,7 +2567,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
        }
        return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
+
+struct nft_set *nft_set_lookup(const struct net *net,
+                              const struct nft_table *table,
+                              const struct nlattr *nla_set_name,
+                              const struct nlattr *nla_set_id,
+                              u8 genmask)
+{
+       struct nft_set *set;
+
+       set = nf_tables_set_lookup(table, nla_set_name, genmask);
+       if (IS_ERR(set)) {
+               if (!nla_set_id)
+                       return set;
+
+               set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+       }
+       return set;
+}
+EXPORT_SYMBOL_GPL(nft_set_lookup);
 
 static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
                                    const char *name)
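
Here nf_tables_set_lookup() and nf_tables_set_lookup_byid() go static, and a single exported wrapper, nft_set_lookup(), absorbs the try-name-then-fall-back-to-id dance that dynset, lookup and objref each open-coded (their simplified callers appear in later hunks). A toy model of the combined lookup, with illustrative data in place of the netlink attributes:

#include <stdio.h>
#include <string.h>

struct set { const char *name; unsigned int id; };

static struct set sets[] = { { "blackhole", 1 }, { "allowed", 2 } };

static struct set *lookup_byname(const char *name)
{
	for (unsigned int i = 0; i < sizeof(sets) / sizeof(sets[0]); i++)
		if (!strcmp(sets[i].name, name))
			return &sets[i];
	return NULL;
}

static struct set *lookup_byid(unsigned int id)
{
	for (unsigned int i = 0; i < sizeof(sets) / sizeof(sets[0]); i++)
		if (sets[i].id == id)
			return &sets[i];
	return NULL;
}

/* Mirrors nft_set_lookup(): by name first; only if that fails and an
 * id attribute was supplied, retry by transaction id. */
static struct set *set_lookup(const char *name, const unsigned int *id)
{
	struct set *s = name ? lookup_byname(name) : NULL;

	if (!s && id)
		s = lookup_byid(*id);
	return s;
}

int main(void)
{
	unsigned int id = 2;
	struct set *s = set_lookup("missing", &id);

	printf("%s\n", s ? s->name : "ENOENT");	/* "allowed" via id fallback */
	return 0;
}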
@@ -4064,7 +4092,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
        [NFTA_OBJ_DATA]         = { .type = NLA_NESTED },
 };
 
-static struct nft_object *nft_obj_init(const struct nft_object_type *type,
+static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+                                      const struct nft_object_type *type,
                                       const struct nlattr *attr)
 {
        struct nlattr *tb[type->maxattr + 1];
@@ -4084,7 +4113,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
        if (obj == NULL)
                goto err1;
 
-       err = type->init((const struct nlattr * const *)tb, obj);
+       err = type->init(ctx, (const struct nlattr * const *)tb, obj);
        if (err < 0)
                goto err2;
 
@@ -4192,7 +4221,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
        if (IS_ERR(type))
                return PTR_ERR(type);
 
-       obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
+       obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto err1;
index d44d89b561275e25bb31fe2b0ed198d13995533c..c86da174a5fced4e3c67307925332273b9f858fe 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/slab.h>
@@ -32,7 +33,7 @@ struct nf_acct {
        atomic64_t              bytes;
        unsigned long           flags;
        struct list_head        head;
-       atomic_t                refcnt;
+       refcount_t              refcnt;
        char                    name[NFACCT_NAME_MAX];
        struct rcu_head         rcu_head;
        char                    data[0];
@@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
                atomic64_set(&nfacct->pkts,
                             be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
        }
-       atomic_set(&nfacct->refcnt, 1);
+       refcount_set(&nfacct->refcnt, 1);
        list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
        return 0;
 }
@@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
                         NFACCT_PAD) ||
            nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
                         NFACCT_PAD) ||
-           nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
+           nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
                goto nla_put_failure;
        if (acct->flags & NFACCT_F_QUOTA) {
                u64 *quota = (u64 *)acct->data;
@@ -329,7 +330,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
        /* We want to avoid races with nfnl_acct_put. So only when the current
         * refcnt is 1, we decrease it to 0.
         */
-       if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
+       if (refcount_dec_if_one(&cur->refcnt)) {
                /* We are protected by nfnl mutex. */
                list_del_rcu(&cur->head);
                kfree_rcu(cur, rcu_head);
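
refcount_dec_if_one() replaces the open-coded atomic_cmpxchg(&refcnt, 1, 0): deletion proceeds only if we atomically turn the final reference into zero, so a concurrent put can never race the list removal. A userspace model of just the semantics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Succeeds only when the count is exactly 1, taking it to 0;
 * equivalent to the old atomic_cmpxchg(&cnt, 1, 0) == 1 test. */
static bool refcount_dec_if_one(atomic_uint *refs)
{
	unsigned int one = 1;

	return atomic_compare_exchange_strong(refs, &one, 0);
}

int main(void)
{
	atomic_uint refs = 2;

	printf("%d\n", refcount_dec_if_one(&refs));	/* 0: still shared */
	atomic_fetch_sub(&refs, 1);
	printf("%d\n", refcount_dec_if_one(&refs));	/* 1: last owner, delete */
	return 0;
}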
@@ -413,7 +414,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
                if (!try_module_get(THIS_MODULE))
                        goto err;
 
-               if (!atomic_inc_not_zero(&cur->refcnt)) {
+               if (!refcount_inc_not_zero(&cur->refcnt)) {
                        module_put(THIS_MODULE);
                        goto err;
                }
@@ -429,7 +430,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
 
 void nfnl_acct_put(struct nf_acct *acct)
 {
-       if (atomic_dec_and_test(&acct->refcnt))
+       if (refcount_dec_and_test(&acct->refcnt))
                kfree_rcu(acct, rcu_head);
 
        module_put(THIS_MODULE);
@@ -502,7 +503,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
        list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
                list_del_rcu(&cur->head);
 
-               if (atomic_dec_and_test(&cur->refcnt))
+               if (refcount_dec_and_test(&cur->refcnt))
                        kfree_rcu(cur, rcu_head);
        }
 }
index 139e0867e56e9e606942c98e75148eb17b2ec7eb..baa75f3ab7e75d2ab85e658b4d09b009649043df 100644 (file)
@@ -138,7 +138,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
        strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
        timeout->l3num = l3num;
        timeout->l4proto = l4proto;
-       atomic_set(&timeout->refcnt, 1);
+       refcount_set(&timeout->refcnt, 1);
        list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
 
        return 0;
@@ -172,7 +172,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
            nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
            nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
            nla_put_be32(skb, CTA_TIMEOUT_USE,
-                        htonl(atomic_read(&timeout->refcnt))))
+                        htonl(refcount_read(&timeout->refcnt))))
                goto nla_put_failure;
 
        if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
@@ -339,7 +339,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
        /* We want to avoid races with ctnl_timeout_put. So only when the
         * current refcnt is 1, we decrease it to 0.
         */
-       if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
+       if (refcount_dec_if_one(&timeout->refcnt)) {
                /* We are protected by nfnl mutex. */
                list_del_rcu(&timeout->head);
                nf_ct_l4proto_put(timeout->l4proto);
@@ -536,7 +536,7 @@ ctnl_timeout_find_get(struct net *net, const char *name)
                if (!try_module_get(THIS_MODULE))
                        goto err;
 
-               if (!atomic_inc_not_zero(&timeout->refcnt)) {
+               if (!refcount_inc_not_zero(&timeout->refcnt)) {
                        module_put(THIS_MODULE);
                        goto err;
                }
@@ -550,7 +550,7 @@ err:
 
 static void ctnl_timeout_put(struct ctnl_timeout *timeout)
 {
-       if (atomic_dec_and_test(&timeout->refcnt))
+       if (refcount_dec_and_test(&timeout->refcnt))
                kfree_rcu(timeout, rcu_head);
 
        module_put(THIS_MODULE);
@@ -601,7 +601,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
                list_del_rcu(&cur->head);
                nf_ct_l4proto_put(cur->l4proto);
 
-               if (atomic_dec_and_test(&cur->refcnt))
+               if (refcount_dec_and_test(&cur->refcnt))
                        kfree_rcu(cur, rcu_head);
        }
 }
index 08247bf7d7b836828c8151ed07f438e05973c2c0..ecd857b75ffe631c5d52e5e8ed9967e9152bc9ab 100644 (file)
@@ -40,6 +40,8 @@
 #include <net/netfilter/nfnetlink_log.h>
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
+
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 #include "../bridge/br_private.h"
@@ -57,7 +59,7 @@
 struct nfulnl_instance {
        struct hlist_node hlist;        /* global list of instances */
        spinlock_t lock;
-       atomic_t use;                   /* use count */
+       refcount_t use;                 /* use count */
 
        unsigned int qlen;              /* number of nlmsgs in skb */
        struct sk_buff *skb;            /* pre-allocated skb */
@@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
 static inline void
 instance_get(struct nfulnl_instance *inst)
 {
-       atomic_inc(&inst->use);
+       refcount_inc(&inst->use);
 }
 
 static struct nfulnl_instance *
@@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
 
        rcu_read_lock_bh();
        inst = __instance_lookup(log, group_num);
-       if (inst && !atomic_inc_not_zero(&inst->use))
+       if (inst && !refcount_inc_not_zero(&inst->use))
                inst = NULL;
        rcu_read_unlock_bh();
 
@@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
 static void
 instance_put(struct nfulnl_instance *inst)
 {
-       if (inst && atomic_dec_and_test(&inst->use))
+       if (inst && refcount_dec_and_test(&inst->use))
                call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
 }
 
@@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num,
        INIT_HLIST_NODE(&inst->hlist);
        spin_lock_init(&inst->lock);
        /* needs to be two, since we _put() after creation */
-       atomic_set(&inst->use, 2);
+       refcount_set(&inst->use, 2);
 
        setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
 
@@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v)
                   inst->group_num,
                   inst->peer_portid, inst->qlen,
                   inst->copy_mode, inst->copy_range,
-                  inst->flushtimeout, atomic_read(&inst->use));
+                  inst->flushtimeout, refcount_read(&inst->use));
 
        return 0;
 }
index c21e7eb8dce02a6b73c5a466300ae793a2787b26..fab6bf3f955ed0a9251b4dfc72b41e5948fa4a21 100644 (file)
@@ -230,10 +230,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        union nft_entry e = {};
        int ret;
 
-       ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
-       if (ret < 0)
-               goto err;
-
        target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
 
        if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -419,10 +415,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        union nft_entry e = {};
        int ret;
 
-       ret = nft_compat_chain_validate_dependency(match->table, ctx->chain);
-       if (ret < 0)
-               goto err;
-
        match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
 
        if (ctx->nla[NFTA_RULE_COMPAT]) {
index 7f84222133414d4a65ed96508c83dea711295c9f..67a710ebde09da21464da4bb22a78a36c791274d 100644 (file)
@@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
        return 0;
 }
 
-static int nft_counter_obj_init(const struct nlattr * const tb[],
+static int nft_counter_obj_init(const struct nft_ctx *ctx,
+                               const struct nlattr * const tb[],
                                struct nft_object *obj)
 {
        struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
index 0264258c46feb5071a8eebcf9299b4b717fd0a32..640fe5a5865ef26ea71378535927f6bbd9a6aceb 100644 (file)
@@ -32,6 +32,12 @@ struct nft_ct {
        };
 };
 
+struct nft_ct_helper_obj {
+       struct nf_conntrack_helper *helper4;
+       struct nf_conntrack_helper *helper6;
+       u8 l4proto;
+};
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
@@ -733,6 +739,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = {
        .owner          = THIS_MODULE,
 };
 
+static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+                                 const struct nlattr * const tb[],
+                                 struct nft_object *obj)
+{
+       struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+       struct nf_conntrack_helper *help4, *help6;
+       char name[NF_CT_HELPER_NAME_LEN];
+       int family = ctx->afi->family;
+
+       if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+               return -EINVAL;
+
+       priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
+       if (!priv->l4proto)
+               return -ENOENT;
+
+       nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
+
+       if (tb[NFTA_CT_HELPER_L3PROTO])
+               family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
+
+       help4 = NULL;
+       help6 = NULL;
+
+       switch (family) {
+       case NFPROTO_IPV4:
+               if (ctx->afi->family == NFPROTO_IPV6)
+                       return -EINVAL;
+
+               help4 = nf_conntrack_helper_try_module_get(name, family,
+                                                          priv->l4proto);
+               break;
+       case NFPROTO_IPV6:
+               if (ctx->afi->family == NFPROTO_IPV4)
+                       return -EINVAL;
+
+               help6 = nf_conntrack_helper_try_module_get(name, family,
+                                                          priv->l4proto);
+               break;
+       case NFPROTO_NETDEV: /* fallthrough */
+       case NFPROTO_BRIDGE: /* same */
+       case NFPROTO_INET:
+               help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
+                                                          priv->l4proto);
+               help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
+                                                          priv->l4proto);
+               break;
+       default:
+               return -EAFNOSUPPORT;
+       }
+
+       /* && is intentional; only error if INET found neither ipv4 nor ipv6 */
+       if (!help4 && !help6)
+               return -ENOENT;
+
+       priv->helper4 = help4;
+       priv->helper6 = help6;
+
+       return 0;
+}
+
+static void nft_ct_helper_obj_destroy(struct nft_object *obj)
+{
+       struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+
+       if (priv->helper4)
+               module_put(priv->helper4->me);
+       if (priv->helper6)
+               module_put(priv->helper6->me);
+}
+
+static void nft_ct_helper_obj_eval(struct nft_object *obj,
+                                  struct nft_regs *regs,
+                                  const struct nft_pktinfo *pkt)
+{
+       const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+       struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+       struct nf_conntrack_helper *to_assign = NULL;
+       struct nf_conn_help *help;
+
+       if (!ct ||
+           nf_ct_is_confirmed(ct) ||
+           nf_ct_is_template(ct) ||
+           priv->l4proto != nf_ct_protonum(ct))
+               return;
+
+       switch (nf_ct_l3num(ct)) {
+       case NFPROTO_IPV4:
+               to_assign = priv->helper4;
+               break;
+       case NFPROTO_IPV6:
+               to_assign = priv->helper6;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       if (!to_assign)
+               return;
+
+       if (test_bit(IPS_HELPER_BIT, &ct->status))
+               return;
+
+       help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+       if (help) {
+               rcu_assign_pointer(help->helper, to_assign);
+               set_bit(IPS_HELPER_BIT, &ct->status);
+       }
+}
+
+static int nft_ct_helper_obj_dump(struct sk_buff *skb,
+                                 struct nft_object *obj, bool reset)
+{
+       const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+       const struct nf_conntrack_helper *helper = priv->helper4;
+       u16 family;
+
+       if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
+               return -1;
+
+       if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
+               return -1;
+
+       if (priv->helper4 && priv->helper6)
+               family = NFPROTO_INET;
+       else if (priv->helper6)
+               family = NFPROTO_IPV6;
+       else
+               family = NFPROTO_IPV4;
+
+       if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
+               return -1;
+
+       return 0;
+}
+
+static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
+       [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
+                                 .len = NF_CT_HELPER_NAME_LEN - 1 },
+       [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
+       [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_helper_obj __read_mostly = {
+       .type           = NFT_OBJECT_CT_HELPER,
+       .size           = sizeof(struct nft_ct_helper_obj),
+       .maxattr        = NFTA_CT_HELPER_MAX,
+       .policy         = nft_ct_helper_policy,
+       .eval           = nft_ct_helper_obj_eval,
+       .init           = nft_ct_helper_obj_init,
+       .destroy        = nft_ct_helper_obj_destroy,
+       .dump           = nft_ct_helper_obj_dump,
+       .owner          = THIS_MODULE,
+};
+
 static int __init nft_ct_module_init(void)
 {
        int err;
@@ -747,7 +909,14 @@ static int __init nft_ct_module_init(void)
        if (err < 0)
                goto err1;
 
+       err = nft_register_obj(&nft_ct_helper_obj);
+       if (err < 0)
+               goto err2;
+
        return 0;
+
+err2:
+       nft_unregister_expr(&nft_notrack_type);
 err1:
        nft_unregister_expr(&nft_ct_type);
        return err;
@@ -755,6 +924,7 @@ err1:
 
 static void __exit nft_ct_module_exit(void)
 {
+       nft_unregister_obj(&nft_ct_helper_obj);
        nft_unregister_expr(&nft_notrack_type);
        nft_unregister_expr(&nft_ct_type);
 }
@@ -766,3 +936,4 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_ALIAS_NFT_EXPR("ct");
 MODULE_ALIAS_NFT_EXPR("notrack");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
index 049ad2d9ee66959367a051903563dca6ba654edb..3948da380259538c2fd4823f65a9407241f8af4e 100644 (file)
@@ -133,16 +133,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
                        priv->invert = true;
        }
 
-       set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
-                                  genmask);
-       if (IS_ERR(set)) {
-               if (tb[NFTA_DYNSET_SET_ID])
-                       set = nf_tables_set_lookup_byid(ctx->net,
-                                                       tb[NFTA_DYNSET_SET_ID],
-                                                       genmask);
-               if (IS_ERR(set))
-                       return PTR_ERR(set);
-       }
+       set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
+                            tb[NFTA_DYNSET_SET_ID], genmask);
+       if (IS_ERR(set))
+               return PTR_ERR(set);
 
        if (set->ops->update == NULL)
                return -EOPNOTSUPP;
index c308920b194cdbe5e3a2e9a09cfb8aab7267f588..d212a85d2f3336e1b1a393c5ac4bf4fc8f1eebca 100644 (file)
@@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
                        goto err;
 
                offset = i + priv->offset;
-               dest[priv->len / NFT_REG32_SIZE] = 0;
-               memcpy(dest, opt + offset, priv->len);
+               if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+                       *dest = 1;
+               } else {
+                       dest[priv->len / NFT_REG32_SIZE] = 0;
+                       memcpy(dest, opt + offset, priv->len);
+               }
 
                return;
        }
 
 err:
-       regs->verdict.code = NFT_BREAK;
+       if (priv->flags & NFT_EXTHDR_F_PRESENT)
+               *dest = 0;
+       else
+               regs->verdict.code = NFT_BREAK;
 }
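
With the new presence flag set (NFT_EXTHDR_F_PRESENT in the real code), the exthdr expression stores a boolean into the destination register instead of copying the TCP option bytes, and a missing option writes 0 rather than ending rule evaluation with NFT_BREAK. A simplified model, with the register zero-padding omitted:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define F_PRESENT	0x1

static void exthdr_eval(uint32_t *dest, const uint8_t *opt, bool found,
			unsigned int offset, unsigned int len,
			unsigned int flags, bool *nft_break)
{
	if (found) {
		if (flags & F_PRESENT)
			*dest = 1;			 /* option exists */
		else
			memcpy(dest, opt + offset, len); /* copy payload */
		return;
	}
	if (flags & F_PRESENT)
		*dest = 0;		/* report absence as data */
	else
		*nft_break = true;	/* old path: stop the rule */
}

int main(void)
{
	uint32_t reg = 99;
	bool brk = false;

	exthdr_eval(&reg, NULL, false, 0, 0, F_PRESENT, &brk);
	printf("reg=%u brk=%d\n", reg, brk);	/* reg=0 brk=0: rule continues */
	return 0;
}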
 
 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
index 29a4906adc277cd3a2cf55242224e4c99fd070e7..21df8cccea6582e56d7bfbb6fba21f821b7c56d9 100644 (file)
@@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
 EXPORT_SYMBOL(nft_fib_policy);
 
 #define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
-                       NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+                       NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+                       NFTA_FIB_F_PRESENT)
 
 int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
                     const struct nft_data **data)
@@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        if (err < 0)
                return err;
 
-       return nft_fib_validate(ctx, expr, NULL);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(nft_fib_init);
 
@@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
 }
 EXPORT_SYMBOL_GPL(nft_fib_dump);
 
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
                          const struct nft_pktinfo *pkt, int index)
 {
        struct net_device *dev;
        u32 *dreg = reg;
 
-       switch (r) {
+       switch (priv->result) {
        case NFT_FIB_RESULT_OIF:
-               *dreg = index;
+               *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
                break;
        case NFT_FIB_RESULT_OIFNAME:
                dev = dev_get_by_index_rcu(nft_net(pkt), index);
-               strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+               if (priv->flags & NFTA_FIB_F_PRESENT)
+                       *dreg = !!dev;
+               else
+                       strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
                break;
        default:
                WARN_ON_ONCE(1);
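
The same presence idea reaches fib: nft_fib_store_result() now receives the whole priv so that, with the presence flag set, it reduces the result to existence (!!index, !!dev) instead of the interface index or name. A sketch under those assumptions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define F_PRESENT	0x1
#define IFNAMSIZ	16

enum fib_result { FIB_RESULT_OIF, FIB_RESULT_OIFNAME };

static void fib_store_result(void *reg, enum fib_result result,
			     unsigned int flags, int index,
			     const char *devname)
{
	uint32_t *dreg = reg;

	switch (result) {
	case FIB_RESULT_OIF:
		*dreg = (flags & F_PRESENT) ? !!index : (uint32_t)index;
		break;
	case FIB_RESULT_OIFNAME:
		if (flags & F_PRESENT)
			*dreg = devname != NULL;	/* just "found?" */
		else
			strncpy(reg, devname ? devname : "", IFNAMSIZ);
		break;
	}
}

int main(void)
{
	uint32_t reg[IFNAMSIZ / 4] = { 0 };

	fib_store_result(reg, FIB_RESULT_OIF, F_PRESENT, 42, NULL);
	printf("%u\n", reg[0]);		/* 1: an output route exists */
	return 0;
}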
index eb2721af898dbb54ab099f4878d7b2673cbb9522..a6a4633725bb4cba848112bc3ea9978fe34d6ea4 100644 (file)
@@ -17,7 +17,7 @@
 #include <net/netfilter/nf_tables_core.h>
 #include <linux/jhash.h>
 
-struct nft_hash {
+struct nft_jhash {
        enum nft_registers      sreg:8;
        enum nft_registers      dreg:8;
        u8                      len;
@@ -26,11 +26,11 @@ struct nft_hash {
        u32                     offset;
 };
 
-static void nft_hash_eval(const struct nft_expr *expr,
-                         struct nft_regs *regs,
-                         const struct nft_pktinfo *pkt)
+static void nft_jhash_eval(const struct nft_expr *expr,
+                          struct nft_regs *regs,
+                          const struct nft_pktinfo *pkt)
 {
-       struct nft_hash *priv = nft_expr_priv(expr);
+       struct nft_jhash *priv = nft_expr_priv(expr);
        const void *data = &regs->data[priv->sreg];
        u32 h;
 
@@ -38,6 +38,25 @@ static void nft_hash_eval(const struct nft_expr *expr,
        regs->data[priv->dreg] = h + priv->offset;
 }
 
+struct nft_symhash {
+       enum nft_registers      dreg:8;
+       u32                     modulus;
+       u32                     offset;
+};
+
+static void nft_symhash_eval(const struct nft_expr *expr,
+                            struct nft_regs *regs,
+                            const struct nft_pktinfo *pkt)
+{
+       struct nft_symhash *priv = nft_expr_priv(expr);
+       struct sk_buff *skb = pkt->skb;
+       u32 h;
+
+       h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+
+       regs->data[priv->dreg] = h + priv->offset;
+}
+
 static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
        [NFTA_HASH_SREG]        = { .type = NLA_U32 },
        [NFTA_HASH_DREG]        = { .type = NLA_U32 },
@@ -45,13 +64,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
        [NFTA_HASH_MODULUS]     = { .type = NLA_U32 },
        [NFTA_HASH_SEED]        = { .type = NLA_U32 },
        [NFTA_HASH_OFFSET]      = { .type = NLA_U32 },
+       [NFTA_HASH_TYPE]        = { .type = NLA_U32 },
 };
 
-static int nft_hash_init(const struct nft_ctx *ctx,
-                        const struct nft_expr *expr,
-                        const struct nlattr * const tb[])
+static int nft_jhash_init(const struct nft_ctx *ctx,
+                         const struct nft_expr *expr,
+                         const struct nlattr * const tb[])
 {
-       struct nft_hash *priv = nft_expr_priv(expr);
+       struct nft_jhash *priv = nft_expr_priv(expr);
        u32 len;
        int err;
 
@@ -92,10 +112,36 @@ static int nft_hash_init(const struct nft_ctx *ctx,
                                           NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_hash_dump(struct sk_buff *skb,
-                        const struct nft_expr *expr)
+static int nft_symhash_init(const struct nft_ctx *ctx,
+                           const struct nft_expr *expr,
+                           const struct nlattr * const tb[])
 {
-       const struct nft_hash *priv = nft_expr_priv(expr);
+       struct nft_symhash *priv = nft_expr_priv(expr);
+
+       if (!tb[NFTA_HASH_DREG]    ||
+           !tb[NFTA_HASH_MODULUS])
+               return -EINVAL;
+
+       if (tb[NFTA_HASH_OFFSET])
+               priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+       priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+       priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+       if (priv->modulus <= 1)
+               return -ERANGE;
+
+       if (priv->offset + priv->modulus - 1 < priv->offset)
+               return -EOVERFLOW;
+
+       return nft_validate_register_store(ctx, priv->dreg, NULL,
+                                          NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_jhash_dump(struct sk_buff *skb,
+                         const struct nft_expr *expr)
+{
+       const struct nft_jhash *priv = nft_expr_priv(expr);
 
        if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
                goto nla_put_failure;
@@ -110,6 +156,28 @@ static int nft_hash_dump(struct sk_buff *skb,
        if (priv->offset != 0)
                if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
                        goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS)))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static int nft_symhash_dump(struct sk_buff *skb,
+                           const struct nft_expr *expr)
+{
+       const struct nft_symhash *priv = nft_expr_priv(expr);
+
+       if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+               goto nla_put_failure;
+       if (priv->offset != 0)
+               if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+                       goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM)))
+               goto nla_put_failure;
        return 0;
 
 nla_put_failure:
@@ -117,17 +185,46 @@ nla_put_failure:
 }
 
 static struct nft_expr_type nft_hash_type;
-static const struct nft_expr_ops nft_hash_ops = {
+static const struct nft_expr_ops nft_jhash_ops = {
        .type           = &nft_hash_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
-       .eval           = nft_hash_eval,
-       .init           = nft_hash_init,
-       .dump           = nft_hash_dump,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
+       .eval           = nft_jhash_eval,
+       .init           = nft_jhash_init,
+       .dump           = nft_jhash_dump,
 };
 
+static const struct nft_expr_ops nft_symhash_ops = {
+       .type           = &nft_hash_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
+       .eval           = nft_symhash_eval,
+       .init           = nft_symhash_init,
+       .dump           = nft_symhash_dump,
+};
+
+static const struct nft_expr_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx,
+                   const struct nlattr * const tb[])
+{
+       u32 type;
+
+       if (!tb[NFTA_HASH_TYPE])
+               return &nft_jhash_ops;
+
+       type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
+       switch (type) {
+       case NFT_HASH_SYM:
+               return &nft_symhash_ops;
+       case NFT_HASH_JENKINS:
+               return &nft_jhash_ops;
+       default:
+               break;
+       }
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
 static struct nft_expr_type nft_hash_type __read_mostly = {
        .name           = "hash",
-       .ops            = &nft_hash_ops,
+       .select_ops     = &nft_hash_select_ops,
        .policy         = nft_hash_policy,
        .maxattr        = NFTA_HASH_MAX,
        .owner          = THIS_MODULE,
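
The hash expression keeps a single netlink type but now picks between two ops via select_ops(): an absent NFTA_HASH_TYPE keeps the Jenkins-hash behaviour, while NFT_HASH_SYM selects the symmetric flow hash (__skb_get_hash_symmetric), which maps both directions of a flow to the same value. A toy dispatch model; the hash functions here are stand-ins, not the real algorithms:

#include <stdint.h>
#include <stdio.h>

enum { HASH_JENKINS, HASH_SYM };

struct ops {
	const char *name;
	uint32_t (*eval)(uint32_t in, uint32_t mod);
};

static uint32_t jhash_eval(uint32_t in, uint32_t mod) { return (in * 2654435761u) % mod; }
static uint32_t sym_eval(uint32_t in, uint32_t mod)   { return in % mod; }

static const struct ops jhash_ops = { "jenkins", jhash_eval };
static const struct ops sym_ops   = { "sym",     sym_eval };

/* Mirrors nft_hash_select_ops(): no TYPE attribute means jenkins. */
static const struct ops *select_ops(const int *type_attr)
{
	if (!type_attr)
		return &jhash_ops;
	switch (*type_attr) {
	case HASH_SYM:		return &sym_ops;
	case HASH_JENKINS:	return &jhash_ops;
	default:		return NULL;	/* -EOPNOTSUPP */
	}
}

int main(void)
{
	int t = HASH_SYM;
	const struct ops *ops = select_ops(&t);

	printf("%s -> %u\n", ops->name, ops->eval(12345, 7));
	return 0;
}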
index c6baf412236d662b0d165fec3b4ff26579c9c6d8..18dd57a526513bd726944fa7ad7a9e41fcfb0251 100644 (file)
@@ -17,9 +17,8 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-static DEFINE_SPINLOCK(limit_lock);
-
 struct nft_limit {
+       spinlock_t      lock;
        u64             last;
        u64             tokens;
        u64             tokens_max;
@@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
        u64 now, tokens;
        s64 delta;
 
-       spin_lock_bh(&limit_lock);
+       spin_lock_bh(&limit->lock);
        now = ktime_get_ns();
        tokens = limit->tokens + now - limit->last;
        if (tokens > limit->tokens_max)
@@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
        delta = tokens - cost;
        if (delta >= 0) {
                limit->tokens = delta;
-               spin_unlock_bh(&limit_lock);
+               spin_unlock_bh(&limit->lock);
                return limit->invert;
        }
        limit->tokens = tokens;
-       spin_unlock_bh(&limit_lock);
+       spin_unlock_bh(&limit->lock);
        return !limit->invert;
 }
 
@@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit,
                        limit->invert = true;
        }
        limit->last = ktime_get_ns();
+       spin_lock_init(&limit->lock);
 
        return 0;
 }
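
Replacing the single global limit_lock with a spinlock embedded in each nft_limit removes cross-rule contention: unrelated limit expressions no longer serialise on one lock. A userspace sketch of the per-instance token bucket, with a pthread mutex standing in for spin_lock_bh() and the invert handling omitted:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct limit {
	pthread_mutex_t lock;	/* was: one global lock shared by all */
	uint64_t last;		/* ns timestamp of the last refill */
	uint64_t tokens;	/* remaining budget, in ns of credit */
	uint64_t tokens_max;
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* True if the bucket still holds >= cost tokens; refills by elapsed time. */
static bool limit_eval(struct limit *l, uint64_t cost)
{
	uint64_t now, tokens;
	bool ok;

	pthread_mutex_lock(&l->lock);
	now = now_ns();
	tokens = l->tokens + (now - l->last);
	if (tokens > l->tokens_max)
		tokens = l->tokens_max;
	l->last = now;
	ok = tokens >= cost;
	l->tokens = ok ? tokens - cost : tokens;
	pthread_mutex_unlock(&l->lock);
	return ok;
}

int main(void)
{
	struct limit l = { PTHREAD_MUTEX_INITIALIZER, now_ns(),
			   2000000000ull, 2000000000ull };	/* 2s of credit */

	printf("%d\n", limit_eval(&l, 1500000000ull));	/* 1: within budget */
	printf("%d\n", limit_eval(&l, 1500000000ull));	/* 0: bucket drained */
	return 0;
}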
index e21aea7e5ec8f141ea3155d1da3c491484c00a73..475570e89ede710b323868792bb16fd5f09d1b09 100644 (file)
@@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
            tb[NFTA_LOOKUP_SREG] == NULL)
                return -EINVAL;
 
-       set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
-       if (IS_ERR(set)) {
-               if (tb[NFTA_LOOKUP_SET_ID]) {
-                       set = nf_tables_set_lookup_byid(ctx->net,
-                                                       tb[NFTA_LOOKUP_SET_ID],
-                                                       genmask);
-               }
-               if (IS_ERR(set))
-                       return PTR_ERR(set);
-       }
+       set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+                            tb[NFTA_LOOKUP_SET_ID], genmask);
+       if (IS_ERR(set))
+               return PTR_ERR(set);
 
        if (set->flags & NFT_SET_EVAL)
                return -EOPNOTSUPP;
index 11ce016cd47948f3a2902402e0cbbda5e22d9438..6ac03d4266c9038cb4ffd3de412a216b769d07a1 100644 (file)
@@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx,
        struct nft_masq *priv = nft_expr_priv(expr);
        int err;
 
-       err = nft_masq_validate(ctx, expr, NULL);
-       if (err)
-               return err;
-
        if (tb[NFTA_MASQ_FLAGS]) {
                priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
                if (priv->flags & ~NF_NAT_RANGE_MASK)
index 7b60e01f38ff9f2f9fa7d28f6f99b4f889d190d7..9563ce3c23aa078d781590c475f8981fe198d393 100644 (file)
@@ -372,10 +372,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
                return -EOPNOTSUPP;
        }
 
-       err = nft_meta_set_validate(ctx, expr, NULL);
-       if (err < 0)
-               return err;
-
        priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
        err = nft_validate_register_load(priv->sreg, len);
        if (err < 0)
index 439e0bd152a004c98664a19ae6c920458fa6160a..ed548d06b6dda9a98888bb83f2baa6b45c965c15 100644 (file)
@@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
                return -EINVAL;
        }
 
-       err = nft_nat_validate(ctx, expr, NULL);
-       if (err < 0)
-               return err;
-
        if (tb[NFTA_NAT_FAMILY] == NULL)
                return -EINVAL;
 
index 1ae8c49ca4a1fac06f69c41f68a36b7e85593adb..1dd428fbaaa3f836e7bcecb50355f60c14d8faad 100644 (file)
@@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
        struct nft_set *set;
        int err;
 
-       set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
-       if (IS_ERR(set)) {
-               if (tb[NFTA_OBJREF_SET_ID]) {
-                       set = nf_tables_set_lookup_byid(ctx->net,
-                                                       tb[NFTA_OBJREF_SET_ID],
-                                                       genmask);
-               }
-               if (IS_ERR(set))
-                       return PTR_ERR(set);
-       }
+       set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
+                            tb[NFTA_OBJREF_SET_ID], genmask);
+       if (IS_ERR(set))
+               return PTR_ERR(set);
 
        if (!(set->flags & NFT_SET_OBJECT))
                return -EINVAL;
index 2d6fe3559912674385e7679557fc31ddeb901b38..25e33159be57882fcf9875725079188dfaa7d113 100644 (file)
@@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
        return 0;
 }
 
-static int nft_quota_obj_init(const struct nlattr * const tb[],
+static int nft_quota_obj_init(const struct nft_ctx *ctx,
+                             const struct nlattr * const tb[],
                              struct nft_object *obj)
 {
        struct nft_quota *priv = nft_obj_data(obj);
index 40dcd05146d5fb0f346e6e170d16de9813e92804..1e66538bf0ff24e3286ec6312e4d593c6197bd9b 100644 (file)
@@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
        unsigned int plen;
        int err;
 
-       err = nft_redir_validate(ctx, expr, NULL);
-       if (err < 0)
-               return err;
-
        plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
        if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
                priv->sreg_proto_min =
index c64de3f7379df551fa413a4af186f3c16886f112..29f5bd2377b0deaf7ede8ec0573bf71cfeef7478 100644 (file)
@@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx,
                    const struct nlattr * const tb[])
 {
        struct nft_reject *priv = nft_expr_priv(expr);
-       int err;
-
-       err = nft_reject_validate(ctx, expr, NULL);
-       if (err < 0)
-               return err;
 
        if (tb[NFTA_REJECT_TYPE] == NULL)
                return -EINVAL;
index 9e90a02cb104dad81daf84208902e90390a8f504..5a7fb5ff867d382f04633a2fab53997d0ca1f2b2 100644 (file)
@@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
                                const struct nlattr * const tb[])
 {
        struct nft_reject *priv = nft_expr_priv(expr);
-       int icmp_code, err;
-
-       err = nft_reject_validate(ctx, expr, NULL);
-       if (err < 0)
-               return err;
+       int icmp_code;
 
        if (tb[NFTA_REJECT_TYPE] == NULL)
                return -EINVAL;
index 78dfbf9588b368107bdc385c7ef208a9abd3d297..e97e2fb53f0a107b0361322be10f16b4ab4b5d32 100644 (file)
@@ -18,9 +18,8 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-static DEFINE_SPINLOCK(nft_rbtree_lock);
-
 struct nft_rbtree {
+       rwlock_t                lock;
        struct rb_root          root;
 };
 
@@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
 static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                              const u32 *key, const struct nft_set_ext **ext)
 {
-       const struct nft_rbtree *priv = nft_set_priv(set);
+       struct nft_rbtree *priv = nft_set_priv(set);
        const struct nft_rbtree_elem *rbe, *interval = NULL;
        u8 genmask = nft_genmask_cur(net);
        const struct rb_node *parent;
        const void *this;
        int d;
 
-       spin_lock_bh(&nft_rbtree_lock);
+       read_lock_bh(&priv->lock);
        parent = priv->root.rb_node;
        while (parent != NULL) {
                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                        }
                        if (nft_rbtree_interval_end(rbe))
                                goto out;
-                       spin_unlock_bh(&nft_rbtree_lock);
+                       read_unlock_bh(&priv->lock);
 
                        *ext = &rbe->ext;
                        return true;
@@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
        if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
            nft_set_elem_active(&interval->ext, genmask) &&
            !nft_rbtree_interval_end(interval)) {
-               spin_unlock_bh(&nft_rbtree_lock);
+               read_unlock_bh(&priv->lock);
                *ext = &interval->ext;
                return true;
        }
 out:
-       spin_unlock_bh(&nft_rbtree_lock);
+       read_unlock_bh(&priv->lock);
        return false;
 }
 
@@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
                             const struct nft_set_elem *elem,
                             struct nft_set_ext **ext)
 {
+       struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe = elem->priv;
        int err;
 
-       spin_lock_bh(&nft_rbtree_lock);
+       write_lock_bh(&priv->lock);
        err = __nft_rbtree_insert(net, set, rbe, ext);
-       spin_unlock_bh(&nft_rbtree_lock);
+       write_unlock_bh(&priv->lock);
 
        return err;
 }
@@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net,
        struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe = elem->priv;
 
-       spin_lock_bh(&nft_rbtree_lock);
+       write_lock_bh(&priv->lock);
        rb_erase(&rbe->node, &priv->root);
-       spin_unlock_bh(&nft_rbtree_lock);
+       write_unlock_bh(&priv->lock);
 }
 
 static void nft_rbtree_activate(const struct net *net,
@@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
                            struct nft_set *set,
                            struct nft_set_iter *iter)
 {
-       const struct nft_rbtree *priv = nft_set_priv(set);
+       struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe;
        struct nft_set_elem elem;
        struct rb_node *node;
 
-       spin_lock_bh(&nft_rbtree_lock);
+       read_lock_bh(&priv->lock);
        for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 
@@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
                iter->err = iter->fn(ctx, set, iter, &elem);
                if (iter->err < 0) {
-                       spin_unlock_bh(&nft_rbtree_lock);
+                       read_unlock_bh(&priv->lock);
                        return;
                }
 cont:
                iter->count++;
        }
-       spin_unlock_bh(&nft_rbtree_lock);
+       read_unlock_bh(&priv->lock);
 }
 
 static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set,
 {
        struct nft_rbtree *priv = nft_set_priv(set);
 
+       rwlock_init(&priv->lock);
        priv->root = RB_ROOT;
        return 0;
 }
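
The hunks above replace the file-scope nft_rbtree_lock spinlock with a per-set rwlock: lookups and walks become shared readers, insert/remove take the write side, so contention is scoped to one set and concurrent lookups proceed in parallel. A minimal sketch of the pattern, with hypothetical my_* names standing in for the real nft code:

	/* Per-instance reader/writer locking: readers run concurrently,
	 * writers are exclusive, and contention is scoped to one instance.
	 */
	struct my_set {
		rwlock_t	lock;		/* was: one global spinlock */
		struct rb_root	root;
	};

	static void my_set_init(struct my_set *s)
	{
		rwlock_init(&s->lock);
		s->root = RB_ROOT;
	}

	static bool my_set_contains(struct my_set *s, const u32 *key)
	{
		bool found;

		read_lock_bh(&s->lock);		/* _bh: lookups run in softirq */
		found = my_rb_search(&s->root, key) != NULL; /* hypothetical helper */
		read_unlock_bh(&s->lock);
		return found;
	}

	static void my_set_insert(struct my_set *s, struct rb_node *node)
	{
		write_lock_bh(&s->lock);	/* exclusive, but per set only */
		my_rb_insert(&s->root, node);	/* hypothetical helper */
		write_unlock_bh(&s->lock);
	}
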
index dab962df178795612580a1c8e22257213bdab07d..d27b5f1ea619f9696912b58bd5012358206725d7 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/netfilter/xt_limit.h>
 
 struct xt_limit_priv {
+       spinlock_t lock;
        unsigned long prev;
        uint32_t credit;
 };
@@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit");
  * see net/sched/sch_tbf.c in the linux source tree
  */
 
-static DEFINE_SPINLOCK(limit_lock);
-
 /* Rusty: This is my (non-mathematically-inclined) understanding of
    this algorithm.  The `average rate' in jiffies becomes your initial
    amount of credit `credit' and the most credit you can ever have
@@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
        struct xt_limit_priv *priv = r->master;
        unsigned long now = jiffies;
 
-       spin_lock_bh(&limit_lock);
+       spin_lock_bh(&priv->lock);
        priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
        if (priv->credit > r->credit_cap)
                priv->credit = r->credit_cap;
@@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
        if (priv->credit >= r->cost) {
                /* We're not limited. */
                priv->credit -= r->cost;
-               spin_unlock_bh(&limit_lock);
+               spin_unlock_bh(&priv->lock);
                return true;
        }
 
-       spin_unlock_bh(&limit_lock);
+       spin_unlock_bh(&priv->lock);
        return false;
 }
 
@@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
                r->credit_cap = priv->credit; /* Credits full. */
                r->cost = user2credits(r->avg);
        }
+       spin_lock_init(&priv->lock);
+
        return 0;
 }
 
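
Rusty's comment above describes a plain token bucket in jiffies. A standalone sketch of the same arithmetic (userspace C, with made-up tick constants; the real code scales user-supplied averages into CREDITS_PER_JIFFY units):

	#include <stdbool.h>
	#include <stdint.h>

	#define CREDITS_PER_TICK 8	/* credit earned per elapsed tick */
	#define CREDIT_CAP	 800	/* burst ceiling */
	#define COST		 80	/* credit spent per accepted packet */

	struct bucket {
		uint64_t prev;		/* time of last refill, in ticks */
		uint32_t credit;
	};

	static bool bucket_allow(struct bucket *b, uint64_t now)
	{
		/* Refill in proportion to elapsed time, then clamp so
		 * idle periods cannot bank an unbounded burst.
		 */
		b->credit += (now - b->prev) * CREDITS_PER_TICK;
		b->prev = now;
		if (b->credit > CREDIT_CAP)
			b->credit = CREDIT_CAP;

		if (b->credit >= COST) {
			b->credit -= COST;	/* not limited: spend and accept */
			return true;
		}
		return false;			/* limited */
	}

The per-instance spinlock added above protects exactly this read-modify-write of prev/credit, which the old global limit_lock used to serialize across all rules.
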
index 7b73c7c161a9680b8691a712c31073b7789620f7..596eaff66649e5955d6c0f349f062b6d8360dc2d 100644 (file)
@@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table);
 
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
+static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
+
+static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
+       "nlk_cb_mutex-ROUTE",
+       "nlk_cb_mutex-1",
+       "nlk_cb_mutex-USERSOCK",
+       "nlk_cb_mutex-FIREWALL",
+       "nlk_cb_mutex-SOCK_DIAG",
+       "nlk_cb_mutex-NFLOG",
+       "nlk_cb_mutex-XFRM",
+       "nlk_cb_mutex-SELINUX",
+       "nlk_cb_mutex-ISCSI",
+       "nlk_cb_mutex-AUDIT",
+       "nlk_cb_mutex-FIB_LOOKUP",
+       "nlk_cb_mutex-CONNECTOR",
+       "nlk_cb_mutex-NETFILTER",
+       "nlk_cb_mutex-IP6_FW",
+       "nlk_cb_mutex-DNRTMSG",
+       "nlk_cb_mutex-KOBJECT_UEVENT",
+       "nlk_cb_mutex-GENERIC",
+       "nlk_cb_mutex-17",
+       "nlk_cb_mutex-SCSITRANSPORT",
+       "nlk_cb_mutex-ECRYPTFS",
+       "nlk_cb_mutex-RDMA",
+       "nlk_cb_mutex-CRYPTO",
+       "nlk_cb_mutex-SMC",
+       "nlk_cb_mutex-23",
+       "nlk_cb_mutex-24",
+       "nlk_cb_mutex-25",
+       "nlk_cb_mutex-26",
+       "nlk_cb_mutex-27",
+       "nlk_cb_mutex-28",
+       "nlk_cb_mutex-29",
+       "nlk_cb_mutex-30",
+       "nlk_cb_mutex-31",
+       "nlk_cb_mutex-MAX_LINKS"
+};
+
 static int netlink_dump(struct sock *sk);
 static void netlink_skb_destructor(struct sk_buff *skb);
 
@@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
+               lockdep_set_class_and_name(nlk->cb_mutex,
+                                          nlk_cb_mutex_keys + protocol,
+                                          nlk_cb_mutex_key_strings[protocol]);
        }
        init_waitqueue_head(&nlk->wait);
 
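
Without this hunk, every cb_def_mutex initialized here would share one lockdep class (same init site), so lock-order rules observed on one netlink protocol would be applied to all of them. The change re-keys each mutex per protocol and gives lockdep a readable name for reports. The general pattern, sketched with hypothetical demo_* names:

	#include <linux/lockdep.h>
	#include <linux/mutex.h>

	#define DEMO_CLASSES 4

	/* Keys need static storage duration: lockdep identifies a
	 * class by the key's address.
	 */
	static struct lock_class_key demo_keys[DEMO_CLASSES];
	static const char *const demo_key_names[DEMO_CLASSES] = {
		"demo_mutex-0", "demo_mutex-1", "demo_mutex-2", "demo_mutex-3",
	};

	struct demo_obj {
		struct mutex lock;
	};

	static void demo_obj_init(struct demo_obj *obj, unsigned int class)
	{
		mutex_init(&obj->lock);
		/* Split this mutex out of the shared init-site class and
		 * track a distinct, named class per 'class' index.
		 */
		lockdep_set_class_and_name(&obj->lock, &demo_keys[class],
					   demo_key_names[class]);
	}
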
index fb6e10fdb2174320c96608aea63d3c484d3625a0..92e0981f74040d7029b65863167b459322612024 100644 (file)
@@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
 
                if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                  skb, CTRL_CMD_NEWFAMILY) < 0)
+                                  skb, CTRL_CMD_NEWFAMILY) < 0) {
+                       n--;
                        break;
+               }
        }
 
        cb->args[0] = n;
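
The n-- matters because netlink dump callbacks are re-invoked until they return 0, resuming from the index saved in cb->args[]; counting the family whose attributes did not fit would make the next pass skip it. The resume pattern in isolation (a sketch; demo_fill() and the list are hypothetical):

	struct demo_entry {
		struct list_head list;
		/* ... payload ... */
	};

	static LIST_HEAD(demo_list);

	static int demo_dump(struct sk_buff *skb, struct netlink_callback *cb)
	{
		int n = 0, start = cb->args[0];
		struct demo_entry *e;

		list_for_each_entry(e, &demo_list, list) {
			if (n++ < start)
				continue;		/* dumped on an earlier pass */
			if (demo_fill(skb, e) < 0) {	/* skb is full */
				n--;			/* retry this entry next pass */
				break;
			}
		}

		cb->args[0] = n;	/* resume point for the next invocation */
		return skb->len;	/* non-zero: dump continues */
	}
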
index c82301ce3fffb6caeb41a9882a53289ec7b63c8d..e4610676299bcdac626db1a30cd4da44ccc62c0b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
 #include "conntrack.h"
 #include "vport.h"
 
-static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-                             struct sw_flow_key *key,
-                             const struct nlattr *attr, int len);
-
 struct deferred_action {
        struct sk_buff *skb;
        const struct nlattr *actions;
+       int actions_len;
 
        /* Store pkt_key clone when creating deferred action. */
        struct sw_flow_key pkt_key;
@@ -82,14 +79,31 @@ struct action_fifo {
        struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
 };
 
-struct recirc_keys {
+struct action_flow_keys {
        struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
 };
 
 static struct action_fifo __percpu *action_fifos;
-static struct recirc_keys __percpu *recirc_keys;
+static struct action_flow_keys __percpu *flow_keys;
 static DEFINE_PER_CPU(int, exec_actions_level);
 
+/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
+ * space. Return NULL when out of key space.
+ */
+static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
+{
+       struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
+       int level = this_cpu_read(exec_actions_level);
+       struct sw_flow_key *key = NULL;
+
+       if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+               key = &keys->key[level - 1];
+               *key = *key_;
+       }
+
+       return key;
+}
+
 static void action_fifo_init(struct action_fifo *fifo)
 {
        fifo->head = 0;
@@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
 
 /* Return true if fifo is not full */
 static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
-                                                   const struct sw_flow_key *key,
-                                                   const struct nlattr *attr)
+                                   const struct sw_flow_key *key,
+                                   const struct nlattr *actions,
+                                   const int actions_len)
 {
        struct action_fifo *fifo;
        struct deferred_action *da;
@@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
        da = action_fifo_put(fifo);
        if (da) {
                da->skb = skb;
-               da->actions = attr;
+               da->actions = actions;
+               da->actions_len = actions_len;
                da->pkt_key = *key;
        }
 
@@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
        return !(key->mac_proto & SW_FLOW_KEY_INVALID);
 }
 
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+                        struct sw_flow_key *key,
+                        u32 recirc_id,
+                        const struct nlattr *actions, int len,
+                        bool last, bool clone_flow_key);
+
 static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
                             __be16 ethertype)
 {
@@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
        return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
 }
 
+/* When 'last' is true, sample() should always consume the 'skb'.
+ * Otherwise, sample() should keep 'skb' intact regardless of what
+ * actions are executed within sample().
+ */
 static int sample(struct datapath *dp, struct sk_buff *skb,
                  struct sw_flow_key *key, const struct nlattr *attr,
-                 const struct nlattr *actions, int actions_len)
+                 bool last)
 {
-       const struct nlattr *acts_list = NULL;
-       const struct nlattr *a;
-       int rem;
-       u32 cutlen = 0;
-
-       for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
-                a = nla_next(a, &rem)) {
-               u32 probability;
-
-               switch (nla_type(a)) {
-               case OVS_SAMPLE_ATTR_PROBABILITY:
-                       probability = nla_get_u32(a);
-                       if (!probability || prandom_u32() > probability)
-                               return 0;
-                       break;
-
-               case OVS_SAMPLE_ATTR_ACTIONS:
-                       acts_list = a;
-                       break;
-               }
-       }
-
-       rem = nla_len(acts_list);
-       a = nla_data(acts_list);
-
-       /* Actions list is empty, do nothing */
-       if (unlikely(!rem))
+       struct nlattr *actions;
+       struct nlattr *sample_arg;
+       int rem = nla_len(attr);
+       const struct sample_arg *arg;
+       bool clone_flow_key;
+
+       /* The first attribute is always 'OVS_SAMPLE_ATTR_ARG'. */
+       sample_arg = nla_data(attr);
+       arg = nla_data(sample_arg);
+       actions = nla_next(sample_arg, &rem);
+
+       if ((arg->probability != U32_MAX) &&
+           (!arg->probability || prandom_u32() > arg->probability)) {
+               if (last)
+                       consume_skb(skb);
                return 0;
-
-       /* The only known usage of sample action is having a single user-space
-        * action, or having a truncate action followed by a single user-space
-        * action. Treat this usage as a special case.
-        * The output_userspace() should clone the skb to be sent to the
-        * user space. This skb will be consumed by its caller.
-        */
-       if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
-               struct ovs_action_trunc *trunc = nla_data(a);
-
-               if (skb->len > trunc->max_len)
-                       cutlen = skb->len - trunc->max_len;
-
-               a = nla_next(a, &rem);
        }
 
-       if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
-                  nla_is_last(a, rem)))
-               return output_userspace(dp, skb, key, a, actions,
-                                       actions_len, cutlen);
-
-       skb = skb_clone(skb, GFP_ATOMIC);
-       if (!skb)
-               /* Skip the sample action when out of memory. */
-               return 0;
-
-       if (!add_deferred_actions(skb, key, a)) {
-               if (net_ratelimit())
-                       pr_warn("%s: deferred actions limit reached, dropping sample action\n",
-                               ovs_dp_name(dp));
-
-               kfree_skb(skb);
-       }
-       return 0;
+       clone_flow_key = !arg->exec;
+       return clone_execute(dp, skb, key, 0, actions, rem, last,
+                            clone_flow_key);
 }
 
 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
@@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb,
 
 static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
                          struct sw_flow_key *key,
-                         const struct nlattr *a, int rem)
+                         const struct nlattr *a, bool last)
 {
-       struct deferred_action *da;
-       int level;
+       u32 recirc_id;
 
        if (!is_flow_key_valid(key)) {
                int err;
@@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
        }
        BUG_ON(!is_flow_key_valid(key));
 
-       if (!nla_is_last(a, rem)) {
-               /* Recirc action is the not the last action
-                * of the action list, need to clone the skb.
-                */
-               skb = skb_clone(skb, GFP_ATOMIC);
-
-               /* Skip the recirc action when out of memory, but
-                * continue on with the rest of the action list.
-                */
-               if (!skb)
-                       return 0;
-       }
-
-       level = this_cpu_read(exec_actions_level);
-       if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
-               struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
-               struct sw_flow_key *recirc_key = &rks->key[level - 1];
-
-               *recirc_key = *key;
-               recirc_key->recirc_id = nla_get_u32(a);
-               ovs_dp_process_packet(skb, recirc_key);
-
-               return 0;
-       }
-
-       da = add_deferred_actions(skb, key, NULL);
-       if (da) {
-               da->pkt_key.recirc_id = nla_get_u32(a);
-       } else {
-               kfree_skb(skb);
-
-               if (net_ratelimit())
-                       pr_warn("%s: deferred action limit reached, drop recirc action\n",
-                               ovs_dp_name(dp));
-       }
-
-       return 0;
+       recirc_id = nla_get_u32(a);
+       return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
 }
 
 /* Execute a list of actions against 'skb'. */
@@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        err = pop_vlan(skb, key);
                        break;
 
-               case OVS_ACTION_ATTR_RECIRC:
-                       err = execute_recirc(dp, skb, key, a, rem);
-                       if (nla_is_last(a, rem)) {
+               case OVS_ACTION_ATTR_RECIRC: {
+                       bool last = nla_is_last(a, rem);
+
+                       err = execute_recirc(dp, skb, key, a, last);
+                       if (last) {
                                /* If this is the last action, the skb has
                                 * been consumed or freed.
                                 * Return immediately.
@@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                                return err;
                        }
                        break;
+               }
 
                case OVS_ACTION_ATTR_SET:
                        err = execute_set_action(skb, key, nla_data(a));
@@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        err = execute_masked_set_action(skb, key, nla_data(a));
                        break;
 
-               case OVS_ACTION_ATTR_SAMPLE:
-                       err = sample(dp, skb, key, a, attr, len);
+               case OVS_ACTION_ATTR_SAMPLE: {
+                       bool last = nla_is_last(a, rem);
+
+                       err = sample(dp, skb, key, a, last);
+                       if (last)
+                               return err;
+
                        break;
+               }
 
                case OVS_ACTION_ATTR_CT:
                        if (!is_flow_key_valid(key)) {
@@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
        return 0;
 }
 
+/* Execute the actions on the clone of the packet. The effect of the
+ * execution does not affect the original 'skb' nor the original 'key'.
+ *
+ * The execution may be deferred in case the actions cannot be executed
+ * immediately.
+ */
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+                        struct sw_flow_key *key, u32 recirc_id,
+                        const struct nlattr *actions, int len,
+                        bool last, bool clone_flow_key)
+{
+       struct deferred_action *da;
+       struct sw_flow_key *clone;
+
+       skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
+       if (!skb) {
+               /* Out of memory, skip this action.
+                */
+               return 0;
+       }
+
+       /* When clone_flow_key is false, the 'key' will not be changed
+        * by the actions, so the 'key' can be used directly.
+        * Otherwise, try to clone key from the next recursion level of
+        * 'flow_keys'. If clone is successful, execute the actions
+        * without deferring.
+        */
+       clone = clone_flow_key ? clone_key(key) : key;
+       if (clone) {
+               int err = 0;
+
+               if (actions) { /* Sample action */
+                       if (clone_flow_key)
+                               __this_cpu_inc(exec_actions_level);
+
+                       err = do_execute_actions(dp, skb, clone,
+                                                actions, len);
+
+                       if (clone_flow_key)
+                               __this_cpu_dec(exec_actions_level);
+               } else { /* Recirc action */
+                       clone->recirc_id = recirc_id;
+                       ovs_dp_process_packet(skb, clone);
+               }
+               return err;
+       }
+
+       /* Out of 'flow_keys' space. Defer actions */
+       da = add_deferred_actions(skb, key, actions, len);
+       if (da) {
+               if (!actions) { /* Recirc action */
+                       key = &da->pkt_key;
+                       key->recirc_id = recirc_id;
+               }
+       } else {
+               /* Out of per CPU action FIFO space. Drop the 'skb' and
+                * log an error.
+                */
+               kfree_skb(skb);
+
+               if (net_ratelimit()) {
+                       if (actions) { /* Sample action */
+                               pr_warn("%s: deferred action limit reached, drop sample action\n",
+                                       ovs_dp_name(dp));
+                       } else {  /* Recirc action */
+                               pr_warn("%s: deferred action limit reached, drop recirc action\n",
+                                       ovs_dp_name(dp));
+                       }
+               }
+       }
+       return 0;
+}
+
 static void process_deferred_actions(struct datapath *dp)
 {
        struct action_fifo *fifo = this_cpu_ptr(action_fifos);
@@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp)
                struct sk_buff *skb = da->skb;
                struct sw_flow_key *key = &da->pkt_key;
                const struct nlattr *actions = da->actions;
+               int actions_len = da->actions_len;
 
                if (actions)
-                       do_execute_actions(dp, skb, key, actions,
-                                          nla_len(actions));
+                       do_execute_actions(dp, skb, key, actions, actions_len);
                else
                        ovs_dp_process_packet(skb, key);
        } while (!action_fifo_is_empty(fifo));
@@ -1323,8 +1354,8 @@ int action_fifos_init(void)
        if (!action_fifos)
                return -ENOMEM;
 
-       recirc_keys = alloc_percpu(struct recirc_keys);
-       if (!recirc_keys) {
+       flow_keys = alloc_percpu(struct action_flow_keys);
+       if (!flow_keys) {
                free_percpu(action_fifos);
                return -ENOMEM;
        }
@@ -1335,5 +1366,5 @@ int action_fifos_init(void)
 void action_fifos_exit(void)
 {
        free_percpu(action_fifos);
-       free_percpu(recirc_keys);
+       free_percpu(flow_keys);
 }
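
clone_key() above generalizes what execute_recirc() used to do privately: a fixed-depth per-CPU stack of flow keys indexed by exec_actions_level, now shared by both recirc and sample, with the deferred-action FIFO as the fallback when the stack is exhausted. The core mechanism, as a sketch with hypothetical names and depth:

	#include <linux/percpu.h>

	#define KEY_STACK_DEPTH 4	/* stands in for OVS_DEFERRED_ACTION_THRESHOLD */

	struct demo_key {
		u32 recirc_id;
		/* ... other flow fields ... */
	};

	struct demo_key_stack {
		struct demo_key slot[KEY_STACK_DEPTH];
	};

	static struct demo_key_stack __percpu *demo_stacks;	/* alloc_percpu() at init */
	static DEFINE_PER_CPU(int, demo_level);			/* nesting depth, 1-based */

	/* Borrow the slot for the current nesting level; NULL tells the
	 * caller to fall back to the deferred-action FIFO instead.
	 */
	static struct demo_key *demo_clone_key(const struct demo_key *orig)
	{
		struct demo_key_stack *s = this_cpu_ptr(demo_stacks);
		int level = this_cpu_read(demo_level);

		if (level > KEY_STACK_DEPTH)
			return NULL;

		s->slot[level - 1] = *orig;
		return &s->slot[level - 1];
	}

Only callers that may modify the key bump the level around the nested execution, mirroring the __this_cpu_inc/__this_cpu_dec pair guarded by clone_flow_key in clone_execute().
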
index 1c6e9377436df1e93081c825c142b712a811277b..da931bdef8a7b5f25c189a5fe76e5fc2c4c1efdf 100644 (file)
@@ -34,8 +34,6 @@
 #define DP_MAX_PORTS           USHRT_MAX
 #define DP_VPORT_HASH_BUCKETS  1024
 
-#define SAMPLE_ACTION_DEPTH 3
-
 /**
  * struct dp_stats_percpu - per-cpu packet processing statistics for a given
  * datapath.
index a08ff834676ba9a8f82bb602cbc15d0d20e76478..df82b81a9b357250b725b47660dc211ea9e957ae 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -59,6 +59,39 @@ struct ovs_len_tbl {
 #define OVS_ATTR_NESTED -1
 #define OVS_ATTR_VARIABLE -2
 
+static bool actions_may_change_flow(const struct nlattr *actions)
+{
+       struct nlattr *nla;
+       int rem;
+
+       nla_for_each_nested(nla, actions, rem) {
+               u16 action = nla_type(nla);
+
+               switch (action) {
+               case OVS_ACTION_ATTR_OUTPUT:
+               case OVS_ACTION_ATTR_RECIRC:
+               case OVS_ACTION_ATTR_TRUNC:
+               case OVS_ACTION_ATTR_USERSPACE:
+                       break;
+
+               case OVS_ACTION_ATTR_CT:
+               case OVS_ACTION_ATTR_HASH:
+               case OVS_ACTION_ATTR_POP_ETH:
+               case OVS_ACTION_ATTR_POP_MPLS:
+               case OVS_ACTION_ATTR_POP_VLAN:
+               case OVS_ACTION_ATTR_PUSH_ETH:
+               case OVS_ACTION_ATTR_PUSH_MPLS:
+               case OVS_ACTION_ATTR_PUSH_VLAN:
+               case OVS_ACTION_ATTR_SAMPLE:
+               case OVS_ACTION_ATTR_SET:
+               case OVS_ACTION_ATTR_SET_MASKED:
+               default:
+                       return true;
+               }
+       }
+       return false;
+}
+
 static void update_range(struct sw_flow_match *match,
                         size_t offset, size_t size, bool is_mask)
 {
@@ -665,6 +698,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
                        tun_flags |= TUNNEL_VXLAN_OPT;
                        opts_type = type;
                        break;
+               case OVS_TUNNEL_KEY_ATTR_PAD:
+                       break;
                default:
                        OVS_NLERR(log, "Unknown IP tunnel attribute %d",
                                  type);
@@ -2021,18 +2056,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
 
 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                  const struct sw_flow_key *key,
-                                 int depth, struct sw_flow_actions **sfa,
+                                 struct sw_flow_actions **sfa,
                                  __be16 eth_type, __be16 vlan_tci, bool log);
 
 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
-                                   const struct sw_flow_key *key, int depth,
+                                   const struct sw_flow_key *key,
                                    struct sw_flow_actions **sfa,
-                                   __be16 eth_type, __be16 vlan_tci, bool log)
+                                   __be16 eth_type, __be16 vlan_tci,
+                                   bool log, bool last)
 {
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
        const struct nlattr *a;
-       int rem, start, err, st_acts;
+       int rem, start, err;
+       struct sample_arg arg;
 
        memset(attrs, 0, sizeof(attrs));
        nla_for_each_nested(a, attr, rem) {
@@ -2056,20 +2093,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
        if (start < 0)
                return start;
-       err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
-                                nla_data(probability), sizeof(u32), log);
+
+       /* When both skb and flow may be changed, put the sample
+        * into a deferred fifo. On the other hand, if only skb
+        * may be modified, the actions can be executed in place.
+        *
+        * Do this analysis at the flow installation time.
+        * Set 'clone_action->exec' to true if the actions can be
+        * executed without being deferred.
+        *
+        * If the sample is the last action, it can always be executed
+        * rather than deferred.
+        */
+       arg.exec = last || !actions_may_change_flow(actions);
+       arg.probability = nla_get_u32(probability);
+
+       err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
+                                log);
        if (err)
                return err;
-       st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
-       if (st_acts < 0)
-               return st_acts;
 
-       err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
+       err = __ovs_nla_copy_actions(net, actions, key, sfa,
                                     eth_type, vlan_tci, log);
+
        if (err)
                return err;
 
-       add_nested_action_end(*sfa, st_acts);
        add_nested_action_end(*sfa, start);
 
        return 0;
@@ -2406,16 +2455,13 @@ static int copy_action(const struct nlattr *from,
 
 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                  const struct sw_flow_key *key,
-                                 int depth, struct sw_flow_actions **sfa,
+                                 struct sw_flow_actions **sfa,
                                  __be16 eth_type, __be16 vlan_tci, bool log)
 {
        u8 mac_proto = ovs_key_mac_proto(key);
        const struct nlattr *a;
        int rem, err;
 
-       if (depth >= SAMPLE_ACTION_DEPTH)
-               return -EOVERFLOW;
-
        nla_for_each_nested(a, attr, rem) {
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -2553,13 +2599,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                return err;
                        break;
 
-               case OVS_ACTION_ATTR_SAMPLE:
-                       err = validate_and_copy_sample(net, a, key, depth, sfa,
-                                                      eth_type, vlan_tci, log);
+               case OVS_ACTION_ATTR_SAMPLE: {
+                       bool last = nla_is_last(a, rem);
+
+                       err = validate_and_copy_sample(net, a, key, sfa,
+                                                      eth_type, vlan_tci,
+                                                      log, last);
                        if (err)
                                return err;
                        skip_copy = true;
                        break;
+               }
 
                case OVS_ACTION_ATTR_CT:
                        err = ovs_ct_copy_action(net, a, key, sfa, log);
@@ -2613,7 +2663,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                return PTR_ERR(*sfa);
 
        (*sfa)->orig_len = nla_len(attr);
-       err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
+       err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
                                     key->eth.vlan.tci, log);
        if (err)
                ovs_nla_free_flow_actions(*sfa);
@@ -2621,39 +2671,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
        return err;
 }
 
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+static int sample_action_to_attr(const struct nlattr *attr,
+                                struct sk_buff *skb)
 {
-       const struct nlattr *a;
-       struct nlattr *start;
-       int err = 0, rem;
+       struct nlattr *start, *ac_start = NULL, *sample_arg;
+       int err = 0, rem = nla_len(attr);
+       const struct sample_arg *arg;
+       struct nlattr *actions;
 
        start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
        if (!start)
                return -EMSGSIZE;
 
-       nla_for_each_nested(a, attr, rem) {
-               int type = nla_type(a);
-               struct nlattr *st_sample;
+       sample_arg = nla_data(attr);
+       arg = nla_data(sample_arg);
+       actions = nla_next(sample_arg, &rem);
 
-               switch (type) {
-               case OVS_SAMPLE_ATTR_PROBABILITY:
-                       if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
-                                   sizeof(u32), nla_data(a)))
-                               return -EMSGSIZE;
-                       break;
-               case OVS_SAMPLE_ATTR_ACTIONS:
-                       st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
-                       if (!st_sample)
-                               return -EMSGSIZE;
-                       err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
-                       if (err)
-                               return err;
-                       nla_nest_end(skb, st_sample);
-                       break;
-               }
+       if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
+               err = -EMSGSIZE;
+               goto out;
+       }
+
+       ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+       if (!ac_start) {
+               err = -EMSGSIZE;
+               goto out;
+       }
+
+       err = ovs_nla_put_actions(actions, rem, skb);
+
+out:
+       if (err) {
+               nla_nest_cancel(skb, ac_start);
+               nla_nest_cancel(skb, start);
+       } else {
+               nla_nest_end(skb, ac_start);
+               nla_nest_end(skb, start);
        }
 
-       nla_nest_end(skb, start);
        return err;
 }
 
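
Taken together, these two hunks change the kernel-internal layout of a translated sample action while keeping the user-visible netlink format stable. As the code suggests, the layouts look roughly like this (a sketch, not literal output):

	internal copy built by validate_and_copy_sample():

	OVS_ACTION_ATTR_SAMPLE
	    OVS_SAMPLE_ATTR_ARG      struct sample_arg { exec, probability }
	    <inner actions ...>      copied flat; no OVS_SAMPLE_ATTR_ACTIONS nest

	UAPI form rebuilt by sample_action_to_attr() on dump:

	OVS_ACTION_ATTR_SAMPLE
	    OVS_SAMPLE_ATTR_PROBABILITY  u32
	    OVS_SAMPLE_ATTR_ACTIONS      <inner actions ...>
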
index b83c6807a5ae5cedc63b073c7f928b1776f42524..326fd97444f5bed5c82af7d632d8a4424f9908d3 100644 (file)
@@ -16,7 +16,7 @@ if QRTR
 
 config QRTR_SMD
        tristate "SMD IPC Router channels"
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG || (COMPILE_TEST && RPMSG=n)
        ---help---
          Say Y here to support SMD based ipcrouter channels.  SMD is the
          most common transport for IPC Router.
index 0d11132b3370a4024be644dede74a77d188a4f64..50615d5efac1529a0fd617c2692b1e9419da7137 100644 (file)
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
 
 #include "qrtr.h"
 
 struct qrtr_smd_dev {
        struct qrtr_endpoint ep;
-       struct qcom_smd_channel *channel;
+       struct rpmsg_endpoint *channel;
        struct device *dev;
 };
 
 /* from smd to qrtr */
-static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel,
-                                 const void *data, size_t len)
+static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev,
+                                 void *data, int len, void *priv, u32 addr)
 {
-       struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel);
+       struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
        int rc;
 
        if (!qdev)
@@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
        if (rc)
                goto out;
 
-       rc = qcom_smd_send(qdev->channel, skb->data, skb->len);
+       rc = rpmsg_send(qdev->channel, skb->data, skb->len);
 
 out:
        if (rc)
@@ -64,57 +64,55 @@ out:
        return rc;
 }
 
-static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev)
+static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev)
 {
        struct qrtr_smd_dev *qdev;
        int rc;
 
-       qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL);
+       qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL);
        if (!qdev)
                return -ENOMEM;
 
-       qdev->channel = sdev->channel;
-       qdev->dev = &sdev->dev;
+       qdev->channel = rpdev->ept;
+       qdev->dev = &rpdev->dev;
        qdev->ep.xmit = qcom_smd_qrtr_send;
 
        rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
        if (rc)
                return rc;
 
-       qcom_smd_set_drvdata(sdev->channel, qdev);
-       dev_set_drvdata(&sdev->dev, qdev);
+       dev_set_drvdata(&rpdev->dev, qdev);
 
-       dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n");
+       dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n");
 
        return 0;
 }
 
-static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev)
+static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev)
 {
-       struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
+       struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
 
        qrtr_endpoint_unregister(&qdev->ep);
 
-       dev_set_drvdata(&sdev->dev, NULL);
+       dev_set_drvdata(&rpdev->dev, NULL);
 }
 
-static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = {
+static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = {
        { "IPCRTR" },
        {}
 };
 
-static struct qcom_smd_driver qcom_smd_qrtr_driver = {
+static struct rpmsg_driver qcom_smd_qrtr_driver = {
        .probe = qcom_smd_qrtr_probe,
        .remove = qcom_smd_qrtr_remove,
        .callback = qcom_smd_qrtr_callback,
-       .smd_match_table = qcom_smd_qrtr_smd_match,
-       .driver = {
+       .id_table = qcom_smd_qrtr_smd_match,
+       .drv = {
                .name = "qcom_smd_qrtr",
-               .owner = THIS_MODULE,
        },
 };
 
-module_qcom_smd_driver(qcom_smd_qrtr_driver);
+module_rpmsg_driver(qcom_smd_qrtr_driver);
 
 MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
 MODULE_LICENSE("GPL v2");
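
The conversion above follows the generic rpmsg driver shape, which is worth seeing stripped of the qrtr specifics. A minimal skeleton (hypothetical demo names; the ops and the module_rpmsg_driver() helper are the standard rpmsg ones used in the hunks above):

	#include <linux/module.h>
	#include <linux/rpmsg.h>

	static int demo_cb(struct rpmsg_device *rpdev, void *data, int len,
			   void *priv, u32 addr)
	{
		/* inbound message on this channel */
		return 0;
	}

	static int demo_probe(struct rpmsg_device *rpdev)
	{
		/* rpdev->ept is the primary endpoint; rpmsg_send() transmits on it */
		return 0;
	}

	static void demo_remove(struct rpmsg_device *rpdev)
	{
	}

	static const struct rpmsg_device_id demo_match[] = {
		{ "DEMO" },
		{}
	};

	static struct rpmsg_driver demo_driver = {
		.probe = demo_probe,
		.remove = demo_remove,
		.callback = demo_cb,
		.id_table = demo_match,
		.drv = { .name = "demo_rpmsg" },
	};
	module_rpmsg_driver(demo_driver);

	MODULE_LICENSE("GPL v2");
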
index 1fa75ab7b733230585666abcb7279ba691365256..6a5ebdea7d2e9eb3b624a01a8a87ef601e3b2f13 100644 (file)
@@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
                rds_conn_path_reset(cp);
 
                if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
+                                             RDS_CONN_DOWN) &&
+                   !rds_conn_path_transition(cp, RDS_CONN_ERROR,
                                              RDS_CONN_DOWN)) {
                        /* This can happen - eg when we're in the middle of tearing
                         * down the connection, and someone unloads the rds module.
-                        * Quite reproduceable with loopback connections.
+                        * Quite reproducible with loopback connections.
                         * Mostly harmless.
+                        *
+                        * Note that this also happens with rds-tcp because
+                        * we could have triggered rds_conn_path_drop in irq
+                        * mode from rds_tcp_state change on the receipt of
+                        * a FIN, thus we need to recheck for RDS_CONN_ERROR
+                        * here.
                         */
                        rds_conn_path_error(cp, "%s: failed to transition "
                                            "to state DOWN, current state "
index 1c38d2c7caa8e955585b45f0c9218a0775013b4d..80fb6f63e768d3461c47533615c875526bb8bab9 100644 (file)
@@ -702,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                event->param.conn.initiator_depth);
 
        /* rdma_accept() calls rdma_reject() internally if it fails */
-       err = rdma_accept(cm_id, &conn_param);
-       if (err)
-               rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
+       if (rdma_accept(cm_id, &conn_param))
+               rds_ib_conn_error(conn, "rdma_accept failed\n");
 
 out:
        if (conn)
index 4fe8f4fec4eee66c826b5beb5b02ae61d19b483a..86ef907067bb084e01ac4f8d5f00d0c17f40ac55 100644 (file)
@@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
        return ibmr;
 
 out_no_cigar:
-       if (ibmr) {
-               if (fmr->fmr)
-                       ib_dealloc_fmr(fmr->fmr);
-               kfree(ibmr);
-       }
+       kfree(ibmr);
        atomic_dec(&pool->item_count);
+
        return ERR_PTR(err);
 }
 
-int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
-                  struct scatterlist *sg, unsigned int nents)
+static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
+                         struct rds_ib_mr *ibmr, struct scatterlist *sg,
+                         unsigned int nents)
 {
        struct ib_device *dev = rds_ibdev->dev;
        struct rds_ib_fmr *fmr = &ibmr->u.fmr;
@@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
                u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
 
                if (dma_addr & ~PAGE_MASK) {
-                       if (i > 0)
+                       if (i > 0) {
+                               ib_dma_unmap_sg(dev, sg, nents,
+                                               DMA_BIDIRECTIONAL);
                                return -EINVAL;
-                       else
+                       } else {
                                ++page_cnt;
+                       }
                }
                if ((dma_addr + dma_len) & ~PAGE_MASK) {
-                       if (i < sg_dma_len - 1)
+                       if (i < sg_dma_len - 1) {
+                               ib_dma_unmap_sg(dev, sg, nents,
+                                               DMA_BIDIRECTIONAL);
                                return -EINVAL;
-                       else
+                       } else {
                                ++page_cnt;
+                       }
                }
 
                len += dma_len;
        }
 
        page_cnt += len >> PAGE_SHIFT;
-       if (page_cnt > ibmr->pool->fmr_attr.max_pages)
+       if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
+               ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
                return -EINVAL;
+       }
 
        dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
                                 rdsibdev_to_node(rds_ibdev));
-       if (!dma_pages)
+       if (!dma_pages) {
+               ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
                return -ENOMEM;
+       }
 
        page_cnt = 0;
        for (i = 0; i < sg_dma_len; ++i) {
@@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
        }
 
        ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
-       if (ret)
+       if (ret) {
+               ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
                goto out;
+       }
 
        /* Success - we successfully remapped the MR, so we can
         * safely tear down the old mapping.
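
Each early return above now undoes the ib_dma_map_sg() performed earlier in the function. An equivalent structure that some kernel code prefers is a single unwind label, sketched here with hypothetical validate/program helpers rather than the actual rds code:

	#include <rdma/ib_verbs.h>

	static int map_pages_sketch(struct ib_device *dev, struct scatterlist *sg,
				    unsigned int nents)
	{
		int ret;

		if (ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL) == 0)
			return -ENOMEM;

		ret = validate_layout(sg, nents);	/* hypothetical checks */
		if (ret)
			goto unmap;

		ret = program_hardware(sg, nents);	/* hypothetical */
		if (ret)
			goto unmap;

		return 0;

	unmap:
		/* one exit path undoes the mapping for every failure */
		ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
		return ret;
	}
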
index 5d6e98a79a5e4b3de1f472c5fc513fce545bf6f9..0ea4ab017a8cc3f807931e1194cddb5048a82956 100644 (file)
@@ -125,8 +125,6 @@ void rds_ib_mr_exit(void);
 void __rds_ib_teardown_mr(struct rds_ib_mr *);
 void rds_ib_teardown_mr(struct rds_ib_mr *);
 struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
-int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
-                  struct scatterlist *, unsigned int);
 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
 int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
 struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
index e36e333a0aa0d7430c852ce419ac7ed85094bbec..3e447d056d092a405311265a06a8596b2ce8dc87 100644 (file)
@@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work)
        struct rds_connection *conn = cp->cp_conn;
        int ret;
 
-       if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+       if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
                return;
        clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
        ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
index 3f9d8d7ec6323a95de3e08d01098abdfcf33ff4f..b099b64366f356c27dea0a4dd215cc1034e61b55 100644 (file)
@@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
                rxrpc_conn_retransmit_call(conn, skb);
                return 0;
 
+       case RXRPC_PACKET_TYPE_BUSY:
+               /* Just ignore BUSY packets for now. */
+               return 0;
+
        case RXRPC_PACKET_TYPE_ABORT:
                if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
                                  &wtmp, sizeof(wtmp)) < 0)
index e978ccd4402cbc68ba1c46e20909a047978df1c2..6c319a40c1cc744734bd8d82b5d0c5baec26bbbc 100644 (file)
@@ -181,6 +181,9 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl,
        struct tcphdr *tcph;
        const struct iphdr *iph;
 
+       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+               return 1;
+
        tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
        if (tcph == NULL)
                return 0;
@@ -202,6 +205,9 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl,
        struct tcphdr *tcph;
        const struct ipv6hdr *ip6h;
 
+       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+               return 1;
+
        tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
        if (tcph == NULL)
                return 0;
@@ -225,6 +231,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
        const struct iphdr *iph;
        u16 ul;
 
+       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+               return 1;
+
        /*
         * Support both UDP and UDPLITE checksum algorithms, Don't use
         * udph->len to get the real length without any protocol check,
@@ -278,6 +287,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
        const struct ipv6hdr *ip6h;
        u16 ul;
 
+       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+               return 1;
+
        /*
         * Support both UDP and UDPLITE checksum algorithms, Don't use
         * udph->len to get the real length without any protocol check,
index 71e7ff22f7c92a86cacad9a1b8d18d3d726f52fb..c75ea5c9102c50f5810b52bf7b1d0e42a7acdfd2 100644 (file)
@@ -603,8 +603,8 @@ nla_put_failure:
        return -1;
 }
 
-int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
-                      u16 metaid, u16 mlen, void *mdata)
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+                             u16 metaid, u16 mlen, void *mdata)
 {
        struct tcf_meta_info *e;
 
index 3d6b9286c203f298b14b5254e5c12cb4781eb4b1..ca193af8634ad71a80e490c4ce916c4cedfd543f 100644 (file)
@@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
                get_random_bytes(&fnew->hashrnd, 4);
        }
 
-       fnew->perturb_timer.function = flow_perturbation;
-       fnew->perturb_timer.data = (unsigned long)fnew;
-       init_timer_deferrable(&fnew->perturb_timer);
+       setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
+                              (unsigned long)fnew);
 
        tcf_exts_change(tp, &fnew->exts, &e);
        tcf_em_tree_change(tp, &fnew->ematches, &t);
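
setup_deferrable_timer() is simply the one-call form of the three assignments it replaces; under the timer API of this era (callbacks taking an unsigned long), the equivalence is roughly:

	/* open-coded */
	init_timer_deferrable(&t);
	t.function = my_timeout;	/* void my_timeout(unsigned long data) */
	t.data     = (unsigned long)ctx;

	/* helper form, as used in the hunk above */
	setup_deferrable_timer(&t, my_timeout, (unsigned long)ctx);
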
index bcf49cd2278670197f2a7e9d4e9a62ae8d117468..62567bfe52c723262a291360cecd572fefced164 100644 (file)
@@ -274,7 +274,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
        return NULL;
 }
 
-void qdisc_hash_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q, bool invisible)
 {
        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
                struct Qdisc *root = qdisc_dev(q)->qdisc;
@@ -282,6 +282,8 @@ void qdisc_hash_add(struct Qdisc *q)
                WARN_ON_ONCE(root == &noop_qdisc);
                ASSERT_RTNL();
                hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
+               if (invisible)
+                       q->flags |= TCQ_F_INVISIBLE;
        }
 }
 EXPORT_SYMBOL(qdisc_hash_add);
@@ -1003,7 +1005,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
                                goto err_out4;
                }
 
-               qdisc_hash_add(sch);
+               qdisc_hash_add(sch, false);
 
                return sch;
        }
@@ -1401,9 +1403,14 @@ nla_put_failure:
        return -1;
 }
 
-static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
 {
-       return (q->flags & TCQ_F_BUILTIN) ? true : false;
+       if (q->flags & TCQ_F_BUILTIN)
+               return true;
+       if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+               return true;
+
+       return false;
 }
 
 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
@@ -1417,12 +1424,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
        if (!skb)
                return -ENOBUFS;
 
-       if (old && !tc_qdisc_dump_ignore(old)) {
+       if (old && !tc_qdisc_dump_ignore(old, false)) {
                if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
                                  0, RTM_DELQDISC) < 0)
                        goto err_out;
        }
-       if (new && !tc_qdisc_dump_ignore(new)) {
+       if (new && !tc_qdisc_dump_ignore(new, false)) {
                if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
                                  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
                        goto err_out;
@@ -1439,7 +1446,8 @@ err_out:
 
 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                              struct netlink_callback *cb,
-                             int *q_idx_p, int s_q_idx, bool recur)
+                             int *q_idx_p, int s_q_idx, bool recur,
+                             bool dump_invisible)
 {
        int ret = 0, q_idx = *q_idx_p;
        struct Qdisc *q;
@@ -1452,7 +1460,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
        if (q_idx < s_q_idx) {
                q_idx++;
        } else {
-               if (!tc_qdisc_dump_ignore(q) &&
+               if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                  RTM_NEWQDISC) <= 0)
@@ -1474,7 +1482,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                        q_idx++;
                        continue;
                }
-               if (!tc_qdisc_dump_ignore(q) &&
+               if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                  RTM_NEWQDISC) <= 0)
@@ -1496,12 +1504,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
        int idx, q_idx;
        int s_idx, s_q_idx;
        struct net_device *dev;
+       const struct nlmsghdr *nlh = cb->nlh;
+       struct tcmsg *tcm = nlmsg_data(nlh);
+       struct nlattr *tca[TCA_MAX + 1];
+       int err;
 
        s_idx = cb->args[0];
        s_q_idx = q_idx = cb->args[1];
 
        idx = 0;
        ASSERT_RTNL();
+
+       err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL);
+       if (err < 0)
+               return err;
+
        for_each_netdev(net, dev) {
                struct netdev_queue *dev_queue;
 
@@ -1512,13 +1529,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                q_idx = 0;
 
                if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
-                                      true) < 0)
+                                      true, tca[TCA_DUMP_INVISIBLE]) < 0)
                        goto done;
 
                dev_queue = dev_ingress_queue(dev);
                if (dev_queue &&
                    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
-                                      &q_idx, s_q_idx, false) < 0)
+                                      &q_idx, s_q_idx, false,
+                                      tca[TCA_DUMP_INVISIBLE]) < 0)
                        goto done;
 
 cont:
@@ -1762,7 +1780,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
 {
        struct qdisc_dump_args arg;
 
-       if (tc_qdisc_dump_ignore(q) ||
+       if (tc_qdisc_dump_ignore(q, false) ||
            *t_p < s_t || !q->ops->cl_ops ||
            (tcm->tcm_parent &&
             TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
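
On the userspace side, a dump opts in by attaching the empty TCA_DUMP_INVISIBLE flag attribute to the RTM_GETQDISC request, which the nlmsg_parse() call added above picks up. An iproute2-style sketch (addattr_l() is the iproute2 helper; the buffer size is illustrative):

	struct {
		struct nlmsghdr	n;
		struct tcmsg	t;
		char		buf[256];
	} req = {
		.n.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg)),
		.n.nlmsg_type  = RTM_GETQDISC,
		.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		.t.tcm_family  = AF_UNSPEC,
	};

	/* Zero-length flag attribute: presence alone enables dumping
	 * of TCQ_F_INVISIBLE qdiscs.
	 */
	addattr_l(&req.n, sizeof(req), TCA_DUMP_INVISIBLE, NULL, 0);

The qdisc_hash_add(..., true) calls added in the per-qdisc hunks below (cbq, drr, dsmark, hfsc, htb) are what marks implicitly created child qdiscs invisible in the first place.
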
index d6ca18dc04c3e9e72efedd44088e95118a06b711..cf93e5ff3d630e50442d65b5440883fb8467e6a0 100644 (file)
@@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
                                      sch->handle);
        if (!q->link.q)
                q->link.q = &noop_qdisc;
+       else
+               qdisc_hash_add(q->link.q, true);
 
        q->link.priority = TC_CBQ_MAXPRIO - 1;
        q->link.priority2 = TC_CBQ_MAXPRIO - 1;
@@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
        cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
        if (!cl->q)
                cl->q = &noop_qdisc;
+       else
+               qdisc_hash_add(cl->q, true);
+
        cl->common.classid = classid;
        cl->tparent = parent;
        cl->qdisc = sch;
index 3b86a97bc67c3e953cb181eddcb5c0c16bf3b27f..03ce895d7ff51291ac30291d510686e168aad8e8 100644 (file)
@@ -58,7 +58,6 @@ struct choke_sched_data {
 
 /* Variables */
        struct red_vars  vars;
-       struct tcf_proto __rcu *filter_list;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
@@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
        choke_skb_cb(skb)->classid = classid;
 }
 
-static u16 choke_get_classid(const struct sk_buff *skb)
-{
-       return choke_skb_cb(skb)->classid;
-}
-
 /*
  * Compare flow of two packets
  *  Returns true only if source and destination address and port match.
@@ -187,40 +181,6 @@ static bool choke_match_flow(struct sk_buff *skb1,
                       sizeof(choke_skb_cb(skb1)->keys));
 }
 
-/*
- * Classify flow using either:
- *  1. pre-existing classification result in skb
- *  2. fast internal classification
- *  3. use TC filter based classification
- */
-static bool choke_classify(struct sk_buff *skb,
-                          struct Qdisc *sch, int *qerr)
-
-{
-       struct choke_sched_data *q = qdisc_priv(sch);
-       struct tcf_result res;
-       struct tcf_proto *fl;
-       int result;
-
-       fl = rcu_dereference_bh(q->filter_list);
-       result = tc_classify(skb, fl, &res, false);
-       if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
-               switch (result) {
-               case TC_ACT_STOLEN:
-               case TC_ACT_QUEUED:
-                       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-               case TC_ACT_SHOT:
-                       return false;
-               }
-#endif
-               choke_set_classid(skb, TC_H_MIN(res.classid));
-               return true;
-       }
-
-       return false;
-}
-
 /*
  * Select a packet at random from queue
  * HACK: since queue can have holes from previous deletion; retry several
@@ -257,9 +217,6 @@ static bool choke_match_random(const struct choke_sched_data *q,
                return false;
 
        oskb = choke_peek_random(q, pidx);
-       if (rcu_access_pointer(q->filter_list))
-               return choke_get_classid(nskb) == choke_get_classid(oskb);
-
        return choke_match_flow(oskb, nskb);
 }
 
@@ -270,12 +227,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;
 
-       if (rcu_access_pointer(q->filter_list)) {
-               /* If using external classifiers, get result and record it. */
-               if (!choke_classify(skb, sch, &ret))
-                       goto other_drop;        /* Packet was eaten by filter */
-       }
-
        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
@@ -340,7 +291,6 @@ congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
 
-other_drop:
        if (ret & __NET_XMIT_BYPASS)
                qdisc_qstats_drop(sch);
        __qdisc_drop(skb, to_free);
@@ -538,7 +488,6 @@ static void choke_destroy(struct Qdisc *sch)
 {
        struct choke_sched_data *q = qdisc_priv(sch);
 
-       tcf_destroy_chain(&q->filter_list);
        choke_free(q->tab);
 }
 
index bb4cbdf7500482b170eef6e7923cf2f2259e52b5..9fe67e257dfa8a52b38142a9269fe363616e1187 100644 (file)
@@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                                               &pfifo_qdisc_ops, classid);
        if (cl->qdisc == NULL)
                cl->qdisc = &noop_qdisc;
+       else
+               qdisc_hash_add(cl->qdisc, true);
 
        if (tca[TCA_RATE]) {
                err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
index 802ac7c2e5e87eed1341ba4c09d3e5d70bc75876..cfa1f2cdbaf73120bdb98abd109c4c3ff7723ab4 100644 (file)
@@ -201,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
 
        if (p->set_tc_index) {
+               int wlen = skb_network_offset(skb);
+
                switch (tc_skb_protocol(skb)) {
                case htons(ETH_P_IP):
-                       if (skb_cow_head(skb, sizeof(struct iphdr)))
+                       wlen += sizeof(struct iphdr);
+                       if (!pskb_may_pull(skb, wlen) ||
+                           skb_try_make_writable(skb, wlen))
                                goto drop;
 
                        skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
@@ -211,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                        break;
 
                case htons(ETH_P_IPV6):
-                       if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+                       wlen += sizeof(struct ipv6hdr);
+                       if (!pskb_may_pull(skb, wlen) ||
+                           skb_try_make_writable(skb, wlen))
                                goto drop;
 
                        skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
@@ -368,6 +374,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
        p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
        if (p->q == NULL)
                p->q = &noop_qdisc;
+       else
+               qdisc_hash_add(p->q, true);
 
        pr_debug("%s: qdisc %p\n", __func__, p->q);
 
index 9f3a884d15903fd9012c01b5eee802e02f9f709e..097bbe9857a55f9ce3b41d07a834c197639469d1 100644 (file)
@@ -288,7 +288,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
        struct fq_codel_flow *flow;
        struct list_head *head;
        u32 prev_drop_count, prev_ecn_mark;
-       unsigned int prev_backlog;
 
 begin:
        head = &q->new_flows;
@@ -307,7 +306,6 @@ begin:
 
        prev_drop_count = q->cstats.drop_count;
        prev_ecn_mark = q->cstats.ecn_mark;
-       prev_backlog = sch->qstats.backlog;
 
        skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
                            &flow->cvars, &q->cstats, qdisc_pkt_len,
index b052b27a984e39c244c94132f1162a7033e5cc63..3e64d23e098cff218eea7ea0371302a535e6935c 100644 (file)
@@ -795,7 +795,7 @@ static void attach_default_qdiscs(struct net_device *dev)
        }
 #ifdef CONFIG_NET_SCHED
        if (dev->qdisc)
-               qdisc_hash_add(dev->qdisc);
+               qdisc_hash_add(dev->qdisc, false);
 #endif
 }
 
index 3ffaa6fb0990f0aa31487a2f1829b1f1accf8b21..0198c6cdda4973a0e4d9ac96e1c10c242d0954e9 100644 (file)
@@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                                      &pfifo_qdisc_ops, classid);
        if (cl->qdisc == NULL)
                cl->qdisc = &noop_qdisc;
+       else
+               qdisc_hash_add(cl->qdisc, true);
        INIT_LIST_HEAD(&cl->children);
        cl->vt_tree = RB_ROOT;
        cl->cf_tree = RB_ROOT;
@@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
                                          sch->handle);
        if (q->root.qdisc == NULL)
                q->root.qdisc = &noop_qdisc;
+       else
+               qdisc_hash_add(q->root.qdisc, true);
        INIT_LIST_HEAD(&q->root.children);
        q->root.vt_tree = RB_ROOT;
        q->root.cf_tree = RB_ROOT;
index 4cd5fb134bc9e2dbcdd61b51fb951f94301ed54c..95867033542ec4c889e3c1e7ebd266700aafbef7 100644 (file)
@@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                qdisc_class_hash_insert(&q->clhash, &cl->common);
                if (parent)
                        parent->children++;
+               if (cl->un.leaf.q != &noop_qdisc)
+                       qdisc_hash_add(cl->un.leaf.q, true);
        } else {
                if (tca[TCA_RATE]) {
                        err = gen_replace_estimator(&cl->bstats, NULL,
index 20b7f1646f69270e08d8b7588759a0146f262e89..cadfdd4f1e521b3d68b8fa62d5797f3ff604651d 100644 (file)
@@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch)
                        qdisc_destroy(old);
 #ifdef CONFIG_NET_SCHED
                if (ntx < dev->real_num_tx_queues)
-                       qdisc_hash_add(qdisc);
+                       qdisc_hash_add(qdisc, false);
 #endif
 
        }
index 922683418e53853cb71747d8d30ab0e4a989254b..0a4cf27ea54bd78768d4fa084f7b082460f5f266 100644 (file)
 
 struct mqprio_sched {
        struct Qdisc            **qdiscs;
-       int hw_owned;
+       int hw_offload;
 };
 
 static void mqprio_destroy(struct Qdisc *sch)
 {
        struct net_device *dev = qdisc_dev(sch);
        struct mqprio_sched *priv = qdisc_priv(sch);
-       struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
        unsigned int ntx;
 
        if (priv->qdiscs) {
@@ -39,10 +38,15 @@ static void mqprio_destroy(struct Qdisc *sch)
                kfree(priv->qdiscs);
        }
 
-       if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+       if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
+               struct tc_mqprio_qopt offload = { 0 };
+               struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+                                          { .mqprio = &offload } };
+
                dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
-       else
+       } else {
                netdev_set_num_tc(dev, 0);
+       }
 }
 
 static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
@@ -59,15 +63,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
                        return -EINVAL;
        }
 
-       /* net_device does not support requested operation */
-       if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
-               return -EINVAL;
+       /* Limit qopt->hw to the maximum supported offload value.  Drivers
+        * have the option of overriding this later if they don't support a
+        * given offload type.
+        */
+       if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
+               qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
 
-       /* if hw owned qcount and qoffset are taken from LLD so
-        * no reason to verify them here
+       /* If hardware offload is requested, we will leave it to the device
+        * to either populate the queue counts itself or to validate the
+        * provided queue counts.  If ndo_setup_tc is not present, the
+        * hardware doesn't support offload and we should return an error.
         */
        if (qopt->hw)
-               return 0;
+               return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
 
        for (i = 0; i < qopt->num_tc; i++) {
                unsigned int last = qopt->offset[i] + qopt->count[i];
@@ -139,13 +148,15 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
         * supplied and verified mapping
         */
        if (qopt->hw) {
-               struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
-                                         { .tc = qopt->num_tc }};
+               struct tc_mqprio_qopt offload = *qopt;
+               struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+                                          { .mqprio = &offload } };
 
-               priv->hw_owned = 1;
                err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
                if (err)
                        return err;
+
+               priv->hw_offload = offload.hw;
        } else {
                netdev_set_num_tc(dev, qopt->num_tc);
                for (i = 0; i < qopt->num_tc; i++)
@@ -175,7 +186,7 @@ static void mqprio_attach(struct Qdisc *sch)
                if (old)
                        qdisc_destroy(old);
                if (ntx < dev->real_num_tx_queues)
-                       qdisc_hash_add(qdisc);
+                       qdisc_hash_add(qdisc, false);
        }
        kfree(priv->qdiscs);
        priv->qdiscs = NULL;
@@ -243,7 +254,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 
        opt.num_tc = netdev_get_num_tc(dev);
        memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
-       opt.hw = priv->hw_owned;
+       opt.hw = priv->hw_offload;
 
        for (i = 0; i < netdev_get_num_tc(dev); i++) {
                opt.count[i] = dev->tc_to_txq[i].count;
index e7839a0d0eaa52572f675fdb1dfc590c2a70ac76..43a3a10b3c8118fc2e0deff98be2635d2ad81330 100644 (file)
@@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
                                sch_tree_lock(sch);
                                old = q->queues[i];
                                q->queues[i] = child;
+                               if (child != &noop_qdisc)
+                                       qdisc_hash_add(child, true);
 
                                if (old != &noop_qdisc) {
                                        qdisc_tree_reduce_backlog(old,
index c8bb62a1e7449344a0fd81241fe0102ea2f9c0f9..94b4928ad4134b25e0a28baf1aaa4879b18f56c0 100644 (file)
@@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        /* If a delay is expected, orphan the skb. (orphaning usually takes
         * place at TX completion time, so _before_ the link transit delay)
         */
-       if (q->latency || q->jitter)
+       if (q->latency || q->jitter || q->rate)
                skb_orphan_partial(skb);
 
        /*
@@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                now = psched_get_time();
 
                if (q->rate) {
-                       struct sk_buff *last;
+                       struct netem_skb_cb *last = NULL;
+
+                       if (sch->q.tail)
+                               last = netem_skb_cb(sch->q.tail);
+                       if (q->t_root.rb_node) {
+                               struct sk_buff *t_skb;
+                               struct netem_skb_cb *t_last;
+
+                               t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+                               t_last = netem_skb_cb(t_skb);
+                               if (!last ||
+                                   t_last->time_to_send > last->time_to_send) {
+                                       last = t_last;
+                               }
+                       }
 
-                       if (sch->q.qlen)
-                               last = sch->q.tail;
-                       else
-                               last = netem_rb_to_skb(rb_last(&q->t_root));
                        if (last) {
                                /*
                                 * Last packet in queue is reference point (now),
                                 * calculate this time bonus and subtract
                                 * from delay.
                                 */
-                               delay -= netem_skb_cb(last)->time_to_send - now;
+                               delay -= last->time_to_send - now;
                                delay = max_t(psched_tdiff_t, 0, delay);
-                               now = netem_skb_cb(last)->time_to_send;
+                               now = last->time_to_send;
                        }
 
                        delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
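
The rate-mode logic above computes each packet's send time relative to the
last queued packet rather than to the current clock. A worked sketch of that
"time bonus" subtraction (standalone userspace C with abstract tick units;
the helper name is illustrative, not the kernel code):

    #include <stdio.h>

    /* If the last queued packet departs at t_last and the new packet's
     * nominal delay from now is d, only max(0, d - (t_last - now)) of
     * extra delay is needed once it is measured from t_last instead.
     */
    static long adjust_delay(long d, long now, long t_last)
    {
            d -= t_last - now;      /* time bonus */
            return d < 0 ? 0 : d;
    }

    int main(void)
    {
            /* nominal delay 100 ticks, last packet leaves in 30 ticks */
            printf("%ld\n", adjust_delay(100, 0, 30));      /* 70 */
            return 0;
    }
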
index d4d7db267b6edfa56582ca4a588590e0ded9fe66..92c2e6d448d7984af35d6beb2cb3aea717b76511 100644 (file)
@@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
                qdisc_destroy(child);
        }
 
-       for (i = oldbands; i < q->bands; i++)
+       for (i = oldbands; i < q->bands; i++) {
                q->queues[i] = queues[i];
+               if (q->queues[i] != &noop_qdisc)
+                       qdisc_hash_add(q->queues[i], true);
+       }
 
        sch_tree_unlock(sch);
        return 0;
index f9e712ce2d15ce9280c31d2f75d62b84034ae51d..6c85f3e9239bbc2b127ca7b7e61826de3b57873c 100644 (file)
@@ -494,6 +494,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                        goto destroy_class;
        }
 
+       if (cl->qdisc != &noop_qdisc)
+               qdisc_hash_add(cl->qdisc, true);
        sch_tree_lock(sch);
        qdisc_class_hash_insert(&q->clhash, &cl->common);
        sch_tree_unlock(sch);
index 249b2a18acbd99288eb0a2579a0f29c2ab0b3ded..799ea6dd69b266ccb25d52abab68116e3508b3cb 100644 (file)
@@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
                        return PTR_ERR(child);
        }
 
+       if (child != &noop_qdisc)
+               qdisc_hash_add(child, true);
        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
index fe6963d2151956c508b510edec680b89201173ce..ae862f172c944283be1cbb56f971cf821cd12bf8 100644 (file)
@@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
        if (IS_ERR(child))
                return PTR_ERR(child);
 
+       if (child != &noop_qdisc)
+               qdisc_hash_add(child, true);
        sch_tree_lock(sch);
 
        qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
index 42e8c8615e6563a2deabbb3c3437e3985d01ae14..b00e02c139de8d7c0b66ec6ee0d8b6c677529609 100644 (file)
@@ -714,9 +714,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
        struct sfq_sched_data *q = qdisc_priv(sch);
        int i;
 
-       q->perturb_timer.function = sfq_perturbation;
-       q->perturb_timer.data = (unsigned long)sch;
-       init_timer_deferrable(&q->perturb_timer);
+       setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
+                              (unsigned long)sch);
 
        for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
                q->dep[i].next = i + SFQ_MAX_FLOWS;
index 303355c449ab336227d9b115496e0882f2f2a079..9850126129a378d46105d0535fde46e0a7f1a9c4 100644 (file)
@@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
                                          q->qdisc->qstats.backlog);
                qdisc_destroy(q->qdisc);
                q->qdisc = child;
+               if (child != &noop_qdisc)
+                       qdisc_hash_add(child, true);
        }
        q->limit = qopt->limit;
        if (tb[TCA_TBF_PBURST])
index 2a6835b4562b61cff52425a530524f1c48bc7919..0439a1a6836784fd5096d85a4217980cb5c49690 100644 (file)
@@ -71,9 +71,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 {
        struct net *net = sock_net(sk);
        struct sctp_sock *sp;
-       int i;
        sctp_paramhdr_t *p;
-       int err;
+       int i;
 
        /* Retrieve the SCTP per socket area.  */
        sp = sctp_sk((struct sock *)sk);
@@ -264,8 +263,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
        /* AUTH related initializations */
        INIT_LIST_HEAD(&asoc->endpoint_shared_keys);
-       err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp);
-       if (err)
+       if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp))
                goto fail_init;
 
        asoc->active_key_id = ep->active_key_id;
index e3621cb4827fadb5f5cb41ebe8455dfa3300a765..697721a7a3f1761373aa66b847bd744ea1b42d10 100644 (file)
@@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 
        if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
            time_after(jiffies, chunk->msg->expires_at)) {
-               if (chunk->sent_count)
+               struct sctp_stream_out *streamout =
+                       &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
+               if (chunk->sent_count) {
                        chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
-               else
+                       streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+               } else {
                        chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+                       streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+               }
                return 1;
        } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
                   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
+               struct sctp_stream_out *streamout =
+                       &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
                chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+               streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
                return 1;
        } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
                   chunk->msg->expires_at &&
index 71ce6b945dcb54d831425bdb02e315a14dae69ef..1224421036b3e59c4dba1dd5d672923b55c7923c 100644 (file)
@@ -546,7 +546,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
        struct sctp_association *asoc = tp->asoc;
        struct sctp_chunk *chunk, *tmp;
        int pkt_count, gso = 0;
-       int confirm;
        struct dst_entry *dst;
        struct sk_buff *head;
        struct sctphdr *sh;
@@ -625,13 +624,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
                        asoc->peer.last_sent_to = tp;
        }
        head->ignore_df = packet->ipfragok;
-       confirm = tp->dst_pending_confirm;
-       if (confirm)
+       if (tp->dst_pending_confirm)
                skb_set_dst_pending_confirm(head, 1);
        /* neighbour should be confirmed on successful transmission or
         * positive error
         */
-       if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm)
+       if (tp->af_specific->sctp_xmit(head, tp) >= 0 &&
+           tp->dst_pending_confirm)
                tp->dst_pending_confirm = 0;
 
 out:
index db352e5d61f8980dc461a162959643d872997217..3f78d7f06e14780f2bfe88e18a73e658b0e307e1 100644 (file)
@@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
        struct sctp_chunk *chk, *temp;
 
        list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+               struct sctp_stream_out *streamout;
+
                if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
                    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
                        continue;
@@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
                sctp_insert_list(&asoc->outqueue.abandoned,
                                 &chk->transmitted_list);
 
+               streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
                asoc->sent_cnt_removable--;
                asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+               streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 
                if (!chk->tsn_gap_acked) {
                        if (chk->transport)
@@ -382,19 +386,26 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 }
 
 static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
-                                   struct sctp_sndrcvinfo *sinfo,
-                                   struct list_head *queue, int msg_len)
+                                   struct sctp_sndrcvinfo *sinfo, int msg_len)
 {
+       struct sctp_outq *q = &asoc->outqueue;
        struct sctp_chunk *chk, *temp;
 
-       list_for_each_entry_safe(chk, temp, queue, list) {
+       list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
                if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
                    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
                        continue;
 
                list_del_init(&chk->list);
+               q->out_qlen -= chk->skb->len;
                asoc->sent_cnt_removable--;
                asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+               if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+                       struct sctp_stream_out *streamout =
+                               &asoc->stream->out[chk->sinfo.sinfo_stream];
+
+                       streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+               }
 
                msg_len -= SCTP_DATA_SNDSIZE(chk) +
                           sizeof(struct sk_buff) +
@@ -431,9 +442,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc,
                        return;
        }
 
-       sctp_prsctp_prune_unsent(asoc, sinfo,
-                                &asoc->outqueue.out_chunk_list,
-                                msg_len);
+       sctp_prsctp_prune_unsent(asoc, sinfo, msg_len);
 }
 
 /* Mark all the eligible packets on a transport for retransmission.  */
index e03bb1aab4d095b65259c33f4fba6990e90f586b..ab1374fa5ab084f8ac88699d059c6438daabcccf 100644 (file)
@@ -3872,9 +3872,18 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
                else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST)
                        reply = sctp_process_strreset_inreq(
                                (struct sctp_association *)asoc, param, &ev);
-               /* More handles for other types will be added here, by now it
-                * just ignores other types.
-                */
+               else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST)
+                       reply = sctp_process_strreset_tsnreq(
+                               (struct sctp_association *)asoc, param, &ev);
+               else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS)
+                       reply = sctp_process_strreset_addstrm_out(
+                               (struct sctp_association *)asoc, param, &ev);
+               else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS)
+                       reply = sctp_process_strreset_addstrm_in(
+                               (struct sctp_association *)asoc, param, &ev);
+               else if (param.p->type == SCTP_PARAM_RESET_RESPONSE)
+                       reply = sctp_process_strreset_resp(
+                               (struct sctp_association *)asoc, param, &ev);
 
                if (ev)
                        sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
index 0f378ea2ae38828d75dc215abdbc258e75cec431..6489446925e60bec6e31b1dab277b0f1316d7a6f 100644 (file)
@@ -3758,6 +3758,39 @@ out:
        return retval;
 }
 
+static int sctp_setsockopt_reconfig_supported(struct sock *sk,
+                                             char __user *optval,
+                                             unsigned int optlen)
+{
+       struct sctp_assoc_value params;
+       struct sctp_association *asoc;
+       int retval = -EINVAL;
+
+       if (optlen != sizeof(params))
+               goto out;
+
+       if (copy_from_user(&params, optval, optlen)) {
+               retval = -EFAULT;
+               goto out;
+       }
+
+       asoc = sctp_id2assoc(sk, params.assoc_id);
+       if (asoc) {
+               asoc->reconf_enable = !!params.assoc_value;
+       } else if (!params.assoc_id) {
+               struct sctp_sock *sp = sctp_sk(sk);
+
+               sp->ep->reconf_enable = !!params.assoc_value;
+       } else {
+               goto out;
+       }
+
+       retval = 0;
+
+out:
+       return retval;
+}
+
 static int sctp_setsockopt_enable_strreset(struct sock *sk,
                                           char __user *optval,
                                           unsigned int optlen)
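
For reference, the new option mirrors the other SCTP feature toggles on the
setsockopt() side. A minimal userspace sketch (assuming headers that carry
SCTP_RECONFIG_SUPPORTED and struct sctp_assoc_value from this series):

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>

    /* Enable RE-CONFIG support; assoc_id 0 sets the endpoint default. */
    static int enable_reconf(int fd)
    {
            struct sctp_assoc_value av;

            memset(&av, 0, sizeof(av));
            av.assoc_id = 0;
            av.assoc_value = 1;
            return setsockopt(fd, IPPROTO_SCTP, SCTP_RECONFIG_SUPPORTED,
                              &av, sizeof(av));
    }
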
@@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
        case SCTP_DEFAULT_PRINFO:
                retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
                break;
+       case SCTP_RECONFIG_SUPPORTED:
+               retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen);
+               break;
        case SCTP_ENABLE_STREAM_RESET:
                retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
                break;
@@ -6540,6 +6576,102 @@ out:
        return retval;
 }
 
+static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
+                                          char __user *optval,
+                                          int __user *optlen)
+{
+       struct sctp_stream_out *streamout;
+       struct sctp_association *asoc;
+       struct sctp_prstatus params;
+       int retval = -EINVAL;
+       int policy;
+
+       if (len < sizeof(params))
+               goto out;
+
+       len = sizeof(params);
+       if (copy_from_user(&params, optval, len)) {
+               retval = -EFAULT;
+               goto out;
+       }
+
+       policy = params.sprstat_policy;
+       if (policy & ~SCTP_PR_SCTP_MASK)
+               goto out;
+
+       asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+       if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+               goto out;
+
+       streamout = &asoc->stream->out[params.sprstat_sid];
+       if (policy == SCTP_PR_SCTP_NONE) {
+               params.sprstat_abandoned_unsent = 0;
+               params.sprstat_abandoned_sent = 0;
+               for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+                       params.sprstat_abandoned_unsent +=
+                               streamout->abandoned_unsent[policy];
+                       params.sprstat_abandoned_sent +=
+                               streamout->abandoned_sent[policy];
+               }
+       } else {
+               params.sprstat_abandoned_unsent =
+                       streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+               params.sprstat_abandoned_sent =
+                       streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+       }
+
+       if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
+               retval = -EFAULT;
+               goto out;
+       }
+
+       retval = 0;
+
+out:
+       return retval;
+}
+
+static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len,
+                                             char __user *optval,
+                                             int __user *optlen)
+{
+       struct sctp_assoc_value params;
+       struct sctp_association *asoc;
+       int retval = -EFAULT;
+
+       if (len < sizeof(params)) {
+               retval = -EINVAL;
+               goto out;
+       }
+
+       len = sizeof(params);
+       if (copy_from_user(&params, optval, len))
+               goto out;
+
+       asoc = sctp_id2assoc(sk, params.assoc_id);
+       if (asoc) {
+               params.assoc_value = asoc->reconf_enable;
+       } else if (!params.assoc_id) {
+               struct sctp_sock *sp = sctp_sk(sk);
+
+               params.assoc_value = sp->ep->reconf_enable;
+       } else {
+               retval = -EINVAL;
+               goto out;
+       }
+
+       if (put_user(len, optlen))
+               goto out;
+
+       if (copy_to_user(optval, &params, len))
+               goto out;
+
+       retval = 0;
+
+out:
+       return retval;
+}
+
 static int sctp_getsockopt_enable_strreset(struct sock *sk, int len,
                                           char __user *optval,
                                           int __user *optlen)
@@ -6748,6 +6880,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
                retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
                                                        optlen);
                break;
+       case SCTP_PR_STREAM_STATUS:
+               retval = sctp_getsockopt_pr_streamstatus(sk, len, optval,
+                                                        optlen);
+               break;
+       case SCTP_RECONFIG_SUPPORTED:
+               retval = sctp_getsockopt_reconfig_supported(sk, len, optval,
+                                                           optlen);
+               break;
        case SCTP_ENABLE_STREAM_RESET:
                retval = sctp_getsockopt_enable_strreset(sk, len, optval,
                                                         optlen);
@@ -7437,9 +7577,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
                if (sk->sk_shutdown & RCV_SHUTDOWN)
                        break;
 
-               if (sk_can_busy_loop(sk) &&
-                   sk_busy_loop(sk, noblock))
-                       continue;
+               if (sk_can_busy_loop(sk)) {
+                       sk_busy_loop(sk, noblock);
+
+                       if (!skb_queue_empty(&sk->sk_receive_queue))
+                               continue;
+               }
 
                /* User doesn't want to wait.  */
                error = -EAGAIN;
index 1c6cc04fa3a41f7266597f9cd80420c228094a2b..961d0a1e99d1bcd7a0af5903703a447706ede4d9 100644 (file)
@@ -267,18 +267,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
                stream->out = streamout;
        }
 
-       if (in) {
-               struct sctp_stream_in *streamin;
-
-               streamin = krealloc(stream->in, incnt * sizeof(*streamin),
-                                   GFP_KERNEL);
-               if (!streamin)
-                       goto out;
-
-               memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
-               stream->in = streamin;
-       }
-
        chunk = sctp_make_strreset_addstrm(asoc, out, in);
        if (!chunk)
                goto out;
@@ -303,13 +291,14 @@ out:
 }
 
 static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
-                       struct sctp_association *asoc, __u32 resp_seq)
+                       struct sctp_association *asoc, __u32 resp_seq,
+                       __be16 type)
 {
        struct sctp_chunk *chunk = asoc->strreset_chunk;
        struct sctp_reconf_chunk *hdr;
        union sctp_params param;
 
-       if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk)
+       if (!chunk)
                return NULL;
 
        hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
@@ -320,7 +309,8 @@ static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
                 */
                struct sctp_strreset_tsnreq *req = param.v;
 
-               if (req->request_seq == resp_seq)
+               if ((!resp_seq || req->request_seq == resp_seq) &&
+                   (!type || type == req->param_hdr.type))
                        return param.v;
        }
 
@@ -361,13 +351,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
                goto out;
 
        if (asoc->strreset_chunk) {
-               sctp_paramhdr_t *param_hdr;
-               struct sctp_transport *t;
-
-               param_hdr = sctp_chunk_lookup_strreset_param(
-                                       asoc, outreq->response_seq);
-               if (!param_hdr || param_hdr->type !=
-                                       SCTP_PARAM_RESET_IN_REQUEST) {
+               if (!sctp_chunk_lookup_strreset_param(
+                               asoc, outreq->response_seq,
+                               SCTP_PARAM_RESET_IN_REQUEST)) {
                        /* same process with outstanding isn't 0 */
                        result = SCTP_STRRESET_ERR_IN_PROGRESS;
                        goto out;
@@ -377,6 +363,8 @@ struct sctp_chunk *sctp_process_strreset_outreq(
                asoc->strreset_outseq++;
 
                if (!asoc->strreset_outstanding) {
+                       struct sctp_transport *t;
+
                        t = asoc->strreset_chunk->transport;
                        if (del_timer(&t->reconf_timer))
                                sctp_transport_put(t);
@@ -477,3 +465,367 @@ out:
 
        return chunk;
 }
+
+struct sctp_chunk *sctp_process_strreset_tsnreq(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp)
+{
+       __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
+       struct sctp_strreset_tsnreq *tsnreq = param.v;
+       struct sctp_stream *stream = asoc->stream;
+       __u32 result = SCTP_STRRESET_DENIED;
+       __u32 request_seq;
+       __u16 i;
+
+       request_seq = ntohl(tsnreq->request_seq);
+       if (request_seq > asoc->strreset_inseq) {
+               result = SCTP_STRRESET_ERR_BAD_SEQNO;
+               goto out;
+       } else if (request_seq == asoc->strreset_inseq) {
+               asoc->strreset_inseq++;
+       }
+
+       if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
+               goto out;
+
+       if (asoc->strreset_outstanding) {
+               result = SCTP_STRRESET_ERR_IN_PROGRESS;
+               goto out;
+       }
+
+       /* G3: The same processing as though a SACK chunk with no gap report
+        *     and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+        *     received MUST be performed.
+        */
+       max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
+       sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
+       sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+       /* G1: Compute an appropriate value for the Receiver's Next TSN -- the
+        *     TSN that the peer should use to send the next DATA chunk.  The
+        *     value SHOULD be the smallest TSN not acknowledged by the
+        *     receiver of the request plus 2^31.
+        */
+       init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+       sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+                        init_tsn, GFP_ATOMIC);
+
+       /* G4: The same processing as though a FWD-TSN chunk (as defined in
+        *     [RFC3758]) with all streams affected and a new cumulative TSN
+        *     ACK of the Receiver's Next TSN minus 1 were received MUST be
+        *     performed.
+        */
+       sctp_outq_free(&asoc->outqueue);
+
+       /* G2: Compute an appropriate value for the local endpoint's next TSN,
+        *     i.e., the next TSN assigned by the receiver of the SSN/TSN reset
+        *     chunk.  The value SHOULD be the highest TSN sent by the receiver
+        *     of the request plus 1.
+        */
+       next_tsn = asoc->next_tsn;
+       asoc->ctsn_ack_point = next_tsn - 1;
+       asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+       /* G5:  The next expected and outgoing SSNs MUST be reset to 0 for all
+        *      incoming and outgoing streams.
+        */
+       for (i = 0; i < stream->outcnt; i++)
+               stream->out[i].ssn = 0;
+       for (i = 0; i < stream->incnt; i++)
+               stream->in[i].ssn = 0;
+
+       result = SCTP_STRRESET_PERFORMED;
+
+       *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn,
+                                                   next_tsn, GFP_ATOMIC);
+
+out:
+       return sctp_make_strreset_tsnresp(asoc, result, request_seq,
+                                         next_tsn, init_tsn);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_out(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp)
+{
+       struct sctp_strreset_addstrm *addstrm = param.v;
+       struct sctp_stream *stream = asoc->stream;
+       __u32 result = SCTP_STRRESET_DENIED;
+       struct sctp_stream_in *streamin;
+       __u32 request_seq, incnt;
+       __u16 in;
+
+       request_seq = ntohl(addstrm->request_seq);
+       if (request_seq > asoc->strreset_inseq) {
+               result = SCTP_STRRESET_ERR_BAD_SEQNO;
+               goto out;
+       } else if (request_seq == asoc->strreset_inseq) {
+               asoc->strreset_inseq++;
+       }
+
+       if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+               goto out;
+
+       if (asoc->strreset_chunk) {
+               if (!sctp_chunk_lookup_strreset_param(
+                       asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+                       /* same handling as when outstanding isn't 0 */
+                       result = SCTP_STRRESET_ERR_IN_PROGRESS;
+                       goto out;
+               }
+
+               asoc->strreset_outstanding--;
+               asoc->strreset_outseq++;
+
+               if (!asoc->strreset_outstanding) {
+                       struct sctp_transport *t;
+
+                       t = asoc->strreset_chunk->transport;
+                       if (del_timer(&t->reconf_timer))
+                               sctp_transport_put(t);
+
+                       sctp_chunk_put(asoc->strreset_chunk);
+                       asoc->strreset_chunk = NULL;
+               }
+       }
+
+       in = ntohs(addstrm->number_of_streams);
+       incnt = stream->incnt + in;
+       if (!in || incnt > SCTP_MAX_STREAM)
+               goto out;
+
+       streamin = krealloc(stream->in, incnt * sizeof(*streamin),
+                           GFP_ATOMIC);
+       if (!streamin)
+               goto out;
+
+       memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
+       stream->in = streamin;
+       stream->incnt = incnt;
+
+       result = SCTP_STRRESET_PERFORMED;
+
+       *evp = sctp_ulpevent_make_stream_change_event(asoc,
+               0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC);
+
+out:
+       return sctp_make_strreset_resp(asoc, result, request_seq);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_in(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp)
+{
+       struct sctp_strreset_addstrm *addstrm = param.v;
+       struct sctp_stream *stream = asoc->stream;
+       __u32 result = SCTP_STRRESET_DENIED;
+       struct sctp_stream_out *streamout;
+       struct sctp_chunk *chunk = NULL;
+       __u32 request_seq, outcnt;
+       __u16 out;
+
+       request_seq = ntohl(addstrm->request_seq);
+       if (request_seq > asoc->strreset_inseq) {
+               result = SCTP_STRRESET_ERR_BAD_SEQNO;
+               goto out;
+       } else if (request_seq == asoc->strreset_inseq) {
+               asoc->strreset_inseq++;
+       }
+
+       if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+               goto out;
+
+       if (asoc->strreset_outstanding) {
+               result = SCTP_STRRESET_ERR_IN_PROGRESS;
+               goto out;
+       }
+
+       out = ntohs(addstrm->number_of_streams);
+       outcnt = stream->outcnt + out;
+       if (!out || outcnt > SCTP_MAX_STREAM)
+               goto out;
+
+       streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
+                            GFP_ATOMIC);
+       if (!streamout)
+               goto out;
+
+       memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
+       stream->out = streamout;
+
+       chunk = sctp_make_strreset_addstrm(asoc, out, 0);
+       if (!chunk)
+               goto out;
+
+       asoc->strreset_chunk = chunk;
+       asoc->strreset_outstanding = 1;
+       sctp_chunk_hold(asoc->strreset_chunk);
+
+       stream->outcnt = outcnt;
+
+       *evp = sctp_ulpevent_make_stream_change_event(asoc,
+               0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+
+out:
+       if (!chunk)
+               chunk = sctp_make_strreset_resp(asoc, result, request_seq);
+
+       return chunk;
+}
+
+struct sctp_chunk *sctp_process_strreset_resp(
+                               struct sctp_association *asoc,
+                               union sctp_params param,
+                               struct sctp_ulpevent **evp)
+{
+       struct sctp_strreset_resp *resp = param.v;
+       struct sctp_stream *stream = asoc->stream;
+       struct sctp_transport *t;
+       __u16 i, nums, flags = 0;
+       sctp_paramhdr_t *req;
+       __u32 result;
+
+       req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
+       if (!req)
+               return NULL;
+
+       result = ntohl(resp->result);
+       if (result != SCTP_STRRESET_PERFORMED) {
+               /* if in progress, do nothing but retransmit */
+               if (result == SCTP_STRRESET_IN_PROGRESS)
+                       return NULL;
+               else if (result == SCTP_STRRESET_DENIED)
+                       flags = SCTP_STREAM_RESET_DENIED;
+               else
+                       flags = SCTP_STREAM_RESET_FAILED;
+       }
+
+       if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
+               struct sctp_strreset_outreq *outreq;
+               __u16 *str_p = NULL;
+
+               outreq = (struct sctp_strreset_outreq *)req;
+               nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+
+               if (result == SCTP_STRRESET_PERFORMED) {
+                       if (nums) {
+                               str_p = outreq->list_of_streams;
+                               for (i = 0; i < nums; i++)
+                                       stream->out[ntohs(str_p[i])].ssn = 0;
+                       } else {
+                               for (i = 0; i < stream->outcnt; i++)
+                                       stream->out[i].ssn = 0;
+                       }
+
+                       flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+               }
+
+               for (i = 0; i < stream->outcnt; i++)
+                       stream->out[i].state = SCTP_STREAM_OPEN;
+
+               *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+                       nums, str_p, GFP_ATOMIC);
+       } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
+               struct sctp_strreset_inreq *inreq;
+               __u16 *str_p = NULL;
+
+               /* a PERFORMED result is impossible for an inreq */
+               if (result == SCTP_STRRESET_PERFORMED)
+                       return NULL;
+
+               inreq = (struct sctp_strreset_inreq *)req;
+               nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+
+               str_p = inreq->list_of_streams;
+               *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+                       nums, str_p, GFP_ATOMIC);
+       } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
+               struct sctp_strreset_resptsn *resptsn;
+               __u32 stsn, rtsn;
+
+               /* check for resptsn, as sctp_verify_reconf didn't do it */
+               if (ntohs(param.p->length) != sizeof(*resptsn))
+                       return NULL;
+
+               resptsn = (struct sctp_strreset_resptsn *)resp;
+               stsn = ntohl(resptsn->senders_next_tsn);
+               rtsn = ntohl(resptsn->receivers_next_tsn);
+
+               if (result == SCTP_STRRESET_PERFORMED) {
+                       __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
+                                               &asoc->peer.tsn_map);
+
+                       sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
+                       sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+                       sctp_tsnmap_init(&asoc->peer.tsn_map,
+                                        SCTP_TSN_MAP_INITIAL,
+                                        stsn, GFP_ATOMIC);
+
+                       sctp_outq_free(&asoc->outqueue);
+
+                       asoc->next_tsn = rtsn;
+                       asoc->ctsn_ack_point = asoc->next_tsn - 1;
+                       asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+                       for (i = 0; i < stream->outcnt; i++)
+                               stream->out[i].ssn = 0;
+                       for (i = 0; i < stream->incnt; i++)
+                               stream->in[i].ssn = 0;
+               }
+
+               for (i = 0; i < stream->outcnt; i++)
+                       stream->out[i].state = SCTP_STREAM_OPEN;
+
+               *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags,
+                       stsn, rtsn, GFP_ATOMIC);
+       } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) {
+               struct sctp_strreset_addstrm *addstrm;
+               __u16 number;
+
+               addstrm = (struct sctp_strreset_addstrm *)req;
+               nums = ntohs(addstrm->number_of_streams);
+               number = stream->outcnt - nums;
+
+               if (result == SCTP_STRRESET_PERFORMED)
+                       for (i = number; i < stream->outcnt; i++)
+                               stream->out[i].state = SCTP_STREAM_OPEN;
+               else
+                       stream->outcnt = number;
+
+               *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+                       0, nums, GFP_ATOMIC);
+       } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) {
+               struct sctp_strreset_addstrm *addstrm;
+
+               /* a PERFORMED result is impossible for an addstrm in
+                * request.
+                */
+               if (result == SCTP_STRRESET_PERFORMED)
+                       return NULL;
+
+               addstrm = (struct sctp_strreset_addstrm *)req;
+               nums = ntohs(addstrm->number_of_streams);
+
+               *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+                       nums, 0, GFP_ATOMIC);
+       }
+
+       asoc->strreset_outstanding--;
+       asoc->strreset_outseq++;
+
+       /* remove everything for this reconf request */
+       if (!asoc->strreset_outstanding) {
+               t = asoc->strreset_chunk->transport;
+               if (del_timer(&t->reconf_timer))
+                       sctp_transport_put(t);
+
+               sctp_chunk_put(asoc->strreset_chunk);
+               asoc->strreset_chunk = NULL;
+       }
+
+       return NULL;
+}
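
These handlers implement the receiver side of RFC 6525 stream
reconfiguration. From userspace the requests originate in socket options; a
hedged sketch of asking the peer for two more outgoing streams (assuming
struct sctp_add_streams and SCTP_ADD_STREAMS from the uapi headers, which
sctp_send_add_streams() above services):

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>

    /* The peer's sctp_process_strreset_addstrm_*() decides whether the
     * request is performed or denied.
     */
    static int add_out_streams(int fd, sctp_assoc_t aid)
    {
            struct sctp_add_streams sas;

            memset(&sas, 0, sizeof(sas));
            sas.sas_assoc_id = aid;
            sas.sas_instrms = 0;
            sas.sas_outstrms = 2;
            return setsockopt(fd, IPPROTO_SCTP, SCTP_ADD_STREAMS,
                              &sas, sizeof(sas));
    }
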
index daf8554fd42a5e537bb58572823b2028f74be930..0e732f68c2bfc3b791dade5a85c36628904ee490 100644 (file)
@@ -274,6 +274,13 @@ static struct ctl_table sctp_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "reconf_enable",
+               .data           = &init_net.sctp.reconf_enable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "auth_enable",
                .data           = &init_net.sctp.auth_enable,
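
The new entry surfaces as /proc/sys/net/sctp/reconf_enable. A small sketch
of flipping it from C (root required; the helper name is illustrative):

    #include <stdio.h>

    static int set_reconf_enable(int on)
    {
            FILE *f = fopen("/proc/sys/net/sctp/reconf_enable", "w");

            if (!f)
                    return -1;
            fprintf(f, "%d\n", on ? 1 : 0);
            return fclose(f);
    }
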
index c8881bc542a066e6f7f234beea3c7208394242c5..ec2b3e013c2f4ba48eabfc8d8ed20921cabc08a1 100644 (file)
@@ -883,6 +883,62 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
        return event;
 }
 
+struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event(
+       const struct sctp_association *asoc, __u16 flags, __u32 local_tsn,
+       __u32 remote_tsn, gfp_t gfp)
+{
+       struct sctp_assoc_reset_event *areset;
+       struct sctp_ulpevent *event;
+       struct sk_buff *skb;
+
+       event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event),
+                                 MSG_NOTIFICATION, gfp);
+       if (!event)
+               return NULL;
+
+       skb = sctp_event2skb(event);
+       areset = (struct sctp_assoc_reset_event *)
+               skb_put(skb, sizeof(struct sctp_assoc_reset_event));
+
+       areset->assocreset_type = SCTP_ASSOC_RESET_EVENT;
+       areset->assocreset_flags = flags;
+       areset->assocreset_length = sizeof(struct sctp_assoc_reset_event);
+       sctp_ulpevent_set_owner(event, asoc);
+       areset->assocreset_assoc_id = sctp_assoc2id(asoc);
+       areset->assocreset_local_tsn = local_tsn;
+       areset->assocreset_remote_tsn = remote_tsn;
+
+       return event;
+}
+
+struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
+       const struct sctp_association *asoc, __u16 flags,
+       __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp)
+{
+       struct sctp_stream_change_event *schange;
+       struct sctp_ulpevent *event;
+       struct sk_buff *skb;
+
+       event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event),
+                                 MSG_NOTIFICATION, gfp);
+       if (!event)
+               return NULL;
+
+       skb = sctp_event2skb(event);
+       schange = (struct sctp_stream_change_event *)
+               skb_put(skb, sizeof(struct sctp_stream_change_event));
+
+       schange->strchange_type = SCTP_STREAM_CHANGE_EVENT;
+       schange->strchange_flags = flags;
+       schange->strchange_length = sizeof(struct sctp_stream_change_event);
+       sctp_ulpevent_set_owner(event, asoc);
+       schange->strchange_assoc_id = sctp_assoc2id(asoc);
+       schange->strchange_instrms = strchange_instrms;
+       schange->strchange_outstrms = strchange_outstrms;
+
+       return event;
+}
+
 /* Return the notification type, assuming this is a notification
  * event.
  */
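
Applications opt in to these notifications through SCTP_EVENTS. A hedged
sketch, assuming the companion uapi change extends struct
sctp_event_subscribe with sctp_assoc_reset_event and
sctp_stream_change_event flags:

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>

    /* Subscribe to assoc-reset and stream-change notifications. */
    static int subscribe_reset_events(int fd)
    {
            struct sctp_event_subscribe ev;

            memset(&ev, 0, sizeof(ev));
            ev.sctp_assoc_reset_event = 1;      /* assumed field name */
            ev.sctp_stream_change_event = 1;    /* assumed field name */
            return setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS,
                              &ev, sizeof(ev));
    }
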
index a95f74bb556915f92d0fa1bdbd2e34eb9814c3b1..7e1f0e24d17790f526aa50d07ff5e5d6596b6f3c 100644 (file)
@@ -11,6 +11,7 @@
 #ifndef _SMC_IB_H
 #define _SMC_IB_H
 
+#include <linux/interrupt.h>
 #include <linux/if_ether.h>
 #include <rdma/ib_verbs.h>
 
index e034fe4164beec7731c68ba2bc6920627741561b..985ef06792d6e54c69d296f3e15baf89be972f9c 100644 (file)
@@ -652,6 +652,16 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
+static bool skb_is_err_queue(const struct sk_buff *skb)
+{
+       /* The pkt_type of skbs enqueued on the error queue is set to
+        * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
+        * in recvmsg, since skbs received on a local socket will never
+        * have a pkt_type of PACKET_OUTGOING.
+        */
+       return skb->pkt_type == PACKET_OUTGOING;
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -695,7 +705,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
                put_cmsg(msg, SOL_SOCKET,
                         SCM_TIMESTAMPING, sizeof(tss), &tss);
 
-               if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+               if (skb_is_err_queue(skb) && skb->len &&
+                   SKB_EXT_ERR(skb)->opt_stats)
                        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
                                 skb->len, skb->data);
        }
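
On the consuming side the statistics now arrive only with error-queue
messages. A sketch of draining one message and spotting the stats cmsg
(assuming the socket already enabled SOF_TIMESTAMPING_OPT_STATS via
SO_TIMESTAMPING, and that SCM_TIMESTAMPING_OPT_STATS comes from recent
kernel uapi socket headers):

    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <linux/net_tstamp.h>

    /* Returns 1 if the drained error-queue message carried a
     * SCM_TIMESTAMPING_OPT_STATS blob, 0 if not, -1 on error.
     */
    static int read_opt_stats(int fd)
    {
            char data[512], ctrl[512];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
            };
            struct cmsghdr *cm;

            if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
                    return -1;
            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
                    if (cm->cmsg_level == SOL_SOCKET &&
                        cm->cmsg_type == SCM_TIMESTAMPING_OPT_STATS)
                            return 1;
            return 0;
    }
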
index 81cd31acf690f41573e5fedd9b837376543f5ce9..3b332b395045b5b0ad07bc13a30db1420d7f7082 100644 (file)
@@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        struct ib_cq *sendcq, *recvcq;
        int rc;
 
-       max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+       max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+                       RPCRDMA_MAX_SEND_SGES);
        if (max_sge < RPCRDMA_MIN_SEND_SGES) {
                pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
                return -ENOMEM;
index 9be6592e4a6fa20c78995396ffa3dfcd1f19537a..bd0aac87b41ac627e5c897256a79150226926514 100644 (file)
@@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 
        tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
 
+       tipc_subscrp_get(s);
        list_add(&s->nameseq_list, &nseq->subscriptions);
 
        if (!sseq)
@@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
        if (seq != NULL) {
                spin_lock_bh(&seq->lock);
                list_del_init(&s->nameseq_list);
+               tipc_subscrp_put(s);
                if (!seq->first_free && list_empty(&seq->subscriptions)) {
                        hlist_del_init_rcu(&seq->ns_list);
                        kfree(seq->sseqs);
index 7130e73bd42c21758e88b24b875da4bd97b3c4d2..15f6ce7bf8687a95a9fe54080ec84cf62de9c762 100644 (file)
@@ -2511,6 +2511,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        }
 }
 
+static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
+{
+       struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
+       struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
+       u32 onode = tipc_own_addr(sock_net(sock1->sk));
+
+       tsk1->peer.family = AF_TIPC;
+       tsk1->peer.addrtype = TIPC_ADDR_ID;
+       tsk1->peer.scope = TIPC_NODE_SCOPE;
+       tsk1->peer.addr.id.ref = tsk2->portid;
+       tsk1->peer.addr.id.node = onode;
+       tsk2->peer.family = AF_TIPC;
+       tsk2->peer.addrtype = TIPC_ADDR_ID;
+       tsk2->peer.scope = TIPC_NODE_SCOPE;
+       tsk2->peer.addr.id.ref = tsk1->portid;
+       tsk2->peer.addr.id.node = onode;
+
+       tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
+       tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
+       return 0;
+}
+
 /* Protocol switches for the various types of TIPC sockets */
 
 static const struct proto_ops msg_ops = {
@@ -2519,7 +2541,7 @@ static const struct proto_ops msg_ops = {
        .release        = tipc_release,
        .bind           = tipc_bind,
        .connect        = tipc_connect,
-       .socketpair     = sock_no_socketpair,
+       .socketpair     = tipc_socketpair,
        .accept         = sock_no_accept,
        .getname        = tipc_getname,
        .poll           = tipc_poll,
@@ -2540,7 +2562,7 @@ static const struct proto_ops packet_ops = {
        .release        = tipc_release,
        .bind           = tipc_bind,
        .connect        = tipc_connect,
-       .socketpair     = sock_no_socketpair,
+       .socketpair     = tipc_socketpair,
        .accept         = tipc_accept,
        .getname        = tipc_getname,
        .poll           = tipc_poll,
@@ -2561,7 +2583,7 @@ static const struct proto_ops stream_ops = {
        .release        = tipc_release,
        .bind           = tipc_bind,
        .connect        = tipc_connect,
-       .socketpair     = sock_no_socketpair,
+       .socketpair     = tipc_socketpair,
        .accept         = tipc_accept,
        .getname        = tipc_getname,
        .poll           = tipc_poll,
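
With all three proto_ops wired to tipc_socketpair(), socketpair() now works
for AF_TIPC. A minimal userspace sketch:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>

    int main(void)
    {
            int sv[2];
            char buf[16] = "";

            if (socketpair(AF_TIPC, SOCK_SEQPACKET, 0, sv))
                    return 1;
            /* the pair is mutually connected by tipc_socketpair() */
            send(sv[0], "ping", 4, 0);
            recv(sv[1], buf, sizeof(buf), 0);
            printf("%s\n", buf);
            close(sv[0]);
            close(sv[1]);
            return 0;
    }
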
index 9d94e65d0894183b4af94ed24e84b94c0478b551..0bf91cd3733cb37ecc8ba4ccf7ae5a26cb6e966d 100644 (file)
@@ -54,8 +54,6 @@ struct tipc_subscriber {
 
 static void tipc_subscrp_delete(struct tipc_subscription *sub);
 static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-static void tipc_subscrp_put(struct tipc_subscription *subscription);
-static void tipc_subscrp_get(struct tipc_subscription *subscription);
 
 /**
  * htohl - convert value to endianness used by destination
@@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 {
        struct tipc_name_seq seq;
 
-       tipc_subscrp_get(sub);
        tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
        if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
                return;
@@ -135,12 +132,17 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 
        tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
                                node);
-       tipc_subscrp_put(sub);
 }
 
 static void tipc_subscrp_timeout(unsigned long data)
 {
        struct tipc_subscription *sub = (struct tipc_subscription *)data;
+       struct tipc_subscriber *subscriber = sub->subscriber;
+
+       spin_lock_bh(&subscriber->lock);
+       tipc_nametbl_unsubscribe(sub);
+       list_del(&sub->subscrp_list);
+       spin_unlock_bh(&subscriber->lock);
 
        /* Notify subscriber of timeout */
        tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
@@ -172,21 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref)
        struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
        struct tipc_subscriber *subscriber = sub->subscriber;
 
-       spin_lock_bh(&subscriber->lock);
-       tipc_nametbl_unsubscribe(sub);
-       list_del(&sub->subscrp_list);
        atomic_dec(&tn->subscription_count);
-       spin_unlock_bh(&subscriber->lock);
        kfree(sub);
        tipc_subscrb_put(subscriber);
 }
 
-static void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_subscrp_put(struct tipc_subscription *subscription)
 {
        kref_put(&subscription->kref, tipc_subscrp_kref_release);
 }
 
-static void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_subscrp_get(struct tipc_subscription *subscription)
 {
        kref_get(&subscription->kref);
 }
@@ -205,11 +203,9 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
                if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
                        continue;
 
-               tipc_subscrp_get(sub);
-               spin_unlock_bh(&subscriber->lock);
+               tipc_nametbl_unsubscribe(sub);
+               list_del(&sub->subscrp_list);
                tipc_subscrp_delete(sub);
-               tipc_subscrp_put(sub);
-               spin_lock_bh(&subscriber->lock);
 
                if (s)
                        break;
index ffdc214c117a924f34b416fde415fcd18201ebc0..ee52957dc9524a76ac371aa19d950dc90bfe4035 100644 (file)
@@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
 int tipc_topsrv_start(struct net *net);
 void tipc_topsrv_stop(struct net *net);
 
+void tipc_subscrp_put(struct tipc_subscription *subscription);
+void tipc_subscrp_get(struct tipc_subscription *subscription);
+
 #endif
index 6a0d48525fcf9a71f54bb43495b200b300f5341e..c36757e728442bb936c17f8a975b420f5f8a5972 100644 (file)
@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
        if (s) {
                struct unix_sock *u = unix_sk(s);
 
+               BUG_ON(!atomic_long_read(&u->inflight));
                BUG_ON(list_empty(&u->link));
 
                if (atomic_long_dec_and_test(&u->inflight))
@@ -341,6 +342,14 @@ void unix_gc(void)
        }
        list_del(&cursor);
 
+       /* Now gc_candidates contains only garbage.  Restore original
+        * inflight counters for these as well, and remove the skbuffs
+        * which are creating the cycle(s).
+        */
+       skb_queue_head_init(&hitlist);
+       list_for_each_entry(u, &gc_candidates, link)
+               scan_children(&u->sk, inc_inflight, &hitlist);
+
        /* not_cycle_list contains those sockets which do not make up a
         * cycle.  Restore these to the inflight list.
         */
@@ -350,14 +359,6 @@ void unix_gc(void)
                list_move_tail(&u->link, &gc_inflight_list);
        }
 
-       /* Now gc_candidates contains only garbage.  Restore original
-        * inflight counters for these as well, and remove the skbuffs
-        * which are creating the cycle(s).
-        */
-       skb_queue_head_init(&hitlist);
-       list_for_each_entry(u, &gc_candidates, link)
-       scan_children(&u->sk, inc_inflight, &hitlist);
-
        spin_unlock(&unix_gc_lock);
 
        /* Here we are. Hitlist is filled. Die. */
index 9f770f33c10098fd3fcccfd9c739ab9a28a6b6f5..6f7f6757ceefb500551fafbf40c462835c4baf88 100644 (file)
@@ -1102,10 +1102,19 @@ static const struct proto_ops vsock_dgram_ops = {
        .sendpage = sock_no_sendpage,
 };
 
+static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+       if (!transport->cancel_pkt)
+               return -EOPNOTSUPP;
+
+       return transport->cancel_pkt(vsk);
+}
+
 static void vsock_connect_timeout(struct work_struct *work)
 {
        struct sock *sk;
        struct vsock_sock *vsk;
+       int cancel = 0;
 
        vsk = container_of(work, struct vsock_sock, dwork.work);
        sk = sk_vsock(vsk);
@@ -1116,8 +1125,11 @@ static void vsock_connect_timeout(struct work_struct *work)
                sk->sk_state = SS_UNCONNECTED;
                sk->sk_err = ETIMEDOUT;
                sk->sk_error_report(sk);
+               cancel = 1;
        }
        release_sock(sk);
+       if (cancel)
+               vsock_transport_cancel_pkt(vsk);
 
        sock_put(sk);
 }
@@ -1224,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
                        err = sock_intr_errno(timeout);
                        sk->sk_state = SS_UNCONNECTED;
                        sock->state = SS_UNCONNECTED;
+                       vsock_transport_cancel_pkt(vsk);
                        goto out_wait;
                } else if (timeout == 0) {
                        err = -ETIMEDOUT;
                        sk->sk_state = SS_UNCONNECTED;
                        sock->state = SS_UNCONNECTED;
+                       vsock_transport_cancel_pkt(vsk);
                        goto out_wait;
                }
 
index 9d24c0e958b18e614e30b24c0fcfbbe2152941f3..68675a151f22b8b63c02b25a67b833d9a6046d84 100644 (file)
@@ -213,6 +213,47 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
        return len;
 }
 
+static int
+virtio_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+       struct virtio_vsock *vsock;
+       struct virtio_vsock_pkt *pkt, *n;
+       int cnt = 0;
+       LIST_HEAD(freeme);
+
+       vsock = virtio_vsock_get();
+       if (!vsock) {
+               return -ENODEV;
+       }
+
+       spin_lock_bh(&vsock->send_pkt_list_lock);
+       list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+               if (pkt->vsk != vsk)
+                       continue;
+               list_move(&pkt->list, &freeme);
+       }
+       spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+       list_for_each_entry_safe(pkt, n, &freeme, list) {
+               if (pkt->reply)
+                       cnt++;
+               list_del(&pkt->list);
+               virtio_transport_free_pkt(pkt);
+       }
+
+       if (cnt) {
+               struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
+               int new_cnt;
+
+               new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+               if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) &&
+                   new_cnt < virtqueue_get_vring_size(rx_vq))
+                       queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+       }
+
+       return 0;
+}
+
 static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
 {
        int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
@@ -462,6 +503,7 @@ static struct virtio_transport virtio_transport = {
                .release                  = virtio_transport_release,
                .connect                  = virtio_transport_connect,
                .shutdown                 = virtio_transport_shutdown,
+               .cancel_pkt               = virtio_transport_cancel_pkt,
 
                .dgram_bind               = virtio_transport_dgram_bind,
                .dgram_dequeue            = virtio_transport_dgram_dequeue,
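The accounting at the end of virtio_transport_cancel_pkt() only kicks rx_work
when this cancel takes queued_replies from at-or-above the vring size to below
it, i.e. when the receive side had actually been throttled. A stand-alone toy
of the condition, with assumed numbers:

    #include <stdio.h>

    int main(void)
    {
            int vring_size = 128;           /* assumed virtqueue size */
            int cnt = 5;                    /* reply packets freed by the cancel */
            int new_cnt = 130 - cnt;        /* atomic_sub_return() result */

            /* the old count (new_cnt + cnt) had reached the vring size
             * and the new count is below it: rx needs a wakeup
             */
            if (new_cnt + cnt >= vring_size && new_cnt < vring_size)
                    printf("queue rx_work\n");
            return 0;
    }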
index 8d592a45b59786746d186e12d0c362d07c30bdac..af087b44ceea2311e53060e2442b4af2024bb037 100644 (file)
@@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
        pkt->len                = len;
        pkt->hdr.len            = cpu_to_le32(len);
        pkt->reply              = info->reply;
+       pkt->vsk                = info->vsk;
 
        if (info->msg && len > 0) {
                pkt->buf = kmalloc(len, GFP_KERNEL);
@@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
                .type = type,
+               .vsk = vsk,
        };
 
        return virtio_transport_send_pkt_info(vsk, &info);
@@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk)
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_REQUEST,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
+               .vsk = vsk,
        };
 
        return virtio_transport_send_pkt_info(vsk, &info);
@@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
                          VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
                         (mode & SEND_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
+               .vsk = vsk,
        };
 
        return virtio_transport_send_pkt_info(vsk, &info);
@@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .msg = msg,
                .pkt_len = len,
+               .vsk = vsk,
        };
 
        return virtio_transport_send_pkt_info(vsk, &info);
@@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
                .op = VIRTIO_VSOCK_OP_RST,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .reply = !!pkt,
+               .vsk = vsk,
        };
 
        /* Send RST only if the original pkt is not a RST pkt */
@@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk,
                .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
                .remote_port = le32_to_cpu(pkt->hdr.src_port),
                .reply = true,
+               .vsk = vsk,
        };
 
        return virtio_transport_send_pkt_info(vsk, &info);
index 4be4fbbc0b5035662b1cd756bd4e99dd3351309e..10ae7823a19def7bde20d669e3913a40178e7da2 100644 (file)
@@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1;
 
 static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
 {
-       int err;
-
        switch (vmci_error) {
        case VMCI_ERROR_NO_MEM:
-               err = ENOMEM;
-               break;
+               return -ENOMEM;
        case VMCI_ERROR_DUPLICATE_ENTRY:
        case VMCI_ERROR_ALREADY_EXISTS:
-               err = EADDRINUSE;
-               break;
+               return -EADDRINUSE;
        case VMCI_ERROR_NO_ACCESS:
-               err = EPERM;
-               break;
+               return -EPERM;
        case VMCI_ERROR_NO_RESOURCES:
-               err = ENOBUFS;
-               break;
+               return -ENOBUFS;
        case VMCI_ERROR_INVALID_RESOURCE:
-               err = EHOSTUNREACH;
-               break;
+               return -EHOSTUNREACH;
        case VMCI_ERROR_INVALID_ARGS:
        default:
-               err = EINVAL;
+               break;
        }
-
-       return err > 0 ? -err : err;
+       return -EINVAL;
 }
 
 static u32 vmci_transport_peer_rid(u32 peer_cid)
index d7f8be4e321a32eba3a615aa69a860c212511625..2312dc2ffdb98b37b2909274c57eed68935267d7 100644 (file)
@@ -545,22 +545,18 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 {
        int err;
 
-       rtnl_lock();
-
        if (!cb->args[0]) {
                err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
                                  genl_family_attrbuf(&nl80211_fam),
                                  nl80211_fam.maxattr, nl80211_policy);
                if (err)
-                       goto out_unlock;
+                       return err;
 
                *wdev = __cfg80211_wdev_from_attrs(
                                        sock_net(skb->sk),
                                        genl_family_attrbuf(&nl80211_fam));
-               if (IS_ERR(*wdev)) {
-                       err = PTR_ERR(*wdev);
-                       goto out_unlock;
-               }
+               if (IS_ERR(*wdev))
+                       return PTR_ERR(*wdev);
                *rdev = wiphy_to_rdev((*wdev)->wiphy);
                /* 0 is the first index - add 1 to parse only once */
                cb->args[0] = (*rdev)->wiphy_idx + 1;
@@ -570,10 +566,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
                struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
                struct wireless_dev *tmp;
 
-               if (!wiphy) {
-                       err = -ENODEV;
-                       goto out_unlock;
-               }
+               if (!wiphy)
+                       return -ENODEV;
                *rdev = wiphy_to_rdev(wiphy);
                *wdev = NULL;
 
@@ -584,21 +578,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
                        }
                }
 
-               if (!*wdev) {
-                       err = -ENODEV;
-                       goto out_unlock;
-               }
+               if (!*wdev)
+                       return -ENODEV;
        }
 
        return 0;
- out_unlock:
-       rtnl_unlock();
-       return err;
-}
-
-static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
-{
-       rtnl_unlock();
 }
 
 /* IE validation */
@@ -2608,17 +2592,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
        int filter_wiphy = -1;
        struct cfg80211_registered_device *rdev;
        struct wireless_dev *wdev;
+       int ret;
 
        rtnl_lock();
        if (!cb->args[2]) {
                struct nl80211_dump_wiphy_state state = {
                        .filter_wiphy = -1,
                };
-               int ret;
 
                ret = nl80211_dump_wiphy_parse(skb, cb, &state);
                if (ret)
-                       return ret;
+                       goto out_unlock;
 
                filter_wiphy = state.filter_wiphy;
 
@@ -2663,12 +2647,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
                wp_idx++;
        }
  out:
-       rtnl_unlock();
-
        cb->args[0] = wp_idx;
        cb->args[1] = if_idx;
 
-       return skb->len;
+       ret = skb->len;
+ out_unlock:
+       rtnl_unlock();
+
+       return ret;
 }
 
 static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
@@ -4452,9 +4438,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
        int sta_idx = cb->args[2];
        int err;
 
+       rtnl_lock();
        err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
        if (err)
-               return err;
+               goto out_err;
 
        if (!wdev->netdev) {
                err = -EINVAL;
@@ -4489,7 +4476,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
        cb->args[2] = sta_idx;
        err = skb->len;
  out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
 
        return err;
 }
@@ -5275,9 +5262,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
        int path_idx = cb->args[2];
        int err;
 
+       rtnl_lock();
        err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
        if (err)
-               return err;
+               goto out_err;
 
        if (!rdev->ops->dump_mpath) {
                err = -EOPNOTSUPP;
@@ -5310,7 +5298,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
        cb->args[2] = path_idx;
        err = skb->len;
  out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
        return err;
 }
 
@@ -5470,9 +5458,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
        int path_idx = cb->args[2];
        int err;
 
+       rtnl_lock();
        err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
        if (err)
-               return err;
+               goto out_err;
 
        if (!rdev->ops->dump_mpp) {
                err = -EOPNOTSUPP;
@@ -5505,7 +5494,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
        cb->args[2] = path_idx;
        err = skb->len;
  out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
        return err;
 }
 
@@ -7674,9 +7663,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
        int start = cb->args[2], idx = 0;
        int err;
 
+       rtnl_lock();
        err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
-       if (err)
+       if (err) {
+               rtnl_unlock();
                return err;
+       }
 
        wdev_lock(wdev);
        spin_lock_bh(&rdev->bss_lock);
@@ -7699,7 +7691,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
        wdev_unlock(wdev);
 
        cb->args[2] = idx;
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
 
        return skb->len;
 }
@@ -7784,9 +7776,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
        int res;
        bool radio_stats;
 
+       rtnl_lock();
        res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
        if (res)
-               return res;
+               goto out_err;
 
        /* prepare_wdev_dump parsed the attributes */
        radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
@@ -7827,7 +7820,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
        cb->args[2] = survey_idx;
        res = skb->len;
  out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
        return res;
 }
 
@@ -11508,17 +11501,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
        void *data = NULL;
        unsigned int data_len = 0;
 
-       rtnl_lock();
-
        if (cb->args[0]) {
                /* subtract the 1 again here */
                struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
                struct wireless_dev *tmp;
 
-               if (!wiphy) {
-                       err = -ENODEV;
-                       goto out_unlock;
-               }
+               if (!wiphy)
+                       return -ENODEV;
                *rdev = wiphy_to_rdev(wiphy);
                *wdev = NULL;
 
@@ -11538,23 +11527,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
        err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
                          attrbuf, nl80211_fam.maxattr, nl80211_policy);
        if (err)
-               goto out_unlock;
+               return err;
 
        if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
-           !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
-               err = -EINVAL;
-               goto out_unlock;
-       }
+           !attrbuf[NL80211_ATTR_VENDOR_SUBCMD])
+               return -EINVAL;
 
        *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
        if (IS_ERR(*wdev))
                *wdev = NULL;
 
        *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
-       if (IS_ERR(*rdev)) {
-               err = PTR_ERR(*rdev);
-               goto out_unlock;
-       }
+       if (IS_ERR(*rdev))
+               return PTR_ERR(*rdev);
 
        vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
        subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
@@ -11567,19 +11552,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
                if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
                        continue;
 
-               if (!vcmd->dumpit) {
-                       err = -EOPNOTSUPP;
-                       goto out_unlock;
-               }
+               if (!vcmd->dumpit)
+                       return -EOPNOTSUPP;
 
                vcmd_idx = i;
                break;
        }
 
-       if (vcmd_idx < 0) {
-               err = -EOPNOTSUPP;
-               goto out_unlock;
-       }
+       if (vcmd_idx < 0)
+               return -EOPNOTSUPP;
 
        if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
                data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
@@ -11596,9 +11577,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
 
        /* keep rtnl locked in successful case */
        return 0;
- out_unlock:
-       rtnl_unlock();
-       return err;
 }
 
 static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
@@ -11613,9 +11591,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
        int err;
        struct nlattr *vendor_data;
 
+       rtnl_lock();
        err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
        if (err)
-               return err;
+               goto out;
 
        vcmd_idx = cb->args[2];
        data = (void *)cb->args[3];
@@ -11624,15 +11603,21 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
 
        if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
                           WIPHY_VENDOR_CMD_NEED_NETDEV)) {
-               if (!wdev)
-                       return -EINVAL;
+               if (!wdev) {
+                       err = -EINVAL;
+                       goto out;
+               }
                if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
-                   !wdev->netdev)
-                       return -EINVAL;
+                   !wdev->netdev) {
+                       err = -EINVAL;
+                       goto out;
+               }
 
                if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
-                       if (!wdev_running(wdev))
-                               return -ENETDOWN;
+                       if (!wdev_running(wdev)) {
+                               err = -ENETDOWN;
+                               goto out;
+                       }
                }
        }
 
index 09e9d535bd7487d81574cf8572a41b6e697566fd..d42b495b099278cc7a96b4dc0945d56c38c06287 100644 (file)
@@ -34,6 +34,8 @@ hostprogs-y += sampleip
 hostprogs-y += tc_l2_redirect
 hostprogs-y += lwt_len_hist
 hostprogs-y += xdp_tx_iptunnel
+hostprogs-y += test_map_in_map
+hostprogs-y += per_socket_stats_example
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -72,6 +74,8 @@ sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
 tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
 lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
 xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
+test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
+per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -105,6 +109,8 @@ always += trace_event_kern.o
 always += sampleip_kern.o
 always += lwt_len_hist_kern.o
 always += xdp_tx_iptunnel_kern.o
+always += test_map_in_map_kern.o
+always += cookie_uid_helper_example.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -139,6 +145,7 @@ HOSTLOADLIBES_sampleip += -lelf
 HOSTLOADLIBES_tc_l2_redirect += -l elf
 HOSTLOADLIBES_lwt_len_hist += -l elf
 HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
+HOSTLOADLIBES_test_map_in_map += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
index faaffe2e139a989de6f90835121a725091d4a289..52de9d88c0213547da837deedac238023644342d 100644 (file)
@@ -80,6 +80,7 @@ struct bpf_map_def {
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
+       unsigned int inner_map_idx;
 };
 
 static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
index b86ee54da2d14d6ba0de18481d2c55ed1a70a67b..dcdce1270d38617af84087f532ba16a402d46124 100644 (file)
@@ -43,6 +43,7 @@ struct bpf_map_def {
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
+       unsigned int inner_map_idx;
 };
 
 static int populate_prog_array(const char *event, int prog_fd)
@@ -198,11 +199,22 @@ static int load_maps(struct bpf_map_def *maps, int len)
 
        for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
 
-               map_fd[i] = bpf_create_map(maps[i].type,
-                                          maps[i].key_size,
-                                          maps[i].value_size,
-                                          maps[i].max_entries,
-                                          maps[i].map_flags);
+               if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+                   maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+                       int inner_map_fd = map_fd[maps[i].inner_map_idx];
+
+                       map_fd[i] = bpf_create_map_in_map(maps[i].type,
+                                                         maps[i].key_size,
+                                                         inner_map_fd,
+                                                         maps[i].max_entries,
+                                                         maps[i].map_flags);
+               } else {
+                       map_fd[i] = bpf_create_map(maps[i].type,
+                                                  maps[i].key_size,
+                                                  maps[i].value_size,
+                                                  maps[i].max_entries,
+                                                  maps[i].map_flags);
+               }
                if (map_fd[i] < 0) {
                        printf("failed to create a map: %d %s\n",
                               errno, strerror(errno));
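load_maps() resolves inner_map_idx to an fd because the kernel requires a
template inner map to exist before the outer map can be created. A hedged
sketch of calling the new library helper directly (include paths and sizes
assumed, as in the other samples):

    #include <linux/bpf.h>
    #include <bpf/bpf.h>    /* tools/lib/bpf, assumed on the include path */

    static int create_array_of_maps(void)
    {
            int inner_fd, outer_fd;

            inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
                                      sizeof(int), 64, 0);
            if (inner_fd < 0)
                    return inner_fd;

            /* the outer map's value_size is fixed to 4 (an fd) inside
             * bpf_create_map_in_map(), so only the inner fd is passed
             */
            outer_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
                                             sizeof(__u32), inner_fd, 64, 0);
            return outer_fd;
    }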
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
new file mode 100644 (file)
index 0000000..f6e5e58
--- /dev/null
@@ -0,0 +1,218 @@
+/* This test is a demo of using the get_socket_uid and get_socket_cookie
+ * helper functions to do per-socket network traffic monitoring.
+ * It requires an iptables version higher than 1.6.1 to load a pinned
+ * eBPF program into the xt_bpf match.
+ *
+ * TEST:
+ * ./run_cookie_uid_helper_example.sh
+ * Then generate some traffic in various ways. "ping 0 -c 10" would work,
+ * but the cookie and uid in that case could both be 0. A sample output
+ * with some traffic generated by web browser is shown below:
+ *
+ * cookie: 877, uid: 0x3e8, Packet Count: 20, Bytes Count: 11058
+ * cookie: 132, uid: 0x0, Packet Count: 2, Bytes Count: 286
+ * cookie: 812, uid: 0x3e8, Packet Count: 3, Bytes Count: 1726
+ * cookie: 802, uid: 0x3e8, Packet Count: 2, Bytes Count: 104
+ * cookie: 877, uid: 0x3e8, Packet Count: 20, Bytes Count: 11058
+ * cookie: 831, uid: 0x3e8, Packet Count: 2, Bytes Count: 104
+ * cookie: 0, uid: 0x0, Packet Count: 6, Bytes Count: 712
+ * cookie: 880, uid: 0xfffe, Packet Count: 1, Bytes Count: 70
+ *
+ * Clean up: if run via the shell script, the script deletes the iptables
+ * rule and unmounts the bpf program on exit. Otherwise the iptables rule
+ * must be deleted by hand; see run_cookie_uid_helper_example.sh for details.
+ */
+
+#define _GNU_SOURCE
+
+#define offsetof(type, member) __builtin_offsetof(type, member)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include "libbpf.h"
+
+struct stats {
+       uint32_t uid;
+       uint64_t packets;
+       uint64_t bytes;
+};
+
+static int map_fd, prog_fd;
+
+static void maps_create(void)
+{
+       map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t),
+                               sizeof(struct stats), 100, 0);
+       if (map_fd < 0)
+               error(1, errno, "map create failed!\n");
+}
+
+static void prog_load(void)
+{
+       static char log_buf[1 << 16];
+
+       struct bpf_insn prog[] = {
+               /*
+                * Save sk_buff for future use. Values stored in R6 to R10
+                * are not clobbered by a bpf helper function call.
+                */
+               BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+               /*
+                * pc1: BPF_FUNC_get_socket_cookie takes one parameter,
+                * R1: sk_buff
+                */
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                               BPF_FUNC_get_socket_cookie),
+               /* pc2-4: save &socket_cookie to r7 for future use */
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+               BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+               BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+               /*
+                * pc5-8: set up the registers for BPF_FUNC_map_lookup_elem,
+                * it takes two parameters (R1: map_fd,  R2: &socket_cookie)
+                */
+               BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+               BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                               BPF_FUNC_map_lookup_elem),
+               /*
+                * pc9: if r0 != 0x0, go to pc+14, since the cookie is
+                * stored already.
+                * Otherwise do pc10-22 to set up a new data entry.
+                */
+               BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 14),
+               BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                               BPF_FUNC_get_socket_uid),
+               /*
+                * Place a struct stats on the R10 stack and fill in its
+                * members sequentially. The packets value is set by
+                * directly placing the IMM value 1 onto the stack.
+                */
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0,
+                               -32 + offsetof(struct stats, uid)),
+               BPF_ST_MEM(BPF_DW, BPF_REG_10,
+                               -32 + offsetof(struct stats, packets), 1),
+               /*
+                * __sk_buff is a special struct used by eBPF programs to
+                * directly access some sk_buff fields.
+                */
+               BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+                               offsetof(struct __sk_buff, len)),
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1,
+                               -32 + offsetof(struct stats, bytes)),
+               /*
+                * Add a new map entry using BPF_FUNC_map_update_elem; it takes
+                * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats,
+                * R4: flags)
+                */
+               BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+               BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+               BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+               BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32),
+               BPF_MOV64_IMM(BPF_REG_4, 0),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                               BPF_FUNC_map_update_elem),
+               BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+               /*
+                * pc24-30: update the packet info of an existing data entry;
+                * this can be done by writing through pointers directly
+                * instead of using the BPF_FUNC_map_update_elem helper
+                */
+               BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+               BPF_MOV64_IMM(BPF_REG_1, 1),
+               BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
+                               offsetof(struct stats, packets)),
+               BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+                               offsetof(struct __sk_buff, len)),
+               BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
+                               offsetof(struct stats, bytes)),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+                               offsetof(struct __sk_buff, len)),
+               BPF_EXIT_INSN(),
+       };
+       prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+                                       ARRAY_SIZE(prog), "GPL", 0,
+                                       log_buf, sizeof(log_buf));
+       if (prog_fd < 0)
+               error(1, errno, "failed to load prog\n%s\n", log_buf);
+}
+
+static void prog_attach_iptables(char *file)
+{
+       int ret;
+       char rules[128];
+
+       if (bpf_obj_pin(prog_fd, file))
+               error(1, errno, "bpf_obj_pin");
+       if (strlen(file) > 50) {
+               printf("file path too long: %s\n", file);
+               exit(1);
+       }
+       sprintf(rules, "iptables -A INPUT -m bpf --object-pinned %s -j ACCEPT",
+               file);
+       ret = system(rules);
+       if (ret < 0) {
+               printf("iptables rule update failed: %d/n", WEXITSTATUS(ret));
+               exit(1);
+       }
+}
+
+static void print_table(void)
+{
+       struct stats curEntry;
+       uint32_t curN = UINT32_MAX;
+       uint32_t nextN;
+       int res;
+
+       while (bpf_map_get_next_key(map_fd, &curN, &nextN) > -1) {
+               curN = nextN;
+               res = bpf_map_lookup_elem(map_fd, &curN, &curEntry);
+               if (res < 0) {
+                       error(1, errno, "fail to get entry value of Key: %u\n",
+                               curN);
+               } else {
+                       printf("cookie: %u, uid: 0x%x, Packet Count: %lu,"
+                               " Bytes Count: %lu\n", curN, curEntry.uid,
+                               curEntry.packets, curEntry.bytes);
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       if (argc > 2) {
+               printf("Too many argument provided\n");
+               return 1;
+       } else if (argc < 2) {
+               printf("Usage: %s bpfObjName\n", argv[0]);
+               return 1;
+       }
+
+       maps_create();
+       prog_load();
+       prog_attach_iptables(argv[1]);
+
+       while (true) {
+               print_table();
+               printf("\n");
+               sleep(1);
+       };
+
+       return 0;
+}
index 3705fba453a005fb32f5dfb51dd5763f5f364ccf..8ab36a04c174a9c154594b7ef4fe2ef2c5c5dc5e 100644 (file)
@@ -135,6 +135,16 @@ struct bpf_insn;
                .off   = OFF,                                   \
                .imm   = 0 })
 
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)                      \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,   \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = 0 })
+
 /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 
 #define BPF_ST_MEM(SIZE, DST, OFF, IMM)                                \
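For a concrete use of the new macro, see prog_load() in the sample added
above, where pc24-30 bump a live map entry in place:

    BPF_MOV64_IMM(BPF_REG_1, 1),
    BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
                    offsetof(struct stats, packets)),

i.e. an atomic stats->packets += 1 through the pointer returned by
map_lookup_elem, with no BPF_FUNC_map_update_elem round trip.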
index a91872a97742a6413c316548c66ab8349ba1aff0..9da2a3441b0a2e88eb63c3b60b2e8b76949b72fe 100644 (file)
@@ -65,6 +65,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
        .map_flags = BPF_F_NO_PREALLOC,
 };
 
+struct bpf_map_def SEC("maps") array_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(long),
+       .max_entries = MAX_ENTRIES,
+};
+
 SEC("kprobe/sys_getuid")
 int stress_hmap(struct pt_regs *ctx)
 {
@@ -165,5 +172,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
        return 0;
 }
 
+SEC("kprobe/sys_getpgid")
+int stress_hash_map_lookup(struct pt_regs *ctx)
+{
+       u32 key = 1, i;
+       long *value;
+
+#pragma clang loop unroll(full)
+       for (i = 0; i < 64; ++i)
+               value = bpf_map_lookup_elem(&hash_map, &key);
+
+       return 0;
+}
+
+SEC("kprobe/sys_getpgrp")
+int stress_array_map_lookup(struct pt_regs *ctx)
+{
+       u32 key = 1, i;
+       long *value;
+
+#pragma clang loop unroll(full)
+       for (i = 0; i < 64; ++i)
+               value = bpf_map_lookup_elem(&array_map, &key);
+
+       return 0;
+}
+
 char _license[] SEC("license") = "GPL";
 u32 _version SEC("version") = LINUX_VERSION_CODE;
index 680260a91f50c893dd26a1b968cc220a299c530f..e29ff318a79365ae5f84e833b45f9d2a46895421 100644 (file)
@@ -38,6 +38,8 @@ static __u64 time_get_ns(void)
 #define LRU_HASH_PREALLOC      (1 << 4)
 #define PERCPU_LRU_HASH_PREALLOC       (1 << 5)
 #define LPM_KMALLOC            (1 << 6)
+#define HASH_LOOKUP            (1 << 7)
+#define ARRAY_LOOKUP           (1 << 8)
 
 static int test_flags = ~0;
 
@@ -125,6 +127,30 @@ static void test_lpm_kmalloc(int cpu)
               cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
+static void test_hash_lookup(int cpu)
+{
+       __u64 start_time;
+       int i;
+
+       start_time = time_get_ns();
+       for (i = 0; i < MAX_CNT; i++)
+               syscall(__NR_getpgid, 0);
+       printf("%d:hash_lookup %lld lookups per sec\n",
+              cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+}
+
+static void test_array_lookup(int cpu)
+{
+       __u64 start_time;
+       int i;
+
+       start_time = time_get_ns();
+       for (i = 0; i < MAX_CNT; i++)
+               syscall(__NR_getpgrp, 0);
+       printf("%d:array_lookup %lld lookups per sec\n",
+              cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+}
+
 static void loop(int cpu)
 {
        cpu_set_t cpuset;
@@ -153,6 +179,12 @@ static void loop(int cpu)
 
        if (test_flags & LPM_KMALLOC)
                test_lpm_kmalloc(cpu);
+
+       if (test_flags & HASH_LOOKUP)
+               test_hash_lookup(cpu);
+
+       if (test_flags & ARRAY_LOOKUP)
+               test_array_lookup(cpu);
 }
 
 static void run_perf_test(int tasks)
diff --git a/samples/bpf/run_cookie_uid_helper_example.sh b/samples/bpf/run_cookie_uid_helper_example.sh
new file mode 100644 (file)
index 0000000..40da8aa
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+local_dir="$(pwd)"
+root_dir=$local_dir/../..
+mnt_dir=$(mktemp -d --tmp)
+
+on_exit() {
+       iptables -D INPUT -m bpf --object-pinned ${mnt_dir}/bpf_prog -j ACCEPT
+       umount ${mnt_dir}
+       rm -r ${mnt_dir}
+}
+
+trap on_exit EXIT
+mount -t bpf bpf ${mnt_dir}
+./per_socket_stats_example ${mnt_dir}/bpf_prog
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
new file mode 100644 (file)
index 0000000..42c44d0
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/in6.h>
+#include "bpf_helpers.h"
+
+#define MAX_NR_PORTS 65536
+
+/* map #0 */
+struct bpf_map_def SEC("maps") port_a = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(int),
+       .max_entries = MAX_NR_PORTS,
+};
+
+/* map #1 */
+struct bpf_map_def SEC("maps") port_h = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+/* map #2 */
+struct bpf_map_def SEC("maps") reg_result_h = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+/* map #3 */
+struct bpf_map_def SEC("maps") inline_result_h = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+/* map #4 */ /* Test case #0 */
+struct bpf_map_def SEC("maps") a_of_port_a = {
+       .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+       .key_size = sizeof(u32),
+       .inner_map_idx = 0, /* map_fd[0] is port_a */
+       .max_entries = MAX_NR_PORTS,
+};
+
+/* map #5 */ /* Test case #1 */
+struct bpf_map_def SEC("maps") h_of_port_a = {
+       .type = BPF_MAP_TYPE_HASH_OF_MAPS,
+       .key_size = sizeof(u32),
+       .inner_map_idx = 0, /* map_fd[0] is port_a */
+       .max_entries = 1,
+};
+
+/* map #6 */ /* Test case #2 */
+struct bpf_map_def SEC("maps") h_of_port_h = {
+       .type = BPF_MAP_TYPE_HASH_OF_MAPS,
+       .key_size = sizeof(u32),
+       .inner_map_idx = 1, /* map_fd[1] is port_h */
+       .max_entries = 1,
+};
+
+static __always_inline int do_reg_lookup(void *inner_map, u32 port)
+{
+       int *result;
+
+       result = bpf_map_lookup_elem(inner_map, &port);
+       return result ? *result : -ENOENT;
+}
+
+static __always_inline int do_inline_array_lookup(void *inner_map, u32 port)
+{
+       int *result;
+
+       if (inner_map != &port_a)
+               return -EINVAL;
+
+       result = bpf_map_lookup_elem(&port_a, &port);
+       return result ? *result : -ENOENT;
+}
+
+static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
+{
+       int *result;
+
+       if (inner_map != &port_h)
+               return -EINVAL;
+
+       result = bpf_map_lookup_elem(&port_h, &port);
+       return result ? *result : -ENOENT;
+}
+
+SEC("kprobe/sys_connect")
+int trace_sys_connect(struct pt_regs *ctx)
+{
+       struct sockaddr_in6 *in6;
+       u16 test_case, port, dst6[8];
+       int addrlen, ret, inline_ret, ret_key = 0;
+       u32 port_key;
+       void *outer_map, *inner_map;
+       bool inline_hash = false;
+
+       in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
+       addrlen = (int)PT_REGS_PARM3(ctx);
+
+       if (addrlen != sizeof(*in6))
+               return 0;
+
+       ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+       if (ret) {
+               inline_ret = ret;
+               goto done;
+       }
+
+       if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
+               return 0;
+
+       test_case = dst6[7];
+
+       ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+       if (ret) {
+               inline_ret = ret;
+               goto done;
+       }
+
+       port_key = port;
+
+       ret = -ENOENT;
+       if (test_case == 0) {
+               outer_map = &a_of_port_a;
+       } else if (test_case == 1) {
+               outer_map = &h_of_port_a;
+       } else if (test_case == 2) {
+               outer_map = &h_of_port_h;
+       } else {
+               ret = __LINE__;
+               inline_ret = ret;
+               goto done;
+       }
+
+       inner_map = bpf_map_lookup_elem(outer_map, &port_key);
+       if (!inner_map) {
+               ret = __LINE__;
+               inline_ret = ret;
+               goto done;
+       }
+
+       ret = do_reg_lookup(inner_map, port_key);
+
+       if (test_case == 0 || test_case == 1)
+               inline_ret = do_inline_array_lookup(inner_map, port_key);
+       else
+               inline_ret = do_inline_hash_lookup(inner_map, port_key);
+
+done:
+       bpf_map_update_elem(&reg_result_h, &ret_key, &ret, BPF_ANY);
+       bpf_map_update_elem(&inline_result_h, &ret_key, &inline_ret, BPF_ANY);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
new file mode 100644 (file)
index 0000000..f62fdc2
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define PORT_A         (map_fd[0])
+#define PORT_H         (map_fd[1])
+#define REG_RESULT_H   (map_fd[2])
+#define INLINE_RESULT_H        (map_fd[3])
+#define A_OF_PORT_A    (map_fd[4]) /* Test case #0 */
+#define H_OF_PORT_A    (map_fd[5]) /* Test case #1 */
+#define H_OF_PORT_H    (map_fd[6]) /* Test case #2 */
+
+static const char * const test_names[] = {
+       "Array of Array",
+       "Hash of Array",
+       "Hash of Hash",
+};
+
+#define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
+
+static void populate_map(uint32_t port_key, int magic_result)
+{
+       int ret;
+
+       ret = bpf_map_update_elem(PORT_A, &port_key, &magic_result, BPF_ANY);
+       assert(!ret);
+
+       ret = bpf_map_update_elem(PORT_H, &port_key, &magic_result,
+                                 BPF_NOEXIST);
+       assert(!ret);
+
+       ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY);
+       assert(!ret);
+
+       ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST);
+       assert(!ret);
+
+       ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST);
+       assert(!ret);
+}
+
+static void test_map_in_map(void)
+{
+       struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
+       uint32_t result_key = 0, port_key;
+       int result, inline_result;
+       int magic_result = 0xfaceb00c;
+       int ret;
+       int i;
+
+       port_key = rand() & 0x00FF;
+       populate_map(port_key, magic_result);
+
+       in6.sin6_addr.s6_addr16[0] = 0xdead;
+       in6.sin6_addr.s6_addr16[1] = 0xbeef;
+       in6.sin6_port = port_key;
+
+       for (i = 0; i < NR_TESTS; i++) {
+               printf("%s: ", test_names[i]);
+
+               in6.sin6_addr.s6_addr16[7] = i;
+               ret = connect(-1, (struct sockaddr *)&in6, sizeof(in6));
+               assert(ret == -1 && errno == EBADF);
+
+               ret = bpf_map_lookup_elem(REG_RESULT_H, &result_key, &result);
+               assert(!ret);
+
+               ret = bpf_map_lookup_elem(INLINE_RESULT_H, &result_key,
+                                         &inline_result);
+               assert(!ret);
+
+               if (result != magic_result || inline_result != magic_result) {
+                       printf("Error. result:%d inline_result:%d\n",
+                              result, inline_result);
+                       exit(1);
+               }
+
+               bpf_map_delete_elem(REG_RESULT_H, &result_key);
+               bpf_map_delete_elem(INLINE_RESULT_H, &result_key);
+
+               printf("Pass\n");
+       }
+}
+
+int main(int argc, char **argv)
+{
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       char filename[256];
+
+       assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       test_map_in_map();
+
+       return 0;
+}
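One non-obvious trick in the harness above: connect() is deliberately called
with fd -1, so the syscall fails with EBADF, but the kprobe on sys_connect has
already fired at function entry and the BPF side reads the sockaddr with
bpf_probe_read(); no real socket is ever created. The dead:beef prefix plus
dst6[7] then select which of the three outer maps gets exercised.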
index 2ca9cde939d44976365aa67fe72f51be9b92897d..8e67bb4c9caba658b18f9d1dd0593907c8556c27 100644 (file)
@@ -69,6 +69,7 @@ static struct nlmsg_perm nlmsg_route_perms[] =
        { RTM_GETDCB,           NETLINK_ROUTE_SOCKET__NLMSG_READ  },
        { RTM_SETDCB,           NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_NEWNETCONF,       NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+       { RTM_DELNETCONF,       NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_GETNETCONF,       NETLINK_ROUTE_SOCKET__NLMSG_READ  },
        { RTM_NEWMDB,           NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_DELMDB,           NETLINK_ROUTE_SOCKET__NLMSG_WRITE  },
index 4c935202ce23be4fc57c9d79c3fe0a160d9b8a89..f3b1d7f50b81156d4c29c5c0958a884e9292bbae 100644 (file)
@@ -1832,6 +1832,7 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client,
             info->output_pool != client->pool->size)) {
                if (snd_seq_write_pool_allocated(client)) {
                        /* remove all existing cells */
+                       snd_seq_pool_mark_closing(client->pool);
                        snd_seq_queue_client_leave_cells(client->number);
                        snd_seq_pool_done(client->pool);
                }
index 448efd4e980edf97138b43b6263a9909d07c076a..33980d1c803796a3a77cf2ee69c35a9b5fcf1c9e 100644 (file)
@@ -72,6 +72,9 @@ void snd_seq_fifo_delete(struct snd_seq_fifo **fifo)
                return;
        *fifo = NULL;
 
+       if (f->pool)
+               snd_seq_pool_mark_closing(f->pool);
+
        snd_seq_fifo_clear(f);
 
        /* wake up clients if any */
index 1a1acf3ddda4c9aeb022548b9438498e0f036762..d4c61ec9be13d7389addd27bc70acf58bda2eecc 100644 (file)
@@ -415,6 +415,18 @@ int snd_seq_pool_init(struct snd_seq_pool *pool)
        return 0;
 }
 
+/* refuse further insertions to the pool */
+void snd_seq_pool_mark_closing(struct snd_seq_pool *pool)
+{
+       unsigned long flags;
+
+       if (snd_BUG_ON(!pool))
+               return;
+       spin_lock_irqsave(&pool->lock, flags);
+       pool->closing = 1;
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
 /* remove events */
 int snd_seq_pool_done(struct snd_seq_pool *pool)
 {
@@ -425,10 +437,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool)
                return -EINVAL;
 
        /* wait for closing all threads */
-       spin_lock_irqsave(&pool->lock, flags);
-       pool->closing = 1;
-       spin_unlock_irqrestore(&pool->lock, flags);
-
        if (waitqueue_active(&pool->output_sleep))
                wake_up(&pool->output_sleep);
 
@@ -485,6 +493,7 @@ int snd_seq_pool_delete(struct snd_seq_pool **ppool)
        *ppool = NULL;
        if (pool == NULL)
                return 0;
+       snd_seq_pool_mark_closing(pool);
        snd_seq_pool_done(pool);
        kfree(pool);
        return 0;
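The split matters for ordering: the pool is flagged as closing, under
pool->lock, before teardown begins, so writers that check the flag under the
same lock can no longer insert cells into a pool that is being torn down. A
user-space toy of the flag-under-lock pattern (names hypothetical):

    #include <pthread.h>
    #include <stdbool.h>

    struct pool {
            pthread_mutex_t lock;
            bool closing;
    };

    static void pool_mark_closing(struct pool *p)
    {
            pthread_mutex_lock(&p->lock);
            p->closing = true;      /* visible to all writers from here on */
            pthread_mutex_unlock(&p->lock);
    }

    static bool pool_try_insert(struct pool *p)
    {
            bool ok;

            pthread_mutex_lock(&p->lock);
            ok = !p->closing;       /* refuse insertion once closing */
            pthread_mutex_unlock(&p->lock);
            return ok;
    }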
index 4a2ec779b8a701b1aba2402e9de583878f7a39ca..32f959c17786d9ac8c071ba0e6fd070dc06da78b 100644 (file)
@@ -84,6 +84,7 @@ static inline int snd_seq_total_cells(struct snd_seq_pool *pool)
 int snd_seq_pool_init(struct snd_seq_pool *pool);
 
 /* done pool - free events */
+void snd_seq_pool_mark_closing(struct snd_seq_pool *pool);
 int snd_seq_pool_done(struct snd_seq_pool *pool);
 
 /* create pool */
index ab4cdab5cfa57abf3db2a8da806d0bf7031fed67..79edd88d5cd08398afb86c63b3d32b025162164e 100644 (file)
@@ -1905,7 +1905,7 @@ static int hw_card_start(struct hw *hw)
                return err;
 
        /* Set DMA transfer mask */
-       if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) {
+       if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) {
                dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits));
        } else {
                dma_set_mask(&pci->dev, DMA_BIT_MASK(32));
index c15c51bea26d0afdcc6d8c806993754eaaa2e031..69266b8ea2ad7b498097c4bc231fbad6e55ff37e 100644 (file)
@@ -261,6 +261,7 @@ enum {
        CXT_FIXUP_HP_530,
        CXT_FIXUP_CAP_MIX_AMP_5047,
        CXT_FIXUP_MUTE_LED_EAPD,
+       CXT_FIXUP_HP_DOCK,
        CXT_FIXUP_HP_SPECTRE,
        CXT_FIXUP_HP_GATE_MIC,
 };
@@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_mute_led_eapd,
        },
+       [CXT_FIXUP_HP_DOCK] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x16, 0x21011020 }, /* line-out */
+                       { 0x18, 0x2181103f }, /* line-in */
+                       { }
+               }
+       },
        [CXT_FIXUP_HP_SPECTRE] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
        SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+       SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
        SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
        SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
@@ -871,6 +881,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
        { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" },
        { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" },
        { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
+       { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
        {}
 };
 
index 4e112221d825462ef7e1ae38c092e40e58667f38..7f989898cbd9aa6fcf16360f7cd8b48d6f87e681 100644 (file)
@@ -4847,6 +4847,7 @@ enum {
        ALC286_FIXUP_HP_GPIO_LED,
        ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
        ALC280_FIXUP_HP_DOCK_PINS,
+       ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED,
        ALC280_FIXUP_HP_9480M,
        ALC288_FIXUP_DELL_HEADSET_MODE,
        ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -5388,6 +5389,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC280_FIXUP_HP_GPIO4
        },
+       [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x21011020 }, /* line-out */
+                       { 0x18, 0x2181103f }, /* line-in */
+                       { },
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED
+       },
        [ALC280_FIXUP_HP_9480M] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc280_fixup_hp_9480m,
@@ -5647,7 +5658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
        SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
        SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
-       SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+       SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED),
        SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -5816,6 +5827,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
        {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
        {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
+       {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
        {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
        {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
        {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
@@ -6090,6 +6102,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                ALC295_STANDARD_PINS,
                {0x17, 0x21014040},
                {0x18, 0x21a19050}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC295_STANDARD_PINS),
        SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC298_STANDARD_PINS,
                {0x17, 0x90170110}),
index 84c8f8fc597cd6046d17ee27839024b98329e33d..8adf4d1bd46e71237e6827f0eda8fc89e7b40ea1 100644 (file)
@@ -1,6 +1,7 @@
 menuconfig SND_X86
-       tristate "X86 sound devices"
+       bool "X86 sound devices"
        depends on X86
+       default y
        ---help---
          X86 sound devices that don't fall under SoC or PCI categories
 
index 4aa5369ffa4ef295a8841edd380c7f678de80893..d85968cb1bf235feb1352809ac9d7229c152d74a 100755 (executable)
@@ -101,9 +101,25 @@ function create_bond_cfg_redhat {
        echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
 }
 
+function del_eth_cfg_ubuntu {
+       local fn=$cfgdir/interfaces
+       local tmpfl=$(mktemp)
+
+       local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
+       local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
+
+       awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1"  $fn >$tmpfl
+
+       cp $tmpfl $fn
+
+       rm $tmpfl
+}
+
 function create_eth_cfg_ubuntu {
        local fn=$cfgdir/interfaces
 
+       del_eth_cfg_ubuntu $1
+
        echo $'\n'auto $1 >>$fn
        echo iface $1 inet manual >>$fn
        echo bond-master $2 >>$fn
@@ -119,6 +135,8 @@ function create_eth_cfg_pri_ubuntu {
 function create_bond_cfg_ubuntu {
        local fn=$cfgdir/interfaces
 
+       del_eth_cfg_ubuntu $1
+
        echo $'\n'auto $1 >>$fn
        echo iface $1 inet dhcp >>$fn
        echo bond-mode active-backup >>$fn
index 0539a0ceef38155835552360667070552ebce641..a1d95386f562fe7ec7e5a2783346f55c8a1cfbd9 100644 (file)
@@ -81,6 +81,7 @@ enum bpf_cmd {
        BPF_OBJ_GET,
        BPF_PROG_ATTACH,
        BPF_PROG_DETACH,
+       BPF_PROG_TEST_RUN,
 };
 
 enum bpf_map_type {
@@ -96,6 +97,8 @@ enum bpf_map_type {
        BPF_MAP_TYPE_LRU_HASH,
        BPF_MAP_TYPE_LRU_PERCPU_HASH,
        BPF_MAP_TYPE_LPM_TRIE,
+       BPF_MAP_TYPE_ARRAY_OF_MAPS,
+       BPF_MAP_TYPE_HASH_OF_MAPS,
 };
 
 enum bpf_prog_type {
@@ -152,6 +155,7 @@ union bpf_attr {
                __u32   value_size;     /* size of value in bytes */
                __u32   max_entries;    /* max number of entries in a map */
                __u32   map_flags;      /* prealloc or not */
+               __u32   inner_map_fd;   /* fd pointing to the inner map */
        };
 
        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -186,6 +190,17 @@ union bpf_attr {
                __u32           attach_type;
                __u32           attach_flags;
        };
+
+       struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+               __u32           prog_fd;
+               __u32           retval;
+               __u32           data_size_in;
+               __u32           data_size_out;
+               __aligned_u64   data_in;
+               __aligned_u64   data_out;
+               __u32           repeat;
+               __u32           duration;
+       } test;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -456,6 +471,18 @@ union bpf_attr {
  *     Return:
  *       > 0 length of the string including the trailing NUL on success
  *       < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ *     Get the cookie for the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: an 8-byte non-decreasing number on success, or 0 if the
+ *     socket field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ *     Get the owner uid of the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: uid of the socket owner on success, or the overflow uid
+ *     (65534) if the socket pointer inside sk_buff is NULL
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -503,7 +530,9 @@ union bpf_attr {
        FN(get_numa_node_id),           \
        FN(skb_change_head),            \
        FN(xdp_adjust_head),            \
-       FN(probe_read_str),
+       FN(probe_read_str),             \
+       FN(get_socket_cookie),          \
+       FN(get_socket_uid),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
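A hedged restricted-C sketch of how the two new helpers might be called from a
socket filter; SEC() and the helper prototypes are assumed to be declared as
in samples/bpf/bpf_helpers.h:

    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("socket")
    int watch_sock(struct __sk_buff *skb)
    {
            __u64 cookie = bpf_get_socket_cookie(skb);  /* 0 without a socket */
            __u32 uid = bpf_get_socket_uid(skb);

            /* toy policy: accept only traffic whose owning socket is root's */
            if (!cookie || uid != 0)
                    return 0;                           /* drop */
            return skb->len;                            /* accept whole packet */
    }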
index 207c2eeddab064d7c304efed09653ba6e227d6e9..f84c398c11f4c3f1637c32f07d4ecd4da4df74ef 100644 (file)
@@ -69,6 +69,23 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
        return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
+int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
+                         int inner_map_fd, int max_entries, __u32 map_flags)
+{
+       union bpf_attr attr;
+
+       memset(&attr, '\0', sizeof(attr));
+
+       attr.map_type = map_type;
+       attr.key_size = key_size;
+       attr.value_size = 4;
+       attr.inner_map_fd = inner_map_fd;
+       attr.max_entries = max_entries;
+       attr.map_flags = map_flags;
+
+       return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
                     size_t insns_cnt, const char *license,
                     __u32 kern_version, char *log_buf, size_t log_buf_sz)
@@ -192,3 +209,27 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 
        return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
 }
+
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+                     void *data_out, __u32 *size_out, __u32 *retval,
+                     __u32 *duration)
+{
+       union bpf_attr attr;
+       int ret;
+
+       bzero(&attr, sizeof(attr));
+       attr.test.prog_fd = prog_fd;
+       attr.test.data_in = ptr_to_u64(data);
+       attr.test.data_out = ptr_to_u64(data_out);
+       attr.test.data_size_in = size;
+       attr.test.repeat = repeat;
+
+       ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+       if (size_out)
+               *size_out = attr.test.data_size_out;
+       if (retval)
+               *retval = attr.test.retval;
+       if (duration)
+               *duration = attr.test.duration;
+       return ret;
+}
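
A minimal calling sketch for the new wrapper, assuming prog_fd refers to
an already-loaded program and pkt/pkt_size describe an input packet
(test_progs.c below uses exactly this pattern):

	char out[128];
	__u32 out_size = 0, retval = 0, duration = 0;
	int err;

	err = bpf_prog_test_run(prog_fd, 1 /* repeat */, pkt, pkt_size,
				out, &out_size, &retval, &duration);
	/* retval is the program's return code (e.g. XDP_TX); duration is
	 * the average run time in nanoseconds over the repeat runs
	 */
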
index 09c3dcac04963e6973d7a002e54b6a139af6212a..edb4daeff7a52c44f6bc366c265a7f5a3aadfc10 100644 (file)
@@ -26,6 +26,8 @@
 
 int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
                   int max_entries, __u32 map_flags);
+int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
+                         int inner_map_fd, int max_entries, __u32 map_flags);
 
 /* Recommended log buffer size */
 #define BPF_LOG_BUF_SIZE 65536
@@ -45,6 +47,8 @@ int bpf_obj_get(const char *pathname);
 int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
                    unsigned int flags);
 int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
-
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+                     void *data_out, __u32 *size_out, __u32 *retval,
+                     __u32 *duration);
 
 #endif
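
For bpf_create_map_in_map(), the inner map fd acts only as a type
template for the outer map and can be closed once the outer map exists,
mirroring what create_map_in_map() in test_verifier.c further down does:

	int inner_fd, outer_fd;

	inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
				  sizeof(int), 1, 0);
	outer_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
					 sizeof(int), inner_fd, 1, 0);
	close(inner_fd);	/* template no longer needed */
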
index ac6eb863b2a40df00c4ead9f48c872ab67949c19..1a2c07eb7795bb4fb43e4a97bdc721d7cbc7f3b8 100644 (file)
@@ -1618,8 +1618,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
        return fd;
 }
 
-static void bpf_program__set_type(struct bpf_program *prog,
-                                 enum bpf_prog_type type)
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 {
        prog->type = type;
 }
index b30394f9947a35356af870223664a4409f8e5fd6..32c7252f734e42f9895c4686c4236f4b259d99a3 100644 (file)
@@ -25,6 +25,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <sys/types.h>  // for size_t
+#include <linux/bpf.h>
 
 enum libbpf_errno {
        __LIBBPF_ERRNO__START = 4000,
@@ -185,6 +186,7 @@ int bpf_program__set_sched_cls(struct bpf_program *prog);
 int bpf_program__set_sched_act(struct bpf_program *prog);
 int bpf_program__set_xdp(struct bpf_program *prog);
 int bpf_program__set_perf_event(struct bpf_program *prog);
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
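
Exporting bpf_program__set_type() lets a caller override the program
type between open and load; the bpf_prog_load() helper in test_progs.c
below is the intended consumer. A sketch with error handling omitted:

	struct bpf_object *obj = bpf_object__open("prog.o");
	struct bpf_program *prog = bpf_program__next(NULL, obj);

	bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
	bpf_object__load(obj);
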
index 70e389bc4af71aa8f18ae67507fb65b5093a7f98..9b4d8ba22fed85f1f2bef6f5d47dc88cbb4df5d1 100644 (file)
@@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols)
 
        /* Last entry */
        if (curr->end == curr->start)
-               curr->end = roundup(curr->start, 4096);
+               curr->end = roundup(curr->start, 4096) + 4096;
 }
 
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
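
The extra + 4096 matters when the last symbol's start address is already
page-aligned: roundup(0x1000, 4096) returns 0x1000 itself, so the old
code left a zero-sized last symbol, while the new code gives it at least
a one-page span (end = 0x2000 in this example).
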
index 67531f47781b4069a33557f2f5a832b3aa57492a..32fb7a294f0fb01556f65338585e2a100835d425 100644 (file)
@@ -1,22 +1,32 @@
 LIBDIR := ../../../lib
-BPFOBJ := $(LIBDIR)/bpf/bpf.o
+BPFDIR := $(LIBDIR)/bpf
 
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
+CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) -I../../../include
+LDLIBS += -lcap -lelf
 
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
+
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o
 
 TEST_PROGS := test_kmod.sh
 
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/libbpf.a
 
-.PHONY: all clean force
+$(TEST_GEN_PROGS): $(BPFOBJ)
+
+.PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
 force:
 
 $(BPFOBJ): force
-       $(MAKE) -C $(dir $(BPFOBJ))
+       $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
 
-$(test_objs): $(BPFOBJ)
+CLANG ?= clang
 
-include ../lib.mk
+%.o: %.c
+       $(CLANG) -I../../../include/uapi -I../../../../samples/bpf/ \
+               -D__x86_64__ -Wno-compare-distinct-pointer-types \
+               -O2 -target bpf -c $< -o $@
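
With these rules, building and running the suite should look roughly
like this, assuming a clang on PATH that supports the bpf target (per
the CLANG ?= clang default above):

	$ make -C tools/testing/selftests/bpf
	$ cd tools/testing/selftests/bpf && ./test_progs
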
diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h
new file mode 100644 (file)
index 0000000..e4cd252
--- /dev/null
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _TEST_IPTNL_COMMON_H
+#define _TEST_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+       union {
+               __u32 v6[4];
+               __u32 v4;
+       } daddr;
+       __u16 dport;
+       __u16 family;
+       __u8 protocol;
+};
+
+struct iptnl_info {
+       union {
+               __u32 v6[4];
+               __u32 v4;
+       } saddr;
+       union {
+               __u32 v6[4];
+               __u32 v4;
+       } daddr;
+       __u16 family;
+       __u8 dmac[6];
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
new file mode 100644 (file)
index 0000000..368bfe8
--- /dev/null
@@ -0,0 +1,474 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+
+#define htons __builtin_bswap16
+#define ntohs __builtin_bswap16
+int _version SEC("version") = 1;
+
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+       return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* a copy-paste of jhash from the kernel sources, to make sure llvm
+ * can compile it into a valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)                   \
+{                                              \
+       a -= c;  a ^= rol32(c, 4);  c += b;     \
+       b -= a;  b ^= rol32(a, 6);  a += c;     \
+       c -= b;  c ^= rol32(b, 8);  b += a;     \
+       a -= c;  a ^= rol32(c, 16); c += b;     \
+       b -= a;  b ^= rol32(a, 19); a += c;     \
+       c -= b;  c ^= rol32(b, 4);  b += a;     \
+}
+
+#define __jhash_final(a, b, c)                 \
+{                                              \
+       c ^= b; c -= rol32(b, 14);              \
+       a ^= c; a -= rol32(c, 11);              \
+       b ^= a; b -= rol32(a, 25);              \
+       c ^= b; c -= rol32(b, 16);              \
+       a ^= c; a -= rol32(c, 4);               \
+       b ^= a; b -= rol32(a, 14);              \
+       c ^= b; c -= rol32(b, 24);              \
+}
+
+#define JHASH_INITVAL          0xdeadbeef
+
+typedef unsigned int u32;
+
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+       u32 a, b, c;
+       const unsigned char *k = key;
+
+       a = b = c = JHASH_INITVAL + length + initval;
+
+       while (length > 12) {
+               a += *(u32 *)(k);
+               b += *(u32 *)(k + 4);
+               c += *(u32 *)(k + 8);
+               __jhash_mix(a, b, c);
+               length -= 12;
+               k += 12;
+       }
+       switch (length) {
+       case 12: c += (u32)k[11]<<24;
+       case 11: c += (u32)k[10]<<16;
+       case 10: c += (u32)k[9]<<8;
+       case 9:  c += k[8];
+       case 8:  b += (u32)k[7]<<24;
+       case 7:  b += (u32)k[6]<<16;
+       case 6:  b += (u32)k[5]<<8;
+       case 5:  b += k[4];
+       case 4:  a += (u32)k[3]<<24;
+       case 3:  a += (u32)k[2]<<16;
+       case 2:  a += (u32)k[1]<<8;
+       case 1:  a += k[0];
+                __jhash_final(a, b, c);
+       case 0: /* Nothing left to add */
+               break;
+       }
+
+       return c;
+}
+
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+       a += initval;
+       b += initval;
+       c += initval;
+       __jhash_final(a, b, c);
+       return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+       return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+       union {
+               __be32 src;
+               __be32 srcv6[4];
+       };
+       union {
+               __be32 dst;
+               __be32 dstv6[4];
+       };
+       union {
+               __u32 ports;
+               __u16 port16[2];
+       };
+       __u8 proto;
+       __u8 flags;
+};
+
+struct ctl_value {
+       union {
+               __u64 value;
+               __u32 ifindex;
+               __u8 mac[6];
+       };
+};
+
+struct vip_meta {
+       __u32 flags;
+       __u32 vip_num;
+};
+
+struct real_definition {
+       union {
+               __be32 dst;
+               __be32 dstv6[4];
+       };
+       __u8 flags;
+};
+
+struct vip_stats {
+       __u64 bytes;
+       __u64 pkts;
+};
+
+struct eth_hdr {
+       unsigned char eth_dest[ETH_ALEN];
+       unsigned char eth_source[ETH_ALEN];
+       unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(struct vip),
+       .value_size = sizeof(struct vip_meta),
+       .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u32),
+       .max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct real_definition),
+       .max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct vip_stats),
+       .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct ctl_value),
+       .max_entries = CTL_MAP_SIZE,
+};
+
+static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
+                                            bool ipv6)
+{
+       if (ipv6)
+               return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+                                   pckt->ports, CH_RINGS_SIZE);
+       else
+               return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __always_inline bool get_packet_dst(struct real_definition **real,
+                                          struct packet_description *pckt,
+                                          struct vip_meta *vip_info,
+                                          bool is_ipv6)
+{
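+	/* two-stage lookup: the packet hash selects a slot in this vip's
+	 * ring (ch_rings); the slot's value indexes into the reals map
+	 */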
+       __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
+       __u32 key = RING_SIZE * vip_info->vip_num + hash;
+       __u32 *real_pos;
+
+       real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+       if (!real_pos)
+               return false;
+       key = *real_pos;
+       *real = bpf_map_lookup_elem(&reals, &key);
+       if (!(*real))
+               return false;
+       return true;
+}
+
+static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
+                                       struct packet_description *pckt)
+{
+       struct icmp6hdr *icmp_hdr;
+       struct ipv6hdr *ip6h;
+
+       icmp_hdr = data + off;
+       if (icmp_hdr + 1 > data_end)
+               return TC_ACT_SHOT;
+       if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+               return TC_ACT_OK;
+       off += sizeof(struct icmp6hdr);
+       ip6h = data + off;
+       if (ip6h + 1 > data_end)
+               return TC_ACT_SHOT;
+       pckt->proto = ip6h->nexthdr;
+       pckt->flags |= F_ICMP;
+       memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+       memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+       return TC_ACT_UNSPEC;
+}
+
+static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
+                                     struct packet_description *pckt)
+{
+       struct icmphdr *icmp_hdr;
+       struct iphdr *iph;
+
+       icmp_hdr = data + off;
+       if (icmp_hdr + 1 > data_end)
+               return TC_ACT_SHOT;
+       if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+           icmp_hdr->code != ICMP_FRAG_NEEDED)
+               return TC_ACT_OK;
+       off += sizeof(struct icmphdr);
+       iph = data + off;
+       if (iph + 1 > data_end)
+               return TC_ACT_SHOT;
+       if (iph->ihl != 5)
+               return TC_ACT_SHOT;
+       pckt->proto = iph->protocol;
+       pckt->flags |= F_ICMP;
+       pckt->src = iph->daddr;
+       pckt->dst = iph->saddr;
+       return TC_ACT_UNSPEC;
+}
+
+static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
+                                     struct packet_description *pckt)
+{
+       struct udphdr *udp;
+       udp = data + off;
+
+       if (udp + 1 > data_end)
+               return false;
+
+       if (!(pckt->flags & F_ICMP)) {
+               pckt->port16[0] = udp->source;
+               pckt->port16[1] = udp->dest;
+       } else {
+               pckt->port16[0] = udp->dest;
+               pckt->port16[1] = udp->source;
+       }
+       return true;
+}
+
+static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
+                                     struct packet_description *pckt)
+{
+       struct tcphdr *tcp;
+
+       tcp = data + off;
+       if (tcp + 1 > data_end)
+               return false;
+
+       if (tcp->syn)
+               pckt->flags |= F_SYN_SET;
+
+       if (!(pckt->flags & F_ICMP)) {
+               pckt->port16[0] = tcp->source;
+               pckt->port16[1] = tcp->dest;
+       } else {
+               pckt->port16[0] = tcp->dest;
+               pckt->port16[1] = tcp->source;
+       }
+       return true;
+}
+
+static __always_inline int process_packet(void *data, __u64 off, void *data_end,
+                                         bool is_ipv6, struct __sk_buff *skb)
+{
+       void *pkt_start = (void *)(long)skb->data;
+       struct packet_description pckt = {};
+       struct eth_hdr *eth = pkt_start;
+       struct bpf_tunnel_key tkey = {};
+       struct vip_stats *data_stats;
+       struct real_definition *dst;
+       struct vip_meta *vip_info;
+       struct ctl_value *cval;
+       __u32 v4_intf_pos = 1;
+       __u32 v6_intf_pos = 2;
+       struct ipv6hdr *ip6h;
+       struct vip vip = {};
+       struct iphdr *iph;
+       int tun_flag = 0;
+       __u16 pkt_bytes;
+       __u64 iph_len;
+       __u32 ifindex;
+       __u8 protocol;
+       __u32 vip_num;
+       int action;
+
+       tkey.tunnel_ttl = 64;
+       if (is_ipv6) {
+               ip6h = data + off;
+               if (ip6h + 1 > data_end)
+                       return TC_ACT_SHOT;
+
+               iph_len = sizeof(struct ipv6hdr);
+               protocol = ip6h->nexthdr;
+               pckt.proto = protocol;
+               pkt_bytes = ntohs(ip6h->payload_len);
+               off += iph_len;
+               if (protocol == IPPROTO_FRAGMENT) {
+                       return TC_ACT_SHOT;
+               } else if (protocol == IPPROTO_ICMPV6) {
+                       action = parse_icmpv6(data, data_end, off, &pckt);
+                       if (action >= 0)
+                               return action;
+                       off += IPV6_PLUS_ICMP_HDR;
+               } else {
+                       memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+                       memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+               }
+       } else {
+               iph = data + off;
+               if (iph + 1 > data_end)
+                       return TC_ACT_SHOT;
+               if (iph->ihl != 5)
+                       return TC_ACT_SHOT;
+
+               protocol = iph->protocol;
+               pckt.proto = protocol;
+               pkt_bytes = ntohs(iph->tot_len);
+               off += IPV4_HDR_LEN_NO_OPT;
+
+               if (iph->frag_off & PCKT_FRAGMENTED)
+                       return TC_ACT_SHOT;
+               if (protocol == IPPROTO_ICMP) {
+                       action = parse_icmp(data, data_end, off, &pckt);
+                       if (action >= 0)
+                               return action;
+                       off += IPV4_PLUS_ICMP_HDR;
+               } else {
+                       pckt.src = iph->saddr;
+                       pckt.dst = iph->daddr;
+               }
+       }
+       protocol = pckt.proto;
+
+       if (protocol == IPPROTO_TCP) {
+               if (!parse_tcp(data, off, data_end, &pckt))
+                       return TC_ACT_SHOT;
+       } else if (protocol == IPPROTO_UDP) {
+               if (!parse_udp(data, off, data_end, &pckt))
+                       return TC_ACT_SHOT;
+       } else {
+               return TC_ACT_SHOT;
+       }
+
+       if (is_ipv6)
+               memcpy(vip.daddr.v6, pckt.dstv6, 16);
+       else
+               vip.daddr.v4 = pckt.dst;
+
+       vip.dport = pckt.port16[1];
+       vip.protocol = pckt.proto;
+       vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+       if (!vip_info) {
+               vip.dport = 0;
+               vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+               if (!vip_info)
+                       return TC_ACT_SHOT;
+               pckt.port16[1] = 0;
+       }
+
+       if (vip_info->flags & F_HASH_NO_SRC_PORT)
+               pckt.port16[0] = 0;
+
+       if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+               return TC_ACT_SHOT;
+
+       if (dst->flags & F_IPV6) {
+               cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+               if (!cval)
+                       return TC_ACT_SHOT;
+               ifindex = cval->ifindex;
+               memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+               tun_flag = BPF_F_TUNINFO_IPV6;
+       } else {
+               cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+               if (!cval)
+                       return TC_ACT_SHOT;
+               ifindex = cval->ifindex;
+               tkey.remote_ipv4 = dst->dst;
+       }
+       vip_num = vip_info->vip_num;
+       data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+       if (!data_stats)
+               return TC_ACT_SHOT;
+       data_stats->pkts++;
+       data_stats->bytes += pkt_bytes;
+       bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+       *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+       return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct eth_hdr *eth = data;
+       __u32 eth_proto;
+       __u32 nh_off;
+
+       nh_off = sizeof(struct eth_hdr);
+       if (data + nh_off > data_end)
+               return TC_ACT_SHOT;
+       eth_proto = eth->eth_proto;
+       if (eth_proto == htons(ETH_P_IP))
+               return process_packet(data, nh_off, data_end, false, ctx);
+       else if (eth_proto == htons(ETH_P_IPV6))
+               return process_packet(data, nh_off, data_end, true, ctx);
+       else
+               return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
index cada17ac00b8e6b5af37554ea8489be6ffc873a2..a0aa2009b0e0a81e65672eb795039a2172484c55 100644 (file)
@@ -80,8 +80,9 @@ static void test_hashmap(int task, void *data)
        assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
        key = 2;
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
-       key = 1;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+       key = 3;
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+              errno == E2BIG);
 
        /* Check that key = 0 doesn't exist. */
        key = 0;
@@ -110,6 +111,24 @@ static void test_hashmap(int task, void *data)
        close(fd);
 }
 
+static void test_hashmap_sizes(int task, void *data)
+{
+       int fd, i, j;
+
+       for (i = 1; i <= 512; i <<= 1)
+               for (j = 1; j <= 1 << 18; j <<= 1) {
+                       fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+                                           2, map_flags);
+                       if (fd < 0) {
+                               printf("Failed to create hashmap key=%d value=%d '%s'\n",
+                                      i, j, strerror(errno));
+                               exit(1);
+                       }
+                       close(fd);
+                       usleep(10); /* give kernel time to destroy */
+               }
+}
+
 static void test_hashmap_percpu(int task, void *data)
 {
        unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -317,7 +336,10 @@ static void test_arraymap_percpu(int task, void *data)
 static void test_arraymap_percpu_many_keys(void)
 {
        unsigned int nr_cpus = bpf_num_possible_cpus();
-       unsigned int nr_keys = 20000;
+       /* keep nr_keys small, otherwise the test stresses the percpu
+        * allocator more than anything else
+        */
+       unsigned int nr_keys = 2000;
        long values[nr_cpus];
        int key, fd, i;
 
@@ -419,6 +441,7 @@ static void test_map_stress(void)
 {
        run_parallel(100, test_hashmap, NULL);
        run_parallel(100, test_hashmap_percpu, NULL);
+       run_parallel(100, test_hashmap_sizes, NULL);
 
        run_parallel(100, test_arraymap, NULL);
        run_parallel(100, test_arraymap_percpu, NULL);
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
new file mode 100644 (file)
index 0000000..fd1e083
--- /dev/null
@@ -0,0 +1,64 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+#define _htons __builtin_bswap16
+#define barrier() __asm__ __volatile__("": : :"memory")
+int _version SEC("version") = 1;
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+       void *data_end = (void *)(long)skb->data_end;
+       void *data = (void *)(long)skb->data;
+       struct ethhdr *eth = (struct ethhdr *)(data);
+       struct tcphdr *tcp = NULL;
+       __u8 proto = 255;
+       __u64 ihl_len;
+
+       if (eth + 1 > data_end)
+               return TC_ACT_SHOT;
+
+       if (eth->h_proto == _htons(ETH_P_IP)) {
+               struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+               if (iph + 1 > data_end)
+                       return TC_ACT_SHOT;
+               ihl_len = iph->ihl * 4;
+               proto = iph->protocol;
+               tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
+       } else if (eth->h_proto == _htons(ETH_P_IPV6)) {
+               struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
+
+               if (ip6h + 1 > data_end)
+                       return TC_ACT_SHOT;
+               ihl_len = sizeof(*ip6h);
+               proto = ip6h->nexthdr;
+               tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
+       }
+
+       if (tcp) {
+               if (((void *)(tcp) + 20) > data_end || proto != 6)
+                       return TC_ACT_SHOT;
+               barrier(); /* to force ordering of checks */
+               if (((void *)(tcp) + 18) > data_end)
+                       return TC_ACT_SHOT;
+               if (tcp->urg_ptr == 123)
+                       return TC_ACT_OK;
+       }
+
+       return TC_ACT_UNSPEC;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
new file mode 100644 (file)
index 0000000..5275d4a
--- /dev/null
@@ -0,0 +1,284 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/wait.h>
+#include <sys/resource.h>
+
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "test_iptunnel_common.h"
+#include "bpf_util.h"
+
+#define _htons __builtin_bswap16
+
+static int error_cnt, pass_cnt;
+
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+static struct {
+       struct ethhdr eth;
+       struct iphdr iph;
+       struct tcphdr tcp;
+} __packed pkt_v4 = {
+       .eth.h_proto = _htons(ETH_P_IP),
+       .iph.ihl = 5,
+       .iph.protocol = 6,
+       .iph.tot_len = _htons(MAGIC_BYTES),
+       .tcp.urg_ptr = 123,
+};
+
+/* ipv6 test vector */
+static struct {
+       struct ethhdr eth;
+       struct ipv6hdr iph;
+       struct tcphdr tcp;
+} __packed pkt_v6 = {
+       .eth.h_proto = _htons(ETH_P_IPV6),
+       .iph.nexthdr = 6,
+       .iph.payload_len = _htons(MAGIC_BYTES),
+       .tcp.urg_ptr = 123,
+};
+
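+/* note: CHECK() expects a __u32 'duration' variable in the caller's scope */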
+#define CHECK(condition, tag, format...) ({                            \
+       int __ret = !!(condition);                                      \
+       if (__ret) {                                                    \
+               error_cnt++;                                            \
+               printf("%s:FAIL:%s ", __func__, tag);                   \
+               printf(format);                                         \
+       } else {                                                        \
+               pass_cnt++;                                             \
+               printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+       }                                                               \
+})
+
+static int bpf_prog_load(const char *file, enum bpf_prog_type type,
+                        struct bpf_object **pobj, int *prog_fd)
+{
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err;
+
+       obj = bpf_object__open(file);
+       if (IS_ERR(obj)) {
+               error_cnt++;
+               return -ENOENT;
+       }
+
+       prog = bpf_program__next(NULL, obj);
+       if (!prog) {
+               bpf_object__close(obj);
+               error_cnt++;
+               return -ENOENT;
+       }
+
+       bpf_program__set_type(prog, type);
+       err = bpf_object__load(obj);
+       if (err) {
+               bpf_object__close(obj);
+               error_cnt++;
+               return -EINVAL;
+       }
+
+       *pobj = obj;
+       *prog_fd = bpf_program__fd(prog);
+       return 0;
+}
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+                       const char *name)
+{
+       struct bpf_map *map;
+
+       map = bpf_object__find_map_by_name(obj, name);
+       if (!map) {
+               printf("%s:FAIL:map '%s' not found\n", test, name);
+               error_cnt++;
+               return -1;
+       }
+       return bpf_map__fd(map);
+}
+
+static void test_pkt_access(void)
+{
+       const char *file = "./test_pkt_access.o";
+       struct bpf_object *obj;
+       __u32 duration, retval;
+       int err, prog_fd;
+
+       err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+       if (err)
+               return;
+
+       err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
+                               NULL, NULL, &retval, &duration);
+       CHECK(err || errno || retval, "ipv4",
+             "err %d errno %d retval %d duration %d\n",
+             err, errno, retval, duration);
+
+       err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
+                               NULL, NULL, &retval, &duration);
+       CHECK(err || errno || retval, "ipv6",
+             "err %d errno %d retval %d duration %d\n",
+             err, errno, retval, duration);
+       bpf_object__close(obj);
+}
+
+static void test_xdp(void)
+{
+       struct vip key4 = {.protocol = 6, .family = AF_INET};
+       struct vip key6 = {.protocol = 6, .family = AF_INET6};
+       struct iptnl_info value4 = {.family = AF_INET};
+       struct iptnl_info value6 = {.family = AF_INET6};
+       const char *file = "./test_xdp.o";
+       struct bpf_object *obj;
+       char buf[128];
+       struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
+       struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+       __u32 duration, retval, size;
+       int err, prog_fd, map_fd;
+
+       err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+       if (err)
+               return;
+
+       map_fd = bpf_find_map(__func__, obj, "vip2tnl");
+       if (map_fd < 0)
+               goto out;
+       bpf_map_update_elem(map_fd, &key4, &value4, 0);
+       bpf_map_update_elem(map_fd, &key6, &value6, 0);
+
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               buf, &size, &retval, &duration);
+
+       CHECK(err || errno || retval != XDP_TX || size != 74 ||
+             iph->protocol != IPPROTO_IPIP, "ipv4",
+             "err %d errno %d retval %d size %d\n",
+             err, errno, retval, size);
+
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+                               buf, &size, &retval, &duration);
+       CHECK(err || errno || retval != XDP_TX || size != 114 ||
+             iph6->nexthdr != IPPROTO_IPV6, "ipv6",
+             "err %d errno %d retval %d size %d\n",
+             err, errno, retval, size);
+out:
+       bpf_object__close(obj);
+}
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+
+static void test_l4lb(void)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       const char *file = "./test_l4lb.o";
+       struct vip key = {.protocol = 6};
+       struct vip_meta {
+               __u32 flags;
+               __u32 vip_num;
+       } value = {.vip_num = VIP_NUM};
+       __u32 stats_key = VIP_NUM;
+       struct vip_stats {
+               __u64 bytes;
+               __u64 pkts;
+       } stats[nr_cpus];
+       struct real_definition {
+               union {
+                       __be32 dst;
+                       __be32 dstv6[4];
+               };
+               __u8 flags;
+       } real_def = {.dst = MAGIC_VAL};
+       __u32 ch_key = 11, real_num = 3;
+       __u32 duration, retval, size;
+       int err, i, prog_fd, map_fd;
+       __u64 bytes = 0, pkts = 0;
+       struct bpf_object *obj;
+       char buf[128];
+       u32 *magic = (u32 *)buf;
+
+       err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+       if (err)
+               return;
+
+       map_fd = bpf_find_map(__func__, obj, "vip_map");
+       if (map_fd < 0)
+               goto out;
+       bpf_map_update_elem(map_fd, &key, &value, 0);
+
+       map_fd = bpf_find_map(__func__, obj, "ch_rings");
+       if (map_fd < 0)
+               goto out;
+       bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+       map_fd = bpf_find_map(__func__, obj, "reals");
+       if (map_fd < 0)
+               goto out;
+       bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+       err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+                               buf, &size, &retval, &duration);
+       CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+             *magic != MAGIC_VAL, "ipv4",
+             "err %d errno %d retval %d size %d magic %x\n",
+             err, errno, retval, size, *magic);
+
+       err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+                               buf, &size, &retval, &duration);
+       CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+             *magic != MAGIC_VAL, "ipv6",
+             "err %d errno %d retval %d size %d magic %x\n",
+             err, errno, retval, size, *magic);
+
+       map_fd = bpf_find_map(__func__, obj, "stats");
+       if (map_fd < 0)
+               goto out;
+       bpf_map_lookup_elem(map_fd, &stats_key, stats);
+       for (i = 0; i < nr_cpus; i++) {
+               bytes += stats[i].bytes;
+               pkts += stats[i].pkts;
+       }
+       if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+               error_cnt++;
+               printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
+       }
+out:
+       bpf_object__close(obj);
+}
+
+int main(void)
+{
+       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+       setrlimit(RLIMIT_MEMLOCK, &rinf);
+
+       test_pkt_access();
+       test_xdp();
+       test_l4lb();
+
+       printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
+       return 0;
+}
index d1555e4240c0fb3066c459e05f76a6f91f3b66af..f4f43c98cf7f14951ba5a9f448cc427746549b7b 100644 (file)
@@ -38,6 +38,7 @@
 
 #define MAX_INSNS      512
 #define MAX_FIXUPS     8
+#define MAX_NR_MAPS    4
 
 struct bpf_test {
        const char *descr;
@@ -45,6 +46,7 @@ struct bpf_test {
        int fixup_map1[MAX_FIXUPS];
        int fixup_map2[MAX_FIXUPS];
        int fixup_prog[MAX_FIXUPS];
+       int fixup_map_in_map[MAX_FIXUPS];
        const char *errstr;
        const char *errstr_unpriv;
        enum {
@@ -4452,7 +4454,76 @@ static struct bpf_test tests[] = {
                .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                .result = REJECT,
                .result_unpriv = REJECT,
-       }
+       },
+       {
+               "map in map access",
+               .insns = {
+                       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_in_map = { 3 },
+               .result = ACCEPT,
+       },
+       {
+               "invalid inner map pointer",
+               .insns = {
+                       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_in_map = { 3 },
+               .errstr = "R1 type=inv expected=map_ptr",
+               .errstr_unpriv = "R1 pointer arithmetic prohibited",
+               .result = REJECT,
+       },
+       {
+               "forgot null checking on the inner map pointer",
+               .insns = {
+                       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_in_map = { 3 },
+               .errstr = "R1 type=map_value_or_null expected=map_ptr",
+               .result = REJECT,
+       },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -4489,42 +4560,73 @@ static int create_prog_array(void)
        return fd;
 }
 
+static int create_map_in_map(void)
+{
+       int inner_map_fd, outer_map_fd;
+
+       inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+                                     sizeof(int), 1, 0);
+       if (inner_map_fd < 0) {
+               printf("Failed to create array '%s'!\n", strerror(errno));
+               return inner_map_fd;
+       }
+
+       outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+                                            sizeof(int), inner_map_fd, 1, 0);
+       if (outer_map_fd < 0)
+               printf("Failed to create array of maps '%s'!\n",
+                      strerror(errno));
+
+       close(inner_map_fd);
+
+       return outer_map_fd;
+}
+
 static char bpf_vlog[32768];
 
 static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
-                         int *fd_f1, int *fd_f2, int *fd_f3)
+                         int *map_fds)
 {
        int *fixup_map1 = test->fixup_map1;
        int *fixup_map2 = test->fixup_map2;
        int *fixup_prog = test->fixup_prog;
+       int *fixup_map_in_map = test->fixup_map_in_map;
 
        /* Allocating HTs with 1 elem is fine here, since we only test
         * the verifier and never do a runtime lookup, so the only thing
         * that really matters is the value size in this case.
         */
        if (*fixup_map1) {
-               *fd_f1 = create_map(sizeof(long long), 1);
+               map_fds[0] = create_map(sizeof(long long), 1);
                do {
-                       prog[*fixup_map1].imm = *fd_f1;
+                       prog[*fixup_map1].imm = map_fds[0];
                        fixup_map1++;
                } while (*fixup_map1);
        }
 
        if (*fixup_map2) {
-               *fd_f2 = create_map(sizeof(struct test_val), 1);
+               map_fds[1] = create_map(sizeof(struct test_val), 1);
                do {
-                       prog[*fixup_map2].imm = *fd_f2;
+                       prog[*fixup_map2].imm = map_fds[1];
                        fixup_map2++;
                } while (*fixup_map2);
        }
 
        if (*fixup_prog) {
-               *fd_f3 = create_prog_array();
+               map_fds[2] = create_prog_array();
                do {
-                       prog[*fixup_prog].imm = *fd_f3;
+                       prog[*fixup_prog].imm = map_fds[2];
                        fixup_prog++;
                } while (*fixup_prog);
        }
+
+       if (*fixup_map_in_map) {
+               map_fds[3] = create_map_in_map();
+               do {
+                       prog[*fixup_map_in_map].imm = map_fds[3];
+                       fixup_map_in_map++;
+               } while (*fixup_map_in_map);
+       }
 }
 
 static void do_test_single(struct bpf_test *test, bool unpriv,
@@ -4533,11 +4635,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        struct bpf_insn *prog = test->insns;
        int prog_len = probe_filter_length(prog);
        int prog_type = test->prog_type;
-       int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1;
+       int map_fds[MAX_NR_MAPS];
        int fd_prog, expected_ret;
        const char *expected_err;
+       int i;
+
+       for (i = 0; i < MAX_NR_MAPS; i++)
+               map_fds[i] = -1;
 
-       do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3);
+       do_test_fixup(test, prog, map_fds);
 
        fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
                                   prog, prog_len, "GPL", 0, bpf_vlog,
@@ -4568,9 +4674,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        printf("OK\n");
 close_fds:
        close(fd_prog);
-       close(fd_f1);
-       close(fd_f2);
-       close(fd_f3);
+       for (i = 0; i < MAX_NR_MAPS; i++)
+               close(map_fds[i]);
        sched_yield();
        return;
 fail_log:
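
The instruction sequences in the new map-in-map tests correspond to
roughly this restricted-C pattern (outer_map here is a hypothetical
BPF_MAP_TYPE_ARRAY_OF_MAPS map; the NULL check on the inner map pointer
is what the verifier enforces, and what the third test deliberately
omits):

	int key = 0;
	void *inner_map;

	inner_map = bpf_map_lookup_elem(&outer_map, &key);
	if (!inner_map)		/* map_value_or_null until checked */
		return 0;
	return bpf_map_lookup_elem(inner_map, &key) ? 1 : 0;
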
diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c
new file mode 100644 (file)
index 0000000..9a33b03
--- /dev/null
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016,2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+
+#define htons __builtin_bswap16
+#define ntohs __builtin_bswap16
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u64),
+       .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(struct vip),
+       .value_size = sizeof(struct iptnl_info),
+       .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+       __u64 *rxcnt_count;
+
+       rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+       if (rxcnt_count)
+               *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+                                    __u8 protocol)
+{
+       struct tcphdr *th;
+       struct udphdr *uh;
+
+       switch (protocol) {
+       case IPPROTO_TCP:
+               th = (struct tcphdr *)trans_data;
+               if (th + 1 > data_end)
+                       return -1;
+               return th->dest;
+       case IPPROTO_UDP:
+               uh = (struct udphdr *)trans_data;
+               if (uh + 1 > data_end)
+                       return -1;
+               return uh->dest;
+       default:
+               return 0;
+       }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+                                      const struct ethhdr *old_eth,
+                                      const struct iptnl_info *tnl,
+                                      __be16 h_proto)
+{
+       memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+       memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+       new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+       struct iptnl_info *tnl;
+       struct ethhdr *new_eth;
+       struct ethhdr *old_eth;
+       struct iphdr *iph = data + sizeof(struct ethhdr);
+       __u16 *next_iph;
+       __u16 payload_len;
+       struct vip vip = {};
+       int dport;
+       __u32 csum = 0;
+       int i;
+
+       if (iph + 1 > data_end)
+               return XDP_DROP;
+
+       dport = get_dport(iph + 1, data_end, iph->protocol);
+       if (dport == -1)
+               return XDP_DROP;
+
+       vip.protocol = iph->protocol;
+       vip.family = AF_INET;
+       vip.daddr.v4 = iph->daddr;
+       vip.dport = dport;
+       payload_len = ntohs(iph->tot_len);
+
+       tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+       /* It only does v4-in-v4 */
+       if (!tnl || tnl->family != AF_INET)
+               return XDP_PASS;
+
+       if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+               return XDP_DROP;
+
+       data = (void *)(long)xdp->data;
+       data_end = (void *)(long)xdp->data_end;
+
+       new_eth = data;
+       iph = data + sizeof(*new_eth);
+       old_eth = data + sizeof(*iph);
+
+       if (new_eth + 1 > data_end ||
+           old_eth + 1 > data_end ||
+           iph + 1 > data_end)
+               return XDP_DROP;
+
+       set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
+
+       iph->version = 4;
+       iph->ihl = sizeof(*iph) >> 2;
+       iph->frag_off = 0;
+       iph->protocol = IPPROTO_IPIP;
+       iph->check = 0;
+       iph->tos = 0;
+       iph->tot_len = htons(payload_len + sizeof(*iph));
+       iph->daddr = tnl->daddr.v4;
+       iph->saddr = tnl->saddr.v4;
+       iph->ttl = 8;
+
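+	/* one's-complement sum of the ten 16-bit header words, folded once */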
+       next_iph = (__u16 *)iph;
+#pragma clang loop unroll(full)
+       for (i = 0; i < sizeof(*iph) >> 1; i++)
+               csum += *next_iph++;
+
+       iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+       count_tx(vip.protocol);
+
+       return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+       struct iptnl_info *tnl;
+       struct ethhdr *new_eth;
+       struct ethhdr *old_eth;
+       struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+       __u16 payload_len;
+       struct vip vip = {};
+       int dport;
+
+       if (ip6h + 1 > data_end)
+               return XDP_DROP;
+
+       dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+       if (dport == -1)
+               return XDP_DROP;
+
+       vip.protocol = ip6h->nexthdr;
+       vip.family = AF_INET6;
+       memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+       vip.dport = dport;
+       payload_len = ip6h->payload_len;
+
+       tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+       /* It only does v6-in-v6 */
+       if (!tnl || tnl->family != AF_INET6)
+               return XDP_PASS;
+
+       if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+               return XDP_DROP;
+
+       data = (void *)(long)xdp->data;
+       data_end = (void *)(long)xdp->data_end;
+
+       new_eth = data;
+       ip6h = data + sizeof(*new_eth);
+       old_eth = data + sizeof(*ip6h);
+
+       if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+           ip6h + 1 > data_end)
+               return XDP_DROP;
+
+       set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
+
+       ip6h->version = 6;
+       ip6h->priority = 0;
+       memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+       ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
+       ip6h->nexthdr = IPPROTO_IPV6;
+       ip6h->hop_limit = 8;
+       memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+       memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+       count_tx(vip.protocol);
+
+       return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+       struct ethhdr *eth = data;
+       __u16 h_proto;
+
+       if (eth + 1 > data_end)
+               return XDP_DROP;
+
+       h_proto = eth->h_proto;
+
+       if (h_proto == htons(ETH_P_IP))
+               return handle_ipv4(xdp);
+       else if (h_proto == htons(ETH_P_IPV6))
+               return handle_ipv6(xdp);
+       else
+               return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
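
test_progs.c exercises this program through BPF_PROG_TEST_RUN rather
than a live attach; to try it against a real interface one could
presumably load it with iproute2 along these lines (device name
illustrative):

	$ ip link set dev eth0 xdp obj test_xdp.o sec xdp_tx_iptunnel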