summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/instruction-set.rst156
-rw-r--r--Documentation/networking/device_drivers/can/freescale/flexcan.rst54
-rw-r--r--Documentation/networking/device_drivers/can/index.rst20
-rw-r--r--Documentation/networking/device_drivers/ethernet/amazon/ena.rst2
-rw-r--r--Documentation/networking/device_drivers/index.rst1
-rw-r--r--Documentation/networking/devlink/mlx5.rst1
-rw-r--r--arch/arm64/net/bpf_jit_comp.c5
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c6
-rw-r--r--drivers/net/can/Makefile2
-rw-r--r--drivers/net/can/flexcan/Makefile7
-rw-r--r--drivers/net/can/flexcan/flexcan-core.c (renamed from drivers/net/can/flexcan.c)234
-rw-r--r--drivers/net/can/flexcan/flexcan-ethtool.c114
-rw-r--r--drivers/net/can/flexcan/flexcan.h163
-rw-r--r--drivers/net/can/janz-ican3.c2
-rw-r--r--drivers/net/can/spi/mcp251xfd/Makefile5
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c119
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c1083
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c1
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c269
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c260
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c260
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c205
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd.h36
-rw-r--r--drivers/net/dsa/ocelot/felix.c12
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_admin_defs.h10
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c8
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.h13
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c15
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c125
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h24
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c20
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c9
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c29
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c15
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h14
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_status.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c34
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_adminq.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c169
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.h25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flex_pipe.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fltr.c80
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fltr.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.c17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c96
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c101
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c226
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c308
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c97
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c351
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h6
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c63
-rw-r--r--include/linux/bpf.h20
-rw-r--r--include/linux/filter.h4
-rw-r--r--include/linux/mlx5/eq.h4
-rw-r--r--include/net/page_pool.h11
-rw-r--r--include/net/sock.h1
-rw-r--r--include/net/xdp.h3
-rw-r--r--include/soc/mscc/ocelot.h1
-rw-r--r--kernel/bpf/cpumap.c8
-rw-r--r--kernel/bpf/devmap.c32
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--net/core/filter.c81
-rw-r--r--net/core/page_pool.c6
-rw-r--r--net/core/sock_map.c21
-rw-r--r--net/core/xdp.c94
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/tcp_bpf.c27
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp.c1
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c1
-rw-r--r--net/mptcp/options.c109
-rw-r--r--net/mptcp/pm.c34
-rw-r--r--net/mptcp/pm_netlink.c197
-rw-r--r--net/mptcp/protocol.c307
-rw-r--r--net/mptcp/protocol.h64
-rw-r--r--net/mptcp/sockopt.c24
-rw-r--r--net/mptcp/subflow.c25
-rw-r--r--net/mptcp/token.c1
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/sched/sch_cake.c40
-rw-r--r--net/tls/tls_sw.c1
-rw-r--r--samples/bpf/xdpsock_user.c363
-rw-r--r--tools/bpf/bpftool/feature.c109
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/lib/bpf/bpf.c8
-rw-r--r--tools/lib/bpf/bpf.h115
-rw-r--r--tools/lib/bpf/libbpf.c29
-rw-r--r--tools/lib/bpf/libbpf.h5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c28
-rw-r--r--tools/testing/selftests/bpf/test_sock.c370
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c28
-rw-r--r--tools/testing/selftests/net/mptcp/config1
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c148
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh39
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh203
131 files changed, 5282 insertions, 2643 deletions
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 1af51143ff9f..3704836fe6df 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -19,23 +19,37 @@ The eBPF calling convention is defined as:
R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
necessary across calls.
+Instruction encoding
+====================
+
+eBPF uses 64-bit instructions with the following encoding:
+
+ ============= ======= =============== ==================== ============
+ 32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
+ ============= ======= =============== ==================== ============
+ immediate offset source register destination register opcode
+ ============= ======= =============== ==================== ============
+
+Note that most instructions do not use all of the fields.
+Unused fields shall be cleared to zero.
+
Instruction classes
-===================
+-------------------
The three LSB bits of the 'opcode' field store the instruction class:
- ========= =====
- class value
- ========= =====
- BPF_LD 0x00
- BPF_LDX 0x01
- BPF_ST 0x02
- BPF_STX 0x03
- BPF_ALU 0x04
- BPF_JMP 0x05
- BPF_JMP32 0x06
- BPF_ALU64 0x07
- ========= =====
+ ========= ===== ===============================
+ class value description
+ ========= ===== ===============================
+ BPF_LD 0x00 non-standard load operations
+ BPF_LDX 0x01 load into register operations
+ BPF_ST 0x02 store from immediate operations
+ BPF_STX 0x03 store from register operations
+ BPF_ALU 0x04 32-bit arithmetic operations
+ BPF_JMP 0x05 64-bit jump operations
+ BPF_JMP32 0x06 32-bit jump operations
+ BPF_ALU64 0x07 64-bit arithmetic operations
+ ========= ===== ===============================
Arithmetic and jump instructions
================================
@@ -60,66 +74,78 @@ The 4th bit encodes the source operand:
The four MSB bits store the operation code.
-For class BPF_ALU or BPF_ALU64:
- ======== ===== =========================
+Arithmetic instructions
+-----------------------
+
+BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
+otherwise identical operations.
+The code field encodes the operation as below:
+
+ ======== ===== ==========================
code value description
- ======== ===== =========================
- BPF_ADD 0x00
- BPF_SUB 0x10
- BPF_MUL 0x20
- BPF_DIV 0x30
- BPF_OR 0x40
- BPF_AND 0x50
- BPF_LSH 0x60
- BPF_RSH 0x70
- BPF_NEG 0x80
- BPF_MOD 0x90
- BPF_XOR 0xa0
- BPF_MOV 0xb0 mov reg to reg
+ ======== ===== ==========================
+ BPF_ADD 0x00 dst += src
+ BPF_SUB 0x10 dst -= src
+ BPF_MUL 0x20 dst \*= src
+ BPF_DIV 0x30 dst /= src
+ BPF_OR 0x40 dst \|= src
+ BPF_AND 0x50 dst &= src
+ BPF_LSH 0x60 dst <<= src
+ BPF_RSH 0x70 dst >>= src
+ BPF_NEG 0x80 dst = ~src
+ BPF_MOD 0x90 dst %= src
+ BPF_XOR 0xa0 dst ^= src
+ BPF_MOV 0xb0 dst = src
BPF_ARSH 0xc0 sign extending shift right
BPF_END 0xd0 endianness conversion
- ======== ===== =========================
+ ======== ===== ==========================
-For class BPF_JMP or BPF_JMP32:
+BPF_ADD | BPF_X | BPF_ALU means::
- ======== ===== =========================
- code value description
- ======== ===== =========================
- BPF_JA 0x00 BPF_JMP only
- BPF_JEQ 0x10
- BPF_JGT 0x20
- BPF_JGE 0x30
- BPF_JSET 0x40
- BPF_JNE 0x50 jump '!='
- BPF_JSGT 0x60 signed '>'
- BPF_JSGE 0x70 signed '>='
- BPF_CALL 0x80 function call
- BPF_EXIT 0x90 function return
- BPF_JLT 0xa0 unsigned '<'
- BPF_JLE 0xb0 unsigned '<='
- BPF_JSLT 0xc0 signed '<'
- BPF_JSLE 0xd0 signed '<='
- ======== ===== =========================
+ dst_reg = (u32) dst_reg + (u32) src_reg;
-So BPF_ADD | BPF_X | BPF_ALU means::
+BPF_ADD | BPF_X | BPF_ALU64 means::
- dst_reg = (u32) dst_reg + (u32) src_reg;
+ dst_reg = dst_reg + src_reg
-Similarly, BPF_XOR | BPF_K | BPF_ALU means::
+BPF_XOR | BPF_K | BPF_ALU means::
src_reg = (u32) src_reg ^ (u32) imm32
-eBPF is using BPF_MOV | BPF_X | BPF_ALU to represent A = B moves. BPF_ALU64
-is used to mean exactly the same operations as BPF_ALU, but with 64-bit wide
-operands instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.::
+BPF_XOR | BPF_K | BPF_ALU64 means::
- dst_reg = dst_reg + src_reg
+ src_reg = src_reg ^ imm32
+
+
+Jump instructions
+-----------------
-BPF_JMP | BPF_EXIT means function exit only. The eBPF program needs to store
-the return value into register R0 before doing a BPF_EXIT. Class 6 is used as
-BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
-operands for the comparisons instead.
+BPF_JMP32 uses 32-bit wide operands while BPF_JMP uses 64-bit wide operands for
+otherwise identical operations.
+The code field encodes the operation as below:
+
+ ======== ===== ========================= ============
+ code value description notes
+ ======== ===== ========================= ============
+ BPF_JA 0x00 PC += off BPF_JMP only
+ BPF_JEQ 0x10 PC += off if dst == src
+ BPF_JGT 0x20 PC += off if dst > src unsigned
+ BPF_JGE 0x30 PC += off if dst >= src unsigned
+ BPF_JSET 0x40 PC += off if dst & src
+ BPF_JNE 0x50 PC += off if dst != src
+ BPF_JSGT 0x60 PC += off if dst > src signed
+ BPF_JSGE 0x70 PC += off if dst >= src signed
+ BPF_CALL 0x80 function call
+ BPF_EXIT 0x90 function / program return BPF_JMP only
+ BPF_JLT 0xa0 PC += off if dst < src unsigned
+ BPF_JLE 0xb0 PC += off if dst <= src unsigned
+ BPF_JSLT 0xc0 PC += off if dst < src signed
+ BPF_JSLE 0xd0 PC += off if dst <= src signed
+ ======== ===== ========================= ============
+
+The eBPF program needs to store the return value into register R0 before doing a
+BPF_EXIT.
Load and store instructions
@@ -147,15 +173,15 @@ The size modifier is one of:
The mode modifier is one of:
- ============= ===== =====================
+ ============= ===== ====================================
mode modifier value description
- ============= ===== =====================
+ ============= ===== ====================================
BPF_IMM 0x00 used for 64-bit mov
- BPF_ABS 0x20
- BPF_IND 0x40
- BPF_MEM 0x60
+ BPF_ABS 0x20 legacy BPF packet access
+ BPF_IND 0x40 legacy BPF packet access
+ BPF_MEM 0x60 all normal load and store operations
BPF_ATOMIC 0xc0 atomic operations
- ============= ===== =====================
+ ============= ===== ====================================
BPF_MEM | <size> | BPF_STX means::
diff --git a/Documentation/networking/device_drivers/can/freescale/flexcan.rst b/Documentation/networking/device_drivers/can/freescale/flexcan.rst
new file mode 100644
index 000000000000..4e3eec6cecd2
--- /dev/null
+++ b/Documentation/networking/device_drivers/can/freescale/flexcan.rst
@@ -0,0 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+=============================
+Flexcan CAN Controller driver
+=============================
+
+Authors: Marc Kleine-Budde <mkl@pengutronix.de>,
+Dario Binacchi <dario.binacchi@amarula.solutions.com>
+
+On/off RTR frames reception
+===========================
+
+For most flexcan IP cores the driver supports 2 RX modes:
+
+- FIFO
+- mailbox
+
+The older flexcan cores (integrated into the i.MX25, i.MX28, i.MX35
+and i.MX53 SOCs) only receive RTR frames if the controller is
+configured for RX-FIFO mode.
+
+The RX FIFO mode uses a hardware FIFO with a depth of 6 CAN frames,
+while the mailbox mode uses a software FIFO with a depth of up to 62
+CAN frames. With the help of the bigger buffer, the mailbox mode
+performs better under high system load situations.
+
+As reception of RTR frames is part of the CAN standard, all flexcan
+cores come up in a mode where RTR reception is possible.
+
+With the "rx-rtr" private flag the ability to receive RTR frames can
+be waived at the expense of losing the ability to receive RTR
+messages. This trade off is beneficial in certain use cases.
+
+"rx-rtr" on
+ Receive RTR frames. (default)
+
+ The CAN controller can and will receive RTR frames.
+
+ On some IP cores the controller cannot receive RTR frames in the
+ more performant "RX mailbox" mode and will use "RX FIFO" mode
+ instead.
+
+"rx-rtr" off
+
+ Waive ability to receive RTR frames. (not supported on all IP cores)
+
+ This mode activates the "RX mailbox mode" for better performance, on
+ some IP cores RTR frames cannot be received anymore.
+
+The setting can only be changed if the interface is down::
+
+ ip link set dev can0 down
+ ethtool --set-priv-flags can0 rx-rtr {off|on}
+ ip link set dev can0 up
diff --git a/Documentation/networking/device_drivers/can/index.rst b/Documentation/networking/device_drivers/can/index.rst
new file mode 100644
index 000000000000..58b6e0ad3030
--- /dev/null
+++ b/Documentation/networking/device_drivers/can/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+Controller Area Network (CAN) Device Drivers
+============================================
+
+Device drivers for CAN devices.
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+ freescale/flexcan
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
index 01b2a69b0cb0..8bcb173e0353 100644
--- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -135,7 +135,7 @@ The ENA driver supports two Queue Operation modes for Tx SQs:
- **Low Latency Queue (LLQ) mode or "push-mode":**
In this mode the driver pushes the transmit descriptors and the
- first 128 bytes of the packet directly to the ENA device memory
+ first 96 bytes of the packet directly to the ENA device memory
space. The rest of the packet payload is fetched by the
device. For this operation mode, the driver uses a dedicated PCI
device memory BAR, which is mapped with write-combine capability.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 3a5a1d46e77e..5f5cfdb2a300 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -11,6 +11,7 @@ Contents:
appletalk/index
atm/index
cable/index
+ can/index
cellular/index
ethernet/index
fddi/index
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 38e94ed65936..29ad304e6fba 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -17,6 +17,7 @@ Parameters
- Validation
* - ``enable_roce``
- driverinit
+ - Type: Boolean
* - ``io_eq_size``
- driverinit
- The range is between 64 and 4096.
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 07aad85848fa..e96d4d87291f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -792,7 +792,10 @@ emit_cond_jmp:
u64 imm64;
imm64 = (u64)insn1.imm << 32 | (u32)imm;
- emit_a64_mov_i64(dst, imm64, ctx);
+ if (bpf_pseudo_func(insn))
+ emit_addr_mov_i64(dst, imm64, ctx);
+ else
+ emit_a64_mov_i64(dst, imm64, ctx);
return 1;
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91eb615b89ee..86842cd580ba 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1541,16 +1541,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
.nent = MLX5_IB_NUM_PF_EQE,
};
param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
- if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
- err = -ENOMEM;
- goto err_wq;
- }
eq->core = mlx5_eq_create_generic(dev->mdev, &param);
- free_cpumask_var(param.affinity);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index a2b4463d8480..1e660afcb61b 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -16,7 +16,7 @@ obj-y += softing/
obj-$(CONFIG_CAN_AT91) += at91_can.o
obj-$(CONFIG_CAN_CC770) += cc770/
obj-$(CONFIG_CAN_C_CAN) += c_can/
-obj-$(CONFIG_CAN_FLEXCAN) += flexcan.o
+obj-$(CONFIG_CAN_FLEXCAN) += flexcan/
obj-$(CONFIG_CAN_GRCAN) += grcan.o
obj-$(CONFIG_CAN_IFI_CANFD) += ifi_canfd/
obj-$(CONFIG_CAN_JANZ_ICAN3) += janz-ican3.o
diff --git a/drivers/net/can/flexcan/Makefile b/drivers/net/can/flexcan/Makefile
new file mode 100644
index 000000000000..89d5695c902e
--- /dev/null
+++ b/drivers/net/can/flexcan/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CAN_FLEXCAN) += flexcan.o
+
+flexcan-objs :=
+flexcan-objs += flexcan-core.o
+flexcan-objs += flexcan-ethtool.o
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan/flexcan-core.c
index 12b60ad95b02..0bff1884d5cc 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -15,7 +15,6 @@
#include <linux/can/dev.h>
#include <linux/can/error.h>
#include <linux/can/led.h>
-#include <linux/can/rx-offload.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/firmware/imx/sci.h>
@@ -33,6 +32,8 @@
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
+#include "flexcan.h"
+
#define DRV_NAME "flexcan"
/* 8 for RX fifo and 2 error handling */
@@ -173,9 +174,9 @@
/* FLEXCAN interrupt flag register (IFLAG) bits */
/* Errata ERR005829 step7: Reserve first valid MB */
-#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8
-#define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0
-#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1)
+#define FLEXCAN_TX_MB_RESERVED_RX_FIFO 8
+#define FLEXCAN_TX_MB_RESERVED_RX_MAILBOX 0
+#define FLEXCAN_RX_MB_RX_MAILBOX_FIRST (FLEXCAN_TX_MB_RESERVED_RX_MAILBOX + 1)
#define FLEXCAN_IFLAG_MB(x) BIT_ULL(x)
#define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7)
#define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6)
@@ -206,53 +207,6 @@
#define FLEXCAN_TIMEOUT_US (250)
-/* FLEXCAN hardware feature flags
- *
- * Below is some version info we got:
- * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB
- * Filter? connected? Passive detection ption in MB Supported?
- * MCF5441X FlexCAN2 ? no yes no no yes no 16
- * MX25 FlexCAN2 03.00.00.00 no no no no no no 64
- * MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64
- * MX35 FlexCAN2 03.00.00.00 no no no no no no 64
- * MX53 FlexCAN2 03.00.00.00 yes no no no no no 64
- * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no 64
- * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes 64
- * MX8MP FlexCAN3 03.00.17.01 yes yes no yes yes yes 64
- * VF610 FlexCAN3 ? no yes no yes yes? no 64
- * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no 64
- * LX2160A FlexCAN3 03.00.23.00 no yes no yes yes yes 64
- *
- * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
- */
-
-/* [TR]WRN_INT not connected */
-#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1)
- /* Disable RX FIFO Global mask */
-#define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2)
-/* Enable EACEN and RRS bit in ctrl2 */
-#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3)
-/* Disable non-correctable errors interrupt and freeze mode */
-#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4)
-/* Use timestamp based offloading */
-#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5)
-/* No interrupt for error passive */
-#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6)
-/* default to BE register access */
-#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7)
-/* Setup stop mode with GPR to support wakeup */
-#define FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR BIT(8)
-/* Support CAN-FD mode */
-#define FLEXCAN_QUIRK_SUPPORT_FD BIT(9)
-/* support memory detection and correction */
-#define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10)
-/* Setup stop mode with SCU firmware to support wakeup */
-#define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11)
-/* Setup 3 separate interrupts, main, boff and err */
-#define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12)
-/* Setup 16 mailboxes */
-#define FLEXCAN_QUIRK_NR_MB_16 BIT(13)
-
/* Structure of the message buffer */
struct flexcan_mb {
u32 can_ctrl;
@@ -339,106 +293,80 @@ struct flexcan_regs {
static_assert(sizeof(struct flexcan_regs) == 0x4 * 18 + 0xfb8);
-struct flexcan_devtype_data {
- u32 quirks; /* quirks needed for different IP cores */
-};
-
-struct flexcan_stop_mode {
- struct regmap *gpr;
- u8 req_gpr;
- u8 req_bit;
-};
-
-struct flexcan_priv {
- struct can_priv can;
- struct can_rx_offload offload;
- struct device *dev;
-
- struct flexcan_regs __iomem *regs;
- struct flexcan_mb __iomem *tx_mb;
- struct flexcan_mb __iomem *tx_mb_reserved;
- u8 tx_mb_idx;
- u8 mb_count;
- u8 mb_size;
- u8 clk_src; /* clock source of CAN Protocol Engine */
- u8 scu_idx;
-
- u64 rx_mask;
- u64 tx_mask;
- u32 reg_ctrl_default;
-
- struct clk *clk_ipg;
- struct clk *clk_per;
- const struct flexcan_devtype_data *devtype_data;
- struct regulator *reg_xceiver;
- struct flexcan_stop_mode stm;
-
- int irq_boff;
- int irq_err;
-
- /* IPC handle when setup stop mode by System Controller firmware(scfw) */
- struct imx_sc_ipc *sc_ipc_handle;
-
- /* Read and Write APIs */
- u32 (*read)(void __iomem *addr);
- void (*write)(u32 val, void __iomem *addr);
-};
-
static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
- FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16,
+ FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16 |
+ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
};
static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
FLEXCAN_QUIRK_BROKEN_PERR_STATE |
- FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN,
+ FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
};
static const struct flexcan_devtype_data fsl_imx25_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
- FLEXCAN_QUIRK_BROKEN_PERR_STATE,
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
};
static const struct flexcan_devtype_data fsl_imx28_devtype_data = {
- .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE,
+ .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
};
static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
- FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
- FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR,
+ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
};
static const struct flexcan_devtype_data fsl_imx8qm_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
- FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
- FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW,
+ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
};
static struct flexcan_devtype_data fsl_imx8mp_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
- FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
+ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_RX_MAILBOX |
FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR |
- FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SUPPORT_ECC,
+ FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SUPPORT_ECC |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
};
static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
- FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
- FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SUPPORT_ECC,
+ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_RX_MAILBOX |
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SUPPORT_ECC |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
};
static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
- FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_USE_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
};
static const struct flexcan_devtype_data fsl_lx2160a_r1_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
- FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_SUPPORT_FD |
- FLEXCAN_QUIRK_SUPPORT_ECC,
+ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_SUPPORT_FD |
+ FLEXCAN_QUIRK_SUPPORT_ECC |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
};
static const struct can_bittiming_const flexcan_bittiming_const = {
@@ -600,7 +528,7 @@ static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv)
priv->write(reg_mcr, &regs->mcr);
/* enable stop request */
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
ret = flexcan_stop_mode_enable_scfw(priv, true);
if (ret < 0)
return ret;
@@ -619,7 +547,7 @@ static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv)
int ret;
/* remove stop request */
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
ret = flexcan_stop_mode_enable_scfw(priv, false);
if (ret < 0)
return ret;
@@ -1022,7 +950,7 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
mb = flexcan_get_mb(priv, n);
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
u32 code;
do {
@@ -1087,7 +1015,7 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
}
mark_as_read:
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
flexcan_write64(priv, FLEXCAN_IFLAG_MB(n), &regs->iflag1);
else
priv->write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, &regs->iflag1);
@@ -1113,7 +1041,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
enum can_state last_state = priv->can.state;
/* reception interrupt */
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
u64 reg_iflag_rx;
int ret;
@@ -1173,7 +1101,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
/* state change interrupt or broken error state quirk fix is enabled */
if ((reg_esr & FLEXCAN_ESR_ERR_STATE) ||
- (priv->devtype_data->quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE |
+ (priv->devtype_data.quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE |
FLEXCAN_QUIRK_BROKEN_PERR_STATE)))
flexcan_irq_state(dev, reg_esr);
@@ -1195,11 +1123,11 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
* (1): enabled if FLEXCAN_QUIRK_BROKEN_WERR_STATE is enabled
*/
if ((last_state != priv->can.state) &&
- (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_PERR_STATE) &&
+ (priv->devtype_data.quirks & FLEXCAN_QUIRK_BROKEN_PERR_STATE) &&
!(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) {
switch (priv->can.state) {
case CAN_STATE_ERROR_ACTIVE:
- if (priv->devtype_data->quirks &
+ if (priv->devtype_data.quirks &
FLEXCAN_QUIRK_BROKEN_WERR_STATE)
flexcan_error_irq_enable(priv);
else
@@ -1423,26 +1351,26 @@ static int flexcan_rx_offload_setup(struct net_device *dev)
else
priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_MB_16)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_MB_16)
priv->mb_count = 16;
else
priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
(sizeof(priv->regs->mb[1]) / priv->mb_size);
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
priv->tx_mb_reserved =
- flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP);
+ flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_RX_MAILBOX);
else
priv->tx_mb_reserved =
- flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_OFF_FIFO);
+ flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_RX_FIFO);
priv->tx_mb_idx = priv->mb_count - 1;
priv->tx_mb = flexcan_get_mb(priv, priv->tx_mb_idx);
priv->tx_mask = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
priv->offload.mailbox_read = flexcan_mailbox_read;
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
- priv->offload.mb_first = FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST;
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
+ priv->offload.mb_first = FLEXCAN_RX_MB_RX_MAILBOX_FIRST;
priv->offload.mb_last = priv->mb_count - 2;
priv->rx_mask = GENMASK_ULL(priv->offload.mb_last,
@@ -1506,7 +1434,7 @@ static int flexcan_chip_start(struct net_device *dev)
if (err)
goto out_chip_disable;
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_ECC)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SUPPORT_ECC)
flexcan_ram_init(dev);
flexcan_set_bittiming(dev);
@@ -1532,10 +1460,10 @@ static int flexcan_chip_start(struct net_device *dev)
/* MCR
*
* FIFO:
- * - disable for timestamp mode
+ * - disable for mailbox mode
* - enable for FIFO mode
*/
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
reg_mcr &= ~FLEXCAN_MCR_FEN;
else
reg_mcr |= FLEXCAN_MCR_FEN;
@@ -1586,7 +1514,7 @@ static int flexcan_chip_start(struct net_device *dev)
* on most Flexcan cores, too. Otherwise we don't get
* any error warning or passive interrupts.
*/
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE ||
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE ||
priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
reg_ctrl |= FLEXCAN_CTRL_ERR_MSK;
else
@@ -1599,7 +1527,7 @@ static int flexcan_chip_start(struct net_device *dev)
netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl);
priv->write(reg_ctrl, &regs->ctrl);
- if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) {
+ if ((priv->devtype_data.quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) {
reg_ctrl2 = priv->read(&regs->ctrl2);
reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS;
priv->write(reg_ctrl2, &regs->ctrl2);
@@ -1631,7 +1559,7 @@ static int flexcan_chip_start(struct net_device *dev)
priv->write(reg_fdctrl, &regs->fdctrl);
}
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) {
mb = flexcan_get_mb(priv, i);
priv->write(FLEXCAN_MB_CODE_RX_EMPTY,
@@ -1639,7 +1567,7 @@ static int flexcan_chip_start(struct net_device *dev)
}
} else {
/* clear and invalidate unused mailboxes first */
- for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i < priv->mb_count; i++) {
+ for (i = FLEXCAN_TX_MB_RESERVED_RX_FIFO; i < priv->mb_count; i++) {
mb = flexcan_get_mb(priv, i);
priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
&mb->can_ctrl);
@@ -1659,7 +1587,7 @@ static int flexcan_chip_start(struct net_device *dev)
priv->write(0x0, &regs->rx14mask);
priv->write(0x0, &regs->rx15mask);
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_RXFG)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_DISABLE_RXFG)
priv->write(0x0, &regs->rxfgmask);
/* clear acceptance filters */
@@ -1673,7 +1601,7 @@ static int flexcan_chip_start(struct net_device *dev)
* This also works around errata e5295 which generates false
* positive memory errors and put the device in freeze mode.
*/
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_MECR) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_DISABLE_MECR) {
/* Follow the protocol as described in "Detection
* and Correction of Memory Errors" to write to
* MECR register (step 1 - 5)
@@ -1799,7 +1727,7 @@ static int flexcan_open(struct net_device *dev)
if (err)
goto out_can_rx_offload_disable;
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
err = request_irq(priv->irq_boff,
flexcan_irq, IRQF_SHARED, dev->name, dev);
if (err)
@@ -1845,7 +1773,7 @@ static int flexcan_close(struct net_device *dev)
netif_stop_queue(dev);
flexcan_chip_interrupts_disable(dev);
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
free_irq(priv->irq_err, dev);
free_irq(priv->irq_boff, dev);
}
@@ -2051,9 +1979,9 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
priv = netdev_priv(dev);
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW)
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW)
ret = flexcan_setup_stop_mode_scfw(pdev);
- else if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR)
+ else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR)
ret = flexcan_setup_stop_mode_gpr(pdev);
else
/* return 0 directly if doesn't support stop mode feature */
@@ -2164,8 +2092,25 @@ static int flexcan_probe(struct platform_device *pdev)
return -ENODEV;
if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) &&
- !(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) {
- dev_err(&pdev->dev, "CAN-FD mode doesn't work with FIFO mode!\n");
+ !((devtype_data->quirks &
+ (FLEXCAN_QUIRK_USE_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR |
+ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO)) ==
+ (FLEXCAN_QUIRK_USE_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR))) {
+ dev_err(&pdev->dev, "CAN-FD mode doesn't work in RX-FIFO mode!\n");
+ return -EINVAL;
+ }
+
+ if ((devtype_data->quirks &
+ (FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR)) ==
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR) {
+ dev_err(&pdev->dev,
+ "Quirks (0x%08x) inconsistent: RX_MAILBOX_RX supported but not RX_MAILBOX\n",
+ devtype_data->quirks);
return -EINVAL;
}
@@ -2177,13 +2122,15 @@ static int flexcan_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
dev->netdev_ops = &flexcan_netdev_ops;
+ flexcan_set_ethtool_ops(dev);
dev->irq = irq;
dev->flags |= IFF_ECHO;
priv = netdev_priv(dev);
+ priv->devtype_data = *devtype_data;
if (of_property_read_bool(pdev->dev.of_node, "big-endian") ||
- devtype_data->quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) {
+ priv->devtype_data.quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) {
priv->read = flexcan_read_be;
priv->write = flexcan_write_be;
} else {
@@ -2202,10 +2149,9 @@ static int flexcan_probe(struct platform_device *pdev)
priv->clk_ipg = clk_ipg;
priv->clk_per = clk_per;
priv->clk_src = clk_src;
- priv->devtype_data = devtype_data;
priv->reg_xceiver = reg_xceiver;
- if (devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
priv->irq_boff = platform_get_irq(pdev, 1);
if (priv->irq_boff <= 0) {
err = -ENODEV;
@@ -2218,7 +2164,7 @@ static int flexcan_probe(struct platform_device *pdev)
}
}
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD |
CAN_CTRLMODE_FD_NON_ISO;
priv->can.bittiming_const = &flexcan_fd_bittiming_const;
diff --git a/drivers/net/can/flexcan/flexcan-ethtool.c b/drivers/net/can/flexcan/flexcan-ethtool.c
new file mode 100644
index 000000000000..3ae535577700
--- /dev/null
+++ b/drivers/net/can/flexcan/flexcan-ethtool.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2022 Amarula Solutions, Dario Binacchi <dario.binacchi@amarulasolutions.com>
+ * Copyright (c) 2022 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ *
+ */
+
+#include <linux/can/dev.h>
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+
+#include "flexcan.h"
+
+static const char flexcan_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define FLEXCAN_PRIV_FLAGS_RX_RTR BIT(0)
+ "rx-rtr",
+};
+
+static void
+flexcan_get_ringparam(struct net_device *ndev, struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *ext_ack)
+{
+ const struct flexcan_priv *priv = netdev_priv(ndev);
+
+ ring->rx_max_pending = priv->mb_count;
+ ring->tx_max_pending = priv->mb_count;
+
+ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
+ ring->rx_pending = priv->offload.mb_last -
+ priv->offload.mb_first + 1;
+ else
+ ring->rx_pending = 6; /* RX-FIFO depth is fixed */
+
+ /* the drive currently supports only on TX buffer */
+ ring->tx_pending = 1;
+}
+
+static void
+flexcan_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+ switch (stringset) {
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(data, flexcan_priv_flags_strings,
+ sizeof(flexcan_priv_flags_strings));
+ }
+}
+
+static u32 flexcan_get_priv_flags(struct net_device *ndev)
+{
+ const struct flexcan_priv *priv = netdev_priv(ndev);
+ u32 priv_flags = 0;
+
+ if (flexcan_active_rx_rtr(priv))
+ priv_flags |= FLEXCAN_PRIV_FLAGS_RX_RTR;
+
+ return priv_flags;
+}
+
+static int flexcan_set_priv_flags(struct net_device *ndev, u32 priv_flags)
+{
+ struct flexcan_priv *priv = netdev_priv(ndev);
+ u32 quirks = priv->devtype_data.quirks;
+
+ if (priv_flags & FLEXCAN_PRIV_FLAGS_RX_RTR) {
+ if (flexcan_supports_rx_mailbox_rtr(priv))
+ quirks |= FLEXCAN_QUIRK_USE_RX_MAILBOX;
+ else if (flexcan_supports_rx_fifo(priv))
+ quirks &= ~FLEXCAN_QUIRK_USE_RX_MAILBOX;
+ else
+ quirks |= FLEXCAN_QUIRK_USE_RX_MAILBOX;
+ } else {
+ if (flexcan_supports_rx_mailbox(priv))
+ quirks |= FLEXCAN_QUIRK_USE_RX_MAILBOX;
+ else
+ quirks &= ~FLEXCAN_QUIRK_USE_RX_MAILBOX;
+ }
+
+ if (quirks != priv->devtype_data.quirks && netif_running(ndev))
+ return -EBUSY;
+
+ priv->devtype_data.quirks = quirks;
+
+ if (!(priv_flags & FLEXCAN_PRIV_FLAGS_RX_RTR) &&
+ !flexcan_active_rx_rtr(priv))
+ netdev_info(ndev,
+ "Activating RX mailbox mode, cannot receive RTR frames.\n");
+
+ return 0;
+}
+
+static int flexcan_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_PRIV_FLAGS:
+ return ARRAY_SIZE(flexcan_priv_flags_strings);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct ethtool_ops flexcan_ethtool_ops = {
+ .get_ringparam = flexcan_get_ringparam,
+ .get_strings = flexcan_get_strings,
+ .get_priv_flags = flexcan_get_priv_flags,
+ .set_priv_flags = flexcan_set_priv_flags,
+ .get_sset_count = flexcan_get_sset_count,
+};
+
+void flexcan_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &flexcan_ethtool_ops;
+}
diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h
new file mode 100644
index 000000000000..fccdff8b1f0f
--- /dev/null
+++ b/drivers/net/can/flexcan/flexcan.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * flexcan.c - FLEXCAN CAN controller driver
+ *
+ * Copyright (c) 2005-2006 Varma Electronics Oy
+ * Copyright (c) 2009 Sascha Hauer, Pengutronix
+ * Copyright (c) 2010-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ * Copyright (c) 2014 David Jander, Protonic Holland
+ * Copyright (C) 2022 Amarula Solutions, Dario Binacchi <dario.binacchi@amarulasolutions.com>
+ *
+ * Based on code originally by Andrey Volkov <avolkov@varma-el.com>
+ *
+ */
+
+#ifndef _FLEXCAN_H
+#define _FLEXCAN_H
+
+#include <linux/can/rx-offload.h>
+
+/* FLEXCAN hardware feature flags
+ *
+ * Below is some version info we got:
+ * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB
+ * Filter? connected? Passive detection ption in MB Supported?
+ * MCF5441X FlexCAN2 ? no yes no no yes no 16
+ * MX25 FlexCAN2 03.00.00.00 no no no no no no 64
+ * MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64
+ * MX35 FlexCAN2 03.00.00.00 no no no no no no 64
+ * MX53 FlexCAN2 03.00.00.00 yes no no no no no 64
+ * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no 64
+ * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes 64
+ * MX8MP FlexCAN3 03.00.17.01 yes yes no yes yes yes 64
+ * VF610 FlexCAN3 ? no yes no yes yes? no 64
+ * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no 64
+ * LX2160A FlexCAN3 03.00.23.00 no yes no yes yes yes 64
+ *
+ * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
+ */
+
+/* [TR]WRN_INT not connected */
+#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1)
+ /* Disable RX FIFO Global mask */
+#define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2)
+/* Enable EACEN and RRS bit in ctrl2 */
+#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3)
+/* Disable non-correctable errors interrupt and freeze mode */
+#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4)
+/* Use mailboxes (not FIFO) for RX path */
+#define FLEXCAN_QUIRK_USE_RX_MAILBOX BIT(5)
+/* No interrupt for error passive */
+#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6)
+/* default to BE register access */
+#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7)
+/* Setup stop mode with GPR to support wakeup */
+#define FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR BIT(8)
+/* Support CAN-FD mode */
+#define FLEXCAN_QUIRK_SUPPORT_FD BIT(9)
+/* support memory detection and correction */
+#define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10)
+/* Setup stop mode with SCU firmware to support wakeup */
+#define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11)
+/* Setup 3 separate interrupts, main, boff and err */
+#define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12)
+/* Setup 16 mailboxes */
+#define FLEXCAN_QUIRK_NR_MB_16 BIT(13)
+/* Device supports RX via mailboxes */
+#define FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX BIT(14)
+/* Device supports RTR reception via mailboxes */
+#define FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR BIT(15)
+/* Device supports RX via FIFO */
+#define FLEXCAN_QUIRK_SUPPPORT_RX_FIFO BIT(16)
+
+struct flexcan_devtype_data {
+ u32 quirks; /* quirks needed for different IP cores */
+};
+
+struct flexcan_stop_mode {
+ struct regmap *gpr;
+ u8 req_gpr;
+ u8 req_bit;
+};
+
+struct flexcan_priv {
+ struct can_priv can;
+ struct can_rx_offload offload;
+ struct device *dev;
+
+ struct flexcan_regs __iomem *regs;
+ struct flexcan_mb __iomem *tx_mb;
+ struct flexcan_mb __iomem *tx_mb_reserved;
+ u8 tx_mb_idx;
+ u8 mb_count;
+ u8 mb_size;
+ u8 clk_src; /* clock source of CAN Protocol Engine */
+ u8 scu_idx;
+
+ u64 rx_mask;
+ u64 tx_mask;
+ u32 reg_ctrl_default;
+
+ struct clk *clk_ipg;
+ struct clk *clk_per;
+ struct flexcan_devtype_data devtype_data;
+ struct regulator *reg_xceiver;
+ struct flexcan_stop_mode stm;
+
+ int irq_boff;
+ int irq_err;
+
+ /* IPC handle when setup stop mode by System Controller firmware(scfw) */
+ struct imx_sc_ipc *sc_ipc_handle;
+
+ /* Read and Write APIs */
+ u32 (*read)(void __iomem *addr);
+ void (*write)(u32 val, void __iomem *addr);
+};
+
+void flexcan_set_ethtool_ops(struct net_device *dev);
+
+static inline bool
+flexcan_supports_rx_mailbox(const struct flexcan_priv *priv)
+{
+ const u32 quirks = priv->devtype_data.quirks;
+
+ return quirks & FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX;
+}
+
+static inline bool
+flexcan_supports_rx_mailbox_rtr(const struct flexcan_priv *priv)
+{
+ const u32 quirks = priv->devtype_data.quirks;
+
+ return (quirks & (FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR)) ==
+ (FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR);
+}
+
+static inline bool
+flexcan_supports_rx_fifo(const struct flexcan_priv *priv)
+{
+ const u32 quirks = priv->devtype_data.quirks;
+
+ return quirks & FLEXCAN_QUIRK_SUPPPORT_RX_FIFO;
+}
+
+static inline bool
+flexcan_active_rx_rtr(const struct flexcan_priv *priv)
+{
+ const u32 quirks = priv->devtype_data.quirks;
+
+ if (quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
+ if (quirks & FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR)
+ return true;
+ } else {
+ /* RX-FIFO is always RTR capable */
+ return true;
+ }
+
+ return false;
+}
+
+
+#endif /* _FLEXCAN_H */
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index 5b677af5f2a4..808c105cf8f7 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1285,7 +1285,7 @@ static unsigned int ican3_get_echo_skb(struct ican3_dev *mod)
{
struct sk_buff *skb = skb_dequeue(&mod->echoq);
struct can_frame *cf;
- u8 dlc;
+ u8 dlc = 0;
/* this should never trigger unless there is a driver bug */
if (!skb) {
diff --git a/drivers/net/can/spi/mcp251xfd/Makefile b/drivers/net/can/spi/mcp251xfd/Makefile
index 3cba3b9447ea..a83d685d64e0 100644
--- a/drivers/net/can/spi/mcp251xfd/Makefile
+++ b/drivers/net/can/spi/mcp251xfd/Makefile
@@ -3,9 +3,14 @@
obj-$(CONFIG_CAN_MCP251XFD) += mcp251xfd.o
mcp251xfd-objs :=
+mcp251xfd-objs += mcp251xfd-chip-fifo.o
mcp251xfd-objs += mcp251xfd-core.o
mcp251xfd-objs += mcp251xfd-crc16.o
mcp251xfd-objs += mcp251xfd-regmap.o
+mcp251xfd-objs += mcp251xfd-ring.o
+mcp251xfd-objs += mcp251xfd-rx.o
+mcp251xfd-objs += mcp251xfd-tef.o
mcp251xfd-objs += mcp251xfd-timestamp.o
+mcp251xfd-objs += mcp251xfd-tx.o
mcp251xfd-$(CONFIG_DEV_COREDUMP) += mcp251xfd-dump.o
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c
new file mode 100644
index 000000000000..2f9a623d381d
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-chip-fifo.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+//
+// Based on:
+//
+// CAN bus driver for Microchip 25XXFD CAN Controller with SPI Interface
+//
+// Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
+//
+
+#include <linux/bitfield.h>
+
+#include "mcp251xfd.h"
+
+static int
+mcp251xfd_chip_rx_fifo_init_one(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring)
+{
+ u32 fifo_con;
+
+ /* Enable RXOVIE on _all_ RX FIFOs, not just the last one.
+ *
+ * FIFOs hit by a RX MAB overflow and RXOVIE enabled will
+ * generate a RXOVIF, use this to properly detect RX MAB
+ * overflows.
+ */
+ fifo_con = FIELD_PREP(MCP251XFD_REG_FIFOCON_FSIZE_MASK,
+ ring->obj_num - 1) |
+ MCP251XFD_REG_FIFOCON_RXTSEN |
+ MCP251XFD_REG_FIFOCON_RXOVIE |
+ MCP251XFD_REG_FIFOCON_TFNRFNIE;
+
+ if (mcp251xfd_is_fd_mode(priv))
+ fifo_con |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
+ MCP251XFD_REG_FIFOCON_PLSIZE_64);
+ else
+ fifo_con |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
+ MCP251XFD_REG_FIFOCON_PLSIZE_8);
+
+ return regmap_write(priv->map_reg,
+ MCP251XFD_REG_FIFOCON(ring->fifo_nr), fifo_con);
+}
+
+static int
+mcp251xfd_chip_rx_filter_init_one(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring)
+{
+ u32 fltcon;
+
+ fltcon = MCP251XFD_REG_FLTCON_FLTEN(ring->nr) |
+ MCP251XFD_REG_FLTCON_FBP(ring->nr, ring->fifo_nr);
+
+ return regmap_update_bits(priv->map_reg,
+ MCP251XFD_REG_FLTCON(ring->nr >> 2),
+ MCP251XFD_REG_FLTCON_FLT_MASK(ring->nr),
+ fltcon);
+}
+
+int mcp251xfd_chip_fifo_init(const struct mcp251xfd_priv *priv)
+{
+ const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ const struct mcp251xfd_rx_ring *rx_ring;
+ u32 val;
+ int err, n;
+
+ /* TEF */
+ val = FIELD_PREP(MCP251XFD_REG_TEFCON_FSIZE_MASK,
+ tx_ring->obj_num - 1) |
+ MCP251XFD_REG_TEFCON_TEFTSEN |
+ MCP251XFD_REG_TEFCON_TEFOVIE |
+ MCP251XFD_REG_TEFCON_TEFNEIE;
+
+ err = regmap_write(priv->map_reg, MCP251XFD_REG_TEFCON, val);
+ if (err)
+ return err;
+
+ /* FIFO 1 - TX */
+ val = FIELD_PREP(MCP251XFD_REG_FIFOCON_FSIZE_MASK,
+ tx_ring->obj_num - 1) |
+ MCP251XFD_REG_FIFOCON_TXEN |
+ MCP251XFD_REG_FIFOCON_TXATIE;
+
+ if (mcp251xfd_is_fd_mode(priv))
+ val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
+ MCP251XFD_REG_FIFOCON_PLSIZE_64);
+ else
+ val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
+ MCP251XFD_REG_FIFOCON_PLSIZE_8);
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+ val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_TXAT_MASK,
+ MCP251XFD_REG_FIFOCON_TXAT_ONE_SHOT);
+ else
+ val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_TXAT_MASK,
+ MCP251XFD_REG_FIFOCON_TXAT_UNLIMITED);
+
+ err = regmap_write(priv->map_reg,
+ MCP251XFD_REG_FIFOCON(MCP251XFD_TX_FIFO),
+ val);
+ if (err)
+ return err;
+
+ /* RX FIFOs */
+ mcp251xfd_for_each_rx_ring(priv, rx_ring, n) {
+ err = mcp251xfd_chip_rx_fifo_init_one(priv, rx_ring);
+ if (err)
+ return err;
+
+ err = mcp251xfd_chip_rx_filter_init_one(priv, rx_ring);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index e16dc482f327..b5986df6eca0 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -20,8 +20,6 @@
#include <linux/pm_runtime.h>
#include <linux/property.h>
-#include <asm/unaligned.h>
-
#include "mcp251xfd.h"
#define DEVICE_NAME "mcp251xfd"
@@ -180,330 +178,6 @@ static int mcp251xfd_clks_and_vdd_disable(const struct mcp251xfd_priv *priv)
return 0;
}
-static inline u8
-mcp251xfd_cmd_prepare_write_reg(const struct mcp251xfd_priv *priv,
- union mcp251xfd_write_reg_buf *write_reg_buf,
- const u16 reg, const u32 mask, const u32 val)
-{
- u8 first_byte, last_byte, len;
- u8 *data;
- __le32 val_le32;
-
- first_byte = mcp251xfd_first_byte_set(mask);
- last_byte = mcp251xfd_last_byte_set(mask);
- len = last_byte - first_byte + 1;
-
- data = mcp251xfd_spi_cmd_write(priv, write_reg_buf, reg + first_byte);
- val_le32 = cpu_to_le32(val >> BITS_PER_BYTE * first_byte);
- memcpy(data, &val_le32, len);
-
- if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_REG) {
- u16 crc;
-
- mcp251xfd_spi_cmd_crc_set_len_in_reg(&write_reg_buf->crc.cmd,
- len);
- /* CRC */
- len += sizeof(write_reg_buf->crc.cmd);
- crc = mcp251xfd_crc16_compute(&write_reg_buf->crc, len);
- put_unaligned_be16(crc, (void *)write_reg_buf + len);
-
- /* Total length */
- len += sizeof(write_reg_buf->crc.crc);
- } else {
- len += sizeof(write_reg_buf->nocrc.cmd);
- }
-
- return len;
-}
-
-static inline int
-mcp251xfd_tef_tail_get_from_chip(const struct mcp251xfd_priv *priv,
- u8 *tef_tail)
-{
- u32 tef_ua;
- int err;
-
- err = regmap_read(priv->map_reg, MCP251XFD_REG_TEFUA, &tef_ua);
- if (err)
- return err;
-
- *tef_tail = tef_ua / sizeof(struct mcp251xfd_hw_tef_obj);
-
- return 0;
-}
-
-static inline int
-mcp251xfd_tx_tail_get_from_chip(const struct mcp251xfd_priv *priv,
- u8 *tx_tail)
-{
- u32 fifo_sta;
- int err;
-
- err = regmap_read(priv->map_reg,
- MCP251XFD_REG_FIFOSTA(MCP251XFD_TX_FIFO),
- &fifo_sta);
- if (err)
- return err;
-
- *tx_tail = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
-
- return 0;
-}
-
-static inline int
-mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring,
- u8 *rx_head)
-{
- u32 fifo_sta;
- int err;
-
- err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr),
- &fifo_sta);
- if (err)
- return err;
-
- *rx_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
-
- return 0;
-}
-
-static inline int
-mcp251xfd_rx_tail_get_from_chip(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring,
- u8 *rx_tail)
-{
- u32 fifo_ua;
- int err;
-
- err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOUA(ring->fifo_nr),
- &fifo_ua);
- if (err)
- return err;
-
- fifo_ua -= ring->base - MCP251XFD_RAM_START;
- *rx_tail = fifo_ua / ring->obj_size;
-
- return 0;
-}
-
-static void
-mcp251xfd_tx_ring_init_tx_obj(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_tx_ring *ring,
- struct mcp251xfd_tx_obj *tx_obj,
- const u8 rts_buf_len,
- const u8 n)
-{
- struct spi_transfer *xfer;
- u16 addr;
-
- /* FIFO load */
- addr = mcp251xfd_get_tx_obj_addr(ring, n);
- if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX)
- mcp251xfd_spi_cmd_write_crc_set_addr(&tx_obj->buf.crc.cmd,
- addr);
- else
- mcp251xfd_spi_cmd_write_nocrc(&tx_obj->buf.nocrc.cmd,
- addr);
-
- xfer = &tx_obj->xfer[0];
- xfer->tx_buf = &tx_obj->buf;
- xfer->len = 0; /* actual len is assigned on the fly */
- xfer->cs_change = 1;
- xfer->cs_change_delay.value = 0;
- xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
-
- /* FIFO request to send */
- xfer = &tx_obj->xfer[1];
- xfer->tx_buf = &ring->rts_buf;
- xfer->len = rts_buf_len;
-
- /* SPI message */
- spi_message_init_with_transfers(&tx_obj->msg, tx_obj->xfer,
- ARRAY_SIZE(tx_obj->xfer));
-}
-
-static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
-{
- struct mcp251xfd_tef_ring *tef_ring;
- struct mcp251xfd_tx_ring *tx_ring;
- struct mcp251xfd_rx_ring *rx_ring, *prev_rx_ring = NULL;
- struct mcp251xfd_tx_obj *tx_obj;
- struct spi_transfer *xfer;
- u32 val;
- u16 addr;
- u8 len;
- int i, j;
-
- netdev_reset_queue(priv->ndev);
-
- /* TEF */
- tef_ring = priv->tef;
- tef_ring->head = 0;
- tef_ring->tail = 0;
-
- /* FIFO increment TEF tail pointer */
- addr = MCP251XFD_REG_TEFCON;
- val = MCP251XFD_REG_TEFCON_UINC;
- len = mcp251xfd_cmd_prepare_write_reg(priv, &tef_ring->uinc_buf,
- addr, val, val);
-
- for (j = 0; j < ARRAY_SIZE(tef_ring->uinc_xfer); j++) {
- xfer = &tef_ring->uinc_xfer[j];
- xfer->tx_buf = &tef_ring->uinc_buf;
- xfer->len = len;
- xfer->cs_change = 1;
- xfer->cs_change_delay.value = 0;
- xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
- }
-
- /* "cs_change == 1" on the last transfer results in an active
- * chip select after the complete SPI message. This causes the
- * controller to interpret the next register access as
- * data. Set "cs_change" of the last transfer to "0" to
- * properly deactivate the chip select at the end of the
- * message.
- */
- xfer->cs_change = 0;
-
- /* TX */
- tx_ring = priv->tx;
- tx_ring->head = 0;
- tx_ring->tail = 0;
- tx_ring->base = mcp251xfd_get_tef_obj_addr(tx_ring->obj_num);
-
- /* FIFO request to send */
- addr = MCP251XFD_REG_FIFOCON(MCP251XFD_TX_FIFO);
- val = MCP251XFD_REG_FIFOCON_TXREQ | MCP251XFD_REG_FIFOCON_UINC;
- len = mcp251xfd_cmd_prepare_write_reg(priv, &tx_ring->rts_buf,
- addr, val, val);
-
- mcp251xfd_for_each_tx_obj(tx_ring, tx_obj, i)
- mcp251xfd_tx_ring_init_tx_obj(priv, tx_ring, tx_obj, len, i);
-
- /* RX */
- mcp251xfd_for_each_rx_ring(priv, rx_ring, i) {
- rx_ring->head = 0;
- rx_ring->tail = 0;
- rx_ring->nr = i;
- rx_ring->fifo_nr = MCP251XFD_RX_FIFO(i);
-
- if (!prev_rx_ring)
- rx_ring->base =
- mcp251xfd_get_tx_obj_addr(tx_ring,
- tx_ring->obj_num);
- else
- rx_ring->base = prev_rx_ring->base +
- prev_rx_ring->obj_size *
- prev_rx_ring->obj_num;
-
- prev_rx_ring = rx_ring;
-
- /* FIFO increment RX tail pointer */
- addr = MCP251XFD_REG_FIFOCON(rx_ring->fifo_nr);
- val = MCP251XFD_REG_FIFOCON_UINC;
- len = mcp251xfd_cmd_prepare_write_reg(priv, &rx_ring->uinc_buf,
- addr, val, val);
-
- for (j = 0; j < ARRAY_SIZE(rx_ring->uinc_xfer); j++) {
- xfer = &rx_ring->uinc_xfer[j];
- xfer->tx_buf = &rx_ring->uinc_buf;
- xfer->len = len;
- xfer->cs_change = 1;
- xfer->cs_change_delay.value = 0;
- xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
- }
-
- /* "cs_change == 1" on the last transfer results in an
- * active chip select after the complete SPI
- * message. This causes the controller to interpret
- * the next register access as data. Set "cs_change"
- * of the last transfer to "0" to properly deactivate
- * the chip select at the end of the message.
- */
- xfer->cs_change = 0;
- }
-}
-
-static void mcp251xfd_ring_free(struct mcp251xfd_priv *priv)
-{
- int i;
-
- for (i = ARRAY_SIZE(priv->rx) - 1; i >= 0; i--) {
- kfree(priv->rx[i]);
- priv->rx[i] = NULL;
- }
-}
-
-static int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv)
-{
- struct mcp251xfd_tx_ring *tx_ring;
- struct mcp251xfd_rx_ring *rx_ring;
- int tef_obj_size, tx_obj_size, rx_obj_size;
- int tx_obj_num;
- int ram_free, i;
-
- tef_obj_size = sizeof(struct mcp251xfd_hw_tef_obj);
- /* listen-only mode works like FD mode */
- if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_FD)) {
- tx_obj_num = MCP251XFD_TX_OBJ_NUM_CANFD;
- tx_obj_size = sizeof(struct mcp251xfd_hw_tx_obj_canfd);
- rx_obj_size = sizeof(struct mcp251xfd_hw_rx_obj_canfd);
- } else {
- tx_obj_num = MCP251XFD_TX_OBJ_NUM_CAN;
- tx_obj_size = sizeof(struct mcp251xfd_hw_tx_obj_can);
- rx_obj_size = sizeof(struct mcp251xfd_hw_rx_obj_can);
- }
-
- tx_ring = priv->tx;
- tx_ring->obj_num = tx_obj_num;
- tx_ring->obj_size = tx_obj_size;
-
- ram_free = MCP251XFD_RAM_SIZE - tx_obj_num *
- (tef_obj_size + tx_obj_size);
-
- for (i = 0;
- i < ARRAY_SIZE(priv->rx) && ram_free >= rx_obj_size;
- i++) {
- int rx_obj_num;
-
- rx_obj_num = ram_free / rx_obj_size;
- rx_obj_num = min(1 << (fls(rx_obj_num) - 1),
- MCP251XFD_RX_OBJ_NUM_MAX);
-
- rx_ring = kzalloc(sizeof(*rx_ring) + rx_obj_size * rx_obj_num,
- GFP_KERNEL);
- if (!rx_ring) {
- mcp251xfd_ring_free(priv);
- return -ENOMEM;
- }
- rx_ring->obj_num = rx_obj_num;
- rx_ring->obj_size = rx_obj_size;
- priv->rx[i] = rx_ring;
-
- ram_free -= rx_ring->obj_num * rx_ring->obj_size;
- }
- priv->rx_ring_num = i;
-
- netdev_dbg(priv->ndev,
- "FIFO setup: TEF: %d*%d bytes = %d bytes, TX: %d*%d bytes = %d bytes\n",
- tx_obj_num, tef_obj_size, tef_obj_size * tx_obj_num,
- tx_obj_num, tx_obj_size, tx_obj_size * tx_obj_num);
-
- mcp251xfd_for_each_rx_ring(priv, rx_ring, i) {
- netdev_dbg(priv->ndev,
- "FIFO setup: RX-%d: %d*%d bytes = %d bytes\n",
- i, rx_ring->obj_num, rx_ring->obj_size,
- rx_ring->obj_size * rx_ring->obj_num);
- }
-
- netdev_dbg(priv->ndev,
- "FIFO setup: free: %d bytes\n",
- ram_free);
-
- return 0;
-}
-
static inline int
mcp251xfd_chip_get_mode(const struct mcp251xfd_priv *priv, u8 *mode)
{
@@ -838,108 +512,6 @@ static int mcp251xfd_chip_rx_int_disable(const struct mcp251xfd_priv *priv)
return regmap_write(priv->map_reg, MCP251XFD_REG_IOCON, val);
}
-static int
-mcp251xfd_chip_rx_fifo_init_one(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring)
-{
- u32 fifo_con;
-
- /* Enable RXOVIE on _all_ RX FIFOs, not just the last one.
- *
- * FIFOs hit by a RX MAB overflow and RXOVIE enabled will
- * generate a RXOVIF, use this to properly detect RX MAB
- * overflows.
- */
- fifo_con = FIELD_PREP(MCP251XFD_REG_FIFOCON_FSIZE_MASK,
- ring->obj_num - 1) |
- MCP251XFD_REG_FIFOCON_RXTSEN |
- MCP251XFD_REG_FIFOCON_RXOVIE |
- MCP251XFD_REG_FIFOCON_TFNRFNIE;
-
- if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_FD))
- fifo_con |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
- MCP251XFD_REG_FIFOCON_PLSIZE_64);
- else
- fifo_con |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
- MCP251XFD_REG_FIFOCON_PLSIZE_8);
-
- return regmap_write(priv->map_reg,
- MCP251XFD_REG_FIFOCON(ring->fifo_nr), fifo_con);
-}
-
-static int
-mcp251xfd_chip_rx_filter_init_one(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring)
-{
- u32 fltcon;
-
- fltcon = MCP251XFD_REG_FLTCON_FLTEN(ring->nr) |
- MCP251XFD_REG_FLTCON_FBP(ring->nr, ring->fifo_nr);
-
- return regmap_update_bits(priv->map_reg,
- MCP251XFD_REG_FLTCON(ring->nr >> 2),
- MCP251XFD_REG_FLTCON_FLT_MASK(ring->nr),
- fltcon);
-}
-
-static int mcp251xfd_chip_fifo_init(const struct mcp251xfd_priv *priv)
-{
- const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- const struct mcp251xfd_rx_ring *rx_ring;
- u32 val;
- int err, n;
-
- /* TEF */
- val = FIELD_PREP(MCP251XFD_REG_TEFCON_FSIZE_MASK,
- tx_ring->obj_num - 1) |
- MCP251XFD_REG_TEFCON_TEFTSEN |
- MCP251XFD_REG_TEFCON_TEFOVIE |
- MCP251XFD_REG_TEFCON_TEFNEIE;
-
- err = regmap_write(priv->map_reg, MCP251XFD_REG_TEFCON, val);
- if (err)
- return err;
-
- /* FIFO 1 - TX */
- val = FIELD_PREP(MCP251XFD_REG_FIFOCON_FSIZE_MASK,
- tx_ring->obj_num - 1) |
- MCP251XFD_REG_FIFOCON_TXEN |
- MCP251XFD_REG_FIFOCON_TXATIE;
-
- if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_FD))
- val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
- MCP251XFD_REG_FIFOCON_PLSIZE_64);
- else
- val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_PLSIZE_MASK,
- MCP251XFD_REG_FIFOCON_PLSIZE_8);
-
- if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
- val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_TXAT_MASK,
- MCP251XFD_REG_FIFOCON_TXAT_ONE_SHOT);
- else
- val |= FIELD_PREP(MCP251XFD_REG_FIFOCON_TXAT_MASK,
- MCP251XFD_REG_FIFOCON_TXAT_UNLIMITED);
-
- err = regmap_write(priv->map_reg,
- MCP251XFD_REG_FIFOCON(MCP251XFD_TX_FIFO),
- val);
- if (err)
- return err;
-
- /* RX FIFOs */
- mcp251xfd_for_each_rx_ring(priv, rx_ring, n) {
- err = mcp251xfd_chip_rx_fifo_init_one(priv, rx_ring);
- if (err)
- return err;
-
- err = mcp251xfd_chip_rx_filter_init_one(priv, rx_ring);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static int mcp251xfd_chip_ecc_init(struct mcp251xfd_priv *priv)
{
struct mcp251xfd_ecc *ecc = &priv->ecc;
@@ -968,18 +540,10 @@ static int mcp251xfd_chip_ecc_init(struct mcp251xfd_priv *priv)
return err;
}
-static inline void mcp251xfd_ecc_tefif_successful(struct mcp251xfd_priv *priv)
-{
- struct mcp251xfd_ecc *ecc = &priv->ecc;
-
- ecc->ecc_stat = 0;
-}
-
static u8 mcp251xfd_get_normal_mode(const struct mcp251xfd_priv *priv)
{
u8 mode;
-
if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
mode = MCP251XFD_REG_CON_MODE_INT_LOOPBACK;
else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
@@ -1186,433 +750,6 @@ static int mcp251xfd_get_berr_counter(const struct net_device *ndev,
return __mcp251xfd_get_berr_counter(ndev, bec);
}
-static int mcp251xfd_check_tef_tail(const struct mcp251xfd_priv *priv)
-{
- u8 tef_tail_chip, tef_tail;
- int err;
-
- if (!IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY))
- return 0;
-
- err = mcp251xfd_tef_tail_get_from_chip(priv, &tef_tail_chip);
- if (err)
- return err;
-
- tef_tail = mcp251xfd_get_tef_tail(priv);
- if (tef_tail_chip != tef_tail) {
- netdev_err(priv->ndev,
- "TEF tail of chip (0x%02x) and ours (0x%08x) inconsistent.\n",
- tef_tail_chip, tef_tail);
- return -EILSEQ;
- }
-
- return 0;
-}
-
-static int
-mcp251xfd_check_rx_tail(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring)
-{
- u8 rx_tail_chip, rx_tail;
- int err;
-
- if (!IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY))
- return 0;
-
- err = mcp251xfd_rx_tail_get_from_chip(priv, ring, &rx_tail_chip);
- if (err)
- return err;
-
- rx_tail = mcp251xfd_get_rx_tail(ring);
- if (rx_tail_chip != rx_tail) {
- netdev_err(priv->ndev,
- "RX tail of chip (%d) and ours (%d) inconsistent.\n",
- rx_tail_chip, rx_tail);
- return -EILSEQ;
- }
-
- return 0;
-}
-
-static int
-mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
-{
- const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- u32 tef_sta;
- int err;
-
- err = regmap_read(priv->map_reg, MCP251XFD_REG_TEFSTA, &tef_sta);
- if (err)
- return err;
-
- if (tef_sta & MCP251XFD_REG_TEFSTA_TEFOVIF) {
- netdev_err(priv->ndev,
- "Transmit Event FIFO buffer overflow.\n");
- return -ENOBUFS;
- }
-
- netdev_info(priv->ndev,
- "Transmit Event FIFO buffer %s. (seq=0x%08x, tef_tail=0x%08x, tef_head=0x%08x, tx_head=0x%08x).\n",
- tef_sta & MCP251XFD_REG_TEFSTA_TEFFIF ?
- "full" : tef_sta & MCP251XFD_REG_TEFSTA_TEFNEIF ?
- "not empty" : "empty",
- seq, priv->tef->tail, priv->tef->head, tx_ring->head);
-
- /* The Sequence Number in the TEF doesn't match our tef_tail. */
- return -EAGAIN;
-}
-
-static int
-mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
- const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
- unsigned int *frame_len_ptr)
-{
- struct net_device_stats *stats = &priv->ndev->stats;
- struct sk_buff *skb;
- u32 seq, seq_masked, tef_tail_masked, tef_tail;
-
- seq = FIELD_GET(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK,
- hw_tef_obj->flags);
-
- /* Use the MCP2517FD mask on the MCP2518FD, too. We only
- * compare 7 bits, this should be enough to detect
- * net-yet-completed, i.e. old TEF objects.
- */
- seq_masked = seq &
- field_mask(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK);
- tef_tail_masked = priv->tef->tail &
- field_mask(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK);
- if (seq_masked != tef_tail_masked)
- return mcp251xfd_handle_tefif_recover(priv, seq);
-
- tef_tail = mcp251xfd_get_tef_tail(priv);
- skb = priv->can.echo_skb[tef_tail];
- if (skb)
- mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts);
- stats->tx_bytes +=
- can_rx_offload_get_echo_skb(&priv->offload,
- tef_tail, hw_tef_obj->ts,
- frame_len_ptr);
- stats->tx_packets++;
- priv->tef->tail++;
-
- return 0;
-}
-
-static int mcp251xfd_tef_ring_update(struct mcp251xfd_priv *priv)
-{
- const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- unsigned int new_head;
- u8 chip_tx_tail;
- int err;
-
- err = mcp251xfd_tx_tail_get_from_chip(priv, &chip_tx_tail);
- if (err)
- return err;
-
- /* chip_tx_tail, is the next TX-Object send by the HW.
- * The new TEF head must be >= the old head, ...
- */
- new_head = round_down(priv->tef->head, tx_ring->obj_num) + chip_tx_tail;
- if (new_head <= priv->tef->head)
- new_head += tx_ring->obj_num;
-
- /* ... but it cannot exceed the TX head. */
- priv->tef->head = min(new_head, tx_ring->head);
-
- return mcp251xfd_check_tef_tail(priv);
-}
-
-static inline int
-mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
- struct mcp251xfd_hw_tef_obj *hw_tef_obj,
- const u8 offset, const u8 len)
-{
- const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- const int val_bytes = regmap_get_val_bytes(priv->map_rx);
-
- if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
- (offset > tx_ring->obj_num ||
- len > tx_ring->obj_num ||
- offset + len > tx_ring->obj_num)) {
- netdev_err(priv->ndev,
- "Trying to read to many TEF objects (max=%d, offset=%d, len=%d).\n",
- tx_ring->obj_num, offset, len);
- return -ERANGE;
- }
-
- return regmap_bulk_read(priv->map_rx,
- mcp251xfd_get_tef_obj_addr(offset),
- hw_tef_obj,
- sizeof(*hw_tef_obj) / val_bytes * len);
-}
-
-static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
-{
- struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX];
- unsigned int total_frame_len = 0;
- u8 tef_tail, len, l;
- int err, i;
-
- err = mcp251xfd_tef_ring_update(priv);
- if (err)
- return err;
-
- tef_tail = mcp251xfd_get_tef_tail(priv);
- len = mcp251xfd_get_tef_len(priv);
- l = mcp251xfd_get_tef_linear_len(priv);
- err = mcp251xfd_tef_obj_read(priv, hw_tef_obj, tef_tail, l);
- if (err)
- return err;
-
- if (l < len) {
- err = mcp251xfd_tef_obj_read(priv, &hw_tef_obj[l], 0, len - l);
- if (err)
- return err;
- }
-
- for (i = 0; i < len; i++) {
- unsigned int frame_len = 0;
-
- err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
- /* -EAGAIN means the Sequence Number in the TEF
- * doesn't match our tef_tail. This can happen if we
- * read the TEF objects too early. Leave loop let the
- * interrupt handler call us again.
- */
- if (err == -EAGAIN)
- goto out_netif_wake_queue;
- if (err)
- return err;
-
- total_frame_len += frame_len;
- }
-
- out_netif_wake_queue:
- len = i; /* number of handled goods TEFs */
- if (len) {
- struct mcp251xfd_tef_ring *ring = priv->tef;
- struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- int offset;
-
- /* Increment the TEF FIFO tail pointer 'len' times in
- * a single SPI message.
- *
- * Note:
- * Calculate offset, so that the SPI transfer ends on
- * the last message of the uinc_xfer array, which has
- * "cs_change == 0", to properly deactivate the chip
- * select.
- */
- offset = ARRAY_SIZE(ring->uinc_xfer) - len;
- err = spi_sync_transfer(priv->spi,
- ring->uinc_xfer + offset, len);
- if (err)
- return err;
-
- tx_ring->tail += len;
- netdev_completed_queue(priv->ndev, len, total_frame_len);
-
- err = mcp251xfd_check_tef_tail(priv);
- if (err)
- return err;
- }
-
- mcp251xfd_ecc_tefif_successful(priv);
-
- if (mcp251xfd_get_tx_free(priv->tx)) {
- /* Make sure that anybody stopping the queue after
- * this sees the new tx_ring->tail.
- */
- smp_mb();
- netif_wake_queue(priv->ndev);
- }
-
- return 0;
-}
-
-static int
-mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
- struct mcp251xfd_rx_ring *ring)
-{
- u32 new_head;
- u8 chip_rx_head;
- int err;
-
- err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head);
- if (err)
- return err;
-
- /* chip_rx_head, is the next RX-Object filled by the HW.
- * The new RX head must be >= the old head.
- */
- new_head = round_down(ring->head, ring->obj_num) + chip_rx_head;
- if (new_head <= ring->head)
- new_head += ring->obj_num;
-
- ring->head = new_head;
-
- return mcp251xfd_check_rx_tail(priv, ring);
-}
-
-static void
-mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
- struct sk_buff *skb)
-{
- struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- u8 dlc;
-
- if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_IDE) {
- u32 sid, eid;
-
- eid = FIELD_GET(MCP251XFD_OBJ_ID_EID_MASK, hw_rx_obj->id);
- sid = FIELD_GET(MCP251XFD_OBJ_ID_SID_MASK, hw_rx_obj->id);
-
- cfd->can_id = CAN_EFF_FLAG |
- FIELD_PREP(MCP251XFD_REG_FRAME_EFF_EID_MASK, eid) |
- FIELD_PREP(MCP251XFD_REG_FRAME_EFF_SID_MASK, sid);
- } else {
- cfd->can_id = FIELD_GET(MCP251XFD_OBJ_ID_SID_MASK,
- hw_rx_obj->id);
- }
-
- dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC_MASK, hw_rx_obj->flags);
-
- /* CANFD */
- if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) {
-
- if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_ESI)
- cfd->flags |= CANFD_ESI;
-
- if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS)
- cfd->flags |= CANFD_BRS;
-
- cfd->len = can_fd_dlc2len(dlc);
- } else {
- if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)
- cfd->can_id |= CAN_RTR_FLAG;
-
- can_frame_set_cc_len((struct can_frame *)cfd, dlc,
- priv->can.ctrlmode);
- }
-
- if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
- memcpy(cfd->data, hw_rx_obj->data, cfd->len);
-
- mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts);
-}
-
-static int
-mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv,
- struct mcp251xfd_rx_ring *ring,
- const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj)
-{
- struct net_device_stats *stats = &priv->ndev->stats;
- struct sk_buff *skb;
- struct canfd_frame *cfd;
- int err;
-
- if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF)
- skb = alloc_canfd_skb(priv->ndev, &cfd);
- else
- skb = alloc_can_skb(priv->ndev, (struct can_frame **)&cfd);
-
- if (!skb) {
- stats->rx_dropped++;
- return 0;
- }
-
- mcp251xfd_hw_rx_obj_to_skb(priv, hw_rx_obj, skb);
- err = can_rx_offload_queue_sorted(&priv->offload, skb, hw_rx_obj->ts);
- if (err)
- stats->rx_fifo_errors++;
-
- return 0;
-}
-
-static inline int
-mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring,
- struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
- const u8 offset, const u8 len)
-{
- const int val_bytes = regmap_get_val_bytes(priv->map_rx);
- int err;
-
- err = regmap_bulk_read(priv->map_rx,
- mcp251xfd_get_rx_obj_addr(ring, offset),
- hw_rx_obj,
- len * ring->obj_size / val_bytes);
-
- return err;
-}
-
-static int
-mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
- struct mcp251xfd_rx_ring *ring)
-{
- struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj = ring->obj;
- u8 rx_tail, len;
- int err, i;
-
- err = mcp251xfd_rx_ring_update(priv, ring);
- if (err)
- return err;
-
- while ((len = mcp251xfd_get_rx_linear_len(ring))) {
- int offset;
-
- rx_tail = mcp251xfd_get_rx_tail(ring);
-
- err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj,
- rx_tail, len);
- if (err)
- return err;
-
- for (i = 0; i < len; i++) {
- err = mcp251xfd_handle_rxif_one(priv, ring,
- (void *)hw_rx_obj +
- i * ring->obj_size);
- if (err)
- return err;
- }
-
- /* Increment the RX FIFO tail pointer 'len' times in a
- * single SPI message.
- *
- * Note:
- * Calculate offset, so that the SPI transfer ends on
- * the last message of the uinc_xfer array, which has
- * "cs_change == 0", to properly deactivate the chip
- * select.
- */
- offset = ARRAY_SIZE(ring->uinc_xfer) - len;
- err = spi_sync_transfer(priv->spi,
- ring->uinc_xfer + offset, len);
- if (err)
- return err;
-
- ring->tail += len;
- }
-
- return 0;
-}
-
-static int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv)
-{
- struct mcp251xfd_rx_ring *ring;
- int err, n;
-
- mcp251xfd_for_each_rx_ring(priv, ring, n) {
- err = mcp251xfd_handle_rxif_ring(priv, ring);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static struct sk_buff *
mcp251xfd_alloc_can_err_skb(struct mcp251xfd_priv *priv,
struct can_frame **cf, u32 *timestamp)
@@ -1653,12 +790,15 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv)
/* If SERRIF is active, there was a RX MAB overflow. */
if (priv->regs_status.intf & MCP251XFD_REG_INT_SERRIF) {
- netdev_info(priv->ndev,
- "RX-%d: MAB overflow detected.\n",
- ring->nr);
+ if (net_ratelimit())
+ netdev_dbg(priv->ndev,
+ "RX-%d: MAB overflow detected.\n",
+ ring->nr);
} else {
- netdev_info(priv->ndev,
- "RX-%d: FIFO overflow.\n", ring->nr);
+ if (net_ratelimit())
+ netdev_dbg(priv->ndev,
+ "RX-%d: FIFO overflow.\n",
+ ring->nr);
}
err = regmap_update_bits(priv->map_reg,
@@ -2311,212 +1451,23 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
return handled;
}
-static inline struct
-mcp251xfd_tx_obj *mcp251xfd_get_tx_obj_next(struct mcp251xfd_tx_ring *tx_ring)
-{
- u8 tx_head;
-
- tx_head = mcp251xfd_get_tx_head(tx_ring);
-
- return &tx_ring->obj[tx_head];
-}
-
-static void
-mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
- struct mcp251xfd_tx_obj *tx_obj,
- const struct sk_buff *skb,
- unsigned int seq)
-{
- const struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- struct mcp251xfd_hw_tx_obj_raw *hw_tx_obj;
- union mcp251xfd_tx_obj_load_buf *load_buf;
- u8 dlc;
- u32 id, flags;
- int len_sanitized = 0, len;
-
- if (cfd->can_id & CAN_EFF_FLAG) {
- u32 sid, eid;
-
- sid = FIELD_GET(MCP251XFD_REG_FRAME_EFF_SID_MASK, cfd->can_id);
- eid = FIELD_GET(MCP251XFD_REG_FRAME_EFF_EID_MASK, cfd->can_id);
-
- id = FIELD_PREP(MCP251XFD_OBJ_ID_EID_MASK, eid) |
- FIELD_PREP(MCP251XFD_OBJ_ID_SID_MASK, sid);
-
- flags = MCP251XFD_OBJ_FLAGS_IDE;
- } else {
- id = FIELD_PREP(MCP251XFD_OBJ_ID_SID_MASK, cfd->can_id);
- flags = 0;
- }
-
- /* Use the MCP2518FD mask even on the MCP2517FD. It doesn't
- * harm, only the lower 7 bits will be transferred into the
- * TEF object.
- */
- flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq);
-
- if (cfd->can_id & CAN_RTR_FLAG)
- flags |= MCP251XFD_OBJ_FLAGS_RTR;
- else
- len_sanitized = canfd_sanitize_len(cfd->len);
-
- /* CANFD */
- if (can_is_canfd_skb(skb)) {
- if (cfd->flags & CANFD_ESI)
- flags |= MCP251XFD_OBJ_FLAGS_ESI;
-
- flags |= MCP251XFD_OBJ_FLAGS_FDF;
-
- if (cfd->flags & CANFD_BRS)
- flags |= MCP251XFD_OBJ_FLAGS_BRS;
-
- dlc = can_fd_len2dlc(cfd->len);
- } else {
- dlc = can_get_cc_dlc((struct can_frame *)cfd,
- priv->can.ctrlmode);
- }
-
- flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC_MASK, dlc);
-
- load_buf = &tx_obj->buf;
- if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX)
- hw_tx_obj = &load_buf->crc.hw_tx_obj;
- else
- hw_tx_obj = &load_buf->nocrc.hw_tx_obj;
-
- put_unaligned_le32(id, &hw_tx_obj->id);
- put_unaligned_le32(flags, &hw_tx_obj->flags);
-
- /* Copy data */
- memcpy(hw_tx_obj->data, cfd->data, cfd->len);
-
- /* Clear unused data at end of CAN frame */
- if (MCP251XFD_SANITIZE_CAN && len_sanitized) {
- int pad_len;
-
- pad_len = len_sanitized - cfd->len;
- if (pad_len)
- memset(hw_tx_obj->data + cfd->len, 0x0, pad_len);
- }
-
- /* Number of bytes to be written into the RAM of the controller */
- len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags);
- if (MCP251XFD_SANITIZE_CAN)
- len += round_up(len_sanitized, sizeof(u32));
- else
- len += round_up(cfd->len, sizeof(u32));
-
- if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX) {
- u16 crc;
-
- mcp251xfd_spi_cmd_crc_set_len_in_ram(&load_buf->crc.cmd,
- len);
- /* CRC */
- len += sizeof(load_buf->crc.cmd);
- crc = mcp251xfd_crc16_compute(&load_buf->crc, len);
- put_unaligned_be16(crc, (void *)load_buf + len);
-
- /* Total length */
- len += sizeof(load_buf->crc.crc);
- } else {
- len += sizeof(load_buf->nocrc.cmd);
- }
-
- tx_obj->xfer[0].len = len;
-}
-
-static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv,
- struct mcp251xfd_tx_obj *tx_obj)
-{
- return spi_async(priv->spi, &tx_obj->msg);
-}
-
-static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv,
- struct mcp251xfd_tx_ring *tx_ring)
-{
- if (mcp251xfd_get_tx_free(tx_ring) > 0)
- return false;
-
- netif_stop_queue(priv->ndev);
-
- /* Memory barrier before checking tx_free (head and tail) */
- smp_mb();
-
- if (mcp251xfd_get_tx_free(tx_ring) == 0) {
- netdev_dbg(priv->ndev,
- "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n",
- tx_ring->head, tx_ring->tail,
- tx_ring->head - tx_ring->tail);
-
- return true;
- }
-
- netif_start_queue(priv->ndev);
-
- return false;
-}
-
-static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
- struct net_device *ndev)
-{
- struct mcp251xfd_priv *priv = netdev_priv(ndev);
- struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- struct mcp251xfd_tx_obj *tx_obj;
- unsigned int frame_len;
- u8 tx_head;
- int err;
-
- if (can_dropped_invalid_skb(ndev, skb))
- return NETDEV_TX_OK;
-
- if (mcp251xfd_tx_busy(priv, tx_ring))
- return NETDEV_TX_BUSY;
-
- tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
- mcp251xfd_tx_obj_from_skb(priv, tx_obj, skb, tx_ring->head);
-
- /* Stop queue if we occupy the complete TX FIFO */
- tx_head = mcp251xfd_get_tx_head(tx_ring);
- tx_ring->head++;
- if (mcp251xfd_get_tx_free(tx_ring) == 0)
- netif_stop_queue(ndev);
-
- frame_len = can_skb_get_frame_len(skb);
- err = can_put_echo_skb(skb, ndev, tx_head, frame_len);
- if (!err)
- netdev_sent_queue(priv->ndev, frame_len);
-
- err = mcp251xfd_tx_obj_write(priv, tx_obj);
- if (err)
- goto out_err;
-
- return NETDEV_TX_OK;
-
- out_err:
- netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
-
- return NETDEV_TX_OK;
-}
-
static int mcp251xfd_open(struct net_device *ndev)
{
struct mcp251xfd_priv *priv = netdev_priv(ndev);
const struct spi_device *spi = priv->spi;
int err;
- err = pm_runtime_get_sync(ndev->dev.parent);
- if (err < 0) {
- pm_runtime_put_noidle(ndev->dev.parent);
+ err = open_candev(ndev);
+ if (err)
return err;
- }
- err = open_candev(ndev);
+ err = pm_runtime_resume_and_get(ndev->dev.parent);
if (err)
- goto out_pm_runtime_put;
+ goto out_close_candev;
err = mcp251xfd_ring_alloc(priv);
if (err)
- goto out_close_candev;
+ goto out_pm_runtime_put;
err = mcp251xfd_transceiver_enable(priv);
if (err)
@@ -2552,11 +1503,11 @@ static int mcp251xfd_open(struct net_device *ndev)
mcp251xfd_transceiver_disable(priv);
out_mcp251xfd_ring_free:
mcp251xfd_ring_free(priv);
- out_close_candev:
- close_candev(ndev);
out_pm_runtime_put:
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
pm_runtime_put(ndev->dev.parent);
+ out_close_candev:
+ close_candev(ndev);
return err;
}
@@ -2625,7 +1576,7 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
if (!mcp251xfd_is_251X(priv) &&
priv->devtype_data.model != devtype_data->model) {
netdev_info(ndev,
- "Detected %s, but firmware specifies a %s. Fixing up.",
+ "Detected %s, but firmware specifies a %s. Fixing up.\n",
__mcp251xfd_get_model_str(devtype_data->model),
mcp251xfd_get_model_str(priv));
}
@@ -2662,7 +1613,7 @@ static int mcp251xfd_register_check_rx_int(struct mcp251xfd_priv *priv)
return 0;
netdev_info(priv->ndev,
- "RX_INT active after softreset, disabling RX_INT support.");
+ "RX_INT active after softreset, disabling RX_INT support.\n");
devm_gpiod_put(&priv->spi->dev, priv->rx_int);
priv->rx_int = NULL;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
index 297491516a26..7b120c716228 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
@@ -250,7 +250,6 @@ mcp251xfd_regmap_crc_read_check_crc(const struct mcp251xfd_map_buf_crc * const b
return 0;
}
-
static int
mcp251xfd_regmap_crc_read_one(struct mcp251xfd_priv *priv,
struct spi_message *msg, unsigned int data_len)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
new file mode 100644
index 000000000000..92f9e9b01289
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+//
+// Based on:
+//
+// CAN bus driver for Microchip 25XXFD CAN Controller with SPI Interface
+//
+// Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
+//
+
+#include <asm/unaligned.h>
+
+#include "mcp251xfd.h"
+
+static inline u8
+mcp251xfd_cmd_prepare_write_reg(const struct mcp251xfd_priv *priv,
+ union mcp251xfd_write_reg_buf *write_reg_buf,
+ const u16 reg, const u32 mask, const u32 val)
+{
+ u8 first_byte, last_byte, len;
+ u8 *data;
+ __le32 val_le32;
+
+ first_byte = mcp251xfd_first_byte_set(mask);
+ last_byte = mcp251xfd_last_byte_set(mask);
+ len = last_byte - first_byte + 1;
+
+ data = mcp251xfd_spi_cmd_write(priv, write_reg_buf, reg + first_byte);
+ val_le32 = cpu_to_le32(val >> BITS_PER_BYTE * first_byte);
+ memcpy(data, &val_le32, len);
+
+ if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_REG) {
+ u16 crc;
+
+ mcp251xfd_spi_cmd_crc_set_len_in_reg(&write_reg_buf->crc.cmd,
+ len);
+ /* CRC */
+ len += sizeof(write_reg_buf->crc.cmd);
+ crc = mcp251xfd_crc16_compute(&write_reg_buf->crc, len);
+ put_unaligned_be16(crc, (void *)write_reg_buf + len);
+
+ /* Total length */
+ len += sizeof(write_reg_buf->crc.crc);
+ } else {
+ len += sizeof(write_reg_buf->nocrc.cmd);
+ }
+
+ return len;
+}
+
+static void
+mcp251xfd_tx_ring_init_tx_obj(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_tx_ring *ring,
+ struct mcp251xfd_tx_obj *tx_obj,
+ const u8 rts_buf_len,
+ const u8 n)
+{
+ struct spi_transfer *xfer;
+ u16 addr;
+
+ /* FIFO load */
+ addr = mcp251xfd_get_tx_obj_addr(ring, n);
+ if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX)
+ mcp251xfd_spi_cmd_write_crc_set_addr(&tx_obj->buf.crc.cmd,
+ addr);
+ else
+ mcp251xfd_spi_cmd_write_nocrc(&tx_obj->buf.nocrc.cmd,
+ addr);
+
+ xfer = &tx_obj->xfer[0];
+ xfer->tx_buf = &tx_obj->buf;
+ xfer->len = 0; /* actual len is assigned on the fly */
+ xfer->cs_change = 1;
+ xfer->cs_change_delay.value = 0;
+ xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
+
+ /* FIFO request to send */
+ xfer = &tx_obj->xfer[1];
+ xfer->tx_buf = &ring->rts_buf;
+ xfer->len = rts_buf_len;
+
+ /* SPI message */
+ spi_message_init_with_transfers(&tx_obj->msg, tx_obj->xfer,
+ ARRAY_SIZE(tx_obj->xfer));
+}
+
+void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
+{
+ struct mcp251xfd_tef_ring *tef_ring;
+ struct mcp251xfd_tx_ring *tx_ring;
+ struct mcp251xfd_rx_ring *rx_ring, *prev_rx_ring = NULL;
+ struct mcp251xfd_tx_obj *tx_obj;
+ struct spi_transfer *xfer;
+ u32 val;
+ u16 addr;
+ u8 len;
+ int i, j;
+
+ netdev_reset_queue(priv->ndev);
+
+ /* TEF */
+ tef_ring = priv->tef;
+ tef_ring->head = 0;
+ tef_ring->tail = 0;
+
+ /* FIFO increment TEF tail pointer */
+ addr = MCP251XFD_REG_TEFCON;
+ val = MCP251XFD_REG_TEFCON_UINC;
+ len = mcp251xfd_cmd_prepare_write_reg(priv, &tef_ring->uinc_buf,
+ addr, val, val);
+
+ for (j = 0; j < ARRAY_SIZE(tef_ring->uinc_xfer); j++) {
+ xfer = &tef_ring->uinc_xfer[j];
+ xfer->tx_buf = &tef_ring->uinc_buf;
+ xfer->len = len;
+ xfer->cs_change = 1;
+ xfer->cs_change_delay.value = 0;
+ xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
+ }
+
+ /* "cs_change == 1" on the last transfer results in an active
+ * chip select after the complete SPI message. This causes the
+ * controller to interpret the next register access as
+ * data. Set "cs_change" of the last transfer to "0" to
+ * properly deactivate the chip select at the end of the
+ * message.
+ */
+ xfer->cs_change = 0;
+
+ /* TX */
+ tx_ring = priv->tx;
+ tx_ring->head = 0;
+ tx_ring->tail = 0;
+ tx_ring->base = mcp251xfd_get_tef_obj_addr(tx_ring->obj_num);
+
+ /* FIFO request to send */
+ addr = MCP251XFD_REG_FIFOCON(MCP251XFD_TX_FIFO);
+ val = MCP251XFD_REG_FIFOCON_TXREQ | MCP251XFD_REG_FIFOCON_UINC;
+ len = mcp251xfd_cmd_prepare_write_reg(priv, &tx_ring->rts_buf,
+ addr, val, val);
+
+ mcp251xfd_for_each_tx_obj(tx_ring, tx_obj, i)
+ mcp251xfd_tx_ring_init_tx_obj(priv, tx_ring, tx_obj, len, i);
+
+ /* RX */
+ mcp251xfd_for_each_rx_ring(priv, rx_ring, i) {
+ rx_ring->head = 0;
+ rx_ring->tail = 0;
+ rx_ring->nr = i;
+ rx_ring->fifo_nr = MCP251XFD_RX_FIFO(i);
+
+ if (!prev_rx_ring)
+ rx_ring->base =
+ mcp251xfd_get_tx_obj_addr(tx_ring,
+ tx_ring->obj_num);
+ else
+ rx_ring->base = prev_rx_ring->base +
+ prev_rx_ring->obj_size *
+ prev_rx_ring->obj_num;
+
+ prev_rx_ring = rx_ring;
+
+ /* FIFO increment RX tail pointer */
+ addr = MCP251XFD_REG_FIFOCON(rx_ring->fifo_nr);
+ val = MCP251XFD_REG_FIFOCON_UINC;
+ len = mcp251xfd_cmd_prepare_write_reg(priv, &rx_ring->uinc_buf,
+ addr, val, val);
+
+ for (j = 0; j < ARRAY_SIZE(rx_ring->uinc_xfer); j++) {
+ xfer = &rx_ring->uinc_xfer[j];
+ xfer->tx_buf = &rx_ring->uinc_buf;
+ xfer->len = len;
+ xfer->cs_change = 1;
+ xfer->cs_change_delay.value = 0;
+ xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
+ }
+
+ /* "cs_change == 1" on the last transfer results in an
+ * active chip select after the complete SPI
+ * message. This causes the controller to interpret
+ * the next register access as data. Set "cs_change"
+ * of the last transfer to "0" to properly deactivate
+ * the chip select at the end of the message.
+ */
+ xfer->cs_change = 0;
+ }
+}
+
+void mcp251xfd_ring_free(struct mcp251xfd_priv *priv)
+{
+ int i;
+
+ for (i = ARRAY_SIZE(priv->rx) - 1; i >= 0; i--) {
+ kfree(priv->rx[i]);
+ priv->rx[i] = NULL;
+ }
+}
+
+int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv)
+{
+ struct mcp251xfd_tx_ring *tx_ring;
+ struct mcp251xfd_rx_ring *rx_ring;
+ int tef_obj_size, tx_obj_size, rx_obj_size;
+ int tx_obj_num;
+ int ram_free, i;
+
+ tef_obj_size = sizeof(struct mcp251xfd_hw_tef_obj);
+ if (mcp251xfd_is_fd_mode(priv)) {
+ tx_obj_num = MCP251XFD_TX_OBJ_NUM_CANFD;
+ tx_obj_size = sizeof(struct mcp251xfd_hw_tx_obj_canfd);
+ rx_obj_size = sizeof(struct mcp251xfd_hw_rx_obj_canfd);
+ } else {
+ tx_obj_num = MCP251XFD_TX_OBJ_NUM_CAN;
+ tx_obj_size = sizeof(struct mcp251xfd_hw_tx_obj_can);
+ rx_obj_size = sizeof(struct mcp251xfd_hw_rx_obj_can);
+ }
+
+ tx_ring = priv->tx;
+ tx_ring->obj_num = tx_obj_num;
+ tx_ring->obj_size = tx_obj_size;
+
+ ram_free = MCP251XFD_RAM_SIZE - tx_obj_num *
+ (tef_obj_size + tx_obj_size);
+
+ for (i = 0;
+ i < ARRAY_SIZE(priv->rx) && ram_free >= rx_obj_size;
+ i++) {
+ int rx_obj_num;
+
+ rx_obj_num = ram_free / rx_obj_size;
+ rx_obj_num = min(1 << (fls(rx_obj_num) - 1),
+ MCP251XFD_RX_OBJ_NUM_MAX);
+
+ rx_ring = kzalloc(sizeof(*rx_ring) + rx_obj_size * rx_obj_num,
+ GFP_KERNEL);
+ if (!rx_ring) {
+ mcp251xfd_ring_free(priv);
+ return -ENOMEM;
+ }
+ rx_ring->obj_num = rx_obj_num;
+ rx_ring->obj_size = rx_obj_size;
+ priv->rx[i] = rx_ring;
+
+ ram_free -= rx_ring->obj_num * rx_ring->obj_size;
+ }
+ priv->rx_ring_num = i;
+
+ netdev_dbg(priv->ndev,
+ "FIFO setup: TEF: %d*%d bytes = %d bytes, TX: %d*%d bytes = %d bytes\n",
+ tx_obj_num, tef_obj_size, tef_obj_size * tx_obj_num,
+ tx_obj_num, tx_obj_size, tx_obj_size * tx_obj_num);
+
+ mcp251xfd_for_each_rx_ring(priv, rx_ring, i) {
+ netdev_dbg(priv->ndev,
+ "FIFO setup: RX-%d: %d*%d bytes = %d bytes\n",
+ i, rx_ring->obj_num, rx_ring->obj_size,
+ rx_ring->obj_size * rx_ring->obj_num);
+ }
+
+ netdev_dbg(priv->ndev,
+ "FIFO setup: free: %d bytes\n",
+ ram_free);
+
+ return 0;
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
new file mode 100644
index 000000000000..63f2526464b3
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+//
+// Based on:
+//
+// CAN bus driver for Microchip 25XXFD CAN Controller with SPI Interface
+//
+// Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
+//
+
+#include <linux/bitfield.h>
+
+#include "mcp251xfd.h"
+
+static inline int
+mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring,
+ u8 *rx_head)
+{
+ u32 fifo_sta;
+ int err;
+
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr),
+ &fifo_sta);
+ if (err)
+ return err;
+
+ *rx_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
+
+ return 0;
+}
+
+static inline int
+mcp251xfd_rx_tail_get_from_chip(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring,
+ u8 *rx_tail)
+{
+ u32 fifo_ua;
+ int err;
+
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOUA(ring->fifo_nr),
+ &fifo_ua);
+ if (err)
+ return err;
+
+ fifo_ua -= ring->base - MCP251XFD_RAM_START;
+ *rx_tail = fifo_ua / ring->obj_size;
+
+ return 0;
+}
+
+static int
+mcp251xfd_check_rx_tail(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring)
+{
+ u8 rx_tail_chip, rx_tail;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY))
+ return 0;
+
+ err = mcp251xfd_rx_tail_get_from_chip(priv, ring, &rx_tail_chip);
+ if (err)
+ return err;
+
+ rx_tail = mcp251xfd_get_rx_tail(ring);
+ if (rx_tail_chip != rx_tail) {
+ netdev_err(priv->ndev,
+ "RX tail of chip (%d) and ours (%d) inconsistent.\n",
+ rx_tail_chip, rx_tail);
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+static int
+mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_rx_ring *ring)
+{
+ u32 new_head;
+ u8 chip_rx_head;
+ int err;
+
+ err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head);
+ if (err)
+ return err;
+
+ /* chip_rx_head, is the next RX-Object filled by the HW.
+ * The new RX head must be >= the old head.
+ */
+ new_head = round_down(ring->head, ring->obj_num) + chip_rx_head;
+ if (new_head <= ring->head)
+ new_head += ring->obj_num;
+
+ ring->head = new_head;
+
+ return mcp251xfd_check_rx_tail(priv, ring);
+}
+
+static void
+mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
+ struct sk_buff *skb)
+{
+ struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+ u8 dlc;
+
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_IDE) {
+ u32 sid, eid;
+
+ eid = FIELD_GET(MCP251XFD_OBJ_ID_EID_MASK, hw_rx_obj->id);
+ sid = FIELD_GET(MCP251XFD_OBJ_ID_SID_MASK, hw_rx_obj->id);
+
+ cfd->can_id = CAN_EFF_FLAG |
+ FIELD_PREP(MCP251XFD_REG_FRAME_EFF_EID_MASK, eid) |
+ FIELD_PREP(MCP251XFD_REG_FRAME_EFF_SID_MASK, sid);
+ } else {
+ cfd->can_id = FIELD_GET(MCP251XFD_OBJ_ID_SID_MASK,
+ hw_rx_obj->id);
+ }
+
+ dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC_MASK, hw_rx_obj->flags);
+
+ /* CANFD */
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) {
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_ESI)
+ cfd->flags |= CANFD_ESI;
+
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS)
+ cfd->flags |= CANFD_BRS;
+
+ cfd->len = can_fd_dlc2len(dlc);
+ } else {
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)
+ cfd->can_id |= CAN_RTR_FLAG;
+
+ can_frame_set_cc_len((struct can_frame *)cfd, dlc,
+ priv->can.ctrlmode);
+ }
+
+ if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
+ memcpy(cfd->data, hw_rx_obj->data, cfd->len);
+
+ mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts);
+}
+
+static int
+mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv,
+ struct mcp251xfd_rx_ring *ring,
+ const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj)
+{
+ struct net_device_stats *stats = &priv->ndev->stats;
+ struct sk_buff *skb;
+ struct canfd_frame *cfd;
+ int err;
+
+ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF)
+ skb = alloc_canfd_skb(priv->ndev, &cfd);
+ else
+ skb = alloc_can_skb(priv->ndev, (struct can_frame **)&cfd);
+
+ if (!skb) {
+ stats->rx_dropped++;
+ return 0;
+ }
+
+ mcp251xfd_hw_rx_obj_to_skb(priv, hw_rx_obj, skb);
+ err = can_rx_offload_queue_sorted(&priv->offload, skb, hw_rx_obj->ts);
+ if (err)
+ stats->rx_fifo_errors++;
+
+ return 0;
+}
+
+static inline int
+mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring,
+ struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
+ const u8 offset, const u8 len)
+{
+ const int val_bytes = regmap_get_val_bytes(priv->map_rx);
+ int err;
+
+ err = regmap_bulk_read(priv->map_rx,
+ mcp251xfd_get_rx_obj_addr(ring, offset),
+ hw_rx_obj,
+ len * ring->obj_size / val_bytes);
+
+ return err;
+}
+
+static int
+mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
+ struct mcp251xfd_rx_ring *ring)
+{
+ struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj = ring->obj;
+ u8 rx_tail, len;
+ int err, i;
+
+ err = mcp251xfd_rx_ring_update(priv, ring);
+ if (err)
+ return err;
+
+ while ((len = mcp251xfd_get_rx_linear_len(ring))) {
+ int offset;
+
+ rx_tail = mcp251xfd_get_rx_tail(ring);
+
+ err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj,
+ rx_tail, len);
+ if (err)
+ return err;
+
+ for (i = 0; i < len; i++) {
+ err = mcp251xfd_handle_rxif_one(priv, ring,
+ (void *)hw_rx_obj +
+ i * ring->obj_size);
+ if (err)
+ return err;
+ }
+
+ /* Increment the RX FIFO tail pointer 'len' times in a
+ * single SPI message.
+ *
+ * Note:
+ * Calculate offset, so that the SPI transfer ends on
+ * the last message of the uinc_xfer array, which has
+ * "cs_change == 0", to properly deactivate the chip
+ * select.
+ */
+ offset = ARRAY_SIZE(ring->uinc_xfer) - len;
+ err = spi_sync_transfer(priv->spi,
+ ring->uinc_xfer + offset, len);
+ if (err)
+ return err;
+
+ ring->tail += len;
+ }
+
+ return 0;
+}
+
+int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv)
+{
+ struct mcp251xfd_rx_ring *ring;
+ int err, n;
+
+ mcp251xfd_for_each_rx_ring(priv, ring, n) {
+ err = mcp251xfd_handle_rxif_ring(priv, ring);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
new file mode 100644
index 000000000000..406166005b99
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+//
+// Based on:
+//
+// CAN bus driver for Microchip 25XXFD CAN Controller with SPI Interface
+//
+// Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
+//
+
+#include <linux/bitfield.h>
+
+#include "mcp251xfd.h"
+
+static inline int
+mcp251xfd_tef_tail_get_from_chip(const struct mcp251xfd_priv *priv,
+ u8 *tef_tail)
+{
+ u32 tef_ua;
+ int err;
+
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_TEFUA, &tef_ua);
+ if (err)
+ return err;
+
+ *tef_tail = tef_ua / sizeof(struct mcp251xfd_hw_tef_obj);
+
+ return 0;
+}
+
+static int mcp251xfd_check_tef_tail(const struct mcp251xfd_priv *priv)
+{
+ u8 tef_tail_chip, tef_tail;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY))
+ return 0;
+
+ err = mcp251xfd_tef_tail_get_from_chip(priv, &tef_tail_chip);
+ if (err)
+ return err;
+
+ tef_tail = mcp251xfd_get_tef_tail(priv);
+ if (tef_tail_chip != tef_tail) {
+ netdev_err(priv->ndev,
+ "TEF tail of chip (0x%02x) and ours (0x%08x) inconsistent.\n",
+ tef_tail_chip, tef_tail);
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+static int
+mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
+{
+ const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ u32 tef_sta;
+ int err;
+
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_TEFSTA, &tef_sta);
+ if (err)
+ return err;
+
+ if (tef_sta & MCP251XFD_REG_TEFSTA_TEFOVIF) {
+ netdev_err(priv->ndev,
+ "Transmit Event FIFO buffer overflow.\n");
+ return -ENOBUFS;
+ }
+
+ netdev_info(priv->ndev,
+ "Transmit Event FIFO buffer %s. (seq=0x%08x, tef_tail=0x%08x, tef_head=0x%08x, tx_head=0x%08x).\n",
+ tef_sta & MCP251XFD_REG_TEFSTA_TEFFIF ?
+ "full" : tef_sta & MCP251XFD_REG_TEFSTA_TEFNEIF ?
+ "not empty" : "empty",
+ seq, priv->tef->tail, priv->tef->head, tx_ring->head);
+
+ /* The Sequence Number in the TEF doesn't match our tef_tail. */
+ return -EAGAIN;
+}
+
+static int
+mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
+ unsigned int *frame_len_ptr)
+{
+ struct net_device_stats *stats = &priv->ndev->stats;
+ struct sk_buff *skb;
+ u32 seq, seq_masked, tef_tail_masked, tef_tail;
+
+ seq = FIELD_GET(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK,
+ hw_tef_obj->flags);
+
+ /* Use the MCP2517FD mask on the MCP2518FD, too. We only
+ * compare 7 bits, this should be enough to detect
+ * net-yet-completed, i.e. old TEF objects.
+ */
+ seq_masked = seq &
+ field_mask(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK);
+ tef_tail_masked = priv->tef->tail &
+ field_mask(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK);
+ if (seq_masked != tef_tail_masked)
+ return mcp251xfd_handle_tefif_recover(priv, seq);
+
+ tef_tail = mcp251xfd_get_tef_tail(priv);
+ skb = priv->can.echo_skb[tef_tail];
+ if (skb)
+ mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts);
+ stats->tx_bytes +=
+ can_rx_offload_get_echo_skb(&priv->offload,
+ tef_tail, hw_tef_obj->ts,
+ frame_len_ptr);
+ stats->tx_packets++;
+ priv->tef->tail++;
+
+ return 0;
+}
+
+static int mcp251xfd_tef_ring_update(struct mcp251xfd_priv *priv)
+{
+ const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ unsigned int new_head;
+ u8 chip_tx_tail;
+ int err;
+
+ err = mcp251xfd_tx_tail_get_from_chip(priv, &chip_tx_tail);
+ if (err)
+ return err;
+
+ /* chip_tx_tail, is the next TX-Object send by the HW.
+ * The new TEF head must be >= the old head, ...
+ */
+ new_head = round_down(priv->tef->head, tx_ring->obj_num) + chip_tx_tail;
+ if (new_head <= priv->tef->head)
+ new_head += tx_ring->obj_num;
+
+ /* ... but it cannot exceed the TX head. */
+ priv->tef->head = min(new_head, tx_ring->head);
+
+ return mcp251xfd_check_tef_tail(priv);
+}
+
+static inline int
+mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_hw_tef_obj *hw_tef_obj,
+ const u8 offset, const u8 len)
+{
+ const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ const int val_bytes = regmap_get_val_bytes(priv->map_rx);
+
+ if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
+ (offset > tx_ring->obj_num ||
+ len > tx_ring->obj_num ||
+ offset + len > tx_ring->obj_num)) {
+ netdev_err(priv->ndev,
+ "Trying to read too many TEF objects (max=%d, offset=%d, len=%d).\n",
+ tx_ring->obj_num, offset, len);
+ return -ERANGE;
+ }
+
+ return regmap_bulk_read(priv->map_rx,
+ mcp251xfd_get_tef_obj_addr(offset),
+ hw_tef_obj,
+ sizeof(*hw_tef_obj) / val_bytes * len);
+}
+
+static inline void mcp251xfd_ecc_tefif_successful(struct mcp251xfd_priv *priv)
+{
+ struct mcp251xfd_ecc *ecc = &priv->ecc;
+
+ ecc->ecc_stat = 0;
+}
+
+int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
+{
+ struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX];
+ unsigned int total_frame_len = 0;
+ u8 tef_tail, len, l;
+ int err, i;
+
+ err = mcp251xfd_tef_ring_update(priv);
+ if (err)
+ return err;
+
+ tef_tail = mcp251xfd_get_tef_tail(priv);
+ len = mcp251xfd_get_tef_len(priv);
+ l = mcp251xfd_get_tef_linear_len(priv);
+ err = mcp251xfd_tef_obj_read(priv, hw_tef_obj, tef_tail, l);
+ if (err)
+ return err;
+
+ if (l < len) {
+ err = mcp251xfd_tef_obj_read(priv, &hw_tef_obj[l], 0, len - l);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < len; i++) {
+ unsigned int frame_len = 0;
+
+ err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
+ /* -EAGAIN means the Sequence Number in the TEF
+ * doesn't match our tef_tail. This can happen if we
+ * read the TEF objects too early. Leave loop let the
+ * interrupt handler call us again.
+ */
+ if (err == -EAGAIN)
+ goto out_netif_wake_queue;
+ if (err)
+ return err;
+
+ total_frame_len += frame_len;
+ }
+
+ out_netif_wake_queue:
+ len = i; /* number of handled goods TEFs */
+ if (len) {
+ struct mcp251xfd_tef_ring *ring = priv->tef;
+ struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ int offset;
+
+ /* Increment the TEF FIFO tail pointer 'len' times in
+ * a single SPI message.
+ *
+ * Note:
+ * Calculate offset, so that the SPI transfer ends on
+ * the last message of the uinc_xfer array, which has
+ * "cs_change == 0", to properly deactivate the chip
+ * select.
+ */
+ offset = ARRAY_SIZE(ring->uinc_xfer) - len;
+ err = spi_sync_transfer(priv->spi,
+ ring->uinc_xfer + offset, len);
+ if (err)
+ return err;
+
+ tx_ring->tail += len;
+ netdev_completed_queue(priv->ndev, len, total_frame_len);
+
+ err = mcp251xfd_check_tef_tail(priv);
+ if (err)
+ return err;
+ }
+
+ mcp251xfd_ecc_tefif_successful(priv);
+
+ if (mcp251xfd_get_tx_free(priv->tx)) {
+ /* Make sure that anybody stopping the queue after
+ * this sees the new tx_ring->tail.
+ */
+ smp_mb();
+ netif_wake_queue(priv->ndev);
+ }
+
+ return 0;
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
new file mode 100644
index 000000000000..ffb6c36b7d9b
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+//
+// Based on:
+//
+// CAN bus driver for Microchip 25XXFD CAN Controller with SPI Interface
+//
+// Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
+//
+
+#include <asm/unaligned.h>
+#include <linux/bitfield.h>
+
+#include "mcp251xfd.h"
+
+static inline struct
+mcp251xfd_tx_obj *mcp251xfd_get_tx_obj_next(struct mcp251xfd_tx_ring *tx_ring)
+{
+ u8 tx_head;
+
+ tx_head = mcp251xfd_get_tx_head(tx_ring);
+
+ return &tx_ring->obj[tx_head];
+}
+
+static void
+mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_tx_obj *tx_obj,
+ const struct sk_buff *skb,
+ unsigned int seq)
+{
+ const struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+ struct mcp251xfd_hw_tx_obj_raw *hw_tx_obj;
+ union mcp251xfd_tx_obj_load_buf *load_buf;
+ u8 dlc;
+ u32 id, flags;
+ int len_sanitized = 0, len;
+
+ if (cfd->can_id & CAN_EFF_FLAG) {
+ u32 sid, eid;
+
+ sid = FIELD_GET(MCP251XFD_REG_FRAME_EFF_SID_MASK, cfd->can_id);
+ eid = FIELD_GET(MCP251XFD_REG_FRAME_EFF_EID_MASK, cfd->can_id);
+
+ id = FIELD_PREP(MCP251XFD_OBJ_ID_EID_MASK, eid) |
+ FIELD_PREP(MCP251XFD_OBJ_ID_SID_MASK, sid);
+
+ flags = MCP251XFD_OBJ_FLAGS_IDE;
+ } else {
+ id = FIELD_PREP(MCP251XFD_OBJ_ID_SID_MASK, cfd->can_id);
+ flags = 0;
+ }
+
+ /* Use the MCP2518FD mask even on the MCP2517FD. It doesn't
+ * harm, only the lower 7 bits will be transferred into the
+ * TEF object.
+ */
+ flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq);
+
+ if (cfd->can_id & CAN_RTR_FLAG)
+ flags |= MCP251XFD_OBJ_FLAGS_RTR;
+ else
+ len_sanitized = canfd_sanitize_len(cfd->len);
+
+ /* CANFD */
+ if (can_is_canfd_skb(skb)) {
+ if (cfd->flags & CANFD_ESI)
+ flags |= MCP251XFD_OBJ_FLAGS_ESI;
+
+ flags |= MCP251XFD_OBJ_FLAGS_FDF;
+
+ if (cfd->flags & CANFD_BRS)
+ flags |= MCP251XFD_OBJ_FLAGS_BRS;
+
+ dlc = can_fd_len2dlc(cfd->len);
+ } else {
+ dlc = can_get_cc_dlc((struct can_frame *)cfd,
+ priv->can.ctrlmode);
+ }
+
+ flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC_MASK, dlc);
+
+ load_buf = &tx_obj->buf;
+ if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX)
+ hw_tx_obj = &load_buf->crc.hw_tx_obj;
+ else
+ hw_tx_obj = &load_buf->nocrc.hw_tx_obj;
+
+ put_unaligned_le32(id, &hw_tx_obj->id);
+ put_unaligned_le32(flags, &hw_tx_obj->flags);
+
+ /* Copy data */
+ memcpy(hw_tx_obj->data, cfd->data, cfd->len);
+
+ /* Clear unused data at end of CAN frame */
+ if (MCP251XFD_SANITIZE_CAN && len_sanitized) {
+ int pad_len;
+
+ pad_len = len_sanitized - cfd->len;
+ if (pad_len)
+ memset(hw_tx_obj->data + cfd->len, 0x0, pad_len);
+ }
+
+ /* Number of bytes to be written into the RAM of the controller */
+ len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags);
+ if (MCP251XFD_SANITIZE_CAN)
+ len += round_up(len_sanitized, sizeof(u32));
+ else
+ len += round_up(cfd->len, sizeof(u32));
+
+ if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX) {
+ u16 crc;
+
+ mcp251xfd_spi_cmd_crc_set_len_in_ram(&load_buf->crc.cmd,
+ len);
+ /* CRC */
+ len += sizeof(load_buf->crc.cmd);
+ crc = mcp251xfd_crc16_compute(&load_buf->crc, len);
+ put_unaligned_be16(crc, (void *)load_buf + len);
+
+ /* Total length */
+ len += sizeof(load_buf->crc.crc);
+ } else {
+ len += sizeof(load_buf->nocrc.cmd);
+ }
+
+ tx_obj->xfer[0].len = len;
+}
+
+static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_tx_obj *tx_obj)
+{
+ return spi_async(priv->spi, &tx_obj->msg);
+}
+
+static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_tx_ring *tx_ring)
+{
+ if (mcp251xfd_get_tx_free(tx_ring) > 0)
+ return false;
+
+ netif_stop_queue(priv->ndev);
+
+ /* Memory barrier before checking tx_free (head and tail) */
+ smp_mb();
+
+ if (mcp251xfd_get_tx_free(tx_ring) == 0) {
+ netdev_dbg(priv->ndev,
+ "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n",
+ tx_ring->head, tx_ring->tail,
+ tx_ring->head - tx_ring->tail);
+
+ return true;
+ }
+
+ netif_start_queue(priv->ndev);
+
+ return false;
+}
+
+netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
+ struct net_device *ndev)
+{
+ struct mcp251xfd_priv *priv = netdev_priv(ndev);
+ struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+ struct mcp251xfd_tx_obj *tx_obj;
+ unsigned int frame_len;
+ u8 tx_head;
+ int err;
+
+ if (can_dropped_invalid_skb(ndev, skb))
+ return NETDEV_TX_OK;
+
+ if (mcp251xfd_tx_busy(priv, tx_ring))
+ return NETDEV_TX_BUSY;
+
+ tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
+ mcp251xfd_tx_obj_from_skb(priv, tx_obj, skb, tx_ring->head);
+
+ /* Stop queue if we occupy the complete TX FIFO */
+ tx_head = mcp251xfd_get_tx_head(tx_ring);
+ tx_ring->head++;
+ if (mcp251xfd_get_tx_free(tx_ring) == 0)
+ netif_stop_queue(ndev);
+
+ frame_len = can_skb_get_frame_len(skb);
+ err = can_put_echo_skb(skb, ndev, tx_head, frame_len);
+ if (!err)
+ netdev_sent_queue(priv->ndev, frame_len);
+
+ err = mcp251xfd_tx_obj_write(priv, tx_obj);
+ if (err)
+ goto out_err;
+
+ return NETDEV_TX_OK;
+
+ out_err:
+ netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+
+ return NETDEV_TX_OK;
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index 0f322dabaf65..f551c900803e 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -10,6 +10,7 @@
#ifndef _MCP251XFD_H
#define _MCP251XFD_H
+#include <linux/bitfield.h>
#include <linux/can/core.h>
#include <linux/can/dev.h>
#include <linux/can/rx-offload.h>
@@ -625,6 +626,12 @@ MCP251XFD_IS(2517);
MCP251XFD_IS(2518);
MCP251XFD_IS(251X);
+static inline bool mcp251xfd_is_fd_mode(const struct mcp251xfd_priv *priv)
+{
+ /* listen-only mode works like FD mode */
+ return priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_FD);
+}
+
static inline u8 mcp251xfd_first_byte_set(u32 mask)
{
return (mask & 0x0000ffff) ?
@@ -761,6 +768,24 @@ mcp251xfd_get_rx_obj_addr(const struct mcp251xfd_rx_ring *ring, u8 n)
return ring->base + ring->obj_size * n;
}
+static inline int
+mcp251xfd_tx_tail_get_from_chip(const struct mcp251xfd_priv *priv,
+ u8 *tx_tail)
+{
+ u32 fifo_sta;
+ int err;
+
+ err = regmap_read(priv->map_reg,
+ MCP251XFD_REG_FIFOSTA(MCP251XFD_TX_FIFO),
+ &fifo_sta);
+ if (err)
+ return err;
+
+ *tx_tail = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
+
+ return 0;
+}
+
static inline u8 mcp251xfd_get_tef_head(const struct mcp251xfd_priv *priv)
{
return priv->tef->head & (priv->tx->obj_num - 1);
@@ -849,15 +874,24 @@ mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring)
(n) < (priv)->rx_ring_num; \
(n)++, (ring) = *((priv)->rx + (n)))
-int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv);
+int mcp251xfd_chip_fifo_init(const struct mcp251xfd_priv *priv);
u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size,
const void *data, size_t data_size);
u16 mcp251xfd_crc16_compute(const void *data, size_t data_size);
+int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv);
+void mcp251xfd_ring_init(struct mcp251xfd_priv *priv);
+void mcp251xfd_ring_free(struct mcp251xfd_priv *priv);
+int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv);
+int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv);
+int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv);
void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
struct sk_buff *skb, u32 timestamp);
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
+netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
+ struct net_device *ndev);
+
#if IS_ENABLED(CONFIG_DEV_COREDUMP)
void mcp251xfd_dump(const struct mcp251xfd_priv *priv);
#else
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index bb2a43070ea8..9957772201d5 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -639,6 +639,17 @@ static int felix_set_ageing_time(struct dsa_switch *ds,
return 0;
}
+static void felix_port_fast_age(struct dsa_switch *ds, int port)
+{
+ struct ocelot *ocelot = ds->priv;
+ int err;
+
+ err = ocelot_mact_flush(ocelot, port);
+ if (err)
+ dev_err(ds->dev, "Flushing MAC table on port %d returned %pe\n",
+ port, ERR_PTR(err));
+}
+
static int felix_fdb_dump(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
@@ -1622,6 +1633,7 @@ const struct dsa_switch_ops felix_switch_ops = {
.phylink_mac_config = felix_phylink_mac_config,
.phylink_mac_link_down = felix_phylink_mac_link_down,
.phylink_mac_link_up = felix_phylink_mac_link_up,
+ .port_fast_age = felix_port_fast_age,
.port_fdb_dump = felix_fdb_dump,
.port_fdb_add = felix_fdb_add,
.port_fdb_del = felix_fdb_del,
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index f5ec35fa4c63..466ad9470d1f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -48,6 +48,11 @@ enum ena_admin_aq_feature_id {
ENA_ADMIN_FEATURES_OPCODE_NUM = 32,
};
+/* device capabilities */
+enum ena_admin_aq_caps_id {
+ ENA_ADMIN_ENI_STATS = 0,
+};
+
enum ena_admin_placement_policy_type {
/* descriptors and headers are in host memory */
ENA_ADMIN_PLACEMENT_POLICY_HOST = 1,
@@ -455,7 +460,10 @@ struct ena_admin_device_attr_feature_desc {
*/
u32 supported_features;
- u32 reserved3;
+ /* bitmap of ena_admin_aq_caps_id, which represents device
+ * capabilities.
+ */
+ u32 capabilities;
/* Indicates how many bits are used physical address access. */
u32 phys_addr_width;
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index ab413fc1f68e..8c8b4c88c7de 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -1971,6 +1971,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
sizeof(get_resp.u.dev_attr));
ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
+ ena_dev->capabilities = get_resp.u.dev_attr.capabilities;
if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
rc = ena_com_get_feature(ena_dev, &get_resp,
@@ -2223,6 +2224,13 @@ int ena_com_get_eni_stats(struct ena_com_dev *ena_dev,
struct ena_com_stats_ctx ctx;
int ret;
+ if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) {
+ netdev_err(ena_dev->net_device,
+ "Capability %d isn't supported\n",
+ ENA_ADMIN_ENI_STATS);
+ return -EOPNOTSUPP;
+ }
+
memset(&ctx, 0x0, sizeof(ctx));
ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENI);
if (likely(ret == 0))
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 73b03ce59412..3c5081d9d25d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -314,6 +314,7 @@ struct ena_com_dev {
struct ena_rss rss;
u32 supported_features;
+ u32 capabilities;
u32 dma_addr_bits;
struct ena_host_attribute host_attr;
@@ -967,6 +968,18 @@ static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_d
ena_dev->adaptive_coalescing = false;
}
+/* ena_com_get_cap - query whether device supports a capability.
+ * @ena_dev: ENA communication layer struct
+ * @cap_id: enum value representing the capability
+ *
+ * @return - true if capability is supported or false otherwise
+ */
+static inline bool ena_com_get_cap(struct ena_com_dev *ena_dev,
+ enum ena_admin_aq_caps_id cap_id)
+{
+ return !!(ena_dev->capabilities & BIT(cap_id));
+}
+
/* ena_com_update_intr_reg - Prepare interrupt register
* @intr_reg: interrupt register to update.
* @rx_delay_interval: Rx interval in usecs
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 6b9b43e422c1..39242c5a1729 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -82,7 +82,7 @@ static const struct ena_stats ena_stats_rx_strings[] = {
ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
ENA_STAT_RX_ENTRY(csum_good),
ENA_STAT_RX_ENTRY(refil_partial),
- ENA_STAT_RX_ENTRY(bad_csum),
+ ENA_STAT_RX_ENTRY(csum_bad),
ENA_STAT_RX_ENTRY(page_alloc_fail),
ENA_STAT_RX_ENTRY(skb_alloc_fail),
ENA_STAT_RX_ENTRY(dma_mapping_err),
@@ -110,8 +110,7 @@ static const struct ena_stats ena_stats_ena_com_strings[] = {
#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings)
#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings)
#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings)
-#define ENA_STATS_ARRAY_ENI(adapter) \
- (ARRAY_SIZE(ena_stats_eni_strings) * (adapter)->eni_stats_supported)
+#define ENA_STATS_ARRAY_ENI(adapter) ARRAY_SIZE(ena_stats_eni_strings)
static void ena_safe_update_stat(u64 *src, u64 *dst,
struct u64_stats_sync *syncp)
@@ -213,8 +212,9 @@ static void ena_get_ethtool_stats(struct net_device *netdev,
u64 *data)
{
struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_com_dev *dev = adapter->ena_dev;
- ena_get_stats(adapter, data, adapter->eni_stats_supported);
+ ena_get_stats(adapter, data, ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS));
}
static int ena_get_sw_stats_count(struct ena_adapter *adapter)
@@ -226,7 +226,9 @@ static int ena_get_sw_stats_count(struct ena_adapter *adapter)
static int ena_get_hw_stats_count(struct ena_adapter *adapter)
{
- return ENA_STATS_ARRAY_ENI(adapter);
+ bool supported = ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS);
+
+ return ENA_STATS_ARRAY_ENI(adapter) * supported;
}
int ena_get_sset_count(struct net_device *netdev, int sset)
@@ -316,10 +318,11 @@ static void ena_get_ethtool_strings(struct net_device *netdev,
u8 *data)
{
struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_com_dev *dev = adapter->ena_dev;
switch (sset) {
case ETH_SS_STATS:
- ena_get_strings(adapter, data, adapter->eni_stats_supported);
+ ena_get_strings(adapter, data, ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS));
break;
}
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 05cadc4e66e0..53080fd143dc 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -103,7 +103,7 @@ static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
return;
- adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
+ ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD);
ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
netif_err(adapter, tx_err, dev, "Transmit time out\n");
@@ -166,11 +166,9 @@ static int ena_xmit_common(struct net_device *dev,
"Failed to prepare tx bufs\n");
ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
&ring->syncp);
- if (rc != -ENOMEM) {
- adapter->reset_reason =
- ENA_REGS_RESET_DRIVER_INVALID_STATE;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
- }
+ if (rc != -ENOMEM)
+ ena_reset_device(adapter,
+ ENA_REGS_RESET_DRIVER_INVALID_STATE);
return rc;
}
@@ -1269,20 +1267,18 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
netif_err(ring->adapter,
tx_done,
ring->netdev,
- "tx_info doesn't have valid %s",
- is_xdp ? "xdp frame" : "skb");
+ "tx_info doesn't have valid %s. qid %u req_id %u",
+ is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
else
netif_err(ring->adapter,
tx_done,
ring->netdev,
- "Invalid req_id: %hu\n",
- req_id);
+ "Invalid req_id %u in qid %u\n",
+ req_id, ring->qid);
ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
+ ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
- /* Trigger device reset */
- ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
- set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
return -EFAULT;
}
@@ -1445,10 +1441,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
netif_err(adapter, rx_err, rx_ring->netdev,
"Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
- adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
- /* Make sure reset reason is set before triggering the reset */
- smp_mb__before_atomic();
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
return NULL;
}
@@ -1558,7 +1551,7 @@ static void ena_rx_checksum(struct ena_ring *rx_ring,
(ena_rx_ctx->l3_csum_err))) {
/* ipv4 checksum error */
skb->ip_summed = CHECKSUM_NONE;
- ena_increase_stat(&rx_ring->rx_stats.bad_csum, 1,
+ ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
&rx_ring->syncp);
netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
"RX IPv4 header checksum error\n");
@@ -1570,7 +1563,7 @@ static void ena_rx_checksum(struct ena_ring *rx_ring,
(ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
if (unlikely(ena_rx_ctx->l4_csum_err)) {
/* TCP/UDP checksum error */
- ena_increase_stat(&rx_ring->rx_stats.bad_csum, 1,
+ ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
&rx_ring->syncp);
netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
"RX L4 checksum error\n");
@@ -1781,15 +1774,12 @@ error:
if (rc == -ENOSPC) {
ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1,
&rx_ring->syncp);
- adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
+ ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
} else {
ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
&rx_ring->syncp);
- adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
+ ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
}
-
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-
return 0;
}
@@ -3253,11 +3243,11 @@ err:
int ena_update_hw_stats(struct ena_adapter *adapter)
{
- int rc = 0;
+ int rc;
rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats);
if (rc) {
- dev_info_once(&adapter->pdev->dev, "Failed to get ENI stats\n");
+ netdev_err(adapter->netdev, "Failed to get ENI stats\n");
return rc;
}
@@ -3654,8 +3644,6 @@ static int ena_restore_device(struct ena_adapter *adapter)
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
adapter->last_keep_alive_jiffies = jiffies;
- dev_err(&pdev->dev, "Device reset completed successfully\n");
-
return rc;
err_disable_msix:
ena_free_mgmnt_irq(adapter);
@@ -3685,6 +3673,8 @@ static void ena_fw_reset_device(struct work_struct *work)
if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
ena_destroy_device(adapter, false);
ena_restore_device(adapter);
+
+ dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
}
rtnl_unlock();
@@ -3707,9 +3697,8 @@ static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
netif_err(adapter, rx_err, adapter->netdev,
"Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
rx_ring->qid);
- adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
- smp_mb__before_atomic();
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
+ ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
return -EIO;
}
@@ -3746,9 +3735,7 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
netif_err(adapter, tx_err, adapter->netdev,
"Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
tx_ring->qid);
- adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
- smp_mb__before_atomic();
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
return -EIO;
}
@@ -3774,9 +3761,7 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
"The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
missed_tx,
adapter->missing_tx_completion_threshold);
- adapter->reset_reason =
- ENA_REGS_RESET_MISS_TX_CMPL;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
rc = -EIO;
}
@@ -3897,8 +3882,7 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter)
"Keep alive watchdog timeout.\n");
ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
&adapter->syncp);
- adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
}
}
@@ -3909,8 +3893,7 @@ static void check_for_admin_com_state(struct ena_adapter *adapter)
"ENA admin queue is not in running state!\n");
ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
&adapter->syncp);
- adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
}
}
@@ -4110,7 +4093,7 @@ static int ena_rss_init_default(struct ena_adapter *adapter)
val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
rc = ena_com_indirect_table_fill_entry(ena_dev, i,
ENA_IO_RXQ_IDX(val));
- if (unlikely(rc && (rc != -EOPNOTSUPP))) {
+ if (unlikely(rc)) {
dev_err(dev, "Cannot fill indirect table\n");
goto err_fill_indir;
}
@@ -4146,10 +4129,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
}
-static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
+static void ena_calc_io_queue_size(struct ena_adapter *adapter,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
{
- struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
- struct ena_com_dev *ena_dev = ctx->ena_dev;
+ struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
u32 max_tx_queue_size;
@@ -4157,7 +4141,7 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
struct ena_admin_queue_ext_feature_fields *max_queue_ext =
- &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
+ &get_feat_ctx->max_queue_ext.max_queue_ext;
max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
max_queue_ext->max_rx_sq_depth);
max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
@@ -4169,13 +4153,13 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
max_tx_queue_size = min_t(u32, max_tx_queue_size,
max_queue_ext->max_tx_sq_depth);
- ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
- max_queue_ext->max_per_packet_tx_descs);
- ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
- max_queue_ext->max_per_packet_rx_descs);
+ adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queue_ext->max_per_packet_tx_descs);
+ adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queue_ext->max_per_packet_rx_descs);
} else {
struct ena_admin_queue_feature_desc *max_queues =
- &ctx->get_feat_ctx->max_queues;
+ &get_feat_ctx->max_queues;
max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
max_queues->max_sq_depth);
max_tx_queue_size = max_queues->max_cq_depth;
@@ -4187,10 +4171,10 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
max_tx_queue_size = min_t(u32, max_tx_queue_size,
max_queues->max_sq_depth);
- ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
- max_queues->max_packet_tx_descs);
- ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
- max_queues->max_packet_rx_descs);
+ adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queues->max_packet_tx_descs);
+ adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queues->max_packet_rx_descs);
}
max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
@@ -4204,12 +4188,10 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
tx_queue_size = rounddown_pow_of_two(tx_queue_size);
rx_queue_size = rounddown_pow_of_two(rx_queue_size);
- ctx->max_tx_queue_size = max_tx_queue_size;
- ctx->max_rx_queue_size = max_rx_queue_size;
- ctx->tx_queue_size = tx_queue_size;
- ctx->rx_queue_size = rx_queue_size;
-
- return 0;
+ adapter->max_tx_ring_size = max_tx_queue_size;
+ adapter->max_rx_ring_size = max_rx_queue_size;
+ adapter->requested_tx_ring_size = tx_queue_size;
+ adapter->requested_rx_ring_size = rx_queue_size;
}
/* ena_probe - Device Initialization Routine
@@ -4224,7 +4206,6 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
*/
static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
- struct ena_calc_queue_size_ctx calc_queue_ctx = {};
struct ena_com_dev_get_features_ctx get_feat_ctx;
struct ena_com_dev *ena_dev = NULL;
struct ena_adapter *adapter;
@@ -4309,10 +4290,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_device_destroy;
}
- calc_queue_ctx.ena_dev = ena_dev;
- calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
- calc_queue_ctx.pdev = pdev;
-
/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
* Updated during device initialization with the real granularity
*/
@@ -4320,8 +4297,8 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
- rc = ena_calc_io_queue_size(&calc_queue_ctx);
- if (rc || !max_num_io_queues) {
+ ena_calc_io_queue_size(adapter, &get_feat_ctx);
+ if (unlikely(!max_num_io_queues)) {
rc = -EFAULT;
goto err_device_destroy;
}
@@ -4330,13 +4307,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->reset_reason = ENA_REGS_RESET_NORMAL;
- adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
- adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
- adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
- adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
- adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
- adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
-
adapter->num_io_queues = max_num_io_queues;
adapter->max_num_io_queues = max_num_io_queues;
adapter->last_monitored_tx_qid = 0;
@@ -4387,11 +4357,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ena_config_debug_area(adapter);
- if (!ena_update_hw_stats(adapter))
- adapter->eni_stats_supported = true;
- else
- adapter->eni_stats_supported = false;
-
memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
netif_carrier_off(netdev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 9391c7101fba..1bdce99bf688 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -140,18 +140,6 @@ struct ena_napi {
struct dim dim;
};
-struct ena_calc_queue_size_ctx {
- struct ena_com_dev_get_features_ctx *get_feat_ctx;
- struct ena_com_dev *ena_dev;
- struct pci_dev *pdev;
- u32 tx_queue_size;
- u32 rx_queue_size;
- u32 max_tx_queue_size;
- u32 max_rx_queue_size;
- u16 max_tx_sgl_size;
- u16 max_rx_sgl_size;
-};
-
struct ena_tx_buffer {
struct sk_buff *skb;
/* num of ena desc for this specific skb
@@ -216,7 +204,7 @@ struct ena_stats_rx {
u64 rx_copybreak_pkt;
u64 csum_good;
u64 refil_partial;
- u64 bad_csum;
+ u64 csum_bad;
u64 page_alloc_fail;
u64 skb_alloc_fail;
u64 dma_mapping_err;
@@ -379,7 +367,6 @@ struct ena_adapter {
struct u64_stats_sync syncp;
struct ena_stats_dev dev_stats;
struct ena_admin_eni_stats eni_stats;
- bool eni_stats_supported;
/* last queue index that was checked for uncompleted tx packets */
u32 last_monitored_tx_qid;
@@ -407,6 +394,15 @@ int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
int ena_get_sset_count(struct net_device *netdev, int sset);
+static inline void ena_reset_device(struct ena_adapter *adapter,
+ enum ena_regs_reset_reason_types reset_reason)
+{
+ adapter->reset_reason = reset_reason;
+ /* Make sure reset reason is set before triggering the reset */
+ smp_mb__before_atomic();
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
+
enum ena_xdp_errors_t {
ENA_XDP_ALLOWED = 0,
ENA_XDP_CURRENT_MTU_TOO_LARGE,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index 7f509f427e3d..623d113b6581 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -41,7 +41,7 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode)
static struct fwnode_handle *dpaa2_mac_get_node(struct device *dev,
u16 dpmac_id)
{
- struct fwnode_handle *fwnode, *parent, *child = NULL;
+ struct fwnode_handle *fwnode, *parent = NULL, *child = NULL;
struct device_node *dpmacs = NULL;
int err;
u32 id;
@@ -54,8 +54,17 @@ static struct fwnode_handle *dpaa2_mac_get_node(struct device *dev,
parent = of_fwnode_handle(dpmacs);
} else if (is_acpi_node(fwnode)) {
parent = fwnode;
+ } else {
+ /* The root dprc device didn't yet get to finalize it's probe,
+ * thus the fwnode field is not yet set. Defer probe if we are
+ * facing this situation.
+ */
+ return ERR_PTR(-EPROBE_DEFER);
}
+ if (!parent)
+ return NULL;
+
fwnode_for_each_child_node(parent, child) {
err = -EINVAL;
if (is_acpi_device_node(child))
@@ -327,6 +336,7 @@ int dpaa2_mac_open(struct dpaa2_mac *mac)
{
struct fsl_mc_device *dpmac_dev = mac->mc_dev;
struct net_device *net_dev = mac->net_dev;
+ struct fwnode_handle *fw_node;
int err;
err = dpmac_open(mac->mc_io, 0, dpmac_dev->obj_desc.id,
@@ -346,7 +356,13 @@ int dpaa2_mac_open(struct dpaa2_mac *mac)
/* Find the device node representing the MAC device and link the device
* behind the associated netdev to it.
*/
- mac->fw_node = dpaa2_mac_get_node(&mac->mc_dev->dev, mac->attr.id);
+ fw_node = dpaa2_mac_get_node(&mac->mc_dev->dev, mac->attr.id);
+ if (IS_ERR(fw_node)) {
+ err = PTR_ERR(fw_node);
+ goto err_close_dpmac;
+ }
+
+ mac->fw_node = fw_node;
net_dev->dev.of_node = to_of_node(mac->fw_node);
return 0;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index d039457928b0..9b5512b4f15d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -394,7 +394,8 @@ static int dpaa2_switch_dellink(struct ethsw_core *ethsw, u16 vid)
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
ppriv_local = ethsw->ports[i];
- ppriv_local->vlans[vid] = 0;
+ if (ppriv_local)
+ ppriv_local->vlans[vid] = 0;
}
return 0;
@@ -1896,9 +1897,11 @@ static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid
/* Delete VLAN from switch if it is no longer configured on
* any port
*/
- for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
- if (ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER)
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ if (ethsw->ports[i] &&
+ ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER)
return 0; /* Found a port member in VID */
+ }
ethsw->vlans[vid] &= ~ETHSW_VLAN_GLOBAL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 593912b17609..7abef88801fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -769,21 +769,22 @@ static bool i40e_asq_done(struct i40e_hw *hw)
}
/**
- * i40e_asq_send_command - send command to Admin Queue
+ * i40e_asq_send_command_atomic - send command to Admin Queue
* @hw: pointer to the hw struct
* @desc: prefilled descriptor describing the command (non DMA mem)
* @buff: buffer to use for indirect commands
* @buff_size: size of buffer for indirect commands
* @cmd_details: pointer to command details structure
+ * @is_atomic_context: is the function called in an atomic context?
*
* This is the main send command driver routine for the Admin Queue send
* queue. It runs the queue, cleans the queue, etc
**/
-i40e_status i40e_asq_send_command(struct i40e_hw *hw,
- struct i40e_aq_desc *desc,
- void *buff, /* can be NULL */
- u16 buff_size,
- struct i40e_asq_cmd_details *cmd_details)
+i40e_status
+i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ bool is_atomic_context)
{
i40e_status status = 0;
struct i40e_dma_mem *dma_buff = NULL;
@@ -910,7 +911,12 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw,
*/
if (i40e_asq_done(hw))
break;
- udelay(50);
+
+ if (is_atomic_context)
+ udelay(50);
+ else
+ usleep_range(40, 60);
+
total_delay += 50;
} while (total_delay < hw->aq.asq_cmd_timeout);
}
@@ -967,6 +973,15 @@ asq_send_command_error:
return status;
}
+i40e_status
+i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_asq_send_command_atomic(hw, desc, buff, buff_size,
+ cmd_details, false);
+}
+
/**
* i40e_fill_default_direct_cmd_desc - AQ descriptor helper function
* @desc: pointer to the temp descriptor (non DMA mem)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 140b677f114d..60f9e0a6aaca 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -11,8 +11,8 @@
*/
#define I40E_FW_API_VERSION_MAJOR 0x0001
-#define I40E_FW_API_VERSION_MINOR_X722 0x0009
-#define I40E_FW_API_VERSION_MINOR_X710 0x0009
+#define I40E_FW_API_VERSION_MINOR_X722 0x000C
+#define I40E_FW_API_VERSION_MINOR_X710 0x000F
#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
I40E_FW_API_VERSION_MINOR_X710 : \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index b4d3fed0d2f2..9ddeb015eb7e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -154,8 +154,8 @@ const char *i40e_stat_str(struct i40e_hw *hw, i40e_status stat_err)
return "I40E_ERR_INVALID_MAC_ADDR";
case I40E_ERR_DEVICE_NOT_SUPPORTED:
return "I40E_ERR_DEVICE_NOT_SUPPORTED";
- case I40E_ERR_MASTER_REQUESTS_PENDING:
- return "I40E_ERR_MASTER_REQUESTS_PENDING";
+ case I40E_ERR_PRIMARY_REQUESTS_PENDING:
+ return "I40E_ERR_PRIMARY_REQUESTS_PENDING";
case I40E_ERR_INVALID_LINK_SETTINGS:
return "I40E_ERR_INVALID_LINK_SETTINGS";
case I40E_ERR_AUTONEG_NOT_COMPLETE:
@@ -2073,7 +2073,8 @@ enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
cmd->seid = cpu_to_le16(seid);
cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
- status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0,
+ cmd_details, true);
return status;
}
@@ -2114,7 +2115,8 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
cmd->seid = cpu_to_le16(seid);
cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
- status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0,
+ cmd_details, true);
return status;
}
@@ -4136,7 +4138,6 @@ static i40e_status i40e_validate_filter_settings(struct i40e_hw *hw,
struct i40e_filter_control_settings *settings)
{
u32 fcoe_cntx_size, fcoe_filt_size;
- u32 pe_cntx_size, pe_filt_size;
u32 fcoe_fmax;
u32 val;
@@ -4180,8 +4181,6 @@ static i40e_status i40e_validate_filter_settings(struct i40e_hw *hw,
case I40E_HASH_FILTER_SIZE_256K:
case I40E_HASH_FILTER_SIZE_512K:
case I40E_HASH_FILTER_SIZE_1M:
- pe_filt_size = I40E_HASH_FILTER_BASE_SIZE;
- pe_filt_size <<= (u32)settings->pe_filt_num;
break;
default:
return I40E_ERR_PARAM;
@@ -4198,8 +4197,6 @@ static i40e_status i40e_validate_filter_settings(struct i40e_hw *hw,
case I40E_DMA_CNTX_SIZE_64K:
case I40E_DMA_CNTX_SIZE_128K:
case I40E_DMA_CNTX_SIZE_256K:
- pe_cntx_size = I40E_DMA_CNTX_BASE_SIZE;
- pe_cntx_size <<= (u32)settings->pe_cntx_num;
break;
default:
return I40E_ERR_PARAM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index aaea297640e0..9241b6005ad3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -22,11 +22,15 @@ void i40e_adminq_init_ring_data(struct i40e_hw *hw);
i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
struct i40e_arq_event_info *e,
u16 *events_pending);
-i40e_status i40e_asq_send_command(struct i40e_hw *hw,
- struct i40e_aq_desc *desc,
- void *buff, /* can be NULL */
- u16 buff_size,
- struct i40e_asq_cmd_details *cmd_details);
+i40e_status
+i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details);
+i40e_status
+i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ bool is_atomic_context);
/* debug function for adminq */
void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h
index 77be0702d07c..db3714a65dc7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_status.h
@@ -18,7 +18,7 @@ enum i40e_status_code {
I40E_ERR_ADAPTER_STOPPED = -9,
I40E_ERR_INVALID_MAC_ADDR = -10,
I40E_ERR_DEVICE_NOT_SUPPORTED = -11,
- I40E_ERR_MASTER_REQUESTS_PENDING = -12,
+ I40E_ERR_PRIMARY_REQUESTS_PENDING = -12,
I40E_ERR_INVALID_LINK_SETTINGS = -13,
I40E_ERR_AUTONEG_NOT_COMPLETE = -14,
I40E_ERR_RESET_FAILED = -15,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 048f1678ab8a..b785d09c19f8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2704,12 +2704,21 @@ error_param:
(u8 *)&stats, sizeof(stats));
}
+#define I40E_MAX_MACVLAN_PER_HW 3072
+#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \
+ (num_ports))
/* If the VF is not trusted restrict the number of MAC/VLAN it can program
* MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast
*/
#define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1)
#define I40E_VC_MAX_VLAN_PER_VF 16
+#define I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(vf_num, num_ports) \
+({ typeof(vf_num) vf_num_ = (vf_num); \
+ typeof(num_ports) num_ports_ = (num_ports); \
+ ((I40E_MAX_MACVLAN_PER_PF(num_ports_) - vf_num_ * \
+ I40E_VC_MAX_MAC_ADDR_PER_VF) / vf_num_) + \
+ I40E_VC_MAX_MAC_ADDR_PER_VF; })
/**
* i40e_check_vf_permission
* @vf: pointer to the VF info
@@ -2734,6 +2743,7 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
{
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+ struct i40e_hw *hw = &pf->hw;
int mac2add_cnt = 0;
int i;
@@ -2775,12 +2785,26 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
* number of addresses. Check to make sure that the additions do not
* push us over the limit.
*/
- if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
- (i40e_count_filters(vsi) + mac2add_cnt) >
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+ if ((i40e_count_filters(vsi) + mac2add_cnt) >
I40E_VC_MAX_MAC_ADDR_PER_VF) {
- dev_err(&pf->pdev->dev,
- "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
- return -EPERM;
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+ return -EPERM;
+ }
+ /* If this VF is trusted, it can use more resources than untrusted.
+ * However to ensure that every trusted VF has appropriate number of
+ * resources, divide whole pool of resources per port and then across
+ * all VFs.
+ */
+ } else {
+ if ((i40e_count_filters(vsi) + mac2add_cnt) >
+ I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs,
+ hw->num_ports)) {
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, trusted VF exhausted it's resources\n");
+ return -EPERM;
+ }
}
return 0;
}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
index 9fa3fa99b4c2..cd4e6a22d0f9 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adminq.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
@@ -551,15 +551,13 @@ init_adminq_exit:
**/
enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw)
{
- enum iavf_status ret_code = 0;
-
if (iavf_check_asq_alive(hw))
iavf_aq_queue_shutdown(hw, true);
iavf_shutdown_asq(hw);
iavf_shutdown_arq(hw);
- return ret_code;
+ return 0;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 2a1ee60e85f4..408d15a5b0e3 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4603,7 +4603,7 @@ static int ice_replay_pre_init(struct ice_hw *hw)
* will allow adding rules entries back to filt_rules list,
* which is operational list.
*/
- for (i = 0; i < ICE_SW_LKUP_LAST; i++)
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
list_replace_init(&sw->recp_list[i].filt_rules,
&sw->recp_list[i].filt_replay_rules);
ice_sched_replay_agg_vsi_preinit(hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index d1d7389b0bff..864692b157b6 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -10,6 +10,100 @@
#include "ice_tc_lib.h"
/**
+ * ice_eswitch_add_vf_mac_rule - add adv rule with VF's MAC
+ * @pf: pointer to PF struct
+ * @vf: pointer to VF struct
+ * @mac: VF's MAC address
+ *
+ * This function adds advanced rule that forwards packets with
+ * VF's MAC address (src MAC) to the corresponding switchdev ctrl VSI queue.
+ */
+int
+ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac)
+{
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_adv_rule_info rule_info = { 0 };
+ struct ice_adv_lkup_elem *list;
+ struct ice_hw *hw = &pf->hw;
+ const u16 lkups_cnt = 1;
+ int err;
+
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ return -ENOMEM;
+
+ list[0].type = ICE_MAC_OFOS;
+ ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac);
+ eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr);
+
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.vsi_handle = ctrl_vsi->idx;
+ rule_info.sw_act.fltr_act = ICE_FWD_TO_Q;
+ rule_info.rx = false;
+ rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id +
+ ctrl_vsi->rxq_map[vf->vf_id];
+ rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
+ rule_info.flags_info.act_valid = true;
+
+ err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
+ vf->repr->mac_rule);
+ if (err)
+ dev_err(ice_pf_to_dev(pf), "Unable to add VF mac rule in switchdev mode for VF %d",
+ vf->vf_id);
+ else
+ vf->repr->rule_added = true;
+
+ kfree(list);
+ return err;
+}
+
+/**
+ * ice_eswitch_replay_vf_mac_rule - replay adv rule with VF's MAC
+ * @vf: pointer to vF struct
+ *
+ * This function replays VF's MAC rule after reset.
+ */
+void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf)
+{
+ int err;
+
+ if (!ice_is_switchdev_running(vf->pf))
+ return;
+
+ if (is_valid_ether_addr(vf->hw_lan_addr.addr)) {
+ err = ice_eswitch_add_vf_mac_rule(vf->pf, vf,
+ vf->hw_lan_addr.addr);
+ if (err) {
+ dev_err(ice_pf_to_dev(vf->pf), "Failed to add MAC %pM for VF %d\n, error %d\n",
+ vf->hw_lan_addr.addr, vf->vf_id, err);
+ return;
+ }
+ vf->num_mac++;
+
+ ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr);
+ }
+}
+
+/**
+ * ice_eswitch_del_vf_mac_rule - delete adv rule with VF's MAC
+ * @vf: pointer to the VF struct
+ *
+ * Delete the advanced rule that was used to forward packets with the VF's MAC
+ * address (src MAC) to the corresponding switchdev ctrl VSI queue.
+ */
+void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf)
+{
+ if (!ice_is_switchdev_running(vf->pf))
+ return;
+
+ if (!vf->repr->rule_added)
+ return;
+
+ ice_rem_adv_rule_by_id(&vf->pf->hw, vf->repr->mac_rule);
+ vf->repr->rule_added = false;
+}
+
+/**
* ice_eswitch_setup_env - configure switchdev HW filters
* @pf: pointer to PF struct
*
@@ -21,7 +115,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
struct net_device *uplink_netdev = uplink_vsi->netdev;
struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
- struct ice_port_info *pi = pf->hw.port_info;
bool rule_added = false;
ice_vsi_manage_vlan_stripping(ctrl_vsi, false);
@@ -42,29 +135,17 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
rule_added = true;
}
- if (ice_cfg_dflt_vsi(pi->hw, ctrl_vsi->idx, true, ICE_FLTR_TX))
- goto err_def_tx;
-
if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
goto err_override_uplink;
if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override))
goto err_override_control;
- if (ice_fltr_update_flags_dflt_rule(ctrl_vsi, pi->dflt_tx_vsi_rule_id,
- ICE_FLTR_TX,
- ICE_SINGLE_ACT_LB_ENABLE))
- goto err_update_action;
-
return 0;
-err_update_action:
- ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
err_override_control:
ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
err_override_uplink:
- ice_cfg_dflt_vsi(pi->hw, ctrl_vsi->idx, false, ICE_FLTR_TX);
-err_def_tx:
if (rule_added)
ice_clear_dflt_vsi(uplink_vsi->vsw);
err_def_rx:
@@ -167,21 +248,11 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf)
netif_keep_dst(vf->repr->netdev);
}
- kfree(ctrl_vsi->target_netdevs);
-
- ctrl_vsi->target_netdevs = kcalloc(max_vsi_num + 1,
- sizeof(*ctrl_vsi->target_netdevs),
- GFP_KERNEL);
- if (!ctrl_vsi->target_netdevs)
- goto err;
-
ice_for_each_vf(pf, i) {
struct ice_repr *repr = pf->vf[i].repr;
struct ice_vsi *vsi = repr->src_vsi;
struct metadata_dst *dst;
- ctrl_vsi->target_netdevs[vsi->vsi_num] = repr->netdev;
-
dst = repr->dst;
dst->u.port_info.port_id = vsi->vsi_num;
dst->u.port_info.lower_dev = repr->netdev;
@@ -214,7 +285,6 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi)
{
int i;
- kfree(ctrl_vsi->target_netdevs);
ice_for_each_vf(pf, i) {
struct ice_vsi *vsi = pf->vf[i].repr->src_vsi;
struct ice_vf *vf = &pf->vf[i];
@@ -320,7 +390,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
- ice_cfg_dflt_vsi(&pf->hw, ctrl_vsi->idx, false, ICE_FLTR_TX);
ice_clear_dflt_vsi(uplink_vsi->vsw);
ice_fltr_add_mac_and_broadcast(uplink_vsi,
uplink_vsi->port_info->mac.perm_addr,
@@ -375,24 +444,6 @@ static void ice_eswitch_napi_disable(struct ice_pf *pf)
}
/**
- * ice_eswitch_set_rxdid - configure rxdid on all Rx queues from VSI
- * @vsi: VSI to setup rxdid on
- * @rxdid: flex descriptor id
- */
-static void ice_eswitch_set_rxdid(struct ice_vsi *vsi, u32 rxdid)
-{
- struct ice_hw *hw = &vsi->back->hw;
- int i;
-
- ice_for_each_rxq(vsi, i) {
- struct ice_rx_ring *ring = vsi->rx_rings[i];
- u16 pf_q = vsi->rxq_map[ring->q_index];
-
- ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true);
- }
-}
-
-/**
* ice_eswitch_enable_switchdev - configure eswitch in switchdev mode
* @pf: pointer to PF structure
*/
@@ -425,8 +476,6 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
ice_eswitch_napi_enable(pf);
- ice_eswitch_set_rxdid(ctrl_vsi, ICE_RXDID_FLEX_NIC_2);
-
return 0;
err_setup_reprs:
@@ -448,6 +497,7 @@ static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
ice_eswitch_napi_disable(pf);
ice_eswitch_release_env(pf);
+ ice_rem_adv_rule_for_vsi(&pf->hw, ctrl_vsi->idx);
ice_eswitch_release_reprs(pf, ctrl_vsi);
ice_vsi_release(ctrl_vsi);
ice_repr_rem_from_all_vfs(pf);
@@ -497,34 +547,6 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
}
/**
- * ice_eswitch_get_target_netdev - return port representor netdev
- * @rx_ring: pointer to Rx ring
- * @rx_desc: pointer to Rx descriptor
- *
- * When working in switchdev mode context (when control VSI is used), this
- * function returns netdev of appropriate port representor. For non-switchdev
- * context, regular netdev associated with Rx ring is returned.
- */
-struct net_device *
-ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc)
-{
- struct ice_32b_rx_flex_desc_nic_2 *desc;
- struct ice_vsi *vsi = rx_ring->vsi;
- struct ice_vsi *control_vsi;
- u16 target_vsi_id;
-
- control_vsi = vsi->back->switchdev.control_vsi;
- if (vsi != control_vsi)
- return rx_ring->netdev;
-
- desc = (struct ice_32b_rx_flex_desc_nic_2 *)rx_desc;
- target_vsi_id = le16_to_cpu(desc->src_vsi);
-
- return vsi->target_netdevs[target_vsi_id];
-}
-
-/**
* ice_eswitch_mode_get - get current eswitch mode
* @devlink: pointer to devlink structure
* @mode: output parameter for current eswitch mode
@@ -648,7 +670,6 @@ int ice_eswitch_rebuild(struct ice_pf *pf)
return status;
ice_eswitch_napi_enable(pf);
- ice_eswitch_set_rxdid(ctrl_vsi, ICE_RXDID_FLEX_NIC_2);
ice_eswitch_start_all_tx_queues(pf);
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h
index 364cd2a79c37..bd58d9d2e565 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.h
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h
@@ -20,10 +20,11 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf);
void ice_eswitch_update_repr(struct ice_vsi *vsi);
void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf);
-
-struct net_device *
-ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc);
+int
+ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf,
+ const u8 *mac);
+void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf);
+void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf);
void ice_eswitch_set_target_vsi(struct sk_buff *skb,
struct ice_tx_offload_params *off);
@@ -33,6 +34,15 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev);
static inline void ice_eswitch_release(struct ice_pf *pf) { }
static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { }
+static inline void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) { }
+static inline void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) { }
+
+static inline int
+ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf,
+ const u8 *mac)
+{
+ return -EOPNOTSUPP;
+}
static inline void
ice_eswitch_set_target_vsi(struct sk_buff *skb,
@@ -67,13 +77,6 @@ static inline bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
return false;
}
-static inline struct net_device *
-ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc)
-{
- return rx_ring->netdev;
-}
-
static inline netdev_tx_t
ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index d29197ab3d02..4deb2c9446ec 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -4440,7 +4440,7 @@ ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
if (es[i].prot_id == ice_fd_pairs[j].prot_id &&
es[i].off == ice_fd_pairs[j].off) {
- set_bit(j, pair_list);
+ __set_bit(j, pair_list);
pair_start[j] = i;
}
}
@@ -4710,7 +4710,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
if (test_bit(ptg, ptgs_used))
continue;
- set_bit(ptg, ptgs_used);
+ __set_bit(ptg, ptgs_used);
/* Check to see there are any attributes for
* this PTYPE, and add them if found.
*/
@@ -5339,7 +5339,7 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig,
}
/* keep track of used ptgs */
- set_bit(t->tcam[i].ptg, ptgs_used);
+ __set_bit(t->tcam[i].ptg, ptgs_used);
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
index 94903b450345..c29177c6bb9d 100644
--- a/drivers/net/ethernet/intel/ice/ice_fltr.c
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -445,83 +445,3 @@ int ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
ice_fltr_remove_eth_list);
}
-
-/**
- * ice_fltr_update_rule_flags - update lan_en/lb_en flags
- * @hw: pointer to hw
- * @rule_id: id of rule being updated
- * @recipe_id: recipe id of rule
- * @act: current action field
- * @type: Rx or Tx
- * @src: source VSI
- * @new_flags: combinations of lb_en and lan_en
- */
-static int
-ice_fltr_update_rule_flags(struct ice_hw *hw, u16 rule_id, u16 recipe_id,
- u32 act, u16 type, u16 src, u32 new_flags)
-{
- struct ice_aqc_sw_rules_elem *s_rule;
- u32 flags_mask;
- int err;
-
- s_rule = kzalloc(ICE_SW_RULE_RX_TX_NO_HDR_SIZE, GFP_KERNEL);
- if (!s_rule)
- return -ENOMEM;
-
- flags_mask = ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE;
- act &= ~flags_mask;
- act |= (flags_mask & new_flags);
-
- s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(recipe_id);
- s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id);
- s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
-
- if (type & ICE_FLTR_RX) {
- s_rule->pdata.lkup_tx_rx.src =
- cpu_to_le16(hw->port_info->lport);
- s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
-
- } else {
- s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(src);
- s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
- }
-
- err = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
- ice_aqc_opc_update_sw_rules, NULL);
-
- kfree(s_rule);
- return err;
-}
-
-/**
- * ice_fltr_build_action - build action for rule
- * @vsi_id: id of VSI which is use to build action
- */
-static u32 ice_fltr_build_action(u16 vsi_id)
-{
- return ((vsi_id << ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M) |
- ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT;
-}
-
-/**
- * ice_fltr_update_flags_dflt_rule - update flags on default rule
- * @vsi: pointer to VSI
- * @rule_id: id of rule
- * @direction: Tx or Rx
- * @new_flags: flags to update
- *
- * Function updates flags on default rule with ICE_SW_LKUP_DFLT.
- *
- * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and
- * ICE_SINGLE_ACT_LAN_ENABLE.
- */
-int
-ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction,
- u32 new_flags)
-{
- u32 action = ice_fltr_build_action(vsi->vsi_num);
- struct ice_hw *hw = &vsi->back->hw;
-
- return ice_fltr_update_rule_flags(hw, rule_id, ICE_SW_LKUP_DFLT, action,
- direction, vsi->vsi_num, new_flags);
-}
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
index 0e65fd021ad2..3eb42479175f 100644
--- a/drivers/net/ethernet/intel/ice/ice_fltr.h
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -50,7 +50,4 @@ void ice_fltr_remove_all(struct ice_vsi *vsi);
int
ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
u32 new_flags);
-int
-ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction,
- u32 new_flags);
#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e29176889c23..30814435f779 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -472,6 +472,25 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
}
/**
+ * ice_clear_sw_switch_recipes - clear switch recipes
+ * @pf: board private structure
+ *
+ * Mark switch recipes as not created in sw structures. There are cases where
+ * rules (especially advanced rules) need to be restored, either re-read from
+ * hardware or added again. For example after the reset. 'recp_created' flag
+ * prevents from doing that and need to be cleared upfront.
+ */
+static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
+{
+ struct ice_sw_recipe *recp;
+ u8 i;
+
+ recp = pf->hw.switch_info->recp_list;
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
+ recp[i].recp_created = false;
+}
+
+/**
* ice_prepare_for_reset - prep for reset
* @pf: board private structure
* @reset_type: reset type requested
@@ -501,6 +520,11 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
ice_for_each_vf(pf, i)
ice_set_vf_state_qs_dis(&pf->vf[i]);
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ if (reset_type != ICE_RESET_PFR)
+ ice_clear_sw_switch_recipes(pf);
+ }
+
/* release ADQ specific HW and SW resources */
vsi = ice_get_main_vsi(pf);
if (!vsi)
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index d3f65e061a62..ae291d442539 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -2229,7 +2229,7 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
kfree(tx->tstamps);
tx->tstamps = NULL;
- kfree(tx->in_use);
+ bitmap_free(tx->in_use);
tx->in_use = NULL;
tx->len = 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
index af8e6ef5f571..dcc310e29300 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.c
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -244,6 +244,14 @@ static int ice_repr_add(struct ice_vf *vf)
if (!repr)
return -ENOMEM;
+#ifdef CONFIG_ICE_SWITCHDEV
+ repr->mac_rule = kzalloc(sizeof(*repr->mac_rule), GFP_KERNEL);
+ if (!repr->mac_rule) {
+ err = -ENOMEM;
+ goto err_alloc_rule;
+ }
+#endif
+
repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv));
if (!repr->netdev) {
err = -ENOMEM;
@@ -287,6 +295,11 @@ err_alloc_q_vector:
free_netdev(repr->netdev);
repr->netdev = NULL;
err_alloc:
+#ifdef CONFIG_ICE_SWITCHDEV
+ kfree(repr->mac_rule);
+ repr->mac_rule = NULL;
+err_alloc_rule:
+#endif
kfree(repr);
vf->repr = NULL;
return err;
@@ -304,6 +317,10 @@ static void ice_repr_rem(struct ice_vf *vf)
unregister_netdev(vf->repr->netdev);
free_netdev(vf->repr->netdev);
vf->repr->netdev = NULL;
+#ifdef CONFIG_ICE_SWITCHDEV
+ kfree(vf->repr->mac_rule);
+ vf->repr->mac_rule = NULL;
+#endif
kfree(vf->repr);
vf->repr = NULL;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h
index 806de22933c6..0c77ff050d15 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.h
+++ b/drivers/net/ethernet/intel/ice/ice_repr.h
@@ -13,6 +13,11 @@ struct ice_repr {
struct ice_q_vector *q_vector;
struct net_device *netdev;
struct metadata_dst *dst;
+#ifdef CONFIG_ICE_SWITCHDEV
+ /* info about slow path MAC rule */
+ struct ice_rule_query_data *mac_rule;
+ u8 rule_added;
+#endif
};
int ice_repr_add_for_all_vfs(struct ice_pf *pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index e477c2b1d5bb..11ae0bee3590 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -4070,10 +4070,8 @@ ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles,
DECLARE_BITMAP(used_idx, ICE_MAX_FV_WORDS);
u16 bit;
- bitmap_zero(possible_idx, ICE_MAX_FV_WORDS);
bitmap_zero(recipes, ICE_MAX_NUM_RECIPES);
bitmap_zero(used_idx, ICE_MAX_FV_WORDS);
- bitmap_zero(free_idx, ICE_MAX_FV_WORDS);
bitmap_set(possible_idx, 0, ICE_MAX_FV_WORDS);
@@ -5652,6 +5650,91 @@ ice_rem_adv_rule_by_id(struct ice_hw *hw,
}
/**
+ * ice_rem_adv_rule_for_vsi - removes existing advanced switch rules for a
+ * given VSI handle
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle for which we are supposed to remove all the rules.
+ *
+ * This function is used to remove all the rules for a given VSI and as soon
+ * as removing a rule fails, it will return immediately with the error code,
+ * else it will return success.
+ */
+int ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_adv_fltr_mgmt_list_entry *list_itr, *tmp_entry;
+ struct ice_vsi_list_map_info *map_info;
+ struct ice_adv_rule_info rinfo;
+ struct list_head *list_head;
+ struct ice_switch_info *sw;
+ int status;
+ u8 rid;
+
+ sw = hw->switch_info;
+ for (rid = 0; rid < ICE_MAX_NUM_RECIPES; rid++) {
+ if (!sw->recp_list[rid].recp_created)
+ continue;
+ if (!sw->recp_list[rid].adv_rule)
+ continue;
+
+ list_head = &sw->recp_list[rid].filt_rules;
+ list_for_each_entry_safe(list_itr, tmp_entry, list_head,
+ list_entry) {
+ rinfo = list_itr->rule_info;
+
+ if (rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST) {
+ map_info = list_itr->vsi_list_info;
+ if (!map_info)
+ continue;
+
+ if (!test_bit(vsi_handle, map_info->vsi_map))
+ continue;
+ } else if (rinfo.sw_act.vsi_handle != vsi_handle) {
+ continue;
+ }
+
+ rinfo.sw_act.vsi_handle = vsi_handle;
+ status = ice_rem_adv_rule(hw, list_itr->lkups,
+ list_itr->lkups_cnt, &rinfo);
+ if (status)
+ return status;
+ }
+ }
+ return 0;
+}
+
+/**
+ * ice_replay_vsi_adv_rule - Replay advanced rule for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replay the advanced rule for the given VSI.
+ */
+static int
+ice_replay_vsi_adv_rule(struct ice_hw *hw, u16 vsi_handle,
+ struct list_head *list_head)
+{
+ struct ice_rule_query_data added_entry = { 0 };
+ struct ice_adv_fltr_mgmt_list_entry *adv_fltr;
+ int status = 0;
+
+ if (list_empty(list_head))
+ return status;
+ list_for_each_entry(adv_fltr, list_head, list_entry) {
+ struct ice_adv_rule_info *rinfo = &adv_fltr->rule_info;
+ u16 lk_cnt = adv_fltr->lkups_cnt;
+
+ if (vsi_handle != rinfo->sw_act.vsi_handle)
+ continue;
+ status = ice_add_adv_rule(hw, adv_fltr->lkups, lk_cnt, rinfo,
+ &added_entry);
+ if (status)
+ break;
+ }
+ return status;
+}
+
+/**
* ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists
* @hw: pointer to the hardware structure
* @vsi_handle: driver VSI handle
@@ -5661,14 +5744,17 @@ ice_rem_adv_rule_by_id(struct ice_hw *hw,
int ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle)
{
struct ice_switch_info *sw = hw->switch_info;
- int status = 0;
+ int status;
u8 i;
- for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
struct list_head *head;
head = &sw->recp_list[i].filt_replay_rules;
- status = ice_replay_vsi_fltr(hw, vsi_handle, i, head);
+ if (!sw->recp_list[i].adv_rule)
+ status = ice_replay_vsi_fltr(hw, vsi_handle, i, head);
+ else
+ status = ice_replay_vsi_adv_rule(hw, vsi_handle, head);
if (status)
return status;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 9520b140bdbf..0e87b98e0966 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -189,8 +189,7 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
ice_rx_hash(rx_ring, rx_desc, skb, ptype);
/* modifies the skb - consumes the enet header */
- skb->protocol = eth_type_trans(skb, ice_eswitch_get_target_netdev
- (rx_ring, rx_desc));
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
ice_rx_csum(rx_ring, skb, rx_desc, ptype);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 61b2db3342ed..39b80124d282 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1632,6 +1632,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
dev_err(dev, "disabling promiscuous mode failed\n");
}
+ ice_eswitch_del_vf_mac_rule(vf);
+
ice_vf_fdir_exit(vf);
ice_vf_fdir_init(vf);
/* clean VF control VSI when resetting VF since it should be setup
@@ -1650,6 +1652,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
ice_vf_post_vsi_rebuild(vf);
vsi = ice_get_vf_vsi(vf);
ice_eswitch_update_repr(vsi);
+ ice_eswitch_replay_vf_mac_rule(vf);
/* if the VF has been reset allow it to come up again */
if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, vf->vf_id))
@@ -4496,6 +4499,7 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
for (i = 0; i < al->num_elements; i++) {
u8 *mac_addr = al->list[i].addr;
+ int result;
if (!is_unicast_ether_addr(mac_addr) ||
ether_addr_equal(mac_addr, vf->hw_lan_addr.addr))
@@ -4507,6 +4511,13 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
goto handle_mac_exit;
}
+ result = ice_eswitch_add_vf_mac_rule(pf, vf, mac_addr);
+ if (result) {
+ dev_err(ice_pf_to_dev(pf), "Failed to add MAC %pM for VF %d\n, error %d\n",
+ mac_addr, vf->vf_id, result);
+ goto handle_mac_exit;
+ }
+
ice_vfhw_mac_add(vf, &al->list[i]);
vf->num_mac++;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 33904bc87efa..fcfd38fa9e6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -109,7 +109,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
#
# SF device
#
-mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o
+mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o
#
# SF manager
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c5f959a9e14b..812e6810cb3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -984,7 +984,7 @@ struct mlx5e_profile {
};
#define mlx5e_profile_feature_cap(profile, feature) \
- ((profile)->features & (MLX5E_PROFILE_FEATURE_## feature))
+ ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
void mlx5e_build_ptys2ethtool_map(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index 074ffa4fa5af..b4f3bd7d346e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
cancel_delayed_work_sync(&priv->stats_agent.work);
}
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
- return -ENOMEM;
+ return;
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
@@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
PTR_ERR(agent));
kvfree(priv->stats_agent.buf);
- return IS_ERR_OR_NULL(agent);
+ return;
}
priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
-
- return 0;
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
index 664463faf77b..29c8c6d3260f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -7,19 +7,12 @@
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
#else
-
-static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
-{
- return 0;
-}
-
-static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
-{
-}
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
#endif
#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index a0832b86016c..c614fc7fdc9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -45,14 +45,10 @@ verify_uplink_forwarding(struct mlx5e_priv *priv,
termination_table_raw_traffic)) {
NL_SET_ERR_MSG_MOD(extack,
"devices are both uplink, can't offload forwarding");
- pr_err("devices %s %s are both uplink, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
} else if (out_dev != rep_priv->netdev) {
NL_SET_ERR_MSG_MOD(extack,
"devices are not the same uplink, can't offload forwarding");
- pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
}
return 0;
@@ -160,10 +156,6 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
}
NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
- netdev_warn(priv->netdev,
- "devices %s %s not on same switch HW, can't offload forwarding\n",
- netdev_name(priv->netdev),
- out_dev->name);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 536fcb2c5e90..57d755db1cf5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1883,24 +1883,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_params new_params;
- bool mode_changed;
u8 cq_period_mode, current_cq_period_mode;
+ struct mlx5e_params new_params;
+
+ if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ cq_period_mode = cqe_mode_to_period_mode(enable);
- cq_period_mode = enable ?
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
- MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
current_cq_period_mode = is_rx_cq ?
priv->channels.params.rx_cq_moderation.cq_period_mode :
priv->channels.params.tx_cq_moderation.cq_period_mode;
- mode_changed = cq_period_mode != current_cq_period_mode;
-
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
- !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
- return -EOPNOTSUPP;
- if (!mode_changed)
+ if (cq_period_mode == current_cq_period_mode)
return 0;
new_params = priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 31c911182498..d36489363a26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3605,11 +3605,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
new_params = priv->channels.params;
if (enable) {
- if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
- netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
- err = -EINVAL;
- goto out;
- }
new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
new_params.packet_merge.shampo.match_criteria_type =
MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
@@ -3871,6 +3866,11 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
}
if (mlx5e_is_uplink_rep(priv))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index f09b57c31ed7..96e260fd7987 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1603,6 +1603,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
}
+static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ trigger_report(rq, cqe);
+ rq->stats->wqe_err++;
+}
+
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -1616,8 +1622,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
@@ -1670,7 +1675,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
@@ -1719,8 +1724,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -1988,8 +1992,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -2058,8 +2061,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 73fcd9fb17dd..7e7c0c1019f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -35,6 +35,7 @@
#include "en_accel/tls.h"
#include "en_accel/en_accel.h"
#include "en/ptp.h"
+#include "en/port.h"
static unsigned int stats_grps_num(struct mlx5e_priv *priv)
{
@@ -1158,12 +1159,99 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
-void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
- struct ethtool_fec_stats *fec_stats)
+static int fec_num_lanes(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return 0;
+
+ return MLX5_GET(pmlp_reg, out, width);
+}
+
+static int fec_active_mode(struct mlx5_core_dev *mdev)
+{
+ unsigned long fec_active_long;
+ u32 fec_active;
+
+ if (mlx5e_get_fec_mode(mdev, &fec_active, NULL))
+ return MLX5E_FEC_NOFEC;
+
+ fec_active_long = fec_active;
+ return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE);
+}
+
+#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
+ fec_stats->corrected_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_corrected_blocks_lane##idx); \
+ fec_stats->uncorrectable_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_uncorrectable_blocks_lane##idx); \
+})
+
+static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
+ u32 *ppcnt, u8 lanes)
+{
+ if (lanes > 3) { /* 4 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(3);
+ MLX5E_STATS_SET_FEC_BLOCK(2);
+ }
+ if (lanes > 1) /* 2 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(1);
+ if (lanes > 0) /* 1 lane */
+ MLX5E_STATS_SET_FEC_BLOCK(0);
+}
+
+static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
+{
+ fec_stats->corrected_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_corrected_blocks);
+ fec_stats->uncorrectable_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_uncorrectable_blocks);
+}
+
+static void fec_set_block_stats(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int mode = fec_active_mode(mdev);
+
+ if (mode == MLX5E_FEC_NOFEC)
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, outl, sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ switch (mode) {
+ case MLX5E_FEC_RS_528_514:
+ case MLX5E_FEC_RS_544_514:
+ case MLX5E_FEC_LLRS_272_257_1:
+ fec_set_rs_stats(fec_stats, out);
+ return;
+ case MLX5E_FEC_FIRECODE:
+ fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
+ }
+}
+
+static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
{
u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
struct mlx5_core_dev *mdev = priv->mdev;
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
@@ -1181,6 +1269,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
phy_corrected_bits);
}
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ fec_set_corrected_bits_total(priv, fec_stats);
+ fec_set_block_stats(priv, fec_stats);
+}
+
#define PPORT_ETH_EXT_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index b695aad71ee1..48a45aa54a3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -59,6 +59,8 @@ struct mlx5_eq_table {
struct mutex lock; /* sync async eqs creations */
int num_comp_eqs;
struct mlx5_irq_table *irq_table;
+ struct mlx5_irq **comp_irqs;
+ struct mlx5_irq *ctrl_irq;
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
#endif
@@ -266,8 +268,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
struct mlx5_priv *priv = &dev->priv;
- u16 vecidx = param->irq_index;
__be64 *pas;
+ u16 vecidx;
void *eqc;
int inlen;
u32 *in;
@@ -289,20 +291,16 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
init_eq_buf(eq);
- eq->irq = mlx5_irq_request(dev, vecidx, param->affinity);
- if (IS_ERR(eq->irq)) {
- err = PTR_ERR(eq->irq);
- goto err_buf;
- }
-
+ eq->irq = param->irq;
vecidx = mlx5_irq_get_index(eq->irq);
+
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
- goto err_irq;
+ goto err_buf;
}
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
@@ -346,8 +344,6 @@ err_eq:
err_in:
kvfree(in);
-err_irq:
- mlx5_irq_release(eq->irq);
err_buf:
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -401,7 +397,6 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
- mlx5_irq_release(eq->irq);
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -594,11 +589,8 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
eq->irq_nb.notifier_call = mlx5_eq_async_int;
spin_lock_init(&eq->lock);
- if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL))
- return -ENOMEM;
err = create_async_eq(dev, &eq->core, param);
- free_cpumask_var(param->affinity);
if (err) {
mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
return err;
@@ -643,11 +635,18 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq_param param = {};
int err;
+ /* All the async_eqs are using single IRQ, request one IRQ and share its
+ * index among all the async_eqs of this device.
+ */
+ table->ctrl_irq = mlx5_ctrl_irq_request(dev);
+ if (IS_ERR(table->ctrl_irq))
+ return PTR_ERR(table->ctrl_irq);
+
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = MLX5_NUM_CMD_EQE,
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
@@ -660,7 +659,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = async_eq_depth_devlink_param_get(dev),
};
@@ -670,7 +669,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
goto err2;
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = /* TODO: sriov max_vf + */ 1,
.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
};
@@ -689,6 +688,7 @@ err2:
err1:
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
return err;
}
@@ -703,6 +703,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
}
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
@@ -730,12 +731,10 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
int err;
- if (!cpumask_available(param->affinity))
- return ERR_PTR(-EINVAL);
-
if (!eq)
return ERR_PTR(-ENOMEM);
+ param->irq = dev->priv.eq_table->ctrl_irq;
err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
@@ -795,6 +794,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ if (mlx5_core_is_sf(dev))
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+ else
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
+ kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+ u16 *cpus;
+ int ret;
+ int i;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
+ if (mlx5_core_is_sf(dev)) {
+ ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+ }
+
+ cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
+ if (!cpus) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+ for (i = 0; i < ncomp_eqs; i++)
+ cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ kfree(cpus);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+
+free_irqs:
+ kfree(table->comp_irqs);
+ return ret;
+}
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -809,6 +856,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
tasklet_disable(&eq->tasklet_ctx.task);
kfree(eq);
}
+ comp_irqs_release(dev);
}
static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
@@ -835,12 +883,13 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err;
int i;
+ ncomp_eqs = comp_irqs_request(dev);
+ if (ncomp_eqs < 0)
+ return ncomp_eqs;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_eqs = table->num_comp_eqs;
nent = comp_eq_depth_devlink_param_get(dev);
for (i = 0; i < ncomp_eqs; i++) {
struct mlx5_eq_param param = {};
- int vecidx = i;
eq = kzalloc(sizeof(*eq), GFP_KERNEL);
if (!eq) {
@@ -855,18 +904,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .irq_index = vecidx,
+ .irq = table->comp_irqs[i],
.nent = nent,
};
- if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
- err = -ENOMEM;
- goto clean_eq;
- }
- cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node),
- param.affinity);
err = create_map_eq(dev, &eq->core, &param);
- free_cpumask_var(param.affinity);
if (err)
goto clean_eq;
err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
@@ -880,7 +922,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
list_add_tail(&eq->list, &table->comp_eqs_list);
}
+ table->num_comp_eqs = ncomp_eqs;
return 0;
+
clean_eq:
kfree(eq);
clean:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 2b52f7c09152..9d17206d1625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -431,7 +431,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
int err = 0;
if (!mlx5_esw_allowed(esw))
- return -EPERM;
+ return vlan ? -EPERM : 0;
if (vlan || qos)
set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
new file mode 100644
index 000000000000..380a208ab137
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+
+static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]--;
+}
+
+static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]++;
+}
+
+/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */
+static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask)
+{
+ int best_cpu = -1;
+ int cpu;
+
+ for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
+ /* CPU has zero IRQs on it. No need to search any more CPUs. */
+ if (!pool->irqs_per_cpu[cpu]) {
+ best_cpu = cpu;
+ break;
+ }
+ if (best_cpu < 0)
+ best_cpu = cpu;
+ if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
+ best_cpu = cpu;
+ }
+ if (best_cpu == -1) {
+ /* There isn't online CPUs in req_mask */
+ mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n",
+ cpumask_pr_args(req_mask));
+ best_cpu = cpumask_first(cpu_online_mask);
+ }
+ pool->irqs_per_cpu[best_cpu]++;
+ return best_cpu;
+}
+
+/* Creating an IRQ from irq_pool */
+static struct mlx5_irq *
+irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ cpumask_var_t auto_mask;
+ struct mlx5_irq *irq;
+ u32 irq_index;
+ int err;
+
+ if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
+ if (err)
+ return ERR_PTR(err);
+ if (pool->irqs_per_cpu) {
+ if (cpumask_weight(req_mask) > 1)
+ /* if req_mask contain more then one CPU, set the least loadad CPU
+ * of req_mask
+ */
+ cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ else
+ cpu_get(pool, cpumask_first(req_mask));
+ }
+ irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
+ free_cpumask_var(auto_mask);
+ return irq;
+}
+
+/* Looking for the IRQ with the smallest refcount that fits req_mask.
+ * If pool is sf_comp_pool, then we are looking for an IRQ with any of the
+ * requested CPUs in req_mask.
+ * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
+ * isn't subset of req_mask, so we will skip it. irq1_mask is subset of req_mask,
+ * we don't skip it.
+ * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will
+ * fit. And since mask is subset of itself, we will pass the first if bellow.
+ */
+static struct mlx5_irq *
+irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ int start = pool->xa_num_irqs.min;
+ int end = pool->xa_num_irqs.max;
+ struct mlx5_irq *irq = NULL;
+ struct mlx5_irq *iter;
+ int irq_refcount = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&pool->lock);
+ xa_for_each_range(&pool->irqs, index, iter, start, end) {
+ struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
+ int iter_refcount = mlx5_irq_read_locked(iter);
+
+ if (!cpumask_subset(iter_mask, req_mask))
+ /* skip IRQs with a mask which is not subset of req_mask */
+ continue;
+ if (iter_refcount < pool->min_threshold)
+ /* If we found an IRQ with less than min_thres, return it */
+ return iter;
+ if (!irq || iter_refcount < irq_refcount) {
+ /* In case we won't find an IRQ with less than min_thres,
+ * keep a pointer to the least used IRQ
+ */
+ irq_refcount = iter_refcount;
+ irq = iter;
+ }
+ }
+ return irq;
+}
+
+/**
+ * mlx5_irq_affinity_request - request an IRQ according to the given mask.
+ * @pool: IRQ pool to request from.
+ * @req_mask: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ struct mlx5_irq *least_loaded_irq, *new_irq;
+
+ mutex_lock(&pool->lock);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ if (least_loaded_irq &&
+ mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
+ goto out;
+ /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
+ new_irq = irq_pool_request_irq(pool, req_mask);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+ /* We failed to create an IRQ and we didn't find an IRQ */
+ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
+ PTR_ERR(new_irq));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+ /* We failed to create a new IRQ for the requested affinity,
+ * sharing existing IRQ.
+ */
+ goto out;
+ }
+ least_loaded_irq = new_irq;
+ goto unlock;
+out:
+ mlx5_irq_get_locked(least_loaded_irq);
+ if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold)
+ mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+ pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(least_loaded_irq)), pool->name,
+ mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+ mutex_unlock(&pool->lock);
+ return least_loaded_irq;
+}
+
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ int i;
+
+ for (i = 0; i < num_irqs; i++) {
+ int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));
+
+ synchronize_irq(pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(irqs[i])));
+ if (mlx5_irq_put(irqs[i]))
+ if (pool->irqs_per_cpu)
+ cpu_put(pool, cpu);
+ }
+}
+
+/**
+ * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bounded to at most 1 CPU.
+ * This function is requesting IRQs according to the default assignment.
+ * The default assignment policy is:
+ * - in each iteration, request the least loaded IRQ which is not bound to any
+ * CPU of the previous IRQs requested.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i = 0;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_copy(req_mask, cpu_online_mask);
+ for (i = 0; i < nirqs; i++) {
+ if (mlx5_irq_pool_is_sf_pool(pool))
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ else
+ /* In case SF pool doesn't exists, fallback to the PF IRQs.
+ * The PF IRQs are already allocated and binded to CPU
+ * at this point. Hence, only an index is needed.
+ */
+ irq = mlx5_irq_request(dev, i, NULL);
+ if (IS_ERR(irq))
+ break;
+ irqs[i] = irq;
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+ cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+ mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
+ }
+ free_cpumask_var(req_mask);
+ if (!i)
+ return PTR_ERR(irq);
+ return i;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6b225bb5751a..2c774f367199 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -98,6 +98,8 @@ enum {
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};
+#define LOG_MAX_SUPPORTED_QPS 0xff
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
@@ -109,7 +111,7 @@ static struct mlx5_profile profile[] = {
[2] = {
.mask = MLX5_PROF_MASK_QP_SIZE |
MLX5_PROF_MASK_MR_CACHE,
- .log_max_qp = 18,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
.mr_cache[0] = {
.size = 500,
.limit = 250
@@ -523,7 +525,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
to_fw_pkey_sz(dev, 128));
/* Check log_max_qp from HCA caps to set in current profile */
- if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
+ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
+ prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
prof->log_max_qp,
MLX5_CAP_GEN_MAX(dev, log_max_qp));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 8116815663a7..23cb63fa4588 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -22,12 +22,40 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
int msix_vec_count);
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct cpumask *affinity);
-void mlx5_irq_release(struct mlx5_irq *irq);
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs);
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
int mlx5_irq_get_index(struct mlx5_irq *irq);
+struct mlx5_irq_pool;
+#ifdef CONFIG_MLX5_SF
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs);
+struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask);
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs);
+#else
+static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev,
+ struct mlx5_irq **irqs, int num_irqs) {}
+#endif
#endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 163e01fe500e..90fec0649ef5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -7,15 +7,12 @@
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
+#include "pci_irq.h"
#include "lib/sf.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
-#define MLX5_MAX_IRQ_NAME (32)
-/* max irq_index is 2047, so four chars */
-#define MLX5_MAX_IRQ_IDX_CHARS (4)
-
#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
@@ -25,7 +22,6 @@
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
-#define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq {
struct atomic_notifier_head nh;
@@ -37,16 +33,6 @@ struct mlx5_irq {
int irqn;
};
-struct mlx5_irq_pool {
- char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
- struct xa_limit xa_num_irqs;
- struct mutex lock; /* sync IRQs creations */
- struct xarray irqs;
- u32 max_threshold;
- u32 min_threshold;
- struct mlx5_core_dev *dev;
-};
-
struct mlx5_irq_table {
struct mlx5_irq_pool *pf_pool;
struct mlx5_irq_pool *sf_ctrl_pool;
@@ -153,18 +139,28 @@ static void irq_release(struct mlx5_irq *irq)
kfree(irq);
}
-static void irq_put(struct mlx5_irq *irq)
+int mlx5_irq_put(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
+ int ret = 0;
mutex_lock(&pool->lock);
irq->refcount--;
- if (!irq->refcount)
+ if (!irq->refcount) {
irq_release(irq);
+ ret = 1;
+ }
mutex_unlock(&pool->lock);
+ return ret;
+}
+
+int mlx5_irq_read_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ return irq->refcount;
}
-static int irq_get_locked(struct mlx5_irq *irq)
+int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
lockdep_assert_held(&irq->pool->lock);
if (WARN_ON_ONCE(!irq->refcount))
@@ -178,7 +174,7 @@ static int irq_get(struct mlx5_irq *irq)
int err;
mutex_lock(&irq->pool->lock);
- err = irq_get_locked(irq);
+ err = mlx5_irq_get_locked(irq);
mutex_unlock(&irq->pool->lock);
return err;
}
@@ -210,12 +206,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
-static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
-{
- return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
-}
-
-static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity)
{
struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
@@ -226,7 +218,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
if (!irq)
return ERR_PTR(-ENOMEM);
irq->irqn = pci_irq_vector(dev->pdev, i);
- if (!irq_pool_is_sf_pool(pool))
+ if (!mlx5_irq_pool_is_sf_pool(pool))
irq_set_name(pool, name, i);
else
irq_sf_set_name(pool, name, i);
@@ -244,6 +236,10 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
err = -ENOMEM;
goto err_cpumask;
}
+ if (affinity) {
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_hint(irq->irqn, irq->mask);
+ }
irq->pool = pool;
irq->refcount = 1;
irq->index = i;
@@ -255,6 +251,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
}
return irq;
err_xa:
+ irq_set_affinity_hint(irq->irqn, NULL);
free_cpumask_var(irq->mask);
err_cpumask:
free_irq(irq->irqn, &irq->nh);
@@ -275,7 +272,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
return -ENOENT;
ret = atomic_notifier_chain_register(&irq->nh, nb);
if (ret)
- irq_put(irq);
+ mlx5_irq_put(irq);
return ret;
}
@@ -284,7 +281,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
int err = 0;
err = atomic_notifier_chain_unregister(&irq->nh, nb);
- irq_put(irq);
+ mlx5_irq_put(irq);
return err;
}
@@ -300,131 +297,121 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
/* irq_pool API */
-/* creating an irq from irq_pool */
-static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+/* requesting an irq from a given pool according to given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+ struct cpumask *affinity)
{
struct mlx5_irq *irq;
- u32 irq_index;
- int err;
- err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
- GFP_KERNEL);
- if (err)
- return ERR_PTR(err);
- irq = irq_request(pool, irq_index);
- if (IS_ERR(irq))
- return irq;
- cpumask_copy(irq->mask, affinity);
- irq_set_affinity_hint(irq->irqn, irq->mask);
+ mutex_lock(&pool->lock);
+ irq = xa_load(&pool->irqs, vecidx);
+ if (irq) {
+ mlx5_irq_get_locked(irq);
+ goto unlock;
+ }
+ irq = mlx5_irq_alloc(pool, vecidx, affinity);
+unlock:
+ mutex_unlock(&pool->lock);
return irq;
}
-/* looking for the irq with the smallest refcount and the same affinity */
-static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
- int start = pool->xa_num_irqs.min;
- int end = pool->xa_num_irqs.max;
- struct mlx5_irq *irq = NULL;
- struct mlx5_irq *iter;
- unsigned long index;
+ return irq_table->sf_ctrl_pool;
+}
- lockdep_assert_held(&pool->lock);
- xa_for_each_range(&pool->irqs, index, iter, start, end) {
- if (!cpumask_equal(iter->mask, affinity))
- continue;
- if (iter->refcount < pool->min_threshold)
- return iter;
- if (!irq || iter->refcount < irq->refcount)
- irq = iter;
- }
- return irq;
+static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->sf_comp_pool;
}
-/* requesting an irq from a given pool according to given affinity */
-static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
- struct mlx5_irq *least_loaded_irq, *new_irq;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- mutex_lock(&pool->lock);
- least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
- if (least_loaded_irq &&
- least_loaded_irq->refcount < pool->min_threshold)
- goto out;
- new_irq = irq_pool_create_irq(pool, affinity);
- if (IS_ERR(new_irq)) {
- if (!least_loaded_irq) {
- mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
- PTR_ERR(new_irq));
- mutex_unlock(&pool->lock);
- return new_irq;
- }
- /* We failed to create a new IRQ for the requested affinity,
- * sharing existing IRQ.
- */
- goto out;
- }
- least_loaded_irq = new_irq;
- goto unlock;
-out:
- irq_get_locked(least_loaded_irq);
- if (least_loaded_irq->refcount > pool->max_threshold)
- mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
- least_loaded_irq->irqn, pool->name,
- least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ);
-unlock:
- mutex_unlock(&pool->lock);
- return least_loaded_irq;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
}
-/* requesting an irq from a given pool according to given index */
-static struct mlx5_irq *
-irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
- struct cpumask *affinity)
+static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
- struct mlx5_irq *irq;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- mutex_lock(&pool->lock);
- irq = xa_load(&pool->irqs, vecidx);
- if (irq) {
- irq_get_locked(irq);
- goto unlock;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_ctrl_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+/**
+ * mlx5_irqs_release - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
+{
+ int i;
+
+ for (i = 0; i < nirqs; i++) {
+ synchronize_irq(irqs[i]->irqn);
+ mlx5_irq_put(irqs[i]);
}
- irq = irq_request(pool, vecidx);
- if (IS_ERR(irq) || !affinity)
- goto unlock;
- cpumask_copy(irq->mask, affinity);
- if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max &&
- cpumask_empty(irq->mask))
- cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask);
- irq_set_affinity_hint(irq->irqn, irq->mask);
-unlock:
- mutex_unlock(&pool->lock);
- return irq;
}
-static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table,
- int i, struct cpumask *affinity)
+/**
+ * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
+ * @ctrl_irq: ctrl IRQ to be released.
+ */
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
- if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL)
- return irq_table->sf_ctrl_pool;
- return irq_table->sf_comp_pool;
+ mlx5_irqs_release(&ctrl_irq, 1);
}
/**
- * mlx5_irq_release - release an IRQ back to the system.
- * @irq: irq to be released.
+ * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
+ * @dev: mlx5 device that requesting the IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
-void mlx5_irq_release(struct mlx5_irq *irq)
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
- synchronize_irq(irq->irqn);
- irq_put(irq);
+ struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ cpumask_copy(req_mask, cpu_online_mask);
+ if (!mlx5_irq_pool_is_sf_pool(pool)) {
+ /* In case we are allocating a control IRQ for PF/VF */
+ if (!pool->xa_num_irqs.max) {
+ cpumask_clear(req_mask);
+ /* In case we only have a single IRQ for PF/VF */
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+ }
+ /* Allocate the IRQ in the last index of the pool */
+ irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ } else {
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ }
+
+ free_cpumask_var(req_mask);
+ return irq;
}
/**
- * mlx5_irq_request - request an IRQ for mlx5 device.
+ * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
* @dev: mlx5 device that requesting the IRQ.
* @vecidx: vector index of the IRQ. This argument is ignore if affinity is
* provided.
@@ -439,23 +426,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
- if (mlx5_core_is_sf(dev)) {
- pool = find_sf_irq_pool(irq_table, vecidx, affinity);
- if (!pool)
- /* we don't have IRQs for SFs, using the PF IRQs */
- goto pf_irq;
- if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
- /* In case an SF user request IRQ with vecidx */
- irq = irq_pool_request_vector(pool, vecidx, NULL);
- else
- irq = irq_pool_request_affinity(pool, affinity);
- goto out;
- }
-pf_irq:
pool = irq_table->pf_pool;
- vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx;
irq = irq_pool_request_vector(pool, vecidx, affinity);
-out:
if (IS_ERR(irq))
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
@@ -464,6 +436,51 @@ out:
return irq;
}
+/**
+ * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
+{
+ mlx5_irqs_release(irqs, nirqs);
+}
+
+/**
+ * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @cpus: CPUs array for binding the IRQs
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function is requests nirqs IRQs, starting from @vecidx.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ for (i = 0; i < nirqs; i++) {
+ cpumask_set_cpu(cpus[i], req_mask);
+ irq = mlx5_irq_request(dev, i, req_mask);
+ if (IS_ERR(irq))
+ break;
+ cpumask_clear(req_mask);
+ irqs[i] = irq;
+ }
+
+ free_cpumask_var(req_mask);
+ return i ? i : PTR_ERR(irq);
+}
+
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
u32 min_threshold, u32 max_threshold)
@@ -500,6 +517,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
irq_release(irq);
xa_destroy(&pool->irqs);
mutex_destroy(&pool->lock);
+ kfree(pool->irqs_per_cpu);
kvfree(pool);
}
@@ -547,7 +565,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
err = PTR_ERR(table->sf_comp_pool);
goto err_sf_ctrl;
}
+
+ table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
+ if (!table->sf_comp_pool->irqs_per_cpu) {
+ err = -ENOMEM;
+ goto err_irqs_per_cpu;
+ }
+
return 0;
+
+err_irqs_per_cpu:
+ irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
irq_pool_free(table->sf_ctrl_pool);
err_pf:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
new file mode 100644
index 000000000000..5c7e68bee43a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __PCI_IRQ_H__
+#define __PCI_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+/* max irq_index is 2047, so four chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
+struct mlx5_irq;
+
+struct mlx5_irq_pool {
+ char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+ struct xa_limit xa_num_irqs;
+ struct mutex lock; /* sync IRQs creations */
+ struct xarray irqs;
+ u32 max_threshold;
+ u32 min_threshold;
+ u16 *irqs_per_cpu;
+ struct mlx5_core_dev *dev;
+};
+
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+ return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity);
+int mlx5_irq_get_locked(struct mlx5_irq *irq);
+int mlx5_irq_read_locked(struct mlx5_irq *irq);
+int mlx5_irq_put(struct mlx5_irq *irq);
+
+#endif /* __PCI_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index d1ae248e125c..4683312861ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -66,7 +66,7 @@ config MLXSW_SPECTRUM
default m
help
This driver supports Mellanox Technologies
- Spectrum/Spectrum-2/Spectrum-3 Ethernet Switch ASICs.
+ Spectrum/Spectrum-2/Spectrum-3/Spectrum-4 Ethernet Switch ASICs.
To compile this driver as a module, choose M here: the
module will be called mlxsw_spectrum.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index f1b09c2f9eda..bd1a51a0a540 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
- MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3),
- MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3),
+ MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index a47a17c04c62..3a037fe47211 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -33,8 +33,8 @@ enum mlxsw_afk_element {
MLXSW_AFK_ELEMENT_IP_TTL_,
MLXSW_AFK_ELEMENT_IP_ECN,
MLXSW_AFK_ELEMENT_IP_DSCP,
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
MLXSW_AFK_ELEMENT_MAX,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index 9899c1a2ea8f..cacc2f9fa1d4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -9,6 +9,7 @@
#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84
#define PCI_DEVICE_ID_MELLANOX_SPECTRUM2 0xcf6c
#define PCI_DEVICE_ID_MELLANOX_SPECTRUM3 0xcf70
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM4 0xcf80
#if IS_ENABLED(CONFIG_MLXSW_PCI)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index c97d2c744725..24cc65018b41 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3914,6 +3914,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11
+#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4 11
static inline void mlxsw_reg_qeec_pack(char *payload, u16 local_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5251f33af0fb..aa411dec62f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -95,6 +95,7 @@ static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = {
static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3";
+static const char mlxsw_sp4_driver_name[] = "mlxsw_spectrum4";
static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = {
0xff, 0xff, 0xff, 0xff, 0xfc, 0x00
@@ -3155,6 +3156,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
mlxsw_sp->acl_rulei_ops = &mlxsw_sp2_acl_rulei_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+ mlxsw_sp->acl_bf_ops = &mlxsw_sp2_acl_bf_ops;
mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
@@ -3184,6 +3186,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
mlxsw_sp->acl_rulei_ops = &mlxsw_sp2_acl_rulei_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+ mlxsw_sp->acl_bf_ops = &mlxsw_sp2_acl_bf_ops;
mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
@@ -3200,6 +3203,36 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}
+static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+ mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops;
+ mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
+ mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
+ mlxsw_sp->afk_ops = &mlxsw_sp4_afk_ops;
+ mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
+ mlxsw_sp->acl_rulei_ops = &mlxsw_sp2_acl_rulei_ops;
+ mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+ mlxsw_sp->acl_bf_ops = &mlxsw_sp4_acl_bf_ops;
+ mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
+ mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
+ mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
+ mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops;
+ mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
+ mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
+ mlxsw_sp->span_ops = &mlxsw_sp3_span_ops;
+ mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
+ mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
+ mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
+ mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
+ mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4;
+
+ return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
+}
+
static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -3759,6 +3792,45 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
.temp_warn_enabled = true,
};
+static struct mlxsw_driver mlxsw_sp4_driver = {
+ .kind = mlxsw_sp4_driver_name,
+ .priv_size = sizeof(struct mlxsw_sp),
+ .init = mlxsw_sp4_init,
+ .fini = mlxsw_sp_fini,
+ .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
+ .port_split = mlxsw_sp_port_split,
+ .port_unsplit = mlxsw_sp_port_unsplit,
+ .sb_pool_get = mlxsw_sp_sb_pool_get,
+ .sb_pool_set = mlxsw_sp_sb_pool_set,
+ .sb_port_pool_get = mlxsw_sp_sb_port_pool_get,
+ .sb_port_pool_set = mlxsw_sp_sb_port_pool_set,
+ .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot,
+ .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
+ .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
+ .trap_init = mlxsw_sp_trap_init,
+ .trap_fini = mlxsw_sp_trap_fini,
+ .trap_action_set = mlxsw_sp_trap_action_set,
+ .trap_group_init = mlxsw_sp_trap_group_init,
+ .trap_group_set = mlxsw_sp_trap_group_set,
+ .trap_policer_init = mlxsw_sp_trap_policer_init,
+ .trap_policer_fini = mlxsw_sp_trap_policer_fini,
+ .trap_policer_set = mlxsw_sp_trap_policer_set,
+ .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get,
+ .txhdr_construct = mlxsw_sp_txhdr_construct,
+ .resources_register = mlxsw_sp2_resources_register,
+ .params_register = mlxsw_sp2_params_register,
+ .params_unregister = mlxsw_sp2_params_unregister,
+ .ptp_transmitted = mlxsw_sp_ptp_transmitted,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sp2_config_profile,
+ .res_query_enabled = true,
+ .fw_fatal_enabled = true,
+ .temp_warn_enabled = true,
+};
+
bool mlxsw_sp_port_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
@@ -4926,6 +4998,16 @@ static struct pci_driver mlxsw_sp3_pci_driver = {
.id_table = mlxsw_sp3_pci_id_table,
};
+static const struct pci_device_id mlxsw_sp4_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM4), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sp4_pci_driver = {
+ .name = mlxsw_sp4_driver_name,
+ .id_table = mlxsw_sp4_pci_id_table,
+};
+
static int __init mlxsw_sp_module_init(void)
{
int err;
@@ -4945,6 +5027,10 @@ static int __init mlxsw_sp_module_init(void)
if (err)
goto err_sp3_core_driver_register;
+ err = mlxsw_core_driver_register(&mlxsw_sp4_driver);
+ if (err)
+ goto err_sp4_core_driver_register;
+
err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver);
if (err)
goto err_sp1_pci_driver_register;
@@ -4957,13 +5043,21 @@ static int __init mlxsw_sp_module_init(void)
if (err)
goto err_sp3_pci_driver_register;
+ err = mlxsw_pci_driver_register(&mlxsw_sp4_pci_driver);
+ if (err)
+ goto err_sp4_pci_driver_register;
+
return 0;
+err_sp4_pci_driver_register:
+ mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver);
err_sp3_pci_driver_register:
mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
err_sp2_pci_driver_register:
mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
err_sp1_pci_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sp4_driver);
+err_sp4_core_driver_register:
mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
err_sp3_core_driver_register:
mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
@@ -4977,9 +5071,11 @@ err_sp1_core_driver_register:
static void __exit mlxsw_sp_module_exit(void)
{
+ mlxsw_pci_driver_unregister(&mlxsw_sp4_pci_driver);
mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver);
mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sp4_driver);
mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
@@ -4996,6 +5092,7 @@ MODULE_DESCRIPTION("Mellanox Spectrum driver");
MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp4_pci_id_table);
MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
MODULE_FIRMWARE(MLXSW_SP2_FW_FILENAME);
MODULE_FIRMWARE(MLXSW_SP3_FW_FILENAME);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 8445fc5c9ea3..bb2442e1f705 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -190,6 +190,7 @@ struct mlxsw_sp {
const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops;
const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
+ const struct mlxsw_sp_acl_bf_ops *acl_bf_ops;
const struct mlxsw_sp_nve_ops **nve_ops_arr;
const struct mlxsw_sp_sb_vals *sb_vals;
const struct mlxsw_sp_sb_ops *sb_ops;
@@ -1106,6 +1107,11 @@ extern const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops;
/* spectrum_acl_flex_keys.c */
extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
+extern const struct mlxsw_afk_ops mlxsw_sp4_afk_ops;
+
+/* spectrum_acl_bloom_filter.c */
+extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops;
+extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops;
/* spectrum_matchall.c */
struct mlxsw_sp_mall_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
index a11d911302f1..e4f4cded2b6f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
@@ -45,8 +45,8 @@ static int mlxsw_sp2_mr_tcam_bind_group(struct mlxsw_sp *mlxsw_sp,
}
static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv4[] = {
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
MLXSW_AFK_ELEMENT_SRC_IP_0_31,
MLXSW_AFK_ELEMENT_DST_IP_0_31,
};
@@ -89,8 +89,8 @@ static void mlxsw_sp2_mr_tcam_ipv4_fini(struct mlxsw_sp2_mr_tcam *mr_tcam)
}
static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv6[] = {
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
MLXSW_AFK_ELEMENT_SRC_IP_96_127,
MLXSW_AFK_ELEMENT_SRC_IP_64_95,
MLXSW_AFK_ELEMENT_SRC_IP_32_63,
@@ -189,10 +189,10 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule,
rulei = mlxsw_sp_acl_rule_rulei(rule);
rulei->priority = priority;
- mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
key->vrid, GENMASK(7, 0));
mlxsw_sp_acl_rulei_keymask_u32(rulei,
- MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
+ MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
key->vrid >> 8, GENMASK(2, 0));
switch (key->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
index dbd3bebf11ec..e2aced7ab454 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -17,9 +17,9 @@ struct mlxsw_sp_acl_bf {
};
/* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key
- * blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to
- * 12 key blocks, so there can be up to 3 chunks in the Bloom filter key,
- * depending on the actual number of key blocks used in the region.
+ * blocks, eRP ID and region ID. In Spectrum-2 and above, region key is combined
+ * of up to 12 key blocks, so there can be up to 3 chunks in the Bloom filter
+ * key, depending on the actual number of key blocks used in the region.
* The layout of the Bloom filter key is as follows:
*
* +-------------------------+------------------------+------------------------+
@@ -27,7 +27,9 @@ struct mlxsw_sp_acl_bf {
* +-------------------------+------------------------+------------------------+
*/
#define MLXSW_BLOOM_KEY_CHUNKS 3
-#define MLXSW_BLOOM_KEY_LEN 69
+
+/* Spectrum-2 and Spectrum-3 chunks */
+#define MLXSW_SP2_BLOOM_KEY_LEN 69
/* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is
* 36 bits, 2 bytes which hold eRP ID and region ID, and 3 bytes of zero
@@ -42,31 +44,21 @@ struct mlxsw_sp_acl_bf {
* | 0 | region ID | eRP ID | 4 Key blocks (18 Bytes) |
* +---------+-----------+----------+-----------------------------------+
*/
-#define MLXSW_BLOOM_CHUNK_PAD_BYTES 3
-#define MLXSW_BLOOM_CHUNK_KEY_BYTES 18
-#define MLXSW_BLOOM_KEY_CHUNK_BYTES 23
+#define MLXSW_SP2_BLOOM_CHUNK_PAD_BYTES 3
+#define MLXSW_SP2_BLOOM_CHUNK_KEY_BYTES 18
+#define MLXSW_SP2_BLOOM_KEY_CHUNK_BYTES 23
/* The offset of the key block within a chunk is 5 bytes as it comes after
* 3 bytes of zero padding and 16 bits of region ID and eRP ID.
*/
-#define MLXSW_BLOOM_CHUNK_KEY_OFFSET 5
+#define MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET 5
-/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
- * and we need to populate it with 4 key blocks copied from the entry encoded
- * key. Since the encoded key contains a padding, key block 11 starts at offset
- * 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take
- * 18 bytes.
- * This array defines key offsets for easy access when copying key blocks from
- * entry key to Bloom filter chunk.
- */
-static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
-
-/* This table is just the CRC of each possible byte. It is
- * computed, Msbit first, for the Bloom filter polynomial
- * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-{2-3}. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
* the implicit x^16).
*/
-static const u16 mlxsw_sp_acl_bf_crc_tab[256] = {
+static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
0x0000, 0x8529, 0x8f7b, 0x0a52, 0x9bdf, 0x1ef6, 0x14a4, 0x918d,
0xb297, 0x37be, 0x3dec, 0xb8c5, 0x2948, 0xac61, 0xa633, 0x231a,
0xe007, 0x652e, 0x6f7c, 0xea55, 0x7bd8, 0xfef1, 0xf4a3, 0x718a,
@@ -101,24 +93,146 @@ static const u16 mlxsw_sp_acl_bf_crc_tab[256] = {
0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1,
};
-static u16 mlxsw_sp_acl_bf_crc_byte(u16 crc, u8 c)
+/* Spectrum-4 chunks */
+#define MLXSW_SP4_BLOOM_KEY_LEN 60
+
+/* In Spectrum-4, there is no padding. Each chunk size is 20 bytes.
+ * 18 bytes of it contain 4 key blocks, each is 36 bits, and 2 bytes which hold
+ * eRP ID and region ID.
+ * The layout of each chunk is as follows:
+ *
+ * +----------------------+-----------------------------------+
+ * | 2 bytes | 18 bytes |
+ * +-----------+----------+-----------------------------------+
+ * | 157:148 | 147:144 | 143:0 |
+ * +---------+-----------+----------+-------------------------+
+ * | region ID | eRP ID | 4 Key blocks (18 Bytes) |
+ * +-----------+----------+-----------------------------------+
+ */
+
+#define MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES 0
+#define MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES 18
+#define MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES 20
+
+/* The offset of the key block within a chunk is 2 bytes as it comes after
+ * 16 bits of region ID and eRP ID.
+ */
+#define MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET 2
+
+/* For Spectrum-4, two hash functions are used, CRC-10 and CRC-6 based.
+ * The result is combination of the two calculations -
+ * 6 bit column are MSB (result of CRC-6),
+ * 10 bit row are LSB (result of CRC-10).
+ */
+
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-4. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x1b (1 + x^1 + x^3 + x^4 and the implicit x^10).
+ */
+static const u16 mlxsw_sp4_acl_bf_crc10_tab[256] = {
+0x0000, 0x001b, 0x0036, 0x002d, 0x006c, 0x0077, 0x005a, 0x0041,
+0x00d8, 0x00c3, 0x00ee, 0x00f5, 0x00b4, 0x00af, 0x0082, 0x0099,
+0x01b0, 0x01ab, 0x0186, 0x019d, 0x01dc, 0x01c7, 0x01ea, 0x01f1,
+0x0168, 0x0173, 0x015e, 0x0145, 0x0104, 0x011f, 0x0132, 0x0129,
+0x0360, 0x037b, 0x0356, 0x034d, 0x030c, 0x0317, 0x033a, 0x0321,
+0x03b8, 0x03a3, 0x038e, 0x0395, 0x03d4, 0x03cf, 0x03e2, 0x03f9,
+0x02d0, 0x02cb, 0x02e6, 0x02fd, 0x02bc, 0x02a7, 0x028a, 0x0291,
+0x0208, 0x0213, 0x023e, 0x0225, 0x0264, 0x027f, 0x0252, 0x0249,
+0x02db, 0x02c0, 0x02ed, 0x02f6, 0x02b7, 0x02ac, 0x0281, 0x029a,
+0x0203, 0x0218, 0x0235, 0x022e, 0x026f, 0x0274, 0x0259, 0x0242,
+0x036b, 0x0370, 0x035d, 0x0346, 0x0307, 0x031c, 0x0331, 0x032a,
+0x03b3, 0x03a8, 0x0385, 0x039e, 0x03df, 0x03c4, 0x03e9, 0x03f2,
+0x01bb, 0x01a0, 0x018d, 0x0196, 0x01d7, 0x01cc, 0x01e1, 0x01fa,
+0x0163, 0x0178, 0x0155, 0x014e, 0x010f, 0x0114, 0x0139, 0x0122,
+0x000b, 0x0010, 0x003d, 0x0026, 0x0067, 0x007c, 0x0051, 0x004a,
+0x00d3, 0x00c8, 0x00e5, 0x00fe, 0x00bf, 0x00a4, 0x0089, 0x0092,
+0x01ad, 0x01b6, 0x019b, 0x0180, 0x01c1, 0x01da, 0x01f7, 0x01ec,
+0x0175, 0x016e, 0x0143, 0x0158, 0x0119, 0x0102, 0x012f, 0x0134,
+0x001d, 0x0006, 0x002b, 0x0030, 0x0071, 0x006a, 0x0047, 0x005c,
+0x00c5, 0x00de, 0x00f3, 0x00e8, 0x00a9, 0x00b2, 0x009f, 0x0084,
+0x02cd, 0x02d6, 0x02fb, 0x02e0, 0x02a1, 0x02ba, 0x0297, 0x028c,
+0x0215, 0x020e, 0x0223, 0x0238, 0x0279, 0x0262, 0x024f, 0x0254,
+0x037d, 0x0366, 0x034b, 0x0350, 0x0311, 0x030a, 0x0327, 0x033c,
+0x03a5, 0x03be, 0x0393, 0x0388, 0x03c9, 0x03d2, 0x03ff, 0x03e4,
+0x0376, 0x036d, 0x0340, 0x035b, 0x031a, 0x0301, 0x032c, 0x0337,
+0x03ae, 0x03b5, 0x0398, 0x0383, 0x03c2, 0x03d9, 0x03f4, 0x03ef,
+0x02c6, 0x02dd, 0x02f0, 0x02eb, 0x02aa, 0x02b1, 0x029c, 0x0287,
+0x021e, 0x0205, 0x0228, 0x0233, 0x0272, 0x0269, 0x0244, 0x025f,
+0x0016, 0x000d, 0x0020, 0x003b, 0x007a, 0x0061, 0x004c, 0x0057,
+0x00ce, 0x00d5, 0x00f8, 0x00e3, 0x00a2, 0x00b9, 0x0094, 0x008f,
+0x01a6, 0x01bd, 0x0190, 0x018b, 0x01ca, 0x01d1, 0x01fc, 0x01e7,
+0x017e, 0x0165, 0x0148, 0x0153, 0x0112, 0x0109, 0x0124, 0x013f,
+};
+
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-4. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x2d (1 + x^2+ x^3 + x^5 and the implicit x^6).
+ */
+static const u8 mlxsw_sp4_acl_bf_crc6_tab[256] = {
+0x00, 0x2d, 0x37, 0x1a, 0x03, 0x2e, 0x34, 0x19,
+0x06, 0x2b, 0x31, 0x1c, 0x05, 0x28, 0x32, 0x1f,
+0x0c, 0x21, 0x3b, 0x16, 0x0f, 0x22, 0x38, 0x15,
+0x0a, 0x27, 0x3d, 0x10, 0x09, 0x24, 0x3e, 0x13,
+0x18, 0x35, 0x2f, 0x02, 0x1b, 0x36, 0x2c, 0x01,
+0x1e, 0x33, 0x29, 0x04, 0x1d, 0x30, 0x2a, 0x07,
+0x14, 0x39, 0x23, 0x0e, 0x17, 0x3a, 0x20, 0x0d,
+0x12, 0x3f, 0x25, 0x08, 0x11, 0x3c, 0x26, 0x0b,
+0x30, 0x1d, 0x07, 0x2a, 0x33, 0x1e, 0x04, 0x29,
+0x36, 0x1b, 0x01, 0x2c, 0x35, 0x18, 0x02, 0x2f,
+0x3c, 0x11, 0x0b, 0x26, 0x3f, 0x12, 0x08, 0x25,
+0x3a, 0x17, 0x0d, 0x20, 0x39, 0x14, 0x0e, 0x23,
+0x28, 0x05, 0x1f, 0x32, 0x2b, 0x06, 0x1c, 0x31,
+0x2e, 0x03, 0x19, 0x34, 0x2d, 0x00, 0x1a, 0x37,
+0x24, 0x09, 0x13, 0x3e, 0x27, 0x0a, 0x10, 0x3d,
+0x22, 0x0f, 0x15, 0x38, 0x21, 0x0c, 0x16, 0x3b,
+0x0d, 0x20, 0x3a, 0x17, 0x0e, 0x23, 0x39, 0x14,
+0x0b, 0x26, 0x3c, 0x11, 0x08, 0x25, 0x3f, 0x12,
+0x01, 0x2c, 0x36, 0x1b, 0x02, 0x2f, 0x35, 0x18,
+0x07, 0x2a, 0x30, 0x1d, 0x04, 0x29, 0x33, 0x1e,
+0x15, 0x38, 0x22, 0x0f, 0x16, 0x3b, 0x21, 0x0c,
+0x13, 0x3e, 0x24, 0x09, 0x10, 0x3d, 0x27, 0x0a,
+0x19, 0x34, 0x2e, 0x03, 0x1a, 0x37, 0x2d, 0x00,
+0x1f, 0x32, 0x28, 0x05, 0x1c, 0x31, 0x2b, 0x06,
+0x3d, 0x10, 0x0a, 0x27, 0x3e, 0x13, 0x09, 0x24,
+0x3b, 0x16, 0x0c, 0x21, 0x38, 0x15, 0x0f, 0x22,
+0x31, 0x1c, 0x06, 0x2b, 0x32, 0x1f, 0x05, 0x28,
+0x37, 0x1a, 0x00, 0x2d, 0x34, 0x19, 0x03, 0x2e,
+0x25, 0x08, 0x12, 0x3f, 0x26, 0x0b, 0x11, 0x3c,
+0x23, 0x0e, 0x14, 0x39, 0x20, 0x0d, 0x17, 0x3a,
+0x29, 0x04, 0x1e, 0x33, 0x2a, 0x07, 0x1d, 0x30,
+0x2f, 0x02, 0x18, 0x35, 0x2c, 0x01, 0x1b, 0x36,
+};
+
+/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
+ * and we need to populate it with 4 key blocks copied from the entry encoded
+ * key. The original keys layout is same for Spectrum-{2,3,4}.
+ * Since the encoded key contains a 2 bytes padding, key block 11 starts at
+ * offset 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks
+ * take 18 bytes. See 'MLXSW_SP2_AFK_BLOCK_LAYOUT' for more details.
+ * This array defines key offsets for easy access when copying key blocks from
+ * entry key to Bloom filter chunk.
+ */
+static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
+
+static u16 mlxsw_sp2_acl_bf_crc16_byte(u16 crc, u8 c)
{
- return (crc << 8) ^ mlxsw_sp_acl_bf_crc_tab[(crc >> 8) ^ c];
+ return (crc << 8) ^ mlxsw_sp2_acl_bf_crc16_tab[(crc >> 8) ^ c];
}
-static u16 mlxsw_sp_acl_bf_crc(const u8 *buffer, size_t len)
+static u16 mlxsw_sp2_acl_bf_crc(const u8 *buffer, size_t len)
{
u16 crc = 0;
while (len--)
- crc = mlxsw_sp_acl_bf_crc_byte(crc, *buffer++);
+ crc = mlxsw_sp2_acl_bf_crc16_byte(crc, *buffer++);
return crc;
}
static void
-mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
- struct mlxsw_sp_acl_atcam_entry *aentry,
- char *output, u8 *len)
+__mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ char *output, u8 *len, u8 max_chunks, u8 pad_bytes,
+ u8 key_offset, u8 chunk_key_len, u8 chunk_len)
{
struct mlxsw_afk_key_info *key_info = aregion->region->key_info;
u8 chunk_index, chunk_count, block_count;
@@ -129,37 +243,168 @@ mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
chunk_count = 1 + ((block_count - 1) >> 2);
erp_region_id = cpu_to_be16(aentry->ht_key.erp_id |
(aregion->region->id << 4));
- for (chunk_index = MLXSW_BLOOM_KEY_CHUNKS - chunk_count;
- chunk_index < MLXSW_BLOOM_KEY_CHUNKS; chunk_index++) {
- memset(chunk, 0, MLXSW_BLOOM_CHUNK_PAD_BYTES);
- memcpy(chunk + MLXSW_BLOOM_CHUNK_PAD_BYTES, &erp_region_id,
+ for (chunk_index = max_chunks - chunk_count; chunk_index < max_chunks;
+ chunk_index++) {
+ memset(chunk, 0, pad_bytes);
+ memcpy(chunk + pad_bytes, &erp_region_id,
sizeof(erp_region_id));
- memcpy(chunk + MLXSW_BLOOM_CHUNK_KEY_OFFSET,
+ memcpy(chunk + key_offset,
&aentry->enc_key[chunk_key_offsets[chunk_index]],
- MLXSW_BLOOM_CHUNK_KEY_BYTES);
- chunk += MLXSW_BLOOM_KEY_CHUNK_BYTES;
+ chunk_key_len);
+ chunk += chunk_len;
}
- *len = chunk_count * MLXSW_BLOOM_KEY_CHUNK_BYTES;
+ *len = chunk_count * chunk_len;
+}
+
+static void
+mlxsw_sp2_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ char *output, u8 *len)
+{
+ __mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len,
+ MLXSW_BLOOM_KEY_CHUNKS,
+ MLXSW_SP2_BLOOM_CHUNK_PAD_BYTES,
+ MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET,
+ MLXSW_SP2_BLOOM_CHUNK_KEY_BYTES,
+ MLXSW_SP2_BLOOM_KEY_CHUNK_BYTES);
}
static unsigned int
-mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
- unsigned int erp_bank,
- unsigned int bf_index)
+mlxsw_sp2_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
{
- return erp_bank * bf->bank_size + bf_index;
+ char bf_key[MLXSW_SP2_BLOOM_KEY_LEN];
+ u8 bf_size;
+
+ mlxsw_sp2_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
+ return mlxsw_sp2_acl_bf_crc(bf_key, bf_size);
+}
+
+static u16 mlxsw_sp4_acl_bf_crc10_byte(u16 crc, u8 c)
+{
+ u8 index = ((crc >> 2) ^ c) & 0xff;
+
+ return ((crc << 8) ^ mlxsw_sp4_acl_bf_crc10_tab[index]) & 0x3ff;
+}
+
+static u16 mlxsw_sp4_acl_bf_crc6_byte(u16 crc, u8 c)
+{
+ u8 index = (crc ^ c) & 0xff;
+
+ return ((crc << 6) ^ (mlxsw_sp4_acl_bf_crc6_tab[index] << 2)) & 0xfc;
+}
+
+static u16 mlxsw_sp4_acl_bf_crc(const u8 *buffer, size_t len)
+{
+ u16 crc_row = 0, crc_col = 0;
+
+ while (len--) {
+ crc_row = mlxsw_sp4_acl_bf_crc10_byte(crc_row, *buffer);
+ crc_col = mlxsw_sp4_acl_bf_crc6_byte(crc_col, *buffer);
+ buffer++;
+ }
+
+ crc_col >>= 2;
+
+ /* 6 bit column are MSB, 10 bit row are LSB */
+ return (crc_col << 10) | crc_row;
+}
+
+static void right_shift_array(char *arr, u8 len, u8 shift_bits)
+{
+ u8 byte_mask = 0xff >> shift_bits;
+ int i;
+
+ if (WARN_ON(!shift_bits || shift_bits >= 8))
+ return;
+
+ for (i = len - 1; i >= 0; i--) {
+ /* The first iteration looks like out-of-bounds access,
+ * but actually references a buffer that the array is shifted
+ * into. This move is legal as we never send the last chunk to
+ * this function.
+ */
+ arr[i + 1] &= byte_mask;
+ arr[i + 1] |= arr[i] << (8 - shift_bits);
+ arr[i] = arr[i] >> shift_bits;
+ }
+}
+
+static void mlxsw_sp4_bf_key_shift_chunks(u8 chunk_count, char *output)
+{
+ /* The chunks are suppoosed to be continuous, with no padding.
+ * Since region ID and eRP ID use 14 bits, and not fully 2 bytes,
+ * and in Spectrum-4 there is no padding, it is necessary to shift some
+ * chunks 2 bits right.
+ */
+ switch (chunk_count) {
+ case 2:
+ /* The chunks are copied as follow:
+ * +-------------+-----------------+
+ * | Chunk 0 | Chunk 1 |
+ * | IDs | keys |(**) IDs | keys |
+ * +-------------+-----------------+
+ * In (**), there are two unused bits, therefore, chunk 0 needs
+ * to be shifted two bits right.
+ */
+ right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
+ break;
+ case 3:
+ /* The chunks are copied as follow:
+ * +-------------+-----------------+-----------------+
+ * | Chunk 0 | Chunk 1 | Chunk 2 |
+ * | IDs | keys |(**) IDs | keys |(**) IDs | keys |
+ * +-------------+-----------------+-----------------+
+ * In (**), there are two unused bits, therefore, chunk 1 needs
+ * to be shifted two bits right and chunk 0 needs to be shifted
+ * four bits right.
+ */
+ right_shift_array(output + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES,
+ MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
+ right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 4);
+ break;
+ default:
+ WARN_ON(chunk_count > MLXSW_BLOOM_KEY_CHUNKS);
+ }
+}
+
+static void
+mlxsw_sp4_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ char *output, u8 *len)
+{
+ struct mlxsw_afk_key_info *key_info = aregion->region->key_info;
+ u8 block_count = mlxsw_afk_key_info_blocks_count_get(key_info);
+ u8 chunk_count = 1 + ((block_count - 1) >> 2);
+
+ __mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len,
+ MLXSW_BLOOM_KEY_CHUNKS,
+ MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES,
+ MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET,
+ MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES,
+ MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES);
+ mlxsw_sp4_bf_key_shift_chunks(chunk_count, output);
}
static unsigned int
-mlxsw_sp_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
- struct mlxsw_sp_acl_atcam_region *aregion,
- struct mlxsw_sp_acl_atcam_entry *aentry)
+mlxsw_sp4_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
{
- char bf_key[MLXSW_BLOOM_KEY_LEN];
+ char bf_key[MLXSW_SP4_BLOOM_KEY_LEN] = {};
u8 bf_size;
- mlxsw_sp_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
- return mlxsw_sp_acl_bf_crc(bf_key, bf_size);
+ mlxsw_sp4_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
+ return mlxsw_sp4_acl_bf_crc(bf_key, bf_size);
+}
+
+static unsigned int
+mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
+ unsigned int erp_bank,
+ unsigned int bf_index)
+{
+ return erp_bank * bf->bank_size + bf_index;
}
int
@@ -176,7 +421,7 @@ mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
mutex_lock(&bf->lock);
- bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
+ bf_index = mlxsw_sp->acl_bf_ops->index_get(bf, aregion, aentry);
rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
bf_index);
@@ -219,7 +464,7 @@ mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
mutex_lock(&bf->lock);
- bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
+ bf_index = mlxsw_sp->acl_bf_ops->index_get(bf, aregion, aentry);
rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
bf_index);
@@ -267,3 +512,11 @@ void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
mutex_destroy(&bf->lock);
kfree(bf);
}
+
+const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops = {
+ .index_get = mlxsw_sp2_acl_bf_index_get,
+};
+
+const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops = {
+ .index_get = mlxsw_sp4_acl_bf_index_get,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index 279c241f76f0..00c32320f891 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -168,8 +168,8 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = {
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = {
- MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_0_7, 0x04, 24, 8),
- MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_8_10, 0x00, 0, 3),
+ MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
@@ -311,3 +311,45 @@ const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = {
.encode_block = mlxsw_sp2_afk_encode_block,
.clear_block = mlxsw_sp2_afk_clear_block,
};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 18, 12),
+ MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 9, -1, true), /* RX_ACL_SYSTEM_PORT */
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8),
+ MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x04, 4),
+};
+
+static const struct mlxsw_afk_block mlxsw_sp4_afk_blocks[] = {
+ MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_mac_0),
+ MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_mac_1),
+ MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_mac_2),
+ MLXSW_AFK_BLOCK(0x13, mlxsw_sp_afk_element_info_mac_3),
+ MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4),
+ MLXSW_AFK_BLOCK(0x1A, mlxsw_sp_afk_element_info_mac_5b),
+ MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0),
+ MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1),
+ MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2),
+ MLXSW_AFK_BLOCK(0x35, mlxsw_sp_afk_element_info_ipv4_4b),
+ MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0),
+ MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1),
+ MLXSW_AFK_BLOCK(0x47, mlxsw_sp_afk_element_info_ipv6_2b),
+ MLXSW_AFK_BLOCK(0x43, mlxsw_sp_afk_element_info_ipv6_3),
+ MLXSW_AFK_BLOCK(0x44, mlxsw_sp_afk_element_info_ipv6_4),
+ MLXSW_AFK_BLOCK(0x45, mlxsw_sp_afk_element_info_ipv6_5),
+ MLXSW_AFK_BLOCK(0x90, mlxsw_sp_afk_element_info_l4_0),
+ MLXSW_AFK_BLOCK(0x92, mlxsw_sp_afk_element_info_l4_2),
+};
+
+const struct mlxsw_afk_ops mlxsw_sp4_afk_ops = {
+ .blocks = mlxsw_sp4_afk_blocks,
+ .blocks_count = ARRAY_SIZE(mlxsw_sp4_afk_blocks),
+ .encode_block = mlxsw_sp2_afk_encode_block,
+ .clear_block = mlxsw_sp2_afk_clear_block,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
index a41df10ade9b..edbbc89e7a71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -287,6 +287,12 @@ void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_bf;
+struct mlxsw_sp_acl_bf_ops {
+ unsigned int (*index_get)(struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry);
+};
+
int
mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_bf *bf,
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 9b42187a026a..b1311b656e17 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1341,6 +1341,43 @@ static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
return 0;
}
+int ocelot_mact_flush(struct ocelot *ocelot, int port)
+{
+ int err;
+
+ mutex_lock(&ocelot->mact_lock);
+
+ /* Program ageing filter for a single port */
+ ocelot_write(ocelot, ANA_ANAGEFIL_PID_EN | ANA_ANAGEFIL_PID_VAL(port),
+ ANA_ANAGEFIL);
+
+ /* Flushing dynamic FDB entries requires two successive age scans */
+ ocelot_write(ocelot,
+ ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_AGE),
+ ANA_TABLES_MACACCESS);
+
+ err = ocelot_mact_wait_for_completion(ocelot);
+ if (err) {
+ mutex_unlock(&ocelot->mact_lock);
+ return err;
+ }
+
+ /* And second... */
+ ocelot_write(ocelot,
+ ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_AGE),
+ ANA_TABLES_MACACCESS);
+
+ err = ocelot_mact_wait_for_completion(ocelot);
+
+ /* Restore ageing filter */
+ ocelot_write(ocelot, 0, ANA_ANAGEFIL);
+
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(ocelot_mact_flush);
+
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
@@ -1787,8 +1824,7 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port,
}
EXPORT_SYMBOL(ocelot_get_ts_info);
-static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
- bool only_active_ports)
+static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond)
{
u32 mask = 0;
int port;
@@ -1799,12 +1835,8 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
if (!ocelot_port)
continue;
- if (ocelot_port->bond == bond) {
- if (only_active_ports && !ocelot_port->lag_tx_active)
- continue;
-
+ if (ocelot_port->bond == bond)
mask |= BIT(port);
- }
}
return mask;
@@ -1904,10 +1936,8 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining)
mask = ocelot_get_bridge_fwd_mask(ocelot, port);
mask |= cpu_fwd_mask;
mask &= ~BIT(port);
- if (bond) {
- mask &= ~ocelot_get_bond_mask(ocelot, bond,
- false);
- }
+ if (bond)
+ mask &= ~ocelot_get_bond_mask(ocelot, bond);
} else {
/* Standalone ports forward only to DSA tag_8021q CPU
* ports (if those exist), or to the hardware CPU port
@@ -2246,13 +2276,17 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
if (!bond || (visited & BIT(lag)))
continue;
- bond_mask = ocelot_get_bond_mask(ocelot, bond, true);
+ bond_mask = ocelot_get_bond_mask(ocelot, bond);
for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+
// Destination mask
ocelot_write_rix(ocelot, bond_mask,
ANA_PGID_PGID, port);
- aggr_idx[num_active_ports++] = port;
+
+ if (ocelot_port->lag_tx_active)
+ aggr_idx[num_active_ports++] = port;
}
for_each_aggr_pgid(ocelot, i) {
@@ -2301,8 +2335,7 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
bond = ocelot_port->bond;
if (bond) {
- int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,
- false));
+ int lag = __ffs(ocelot_get_bond_mask(ocelot, bond));
ocelot_rmw_gix(ocelot,
ANA_PORT_PORT_CFG_PORTID_VAL(lag),
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 26753139d5b4..6e947cd91152 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1669,17 +1669,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
struct btf *bpf_get_btf_vmlinux(void);
/* Map specifics */
-struct xdp_buff;
+struct xdp_frame;
struct sk_buff;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;
void __dev_flush(void);
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx);
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
struct net_device *dev_rx);
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
@@ -1688,7 +1688,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
bool exclude_ingress);
void __cpu_map_flush(void);
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
struct net_device *dev_rx);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
struct sk_buff *skb);
@@ -1866,26 +1866,26 @@ static inline void __dev_flush(void)
{
}
-struct xdp_buff;
+struct xdp_frame;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;
static inline
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
return 0;
}
static inline
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
return 0;
}
static inline
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
return 0;
@@ -1913,7 +1913,7 @@ static inline void __cpu_map_flush(void)
}
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
- struct xdp_buff *xdp,
+ struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
return 0;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 60eec80fa1d4..71fa57b88bfc 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1019,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
int xdp_do_redirect(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *prog);
+int xdp_do_redirect_frame(struct net_device *dev,
+ struct xdp_buff *xdp,
+ struct xdp_frame *xdpf,
+ struct bpf_prog *prog);
void xdp_do_flush(void);
/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index ea3ff5a8ced3..3705a382276b 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -9,13 +9,13 @@
#define MLX5_NUM_SPARE_EQE (0x80)
struct mlx5_eq;
+struct mlx5_irq;
struct mlx5_core_dev;
struct mlx5_eq_param {
- u8 irq_index;
int nent;
u64 mask[4];
- cpumask_var_t affinity;
+ struct mlx5_irq *irq;
};
struct mlx5_eq *
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index a4082406a003..79a805542d0f 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -80,6 +80,8 @@ struct page_pool_params {
enum dma_data_direction dma_dir; /* DMA mapping direction */
unsigned int max_len; /* max DMA sync memory size */
unsigned int offset; /* DMA addr offset */
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
};
struct page_pool {
@@ -94,6 +96,7 @@ struct page_pool {
unsigned int frag_offset;
struct page *frag_page;
long frag_users;
+ u32 xdp_mem_id;
/*
* Data structure for allocation side
@@ -168,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page);
struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct xdp_mem_info;
+
#ifdef CONFIG_PAGE_POOL
void page_pool_destroy(struct page_pool *pool);
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+ struct xdp_mem_info *mem);
void page_pool_release_page(struct page_pool *pool, struct page *page);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
@@ -180,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool)
}
static inline void page_pool_use_xdp_mem(struct page_pool *pool,
- void (*disconnect)(void *))
+ void (*disconnect)(void *),
+ struct xdp_mem_info *mem)
{
}
static inline void page_pool_release_page(struct page_pool *pool,
diff --git a/include/net/sock.h b/include/net/sock.h
index 7b4b4237e6e0..ff9b508d9c5f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1209,6 +1209,7 @@ struct proto {
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+ void (*put_port)(struct sock *sk);
#ifdef CONFIG_BPF_SYSCALL
int (*psock_update_sk_prot)(struct sock *sk,
struct sk_psock *psock,
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 447f9b1578f3..8f0812e4996d 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
enum xdp_mem_type type, void *allocator);
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+ enum xdp_mem_type type, void *allocator);
+void xdp_unreg_mem_model(struct xdp_mem_info *mem);
/* Drivers not supporting XDP metadata can use this helper, which
* rejects any room expansion for metadata as a result.
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 3e9454b00562..5c3a3597f1d2 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -833,6 +833,7 @@ void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
struct net_device *bridge);
void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
struct net_device *bridge);
+int ocelot_mact_flush(struct ocelot *ocelot, int port);
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data);
int ocelot_fdb_add(struct ocelot *ocelot, int port,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 0421061d95f1..b3e6b9422238 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -746,15 +746,9 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
list_add(&bq->flush_node, flush_list);
}
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
- struct xdp_frame *xdpf;
-
- xdpf = xdp_convert_buff_to_frame(xdp);
- if (unlikely(!xdpf))
- return -EOVERFLOW;
-
/* Info needed when constructing SKB on remote CPU */
xdpf->dev_rx = dev_rx;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 6feea293ff10..fe019dbdb3f0 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -467,24 +467,19 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
bq->q[bq->count++] = xdpf;
}
-static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx,
struct bpf_prog *xdp_prog)
{
- struct xdp_frame *xdpf;
int err;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
- err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+ err = xdp_ok_fwd_dev(dev, xdpf->len);
if (unlikely(err))
return err;
- xdpf = xdp_convert_buff_to_frame(xdp);
- if (unlikely(!xdpf))
- return -EOVERFLOW;
-
bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
return 0;
}
@@ -520,27 +515,27 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev
return act;
}
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
- return __xdp_enqueue(dev, xdp, dev_rx, NULL);
+ return __xdp_enqueue(dev, xdpf, dev_rx, NULL);
}
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
- return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
+ return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog);
}
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
{
if (!obj ||
!obj->dev->netdev_ops->ndo_xdp_xmit)
return false;
- if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data))
+ if (xdp_ok_fwd_dev(obj->dev, xdpf->len))
return false;
return true;
@@ -586,14 +581,13 @@ static int get_upper_ifindexes(struct net_device *dev, int *indexes)
return n;
}
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dst, *last_dst = NULL;
int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
- struct xdp_frame *xdpf;
int num_excluded = 0;
unsigned int i;
int err;
@@ -603,15 +597,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
excluded_devices[num_excluded++] = dev_rx->ifindex;
}
- xdpf = xdp_convert_buff_to_frame(xdp);
- if (unlikely(!xdpf))
- return -EOVERFLOW;
-
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!is_valid_dst(dst, xdp))
+ if (!is_valid_dst(dst, xdpf))
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
@@ -634,7 +624,7 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
lockdep_is_held(&dtab->index_lock)) {
- if (!is_valid_dst(dst, xdp))
+ if (!is_valid_dst(dst, xdpf))
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b70c66c6db3b..bfb45381fb3f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6031,6 +6031,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
if (insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == 0 &&
insn->imm == BPF_FUNC_timer_set_callback) {
struct bpf_verifier_state *async_cb;
@@ -9079,15 +9080,15 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
{
if (type_may_be_null(reg->type) && reg->id == id &&
!WARN_ON_ONCE(!reg->id)) {
- /* Old offset (both fixed and variable parts) should
- * have been known-zero, because we don't allow pointer
- * arithmetic on pointers that might be NULL.
- */
if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
!tnum_equals_const(reg->var_off, 0) ||
reg->off)) {
- __mark_reg_known_zero(reg);
- reg->off = 0;
+ /* Old offset (both fixed and variable parts) should
+ * have been known-zero, because we don't allow pointer
+ * arithmetic on pointers that might be NULL. If we
+ * see this happening, don't convert the register.
+ */
+ return;
}
if (is_null) {
reg->type = SCALAR_VALUE;
diff --git a/net/core/filter.c b/net/core/filter.c
index 606ab5a98a1a..4603b7cd3cd1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3957,10 +3957,35 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_master_redirect);
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
+ struct net_device *dev,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ enum bpf_map_type map_type = ri->map_type;
+ void *fwd = ri->tgt_value;
+ u32 map_id = ri->map_id;
+ int err;
+
+ ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+
+ err = __xsk_map_redirect(fwd, xdp);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
+ return err;
+}
+
+static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
+ struct net_device *dev,
+ struct xdp_frame *xdpf,
+ struct bpf_prog *xdp_prog)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
@@ -3970,6 +3995,11 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ if (unlikely(!xdpf)) {
+ err = -EOVERFLOW;
+ goto err;
+ }
+
switch (map_type) {
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
@@ -3977,17 +4007,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
map = READ_ONCE(ri->map);
if (unlikely(map)) {
WRITE_ONCE(ri->map, NULL);
- err = dev_map_enqueue_multi(xdp, dev, map,
+ err = dev_map_enqueue_multi(xdpf, dev, map,
ri->flags & BPF_F_EXCLUDE_INGRESS);
} else {
- err = dev_map_enqueue(fwd, xdp, dev);
+ err = dev_map_enqueue(fwd, xdpf, dev);
}
break;
case BPF_MAP_TYPE_CPUMAP:
- err = cpu_map_enqueue(fwd, xdp, dev);
- break;
- case BPF_MAP_TYPE_XSKMAP:
- err = __xsk_map_redirect(fwd, xdp);
+ err = cpu_map_enqueue(fwd, xdpf, dev);
break;
case BPF_MAP_TYPE_UNSPEC:
if (map_id == INT_MAX) {
@@ -3996,7 +4023,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
err = -EINVAL;
break;
}
- err = dev_xdp_enqueue(fwd, xdp, dev);
+ err = dev_xdp_enqueue(fwd, xdpf, dev);
break;
}
fallthrough;
@@ -4013,8 +4040,34 @@ err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
return err;
}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+ return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
+ xdp_prog);
+}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
+int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
+ struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+ return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
+
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
@@ -4741,12 +4794,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case SO_RCVBUF:
val = min_t(u32, val, sysctl_rmem_max);
+ val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
WRITE_ONCE(sk->sk_rcvbuf,
max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(sk->sk_sndbuf,
max_t(int, val * 2, SOCK_MIN_SNDBUF));
@@ -4967,6 +5022,12 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
goto err_clear;
switch (optname) {
+ case SO_RCVBUF:
+ *((int *)optval) = sk->sk_rcvbuf;
+ break;
+ case SO_SNDBUF:
+ *((int *)optval) = sk->sk_sndbuf;
+ break;
case SO_MARK:
*((int *)optval) = sk->sk_mark;
break;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1a6978427d6c..7347d5c7dbe0 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -217,6 +217,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
{
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
+ if (pool->p.init_callback)
+ pool->p.init_callback(page, pool->p.init_arg);
}
static void page_pool_clear_pp_info(struct page *page)
@@ -691,10 +693,12 @@ static void page_pool_release_retry(struct work_struct *wq)
schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+ struct xdp_mem_info *mem)
{
refcount_inc(&pool->user_cnt);
pool->disconnect = disconnect;
+ pool->xdp_mem_id = mem->id;
}
void page_pool_destroy(struct page_pool *pool)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9618ab6d7cc9..1827669eedd6 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -292,15 +292,23 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
if (skb_verdict)
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ /* msg_* and stream_* programs references tracked in psock after this
+ * point. Reference dec and cleanup will occur through psock destructor
+ */
ret = sock_map_init_proto(sk, psock);
- if (ret < 0)
- goto out_drop;
+ if (ret < 0) {
+ sk_psock_put(sk, psock);
+ goto out;
+ }
write_lock_bh(&sk->sk_callback_lock);
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
ret = sk_psock_init_strp(sk, psock);
- if (ret)
- goto out_unlock_drop;
+ if (ret) {
+ write_unlock_bh(&sk->sk_callback_lock);
+ sk_psock_put(sk, psock);
+ goto out;
+ }
sk_psock_start_strp(sk, psock);
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
sk_psock_start_verdict(sk,psock);
@@ -309,10 +317,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
-out_unlock_drop:
- write_unlock_bh(&sk->sk_callback_lock);
-out_drop:
- sk_psock_put(sk, psock);
out_progs:
if (skb_verdict)
bpf_prog_put(skb_verdict);
@@ -325,6 +329,7 @@ out_put_stream_parser:
out_put_stream_verdict:
if (stream_verdict)
bpf_prog_put(stream_verdict);
+out:
return ret;
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7fe1df85f505..7aba35504986 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator)
mutex_unlock(&mem_id_lock);
}
-void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+void xdp_unreg_mem_model(struct xdp_mem_info *mem)
{
struct xdp_mem_allocator *xa;
- int type = xdp_rxq->mem.type;
- int id = xdp_rxq->mem.id;
+ int type = mem->type;
+ int id = mem->id;
/* Reset mem info to defaults */
- xdp_rxq->mem.id = 0;
- xdp_rxq->mem.type = 0;
-
- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
- WARN(1, "Missing register, driver bug");
- return;
- }
+ mem->id = 0;
+ mem->type = 0;
if (id == 0)
return;
@@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
rcu_read_unlock();
}
}
+EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
+
+void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+ WARN(1, "Missing register, driver bug");
+ return;
+ }
+
+ xdp_unreg_mem_model(&xdp_rxq->mem);
+}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
@@ -259,28 +265,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
return true;
}
-int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
- enum xdp_mem_type type, void *allocator)
+static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
+ enum xdp_mem_type type,
+ void *allocator)
{
struct xdp_mem_allocator *xdp_alloc;
gfp_t gfp = GFP_KERNEL;
int id, errno, ret;
void *ptr;
- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
- WARN(1, "Missing register, driver bug");
- return -EFAULT;
- }
-
if (!__is_supported_mem_type(type))
- return -EOPNOTSUPP;
+ return ERR_PTR(-EOPNOTSUPP);
- xdp_rxq->mem.type = type;
+ mem->type = type;
if (!allocator) {
if (type == MEM_TYPE_PAGE_POOL)
- return -EINVAL; /* Setup time check page_pool req */
- return 0;
+ return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
+ return NULL;
}
/* Delay init of rhashtable to save memory if feature isn't used */
@@ -290,13 +292,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
mutex_unlock(&mem_id_lock);
if (ret < 0) {
WARN_ON(1);
- return ret;
+ return ERR_PTR(ret);
}
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
if (!xdp_alloc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
mutex_lock(&mem_id_lock);
id = __mem_id_cyclic_get(gfp);
@@ -304,31 +306,61 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
errno = id;
goto err;
}
- xdp_rxq->mem.id = id;
- xdp_alloc->mem = xdp_rxq->mem;
+ mem->id = id;
+ xdp_alloc->mem = *mem;
xdp_alloc->allocator = allocator;
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
- ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
- xdp_rxq->mem.id = 0;
+ ida_simple_remove(&mem_id_pool, mem->id);
+ mem->id = 0;
errno = PTR_ERR(ptr);
goto err;
}
if (type == MEM_TYPE_PAGE_POOL)
- page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
+ page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem);
mutex_unlock(&mem_id_lock);
- trace_mem_connect(xdp_alloc, xdp_rxq);
- return 0;
+ return xdp_alloc;
err:
mutex_unlock(&mem_id_lock);
kfree(xdp_alloc);
- return errno;
+ return ERR_PTR(errno);
+}
+
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+ enum xdp_mem_type type, void *allocator)
+{
+ struct xdp_mem_allocator *xdp_alloc;
+
+ xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
+ if (IS_ERR(xdp_alloc))
+ return PTR_ERR(xdp_alloc);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_reg_mem_model);
+
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ enum xdp_mem_type type, void *allocator)
+{
+ struct xdp_mem_allocator *xdp_alloc;
+
+ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+ WARN(1, "Missing register, driver bug");
+ return -EFAULT;
+ }
+
+ xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
+ if (IS_ERR(xdp_alloc))
+ return PTR_ERR(xdp_alloc);
+
+ trace_mem_connect(xdp_alloc, xdp_rxq);
+ return 0;
}
+
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
/* XDP RX runs under NAPI protection, and in different delivery error
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f53184767ee7..9c465bac1eb0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -531,6 +531,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ if (sk->sk_prot->put_port)
+ sk->sk_prot->put_port(sk);
goto out_release_sock;
}
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e540b0dcf085..0e56df3a45e2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -994,6 +994,7 @@ struct proto ping_prot = {
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
+ .put_port = ping_unhash,
.obj_size = sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index f70aa0932bd6..9b9b02052fd3 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -196,12 +196,39 @@ msg_bytes_ready:
long timeo;
int data;
+ if (sock_flag(sk, SOCK_DONE))
+ goto out;
+
+ if (sk->sk_err) {
+ copied = sock_error(sk);
+ goto out;
+ }
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ goto out;
+
+ if (sk->sk_state == TCP_CLOSE) {
+ copied = -ENOTCONN;
+ goto out;
+ }
+
timeo = sock_rcvtimeo(sk, nonblock);
+ if (!timeo) {
+ copied = -EAGAIN;
+ goto out;
+ }
+
+ if (signal_pending(current)) {
+ copied = sock_intr_errno(timeo);
+ goto out;
+ }
+
data = tcp_msg_wait_data(sk, psock, timeo);
if (data && !sk_psock_queue_empty(psock))
goto msg_bytes_ready;
copied = -EAGAIN;
}
+out:
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac10e4cdd8d0..9861786b8336 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3076,6 +3076,7 @@ struct proto tcp_prot = {
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7b18a6f42f18..c2a4411d2b04 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2927,6 +2927,7 @@ struct proto udp_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,
.get_port = udp_v4_get_port,
+ .put_port = udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d1636425654e..8fe7900f1949 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -413,6 +413,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
if (err) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
+ if (sk->sk_prot->put_port)
+ sk->sk_prot->put_port(sk);
goto out;
}
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 6ac88fe24a8e..9256f6ba87ef 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -177,6 +177,7 @@ struct proto pingv6_prot = {
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
+ .put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1ac243d18c2b..075ee8a2df3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2181,6 +2181,7 @@ struct proto tcpv6_prot = {
.hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index df216268cb02..528b81ef19c9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1733,6 +1733,7 @@ struct proto udpv6_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
+ .put_port = udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index fe98e4f475ba..8ed2d9f4a84d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -768,6 +768,28 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu
return true;
}
+static bool mptcp_established_options_fastclose(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (likely(!subflow->send_fastclose))
+ return false;
+
+ if (remaining < TCPOLEN_MPTCP_FASTCLOSE)
+ return false;
+
+ *size = TCPOLEN_MPTCP_FASTCLOSE;
+ opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
+ opts->rcvr_key = msk->remote_key;
+
+ pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
+ return true;
+}
+
static bool mptcp_established_options_mp_fail(struct sock *sk,
unsigned int *size,
unsigned int remaining,
@@ -806,10 +828,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
- if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
+ mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
}
+ /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
@@ -1209,7 +1233,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
-static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
+u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1224,14 +1248,14 @@ static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum1
header.data_len = htons(data_len);
header.csum = 0;
- csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
+ csum = csum_partial(&header, sizeof(header), sum);
return (__force u16)csum_fold(csum);
}
static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
{
return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
- mpext->csum);
+ ~csum_unfold(mpext->csum));
}
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
@@ -1251,17 +1275,8 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 2;
}
- /* RST is mutually exclusive with everything else */
- if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
- *ptr++ = mptcp_option(MPTCPOPT_RST,
- TCPOLEN_MPTCP_RST,
- opts->reset_transient,
- opts->reset_reason);
- return;
- }
-
- /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
- * mptcp_established_options*()
+ /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
+ * see mptcp_established_options*()
*/
if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
struct mptcp_ext *mpext = &opts->ext_copy;
@@ -1361,7 +1376,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
__mptcp_make_csum(opts->data_seq,
opts->subflow_seq,
opts->data_len,
- opts->csum), ptr);
+ ~csum_unfold(opts->csum)), ptr);
} else {
put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1370,27 +1385,29 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
/* MPC is additionally mutually exclusive with MP_PRIO */
goto mp_capable_done;
- } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYN,
- opts->backup, opts->join_id);
- put_unaligned_be32(opts->token, ptr);
- ptr += 1;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYNACK,
- opts->backup, opts->join_id);
- put_unaligned_be64(opts->thmac, ptr);
- ptr += 2;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
- memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
- ptr += 5;
+ } else if (OPTIONS_MPTCP_MPJ & opts->suboptions) {
+ if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYN,
+ opts->backup, opts->join_id);
+ put_unaligned_be32(opts->token, ptr);
+ ptr += 1;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYNACK,
+ opts->backup, opts->join_id);
+ put_unaligned_be64(opts->thmac, ptr);
+ ptr += 2;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+ memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+ ptr += 5;
+ }
} else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
@@ -1447,6 +1464,24 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 1;
}
}
+ } else if (unlikely(OPTION_MPTCP_FASTCLOSE & opts->suboptions)) {
+ /* FASTCLOSE is mutually exclusive with others except RST */
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE,
+ TCPOLEN_MPTCP_FASTCLOSE,
+ 0, 0);
+ put_unaligned_be64(opts->rcvr_key, ptr);
+ ptr += 2;
+
+ if (OPTION_MPTCP_RST & opts->suboptions)
+ goto mp_rst;
+ return;
+ } else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+mp_rst:
+ *ptr++ = mptcp_option(MPTCPOPT_RST,
+ TCPOLEN_MPTCP_RST,
+ opts->reset_transient,
+ opts->reset_reason);
+ return;
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6ab386ff3294..696b2c4613a7 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -172,9 +172,28 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+ const struct mptcp_subflow_context *subflow)
{
- pr_debug("msk=%p", msk);
+ struct mptcp_pm_data *pm = &msk->pm;
+ bool update_subflows;
+
+ update_subflows = (ssk->sk_state == TCP_CLOSE) &&
+ (subflow->request_join || subflow->mp_join);
+ if (!READ_ONCE(pm->work_pending) && !update_subflows)
+ return;
+
+ spin_lock_bh(&pm->lock);
+ if (update_subflows)
+ pm->subflows--;
+
+ /* Even if this subflow is not really established, tell the PM to try
+ * to pick the next ones, if possible.
+ */
+ if (mptcp_pm_nl_check_work_pending(msk))
+ mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
+
+ spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
@@ -356,7 +375,7 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
-void mptcp_pm_data_init(struct mptcp_sock *msk)
+void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
msk->pm.add_addr_accepted = 0;
@@ -370,11 +389,16 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.accept_subflow, false);
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
+ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ mptcp_pm_nl_data_init(msk);
+}
+void mptcp_pm_data_init(struct mptcp_sock *msk)
+{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
-
- mptcp_pm_nl_data_init(msk);
+ mptcp_pm_data_reset(msk);
}
void __init mptcp_pm_init(void)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6cde58c259a8..75af1f701e1d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry {
u8 retrans_times;
};
-/* max value of mptcp_addr_info.id */
-#define MAX_ADDR_ID U8_MAX
-#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
-
struct pm_nl_pernet {
/* protects pernet updates */
spinlock_t lock;
@@ -53,14 +49,14 @@ struct pm_nl_pernet {
unsigned int local_addr_max;
unsigned int subflows_max;
unsigned int next_id;
- unsigned long id_bitmap[BITMAP_SZ];
+ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
};
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
static bool addresses_equal(const struct mptcp_addr_info *a,
- struct mptcp_addr_info *b, bool use_port)
+ const struct mptcp_addr_info *b, bool use_port)
{
bool addr_equals = false;
@@ -169,11 +165,13 @@ select_local_address(const struct pm_nl_pernet *pernet,
msk_owned_by_me(msk);
rcu_read_lock();
- __mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;
+ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+ continue;
+
if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if ((entry->addr.family == AF_INET &&
@@ -184,23 +182,17 @@ select_local_address(const struct pm_nl_pernet *pernet,
continue;
}
- /* avoid any address already in use by subflows and
- * pending join
- */
- if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
- ret = entry;
- break;
- }
+ ret = entry;
+ break;
}
rcu_read_unlock();
return ret;
}
static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
+select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
- int i = 0;
rcu_read_lock();
/* do not keep any additional per socket state, just signal
@@ -209,12 +201,14 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
* can lead to additional addresses not being announced.
*/
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+ continue;
+
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
- if (i++ == pos) {
- ret = entry;
- break;
- }
+
+ ret = entry;
+ break;
}
rcu_read_unlock();
return ret;
@@ -256,12 +250,17 @@ unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk)
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
-static void check_work_pending(struct mptcp_sock *msk)
+bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
- if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
- (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
- msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
+ struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+
+ if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
+ (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
+ MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) {
WRITE_ONCE(msk->pm.work_pending, false);
+ return false;
+ }
+ return true;
}
struct mptcp_pm_add_entry *
@@ -430,6 +429,7 @@ static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
struct mptcp_addr_info *addrs)
{
+ bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
struct sock *sk = (struct sock *)msk, *ssk;
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info remote = { 0 };
@@ -437,22 +437,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
int i = 0;
subflows_max = mptcp_pm_get_subflows_max(msk);
+ remote_address((struct sock_common *)sk, &remote);
/* Non-fullmesh endpoint, fill in the single entry
* corresponding to the primary MPC subflow remote address
*/
if (!fullmesh) {
- remote_address((struct sock_common *)sk, &remote);
+ if (deny_id0)
+ return 0;
+
msk->pm.subflows++;
addrs[i++] = remote;
} else {
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
- remote_address((struct sock_common *)ssk, &remote);
- if (!lookup_address_in_vec(addrs, i, &remote) &&
+ remote_address((struct sock_common *)ssk, &addrs[i]);
+ if (deny_id0 && addresses_equal(&addrs[i], &remote, false))
+ continue;
+
+ if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
- addrs[i++] = remote;
+ i++;
}
}
}
@@ -460,6 +466,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
return i;
}
+static struct mptcp_pm_addr_entry *
+__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if (entry->addr.id == id)
+ return entry;
+ }
+ return NULL;
+}
+
+static int
+lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+{
+ struct mptcp_pm_addr_entry *entry;
+ int ret = -1;
+
+ rcu_read_lock();
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
+ ret = entry->addr.id;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -475,6 +510,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
local_addr_max = mptcp_pm_get_local_addr_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
+ /* do lazy endpoint usage accounting for the MPC subflows */
+ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
+ struct mptcp_addr_info mpc_addr;
+ int mpc_id;
+
+ local_address((struct sock_common *)msk->first, &mpc_addr);
+ mpc_id = lookup_id_by_addr(pernet, &mpc_addr);
+ if (mpc_id >= 0)
+ __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
+
+ msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
+ }
+
pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
msk->pm.local_addr_used, local_addr_max,
msk->pm.add_addr_signaled, add_addr_signal_max,
@@ -482,47 +530,41 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
- local = select_signal_address(pernet,
- msk->pm.add_addr_signaled);
+ local = select_signal_address(pernet, msk);
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
mptcp_pm_nl_addr_send_ack(msk);
}
- } else {
- /* pick failed, avoid fourther attempts later */
- msk->pm.local_addr_used = add_addr_signal_max;
}
-
- check_work_pending(msk);
}
/* check if should create a new subflow */
- if (msk->pm.local_addr_used < local_addr_max &&
- msk->pm.subflows < subflows_max &&
- !READ_ONCE(msk->pm.remote_deny_join_id0)) {
+ while (msk->pm.local_addr_used < local_addr_max &&
+ msk->pm.subflows < subflows_max) {
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ bool fullmesh;
+ int i, nr;
+
local = select_local_address(pernet, msk);
- if (local) {
- bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
- struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
- int i, nr;
+ if (!local)
+ break;
- msk->pm.local_addr_used++;
- check_work_pending(msk);
- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
- spin_unlock_bh(&msk->pm.lock);
- for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
- spin_lock_bh(&msk->pm.lock);
- return;
- }
+ fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
- /* lookup failed, avoid fourther attempts later */
- msk->pm.local_addr_used = local_addr_max;
- check_work_pending(msk);
+ msk->pm.local_addr_used++;
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
+ if (nr)
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ spin_unlock_bh(&msk->pm.lock);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
+ spin_lock_bh(&msk->pm.lock);
}
+ mptcp_pm_nl_check_work_pending(msk);
}
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
@@ -552,7 +594,6 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
- __mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
@@ -641,7 +682,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
!mptcp_pm_should_rm_signal(msk))
return;
- __mptcp_flush_join_list(msk);
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
if (subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -711,6 +751,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
return;
for (i = 0; i < rm_list->nr; i++) {
+ bool removed = false;
+
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
@@ -727,18 +769,24 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
+
+ /* the following takes care of updating the subflows counter */
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
- if (rm_type == MPTCP_MIB_RMADDR) {
- msk->pm.add_addr_accepted--;
- WRITE_ONCE(msk->pm.accept_addr, true);
- } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
- msk->pm.local_addr_used--;
- }
- msk->pm.subflows--;
+ removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
+ __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ if (!removed)
+ continue;
+
+ if (rm_type == MPTCP_MIB_RMADDR) {
+ msk->pm.add_addr_accepted--;
+ WRITE_ONCE(msk->pm.accept_addr, true);
+ } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
+ msk->pm.local_addr_used--;
+ }
}
}
@@ -759,6 +807,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
msk_owned_by_me(msk);
+ if (!(pm->status & MPTCP_PM_WORK_MASK))
+ return;
+
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
@@ -804,7 +855,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
/* to keep the code simple, don't do IDR-like allocation for address ID,
* just bail when we exceed limits
*/
- if (pernet->next_id == MAX_ADDR_ID)
+ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
pernet->next_id = 1;
if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
goto out;
@@ -824,7 +875,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
if (!entry->addr.id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
- MAX_ADDR_ID + 1,
+ MPTCP_PM_MAX_ADDR_ID + 1,
pernet->next_id);
if (!entry->addr.id && pernet->next_id != 1) {
pernet->next_id = 1;
@@ -1191,18 +1242,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static struct mptcp_pm_addr_entry *
-__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
-{
- struct mptcp_pm_addr_entry *entry;
-
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (entry->addr.id == id)
- return entry;
- }
- return NULL;
-}
-
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
u8 *flags, int *ifindex)
{
@@ -1461,7 +1500,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
list_splice_init(&pernet->local_addr_list, &free_list);
__reset_counters(pernet);
pernet->next_id = 1;
- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
+ bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu();
@@ -1571,7 +1610,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
pernet = net_generic(net, pm_nl_pernet_id);
spin_lock_bh(&pernet->lock);
- for (i = id; i < MAX_ADDR_ID + 1; i++) {
+ for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i);
if (!entry)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index df5a0cf431c1..62d418813503 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -763,7 +763,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &msk->cb_flags);
}
/* If the moves have caught up with the DATA_FIN sequence number
@@ -808,47 +808,38 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
mptcp_data_unlock(sk);
}
-static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
+static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
{
- struct mptcp_subflow_context *subflow;
- bool ret = false;
+ struct sock *sk = (struct sock *)msk;
- if (likely(list_empty(&msk->join_list)))
+ if (sk->sk_state != TCP_ESTABLISHED)
return false;
- spin_lock_bh(&msk->join_list_lock);
- list_for_each_entry(subflow, &msk->join_list, node) {
- u32 sseq = READ_ONCE(subflow->setsockopt_seq);
-
- mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
- if (READ_ONCE(msk->setsockopt_seq) != sseq)
- ret = true;
- }
- list_splice_tail_init(&msk->join_list, &msk->conn_list);
- spin_unlock_bh(&msk->join_list_lock);
-
- return ret;
-}
-
-void __mptcp_flush_join_list(struct mptcp_sock *msk)
-{
- if (likely(!mptcp_do_flush_join_list(msk)))
- return;
+ /* attach to msk socket only after we are sure we will deal with it
+ * at close time
+ */
+ if (sk->sk_socket && !ssk->sk_socket)
+ mptcp_sock_graft(ssk, sk->sk_socket);
- if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
- mptcp_schedule_work((struct sock *)msk);
+ mptcp_propagate_sndbuf((struct sock *)msk, ssk);
+ mptcp_sockopt_sync_locked(msk, ssk);
+ return true;
}
-static void mptcp_flush_join_list(struct mptcp_sock *msk)
+static void __mptcp_flush_join_list(struct sock *sk)
{
- bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
-
- might_sleep();
+ struct mptcp_subflow_context *tmp, *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
- if (!mptcp_do_flush_join_list(msk) && !sync_needed)
- return;
+ list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast(ssk);
- mptcp_sockopt_sync_all(msk);
+ list_move_tail(&subflow->node, &msk->conn_list);
+ if (!__mptcp_finish_join(msk, ssk))
+ mptcp_subflow_reset(ssk);
+ unlock_sock_fast(ssk, slow);
+ }
}
static bool mptcp_timer_pending(struct sock *sk)
@@ -1526,9 +1517,8 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk) &&
- !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ if (mptcp_send_head(sk))
+ mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
@@ -1549,7 +1539,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
int ret = 0;
prev_ssk = ssk;
- __mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);
/* First check. If the ssk has changed since
@@ -1954,7 +1943,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
unsigned int moved = 0;
bool ret, done;
- mptcp_flush_join_list(msk);
do {
struct sock *ssk = mptcp_subflow_recv_lookup(msk);
bool slowpath;
@@ -2145,7 +2133,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
mptcp_schedule_work(sk);
} else {
/* delegate our work to tcp_release_cb() */
- set_bit(MPTCP_RETRANSMIT, &msk->flags);
+ __set_bit(MPTCP_RETRANSMIT, &msk->cb_flags);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -2253,6 +2241,10 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
return true;
}
+/* flags for __mptcp_close_ssk() */
+#define MPTCP_CF_PUSH BIT(1)
+#define MPTCP_CF_FASTCLOSE BIT(2)
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2262,22 +2254,37 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
* parent socket.
*/
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow)
+ struct mptcp_subflow_context *subflow,
+ unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push;
+ bool need_push, dispose_it;
- list_del(&subflow->node);
+ dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ if (dispose_it)
+ list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ if (flags & MPTCP_CF_FASTCLOSE)
+ subflow->send_fastclose = 1;
+
+ need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
+ if (!dispose_it) {
+ tcp_disconnect(ssk, 0);
+ msk->subflow->state = SS_UNCONNECTED;
+ mptcp_subflow_ctx_reset(subflow);
+ release_sock(ssk);
+
+ goto out;
+ }
+
/* if we are invoked by the msk cleanup code, the subflow is
* already orphaned
*/
if (ssk->sk_socket)
sock_orphan(ssk);
- need_push = __mptcp_retransmit_pending_data(sk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2297,14 +2304,12 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_put(ssk);
- if (ssk == msk->last_snd)
- msk->last_snd = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
- if (msk->subflow && ssk == msk->subflow->sk)
- mptcp_dispose_initial_subflow(msk);
+out:
+ if (ssk == msk->last_snd)
+ msk->last_snd = NULL;
if (need_push)
__mptcp_push_pending(sk, 0);
@@ -2315,7 +2320,13 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
{
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
- __mptcp_close_ssk(sk, ssk, subflow);
+
+ /* subflow aborted before reaching the fully_established status
+ * attempt the creation of the next subflow
+ */
+ mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow);
+
+ __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH);
}
static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
@@ -2467,12 +2478,10 @@ static void mptcp_worker(struct work_struct *work)
goto unlock;
mptcp_check_data_fin_ack(sk);
- mptcp_flush_join_list(msk);
mptcp_check_fastclose(msk);
- if (msk->pm.status)
- mptcp_pm_nl_work(msk);
+ mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
@@ -2506,8 +2515,6 @@ static int __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- spin_lock_init(&msk->join_list_lock);
-
INIT_LIST_HEAD(&msk->conn_list);
INIT_LIST_HEAD(&msk->join_list);
INIT_LIST_HEAD(&msk->rtx_queue);
@@ -2533,9 +2540,20 @@ static int __mptcp_init_sock(struct sock *sk)
return 0;
}
-static int mptcp_init_sock(struct sock *sk)
+static void mptcp_ca_reset(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_assign_congestion_control(sk);
+ strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+ /* no need to keep a reference to the ops, the name will suffice */
+ tcp_cleanup_congestion_control(sk);
+ icsk->icsk_ca_ops = NULL;
+}
+
+static int mptcp_init_sock(struct sock *sk)
+{
struct net *net = sock_net(sk);
int ret;
@@ -2556,12 +2574,7 @@ static int mptcp_init_sock(struct sock *sk)
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
* propagate the correct value
*/
- tcp_assign_congestion_control(sk);
- strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
-
- /* no need to keep a reference to the ops, the name will suffice */
- tcp_cleanup_congestion_control(sk);
- icsk->icsk_ca_ops = NULL;
+ mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2666,6 +2679,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
* state now
*/
if (__mptcp_check_fallback(msk)) {
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
@@ -2674,7 +2688,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
}
}
- mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
@@ -2707,21 +2720,20 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- /* be sure to always acquire the join list lock, to sync vs
- * mptcp_finish_join().
- */
- spin_lock_bh(&msk->join_list_lock);
- list_splice_tail_init(&msk->join_list, &msk->conn_list);
- spin_unlock_bh(&msk->join_list_lock);
+ /* join list will be eventually flushed (with rst) at sock lock release time*/
list_splice_init(&msk->conn_list, &conn_list);
sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ /* clears msk->subflow, allowing the following loop to close
+ * even the initial subflow
+ */
+ mptcp_dispose_initial_subflow(msk);
list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- __mptcp_close_ssk(sk, ssk, subflow);
+ __mptcp_close_ssk(sk, ssk, subflow, 0);
}
sk->sk_prot->destroy(sk);
@@ -2732,7 +2744,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
- mptcp_dispose_initial_subflow(msk);
sock_put(sk);
}
@@ -2768,6 +2779,9 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
do_cancel_work = true;
@@ -2778,9 +2792,6 @@ cleanup:
if (do_cancel_work)
mptcp_cancel_work(sk);
- if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
-
sock_put(sk);
}
@@ -2812,15 +2823,38 @@ static int mptcp_disconnect(struct sock *sk, int flags)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- mptcp_do_flush_join_list(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- lock_sock(ssk);
- tcp_disconnect(ssk, flags);
- release_sock(ssk);
+ __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
}
+
+ sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
+
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
+ mptcp_destroy_common(msk);
+ msk->last_snd = NULL;
+ WRITE_ONCE(msk->flags, 0);
+ msk->cb_flags = 0;
+ msk->push_pending = 0;
+ msk->recovery = false;
+ msk->can_ack = false;
+ msk->fully_established = false;
+ msk->rcv_data_fin = false;
+ msk->snd_data_fin_enable = false;
+ msk->rcv_fastclose = false;
+ msk->use_64bit_ack = false;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ mptcp_pm_data_reset(msk);
+ mptcp_ca_reset(sk);
+
+ sk->sk_shutdown = 0;
+ sk_error_report(sk);
return 0;
}
@@ -2960,9 +2994,11 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
__mptcp_clear_xmit(sk);
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
+ mptcp_data_lock(sk);
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
+ mptcp_data_unlock(sk);
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
@@ -2986,7 +3022,7 @@ void __mptcp_data_acked(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_clean_una(sk);
else
- set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
if (mptcp_pending_data_fin_ack(sk))
mptcp_schedule_work(sk);
@@ -3005,20 +3041,23 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
else if (xmit_ssk)
mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
} else {
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
}
}
+#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
+ BIT(MPTCP_RETRANSMIT) | \
+ BIT(MPTCP_FLUSH_JOIN_LIST))
+
/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)
+ __must_hold(&sk->sk_lock.slock)
{
- for (;;) {
- unsigned long flags = 0;
+ struct mptcp_sock *msk = mptcp_sk(sk);
- if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_PUSH_PENDING);
- if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_RETRANSMIT);
+ for (;;) {
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
+ msk->push_pending;
if (!flags)
break;
@@ -3029,8 +3068,11 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
-
+ msk->push_pending = 0;
+ msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
+ if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
+ __mptcp_flush_join_list(sk);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
@@ -3043,11 +3085,11 @@ static void mptcp_release_cb(struct sock *sk)
/* be sure to set the current sk state before tacking actions
* depending on sk_state
*/
- if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
__mptcp_set_connected(sk);
- if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
__mptcp_update_rmem(sk);
@@ -3089,7 +3131,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk);
else
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
@@ -3175,8 +3217,7 @@ bool mptcp_finish_join(struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct sock *parent = (void *)msk;
- struct socket *parent_sock;
- bool ret;
+ bool ret = true;
pr_debug("msk=%p, subflow=%p", msk, subflow);
@@ -3189,35 +3230,38 @@ bool mptcp_finish_join(struct sock *ssk)
if (!msk->pm.server_side)
goto out;
- if (!mptcp_pm_allow_new_subflow(msk)) {
- subflow->reset_reason = MPTCP_RST_EPROHIBIT;
- return false;
- }
+ if (!mptcp_pm_allow_new_subflow(msk))
+ goto err_prohibited;
+
+ if (WARN_ON_ONCE(!list_empty(&subflow->node)))
+ goto err_prohibited;
- /* active connections are already on conn_list, and we can't acquire
- * msk lock here.
- * use the join list lock as synchronization point and double-check
- * msk status to avoid racing with __mptcp_destroy_sock()
+ /* active connections are already on conn_list.
+ * If we can't acquire msk socket lock here, let the release callback
+ * handle it
*/
- spin_lock_bh(&msk->join_list_lock);
- ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
- if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) {
- list_add_tail(&subflow->node, &msk->join_list);
+ mptcp_data_lock(parent);
+ if (!sock_owned_by_user(parent)) {
+ ret = __mptcp_finish_join(msk, ssk);
+ if (ret) {
+ sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->conn_list);
+ }
+ } else {
sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->join_list);
+ __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags);
}
- spin_unlock_bh(&msk->join_list_lock);
+ mptcp_data_unlock(parent);
+
if (!ret) {
+err_prohibited:
subflow->reset_reason = MPTCP_RST_EPROHIBIT;
return false;
}
- /* attach to msk socket only after we are sure he will deal with us
- * at close time
- */
- parent_sock = READ_ONCE(parent->sk_socket);
- if (parent_sock && !ssk->sk_socket)
- mptcp_sock_graft(ssk, parent_sock);
subflow->map_seq = READ_ONCE(msk->ack_seq);
+
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
@@ -3352,9 +3396,20 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct mptcp_subflow_context *subflow;
struct socket *ssock;
- int err;
+ int err = -EINVAL;
lock_sock(sock->sk);
+ if (uaddr) {
+ if (addr_len < sizeof(uaddr->sa_family))
+ goto unlock;
+
+ if (uaddr->sa_family == AF_UNSPEC) {
+ err = mptcp_disconnect(sock->sk, flags);
+ sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+ goto unlock;
+ }
+ }
+
if (sock->state != SS_UNCONNECTED && msk->subflow) {
/* pending connection or invalid state, let existing subflow
* cope with that
@@ -3364,10 +3419,8 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
}
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (!ssock)
goto unlock;
- }
mptcp_token_destroy(msk);
inet_sk_state_store(sock->sk, TCP_SYN_SENT);
@@ -3441,17 +3494,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- lock_sock(sock->sk);
- if (sock->sk->sk_state != TCP_LISTEN)
- goto unlock_fail;
-
ssock = __mptcp_nmpc_socket(msk);
if (!ssock)
- goto unlock_fail;
-
- clear_bit(MPTCP_DATA_READY, &msk->flags);
- sock_hold(ssock->sk);
- release_sock(sock->sk);
+ return -EINVAL;
err = ssock->ops->accept(sock, newsock, flags, kern);
if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
@@ -3481,7 +3526,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
- mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -3491,14 +3535,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
release_sock(newsk);
}
- if (inet_csk_listen_poll(ssock->sk))
- set_bit(MPTCP_DATA_READY, &msk->flags);
- sock_put(ssock->sk);
return err;
-
-unlock_fail:
- release_sock(sock->sk);
- return -EINVAL;
}
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
@@ -3544,8 +3581,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
- if (state == TCP_LISTEN)
- return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0;
+ if (state == TCP_LISTEN) {
+ if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
+ return 0;
+
+ return inet_csk_listen_poll(msk->subflow->sk);
+ }
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0486c9f5b38b..0e6b42c76ea0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -110,19 +110,20 @@
/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT BIT(0)
-/* MPTCP socket flags */
-#define MPTCP_DATA_READY 0
+/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
-#define MPTCP_PUSH_PENDING 6
-#define MPTCP_CLEAN_UNA 7
-#define MPTCP_ERROR_REPORT 8
-#define MPTCP_RETRANSMIT 9
-#define MPTCP_WORK_SYNC_SETSOCKOPT 10
-#define MPTCP_CONNECTED 11
+
+/* MPTCP socket release cb flags */
+#define MPTCP_PUSH_PENDING 1
+#define MPTCP_CLEAN_UNA 2
+#define MPTCP_ERROR_REPORT 3
+#define MPTCP_RETRANSMIT 4
+#define MPTCP_FLUSH_JOIN_LIST 5
+#define MPTCP_CONNECTED 6
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -174,16 +175,25 @@ enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_SEND_ACK,
MPTCP_PM_RM_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED,
- MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_SUBFLOW_ESTABLISHED,
+ MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
+ MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
+ * accounted int id_avail_bitmap
+ */
};
+/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
+#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
+
enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO,
MPTCP_RM_ADDR_SIGNAL,
};
+/* max value of mptcp_addr_info.id */
+#define MPTCP_PM_MAX_ADDR_ID U8_MAX
+
struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
@@ -202,6 +212,7 @@ struct mptcp_pm_data {
u8 local_addr_used;
u8 subflows;
u8 status;
+ DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_rm_list rm_list_tx;
struct mptcp_rm_list rm_list_rx;
};
@@ -241,6 +252,8 @@ struct mptcp_sock {
u32 token;
int rmem_released;
unsigned long flags;
+ unsigned long cb_flags;
+ unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -252,7 +265,6 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
- spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -395,6 +407,9 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
+
+ char reset_start[0];
+
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
u64 remote_key;
@@ -423,6 +438,7 @@ struct mptcp_subflow_context {
backup : 1,
send_mp_prio : 1,
send_mp_fail : 1,
+ send_fastclose : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
@@ -441,6 +457,9 @@ struct mptcp_subflow_context {
u8 stale_count;
long delegated_status;
+
+ char reset_end[0];
+
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
u32 setsockopt_seq;
@@ -472,6 +491,13 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
return subflow->tcp_sock;
}
+static inline void
+mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
+{
+ memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ subflow->request_mptcp = 1;
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
@@ -486,15 +512,6 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}
-static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- sock_hold(mptcp_subflow_tcp_sock(subflow));
- spin_lock_bh(&msk->join_list_lock);
- list_add_tail(&subflow->node, &msk->join_list);
- spin_unlock_bh(&msk->join_list_lock);
-}
-
void mptcp_subflow_process_delegated(struct sock *ssk);
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
@@ -659,7 +676,6 @@ void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
-void __mptcp_flush_join_list(struct mptcp_sock *msk);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
return READ_ONCE(msk->snd_data_fin_enable) &&
@@ -709,9 +725,11 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
+u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_data_reset(struct mptcp_sock *msk);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
@@ -719,7 +737,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
-void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
+bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+ const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
@@ -816,7 +836,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index aa3fcd86dbe2..dacf3cee0027 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1285,27 +1285,15 @@ void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
}
}
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- u32 seq;
-
- seq = sockopt_seq_reset(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- u32 sseq = READ_ONCE(subflow->setsockopt_seq);
+ msk_owned_by_me(msk);
- if (sseq != msk->setsockopt_seq) {
- __mptcp_sockopt_sync(msk, ssk);
- WRITE_ONCE(subflow->setsockopt_seq, seq);
- } else if (sseq != seq) {
- WRITE_ONCE(subflow->setsockopt_seq, seq);
- }
+ if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+ sync_socket_options(msk, ssk);
- cond_resched();
+ subflow->setsockopt_seq = msk->setsockopt_seq;
}
-
- msk->setsockopt_seq = seq;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 24bc9d5e87be..bea47a1180dc 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_set_connected(sk);
else
- set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -845,9 +845,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
bool csum_reqd)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- struct csum_pseudo_header header;
u32 offset, seq, delta;
- __wsum csum;
+ u16 csum;
int len;
if (!csum_reqd)
@@ -908,13 +907,11 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
* while the pseudo header requires the original DSS data len,
* including that
*/
- header.data_seq = cpu_to_be64(subflow->map_seq);
- header.subflow_seq = htonl(subflow->map_subflow_seq);
- header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
- header.csum = 0;
-
- csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
- if (unlikely(csum_fold(csum))) {
+ csum = __mptcp_make_csum(subflow->map_seq,
+ subflow->map_subflow_seq,
+ subflow->map_data_len + subflow->map_data_fin,
+ subflow->map_data_csum);
+ if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
subflow->send_mp_fail = 1;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
@@ -1274,7 +1271,7 @@ static void subflow_error_report(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -1293,7 +1290,6 @@ static void subflow_data_ready(struct sock *sk)
if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
return;
- set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
@@ -1442,7 +1438,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
- mptcp_add_pending_subflow(msk, subflow);
+ sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
@@ -1453,9 +1450,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
return err;
failed_unlink:
- spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
- spin_unlock_bh(&msk->join_list_lock);
sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index e581b341c5be..f52ee7b26aed 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -384,6 +384,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk)
bucket->chain_len--;
}
spin_unlock_bh(&bucket->lock);
+ WRITE_ONCE(msk->token, 0);
}
void __init mptcp_token_init(void)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9bbe7282efb6..5bd409ab4cc2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3162,12 +3162,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
__be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
- struct net_device *dev_curr;
- __be16 proto_curr;
- bool need_rehook;
struct net_device *dev = NULL;
- int ret = 0;
bool unlisted = false;
+ bool need_rehook;
+ int ret = 0;
lock_sock(sk);
spin_lock(&po->bind_lock);
@@ -3192,14 +3190,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
}
- dev_hold(dev);
-
- proto_curr = po->prot_hook.type;
- dev_curr = po->prot_hook.dev;
-
- need_rehook = proto_curr != proto || dev_curr != dev;
+ need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev;
if (need_rehook) {
+ dev_hold(dev);
if (po->running) {
rcu_read_unlock();
/* prevents packet_notifier() from calling
@@ -3208,7 +3202,6 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, true);
rcu_read_lock();
- dev_curr = po->prot_hook.dev;
if (dev)
unlisted = !dev_get_by_index_rcu(sock_net(sk),
dev->ifindex);
@@ -3218,25 +3211,21 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
- dev_put_track(dev_curr, &po->prot_hook.dev_tracker);
- dev_curr = NULL;
+ dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker);
if (unlikely(unlisted)) {
- dev_put(dev);
po->prot_hook.dev = NULL;
WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
- if (dev)
- netdev_tracker_alloc(dev,
- &po->prot_hook.dev_tracker,
- GFP_ATOMIC);
+ dev_hold_track(dev, &po->prot_hook.dev_tracker,
+ GFP_ATOMIC);
po->prot_hook.dev = dev;
WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
+ dev_put(dev);
}
- dev_put_track(dev_curr, &po->prot_hook.dev_tracker);
if (proto == 0 || !need_rehook)
goto out_unlock;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 857aaebd49f4..a43a58a73d09 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2342,9 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch)
/* List of known Diffserv codepoints:
*
- * Least Effort (CS1, LE)
- * Best Effort (CS0)
- * Max Reliability & LLT "Lo" (TOS1)
+ * Default Forwarding (DF/CS0) - Best Effort
* Max Throughput (TOS2)
* Min Delay (TOS4)
* LLT "La" (TOS5)
@@ -2352,6 +2350,7 @@ static int cake_config_precedence(struct Qdisc *sch)
* Assured Forwarding 2 (AF2x) - x3
* Assured Forwarding 3 (AF3x) - x3
* Assured Forwarding 4 (AF4x) - x3
+ * Precedence Class 1 (CS1)
* Precedence Class 2 (CS2)
* Precedence Class 3 (CS3)
* Precedence Class 4 (CS4)
@@ -2360,8 +2359,9 @@ static int cake_config_precedence(struct Qdisc *sch)
* Precedence Class 7 (CS7)
* Voice Admit (VA)
* Expedited Forwarding (EF)
-
- * Total 25 codepoints.
+ * Lower Effort (LE)
+ *
+ * Total 26 codepoints.
*/
/* List of traffic classes in RFC 4594, updated by RFC 8622:
@@ -2375,12 +2375,12 @@ static int cake_config_precedence(struct Qdisc *sch)
* Realtime Interactive (CS4) - eg. games
* Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch
* Broadcast Video (CS3)
- * Low Latency Data (AF2x,TOS4) - eg. database
- * Ops, Admin, Management (CS2,TOS1) - eg. ssh
- * Standard Service (CS0 & unrecognised codepoints)
- * High Throughput Data (AF1x,TOS2) - eg. web traffic
- * Low Priority Data (CS1,LE) - eg. BitTorrent
-
+ * Low-Latency Data (AF2x,TOS4) - eg. database
+ * Ops, Admin, Management (CS2) - eg. ssh
+ * Standard Service (DF & unrecognised codepoints)
+ * High-Throughput Data (AF1x,TOS2) - eg. web traffic
+ * Low-Priority Data (LE,CS1) - eg. BitTorrent
+ *
* Total 12 traffic classes.
*/
@@ -2390,12 +2390,12 @@ static int cake_config_diffserv8(struct Qdisc *sch)
*
* Network Control (CS6, CS7)
* Minimum Latency (EF, VA, CS5, CS4)
- * Interactive Shell (CS2, TOS1)
+ * Interactive Shell (CS2)
* Low Latency Transactions (AF2x, TOS4)
* Video Streaming (AF4x, AF3x, CS3)
- * Bog Standard (CS0 etc.)
- * High Throughput (AF1x, TOS2)
- * Background Traffic (CS1, LE)
+ * Bog Standard (DF etc.)
+ * High Throughput (AF1x, TOS2, CS1)
+ * Background Traffic (LE)
*
* Total 8 traffic classes.
*/
@@ -2437,9 +2437,9 @@ static int cake_config_diffserv4(struct Qdisc *sch)
/* Further pruned list of traffic classes for four-class system:
*
* Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4)
- * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
- * Best Effort (CS0, AF1x, TOS2, and those not specified)
- * Background Traffic (CS1, LE)
+ * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2)
+ * Best Effort (DF, AF1x, TOS2, and those not specified)
+ * Background Traffic (LE, CS1)
*
* Total 4 traffic classes.
*/
@@ -2477,9 +2477,9 @@ static int cake_config_diffserv4(struct Qdisc *sch)
static int cake_config_diffserv3(struct Qdisc *sch)
{
/* Simplified Diffserv structure with 3 tins.
- * Low Priority (CS1, LE)
+ * Latency Sensitive (CS7, CS6, EF, VA, TOS4)
* Best Effort
- * Latency Sensitive (TOS4, VA, EF, CS6, CS7)
+ * Low Priority (LE, CS1)
*/
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 3f271e29812f..95e774f1b91f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1990,6 +1990,7 @@ recv_end:
end:
release_sock(sk);
+ sk_defer_free_flush(sk);
if (psock)
sk_psock_put(sk, psock);
return copied ? : err;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 616d663d55aa..aa50864e4415 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -14,6 +14,7 @@
#include <arpa/inet.h>
#include <locale.h>
#include <net/ethernet.h>
+#include <netinet/ether.h>
#include <net/if.h>
#include <poll.h>
#include <pthread.h>
@@ -30,6 +31,7 @@
#include <sys/un.h>
#include <time.h>
#include <unistd.h>
+#include <sched.h>
#include <bpf/libbpf.h>
#include <bpf/xsk.h>
@@ -56,12 +58,27 @@
#define DEBUG_HEXDUMP 0
+#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT 13
+#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
+#define VLAN_VID__DEFAULT 1
+#define VLAN_PRI__DEFAULT 0
+
+#define NSEC_PER_SEC 1000000000UL
+#define NSEC_PER_USEC 1000
+
+#define SCHED_PRI__DEFAULT 0
+
typedef __u64 u64;
typedef __u32 u32;
typedef __u16 u16;
typedef __u8 u8;
static unsigned long prev_time;
+static long tx_cycle_diff_min;
+static long tx_cycle_diff_max;
+static double tx_cycle_diff_ave;
+static long tx_cycle_cnt;
enum benchmark_type {
BENCH_RXDROP = 0,
@@ -81,14 +98,23 @@ static u32 opt_batch_size = 64;
static int opt_pkt_count;
static u16 opt_pkt_size = MIN_PKT_SIZE;
static u32 opt_pkt_fill_pattern = 0x12345678;
+static bool opt_vlan_tag;
+static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT;
+static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT;
+static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe,
+ 0x9e, 0x7f, 0x71 }};
+static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7,
+ 0x98, 0x3a, 0xc0 }};
static bool opt_extra_stats;
static bool opt_quiet;
static bool opt_app_stats;
static const char *opt_irq_str = "";
static u32 irq_no;
static int irqs_at_init = -1;
+static u32 sequence;
static int opt_poll;
static int opt_interval = 1;
+static int opt_retries = 3;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
static u32 opt_umem_flags;
static int opt_unaligned_chunks;
@@ -100,6 +126,27 @@ static u32 opt_num_xsks = 1;
static u32 prog_id;
static bool opt_busy_poll;
static bool opt_reduced_cap;
+static clockid_t opt_clock = CLOCK_MONOTONIC;
+static unsigned long opt_tx_cycle_ns;
+static int opt_schpolicy = SCHED_OTHER;
+static int opt_schprio = SCHED_PRI__DEFAULT;
+static bool opt_tstamp;
+
+struct vlan_ethhdr {
+ unsigned char h_dest[6];
+ unsigned char h_source[6];
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+#define PKTGEN_MAGIC 0xbe9be955
+struct pktgen_hdr {
+ __be32 pgh_magic;
+ __be32 seq_num;
+ __be32 tv_sec;
+ __be32 tv_usec;
+};
struct xsk_ring_stats {
unsigned long rx_npkts;
@@ -156,15 +203,63 @@ struct xsk_socket_info {
u32 outstanding_tx;
};
+static const struct clockid_map {
+ const char *name;
+ clockid_t clockid;
+} clockids_map[] = {
+ { "REALTIME", CLOCK_REALTIME },
+ { "TAI", CLOCK_TAI },
+ { "BOOTTIME", CLOCK_BOOTTIME },
+ { "MONOTONIC", CLOCK_MONOTONIC },
+ { NULL }
+};
+
+static const struct sched_map {
+ const char *name;
+ int policy;
+} schmap[] = {
+ { "OTHER", SCHED_OTHER },
+ { "FIFO", SCHED_FIFO },
+ { NULL }
+};
+
static int num_socks;
struct xsk_socket_info *xsks[MAX_SOCKS];
int sock;
+static int get_clockid(clockid_t *id, const char *name)
+{
+ const struct clockid_map *clk;
+
+ for (clk = clockids_map; clk->name; clk++) {
+ if (strcasecmp(clk->name, name) == 0) {
+ *id = clk->clockid;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static int get_schpolicy(int *policy, const char *name)
+{
+ const struct sched_map *sch;
+
+ for (sch = schmap; sch->name; sch++) {
+ if (strcasecmp(sch->name, name) == 0) {
+ *policy = sch->policy;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
static unsigned long get_nsecs(void)
{
struct timespec ts;
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ clock_gettime(opt_clock, &ts);
return ts.tv_sec * 1000000000UL + ts.tv_nsec;
}
@@ -257,6 +352,15 @@ static void dump_app_stats(long dt)
xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
}
+
+ if (opt_tx_cycle_ns) {
+ printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n",
+ "", "period", "min", "ave", "max", "cycle");
+ printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n",
+ "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min,
+ (long)(tx_cycle_diff_ave / tx_cycle_cnt),
+ tx_cycle_diff_max, tx_cycle_cnt);
+ }
}
static bool get_interrupt_number(void)
@@ -740,29 +844,69 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
#define ETH_FCS_SIZE 4
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
- sizeof(struct udphdr))
+#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \
+ sizeof(struct ethhdr))
+#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0)
+#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \
+ sizeof(struct udphdr) + PKTGEN_HDR_SIZE)
+#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \
+ sizeof(struct udphdr))
+#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \
+ ETH_FCS_SIZE)
#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define IP_PKT_SIZE (PKT_SIZE - ETH_HDR_SIZE)
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - \
+ (sizeof(struct udphdr) + PKTGEN_HDR_SIZE))
static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
static void gen_eth_hdr_data(void)
{
- struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
+ struct pktgen_hdr *pktgen_hdr;
+ struct udphdr *udp_hdr;
+ struct iphdr *ip_hdr;
+
+ if (opt_vlan_tag) {
+ struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data;
+ u16 vlan_tci = 0;
+
+ udp_hdr = (struct udphdr *)(pkt_data +
+ sizeof(struct vlan_ethhdr) +
+ sizeof(struct iphdr));
+ ip_hdr = (struct iphdr *)(pkt_data +
+ sizeof(struct vlan_ethhdr));
+ pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
+ sizeof(struct vlan_ethhdr) +
+ sizeof(struct iphdr) +
+ sizeof(struct udphdr));
+ /* ethernet & VLAN header */
+ memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
+ memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN);
+ veth_hdr->h_vlan_proto = htons(ETH_P_8021Q);
+ vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK;
+ vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
+ veth_hdr->h_vlan_TCI = htons(vlan_tci);
+ veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP);
+ } else {
+ struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+
+ udp_hdr = (struct udphdr *)(pkt_data +
+ sizeof(struct ethhdr) +
+ sizeof(struct iphdr));
+ ip_hdr = (struct iphdr *)(pkt_data +
+ sizeof(struct ethhdr));
+ pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
sizeof(struct ethhdr) +
- sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
- sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+ sizeof(struct iphdr) +
+ sizeof(struct udphdr));
+ /* ethernet header */
+ memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_IP);
+ }
- /* ethernet header */
- memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
- memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_IP);
/* IP header */
ip_hdr->version = IPVERSION;
@@ -785,6 +929,9 @@ static void gen_eth_hdr_data(void)
udp_hdr->dest = htons(0x1000);
udp_hdr->len = htons(UDP_PKT_SIZE);
+ if (opt_tstamp)
+ pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC);
+
/* UDP data */
memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
UDP_PKT_DATA_SIZE);
@@ -908,6 +1055,7 @@ static struct option long_options[] = {
{"xdp-skb", no_argument, 0, 'S'},
{"xdp-native", no_argument, 0, 'N'},
{"interval", required_argument, 0, 'n'},
+ {"retries", required_argument, 0, 'O'},
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
{"frame-size", required_argument, 0, 'f'},
@@ -916,10 +1064,20 @@ static struct option long_options[] = {
{"shared-umem", no_argument, 0, 'M'},
{"force", no_argument, 0, 'F'},
{"duration", required_argument, 0, 'd'},
+ {"clock", required_argument, 0, 'w'},
{"batch-size", required_argument, 0, 'b'},
{"tx-pkt-count", required_argument, 0, 'C'},
{"tx-pkt-size", required_argument, 0, 's'},
{"tx-pkt-pattern", required_argument, 0, 'P'},
+ {"tx-vlan", no_argument, 0, 'V'},
+ {"tx-vlan-id", required_argument, 0, 'J'},
+ {"tx-vlan-pri", required_argument, 0, 'K'},
+ {"tx-dmac", required_argument, 0, 'G'},
+ {"tx-smac", required_argument, 0, 'H'},
+ {"tx-cycle", required_argument, 0, 'T'},
+ {"tstamp", no_argument, 0, 'y'},
+ {"policy", required_argument, 0, 'W'},
+ {"schpri", required_argument, 0, 'U'},
{"extra-stats", no_argument, 0, 'x'},
{"quiet", no_argument, 0, 'Q'},
{"app-stats", no_argument, 0, 'a'},
@@ -943,6 +1101,7 @@ static void usage(const char *prog)
" -S, --xdp-skb=n Use XDP skb-mod\n"
" -N, --xdp-native=n Enforce XDP native mode\n"
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
+ " -O, --retries=n Specify time-out retries (1s interval) attempt (default 3).\n"
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
" -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
@@ -952,6 +1111,7 @@ static void usage(const char *prog)
" -F, --force Force loading the XDP prog\n"
" -d, --duration=n Duration in secs to run command.\n"
" Default: forever.\n"
+ " -w, --clock=CLOCK Clock NAME (default MONOTONIC).\n"
" -b, --batch-size=n Batch size for sending or receiving\n"
" packets. Default: %d\n"
" -C, --tx-pkt-count=n Number of packets to send.\n"
@@ -960,6 +1120,15 @@ static void usage(const char *prog)
" (Default: %d bytes)\n"
" Min size: %d, Max size %d.\n"
" -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
+ " -V, --tx-vlan Send VLAN tagged packets (For -t|--txonly)\n"
+ " -J, --tx-vlan-id=n Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n"
+ " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
+ " -G, --tx-dmac=<MAC> Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
+ " -H, --tx-smac=<MAC> Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
+ " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n"
+ " -y, --tstamp Add time-stamp to packet (For -t|--txonly).\n"
+ " -W, --policy=POLICY Schedule policy. Default: SCHED_OTHER\n"
+ " -U, --schpri=n Schedule priority. Default: %d\n"
" -x, --extra-stats Display extra statistics.\n"
" -Q, --quiet Do not display any stats.\n"
" -a, --app-stats Display application (syscall) statistics.\n"
@@ -969,7 +1138,9 @@ static void usage(const char *prog)
"\n";
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
- XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
+ XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern,
+ VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT,
+ SCHED_PRI__DEFAULT);
exit(EXIT_FAILURE);
}
@@ -981,7 +1152,8 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR",
+ c = getopt_long(argc, argv,
+ "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR",
long_options, &option_index);
if (c == -1)
break;
@@ -1015,6 +1187,17 @@ static void parse_command_line(int argc, char **argv)
case 'n':
opt_interval = atoi(optarg);
break;
+ case 'w':
+ if (get_clockid(&opt_clock, optarg)) {
+ fprintf(stderr,
+ "ERROR: Invalid clock %s. Default to CLOCK_MONOTONIC.\n",
+ optarg);
+ opt_clock = CLOCK_MONOTONIC;
+ }
+ break;
+ case 'O':
+ opt_retries = atoi(optarg);
+ break;
case 'z':
opt_xdp_bind_flags |= XDP_ZEROCOPY;
break;
@@ -1062,6 +1245,49 @@ static void parse_command_line(int argc, char **argv)
case 'P':
opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
break;
+ case 'V':
+ opt_vlan_tag = true;
+ break;
+ case 'J':
+ opt_pkt_vlan_id = atoi(optarg);
+ break;
+ case 'K':
+ opt_pkt_vlan_pri = atoi(optarg);
+ break;
+ case 'G':
+ if (!ether_aton_r(optarg,
+ (struct ether_addr *)&opt_txdmac)) {
+ fprintf(stderr, "Invalid dmac address:%s\n",
+ optarg);
+ usage(basename(argv[0]));
+ }
+ break;
+ case 'H':
+ if (!ether_aton_r(optarg,
+ (struct ether_addr *)&opt_txsmac)) {
+ fprintf(stderr, "Invalid smac address:%s\n",
+ optarg);
+ usage(basename(argv[0]));
+ }
+ break;
+ case 'T':
+ opt_tx_cycle_ns = atoi(optarg);
+ opt_tx_cycle_ns *= NSEC_PER_USEC;
+ break;
+ case 'y':
+ opt_tstamp = 1;
+ break;
+ case 'W':
+ if (get_schpolicy(&opt_schpolicy, optarg)) {
+ fprintf(stderr,
+ "ERROR: Invalid policy %s. Default to SCHED_OTHER.\n",
+ optarg);
+ opt_schpolicy = SCHED_OTHER;
+ }
+ break;
+ case 'U':
+ opt_schprio = atoi(optarg);
+ break;
case 'x':
opt_extra_stats = 1;
break;
@@ -1267,16 +1493,22 @@ static void rx_drop_all(void)
}
}
-static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
+static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb,
+ int batch_size, unsigned long tx_ns)
{
- u32 idx;
+ u32 idx, tv_sec, tv_usec;
unsigned int i;
while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
batch_size) {
complete_tx_only(xsk, batch_size);
if (benchmark_done)
- return;
+ return 0;
+ }
+
+ if (opt_tstamp) {
+ tv_sec = (u32)(tx_ns / NSEC_PER_SEC);
+ tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000);
}
for (i = 0; i < batch_size; i++) {
@@ -1284,6 +1516,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
idx + i);
tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
tx_desc->len = PKT_SIZE;
+
+ if (opt_tstamp) {
+ struct pktgen_hdr *pktgen_hdr;
+ u64 addr = tx_desc->addr;
+ char *pkt;
+
+ pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+ pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET);
+
+ pktgen_hdr->seq_num = htonl(sequence++);
+ pktgen_hdr->tv_sec = htonl(tv_sec);
+ pktgen_hdr->tv_usec = htonl(tv_usec);
+
+ hex_dump(pkt, PKT_SIZE, addr);
+ }
}
xsk_ring_prod__submit(&xsk->tx, batch_size);
@@ -1292,6 +1539,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
*frame_nb += batch_size;
*frame_nb %= NUM_FRAMES;
complete_tx_only(xsk, batch_size);
+
+ return batch_size;
}
static inline int get_batch_size(int pkt_cnt)
@@ -1318,23 +1567,48 @@ static void complete_tx_only_all(void)
pending = !!xsks[i]->outstanding_tx;
}
}
- } while (pending);
+ sleep(1);
+ } while (pending && opt_retries-- > 0);
}
static void tx_only_all(void)
{
struct pollfd fds[MAX_SOCKS] = {};
u32 frame_nb[MAX_SOCKS] = {};
+ unsigned long next_tx_ns = 0;
int pkt_cnt = 0;
int i, ret;
+ if (opt_poll && opt_tx_cycle_ns) {
+ fprintf(stderr,
+ "Error: --poll and --tx-cycles are both set\n");
+ return;
+ }
+
for (i = 0; i < num_socks; i++) {
fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
fds[0].events = POLLOUT;
}
+ if (opt_tx_cycle_ns) {
+ /* Align Tx time to micro-second boundary */
+ next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) *
+ NSEC_PER_USEC;
+ next_tx_ns += opt_tx_cycle_ns;
+
+ /* Initialize periodic Tx scheduling variance */
+ tx_cycle_diff_min = 1000000000;
+ tx_cycle_diff_max = 0;
+ tx_cycle_diff_ave = 0.0;
+ }
+
while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
int batch_size = get_batch_size(pkt_cnt);
+ unsigned long tx_ns = 0;
+ struct timespec next;
+ int tx_cnt = 0;
+ long diff;
+ int err;
if (opt_poll) {
for (i = 0; i < num_socks; i++)
@@ -1347,13 +1621,43 @@ static void tx_only_all(void)
continue;
}
+ if (opt_tx_cycle_ns) {
+ next.tv_sec = next_tx_ns / NSEC_PER_SEC;
+ next.tv_nsec = next_tx_ns % NSEC_PER_SEC;
+ err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL);
+ if (err) {
+ if (err != EINTR)
+ fprintf(stderr,
+ "clock_nanosleep failed. Err:%d errno:%d\n",
+ err, errno);
+ break;
+ }
+
+ /* Measure periodic Tx scheduling variance */
+ tx_ns = get_nsecs();
+ diff = tx_ns - next_tx_ns;
+ if (diff < tx_cycle_diff_min)
+ tx_cycle_diff_min = diff;
+
+ if (diff > tx_cycle_diff_max)
+ tx_cycle_diff_max = diff;
+
+ tx_cycle_diff_ave += (double)diff;
+ tx_cycle_cnt++;
+ } else if (opt_tstamp) {
+ tx_ns = get_nsecs();
+ }
+
for (i = 0; i < num_socks; i++)
- tx_only(xsks[i], &frame_nb[i], batch_size);
+ tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns);
- pkt_cnt += batch_size;
+ pkt_cnt += tx_cnt;
if (benchmark_done)
break;
+
+ if (opt_tx_cycle_ns)
+ next_tx_ns += opt_tx_cycle_ns;
}
if (opt_pkt_count)
@@ -1584,6 +1888,7 @@ int main(int argc, char **argv)
struct __user_cap_data_struct data[2] = { { 0 } };
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
bool rx = false, tx = false;
+ struct sched_param schparam;
struct xsk_umem_info *umem;
struct bpf_object *obj;
int xsks_map_fd = 0;
@@ -1646,6 +1951,9 @@ int main(int argc, char **argv)
apply_setsockopt(xsks[i]);
if (opt_bench == BENCH_TXONLY) {
+ if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN)
+ opt_pkt_size = PKTGEN_SIZE_MIN;
+
gen_eth_hdr_data();
for (i = 0; i < NUM_FRAMES; i++)
@@ -1685,6 +1993,16 @@ int main(int argc, char **argv)
prev_time = get_nsecs();
start_time = prev_time;
+ /* Configure sched priority for better wake-up accuracy */
+ memset(&schparam, 0, sizeof(schparam));
+ schparam.sched_priority = opt_schprio;
+ ret = sched_setscheduler(0, opt_schpolicy, &schparam);
+ if (ret) {
+ fprintf(stderr, "Error(%d) in setting priority(%d): %s\n",
+ errno, opt_schprio, strerror(errno));
+ goto out;
+ }
+
if (opt_bench == BENCH_RXDROP)
rx_drop_all();
else if (opt_bench == BENCH_TXONLY)
@@ -1692,6 +2010,7 @@ int main(int argc, char **argv)
else
l2fwd_all();
+out:
benchmark_done = true;
if (!opt_quiet)
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 6719b9282eca..e999159fa28d 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -642,6 +642,30 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
printf("\n");
}
+static void
+probe_misc_feature(struct bpf_insn *insns, size_t len,
+ const char *define_prefix, __u32 ifindex,
+ const char *feat_name, const char *plain_name,
+ const char *define_name)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .prog_ifindex = ifindex,
+ );
+ bool res;
+ int fd;
+
+ errno = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ insns, len, &opts);
+ res = fd >= 0 || !errno;
+
+ if (fd >= 0)
+ close(fd);
+
+ print_bool_feature(feat_name, plain_name, define_name, res,
+ define_prefix);
+}
+
/*
* Probe for availability of kernel commit (5.3):
*
@@ -649,29 +673,81 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
*/
static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
{
- LIBBPF_OPTS(bpf_prog_load_opts, opts,
- .prog_ifindex = ifindex,
- );
struct bpf_insn insns[BPF_MAXINSNS + 1];
- bool res;
- int i, fd;
+ int i;
for (i = 0; i < BPF_MAXINSNS; i++)
insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
- errno = 0;
- fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL",
- insns, ARRAY_SIZE(insns), &opts);
- res = fd >= 0 || (errno != E2BIG && errno != EINVAL);
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_large_insn_limit",
+ "Large program size limit",
+ "LARGE_INSN_LIMIT");
+}
- if (fd >= 0)
- close(fd);
+/*
+ * Probe for bounded loop support introduced in commit 2589726d12a1
+ * ("bpf: introduce bounded loops").
+ */
+static void
+probe_bounded_loops(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[4] = {
+ BPF_MOV64_IMM(BPF_REG_0, 10),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2),
+ BPF_EXIT_INSN()
+ };
- print_bool_feature("have_large_insn_limit",
- "Large program size limit",
- "LARGE_INSN_LIMIT",
- res, define_prefix);
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_bounded_loops",
+ "Bounded loop support",
+ "BOUNDED_LOOPS");
+}
+
+/*
+ * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b
+ * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions").
+ */
+static void
+probe_v2_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[4] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_v2_isa_extension",
+ "ISA extension v2",
+ "V2_ISA_EXTENSION");
+}
+
+/*
+ * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6
+ * ("bpf: verifier support JMP32").
+ */
+static void
+probe_v3_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[4] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_v3_isa_extension",
+ "ISA extension v3",
+ "V3_ISA_EXTENSION");
}
static void
@@ -788,6 +864,9 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
"/*** eBPF misc features ***/",
define_prefix);
probe_large_insn_limit(define_prefix, ifindex);
+ probe_bounded_loops(define_prefix, ifindex);
+ probe_v2_isa_extension(define_prefix, ifindex);
+ probe_v3_isa_extension(define_prefix, ifindex);
print_end_section();
}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f874896c4154..2a21d50516bc 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1655,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
j = 0;
idx = 0;
bpf_object__for_each_map(map, obj) {
- if (!bpf_map__is_offload_neutral(map))
+ if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
bpf_map__set_ifindex(map, ifindex);
if (j < old_map_fds && idx == map_replace[j].idx) {
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9b64eed2b003..550b4cbb6c99 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -691,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
return libbpf_err_errno(ret);
}
-int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
- NULL, keys, NULL, count, opts);
+ NULL, (void *)keys, NULL, count, opts);
}
int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
@@ -715,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
count, opts);
}
-int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
- keys, values, count, opts);
+ (void *)keys, (void *)values, count, opts);
}
int bpf_obj_pin(int fd, const char *pathname)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 00619f64a040..14e0d97ad2cf 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -254,17 +254,128 @@ struct bpf_map_batch_opts {
};
#define bpf_map_batch_opts__last_field flags
-LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+
+/**
+ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple
+ * elements in a BPF map.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param count input and output parameter; on input **count** represents the
+ * number of elements in the map to delete in batch;
+ * on output if a non-EFAULT error is returned, **count** represents the number of deleted
+ * elements if the output **count** value is not equal to the input **count** value
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch deletion works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
__u32 *count,
const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
+ *
+ * The parameter *in_batch* is the address of the first element in the batch to read.
+ * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
+ * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
+ * that the batched lookup starts from the beginning of the map.
+ *
+ * The *keys* and *values* are output parameters which must point to memory large enough to
+ * hold *count* items based on the key and value size of the map *map_fd*. The *keys*
+ * buffer must be of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * indicate that the batched lookup starts from the beginning of the map.
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array large enough for *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read in batch; on output it's the number of elements that were
+ * successfully read.
+ * If a non-EFAULT error is returned, count will be set as the number of elements
+ * that were read before the error occurred.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch lookup works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
void *keys, void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion
+ * of BPF map elements where each element is deleted after being retrieved.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * get address of the first element in *out_batch*
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read and delete in batch; on output it represents the number of
+ * elements that were successfully read and deleted
+ * If a non-**EFAULT** error code is returned and if the output **count** value
+ * is not equal to the input **count** value, up to **count** elements may
+ * have been deleted.
+ * if **EFAULT** is returned up to *count* elements may have been deleted without
+ * being returned via the *keys* and *values* output parameters.
+ * @param opts options for configuring the way the batch lookup and delete works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
void *out_batch, void *keys,
void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
-LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+
+/**
+ * @brief **bpf_map_update_batch()** updates multiple elements in a map
+ * by specifying keys and their corresponding values.
+ *
+ * The *keys* and *values* parameters must point to memory large enough
+ * to hold *count* items based on the key and value size of the map.
+ *
+ * The *opts* parameter can be used to control how *bpf_map_update_batch()*
+ * should handle keys that either do or do not already exist in the map.
+ * In particular the *flags* parameter of *bpf_map_batch_opts* can be
+ * one of the following:
+ *
+ * Note that *count* is an input and output parameter, where on output it
+ * represents how many elements were successfully updated. Also note that if
+ * **EFAULT** then *count* should not be trusted to be correct.
+ *
+ * **BPF_ANY**
+ * Create new elements or update existing.
+ *
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ *
+ * **BPF_EXIST**
+ * Update existing elements.
+ *
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array of *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to update in batch; on output if a non-EFAULT error is returned,
+ * **count** represents the number of updated elements if the output **count**
+ * value is not equal to the input **count** value.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch update works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
__u32 *count,
const struct bpf_map_batch_opts *opts);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9cb99d1e2385..7f10dd501a52 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -9916,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...)
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
const char *kfunc_name, size_t offset)
{
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset);
+ static int index = 0;
+
+ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
+ __sync_fetch_and_add(&index, 1));
}
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
@@ -10017,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
func_name, offset);
- legacy_probe = strdup(func_name);
+ legacy_probe = strdup(probe_name);
if (!legacy_probe)
return libbpf_err_ptr(-ENOMEM);
@@ -10676,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
return link;
}
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
- void **copy_mem, size_t *copy_size,
- bpf_perf_event_print_t fn, void *private_data)
+static enum bpf_perf_event_ret
+perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data)
{
struct perf_event_mmap_page *header = mmap_mem;
__u64 data_head = ring_buffer_read_head(header);
@@ -10724,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
return libbpf_err(ret);
}
+__attribute__((alias("perf_event_read_simple")))
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data);
+
struct perf_buffer;
struct perf_buffer_params {
@@ -11132,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
{
enum bpf_perf_event_ret ret;
- ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
- pb->page_size, &cpu_buf->buf,
- &cpu_buf->buf_size,
- perf_buffer__process_record, cpu_buf);
+ ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+ pb->page_size, &cpu_buf->buf,
+ &cpu_buf->buf_size,
+ perf_buffer__process_record, cpu_buf);
if (ret != LIBBPF_PERF_EVENT_CONT)
return ret;
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 85dfef88b3d2..8b9bc5e90c2b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -677,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
* Get bpf_map through the offset of corresponding struct bpf_map_def
* in the BPF object file.
*/
-LIBBPF_API struct bpf_map *
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
+struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
@@ -744,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
/**
@@ -1026,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
void *private_data);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead")
LIBBPF_API enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 0a577a248d34..32fc5b3b5cf6 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -9,6 +9,7 @@
#define MAX_FILES 7
#include "test_d_path.skel.h"
+#include "test_d_path_check_rdonly_mem.skel.h"
static int duration;
@@ -99,7 +100,7 @@ out_close:
return ret;
}
-void test_d_path(void)
+static void test_d_path_basic(void)
{
struct test_d_path__bss *bss;
struct test_d_path *skel;
@@ -155,3 +156,22 @@ void test_d_path(void)
cleanup:
test_d_path__destroy(skel);
}
+
+static void test_d_path_check_rdonly_mem(void)
+{
+ struct test_d_path_check_rdonly_mem *skel;
+
+ skel = test_d_path_check_rdonly_mem__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem");
+
+ test_d_path_check_rdonly_mem__destroy(skel);
+}
+
+void test_d_path(void)
+{
+ if (test__start_subtest("basic"))
+ test_d_path_basic();
+
+ if (test__start_subtest("check_rdonly_mem"))
+ test_d_path_check_rdonly_mem();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 4b18b73df10b..c2426df58e17 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd)
if (!ASSERT_OK(err, "unshare"))
return err;
+ /* Make our /sys mount private, so the following umount won't
+ * trigger the global umount in case it's shared.
+ */
+ err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
+ if (!ASSERT_OK(err, "remount private /sys"))
+ return err;
+
err = umount2("/sys", MNT_DETACH);
if (!ASSERT_OK(err, "umount2 /sys"))
return err;
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 76e93b31c14b..717dab14322b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -12,9 +12,9 @@
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/consume_skb")
-int while_true(volatile struct pt_regs* ctx)
+int while_true(struct pt_regs *ctx)
{
- __u64 i = 0, sum = 0;
+ volatile __u64 i = 0, sum = 0;
do {
i++;
sum += PT_REGS_RC(ctx);
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
new file mode 100644
index 000000000000..27c27cff6a3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to readonly memory. bpf helpers
+ * that update its arguments can not write into it.
+ */
+ bpf_d_path(path, active, sizeof(int));
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index e8edd3dd3ec2..fe10f8134278 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -35,18 +35,21 @@ struct sock_test {
/* Endpoint to bind() to */
const char *ip;
unsigned short port;
+ unsigned short port_retry;
/* Expected test result */
enum {
LOAD_REJECT,
ATTACH_REJECT,
BIND_REJECT,
SUCCESS,
+ RETRY_SUCCESS,
+ RETRY_REJECT
} result;
};
static struct sock_test tests[] = {
{
- "bind4 load with invalid access: src_ip6",
+ .descr = "bind4 load with invalid access: src_ip6",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -54,16 +57,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind4 load with invalid access: mark",
+ .descr = "bind4 load with invalid access: mark",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -71,16 +70,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind6 load with invalid access: src_ip4",
+ .descr = "bind6 load with invalid access: src_ip4",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -88,16 +83,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "sock_create load with invalid access: src_port",
+ .descr = "sock_create load with invalid access: src_port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -105,128 +96,106 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = LOAD_REJECT,
},
{
- "sock_create load w/o expected_attach_type (compat mode)",
+ .descr = "sock_create load w/o expected_attach_type (compat mode)",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "sock_create load w/ expected_attach_type",
+ .descr = "sock_create load w/ expected_attach_type",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "attach type mismatch bind4 vs bind6",
+ .descr = "attach type mismatch bind4 vs bind6",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs bind4",
+ .descr = "attach type mismatch bind6 vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch default vs bind4",
+ .descr = "attach type mismatch default vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs sock_create",
+ .descr = "attach type mismatch bind6 vs sock_create",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = ATTACH_REJECT,
},
{
- "bind4 reject all",
+ .descr = "bind4 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = BIND_REJECT,
},
{
- "bind6 reject all",
+ .descr = "bind6 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = BIND_REJECT,
},
{
- "bind6 deny specific IP & port",
+ .descr = "bind6 deny specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -247,16 +216,16 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::1",
- 8193,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .result = BIND_REJECT,
},
{
- "bind4 allow specific IP & port",
+ .descr = "bind4 allow specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -277,41 +246,132 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 4098,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .result = SUCCESS,
},
{
- "bind4 allow all",
+ .descr = "bind4 deny specific IP & port of TCP, and retry",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
},
{
- "bind6 allow all",
+ .descr = "bind4 deny specific IP & port of UDP, and retry",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind6 deny specific IP & port, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .port_retry = 9000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = SUCCESS,
+ },
+ {
+ .descr = "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = SUCCESS,
},
};
@@ -351,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd,
return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
}
-static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+static int bind_sock(int domain, int type, const char *ip,
+ unsigned short port, unsigned short port_retry)
{
struct sockaddr_storage addr;
struct sockaddr_in6 *addr6;
struct sockaddr_in *addr4;
int sockfd = -1;
socklen_t len;
- int err = 0;
+ int res = SUCCESS;
sockfd = socket(domain, type, 0);
if (sockfd < 0)
@@ -384,21 +445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port)
goto err;
}
- if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
- goto err;
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (errno != EPERM)
+ goto err;
+ if (port_retry)
+ goto retry;
+ res = BIND_REJECT;
+ goto out;
+ }
goto out;
+retry:
+ if (domain == AF_INET)
+ addr4->sin_port = htons(port_retry);
+ else
+ addr6->sin6_port = htons(port_retry);
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ if (errno != EPERM)
+ goto err;
+ res = RETRY_REJECT;
+ } else {
+ res = RETRY_SUCCESS;
+ }
+ goto out;
err:
- err = -1;
+ res = -1;
out:
close(sockfd);
- return err;
+ return res;
}
static int run_test_case(int cgfd, const struct sock_test *test)
{
int progfd = -1;
int err = 0;
+ int res;
printf("Test case: %s .. ", test->descr);
progfd = load_sock_prog(test->insns, test->expected_attach_type);
@@ -416,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test)
goto err;
}
- if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
- /* sys_bind() may fail for different reasons, errno has to be
- * checked to confirm that BPF program rejected it.
- */
- if (test->result == BIND_REJECT && errno == EPERM)
- goto out;
- else
- goto err;
- }
-
+ res = bind_sock(test->domain, test->type, test->ip, test->port,
+ test->port_retry);
+ if (res > 0 && test->result == res)
+ goto out;
- if (test->result != SUCCESS)
- goto err;
-
- goto out;
err:
err = -1;
out:
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 6c907144311f..1a8eb9672bd1 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -59,6 +59,34 @@
.result_unpriv = ACCEPT,
},
{
+ "check with invalid reg offset 0",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* add invalid offset to memory or NULL */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* should not be able to access *(R7) = 0 */
+ BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "R0 pointer arithmetic on mem_or_null prohibited",
+},
+{
"check corrupted spill/fill",
.insns = {
/* spill R1(ctx) into stack */
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index d548bc139b5d..d36b7da5082a 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -17,4 +17,5 @@ CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index a30e93c5c549..8628aa61b763 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -16,6 +16,7 @@
#include <unistd.h>
#include <time.h>
+#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
@@ -28,6 +29,7 @@
#include <linux/tcp.h>
#include <linux/time_types.h>
+#include <linux/sockios.h>
extern int optind;
@@ -68,6 +70,8 @@ static unsigned int cfg_time;
static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
+static char *cfg_input;
+static int cfg_repeat = 1;
struct cfg_cmsg_types {
unsigned int cmsg_enabled:1;
@@ -91,22 +95,31 @@ static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
- fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
- "[-l] [-w sec] [-t num] [-T num] connect_address\n");
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] "
+ "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] "
+ "[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
- fprintf(stderr, "\t-t num -- set poll timeout to num\n");
- fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
- fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
- fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
- fprintf(stderr, "\t-p num -- use port num\n");
- fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
+ fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin");
+ fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
+ "incoming connections, in client mode, disconnect and reconnect to the server\n");
+ fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
+ "-- for MPJ tests\n");
+ fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
- fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
- fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
+ fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+ fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
+ fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
+ "-- for remove addr tests\n");
+ fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
@@ -310,7 +323,8 @@ static int sock_listen_mptcp(const char * const listenaddr,
}
static int sock_connect_mptcp(const char * const remoteaddr,
- const char * const port, int proto)
+ const char * const port, int proto,
+ struct addrinfo **peer)
{
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
@@ -334,8 +348,10 @@ static int sock_connect_mptcp(const char * const remoteaddr,
if (cfg_mark)
set_mark(sock, cfg_mark);
- if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
+ *peer = a;
break; /* success */
+ }
perror("connect()");
close(sock);
@@ -524,14 +540,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
return ret;
}
-static void set_nonblock(int fd)
+static void set_nonblock(int fd, bool nonblock)
{
int flags = fcntl(fd, F_GETFL);
if (flags == -1)
return;
- fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ if (nonblock)
+ fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ else
+ fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
@@ -543,7 +562,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
unsigned int woff = 0, wlen = 0;
char wbuf[8192];
- set_nonblock(peerfd);
+ set_nonblock(peerfd, true);
for (;;) {
char rbuf[8192];
@@ -638,7 +657,6 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
if (cfg_remove)
usleep(cfg_wait);
- close(peerfd);
return 0;
}
@@ -780,7 +798,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
-static int copyfd_io(int infd, int peerfd, int outfd)
+static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
{
bool in_closed_after_out = false;
struct timespec start, end;
@@ -819,6 +837,9 @@ static int copyfd_io(int infd, int peerfd, int outfd)
if (ret)
return ret;
+ if (close_peerfd)
+ close(peerfd);
+
if (cfg_time) {
unsigned int delta_ms;
@@ -930,7 +951,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (!(cfg_join || cfg_remove) && (r & 1))
+ if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
close(fd);
}
@@ -940,7 +961,9 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int fd = 0;
+again:
polls.fd = listensock;
polls.events = POLLIN;
@@ -961,14 +984,27 @@ int main_loop_s(int listensock)
check_sockaddr(pf, &ss, salen);
check_getpeername(remotesock, &ss, salen);
+ if (cfg_input) {
+ fd = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s: %d", cfg_input, errno);
+ }
+
SOCK_TEST_TCPULP(remotesock, 0);
- return copyfd_io(0, remotesock, 1);
+ copyfd_io(fd, remotesock, 1, true);
+ } else {
+ perror("accept");
+ return 1;
}
- perror("accept");
+ if (--cfg_repeat > 0) {
+ if (cfg_input)
+ close(fd);
+ goto again;
+ }
- return 1;
+ return 0;
}
static void init_rng(void)
@@ -1057,15 +1093,47 @@ static void parse_setsock_options(const char *name)
exit(1);
}
+void xdisconnect(int fd, int addrlen)
+{
+ struct sockaddr_storage empty;
+ int msec_sleep = 10;
+ int queued = 1;
+ int i;
+
+ shutdown(fd, SHUT_WR);
+
+ /* while until the pending data is completely flushed, the later
+ * disconnect will bypass/ignore/drop any pending data.
+ */
+ for (i = 0; ; i += msec_sleep) {
+ if (ioctl(fd, SIOCOUTQ, &queued) < 0)
+ xerror("can't query out socket queue: %d", errno);
+
+ if (!queued)
+ break;
+
+ if (i > poll_timeout)
+ xerror("timeout while waiting for spool to complete");
+ usleep(msec_sleep * 1000);
+ }
+
+ memset(&empty, 0, sizeof(empty));
+ empty.ss_family = AF_UNSPEC;
+ if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
+ xerror("can't disconnect: %d", errno);
+}
+
int main_loop(void)
{
- int fd;
+ int fd, ret, fd_in = 0;
+ struct addrinfo *peer;
/* listener is ready. */
- fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
+ fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer);
if (fd < 0)
return 2;
+again:
check_getpeername_connect(fd);
SOCK_TEST_TCPULP(fd, cfg_sock_proto);
@@ -1077,7 +1145,31 @@ int main_loop(void)
if (cfg_cmsg_types.cmsg_enabled)
apply_cmsg_types(fd, &cfg_cmsg_types);
- return copyfd_io(0, fd, 1);
+ if (cfg_input) {
+ fd_in = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s:%d", cfg_input, errno);
+ }
+
+ /* close the client socket open only if we are not going to reconnect */
+ ret = copyfd_io(fd_in, fd, 1, cfg_repeat == 1);
+ if (ret)
+ return ret;
+
+ if (--cfg_repeat > 0) {
+ xdisconnect(fd, peer->ai_addrlen);
+
+ /* the socket could be unblocking at this point, we need the
+ * connect to be blocking
+ */
+ set_nonblock(fd, false);
+ if (connect(fd, peer->ai_addr, peer->ai_addrlen))
+ xerror("can't reconnect: %d", errno);
+ if (cfg_input)
+ close(fd_in);
+ goto again;
+ }
+ return 0;
}
int parse_proto(const char *proto)
@@ -1162,7 +1254,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jr:lp:s:ht:T:m:S:R:w:M:P:c:o:")) != -1) {
+ while ((c = getopt(argc, argv, "6c:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -1176,6 +1268,12 @@ static void parse_opts(int argc, char **argv)
if (cfg_do_w <= 0)
cfg_do_w = 50;
break;
+ case 'i':
+ cfg_input = optarg;
+ break;
+ case 'I':
+ cfg_repeat = atoi(optarg);
+ break;
case 'l':
listen_mode = true;
break;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index ee28f82a6c89..cb5809b89081 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -7,6 +7,7 @@ optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
+cin_disconnect=""
cin=""
cout=""
ksft_skip=4
@@ -24,6 +25,7 @@ options_log=true
do_tcp=0
checksum=false
filesize=0
+connect_per_transfer=1
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -127,6 +129,7 @@ TEST_COUNT=0
cleanup()
{
+ rm -f "$cin_disconnect" "$cout_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
@@ -149,6 +152,8 @@ sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
+cin_disconnect="$cin".disconnect
+cout_disconnect="$cout".disconnect
trap cleanup EXIT
for i in "$ns1" "$ns2" "$ns3" "$ns4";do
@@ -500,8 +505,8 @@ do_transfer()
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+1))
- expect_ackrx=$((stat_ackrx_last_l+1))
+ expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
fi
if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
@@ -738,6 +743,33 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
+run_tests_disconnect()
+{
+ local peekmode="$1"
+ local old_cin=$cin
+ local old_sin=$sin
+
+ cat $cin $cin $cin > "$cin".disconnect
+
+ # force do_transfer to cope with the multiple tranmissions
+ sin="$cin.disconnect"
+ sin_disconnect=$old_sin
+ cin="$cin.disconnect"
+ cin_disconnect="$old_cin"
+ connect_per_transfer=3
+
+ echo "INFO: disconnect"
+ run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
+ run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
+
+ # restore previous status
+ cout=$old_cout
+ cout_disconnect="$cout".disconnect
+ cin=$old_cin
+ cin_disconnect="$cin".disconnect
+ connect_per_transfer=1
+}
+
display_time()
{
time_end=$(date +%s)
@@ -853,6 +885,9 @@ stop_if_error "Tests with peek mode have failed"
# connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6"
+stop_if_error "Tests with tproxy have failed"
+
+run_tests_disconnect
display_time
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7ef639a9d4a6..27d0eb9afdca 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -238,6 +238,45 @@ is_v6()
[ -z "${1##*:*}" ]
}
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+rm_addr_count()
+{
+ ns=${1}
+
+ ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'
+}
+
+# $1: ns, $2: old rm_addr counter in $ns
+wait_rm_addr()
+{
+ local ns="${1}"
+ local old_cnt="${2}"
+ local cnt
+ local i
+
+ for i in $(seq 10); do
+ cnt=$(rm_addr_count ${ns})
+ [ "$cnt" = "${old_cnt}" ] || break
+ sleep 0.1
+ done
+}
+
do_transfer()
{
listener_ns="$1"
@@ -307,7 +346,7 @@ do_transfer()
fi
spid=$!
- sleep 1
+ wait_local_port_listen "${listener_ns}" "${port}"
if [ "$test_link_fail" -eq 0 ];then
timeout ${timeout_test} \
@@ -324,10 +363,13 @@ do_transfer()
fi
cpid=$!
+ # let the mptcp subflow be established in background before
+ # do endpoint manipulation
+ [ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1
+
if [ $addr_nr_ns1 -gt 0 ]; then
let add_nr_ns1=addr_nr_ns1
counter=2
- sleep 1
while [ $add_nr_ns1 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -339,7 +381,6 @@ do_transfer()
let counter+=1
let add_nr_ns1-=1
done
- sleep 1
elif [ $addr_nr_ns1 -lt 0 ]; then
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
@@ -347,22 +388,19 @@ do_transfer()
pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- sleep 1
-
while [ $counter -le $rm_nr_ns1 ]
do
id=${dump[$pos]}
+ rm_addr=$(rm_addr_count ${connector_ns})
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
- sleep 1
+ wait_rm_addr ${connector_ns} ${rm_addr}
let counter+=1
let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
- sleep 1
ip netns exec ${listener_ns} ./pm_nl_ctl flush
elif [ $rm_nr_ns1 -eq 9 ]; then
- sleep 1
ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
fi
fi
@@ -373,10 +411,13 @@ do_transfer()
addr_nr_ns2=${addr_nr_ns2:9}
fi
+ # if newly added endpoints must be deleted, give the background msk
+ # some time to created them
+ [ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1
+
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
- sleep 1
while [ $add_nr_ns2 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -388,7 +429,6 @@ do_transfer()
let counter+=1
let add_nr_ns2-=1
done
- sleep 1
elif [ $addr_nr_ns2 -lt 0 ]; then
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
@@ -396,19 +436,18 @@ do_transfer()
pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- sleep 1
-
while [ $counter -le $rm_nr_ns2 ]
do
+ # rm_addr are serialized, allow the previous one to complete
id=${dump[$pos]}
+ rm_addr=$(rm_addr_count ${listener_ns})
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
- sleep 1
+ wait_rm_addr ${listener_ns} ${rm_addr}
let counter+=1
let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
- sleep 1
ip netns exec ${connector_ns} ./pm_nl_ctl flush
elif [ $rm_nr_ns2 -eq 9 ]; then
local addr
@@ -417,7 +456,6 @@ do_transfer()
else
addr="10.0.1.2"
fi
- sleep 1
ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
fi
fi
@@ -539,6 +577,14 @@ run_tests()
lret=$?
}
+dump_stats()
+{
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep Tcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep Tcp
+}
+
chk_csum_nr()
{
local msg=${1:-""}
@@ -570,12 +616,7 @@ chk_csum_nr()
else
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_fail_nr()
@@ -607,12 +648,7 @@ chk_fail_nr()
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_join_nr()
@@ -656,12 +692,7 @@ chk_join_nr()
else
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
if [ $checksum -eq 1 ]; then
chk_csum_nr
chk_fail_nr 0 0
@@ -823,12 +854,7 @@ chk_add_nr()
echo ""
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_rm_nr()
@@ -871,12 +897,7 @@ chk_rm_nr()
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_prio_nr()
@@ -908,12 +929,7 @@ chk_prio_nr()
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_link_usage()
@@ -937,6 +953,22 @@ chk_link_usage()
fi
}
+wait_for_tw()
+{
+ local timeout_ms=$((timeout_poll * 1000))
+ local time=0
+ local ns=$1
+
+ while [ $time -lt $timeout_ms ]; do
+ local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l)
+
+ [ "$cnt" = 1 ] && return 1
+ time=$((time + 100))
+ sleep 0.1
+ done
+ return 1
+}
+
subflows_tests()
{
reset
@@ -994,6 +1026,61 @@ subflows_tests()
chk_join_nr "single subflow, dev" 1 1 1
}
+subflows_error_tests()
+{
+ # If a single subflow is configured, and matches the MPC src
+ # address, no additional subflow should be created
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "no MPC reuse with single endpoint" 0 0 0
+
+ # multiple subflows, with subflow creation error
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "multi subflows, with failing subflow" 1 1 1
+
+ # multiple subflows, with subflow timeout on MPJ
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "multi subflows, with subflow timeout" 1 1 1
+
+ # multiple subflows, check that the endpoint corresponding to
+ # closed subflow (due to reset) is not reused if additional
+ # subflows are added later
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+
+ # updates in the child shell do not have any effect here, we
+ # need to bump the test counter for the above case
+ TEST_COUNT=$((TEST_COUNT+1))
+
+ # mpj subflow will be in TW after the reset
+ wait_for_tw $ns2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ wait
+
+ # additional subflow could be created only if the PM select
+ # the later endpoint, skipping the already used one
+ chk_join_nr "multi subflows, fair usage on close" 1 1 1
+}
+
signal_address_tests()
{
# add_address, unused
@@ -1071,7 +1158,10 @@ signal_address_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
run_tests $ns1 $ns2 10.0.1.1
- chk_add_nr 4 4
+
+ # the server will not signal the address terminating
+ # the MPC subflow
+ chk_add_nr 3 3
}
link_failure_tests()
@@ -1577,7 +1667,7 @@ add_addr_ports_tests()
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
chk_join_nr "flush subflows and signal with port" 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
@@ -1802,6 +1892,7 @@ fullmesh_tests()
all_tests()
{
subflows_tests
+ subflows_error_tests
signal_address_tests
link_failure_tests
add_addr_timeout_tests
@@ -1821,6 +1912,7 @@ usage()
{
echo "mptcp_join usage:"
echo " -f subflows_tests"
+ echo " -e subflows_error_tests"
echo " -s signal_address_tests"
echo " -l link_failure_tests"
echo " -t add_addr_timeout_tests"
@@ -1869,11 +1961,14 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkdmchCS' opt; do
+while getopts 'fesltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
;;
+ e)
+ subflows_error_tests
+ ;;
s)
signal_address_tests
;;