The properties described here are those specific to Marvell devices.
Additional required and optional properties can be found in dsa.txt.
+The compatibility string is used only to find an identification register,
+which is at a different MDIO base address in different switch families.
+- "marvell,mv88e6085" : Switch has base address 0x10. Use with models:
+ 6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165,
+ 6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
+ 6341, 6350, 6351, 6352
+- "marvell,mv88e6190" : Switch has base address 0x00. Use with models:
+ 6190, 6190X, 6191, 6290, 6390, 6390X
+
Required properties:
- compatible : Should be one of "marvell,mv88e6085" or
- "marvell,mv88e6190"
+ "marvell,mv88e6190" as indicated above
- reg : Address on the MII bus for the switch.
Optional properties:
compatible = "marvell,mv88e6085";
reg = <0>;
reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
- };
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy0: switch1phy0@0 {
- reg = <0>;
- interrupt-parent = <&switch0>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch1phy0: switch1phy0@0 {
+ reg = <0>;
+ interrupt-parent = <&switch0>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
};
};
compatible = "marvell,mv88e6390";
reg = <0>;
reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
- };
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy0: switch1phy0@0 {
- reg = <0>;
- interrupt-parent = <&switch0>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch1phy0: switch1phy0@0 {
+ reg = <0>;
+ interrupt-parent = <&switch0>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
- };
- mdio1 {
- compatible = "marvell,mv88e6xxx-mdio-external";
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy9: switch1phy0@9 {
- reg = <9>;
+ mdio1 {
+ compatible = "marvell,mv88e6xxx-mdio-external";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch1phy9: switch1phy0@9 {
+ reg = <9>;
+ };
};
};
};
F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
F: drivers/iio/potentiometer/mcp4531.c
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M: Xue Liu <liuxuenetmail@gmail.com>
+L: linux-wpan@vger.kernel.org
+W: https://github.com/xueliu/mcr20a-linux
+S: Maintained
+F: drivers/net/ieee802154/mcr20a.c
+F: drivers/net/ieee802154/mcr20a.h
+F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <vilhelm.gray@gmail.com>
L: linux-iio@vger.kernel.org
F: include/linux/platform_data/microchip-ksz.h
F: Documentation/devicetree/bindings/net/dsa/ksz.txt
+MICROCHIP LAN743X ETHERNET DRIVER
+M: Bryan Whitehead <bryan.whitehead@microchip.com>
+M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/microchip/lan743x_*
+
MICROCHIP USB251XB DRIVER
M: Richard Leitner <richard.leitner@skidata.com>
L: linux-usb@vger.kernel.org
F: include/linux/nvmem-consumer.h
F: include/linux/nvmem-provider.h
+ NXP SGTL5000 DRIVER
+ M: Fabio Estevam <fabio.estevam@nxp.com>
+ L: alsa-devel@alsa-project.org (moderated for non-subscribers)
+ S: Maintained
+ F: Documentation/devicetree/bindings/sound/sgtl5000.txt
+ F: sound/soc/codecs/sgtl5000*
+
NXP TDA998X DRM DRIVER
M: Russell King <linux@armlinux.org.uk>
S: Supported
F: include/linux/oprofile.h
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
- M: Mark Fasheh <mfasheh@versity.com>
+ M: Mark Fasheh <mark@fasheh.com>
M: Joel Becker <jlbec@evilplan.org>
L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
PER-CPU MEMORY ALLOCATOR
M: Tejun Heo <tj@kernel.org>
M: Christoph Lameter <cl@linux.com>
+ M: Dennis Zhou <dennisszhou@gmail.com>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
S: Maintained
F: include/linux/percpu*.h
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
F: sound/soc/samsung/
+ F: Documentation/devicetree/bindings/sound/samsung*
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
M: Krzysztof Kozlowski <krzk@kernel.org>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
+#include <linux/bpf.h>
+
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
-#include <linux/bpf.h>
/*
* assembly code in arch/x86/net/bpf_jit.S
static bool is_simm32(s64 value)
{
- return value == (s64) (s32) value;
+ return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+ return value == (u64)(u32)value;
}
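
To make the two range checks above concrete, here is a minimal standalone sketch (plain C with standard fixed-width types, not part of the JIT; the harness and the test values are purely illustrative). It shows that -1 passes the signed check but not the unsigned one, while 0xffffffff is the opposite, which is the distinction emit_mov_imm64() relies on further down:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* standalone copies of the JIT's range checks */
static bool is_simm32(int64_t value)
{
	return value == (int64_t)(int32_t)value;
}

static bool is_uimm32(uint64_t value)
{
	return value == (uint64_t)(uint32_t)value;
}

int main(void)
{
	/* -1 survives sign extension from 32 bits, but not zero extension */
	printf("-1:          simm32=%d uimm32=%d\n",
	       is_simm32(-1), is_uimm32((uint64_t)-1));             /* 1 0 */
	/* 0xffffffff only fits when it may be zero extended */
	printf("0xffffffff:  simm32=%d uimm32=%d\n",
	       is_simm32(0xffffffffLL), is_uimm32(0xffffffffULL));  /* 0 1 */
	return 0;
}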
/* mov dst, src */
#define X86_JLE 0x7E
#define X86_JG 0x7F
-static void bpf_flush_icache(void *start, void *end)
-{
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
- set_fs(old_fs);
-}
-
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
/* emit x64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
int cnt = 0;
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
- * we need to reset the counter to 0. It's done in two instructions,
- * resetting rax register to 0 (xor on eax gets 0 extended), and
- * moving it to the counter location.
- */
+ if (!ebpf_from_cbpf) {
+ /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ * calls we need to reset the counter to 0. It's done in two
+ * instructions: resetting the rax register to 0 and moving it
+ * to the counter location.
+ */
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ /* xor eax, eax */
+ EMIT2(0x31, 0xc0);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ }
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
}
*pprog = prog;
}
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+ u32 dst_reg, const u32 imm32)
+{
+ u8 *prog = *pprog;
+ u8 b1, b2, b3;
+ int cnt = 0;
+
+ /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ * (which zero-extends imm32) to save 2 bytes.
+ */
+ if (sign_propagate && (s32)imm32 < 0) {
+ /* 'mov %rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, dst_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+ goto done;
+ }
+
+ /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ * to save 3 bytes.
+ */
+ if (imm32 == 0) {
+ if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ b2 = 0x31; /* xor */
+ b3 = 0xC0;
+ EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+ goto done;
+ }
+
+ /* mov %eax, imm32 */
+ if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
+ EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+ *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+ const u32 imm32_hi, const u32 imm32_lo)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+ /* For emitting plain u32, where the sign bit must not be
+ * propagated, LLVM tends to load imm64 over mov32
+ * directly, so save a couple of bytes by just doing
+ * 'mov %eax, imm32' instead.
+ */
+ emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+ } else {
+ /* movabsq %rax, imm64 */
+ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+ EMIT(imm32_lo, 4);
+ EMIT(imm32_hi, 4);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is64) {
+ /* mov dst, src */
+ EMIT_mov(dst_reg, src_reg);
+ } else {
+ /* mov32 dst, src */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ }
+
+ *pprog = prog;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog, bpf_prog->aux->stack_depth);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog));
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
- u8 b1 = 0, b2 = 0, b3 = 0;
+ u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
bool reload_skb_data;
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
- /* mov dst, src */
case BPF_ALU64 | BPF_MOV | BPF_X:
- EMIT_mov(dst_reg, src_reg);
- break;
-
- /* mov32 dst, src */
case BPF_ALU | BPF_MOV | BPF_X:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT1(add_2mod(0x40, dst_reg, src_reg));
- EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
- /* optimization: if imm32 is positive,
- * use 'mov eax, imm32' (which zero-extends imm32)
- * to save 2 bytes
- */
- if (imm32 < 0) {
- /* 'mov rax, imm32' sign extends imm32 */
- b1 = add_1mod(0x48, dst_reg);
- b2 = 0xC7;
- b3 = 0xC0;
- EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
- break;
- }
-
case BPF_ALU | BPF_MOV | BPF_K:
- /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
- * to save 3 bytes.
- */
- if (imm32 == 0) {
- if (is_ereg(dst_reg))
- EMIT1(add_2mod(0x40, dst_reg, dst_reg));
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
- break;
- }
-
- /* mov %eax, imm32 */
- if (is_ereg(dst_reg))
- EMIT1(add_1mod(0x40, dst_reg));
- EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+ emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, imm32);
break;
case BPF_LD | BPF_IMM | BPF_DW:
- /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
- * to save 7 bytes.
- */
- if (insn[0].imm == 0 && insn[1].imm == 0) {
- b1 = add_2mod(0x48, dst_reg, dst_reg);
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
- insn++;
- i++;
- break;
- }
-
- /* movabsq %rax, imm64 */
- EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
- EMIT(insn[0].imm, 4);
- EMIT(insn[1].imm, 4);
-
+ emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
insn++;
i++;
break;
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
+ {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
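+ /* mul clobbers rax and rdx; only save them when dst_reg is not
+ * already mapped to rax (BPF_REG_0) or rdx (BPF_REG_3).
+ */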
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
if (BPF_SRC(insn->code) == BPF_X)
- /* mov rax, src_reg */
- EMIT_mov(BPF_REG_0, src_reg);
+ emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
- /* mov rax, imm32 */
- EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+ emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
+ if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
- /* mov r11, rax */
- EMIT_mov(AUX_REG, BPF_REG_0);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0) {
+ /* mov dst_reg, rax */
+ EMIT_mov(dst_reg, BPF_REG_0);
+ EMIT1(0x58); /* pop rax */
+ }
break;
-
+ }
/* shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_RSH: b3 = 0xE8; break;
case BPF_ARSH: b3 = 0xF8; break;
}
- EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
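+ /* a shift by 1 can use the shorter 0xD1 encoding, which takes
+ * no imm8 byte, instead of the generic 0xC1 /imm8 form.
+ */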
+ if (imm32 == 1)
+ EMIT2(0xD1, add_1reg(b3, dst_reg));
+ else
+ EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
case BPF_ALU | BPF_LSH | BPF_X:
* may converge on the last pass. In such a case, do one more
* pass to emit the final image
*/
- for (pass = 0; pass < 10 || image; pass++) {
+ for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
image = NULL;
}
}
oldproglen = proglen;
+ cond_resched();
}
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, proglen, pass + 1, image);
if (image) {
- bpf_flush_icache(header, image + proglen);
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
} else {
{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 },
- { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
/* QCA ROME chipset */
+ { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME },
/* Intel Bluetooth devices */
{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+ { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
+ /* Additional Realtek 8822BE Bluetooth devices */
+ { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
/* Silicon Wave based devices */
{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
*/
static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
{
- /* Lenovo Yoga 920 (QCA Rome device 0cf3:e300) */
+ /* Dell OptiPlex 3060 (QCA ROME device 0cf3:e007) */
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 920"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"),
},
},
{}
case 0x0c: /* WsP */
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
break;
default:
BT_ERR("%s: Unsupported Intel hardware variant (%u)",
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
- strict);
+ NULL, strict);
bool ret;
if (!rt)
continue;
/* different dest port -> unique */
- if (!cma_any_port(cur_daddr) &&
+ if (!cma_any_port(daddr) &&
+ !cma_any_port(cur_daddr) &&
(dport != cur_dport))
continue;
continue;
/* different dst address -> unique */
- if (!cma_any_addr(cur_daddr) &&
+ if (!cma_any_addr(daddr) &&
+ !cma_any_addr(cur_daddr) &&
cma_addr_cmp(daddr, cur_daddr))
continue;
}
#endif
}
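+ /* Initialize the destination family before cma_get_port(); the
+ * port uniqueness checks above read cma_dst_addr() of this id.
+ */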
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
ret = cma_get_port(id_priv);
if (ret)
goto err2;
- daddr = cma_dst_addr(id_priv);
- daddr->sa_family = addr->sa_family;
-
return 0;
err2:
if (id_priv->cma_dev)
struct cma_multicast *mc;
int ret;
+ if (!id->device)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
!cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
.exit = cma_exit_net,
.id = &cma_pernet_id,
.size = sizeof(struct cma_pernet),
+ .async = true,
};
static int __init cma_init(void)
}
}
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
- return mlx5_buf_offset(&buf->buf, n * size);
-}
-
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
- return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}
static u8 sw_ownership_bit(int n, int nent)
wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
break;
}
- wc->slid = be16_to_cpu(cqe->slid);
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
}
if (ll != IB_LINK_LAYER_ETHERNET) {
+ wc->slid = be16_to_cpu(cqe->slid);
wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
return;
}
+ wc->slid = 0;
vlan_present = cqe->l4_l3_hdr_type & 0x1;
roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
if (vlan_present) {
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_buf_free(dev->mdev, &buf->buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
return ret;
}
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
- int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_cq_buf *buf,
+ int nent,
+ int cqe_size)
{
+ struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+ struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+ u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
int err;
- err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+ MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+ MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+ mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+ err = mlx5_frag_buf_alloc_node(dev->mdev,
+ nent * cqe_size,
+ frag_buf,
+ dev->mdev->priv.numa_node);
if (err)
return err;
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe = get_cqe(cq, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;
- err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;
- init_cq_buf(cq, &cq->buf);
+ init_cq_frag_buf(cq, &cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
- MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.fbc.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_array(&cq->buf.buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ cq->buf.fbc.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
- umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+ /* check multiplication overflow */
+ if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
+ return -EINVAL;
+
+ umem = ib_umem_get(context, ucmd.buf_addr,
+ (size_t)ucmd.cqe_size * entries,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
if (!cq->resize_buf)
return -ENOMEM;
- err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
- init_cq_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq, cq->resize_buf);
return 0;
}
while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
- dcqe = get_cqe_from_buf(cq->resize_buf,
- (i + 1) & (cq->resize_buf->nent),
- dsize);
+ dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+ (i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
- npas = cq->resize_buf->buf.npages;
- page_shift = cq->resize_buf->buf.page_shift;
+ struct mlx5_frag_buf_ctrl *c;
+
+ c = &cq->resize_buf->fbc;
+ npas = c->frag_buf.npages;
+ page_shift = c->frag_buf.page_shift;
}
}
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+ pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
--- /dev/null
- STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
- mlx5_ib_stage_umr_res_init,
- mlx5_ib_stage_umr_res_cleanup),
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
++ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
++ NULL,
++ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
++ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
++ mlx5_ib_stage_post_ib_reg_umr_init,
++ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF */
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
+#include <linux/mlx5/fs_helpers.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
int ret;
memset(&attr, 0, sizeof(attr));
- ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
-
- if (ndev->dev.parent == &mdev->pdev->dev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
+ if (ibdev->rep) {
+ struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ ibdev->rep->vport);
+ if (rep_ndev == ndev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
+ } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ }
write_unlock(&roce->netdev_lock);
break;
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_port *port;
+ if (!mlx5_core_mp_enabled(ibdev->mdev) ||
+ ll != IB_LINK_LAYER_ETHERNET) {
+ if (native_port_num)
+ *native_port_num = ib_port_num;
+ return ibdev->mdev;
+ }
+
if (native_port_num)
*native_port_num = 1;
- if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
- return ibdev->mdev;
-
port = &ibdev->port[ib_port_num - 1];
if (!port)
return NULL;
return ret;
}
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ int ret;
+
+ /* Only link layer == ethernet is valid for representors */
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ if (ret || !props)
+ return ret;
+
+ /* We don't support GIDs */
+ props->gid_tbl_len = 0;
+
+ return ret;
+}
+
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-#define IPV4_VERSION 4
-#define IPV6_VERSION 6
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
- u32 *tag_id, bool *is_drop)
+ struct mlx5_flow_act *action)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ip_version, 0xf);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, IPV4_VERSION);
+ ip_version, MLX5_FS_IPV4_VERSION);
} else {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ip_version, 0xf);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, IPV6_VERSION);
+ ip_version, MLX5_FS_IPV6_VERSION);
} else {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- *tag_id = ib_spec->flow_tag.tag_id;
+ action->flow_tag = ib_spec->flow_tag.tag_id;
+ action->has_flow_tag = true;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
LAST_DROP_FIELD))
return -EOPNOTSUPP;
- *is_drop = true;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
default:
return -EINVAL;
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
+ prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
- prio = &dev->flow_db.sniffer[ft_type];
+ prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination *rule_dst = dst;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
- u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- bool is_drop = false;
int err = 0;
int dest_num = 1;
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, &flow_tag, &is_drop);
+ ib_flow, &flow_act);
if (err < 0)
goto free;
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
+ if (dev->rep) {
+ void *misc;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->rep->vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- if (is_drop) {
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
rule_dst = NULL;
dest_num = 0;
} else {
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ if (flow_act.has_flow_tag &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_tag, flow_attr->type);
+ flow_act.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
- flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
rule_dst, dest_num);
if (!dst)
return ERR_PTR(-ENOMEM);
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
goto destroy_ft;
}
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
- u8 port = 0;
+ u8 port = (u8)work->param;
if (mlx5_core_is_mp_slave(work->dev)) {
ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
case MLX5_DEV_EVENT_PORT_INITIALIZED:
- port = (u8)work->param;
-
/* In RoCE, port up/down events are handled in
* mlx5_netdev_event().
*/
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
- port = (u8)work->param;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
- port = (u8)work->param;
-
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
- port = (u8)work->param;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
- port = (u8)work->param;
break;
case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
schedule_work(&ibdev->delay_drop.delay_drop_work);
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
- if (port < 1 || port > ibdev->num_ports) {
+ if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
goto out;
}
return 0;
}
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->flow_db->lag_demux_ft) {
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
{
int err;
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
-
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
kfree(dev->port);
}
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
goto err_free_port;
if (!mlx5_core_mp_enabled(mdev)) {
- int i;
-
for (i = 1; i <= dev->num_ports; i++) {
err = get_port_caps(dev, i);
if (err)
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
- mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
return -ENOMEM;
}
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dev *nic_dev;
+
+ nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+ if (!nic_dev)
+ return -EINVAL;
+
+ dev->flow_db = nic_dev->flow_db;
+
+ return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
return 0;
}
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
+ dev->ib_dev.query_port = mlx5_ib_rep_query_port;
+
+ return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+ return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ int err = 0;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+ return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int port_type_cap;
u8 port_num;
int err;
- int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ if (err)
+ return err;
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
if (err)
- return err;
+ goto cleanup;
}
return 0;
+cleanup:
+ mlx5_ib_stage_common_roce_cleanup(dev);
+
+ return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_ib_stage_common_roce_cleanup(dev);
}
}
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
return mlx5_ib_odp_init_one(dev);
}
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
return 0;
}
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
return err;
}
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
- void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
-static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
++void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- ib_unregister_device(&dev->ib_dev);
+ destroy_umrc_res(dev);
}
- int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
++void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ ib_unregister_device(&dev->ib_dev);
}
- void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
-static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
++int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ return create_umr_res(dev);
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
return 0;
}
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile,
- int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_register_vport_reps(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
{
/* Number of stages to cleanup */
while (stage) {
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
- struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
- mlx5_ib_stage_umr_res_init,
- mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
NULL),
};
- STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
- mlx5_ib_stage_umr_res_init,
- mlx5_ib_stage_umr_res_cleanup),
+static const struct mlx5_ib_profile nic_rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
++ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
++ NULL,
++ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
++ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
++ mlx5_ib_stage_post_ib_reg_umr_init,
++ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+ mlx5_ib_stage_rep_reg_init,
+ mlx5_ib_stage_rep_reg_cleanup),
+};
+
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
int port_type_cap;
+ printk_once(KERN_INFO "%s", mlx5_version);
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
return mlx5_ib_add_slave_port(mdev, port_num);
}
- return __mlx5_ib_add(mdev, &pf_profile);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ if (MLX5_VPORT_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+ return __mlx5_ib_add(dev, &nic_rep_profile);
+ }
+
+ return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
};
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
};
struct mlx5_ib_cq_buf {
- struct mlx5_buf buf;
+ struct mlx5_frag_buf_ctrl fbc;
struct ib_umem *umem;
int cqe_size;
int nent;
struct mlx5_ib_srq {
struct ib_srq ibsrq;
struct mlx5_core_srq msrq;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
u64 *wrid;
/* protect SRQ handling
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_CONG_DEBUGFS,
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
+ MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_IB_REG,
- MLX5_IB_STAGE_UMR_RESOURCES,
+ MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
- struct mlx5_ib_flow_db flow_db;
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
+ struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
- int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
- void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
++void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
++int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
struct mlx5_cache_ent *ent;
int i;
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
*umem = ib_umem_get(pd->uobject->context, start, length,
access_flags, 0);
err = PTR_ERR_OR_ZERO(*umem);
- if (err < 0) {
+ if (err) {
+ *umem = NULL;
mlx5_ib_err(dev, "umem get failed (%d)\n", err);
return err;
}
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
ib_umem_release(mr->umem);
+ mr->umem = NULL;
clean_mr(dev, mr);
return err;
}
u32 key = mr->mmkey.key;
err = destroy_mkey(dev, mr);
- kfree(mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
key, err);
return err;
}
- } else {
- mlx5_mr_cache_free(dev, mr);
}
return 0;
atomic_sub(npages, &dev->mdev->priv.reg_pages);
}
+ if (!mr->allocated_from_cache)
+ kfree(mr);
+ else
+ mlx5_mr_cache_free(dev, mr);
+
return 0;
}
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
- mr->ndescs = sg_nents;
for_each_sg(sgl, sg, sg_nents, i) {
if (unlikely(i >= mr->max_descs))
sg_offset = 0;
}
+ mr->ndescs = i;
if (sg_offset_p)
*sg_offset_p = sg_offset;
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
if (err)
goto err_umem;
+ err = create_flow_rule_vport_sq(dev, sq);
+ if (err)
+ goto err_flow;
+
return 0;
+err_flow:
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
+ destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
- static int get_rq_pas_size(void *qpc)
+ static size_t get_rq_pas_size(void *qpc)
{
u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
}
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, void *qpin)
+ struct mlx5_ib_rq *rq, void *qpin,
+ size_t qpinlen)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
void *rqc;
void *wq;
void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
- int inlen;
+ size_t rq_pas_size = get_rq_pas_size(qpc);
+ size_t inlen;
int err;
- u32 rq_pas_size = get_rq_pas_size(qpc);
+
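+ /* qpin was sized by the caller; make sure it really holds the
+ * RQ PAS entries before they are copied out below.
+ */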
+ if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas))
+ return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
in = kvzalloc(inlen, GFP_KERNEL);
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- u32 *in,
+ u32 *in, size_t inlen,
struct ib_pd *pd)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in);
+ err = create_raw_packet_qp_rq(dev, rq, in, inlen);
if (err)
goto err_destroy_sq;
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
u32 uidx = MLX5_IB_DEFAULT_UIDX;
struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_qp_base *base;
+ int mlx5_st;
void *qpc;
u32 *in;
int err;
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ mlx5_st = to_mlx5_st(init_attr->qp_type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
if (init_attr->rwq_ind_tbl) {
if (!udata)
return -ENOSYS;
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+ MLX5_SET(qpc, qpc, st, mlx5_st);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
}
}
+ if (inlen < 0) {
+ err = -EINVAL;
+ goto err;
+ }
+
if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
- err = create_raw_packet_qp(dev, qp, in, pd);
+ err = create_raw_packet_qp(dev, qp, in, inlen, pd);
} else {
err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
}
struct ib_qp_init_attr *attr,
struct mlx5_ib_create_qp *ucmd)
{
- struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
int err = 0;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
if (!attr->srq || !attr->recv_cq)
return ERR_PTR(-EINVAL);
- dev = to_mdev(pd->device);
-
err = get_qp_user_index(to_mucontext(pd->uobject->context),
ucmd, sizeof(*ucmd), &uidx);
if (err)
goto out;
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
- !optab[mlx5_cur][mlx5_new])
+ !optab[mlx5_cur][mlx5_new]) {
+ err = -EINVAL;
goto out;
+ }
op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
unsigned int i;
for (i = 0; i < mib_size; i++)
- memcpy(data + i * ETH_GSTRING_LEN,
- mibs[i].name, ETH_GSTRING_LEN);
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ mibs[i].name, ETH_GSTRING_LEN);
}
EXPORT_SYMBOL(b53_get_strings);
}
EXPORT_SYMBOL(b53_get_ethtool_stats);
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
#include "t4fw_api.h"
#include "t4fw_version.h"
#include "cxgb4_dcb.h"
+#include "srq.h"
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"
case 40000:
s = "40Gbps";
break;
+ case 50000:
+ s = "50Gbps";
+ break;
case 100000:
s = "100Gbps";
break;
const struct cpl_abort_rpl_rss *p = (void *)rsp;
hash_del_filter_rpl(q->adap, p);
+ } else if (opcode == CPL_SRQ_TABLE_RPL) {
+ const struct cpl_srq_table_rpl *p = (void *)rsp;
+
+ do_srq_table_rpl(q->adap, p);
} else
dev_err(q->adap->pdev_dev,
"unexpected CPL %#x on FW event queue\n", opcode);
int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
{
- struct adapter *adap;
- u32 offset, memtype, memaddr;
u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
u32 edc0_end, edc1_end, mc0_end, mc1_end;
+ u32 offset, memtype, memaddr;
+ struct adapter *adap;
+ u32 hma_size = 0;
int ret;
adap = netdev2adap(dev);
size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
mc0_size = EXT_MEM0_SIZE_G(size) << 20;
+ if (t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A) & HMA_MUX_F) {
+ size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ hma_size = EXT_MEM1_SIZE_G(size) << 20;
+ }
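+	/* When the HMA mux is enabled, the HMA region sits directly after
+	 * EDC1 in the target memory map (see the offset checks below).
+	 */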
edc0_end = edc0_size;
edc1_end = edc0_end + edc1_size;
mc0_end = edc1_end + mc0_size;
memtype = MEM_EDC1;
memaddr = offset - edc0_end;
} else {
- if (offset < mc0_end) {
+ if (hma_size && (offset < (edc1_end + hma_size))) {
+ memtype = MEM_HMA;
+ memaddr = offset - edc1_end;
+ } else if (offset < mc0_end) {
memtype = MEM_MC0;
memaddr = offset - edc1_end;
} else if (is_t5(adap->params.chip)) {
/* Convert from Mbps to Kbps */
req_rate = rate << 10;
- /* Max rate is 10 Gbps */
+ /* Max rate is 100 Gbps */
if (req_rate >= SCHED_MAX_RATE_KBPS) {
dev_err(adap->pdev_dev,
- "Invalid rate %u Mbps, Max rate is %u Gbps\n",
- rate, SCHED_MAX_RATE_KBPS);
+ "Invalid rate %u Mbps, Max rate is %u Mbps\n",
+ rate, SCHED_MAX_RATE_KBPS >> 10);
return -ERANGE;
}
.get_drvinfo = cxgb4_mgmt_get_drvinfo,
};
+static void notify_fatal_err(struct work_struct *work)
+{
+ struct adapter *adap;
+
+ adap = container_of(work, struct adapter, fatal_err_notify_task);
+ notify_ulds(adap, CXGB4_STATE_FATAL_ERROR);
+}
+
void t4_fatal_err(struct adapter *adap)
{
int port;
netif_carrier_off(dev);
}
dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
+ queue_work(adap->workq, &adap->fatal_err_notify_task);
}
static void setup_memwin(struct adapter *adap)
}
}
+/* HMA Definitions */
+
+/* The maximum number of addresses that can be sent in a single FW cmd */
+#define HMA_MAX_ADDR_IN_CMD 5
+
+#define HMA_PAGE_SIZE PAGE_SIZE
+
+#define HMA_MAX_NO_FW_ADDRESS (16 << 10) /* FW supports 16K addresses */
+
+#define HMA_PAGE_ORDER \
+ ((HMA_PAGE_SIZE < HMA_MAX_NO_FW_ADDRESS) ? \
+ ilog2(HMA_MAX_NO_FW_ADDRESS / HMA_PAGE_SIZE) : 0)
+
+/* The minimum and maximum possible HMA sizes that can be specified in the FW
+ * configuration(in units of MB).
+ */
+#define HMA_MIN_TOTAL_SIZE 1
+#define HMA_MAX_TOTAL_SIZE \
+ (((HMA_PAGE_SIZE << HMA_PAGE_ORDER) * \
+ HMA_MAX_NO_FW_ADDRESS) >> 20)
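+/* i.e. HMA_MAX_NO_FW_ADDRESS chunks of (HMA_PAGE_SIZE << HMA_PAGE_ORDER)
+ * bytes each, expressed in MB.
+ */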
+
+static void adap_free_hma_mem(struct adapter *adapter)
+{
+ struct scatterlist *iter;
+ struct page *page;
+ int i;
+
+ if (!adapter->hma.sgt)
+ return;
+
+ if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
+ dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
+ adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
+ adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
+ }
+
+ for_each_sg(adapter->hma.sgt->sgl, iter,
+ adapter->hma.sgt->orig_nents, i) {
+ page = sg_page(iter);
+ if (page)
+ __free_pages(page, HMA_PAGE_ORDER);
+ }
+
+ kfree(adapter->hma.phy_addr);
+ sg_free_table(adapter->hma.sgt);
+ kfree(adapter->hma.sgt);
+ adapter->hma.sgt = NULL;
+}
+
+static int adap_config_hma(struct adapter *adapter)
+{
+ struct scatterlist *sgl, *iter;
+ struct sg_table *sgt;
+ struct page *newpage;
+ unsigned int i, j, k;
+ u32 param, hma_size;
+ unsigned int ncmds;
+ size_t page_size;
+ u32 page_order;
+ int node, ret;
+
+ /* HMA is supported only for T6+ cards.
+ * Avoid initializing HMA in kdump kernels.
+ */
+ if (is_kdump_kernel() ||
+ CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
+ return 0;
+
+ /* Get the HMA region size required by fw */
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HMA_SIZE));
+ ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+			      1, &param, &hma_size);
+	/* An error means the card has its own memory or HMA is not supported by
+	 * the firmware. Return without error.
+ */
+ if (ret || !hma_size)
+ return 0;
+
+ if (hma_size < HMA_MIN_TOTAL_SIZE ||
+ hma_size > HMA_MAX_TOTAL_SIZE) {
+ dev_err(adapter->pdev_dev,
+ "HMA size %uMB beyond bounds(%u-%lu)MB\n",
+ hma_size, HMA_MIN_TOTAL_SIZE, HMA_MAX_TOTAL_SIZE);
+ return -EINVAL;
+ }
+
+ page_size = HMA_PAGE_SIZE;
+ page_order = HMA_PAGE_ORDER;
+ adapter->hma.sgt = kzalloc(sizeof(*adapter->hma.sgt), GFP_KERNEL);
+ if (unlikely(!adapter->hma.sgt)) {
+ dev_err(adapter->pdev_dev, "HMA SG table allocation failed\n");
+ return -ENOMEM;
+ }
+ sgt = adapter->hma.sgt;
+	/* FW returned value will be in MB */
+ sgt->orig_nents = (hma_size << 20) / (page_size << page_order);
+ if (sg_alloc_table(sgt, sgt->orig_nents, GFP_KERNEL)) {
+ dev_err(adapter->pdev_dev, "HMA SGL allocation failed\n");
+ kfree(adapter->hma.sgt);
+ adapter->hma.sgt = NULL;
+ return -ENOMEM;
+ }
+
+ sgl = adapter->hma.sgt->sgl;
+ node = dev_to_node(adapter->pdev_dev);
+ for_each_sg(sgl, iter, sgt->orig_nents, i) {
+ newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
+ page_order);
+ if (!newpage) {
+ dev_err(adapter->pdev_dev,
+ "Not enough memory for HMA page allocation\n");
+ ret = -ENOMEM;
+ goto free_hma;
+ }
+ sg_set_page(iter, newpage, page_size << page_order, 0);
+ }
+
+ sgt->nents = dma_map_sg(adapter->pdev_dev, sgl, sgt->orig_nents,
+ DMA_BIDIRECTIONAL);
+ if (!sgt->nents) {
+ dev_err(adapter->pdev_dev,
+ "Not enough memory for HMA DMA mapping");
+ ret = -ENOMEM;
+ goto free_hma;
+ }
+ adapter->hma.flags |= HMA_DMA_MAPPED_FLAG;
+
+ adapter->hma.phy_addr = kcalloc(sgt->nents, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (unlikely(!adapter->hma.phy_addr))
+ goto free_hma;
+
+ for_each_sg(sgl, iter, sgt->nents, i) {
+ newpage = sg_page(iter);
+ adapter->hma.phy_addr[i] = sg_dma_address(iter);
+ }
+
+ ncmds = DIV_ROUND_UP(sgt->nents, HMA_MAX_ADDR_IN_CMD);
+ /* Pass on the addresses to firmware */
+ for (i = 0, k = 0; i < ncmds; i++, k += HMA_MAX_ADDR_IN_CMD) {
+ struct fw_hma_cmd hma_cmd;
+ u8 naddr = HMA_MAX_ADDR_IN_CMD;
+ u8 soc = 0, eoc = 0;
+ u8 hma_mode = 1; /* Presently we support only Page table mode */
+
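+		/* Only the first command carries SOC and only the last carries EOC */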
+ soc = (i == 0) ? 1 : 0;
+ eoc = (i == ncmds - 1) ? 1 : 0;
+
+ /* For last cmd, set naddr corresponding to remaining
+ * addresses
+ */
+ if (i == ncmds - 1) {
+ naddr = sgt->nents % HMA_MAX_ADDR_IN_CMD;
+ naddr = naddr ? naddr : HMA_MAX_ADDR_IN_CMD;
+ }
+ memset(&hma_cmd, 0, sizeof(hma_cmd));
+ hma_cmd.op_pkd = htonl(FW_CMD_OP_V(FW_HMA_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
+ hma_cmd.retval_len16 = htonl(FW_LEN16(hma_cmd));
+
+ hma_cmd.mode_to_pcie_params =
+ htonl(FW_HMA_CMD_MODE_V(hma_mode) |
+ FW_HMA_CMD_SOC_V(soc) | FW_HMA_CMD_EOC_V(eoc));
+
+		/* HMA cmd size specified in MB */
+ hma_cmd.naddr_size =
+ htonl(FW_HMA_CMD_SIZE_V(hma_size) |
+ FW_HMA_CMD_NADDR_V(naddr));
+
+ /* Total Page size specified in units of 4K */
+ hma_cmd.addr_size_pkd =
+ htonl(FW_HMA_CMD_ADDR_SIZE_V
+ ((page_size << page_order) >> 12));
+
+		/* Fill up to HMA_MAX_ADDR_IN_CMD addresses for this command */
+ for (j = 0; j < naddr; j++) {
+ hma_cmd.phy_address[j] =
+ cpu_to_be64(adapter->hma.phy_addr[j + k]);
+ }
+ ret = t4_wr_mbox(adapter, adapter->mbox, &hma_cmd,
+ sizeof(hma_cmd), &hma_cmd);
+ if (ret) {
+ dev_err(adapter->pdev_dev,
+ "HMA FW command failed with err %d\n", ret);
+ goto free_hma;
+ }
+ }
+
+ if (!ret)
+ dev_info(adapter->pdev_dev,
+ "Reserved %uMB host memory for HMA\n", hma_size);
+ return ret;
+
+free_hma:
+ adap_free_hma_mem(adapter);
+ return ret;
+}
+
static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
{
u32 v;
if (ret < 0)
goto bye;
+ /* We will proceed even if HMA init fails. */
+ ret = adap_config_hma(adapter);
+ if (ret)
+ dev_err(adapter->pdev_dev,
+ "HMA configuration failed with error %d\n", ret);
+
/*
* And finally tell the firmware to initialize itself using the
* parameters from the Configuration File.
* effect. Otherwise, it's time to try initializing the adapter.
*/
if (state == DEV_STATE_INIT) {
+ ret = adap_config_hma(adap);
+ if (ret)
+ dev_err(adap->pdev_dev,
+ "HMA configuration failed with error %d\n",
+ ret);
dev_info(adap->pdev_dev, "Coming up as %s: "\
"Adapter already initialized\n",
adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
adap->vres.pbl.start = val[4];
adap->vres.pbl.size = val[5] - val[4] + 1;
+ params[0] = FW_PARAM_PFVF(SRQ_START);
+ params[1] = FW_PARAM_PFVF(SRQ_END);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+ params, val);
+ if (!ret) {
+ adap->vres.srq.start = val[0];
+ adap->vres.srq.size = val[1] - val[0] + 1;
+ }
+ if (adap->vres.srq.size) {
+ adap->srq = t4_init_srq(adap->vres.srq.size);
+ if (!adap->srq)
+ dev_warn(&adap->pdev->dev, "could not allocate SRQ, continuing\n");
+ }
+
params[0] = FW_PARAM_PFVF(SQRQ_START);
params[1] = FW_PARAM_PFVF(SQRQ_END);
params[2] = FW_PARAM_PFVF(CQ_START);
"max_ordird_qp %d max_ird_adapter %d\n",
adap->params.max_ordird_qp,
adap->params.max_ird_adapter);
+
+ /* Enable write_with_immediate if FW supports it */
+ params[0] = FW_PARAM_DEV(RDMA_WRITE_WITH_IMM);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+ val);
+ adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
+
+ /* Enable write_cmpl if FW supports it */
+ params[0] = FW_PARAM_DEV(RI_WRITE_CMPL_WR);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+ val);
+ adap->params.write_cmpl_support = (ret == 0 && val[0] != 0);
adap->num_ofld_uld += 2;
}
if (caps_cmd.iscsicaps) {
* happened to HW/FW, stop issuing commands.
*/
bye:
+ adap_free_hma_mem(adap);
kfree(adap->sge.egr_map);
kfree(adap->sge.ingr_map);
kfree(adap->sge.starving_fl);
kvfree(adapter->smt);
kvfree(adapter->l2t);
+ kvfree(adapter->srq);
t4_cleanup_sched(adapter);
kvfree(adapter->tids.tid_tab);
cxgb4_cleanup_tc_flower(adapter);
/* Initialize the device structure. */
dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
- dev->needs_free_netdev = true;
}
static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
adapter->name = pci_name(pdev);
adapter->mbox = func;
adapter->pf = func;
+ adapter->params.chip = chip;
+ adapter->adap_idx = adap_idx;
adapter->msg_enable = DFLT_MSG_ENABLE;
adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
(sizeof(struct mbox_cmd) *
INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
INIT_WORK(&adapter->db_full_task, process_db_full);
INIT_WORK(&adapter->db_drop_task, process_db_drop);
+ INIT_WORK(&adapter->fatal_err_notify_task, notify_fatal_err);
err = t4_prep_adapter(adapter);
if (err)
t4_uld_clean_up(adapter);
}
+ adap_free_hma_mem(adapter);
+
disable_interrupts(adapter);
for_each_port(adapter, i)
err);
}
+ if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+ priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+ err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+ priv->mac_dev->allmulti);
+ if (err < 0)
+ netif_err(priv, drv, net_dev,
+ "mac_dev->set_allmulti() = %d\n",
+ err);
+ }
+
err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
if (err < 0)
netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
goto csum_failed;
}
+ /* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
- qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+ frag_len = skb_headlen(skb);
+ qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(dev, skb->data,
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */
- frag = &skb_shinfo(skb)->frags[0];
- frag_len = frag->size;
- for (i = 1; i <= nr_frags; i++, frag++) {
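+	/* frag i goes into SGT entry i + 1; entry 0 already holds the
+	 * linear part
+	 */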
+ for (i = 0; i < nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ frag_len = frag->size;
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(dev, frag, 0,
frag_len, dma_dir);
goto sg_map_failed;
}
- qm_sg_entry_set_len(&sgt[i], frag_len);
- sgt[i].bpid = FSL_DPAA_BPID_INV;
- sgt[i].offset = 0;
+ qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+ sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+ sgt[i + 1].offset = 0;
/* keep the offset in the address */
- qm_sg_entry_set64(&sgt[i], addr);
- frag_len = frag->size;
+ qm_sg_entry_set64(&sgt[i + 1], addr);
}
- qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+ /* Set the final bit in the last used entry of the SGT */
+ qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
}
if (unlikely(err < 0)) {
- percpu_stats->tx_errors++;
percpu_stats->tx_fifo_errors++;
return err;
}
/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
* make sure we don't feed FMan with more fragments than it supports.
*/
- if (nonlinear &&
- likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
- /* Just create a S/G fd based on the skb */
- err = skb_to_sg_fd(priv, skb, &fd);
- percpu_priv->tx_frag_skbuffs++;
- } else {
+ if (unlikely(nonlinear &&
+ (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
/* If the egress skb contains more fragments than we support
* we have no choice but to linearize it ourselves.
*/
- if (unlikely(nonlinear) && __skb_linearize(skb))
+ if (__skb_linearize(skb))
goto enomem;
- /* Finally, create a contig FD from this skb */
+ nonlinear = skb_is_nonlinear(skb);
+ }
+
+ if (nonlinear) {
+ /* Just create a S/G fd based on the skb */
+ err = skb_to_sg_fd(priv, skb, &fd);
+ percpu_priv->tx_frag_skbuffs++;
+ } else {
+ /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
}
if (unlikely(err < 0))
if (dpaa_eth_napi_schedule(percpu_priv, portal))
return qman_cb_dqrr_stop;
- if (dpaa_eth_refill_bpools(priv))
- /* Unable to refill the buffer pool due to insufficient
- * system memory. Just release the frame back into the pool,
- * otherwise we'll soon end up with an empty buffer pool.
- */
- dpaa_fd_release(net_dev, &dq->fd);
- else
- dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+ dpaa_eth_refill_bpools(priv);
+ dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
return qman_cb_dqrr_consume;
}
vaddr = phys_to_virt(addr);
prefetch(vaddr + qm_fd_get_offset(fd));
- fd_format = qm_fd_get_format(fd);
/* The only FD types that we may receive are contig and S/G */
WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg));
skb_len = skb->len;
- if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
+ if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) {
+ percpu_stats->rx_dropped++;
return qman_cb_dqrr_consume;
+ }
percpu_stats->rx_packets++;
percpu_stats->rx_bytes += skb_len;
priv->channel = (u16)channel;
- /* Start a thread that will walk the CPUs with affine portals
+ /* Walk the CPUs with affine portals
* and add this pool channel to each's dequeue mask.
*/
dpaa_eth_add_channel(priv->channel);
struct device *dev;
int err;
- dev = &pdev->dev;
+ dev = pdev->dev.parent;
net_dev = dev_get_drvdata(dev);
priv = netdev_priv(net_dev);
set_bucket(dtsec->regs, bucket, true);
/* Create element to be added to the driver hash table */
- hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+ hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
if (!hash_entry)
return -ENOMEM;
hash_entry->addr = addr;
return 0;
}
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+ u32 tmp;
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (!is_init_done(dtsec->dtsec_drv_param))
+ return -EINVAL;
+
+	tmp = ioread32be(&regs->rctrl);
+ if (enable)
+ tmp |= RCTRL_MPROM;
+ else
+ tmp &= ~RCTRL_MPROM;
+
+	iowrite32be(tmp, &regs->rctrl);
+
+ return 0;
+}
+
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
/*
* drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
}
EXPORT_SYMBOL(mlxsw_afa_block_jump);
+ int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block)
+ {
+ if (block->finished)
+ return -EINVAL;
+ mlxsw_afa_set_goto_set(block->cur_set,
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0);
+ block->finished = true;
+ return 0;
+ }
+ EXPORT_SYMBOL(mlxsw_afa_block_terminate);
+
static struct mlxsw_afa_fwd_entry *
mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port)
{
struct mlxsw_afa_resource resource;
int span_id;
u8 local_in_port;
- u8 local_out_port;
bool ingress;
};
{
block->afa->ops->mirror_del(block->afa->ops_priv,
mirror->local_in_port,
- mirror->local_out_port,
+ mirror->span_id,
mirror->ingress);
kfree(mirror);
}
}
static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
return ERR_PTR(-ENOMEM);
err = block->afa->ops->mirror_add(block->afa->ops_priv,
- local_in_port, local_out_port,
+ local_in_port, out_dev,
ingress, &mirror->span_id);
if (err)
goto err_mirror_add;
mirror->ingress = ingress;
- mirror->local_out_port = local_out_port;
mirror->local_in_port = local_in_port;
mirror->resource.destructor = mlxsw_afa_mirror_destructor;
mlxsw_afa_resource_add(block, &mirror->resource);
}
int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
- mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
ingress);
if (IS_ERR(mirror))
return PTR_ERR(mirror);
#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
#include <linux/types.h>
+#include <linux/netdevice.h>
struct mlxsw_afa;
struct mlxsw_afa_block;
void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
void (*counter_index_put)(void *priv, unsigned int counter_index);
- int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id);
- void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
bool ingress);
};
u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+ int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block);
int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
u16 trap_id);
int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
+ u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_KVD,
MLXSW_SP_RESOURCE_KVD_LINEAR,
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
};
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
struct mlxsw_sp_upper {
struct net_device *dev;
unsigned long *ports_in_mid; /* bits array */
};
-enum mlxsw_sp_span_type {
- MLXSW_SP_SPAN_EGRESS,
- MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
- struct list_head list;
- enum mlxsw_sp_span_type type;
- u8 local_port;
-
- /* Whether this is a directly bound mirror (port-to-port) or an ACL. */
- bool bound;
-};
-
-struct mlxsw_sp_span_entry {
- u8 local_port;
- bool used;
- struct list_head bound_ports_list;
- int ref_count;
- int id;
-};
-
enum mlxsw_sp_port_mall_action_type {
MLXSW_SP_PORT_MALL_MIRROR,
MLXSW_SP_PORT_MALL_SAMPLE,
};
struct mlxsw_sp_port_mall_mirror_tc_entry {
- u8 to_local_port;
+ int span_id;
bool ingress;
};
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type,
- bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
- u8 destination_port,
- enum mlxsw_sp_span_type type,
- bool bind);
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
/* spectrum_dcb.c */
#ifdef CONFIG_MLXSW_SPECTRUM_DCB
unsigned int entry_count,
unsigned int *p_alloc_size);
u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
struct mlxsw_sp_acl_rule_info {
unsigned int priority;
int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
u16 group_id);
+ int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
return block->disable_count;
}
+static bool
+mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ /* We hold a reference on ruleset ourselves */
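+	/* so a reference count of two means exactly one other user */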
+ return ruleset->ref_count == 2;
+}
+
static int
mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
if (err)
goto err_ht_insert;
- if (!chain_index) {
- /* We only need ruleset with chain index 0, the implicit one,
- * to be directly bound to device. The rest of the rulesets
- * are bound by "Goto action set".
- */
- err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
- if (err)
- goto err_ruleset_bind;
- }
-
return ruleset;
-err_ruleset_bind:
- rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
- mlxsw_sp_acl_ruleset_ht_params);
err_ht_insert:
ops->ruleset_del(mlxsw_sp, ruleset->priv);
err_ops_ruleset_add:
struct mlxsw_sp_acl_ruleset *ruleset)
{
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
- struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
- u32 chain_index = ruleset->ht_key.chain_index;
struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
- if (!chain_index)
- mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block);
rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
mlxsw_sp_acl_ruleset_ht_params);
ops->ruleset_del(mlxsw_sp, ruleset->priv);
return mlxsw_afa_block_jump(rulei->act_block, group_id);
}
+ int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei)
+ {
+ return mlxsw_afa_block_terminate(rulei->act_block);
+ }
+
int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
{
return mlxsw_afa_block_append_drop(rulei->act_block);
struct net_device *out_dev)
{
struct mlxsw_sp_acl_block_binding *binding;
- struct mlxsw_sp_port *out_port;
struct mlxsw_sp_port *in_port;
if (!list_is_singular(&block->binding_list))
binding = list_first_entry(&block->binding_list,
struct mlxsw_sp_acl_block_binding, list);
in_port = binding->mlxsw_sp_port;
- if (!mlxsw_sp_port_dev_check(out_dev))
- return -EINVAL;
-
- out_port = netdev_priv(out_dev);
- if (out_port->mlxsw_sp != mlxsw_sp)
- return -EINVAL;
return mlxsw_afa_block_append_mirror(rulei->act_block,
in_port->local_port,
- out_port->local_port,
+ out_dev,
binding->ingress);
}
if (err)
goto err_rhashtable_insert;
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset)) {
+ /* We only need ruleset with chain index 0, the implicit
+ * one, to be directly bound to device. The rest of the
+ * rulesets are bound by "Goto action set".
+ */
+ err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
+ if (err)
+ goto err_ruleset_block_bind;
+ }
+
list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
ruleset->ht_key.block->rule_count++;
return 0;
+err_ruleset_block_bind:
+ rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+ mlxsw_sp_acl_rule_ht_params);
err_rhashtable_insert:
ops->rule_del(mlxsw_sp, rule->priv);
return err;
ruleset->ht_key.block->rule_count--;
list_del(&rule->list);
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset))
+ mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
mlxsw_sp_acl_rule_ht_params);
ops->rule_del(mlxsw_sp, rule->priv);
--- /dev/null
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+ MLXSW_SP_SPAN_EGRESS,
+ MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+ struct list_head list;
+ enum mlxsw_sp_span_type type;
+ u8 local_port;
+
+	/* Whether this is a directly bound mirror (port-to-port) or an ACL. */
+	bool bound;
+};
+
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
+struct mlxsw_sp_span_entry {
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
+ struct list_head bound_ports_list;
+ int ref_count;
+ int id;
+};
+
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif
iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
if (eth_type == ETH_P_IP) {
+ if (iph->protocol != IPPROTO_TCP) {
+ DP_NOTICE(p_hwfn,
+ "Unexpected ip protocol on ll2 %x\n",
+ iph->protocol);
+ return -EINVAL;
+ }
+
cm_info->local_ip[0] = ntohl(iph->daddr);
cm_info->remote_ip[0] = ntohl(iph->saddr);
cm_info->ip_version = TCP_IPV4;
*payload_len = ntohs(iph->tot_len) - ip_hlen;
} else if (eth_type == ETH_P_IPV6) {
ip6h = (struct ipv6hdr *)iph;
+
+ if (ip6h->nexthdr != IPPROTO_TCP) {
+ DP_NOTICE(p_hwfn,
+ "Unexpected ip protocol on ll2 %x\n",
+				  ip6h->nexthdr);
+ return -EINVAL;
+ }
+
for (i = 0; i < 4; i++) {
cm_info->local_ip[i] =
ntohl(ip6h->daddr.in6_u.u6_addr32[i]);
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
/* Missing lower byte is now available */
mpa_len = fpdu->fpdu_length | *mpa_data;
fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
- fpdu->mpa_frag_len = fpdu->fpdu_length;
/* one byte of hdr */
+ fpdu->mpa_frag_len = 1;
fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
DP_VERBOSE(p_hwfn,
QED_MSG_RDMA,
#define CPDMA_RXCP 0x60
#define CPSW_POLL_WEIGHT 64
+#define CPSW_RX_VLAN_ENCAP_HDR_SIZE 4
#define CPSW_MIN_PACKET_SIZE (VLAN_ETH_ZLEN)
-#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN +\
+ ETH_FCS_LEN +\
+ CPSW_RX_VLAN_ENCAP_HDR_SIZE)
#define RX_PRIORITY_MAPPING 0x76543210
#define TX_PRIORITY_MAPPING 0x33221100
#define CPDMA_TX_PRIORITY_MAP 0x01234567
#define CPSW_VLAN_AWARE BIT(1)
+#define CPSW_RX_VLAN_ENCAP BIT(2)
#define CPSW_ALE_VLAN_AWARE 1
#define CPSW_FIFO_NORMAL_MODE (0 << 16)
#define CPSW_MAX_QUEUES 8
#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT 29
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK GENMASK(2, 0)
+#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT 16
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT 8
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK GENMASK(1, 0)
+enum {
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
+};
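+
+/* Layout of the RX VLAN encapsulation header word, as extracted by
+ * cpsw_rx_vlan_encap() below: bits 31-29 carry the priority, bits 27-16
+ * the VLAN ID and bits 9-8 the packet type.
+ */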
+
static int debug_level;
module_param(debug_level, int, 0);
MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
dev_kfree_skb_any(skb);
}
+static void cpsw_rx_vlan_encap(struct sk_buff *skb)
+{
+ struct cpsw_priv *priv = netdev_priv(skb->dev);
+ struct cpsw_common *cpsw = priv->cpsw;
+ u32 rx_vlan_encap_hdr = *((u32 *)skb->data);
+ u16 vtag, vid, prio, pkt_type;
+
+ /* Remove VLAN header encapsulation word */
+ skb_pull(skb, CPSW_RX_VLAN_ENCAP_HDR_SIZE);
+
+ pkt_type = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT) &
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK;
+	/* Ignore unknown & priority-tagged packets */
+ if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV ||
+ pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG)
+ return;
+
+ vid = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT) &
+ VLAN_VID_MASK;
+ /* Ignore vid 0 and pass packet as is */
+ if (!vid)
+ return;
+ /* Ignore default vlans in dual mac mode */
+ if (cpsw->data.dual_emac &&
+ vid == cpsw->slaves[priv->emac_port].port_vlan)
+ return;
+
+ prio = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT) &
+ CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK;
+
+ vtag = (prio << VLAN_PRIO_SHIFT) | vid;
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+
+ /* strip vlan tag for VLAN-tagged packet */
+ if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG) {
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+ skb_pull(skb, VLAN_HLEN);
+ }
+}
+
static void cpsw_rx_handler(void *token, int len, int status)
{
struct cpdma_chan *ch;
if (new_skb) {
skb_copy_queue_mapping(new_skb, skb);
skb_put(skb, len);
+ if (status & CPDMA_RX_VLAN_ENCAP)
+ cpsw_rx_vlan_encap(skb);
cpts_rx_timestamp(cpsw->cpts, skb);
skb->protocol = eth_type_trans(skb, ndev);
netif_receive_skb(skb);
/* set speed_in input in case RMII mode is used in 100Mbps */
if (phy->speed == 100)
mac_control |= BIT(15);
- else if (phy->speed == 10)
+ /* in band mode only works in 10Mbps RGMII mode */
+ else if ((phy->speed == 10) && phy_interface_is_rgmii(phy))
mac_control |= BIT(18); /* In Band mode */
if (priv->rx_pause)
cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
CPSW_ALE_VLAN_AWARE);
control_reg = readl(&cpsw->regs->control);
- control_reg |= CPSW_VLAN_AWARE;
+ control_reg |= CPSW_VLAN_AWARE | CPSW_RX_VLAN_ENCAP;
writel(control_reg, &cpsw->regs->control);
fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
CPSW_FIFO_NORMAL_MODE;
cpsw->quirk_irq = true;
}
- ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
ndev->netdev_ops = &cpsw_netdev_ops;
ndev->ethtool_ops = &cpsw_ethtool_ops;
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
+#include "netvsc_trace.h"
/*
* Switch the data path from the synthetic interface to the VF
init_pkt->msg.v4_msg.active_dp.active_datapath =
NVSP_DATAPATH_SYNTHETIC;
+ trace_nvsp_send(ndev, init_pkt);
+
vmbus_sendpacket(dev->channel, init_pkt,
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
= container_of(head, struct netvsc_device, rcu);
int i;
+ kfree(nvdev->extension);
+ vfree(nvdev->recv_buf);
+ vfree(nvdev->send_buf);
+ kfree(nvdev->send_section_map);
+
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
vfree(nvdev->chan_table[i].mrc.slots);
revoke_packet->msg.v1_msg.
revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+ trace_nvsp_send(ndev, revoke_packet);
+
ret = vmbus_sendpacket(device->channel,
revoke_packet,
sizeof(struct nvsp_message),
revoke_packet->msg.v1_msg.revoke_send_buf.id =
NETVSC_SEND_BUFFER_ID;
+ trace_nvsp_send(ndev, revoke_packet);
+
ret = vmbus_sendpacket(device->channel,
revoke_packet,
sizeof(struct nvsp_message),
net_device->recv_buf_gpadl_handle = 0;
}
- if (net_device->recv_buf) {
- /* Free up the receive buffer */
- vfree(net_device->recv_buf);
- net_device->recv_buf = NULL;
- }
-
if (net_device->send_buf_gpadl_handle) {
ret = vmbus_teardown_gpadl(device->channel,
net_device->send_buf_gpadl_handle);
}
net_device->send_buf_gpadl_handle = 0;
}
- if (net_device->send_buf) {
- /* Free up the send buffer */
- vfree(net_device->send_buf);
- net_device->send_buf = NULL;
- }
- kfree(net_device->send_section_map);
}
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
init_packet->msg.v1_msg.
send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
net_device->send_buf_gpadl_handle;
init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
}
+ trace_nvsp_send(ndev, init_packet);
+
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
struct netvsc_device *net_device,
const struct netvsc_device_info *device_info)
{
+ struct net_device *ndev = hv_get_drvdata(device);
static const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
send_ndis_ver.ndis_minor_ver =
ndis_version & 0xFFFF;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
= rtnl_dereference(net_device_ctx->nvdev);
int i;
- cancel_work_sync(&net_device->subchan_work);
-
netvsc_revoke_buf(device, net_device);
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+ /* And disassociate NAPI context from device */
+ for (i = 0; i < net_device->num_chn; i++)
+ netif_napi_del(&net_device->chan_table[i].napi);
+
/*
* At this point, no one should be accessing net_device
* except in here
*/
netdev_dbg(ndev, "net device safe to remove\n");
+ /* older versions require that buffer be revoked before close */
+ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
+ netvsc_teardown_gpadl(device, net_device);
+
/* Now, we can close the channel safely */
vmbus_close(device->channel);
- netvsc_teardown_gpadl(device, net_device);
-
- /* And dissassociate NAPI context from device */
- for (i = 0; i < net_device->num_chn; i++)
- netif_napi_del(&net_device->chan_table[i].napi);
+ if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
+ netvsc_teardown_gpadl(device, net_device);
/* Release all resources */
free_netvsc_device_rcu(net_device);
queue_sends =
atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
- if (net_device->destroy && queue_sends == 0)
- wake_up(&net_device->wait_drain);
-
- if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
- (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
- queue_sends < 1)) {
- netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
- ndev_ctx->eth_stats.wake_queue++;
+ if (unlikely(net_device->destroy)) {
+ if (queue_sends == 0)
+ wake_up(&net_device->wait_drain);
+ } else {
+ struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
+
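+		/* Wake a stopped queue once enough ring space is back or
+		 * nothing is left in flight
+		 */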
+ if (netif_tx_queue_stopped(txq) &&
+ (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+ queue_sends < 1)) {
+ netif_tx_wake_queue(txq);
+ ndev_ctx->eth_stats.wake_queue++;
+ }
}
}
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- struct nvsp_1_message_send_rndis_packet * const rpkt =
+ struct nvsp_1_message_send_rndis_packet *rpkt =
&nvmsg.msg.v1_msg.send_rndis_pkt;
struct netvsc_channel * const nvchan =
&net_device->chan_table[packet->q_idx];
if (out_channel->rescind)
return -ENODEV;
+ trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
if (packet->page_buf_cnt) {
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;
+ vmxferpage_packet->ranges[i].byte_offset;
u32 buflen = vmxferpage_packet->ranges[i].byte_count;
+ trace_rndis_recv(ndev, q_idx, data);
+
/* Pass it to the upper layer */
status = rndis_filter_receive(ndev, net_device,
channel, data, buflen);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct nvsp_message *nvmsg = hv_pkt_data(desc);
+ trace_nvsp_recv(ndev, channel, nvmsg);
+
switch (desc->type) {
case VM_PKT_COMP:
netvsc_send_completion(net_device, channel, device,
#include <linux/rtnetlink.h>
#include "hyperv_net.h"
+#include "netvsc_trace.h"
static void rndis_set_multicast(struct work_struct *w);
pb[0].len;
}
+ trace_rndis_send(dev->ndev, 0, &req->request_msg);
+
rcu_read_lock_bh();
ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
rcu_read_unlock_bh();
}
}
- static void rndis_filter_receive_response(struct rndis_device *dev,
- struct rndis_message *resp)
+ static void rndis_filter_receive_response(struct net_device *ndev,
+ struct netvsc_device *nvdev,
+ const struct rndis_message *resp)
{
+ struct rndis_device *dev = nvdev->extension;
struct rndis_request *request = NULL;
bool found = false;
unsigned long flags;
- struct net_device *ndev = dev->ndev;
+
+	/* This should never happen; it means a control message
+	 * response was received after the device was removed.
+ */
+ if (dev->state == RNDIS_DEV_UNINITIALIZED) {
+ netdev_err(ndev,
+ "got rndis message uninitialized\n");
+ return;
+ }
spin_lock_irqsave(&dev->request_lock, flags);
list_for_each_entry(request, &dev->req_list, list_ent) {
static int rndis_filter_receive_data(struct net_device *ndev,
struct netvsc_device *nvdev,
- struct rndis_device *dev,
struct rndis_message *msg,
struct vmbus_channel *channel,
void *data, u32 data_buflen)
* should be the data packet size plus the trailer padding size
*/
if (unlikely(data_buflen < rndis_pkt->data_len)) {
- netdev_err(dev->ndev, "rndis message buffer "
+ netdev_err(ndev, "rndis message buffer "
"overflow detected (got %u, min %u)"
"...dropping this message!\n",
data_buflen, rndis_pkt->data_len);
void *data, u32 buflen)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct rndis_device *rndis_dev = net_dev->extension;
struct rndis_message *rndis_msg = data;
- /* Make sure the rndis device state is initialized */
- if (unlikely(!rndis_dev)) {
- netif_dbg(net_device_ctx, rx_err, ndev,
- "got rndis message but no rndis device!\n");
- return NVSP_STAT_FAIL;
- }
-
- if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
- netif_dbg(net_device_ctx, rx_err, ndev,
- "got rndis message uninitialized\n");
- return NVSP_STAT_FAIL;
- }
-
if (netif_msg_rx_status(net_device_ctx))
dump_rndis_message(ndev, rndis_msg);
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
- return rndis_filter_receive_data(ndev, net_dev,
- rndis_dev, rndis_msg,
+ return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
channel, data, buflen);
case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C:
case RNDIS_MSG_SET_C:
/* completion msgs */
- rndis_filter_receive_response(rndis_dev, rndis_msg);
+ rndis_filter_receive_response(ndev, net_dev, rndis_msg);
break;
case RNDIS_MSG_INDICATE:
struct rndis_set_request *set;
int ret;
+ if (dev->filter == new_filter)
+ return 0;
+
request = get_rndis_request(dev, RNDIS_MSG_SET,
RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
sizeof(u32));
if (!request)
return -ENOMEM;
-
/* Setup the rndis set */
set = &request->request_msg.msg.set_req;
set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
&new_filter, sizeof(u32));
ret = rndis_filter_send_request(dev, request);
- if (ret == 0)
+ if (ret == 0) {
wait_for_completion(&request->wait_event);
+ dev->filter = new_filter;
+ }
put_rndis_request(dev, request);
filter = NDIS_PACKET_TYPE_PROMISCUOUS;
} else {
if (flags & IFF_ALLMULTI)
- flags |= NDIS_PACKET_TYPE_ALL_MULTICAST;
+ filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
if (flags & IFF_BROADCAST)
- flags |= NDIS_PACKET_TYPE_BROADCAST;
+ filter |= NDIS_PACKET_TYPE_BROADCAST;
}
rndis_filter_set_packet_filter(rdev, filter);
return true;
}
-static void rndis_filter_halt_device(struct rndis_device *dev)
+static void rndis_filter_halt_device(struct netvsc_device *nvdev,
+ struct rndis_device *dev)
{
struct rndis_request *request;
struct rndis_halt_request *halt;
- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
init_packet->msg.v5_msg.subchn_req.num_subchannels =
nvdev->num_chn - 1;
+ trace_nvsp_send(ndev, init_packet);
+
ret = vmbus_sendpacket(hv_dev->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+ netif_device_attach(ndev);
rtnl_unlock();
return;
nvdev->max_chn = 1;
nvdev->num_chn = 1;
+
+ netif_device_attach(ndev);
unlock:
rtnl_unlock();
}
net_device->num_chn = 1;
}
+ /* No sub channels, device is ready */
+ if (net_device->num_chn == 1)
+ netif_device_attach(net);
+
return net_device;
err_dev_remv:
{
struct rndis_device *rndis_dev = net_dev->extension;
- /* Don't try and setup sub channels if about to halt */
- cancel_work_sync(&net_dev->subchan_work);
-
/* Halt and release the rndis device */
- rndis_filter_halt_device(rndis_dev);
+ rndis_filter_halt_device(net_dev, rndis_dev);
net_dev->extension = NULL;
netvsc_device_remove(dev);
- kfree(rndis_dev);
}
int rndis_filter_open(struct netvsc_device *nvdev)
return rndis_filter_close_device(nvdev->extension);
}
-
- bool rndis_filter_opened(const struct netvsc_device *nvdev)
- {
- const struct rndis_device *dev = nvdev->extension;
-
- return dev->state == RNDIS_DEV_DATAINITIALIZED;
- }
return err;
/* There appears to be a bug in the 88e1512 when used in
- * SGMII to copper mode, where the AN advertisment register
+ * SGMII to copper mode, where the AN advertisement register
* clears the pause bits each time a negotiation occurs.
	 * This means we can never be truly sure what was advertised,
* so disable Pause support.
int i;
for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) {
- memcpy(data + i * ETH_GSTRING_LEN,
- marvell_hw_stats[i].string, ETH_GSTRING_LEN);
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ marvell_hw_stats[i].string, ETH_GSTRING_LEN);
}
}
phy_trigger_machine(phydev, false);
}
- goto phy_err;
+ /**
+ * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
+ * @phydev: target phy_device struct
+ */
+ static int phy_disable_interrupts(struct phy_device *phydev)
+ {
+ int err;
+
+ /* Disable PHY interrupts */
+ err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
+ if (err)
- err = phy_clear_interrupt(phydev);
- if (err)
- goto phy_err;
-
- return 0;
-
-phy_err:
- phy_error(phydev);
-
- return err;
++ return err;
+
+ /* Clear the interrupt */
++ return phy_clear_interrupt(phydev);
+ }
+
+ /**
+ * phy_change - Called by the phy_interrupt to handle PHY changes
+ * @phydev: phy_device struct that interrupted
+ */
+ static irqreturn_t phy_change(struct phy_device *phydev)
+ {
+ if (phy_interrupt_is_valid(phydev)) {
+ if (phydev->drv->did_interrupt &&
+ !phydev->drv->did_interrupt(phydev))
+ return IRQ_NONE;
+
+ if (phydev->state == PHY_HALTED)
+ if (phy_disable_interrupts(phydev))
+ goto phy_err;
+ }
+
+ mutex_lock(&phydev->lock);
+ if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
+ phydev->state = PHY_CHANGELINK;
+ mutex_unlock(&phydev->lock);
+
+ /* reschedule state queue work to run as soon as possible */
+ phy_trigger_machine(phydev, true);
+
+ if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
+ goto phy_err;
+ return IRQ_HANDLED;
+
+ phy_err:
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ /**
+ * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
+ * @work: work_struct that describes the work to be done
+ */
+ void phy_change_work(struct work_struct *work)
+ {
+ struct phy_device *phydev =
+ container_of(work, struct phy_device, phy_queue);
+
+ phy_change(phydev);
+ }
+
/**
* phy_interrupt - PHY interrupt handler
* @irq: interrupt line
if (PHY_HALTED == phydev->state)
return IRQ_NONE; /* It can't be ours. */
- phy_change(phydev);
-
- return IRQ_HANDLED;
+ return phy_change(phydev);
}
/**
return phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED);
}
- /**
- * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
- * @phydev: target phy_device struct
- */
- static int phy_disable_interrupts(struct phy_device *phydev)
- {
- int err;
-
- /* Disable PHY interrupts */
- err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
- if (err)
- return err;
-
- /* Clear the interrupt */
- return phy_clear_interrupt(phydev);
- }
-
/**
* phy_start_interrupts - request and enable interrupts for a PHY device
* @phydev: target phy_device struct
}
EXPORT_SYMBOL(phy_stop_interrupts);
- /**
- * phy_change - Called by the phy_interrupt to handle PHY changes
- * @phydev: phy_device struct that interrupted
- */
- void phy_change(struct phy_device *phydev)
- {
- if (phy_interrupt_is_valid(phydev)) {
- if (phydev->drv->did_interrupt &&
- !phydev->drv->did_interrupt(phydev))
- return;
-
- if (phydev->state == PHY_HALTED)
- if (phy_disable_interrupts(phydev))
- goto phy_err;
- }
-
- mutex_lock(&phydev->lock);
- if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
- phydev->state = PHY_CHANGELINK;
- mutex_unlock(&phydev->lock);
-
- /* reschedule state queue work to run as soon as possible */
- phy_trigger_machine(phydev, true);
-
- if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
- goto phy_err;
- return;
-
- phy_err:
- phy_error(phydev);
- }
-
- /**
- * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
- * @work: work_struct that describes the work to be done
- */
- void phy_change_work(struct work_struct *work)
- {
- struct phy_device *phydev =
- container_of(work, struct phy_device, phy_queue);
-
- phy_change(phydev);
- }
-
/**
* phy_stop - Bring down the PHY link, and stop checking the status
* @phydev: target phy_device struct
if (PHY_HALTED == phydev->state)
goto out_unlock;
- if (phy_interrupt_is_valid(phydev)) {
- /* Disable PHY Interrupts */
- phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
-
- /* Clear any pending interrupts */
- phy_clear_interrupt(phydev);
- }
+ if (phy_interrupt_is_valid(phydev))
+ phy_disable_interrupts(phydev);
phydev->state = PHY_HALTED;
dev->duplex = -1;
dev->pause = 0;
dev->asym_pause = 0;
- dev->link = 1;
+ dev->link = 0;
dev->interface = PHY_INTERFACE_MODE_GMII;
dev->autoneg = AUTONEG_ENABLE;
err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
"attached_dev");
if (!err) {
- err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj,
- "phydev");
- if (err)
- goto error;
+ err = sysfs_create_link_nowarn(&dev->dev.kobj,
+ &phydev->mdio.dev.kobj,
+ "phydev");
+ if (err) {
+ dev_err(&dev->dev, "could not add device link to %s err %d\n",
+ kobject_name(&phydev->mdio.dev.kobj),
+ err);
+ /* non-fatal - some net drivers can use one netdevice
+			 * with more than one phy
+ */
+ }
phydev->sysfs_links = true;
}
}
EXPORT_SYMBOL(genphy_config_init);
+ /* This is used for PHY devices that don't support MMD extended
+  * register access but do have side effects when the MMD registers
+  * are accessed via the indirect method.
+ */
+ int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum)
+ {
+ return -EOPNOTSUPP;
+ }
+ EXPORT_SYMBOL(genphy_read_mmd_unsupported);
+
+ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
+ u16 regnum, u16 val)
+ {
+ return -EOPNOTSUPP;
+ }
+ EXPORT_SYMBOL(genphy_write_mmd_unsupported);
+
int genphy_suspend(struct phy_device *phydev)
{
return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
/* Prototypes. */
static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
struct file *file, unsigned int cmd, unsigned long arg);
- static void ppp_xmit_process(struct ppp *ppp);
+ static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb);
static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
static void ppp_push(struct ppp *ppp);
static void ppp_channel_push(struct channel *pch);
goto out;
}
- skb_queue_tail(&pf->xq, skb);
-
switch (pf->kind) {
case INTERFACE:
- ppp_xmit_process(PF_TO_PPP(pf));
+ ppp_xmit_process(PF_TO_PPP(pf), skb);
break;
case CHANNEL:
+ skb_queue_tail(&pf->xq, skb);
ppp_channel_push(PF_TO_CHANNEL(pf));
break;
}
.exit = ppp_exit_net,
.id = &ppp_net_id,
.size = sizeof(struct ppp_net),
+ .async = true,
};
static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
put_unaligned_be16(proto, pp);
skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev)));
- skb_queue_tail(&ppp->file.xq, skb);
- ppp_xmit_process(ppp);
+ ppp_xmit_process(ppp, skb);
+
return NETDEV_TX_OK;
outf:
*/
/* Called to do any work queued up on the transmit side that can now be done */
- static void __ppp_xmit_process(struct ppp *ppp)
+ static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
{
- struct sk_buff *skb;
-
ppp_xmit_lock(ppp);
if (!ppp->closing) {
ppp_push(ppp);
+
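+		/* Enqueue the new frame under the xmit lock, and only while
+		 * the unit is still open
+		 */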
+ if (skb)
+ skb_queue_tail(&ppp->file.xq, skb);
while (!ppp->xmit_pending &&
(skb = skb_dequeue(&ppp->file.xq)))
ppp_send_frame(ppp, skb);
ppp_xmit_unlock(ppp);
}
- static void ppp_xmit_process(struct ppp *ppp)
+ static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
{
local_bh_disable();
goto err;
(*this_cpu_ptr(ppp->xmit_recursion))++;
- __ppp_xmit_process(ppp);
+ __ppp_xmit_process(ppp, skb);
(*this_cpu_ptr(ppp->xmit_recursion))--;
local_bh_enable();
err:
local_bh_enable();
+ kfree_skb(skb);
+
if (net_ratelimit())
netdev_err(ppp->dev, "recursion detected\n");
}
if (skb_queue_empty(&pch->file.xq)) {
ppp = pch->ppp;
if (ppp)
- __ppp_xmit_process(ppp);
+ __ppp_xmit_process(ppp, NULL);
}
}
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
if (!nlh) {
err = __send_and_alloc_skb(&skb, team, portid, send_func);
if (err)
- goto errout;
+ return err;
goto send_done;
}
if (!nlh) {
err = __send_and_alloc_skb(&skb, team, portid, send_func);
if (err)
- goto errout;
+ return err;
goto send_done;
}
#include <linux/mutex.h>
#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */
return tun;
}
- static void tun_ptr_free(void *ptr)
+ void tun_ptr_free(void *ptr)
{
if (!ptr)
return;
__skb_array_destroy_skb(ptr);
}
}
+ EXPORT_SYMBOL_GPL(tun_ptr_free);
static void tun_queue_purge(struct tun_file *tfile)
{
unsigned int delta = 0;
char *buf;
size_t copied;
- bool xdp_xmit = false;
int err, pad = TUN_RX_PAD;
rcu_read_lock();
preempt_enable();
return NULL;
case XDP_TX:
- xdp_xmit = true;
- /* fall through */
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+ if (tun_xdp_xmit(tun->dev, &xdp))
+ goto err_redirect;
+ tun_xdp_flush(tun->dev);
+ rcu_read_unlock();
+ preempt_enable();
+ return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
break;
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- if (xdp_xmit) {
- skb->dev = tun->dev;
- generic_xdp_tx(skb, xdp_prog);
- rcu_read_unlock();
- preempt_enable();
- return NULL;
- }
-
rcu_read_unlock();
preempt_enable();
return -EINVAL;
}
+static size_t tun_get_size(const struct net_device *dev)
+{
+ BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
+ BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
+
+ return nla_total_size(sizeof(uid_t)) + /* OWNER */
+ nla_total_size(sizeof(gid_t)) + /* GROUP */
+ nla_total_size(sizeof(u8)) + /* TYPE */
+ nla_total_size(sizeof(u8)) + /* PI */
+ nla_total_size(sizeof(u8)) + /* VNET_HDR */
+ nla_total_size(sizeof(u8)) + /* PERSIST */
+ nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
+ nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
+ nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
+ 0;
+}
+
+static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
+ goto nla_put_failure;
+ if (uid_valid(tun->owner) &&
+ nla_put_u32(skb, IFLA_TUN_OWNER,
+ from_kuid_munged(current_user_ns(), tun->owner)))
+ goto nla_put_failure;
+ if (gid_valid(tun->group) &&
+ nla_put_u32(skb, IFLA_TUN_GROUP,
+ from_kgid_munged(current_user_ns(), tun->group)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
+ !!(tun->flags & IFF_MULTI_QUEUE)))
+ goto nla_put_failure;
+ if (tun->flags & IFF_MULTI_QUEUE) {
+ if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
+ tun->numdisabled))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static struct rtnl_link_ops tun_link_ops __read_mostly = {
.kind = DRV_NAME,
.priv_size = sizeof(struct tun_struct),
.setup = tun_setup,
.validate = tun_validate,
+ .get_size = tun_get_size,
+ .fill_info = tun_fill_info,
};
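/*
 * With .get_size/.fill_info wired up, an RTM_GETLINK dump of a tun/tap
 * device now carries the IFLA_TUN_* attributes filled in above (owner,
 * group, type, PI, vnet header, persist, multi-queue and the queue counts),
 * so userspace (e.g. a suitably new "ip -d link show") can report them.
 * Creating tun devices over rtnetlink remains unsupported (tun_validate()
 * still rejects it), so these callbacks only export the state of devices
 * set up via TUNSETIFF.
 */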
static void tun_sock_write_space(struct sock *sk)
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
+ struct net *net;
kuid_t owner;
kgid_t group;
int sndbuf;
int le;
int ret;
- if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
+ if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
+ (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
} else {
rtnl_lock();
tun = tun_get(tfile);
+ net = sock_net(&tfile->sk);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
ifr.ifr_name[IFNAMSIZ-1] = '\0';
- ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
+ ret = tun_set_iff(net, file, &ifr);
if (ret)
goto unlock;
tfile->ifindex = ifindex;
goto unlock;
}
+ if (cmd == SIOCGSKNS) {
+ ret = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto unlock;
+
+ ret = open_related_ns(&net->ns, get_net_ns);
+ goto unlock;
+ }
ret = -EBADFD;
if (!tun)
static unsigned int hwsim_net_id;
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
struct hwsim_net {
int netgroup;
return hwsim_net->netgroup;
}
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
{
struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
- hwsim_net->netgroup = hwsim_netgroup++;
+ hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+ 0, 0, GFP_KERNEL);
+ return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
}
static inline u32 hwsim_net_get_wmediumd(struct net *net)
static struct workqueue_struct *hwsim_wq;
static struct rhashtable hwsim_radios_rht;
static int hwsim_radio_idx;
+static int hwsim_radios_generation = 1;
static struct platform_driver mac80211_hwsim_driver = {
.driver = {
[HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING },
[HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG },
[HWSIM_ATTR_FREQ] = { .type = NLA_U32 },
+ [HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN },
};
static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
bool destroy_on_close;
const char *hwname;
bool no_vif;
+ const u8 *perm_addr;
};
static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb,
skb_queue_head_init(&data->pending);
SET_IEEE80211_DEV(hw, data->dev);
- eth_zero_addr(addr);
- addr[0] = 0x02;
- addr[3] = idx >> 8;
- addr[4] = idx;
- memcpy(data->addresses[0].addr, addr, ETH_ALEN);
- memcpy(data->addresses[1].addr, addr, ETH_ALEN);
- data->addresses[1].addr[0] |= 0x40;
- hw->wiphy->n_addresses = 2;
- hw->wiphy->addresses = data->addresses;
+ if (!param->perm_addr) {
+ eth_zero_addr(addr);
+ addr[0] = 0x02;
+ addr[3] = idx >> 8;
+ addr[4] = idx;
+ memcpy(data->addresses[0].addr, addr, ETH_ALEN);
+ /* Why is a second address needed here? */
+ memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+ data->addresses[1].addr[0] |= 0x40;
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ /* possible address clash is checked at hash table insertion */
+ } else {
+ memcpy(data->addresses[0].addr, param->perm_addr, ETH_ALEN);
+ /* compatibility with automatically generated mac addr */
+ memcpy(data->addresses[1].addr, param->perm_addr, ETH_ALEN);
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ }
data->channels = param->channels;
data->use_chanctx = param->use_chanctx;
mutex_init(&data->mutex);
data->netgroup = hwsim_net_get_netgroup(net);
+ data->wmediumd = hwsim_net_get_wmediumd(net);
/* Enable frame retransmissions for lossy channels */
hw->max_rates = 4;
err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
if (err < 0) {
- pr_debug("mac80211_hwsim: radio index %d already present\n",
- idx);
+ if (info) {
+ GENL_SET_ERR_MSG(info, "perm addr already present");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
spin_unlock_bh(&hwsim_radio_lock);
goto failed_final_insert;
}
list_add_tail(&data->list, &hwsim_radios);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
if (idx > 0)
param.regd = hwsim_world_regdom_custom[idx];
}
+ if (info->attrs[HWSIM_ATTR_PERM_ADDR]) {
+ if (!is_valid_ether_addr(
+ nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]))) {
+ GENL_SET_ERR_MSG(info, "MAC is not a valid source addr");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ return -EINVAL;
+ }
+
+ param.perm_addr = nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
+
ret = mac80211_hwsim_new_radio(info, &param);
kfree(hwname);
return ret;
list_del(&data->list);
rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
info);
static int hwsim_dump_radio_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- int idx = cb->args[0];
+ int last_idx = cb->args[0];
struct mac80211_hwsim_data *data = NULL;
- int res;
+ int res = 0;
+ void *hdr;
spin_lock_bh(&hwsim_radio_lock);
+ cb->seq = hwsim_radios_generation;
- if (idx == hwsim_radio_idx)
+ if (last_idx >= hwsim_radio_idx-1)
goto done;
list_for_each_entry(data, &hwsim_radios, list) {
- if (data->idx < idx)
+ if (data->idx <= last_idx)
continue;
if (!net_eq(wiphy_net(data->hw->wiphy), sock_net(skb->sk)))
if (res < 0)
break;
- idx = data->idx + 1;
+ last_idx = data->idx;
}
- cb->args[0] = idx;
+ cb->args[0] = last_idx;
+
+ /* list changed, but no new element sent, set interrupted flag */
+ if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &hwsim_genl_family,
+ NLM_F_MULTI, HWSIM_CMD_GET_RADIO);
+ if (!hdr)
+ res = -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr);
+ genlmsg_end(skb, hdr);
+ }
done:
spin_unlock_bh(&hwsim_radio_lock);
- return skb->len;
+ return res ?: skb->len;
}
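/*
 * hwsim_radios_generation is bumped on every radio add/remove; assigning it
 * to cb->seq lets genl_dump_check_consistent() stamp NLM_F_DUMP_INTR on the
 * reply when the list changed mid-dump.  The empty message built above
 * exists only so that a dump which saw the change but had nothing new to
 * send still carries a header for that flag.
 */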
/* Generic Netlink operations array */
struct mac80211_hwsim_data *data =
container_of(work, struct mac80211_hwsim_data, destroy_work);
+ hwsim_radios_generation++;
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL);
}
static __net_init int hwsim_init_net(struct net *net)
{
- hwsim_net_set_netgroup(net);
-
- return 0;
+ return hwsim_net_set_netgroup(net);
}
static void __net_exit hwsim_exit_net(struct net *net)
queue_work(hwsim_wq, &data->destroy_work);
}
spin_unlock_bh(&hwsim_radio_lock);
+
+ ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
}
static struct pernet_operations hwsim_net_ops = {
.exit = hwsim_exit_net,
.id = &hwsim_net_id,
.size = sizeof(struct hwsim_net),
+ .async = true,
};
static void hwsim_exit_netlink(void)
queue == card->qdio.no_in_queues - 1;
}
-
- static int qeth_issue_next_read(struct qeth_card *card)
+ static int __qeth_issue_next_read(struct qeth_card *card)
{
int rc;
struct qeth_cmd_buffer *iob;
return rc;
}
+ static int qeth_issue_next_read(struct qeth_card *card)
+ {
+ int ret;
+
+ spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+ ret = __qeth_issue_next_read(card);
+ spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+
+ return ret;
+ }
+
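/*
 * The new wrapper takes the ccw device lock before queueing the next read
 * buffer.  This mirrors the interrupt path: qeth_irq() is invoked with that
 * lock already held and now calls __qeth_issue_next_read() directly, so both
 * paths issue the read under the same lock.
 */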
static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card)
{
struct qeth_reply *reply;
QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
if ((buffer[2] & 0xc0) == 0xc0) {
- QETH_DBF_MESSAGE(2, "received an IDX TERMINATE "
- "with cause code 0x%02x%s\n",
- buffer[4],
- ((buffer[4] == 0x22) ?
- " -- try another portname" : ""));
+ QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+ buffer[4]);
QETH_CARD_TEXT(card, 2, "ckidxres");
QETH_CARD_TEXT(card, 2, " idxterm");
QETH_CARD_TEXT_(card, 2, " rc%d", -EIO);
spin_lock_irqsave(&card->thread_mask_lock, flags);
card->thread_running_mask &= ~thread;
spin_unlock_irqrestore(&card->thread_mask_lock, flags);
- wake_up(&card->wait_q);
+ wake_up_all(&card->wait_q);
}
EXPORT_SYMBOL_GPL(qeth_clear_thread_running_bit);
}
rc = qeth_get_problem(cdev, irb);
if (rc) {
+ card->read_or_write_problem = 1;
qeth_clear_ipacmd_list(card);
qeth_schedule_recovery(card);
goto out;
return;
if (channel == &card->read &&
channel->state == CH_STATE_UP)
- qeth_issue_next_read(card);
+ __qeth_issue_next_read(card);
iob = channel->iob;
index = channel->buf_no;
int i;
if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
- buf->rx_skb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+ buf->rx_skb = netdev_alloc_skb(card->dev,
+ QETH_RX_PULL_LEN + ETH_HLEN);
if (!buf->rx_skb)
return 1;
}
QETH_DBF_TEXT(SETUP, 2, "initqdqs");
/* inbound queue */
- qdio_reset_buffers(card->qdio.in_q->qdio_bufs,
- QDIO_MAX_BUFFERS_PER_Q);
+ qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+ memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_initialize_working_pool_list(card);
/*give only as many buffers to hardware as we have buffer pool entries*/
for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
enum qeth_ipa_cmds ipacmd, enum qeth_prot_versions prot)
{
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
iob = qeth_get_buffer(&card->write);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
- qeth_fill_ipacmd_header(card, cmd, ipacmd, prot);
+ qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
} else {
dev_warn(&card->gdev->dev,
"The qeth driver ran out of channel command buffers\n");
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
QETH_PROT_IPV4);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
cmd->data.setadapterparms.hdr.command_code = command;
cmd->data.setadapterparms.hdr.used_total = 1;
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 16;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 80;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
cmd->data.diagass.type = 1;
sizeof(struct qeth_ipacmd_setadpparms_hdr) + 8);
if (!iob)
return;
- cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.data.mode = mode;
qeth_send_ipa_cmd(card, iob, qeth_setadp_promisc_mode_cb, NULL);
}
sizeof(struct qeth_change_addr));
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.data.change_addr.cmd = CHANGE_ADDR_READ_MAC;
cmd->data.setadapterparms.data.change_addr.addr_size = ETH_ALEN;
ether_addr_copy(cmd->data.setadapterparms.data.change_addr.addr,
sizeof(struct qeth_set_access_ctrl));
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
access_ctrl_req->subcmd_code = isolation;
rc = -ENOMEM;
goto out;
}
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
qeth_snmp_command_cb, (void *)&qinfo);
rc = -ENOMEM;
goto out_free;
}
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
oat_req = &cmd->data.setadapterparms.data.query_oat;
oat_req->subcmd_code = oat_data.command;
QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
qeth_clean_channel(&card->read);
qeth_clean_channel(&card->write);
- if (card->dev)
- free_netdev(card->dev);
qeth_free_qdio_buffers(card);
unregister_service_level(&card->qeth_service_level);
kfree(card);
} else {
unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
- skb = dev_alloc_skb(linear + headroom);
+ skb = napi_alloc_skb(&card->napi, linear + headroom);
}
if (!skb)
goto no_mem;
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETASSPARMS, prot);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setassparms.hdr.assist_no = ipa_func;
cmd->data.setassparms.hdr.length = 8 + len;
cmd->data.setassparms.hdr.command_code = cmd_code;
QETH_CARD_TEXT(card, 4, "sendassp");
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
if (len <= sizeof(__u32))
cmd->data.setassparms.data.flags_32bit = (__u32) data;
else /* (len > sizeof(__u32)) */
iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setdelmac.mac_length = ETH_ALEN;
ether_addr_copy(cmd->data.setdelmac.mac, mac);
return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setdelvlan.vlan_id = i;
return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
qeth_l2_send_setdelvlan_cb, NULL));
*done = 1;
break;
}
- skb->dev = card->dev;
switch (hdr->hdr.l2.id) {
case QETH_HEADER_TYPE_LAYER2:
- skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
if ((card->dev->features & NETIF_F_RXCSUM)
&& ((hdr->hdr.l2.flags[1] &
qeth_l2_set_offline(cgdev);
if (card->dev) {
- netif_napi_del(&card->napi);
unregister_netdev(card->dev);
+ free_netdev(card->dev);
card->dev = NULL;
}
return;
return -ENODEV;
card->dev->ml_priv = card;
+ card->dev->priv_flags |= IFF_UNICAST_FLT;
card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
card->dev->mtu = card->info.initial_mtu;
card->dev->min_mtu = 64;
card->dev->features |= NETIF_F_VLAN_CHALLENGED;
else
card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ if (card->info.type != QETH_CARD_TYPE_OSN &&
+ card->info.type != QETH_CARD_TYPE_IQD) {
+ card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ card->dev->needed_headroom = sizeof(struct qeth_hdr);
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+ }
+
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
- card->dev->hw_features = NETIF_F_SG;
- card->dev->vlan_features = NETIF_F_SG;
card->dev->features |= NETIF_F_SG;
/* OSA 3S and earlier has no RX/TX support */
if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
card->dev->vlan_features |= NETIF_F_RXCSUM;
}
}
- if (card->info.type != QETH_CARD_TYPE_OSN &&
- card->info.type != QETH_CARD_TYPE_IQD) {
- card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- card->dev->needed_headroom = sizeof(struct qeth_hdr);
- }
card->info.broadcast_capable = 1;
qeth_l2_request_initial_mac(card);
qeth_l2_setup_bridgeport_attrs(card);
card->state = CARD_STATE_HARDSETUP;
- memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_print_status_message(card);
/* softsetup */
{
struct qeth_cmd_buffer *iob;
struct qeth_card *card;
- int rc;
if (!dev)
return -ENODEV;
if (!qeth_card_hw_is_reachable(card))
return -ENODEV;
iob = qeth_wait_for_buffer(&card->write);
- memcpy(iob->data+IPA_PDU_HEADER_SIZE, data, data_len);
- rc = qeth_osn_send_ipa_cmd(card, iob, data_len);
- return rc;
+ memcpy(__ipa_cmd(iob), data, data_len);
+ return qeth_osn_send_ipa_cmd(card, iob, data_len);
}
EXPORT_SYMBOL(qeth_osn_assist);
iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
if (!iob)
return iob;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
cmd_length;
cmd->data.sbp.hdr.command_code = sbp_cmd;
return -ENOMEM;
/* create header for request */
- cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
req = &cmd->data.vnicc;
/* create sub command header for request */
qeth_l3_ipaddr6_to_string(addr, buf);
}
+static struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
+{
+ struct qeth_ipaddr *addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+
+ if (addr)
+ qeth_l3_init_ipaddr(addr, QETH_IP_TYPE_NORMAL, prot);
+ return addr;
+}
+
static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
struct qeth_ipaddr *query)
{
return rc;
}
-int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_delete_ip(struct qeth_card *card,
+ struct qeth_ipaddr *tmp_addr)
{
int rc = 0;
struct qeth_ipaddr *addr;
- QETH_CARD_TEXT(card, 4, "delip");
+ if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+ QETH_CARD_TEXT(card, 2, "delrxip");
+ else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+ QETH_CARD_TEXT(card, 2, "delvipa");
+ else
+ QETH_CARD_TEXT(card, 2, "delip");
if (tmp_addr->proto == QETH_PROT_IPV4)
QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
return rc;
}
-int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
{
int rc = 0;
struct qeth_ipaddr *addr;
char buf[40];
- QETH_CARD_TEXT(card, 4, "addip");
+ if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+ QETH_CARD_TEXT(card, 2, "addrxip");
+ else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+ QETH_CARD_TEXT(card, 2, "addvipa");
+ else
+ QETH_CARD_TEXT(card, 2, "addip");
if (tmp_addr->proto == QETH_PROT_IPV4)
QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
QETH_CARD_TEXT(card, 2, "tkovaddr");
- addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+ addr->ipato = 1;
}
hash_add(card->ip_htable, &addr->hnode,
qeth_l3_ipaddr_hash(addr));
return rc;
}
-
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(
- enum qeth_prot_versions prot)
-{
- struct qeth_ipaddr *addr;
-
- addr = kzalloc(sizeof(struct qeth_ipaddr), GFP_ATOMIC);
- if (!addr)
- return NULL;
-
- addr->type = QETH_IP_TYPE_NORMAL;
- addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
- addr->proto = prot;
-
- return addr;
-}
-
static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
{
struct qeth_ipaddr *addr;
iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
ether_addr_copy(cmd->data.setdelipm.mac, addr->mac);
if (addr->proto == QETH_PROT_IPV6)
memcpy(cmd->data.setdelipm.ip6, &addr->u.a6.addr,
}
}
+static u32 qeth_l3_get_setdelip_flags(struct qeth_ipaddr *addr, bool set)
+{
+ switch (addr->type) {
+ case QETH_IP_TYPE_RXIP:
+ return (set) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+ case QETH_IP_TYPE_VIPA:
+ return (set) ? QETH_IPA_SETIP_VIPA_FLAG :
+ QETH_IPA_DELIP_VIPA_FLAG;
+ default:
+ return (set && addr->ipato) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+ }
+}
+
static int qeth_l3_send_setdelip(struct qeth_card *card,
- struct qeth_ipaddr *addr, int ipacmd, unsigned int flags)
+ struct qeth_ipaddr *addr,
+ enum qeth_ipa_cmds ipacmd)
{
- int rc;
struct qeth_cmd_buffer *iob;
struct qeth_ipa_cmd *cmd;
__u8 netmask[16];
+ u32 flags;
QETH_CARD_TEXT(card, 4, "setdelip");
- QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
+
+ flags = qeth_l3_get_setdelip_flags(addr, ipacmd == IPA_CMD_SETIP);
+ QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
+
if (addr->proto == QETH_PROT_IPV6) {
memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
sizeof(struct in6_addr));
cmd->data.setdelip4.flags = flags;
}
- rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-
- return rc;
+ return qeth_send_ipa_cmd(card, iob, NULL, NULL);
}
static int qeth_l3_send_setrouting(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETRTG, prot);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setrtg.type = (type);
rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
hash_for_each(card->ip_htable, i, addr, hnode) {
if (addr->type != QETH_IP_TYPE_NORMAL)
continue;
- if (qeth_l3_is_addr_covered_by_ipato(card, addr))
- addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
- else
- addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+ addr->ipato = qeth_l3_is_addr_covered_by_ipato(card, addr);
}
}
return rc;
}
-/*
- * VIPA related functions
- */
-int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
-{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addvipa4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addvipa6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_VIPA;
- ipaddr->set_flags = QETH_IPA_SETIP_VIPA_FLAG;
- ipaddr->del_flags = QETH_IPA_DELIP_VIPA_FLAG;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_add_ip(card, ipaddr);
- spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
-
- return rc;
-}
-
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
-{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "delvipa4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "delvipa6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_VIPA;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_delete_ip(card, ipaddr);
- spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
- return rc;
-}
-
-/*
- * proxy ARP related functions
- */
-int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto)
{
- struct qeth_ipaddr *ipaddr;
+ struct qeth_ipaddr addr;
int rc;
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addrxip4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addrxip6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
-
- ipaddr->type = QETH_IP_TYPE_RXIP;
- ipaddr->set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
- ipaddr->del_flags = 0;
- } else
- return -ENOMEM;
+ qeth_l3_init_ipaddr(&addr, type, proto);
+ if (proto == QETH_PROT_IPV4)
+ memcpy(&addr.u.a4.addr, ip, 4);
+ else
+ memcpy(&addr.u.a6.addr, ip, 16);
spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_add_ip(card, ipaddr);
+ rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
-
return rc;
}
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
{
- struct qeth_ipaddr *ipaddr;
- int rc;
+ struct qeth_ipaddr addr;
+ int rc, i;
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "delrxip4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "delrxip6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_RXIP;
- } else
- return -ENOMEM;
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+ addr.u.a6.addr.s6_addr[0] = 0xfe;
+ addr.u.a6.addr.s6_addr[1] = 0x80;
+ for (i = 0; i < 8; i++)
+ addr.u.a6.addr.s6_addr[8+i] = card->options.hsuid[i];
spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_delete_ip(card, ipaddr);
+ rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
return rc;
}
if (addr->is_multicast)
rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_SETIPM);
else
- rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP,
- addr->set_flags);
+ rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP);
if (rc)
QETH_CARD_TEXT(card, 2, "failed");
} while ((--cnt > 0) && rc);
if (addr->is_multicast)
rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_DELIPM);
else
- rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP,
- addr->del_flags);
+ rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP);
if (rc)
QETH_CARD_TEXT(card, 2, "failed");
QETH_PROT_IPV6);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
card->info.unique_id;
QETH_PROT_IPV6);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
card->info.unique_id;
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 16;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRACE;
cmd->data.diagass.type = QETH_DIAGS_TYPE_HIPERSOCKET;
ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
card->stats.multicast++;
- skb->pkt_type = PACKET_MULTICAST;
break;
case QETH_CAST_BROADCAST:
ether_addr_copy(tg_addr, card->dev->broadcast);
card->stats.multicast++;
- skb->pkt_type = PACKET_BROADCAST;
break;
- case QETH_CAST_UNICAST:
- case QETH_CAST_ANYCAST:
- case QETH_CAST_NOCAST:
default:
if (card->options.sniffer)
skb->pkt_type = PACKET_OTHERHOST;
- else
- skb->pkt_type = PACKET_HOST;
ether_addr_copy(tg_addr, card->dev->dev_addr);
}
+
if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
card->dev->header_ops->create(skb, card->dev, prot,
tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
- card->dev->addr_len);
+ skb->len);
else
card->dev->header_ops->create(skb, card->dev, prot,
- tg_addr, "FAKELL", card->dev->addr_len);
+ tg_addr, "FAKELL", skb->len);
}
skb->protocol = eth_type_trans(skb, card->dev);
*done = 1;
break;
}
- skb->dev = card->dev;
switch (hdr->hdr.l3.id) {
case QETH_HEADER_TYPE_LAYER3:
magic = *(__u16 *)skb->data;
if ((card->info.type == QETH_CARD_TYPE_IQD) &&
(magic == ETH_P_AF_IUCV)) {
skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
- skb->pkt_type = PACKET_HOST;
- skb->mac_header = NET_SKB_PAD;
- skb->dev = card->dev;
len = skb->len;
card->dev->header_ops->create(skb, card->dev, 0,
- card->dev->dev_addr, "FAKELL",
- card->dev->addr_len);
+ card->dev->dev_addr, "FAKELL", len);
+ skb_reset_mac_header(skb);
netif_receive_skb(skb);
} else {
qeth_l3_rebuild_skb(card, skb, hdr);
}
break;
case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
- skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
len = skb->len;
netif_receive_skb(skb);
return work_done;
}
-static int qeth_l3_verify_vlan_dev(struct net_device *dev,
- struct qeth_card *card)
-{
- int rc = 0;
- u16 vid;
-
- for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
- struct net_device *netdev;
-
- rcu_read_lock();
- netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
- vid);
- rcu_read_unlock();
- if (netdev == dev) {
- rc = QETH_VLAN_CARD;
- break;
- }
- }
-
- if (rc && !(vlan_dev_real_dev(dev)->ml_priv == (void *)card))
- return 0;
-
- return rc;
-}
-
-static int qeth_l3_verify_dev(struct net_device *dev)
-{
- struct qeth_card *card;
- int rc = 0;
- unsigned long flags;
-
- read_lock_irqsave(&qeth_core_card_list.rwlock, flags);
- list_for_each_entry(card, &qeth_core_card_list.list, list) {
- if (card->dev == dev) {
- rc = QETH_REAL_CARD;
- break;
- }
- rc = qeth_l3_verify_vlan_dev(dev, card);
- if (rc)
- break;
- }
- read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
-
- return rc;
-}
-
-static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
-{
- struct qeth_card *card = NULL;
- int rc;
-
- rc = qeth_l3_verify_dev(dev);
- if (rc == QETH_REAL_CARD)
- card = dev->ml_priv;
- else if (rc == QETH_VLAN_CARD)
- card = vlan_dev_real_dev(dev)->ml_priv;
- if (card && card->options.layer2)
- card = NULL;
- if (card)
- QETH_CARD_TEXT_(card, 4, "%d", rc);
- return card ;
-}
-
static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
{
QETH_DBF_TEXT(SETUP, 2, "stopcard");
prot);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setassparms.data.query_arp.request_bits = 0x000F;
cmd->data.setassparms.data.query_arp.reply_bits = 0;
cmd->data.setassparms.data.query_arp.no_entries = 0;
if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
card->dev->dev_id = card->info.unique_id &
0xffff;
+
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+
if (!card->info.guestlan) {
- card->dev->hw_features = NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
- card->dev->vlan_features = NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
card->dev->features |= NETIF_F_SG;
+ card->dev->hw_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+ card->dev->vlan_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
}
}
} else if (card->info.type == QETH_CARD_TYPE_IQD) {
qeth_l3_set_offline(cgdev);
if (card->dev) {
- netif_napi_del(&card->napi);
unregister_netdev(card->dev);
+ free_netdev(card->dev);
card->dev = NULL;
}
card->info.hwtrap = 0;
card->state = CARD_STATE_HARDSETUP;
- memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_print_status_message(card);
/* softsetup */
};
EXPORT_SYMBOL_GPL(qeth_l3_discipline);
+static int qeth_l3_handle_ip_event(struct qeth_card *card,
+ struct qeth_ipaddr *addr,
+ unsigned long event)
+{
+ switch (event) {
+ case NETDEV_UP:
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_add_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
+ return NOTIFY_OK;
+ case NETDEV_DOWN:
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_delete_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
+{
+ if (is_vlan_dev(dev))
+ dev = vlan_dev_real_dev(dev);
+ if (dev->netdev_ops == &qeth_l3_osa_netdev_ops ||
+ dev->netdev_ops == &qeth_l3_netdev_ops)
+ return (struct qeth_card *) dev->ml_priv;
+ return NULL;
+}
+
static int qeth_l3_ip_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = (struct net_device *)ifa->ifa_dev->dev;
- struct qeth_ipaddr *addr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct qeth_ipaddr addr;
struct qeth_card *card;
if (dev_net(dev) != &init_net)
return NOTIFY_DONE;
QETH_CARD_TEXT(card, 3, "ipevent");
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
- if (addr) {
- addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
- addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- case NETDEV_DOWN:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- }
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
+ addr.u.a4.addr = be32_to_cpu(ifa->ifa_address);
+ addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- kfree(addr);
- return NOTIFY_DONE;
+ return qeth_l3_handle_ip_event(card, &addr, event);
}
static struct notifier_block qeth_l3_ip_notifier = {
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *dev = (struct net_device *)ifa->idev->dev;
- struct qeth_ipaddr *addr;
+ struct net_device *dev = ifa->idev->dev;
+ struct qeth_ipaddr addr;
struct qeth_card *card;
card = qeth_l3_get_card_from_dev(dev);
if (!qeth_is_supported(card, IPA_IPV6))
return NOTIFY_DONE;
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (addr) {
- memcpy(&addr->u.a6.addr, &ifa->addr, sizeof(struct in6_addr));
- addr->u.a6.pfxlen = ifa->prefix_len;
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- case NETDEV_DOWN:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- }
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+ addr.u.a6.addr = ifa->addr;
+ addr.u.a6.pfxlen = ifa->prefix_len;
- kfree(addr);
- return NOTIFY_DONE;
+ return qeth_l3_handle_ip_event(card, &addr, event);
}
static struct notifier_block qeth_l3_ip6_notifier = {
if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
vhost_net_buf_get_size(rxq),
- __skb_array_destroy_skb);
+ tun_ptr_free);
rxq->head = rxq->tail = 0;
}
}
n->vqs[i].done_idx = 0;
n->vqs[i].vhost_hlen = 0;
n->vqs[i].sock_hlen = 0;
+ n->vqs[i].rx_ring = NULL;
vhost_net_buf_init(&n->vqs[i].rxq);
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
vhost_net_disable_vq(n, vq);
vq->private_data = NULL;
vhost_net_buf_unproduce(nvq);
+ nvq->rx_ring = NULL;
mutex_unlock(&vq->mutex);
return sock;
}
struct sockaddr_ll sa;
char buf[MAX_ADDR_LEN];
} uaddr;
- int uaddr_len = sizeof uaddr, r;
+ int r;
struct socket *sock = sockfd_lookup(fd, &r);
if (!sock)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
- &uaddr_len, 0);
- if (r)
+ r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+ if (r < 0)
goto err;
if (uaddr.sa.sll_family != AF_PACKET) {
vhost_net_disable_vq(n, vq);
vq->private_data = sock;
vhost_net_buf_unproduce(nvq);
- if (index == VHOST_NET_VQ_RX)
- nvq->rx_ring = get_tap_ptr_ring(fd);
r = vhost_vq_init_access(vq);
if (r)
goto err_used;
r = vhost_net_enable_vq(n, vq);
if (r)
goto err_used;
+ if (index == VHOST_NET_VQ_RX)
+ nvq->rx_ring = get_tap_ptr_ring(fd);
oldubufs = nvq->ubufs;
nvq->ubufs = ubufs;
dma_addr_t map;
};
-struct mlx5_buf {
- struct mlx5_buf_list direct;
- int npages;
- int size;
- u8 page_shift;
-};
-
struct mlx5_frag_buf {
struct mlx5_buf_list *frags;
int npages;
u8 page_shift;
};
+struct mlx5_frag_buf_ctrl {
+ struct mlx5_frag_buf frag_buf;
+ u32 sz_m1;
+ u32 frag_sz_m1;
+ u8 log_sz;
+ u8 log_stride;
+ u8 log_frag_strides;
+};
+
struct mlx5_eq_tasklet {
struct list_head list;
struct list_head process_list;
mempool_t *pool;
};
+struct mlx5_cq_table {
+ /* protect radix tree */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
struct mlx5_eq {
struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
__be32 __iomem *doorbell;
u32 cons_index;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
int size;
unsigned int irqn;
u8 eqn;
struct mlx5_core_rsc_common common; /* must be first */
u32 srqn;
int max;
- int max_gs;
- int max_avail_gather;
+ size_t max_gs;
+ size_t max_avail_gather;
int wqe_shift;
void (*event) (struct mlx5_core_srq *, enum mlx5_event);
struct delayed_work recover_work;
};
-struct mlx5_cq_table {
- /* protect radix tree
- */
- spinlock_t lock;
- struct radix_tree_root tree;
-};
-
struct mlx5_qp_table {
/* protect radix tree
*/
struct dentry *cmdif_debugfs;
/* end: qp staff */
- /* start: cq staff */
- struct mlx5_cq_table cq_table;
- /* end: cq staff */
-
/* start: mkey staff */
struct mlx5_mkey_table mkey_table;
/* end: mkey staff */
bool grh_required;
};
-static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset)
{
- return buf->direct.buf + offset;
+ return buf->frags->buf + offset;
}
#define STRUCT_FIELD(header, field) \
return key & 0xffffff00u;
}
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+ void *cqc)
+{
+ fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+ fbc->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
+ fbc->sz_m1 = (1 << fbc->log_sz) - 1;
+ fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
+ fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
+}
+
+static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
+ u32 ix)
+{
+ unsigned int frag = (ix >> fbc->log_frag_strides);
+
+ return fbc->frag_buf.frags[frag].buf +
+ ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
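/*
 * Lookup math with illustrative numbers: for 64-byte CQEs (log_stride = 6)
 * and 4K pages, each fragment holds PAGE_SIZE >> log_stride = 64 entries,
 * so log_frag_strides = 6 and frag_sz_m1 = 63.  Entry ix = 130 then lives
 * in fragment 130 >> 6 = 2, at byte offset (130 & 63) << 6 = 128 within
 * that fragment.
 */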
int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+ struct mlx5_frag_buf *buf, int node);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
void mlx5_register_debugfs(void);
void mlx5_unregister_debugfs(void);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
int node);
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
- int *sockaddr_len, int peer);
+ int peer);
__poll_t (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
+ bool sock_is_registered(int family);
int __sock_create(struct net *net, int family, int type, int proto,
struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
int *optlen);
int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
{
return 0;
}
+ int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
+ u16 regnum);
+ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
+ u16 regnum, u16 val);
/* Clause 45 PHY */
int genphy_c45_restart_aneg(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
static inline int phy_read_status(struct phy_device *phydev)
{
if (!phydev->drv)
int phy_drivers_register(struct phy_driver *new_driver, int n,
struct module *owner);
void phy_state_machine(struct work_struct *work);
- void phy_change(struct phy_device *phydev);
void phy_change_work(struct work_struct *work);
void phy_mac_interrupt(struct phy_device *phydev);
void phy_start_machine(struct phy_device *phydev);
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
+void mm_unaccount_pinned_pages(struct mmpin *mmp);
+
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
}
+ /* Note: Should be called only if skb_is_gso(skb) is true */
+ static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
+ {
+ return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
+ }
+
static inline void skb_gso_reset(struct sk_buff *skb)
{
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
}
+ static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
+ u16 increment)
+ {
+ if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+ return;
+ shinfo->gso_size += increment;
+ }
+
+ static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
+ u16 decrement)
+ {
+ if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+ return;
+ shinfo->gso_size -= decrement;
+ }
+
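/*
 * GSO_BY_FRAGS is a sentinel gso_size used for SCTP-style segmentation (see
 * skb_is_gso_sctp() above), so it must never be adjusted arithmetically; the
 * WARN_ON_ONCE() guards catch callers that would corrupt it.  Assumed usage,
 * for a caller that adds or removes len_diff bytes of header on a GSO skb:
 *
 *	skb_decrease_gso_size(skb_shinfo(skb), len_diff);   // headers grew
 *	skb_increase_gso_size(skb_shinfo(skb), len_diff);   // headers shrank
 */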
void __skb_warn_lro_forwarding(const struct sk_buff *skb);
static inline bool skb_warn_if_lro(const struct sk_buff *skb)
return 0;
}
+/* Special input handler for packets caught by router alert option.
+ They are selected only by the protocol field, and then processed like
+ local ones; but only if someone wants them! Otherwise, a router
+ not running rsvpd will kill RSVP.
+
+ What user space does with them is its own problem.
+ I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
+ but the receiver should be clever enough, e.g., to forward mtrace requests
+ sent to a multicast group, so that they reach the destination's designated router.
+ */
+
struct ip_ra_chain {
struct ip_ra_chain __rcu *next;
struct sock *sk;
struct rcu_head rcu;
};
-extern struct ip_ra_chain __rcu *ip_ra_chain;
-
/* IP flags. */
#define IP_CE 0x8000 /* Flag: "Congestion" */
#define IP_DF 0x4000 /* Flag: "Don't Fragment" */
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
return --iph->ttl;
}
+ static inline int ip_mtu_locked(const struct dst_entry *dst)
+ {
+ const struct rtable *rt = (const struct rtable *)dst;
+
+ return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+ }
+
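/*
 * ip_mtu_locked() extends the old "RTAX_MTU metric locked" test with the new
 * per-route rt_mtu_locked flag (and its fnhe_mtu_locked counterpart): when
 * either is set, ip_dont_fragment() stops forcing DF and forwarding uses the
 * destination MTU as-is.
 */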
static inline
int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
{
return pmtudisc == IP_PMTUDISC_DO ||
(pmtudisc == IP_PMTUDISC_WANT &&
- !(dst_metric_locked(dst, RTAX_MTU)));
+ !ip_mtu_locked(dst));
}
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
struct net *net = dev_net(dst->dev);
if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
- dst_metric_locked(dst, RTAX_MTU) ||
+ ip_mtu_locked(dst) ||
!forwarding)
return dst_mtu(dst);
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags);
+ struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags);
+ const struct sk_buff *skb, int flags);
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int ifindex, struct flowi6 *fl6, int flags);
+ int ifindex, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
void ip6_route_init_special_entries(void);
int ip6_route_init(void);
}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
void rt6_multipath_rebalance(struct rt6_info *rt);
+ void rt6_uncached_list_add(struct rt6_info *rt);
+ void rt6_uncached_list_del(struct rt6_info *rt);
+
static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
{
const struct dst_entry *dst = skb_dst(skb);
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
#endif
int fnhe_genid;
__be32 fnhe_daddr;
u32 fnhe_pmtu;
+ bool fnhe_mtu_locked;
__be32 fnhe_gw;
unsigned long fnhe_expires;
struct rtable __rcu *fnhe_rth_input;
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
return 0;
}
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
+
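/*
 * Dissection only happens when some installed rule actually matches on
 * sport/dport/ip-proto (tracked in net->ipv4.fib_rules_require_fldissect);
 * the keys are copied into the flowi4 so rule evaluation and, later, the
 * multipath hash can reuse them instead of dissecting the packet again.
 */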
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb);
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
u8 ampdu_len;
u8 antenna;
u16 tx_time;
+ bool is_valid_ack_signal;
void *status_driver_data[19 / sizeof(void *)];
} status;
struct {
* the first subframe.
* @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
* be done in the hardware.
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
+ * frame
+ * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
RX_FLAG_MIC_STRIPPED = BIT(21),
RX_FLAG_ALLOW_SAME_PN = BIT(22),
RX_FLAG_ICV_STRIPPED = BIT(23),
+ RX_FLAG_AMPDU_EOF_BIT = BIT(24),
+ RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25),
};
/**
* @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
* TDLS links.
*
+ * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
+ * mgd_prepare_tx() callback to be called before transmission of a
+ * deauthentication frame in case the association was completed but no
+ * beacon was heard. This is required in multi-channel scenarios, where the
+ * virtual interface might not be given air time for the transmission of
+ * the frame, as it is not synced with the AP/P2P GO yet, and thus the
+ * deauthentication frame might not be transmitted.
+ *
+ * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
+ * support QoS NDP for AP probing - that's most likely a driver bug.
*
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+ IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
+ IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
* management frame prior to having successfully associated to allow the
* driver to give it channel time for the transmission, to get a response
* and to be able to synchronize with the GO.
+ * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
+ * would also call this function before transmitting a deauthentication
+ * frame in case that no beacon was heard from the AP/P2P GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
* The callback is optional and can (should!) sleep.
__be32 rt_gateway;
/* Miscellaneous cached information */
- u32 rt_pmtu;
+ u32 rt_mtu_locked:1,
+ rt_pmtu:31;
- u32 rt_table_id;
-
struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
};
void fib_add_ifaddr(struct in_ifaddr *);
void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
+ void rt_add_uncached_list(struct rtable *rt);
+ void rt_del_uncached_list(struct rtable *rt);
+
static inline void ip_rt_put(struct rtable *rt)
{
/* dst_release() accepts a NULL parameter.
return false;
}
-/* Reset all TX qdiscs greater then index of a device. */
+/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
*to_free = skb;
}
+ static inline void __qdisc_drop_all(struct sk_buff *skb,
+ struct sk_buff **to_free)
+ {
+ if (skb->prev)
+ skb->prev->next = *to_free;
+ else
+ skb->next = *to_free;
+ *to_free = skb;
+ }
+
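/*
 * Unlike __qdisc_drop(), this frees an entire GSO segment chain: a caller
 * such as sch_netem drops a just-segmented skb whose ->next links the
 * remaining segments and whose ->prev (as left by skb_segment()) points at
 * the tail, so the whole list is spliced onto *to_free in one step.  A lone
 * skb (prev == NULL) falls back to the single-skb behaviour.
 */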
static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
struct qdisc_skb_head *qh,
struct sk_buff **to_free)
return NET_XMIT_DROP;
}
+ static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+ {
+ __qdisc_drop_all(skb, to_free);
+ qdisc_qstats_drop(sch);
+
+ return NET_XMIT_DROP;
+ }
+
/* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
long it will take to send a packet given its size.
*/
struct page_frag sk_frag;
netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps;
+ netdev_features_t sk_route_forced_caps;
int sk_gso_type;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
int proto_register(struct proto *prot, int alloc_slab);
void proto_unregister(struct proto *prot);
+ int sock_load_diag_module(int family, int protocol);
#ifdef SOCK_REFCNT_DEBUG
static inline void sk_refcnt_debug_inc(struct sock *sk)
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
-int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
+int sock_no_getname(struct socket *, struct sockaddr *, int);
__poll_t sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
sk->sk_route_caps &= ~flags;
}
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
- return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
- (sk->sk_family == PF_INET &&
- (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
- (sk->sk_family == PF_INET6 &&
- (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, char *to,
int copy, int offset)
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr, unsigned int *sg_size,
+ int first_coalesce);
+
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
*/
#define ETH_ALEN 6 /* Octets in one ethernet addr */
+ #define ETH_TLEN 2 /* Octets in ethernet type field */
#define ETH_HLEN 14 /* Total octets in header. */
#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
#define ETH_DATA_LEN 1500 /* Max. octets in payload */
#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */
+#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */
#define ETH_P_TIPC 0x88CA /* TIPC */
#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */
#define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
-static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
+static int sockmap_get_from_fd(const union bpf_attr *attr,
+ int type, bool attach)
{
struct bpf_prog *prog = NULL;
int ufd = attr->target_fd;
return PTR_ERR(map);
if (attach) {
- prog = bpf_prog_get_type(attr->attach_bpf_fd,
- BPF_PROG_TYPE_SK_SKB);
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
if (IS_ERR(prog)) {
fdput(f);
return PTR_ERR(prog);
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_SK_MSG_VERDICT:
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, true);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
default:
return -EINVAL;
}
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_SK_MSG_VERDICT:
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, false);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
default:
return -EINVAL;
}
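From user space the new attach type is used like the existing sockmap ones; a hedged sketch (the fds are placeholders, assuming the tools/lib/bpf bpf_prog_attach() wrapper of this era):

#include <linux/bpf.h>
#include <bpf/bpf.h>            /* header path depends on how libbpf is installed */

/* prog_fd: a loaded BPF_PROG_TYPE_SK_MSG program; map_fd: a sockmap */
static int attach_msg_verdict(int prog_fd, int map_fd)
{
        return bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
}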
union bpf_attr attr = {};
int err;
- if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
+ if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
.arg3_type = ARG_ANYTHING,
};
- BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
+ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+ {
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_perf_event_output_proto_tp;
+ case BPF_FUNC_get_stackid:
+ return &bpf_get_stackid_proto_tp;
+ default:
+ return tracing_func_proto(func_id);
+ }
+ }
+
+ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+ {
+ if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
+ return false;
+ if (type != BPF_READ)
+ return false;
+ if (off % size != 0)
+ return false;
+
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
+ return true;
+ }
+
+ const struct bpf_verifier_ops tracepoint_verifier_ops = {
+ .get_func_proto = tp_prog_func_proto,
+ .is_valid_access = tp_prog_is_valid_access,
+ };
+
+ const struct bpf_prog_ops tracepoint_prog_ops = {
+ };
+
+ BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
struct bpf_perf_event_value *, buf, u32, size)
{
int err = -EINVAL;
return err;
}
- static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
- .func = bpf_perf_prog_read_value_tp,
+ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
+ .func = bpf_perf_prog_read_value,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg3_type = ARG_CONST_SIZE,
};
- static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
case BPF_FUNC_perf_prog_read_value:
- return &bpf_perf_prog_read_value_proto_tp;
+ return &bpf_perf_prog_read_value_proto;
default:
return tracing_func_proto(func_id);
}
}
- static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
- {
- if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
- return false;
- if (type != BPF_READ)
- return false;
- if (off % size != 0)
- return false;
-
- BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
- return true;
- }
-
- const struct bpf_verifier_ops tracepoint_verifier_ops = {
- .get_func_proto = tp_prog_func_proto,
- .is_valid_access = tp_prog_is_valid_access,
- };
-
- const struct bpf_prog_ops tracepoint_prog_ops = {
- };
-
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
- sample_period);
+ const int size_u64 = sizeof(u64);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
switch (off) {
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
- bpf_ctx_record_field_size(info, size_sp);
- if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+ return false;
+ break;
+ case bpf_ctx_range(struct bpf_perf_event_data, addr):
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
default:
bpf_target_off(struct perf_sample_data, period, 8,
target_size));
break;
+ case offsetof(struct bpf_perf_event_data, addr):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, addr, 8,
+ target_size));
+ break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), si->dst_reg, si->src_reg,
}
const struct bpf_verifier_ops perf_event_verifier_ops = {
- .get_func_proto = tp_prog_func_proto,
+ .get_func_proto = pe_prog_func_proto,
.is_valid_access = pe_prog_is_valid_access,
.convert_ctx_access = pe_prog_convert_ctx_access,
};
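With the addr ctx-access rewrite in place, a perf_event BPF program can read the sampled address directly; a minimal sketch (assuming a clang BPF build and the uapi bpf_perf_event.h header):

#include <linux/types.h>
#include <linux/bpf_perf_event.h>

#define SEC(name) __attribute__((section(name), used))

SEC("perf_event")
int sample_addr_prog(struct bpf_perf_event_data *ctx)
{
        /* loaded through the convert_ctx_access rewrite shown above */
        __u64 addr = ctx->addr;

        return addr != 0;
}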
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "send.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
batadv_arp_hw_src(skb, hdr_size), &ip_src,
batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
- if (hdr_size == 0)
+ if (hdr_size < sizeof(struct batadv_unicast_packet))
return;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
* the one with the lowest address
*/
if (tmp_max == max && max_orig_node &&
- batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+ batadv_compare_eth(candidate->orig, max_orig_node->orig))
goto out;
ret = true;
}
#endif
+/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ * netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_dat_entry *dat_entry)
+{
+ int msecs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ if (!hdr)
+ return -ENOBUFS;
+
+ msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+ if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+ dat_entry->ip) ||
+ nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+ dat_entry->mac_addr) ||
+ nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ * a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_dat_entry *dat_entry;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_dat_cache_dump_entry(msg, portid, seq,
+ dat_entry)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->dat.hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_dat_cache_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq, head,
+ &idx))
+ break;
+
+ bucket++;
+ idx = 0;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
/**
 * batadv_arp_get_type() - parse an ARP packet and get the type
* @bat_priv: the bat priv with all the soft interface information
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
+ #include <linux/eventpoll.h>
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
#include <linux/compiler.h>
#include <linux/debugfs.h>
#include <linux/errno.h>
+ #include <linux/eventpoll.h>
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
+#include <net/genetlink.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
return upper;
}
+/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+ static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+ static const u8 prefix[] = {0x33, 0x33};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
/**
* batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
netif_addr_lock_bh(bridge ? bridge : dev);
netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+ if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ continue;
+
+ if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ continue;
+
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+ continue;
+
+ if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+ continue;
+
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
bat_priv->mcast.enabled = true;
}
- return !(mcast_data.flags &
- (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+ return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+ mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
}
/**
if (!batadv_mcast_mla_tvlv_update(bat_priv))
goto update;
- ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
}
#endif
+/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv)
+{
+ u32 flags = bat_priv->mcast.flags;
+ u32 flags_priv = BATADV_NO_FLAGS;
+
+ if (bat_priv->mcast.bridged) {
+ flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+ if (bat_priv->mcast.querier_ipv4.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+ if (bat_priv->mcast.querier_ipv6.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+ if (bat_priv->mcast.querier_ipv4.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+ if (bat_priv->mcast.querier_ipv6.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+ nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_orig_node *orig_node)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig_node->orig)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capabilities)) {
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+ orig_node->mcast_flags)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ * table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, long *idx_skip)
+{
+ struct batadv_orig_node *orig_node;
+ long idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capa_initialized))
+ continue;
+
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+ orig_node)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ long bucket_tmp = *bucket;
+ struct hlist_head *head;
+ long idx_tmp = *idx;
+
+ while (bucket_tmp < hash->size) {
+ head = &hash->table[bucket_tmp];
+
+ if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+ &idx_tmp))
+ break;
+
+ bucket_tmp++;
+ idx_tmp = 0;
+ }
+
+ *bucket = bucket_tmp;
+ *idx = idx_tmp;
+
+ return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ * callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+ struct batadv_hard_iface **primary_if)
+{
+ struct batadv_hard_iface *hard_iface = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ hard_iface = batadv_primary_if_get_selected(bat_priv);
+ if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (!ret && primary_if)
+ *primary_if = hard_iface;
+ else
+ batadv_hardif_put(hard_iface);
+
+ return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_priv *bat_priv;
+ long *bucket = &cb->args[0];
+ long *idx = &cb->args[1];
+ int ret;
+
+ ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+ if (ret)
+ return ret;
+
+ bat_priv = netdev_priv(primary_if->soft_iface);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, bucket, idx);
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+
/**
* batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
/**
* batadv_reroute_unicast_packet() - update the unicast header for re-routing
* @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
* @unicast_packet: the unicast header to be updated
* @dst_addr: the payload destination
* @vid: VLAN identifier
* Return: true if the packet header has been updated, false otherwise
*/
static bool
- batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_unicast_packet *unicast_packet,
u8 *dst_addr, unsigned short vid)
{
}
/* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, orig_addr);
unicast_packet->ttvn = orig_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ret = true;
out:
* the packet to
*/
if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid))
batadv_dbg_ratelimited(BATADV_DBG_TT,
bat_priv,
* destination can possibly be updated and forwarded towards the new
* target host
*/
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid)) {
batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
if (!primary_if)
return false;
+ /* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+ unicast_packet->ttvn = curr_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
batadv_hardif_put(primary_if);
- unicast_packet->ttvn = curr_ttvn;
-
return true;
}
struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
- struct ethhdr *ethhdr;
int ret = NET_RX_DROP;
bool is4addr, is_gw;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
- unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
- ethhdr = eth_hdr(skb);
-
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
if (is4addr)
if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
goto free_skb;
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
/* If this is a unicast packet from another backgone gw,
* drop it.
*/
- orig_addr_gw = ethhdr->h_source;
+ orig_addr_gw = eth_hdr(skb)->h_source;
orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
if (orig_node_gw) {
is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
}
if (is4addr) {
+ unicast_4addr_packet =
+ (struct batadv_unicast_4addr_packet *)skb->data;
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
static void skb_update_prio(struct sk_buff *skb)
{
- struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+ const struct netprio_map *map;
+ const struct sock *sk;
+ unsigned int prioidx;
- if (!skb->priority && skb->sk && map) {
- unsigned int prioidx =
- sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
+ if (skb->priority)
+ return;
+ map = rcu_dereference_bh(skb->dev->priomap);
+ if (!map)
+ return;
+ sk = skb_to_full_sk(skb);
+ if (!sk)
+ return;
- if (prioidx < map->priomap_len)
- skb->priority = map->priomap[prioidx];
- }
+ prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
+
+ if (prioidx < map->priomap_len)
+ skb->priority = map->priomap[prioidx];
}
#else
#define skb_update_prio(skb)
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
}
}
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
+ .async = true,
};
static void __net_exit default_device_exit(struct net *net)
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
+ .async = true,
};
/*
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
- err_skb_send_alloc:
genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
table->counters_enabled,
&dump_ctx);
if (err)
- goto err_entries_dump;
+ return err;
send_done:
nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
return genlmsg_reply(dump_ctx.skb, info);
-
- err_entries_dump:
- err_skb_send_alloc:
- genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
- nlmsg_free(dump_ctx.skb);
- return err;
}
static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
return genlmsg_reply(skb, info);
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
- err_skb_send_alloc:
genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 remainder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &remainder);
+ if (remainder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
{
struct devlink_resource *resource;
struct list_head *resource_list;
+ bool top_hierarchy;
int err = 0;
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource) {
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+ struct bpf_map *, map, u32, key, u64, flags)
+{
+ /* If user passes invalid input drop the packet. */
+ if (unlikely(flags))
+ return SK_DROP;
+
+ msg->key = key;
+ msg->flags = flags;
+ msg->map = map;
+
+ return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+ struct sock *sk = NULL;
+
+ if (msg->map) {
+ sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+ msg->key = 0;
+ msg->map = NULL;
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+ .func = bpf_msg_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->apply_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+ .func = bpf_msg_apply_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->cork_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+ .func = bpf_msg_cork_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_pull_data,
+ struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+ unsigned int len = 0, offset = 0, copy = 0;
+ struct scatterlist *sg = msg->sg_data;
+ int first_sg, last_sg, i, shift;
+ unsigned char *p, *to, *from;
+ int bytes = end - start;
+ struct page *page;
+
+ if (unlikely(flags || end <= start))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg_start;
+ do {
+ len = sg[i].length;
+ offset += len;
+ if (start < offset + len)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != msg->sg_end);
+
+ if (unlikely(start >= offset + len))
+ return -EINVAL;
+
+ if (!msg->sg_copy[i] && bytes <= len)
+ goto out;
+
+ first_sg = i;
+
+ /* At this point we need to linearize multiple scatterlist
+ * elements or a single shared page. Either way we need to
+ * copy into a linear buffer exclusively owned by BPF. Then
+ * place the buffer in the scatterlist and fixup the original
+ * entries by removing the entries now in the linear buffer
+ * and shifting the remaining entries. For now we do not try
+ * to copy partial entries to avoid complexity of running out
+ * of sg_entry slots. The downside is reading a single byte
+ * will copy the entire sg entry.
+ */
+ do {
+ copy += sg[i].length;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ if (bytes < copy)
+ break;
+ } while (i != msg->sg_end);
+ last_sg = i;
+
+ if (unlikely(copy < end - start))
+ return -EINVAL;
+
+ page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+ if (unlikely(!page))
+ return -ENOMEM;
+ p = page_address(page);
+ offset = 0;
+
+ i = first_sg;
+ do {
+ from = sg_virt(&sg[i]);
+ len = sg[i].length;
+ to = p + offset;
+
+ memcpy(to, from, len);
+ offset += len;
+ sg[i].length = 0;
+ put_page(sg_page(&sg[i]));
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != last_sg);
+
+ sg[first_sg].length = copy;
+ sg_set_page(&sg[first_sg], page, copy, 0);
+
+ /* To repair sg ring we need to shift entries. If we only
+ * had a single entry though we can just replace it and
+ * be done. Otherwise walk the ring and shift the entries.
+ */
+ shift = last_sg - first_sg - 1;
+ if (!shift)
+ goto out;
+
+ i = first_sg + 1;
+ do {
+ int move_from;
+
+ if (i + shift >= MAX_SKB_FRAGS)
+ move_from = i + shift - MAX_SKB_FRAGS;
+ else
+ move_from = i + shift;
+
+ if (move_from == msg->sg_end)
+ break;
+
+ sg[i] = sg[move_from];
+ sg[move_from].length = 0;
+ sg[move_from].page_link = 0;
+ sg[move_from].offset = 0;
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (1);
+ msg->sg_end -= shift;
+ if (msg->sg_end < 0)
+ msg->sg_end += MAX_SKB_FRAGS;
+out:
+ msg->data = sg_virt(&sg[i]) + start - offset;
+ msg->data_end = msg->data + bytes;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+ .func = bpf_msg_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
u32 off = skb_mac_header_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* SKB_GSO_TCPV4 needs to be changed into
* SKB_GSO_TCPV6.
*/
- if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ if (shinfo->gso_type & SKB_GSO_TCPV4) {
+ shinfo->gso_type &= ~SKB_GSO_TCPV4;
+ shinfo->gso_type |= SKB_GSO_TCPV6;
}
/* Due to IPv6 header, MSS needs to be downgraded. */
- skb_shinfo(skb)->gso_size -= len_diff;
+ skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
u32 off = skb_mac_header_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* SKB_GSO_TCPV6 needs to be changed into
* SKB_GSO_TCPV4.
*/
- if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ if (shinfo->gso_type & SKB_GSO_TCPV6) {
+ shinfo->gso_type &= ~SKB_GSO_TCPV6;
+ shinfo->gso_type |= SKB_GSO_TCPV4;
}
/* Due to IPv4 header, MSS can be upgraded. */
- skb_shinfo(skb)->gso_size += len_diff;
+ skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Due to header grow, MSS needs to be downgraded. */
- skb_shinfo(skb)->gso_size -= len_diff;
+ skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
return 0;
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Due to header shrink, MSS can be upgraded. */
- skb_shinfo(skb)->gso_size += len_diff;
+ skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
return 0;
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta)
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data)
return true;
return false;
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
}
}
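The new BPF_F_SEQ_NUMBER flag is passed to bpf_skb_set_tunnel_key() alongside the existing checksum/fragment flags; a hedged fragment (helper declared by hand as in the samples of this era, program attachment not shown):

#include <linux/bpf.h>

static int (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb,
                                     struct bpf_tunnel_key *key, int size,
                                     int flags) = (void *)BPF_FUNC_skb_set_tunnel_key;

static int set_seq_tunnel(struct __sk_buff *skb)
{
        struct bpf_tunnel_key key = {};

        key.tunnel_id = 42;
        /* request sequence numbering on the encapsulating tunnel header */
        return bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
                                      BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
}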
+static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_msg_redirect_map:
+ return &bpf_msg_redirect_map_proto;
+ case BPF_FUNC_msg_apply_bytes:
+ return &bpf_msg_apply_bytes_proto;
+ case BPF_FUNC_msg_cork_bytes:
+ return &bpf_msg_cork_bytes_proto;
+ case BPF_FUNC_msg_pull_data:
+ return &bpf_msg_pull_data_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
return bpf_skb_is_valid_access(off, size, type, info);
}
+static bool sk_msg_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct sk_msg_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ if (off < 0 || off >= sizeof(struct sk_msg_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
return insn - insn_buf;
}
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sk_msg_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data));
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
const struct bpf_prog_ops sk_skb_prog_ops = {
};
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+ .get_func_proto = sk_msg_func_proto,
+ .is_valid_access = sk_msg_is_valid_access,
+ .convert_ctx_access = sk_msg_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
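Putting the new program type, helpers, and verifier ops together, a minimal SK_MSG program could look like this hedged sketch (helper declarations hand-rolled as in the samples of this era):

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

static int (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) =
        (void *)BPF_FUNC_msg_apply_bytes;
static int (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) =
        (void *)BPF_FUNC_msg_cork_bytes;

SEC("sk_msg")
int msg_verdict(struct sk_msg_md *msg)
{
        /* apply the verdict to the first 512 bytes, cork until they arrive */
        bpf_msg_apply_bytes(msg, 512);
        bpf_msg_cork_bytes(msg, 512);

        return SK_PASS;
}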
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ sk->sk_error_report(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
- } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+ } else if (unlikely(skb_is_gso_sctp(skb))) {
thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
+ int mac_len;
+
if (skb_cow(skb, skb_headroom(skb)) < 0) {
kfree_skb(skb);
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
- 2 * ETH_ALEN);
+ mac_len = skb->data - skb_mac_header(skb);
+ memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+ mac_len - VLAN_HLEN - ETH_TLEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
break;
default:
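With this change SO_ZEROCOPY can be enabled on PF_RDS sockets as well as TCP ones; the user-space side stays the same (hedged sketch, SO_ZEROCOPY value taken from asm-generic/socket.h in case libc headers predate it):

#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60          /* asm-generic/socket.h */
#endif

static int enable_zerocopy(int fd)
{
        int one = 1;

        return setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
}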
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
}
EXPORT_SYMBOL(sk_page_frag_refill);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+ int first_coalesce)
+{
+ int sg_curr = *sg_curr_index, use = 0, rc = 0;
+ unsigned int size = *sg_curr_size;
+ struct page_frag *pfrag;
+ struct scatterlist *sge;
+
+ len -= size;
+ pfrag = sk_page_frag(sk);
+
+ while (len > 0) {
+ unsigned int orig_offset;
+
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ use = min_t(int, len, pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, use)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sk_mem_charge(sk, use);
+ size += use;
+ orig_offset = pfrag->offset;
+ pfrag->offset += use;
+
+ sge = sg + sg_curr - 1;
+ if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
+ sg->offset + sg->length == orig_offset) {
+ sg->length += use;
+ } else {
+ sge = sg + sg_curr;
+ sg_unmark_end(sge);
+ sg_set_page(sge, pfrag->page, use, orig_offset);
+ get_page(pfrag->page);
+ sg_curr++;
+
+ if (sg_curr == MAX_SKB_FRAGS)
+ sg_curr = 0;
+
+ if (sg_curr == sg_start) {
+ rc = -ENOSPC;
+ break;
+ }
+ }
+
+ len -= use;
+ }
+out:
+ *sg_curr_size = size;
+ *sg_curr_index = sg_curr;
+ return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
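A hedged caller sketch for the newly exported sk_alloc_sg() (example_fill_sg is invented; the real users in this series are the sockmap/TLS transmit paths). The scatterlist must have at least MAX_SKB_FRAGS slots; the cursor and accumulated size are returned through the pointer arguments.

#include <linux/scatterlist.h>
#include <linux/skbuff.h>
#include <net/sock.h>

static int example_fill_sg(struct sock *sk, struct scatterlist *sg, int len)
{
        unsigned int sg_size = 0;
        int sg_curr = 0;

        sg_init_table(sg, MAX_SKB_FRAGS);

        /* start and first_coalesce both at slot 0 */
        return sk_alloc_sg(sk, len, sg, 0, &sg_curr, &sg_size, 0);
}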
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
static struct pernet_operations net_inuse_ops = {
.init = sock_inuse_init_net,
.exit = sock_inuse_exit_net,
+ .async = true,
};
static __init int net_inuse_init(void)
}
EXPORT_SYMBOL(proto_unregister);
+ int sock_load_diag_module(int family, int protocol)
+ {
+ if (!protocol) {
+ if (!sock_is_registered(family))
+ return -ENOENT;
+
+ return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family);
+ }
+
+ #ifdef CONFIG_INET
+ if (family == AF_INET &&
+ !rcu_access_pointer(inet_protos[protocol]))
+ return -ENOENT;
+ #endif
+
+ return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family, protocol);
+ }
+ EXPORT_SYMBOL(sock_load_diag_module);
+
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(proto_list_mutex)
static __net_initdata struct pernet_operations proto_net_ops = {
.init = proto_init_net,
.exit = proto_exit_net,
+ .async = true,
};
static int __init proto_init(void)
return -EINVAL;
if (sock_diag_handlers[req->sdiag_family] == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, req->sdiag_family);
+ sock_load_diag_module(req->sdiag_family, 0);
mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[req->sdiag_family];
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
if (inet_rcv_compat == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
mutex_lock(&sock_diag_table_mutex);
if (inet_rcv_compat != NULL)
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET6);
+ sock_load_diag_module(AF_INET6, 0);
break;
}
return 0;
static struct pernet_operations diag_net_ops = {
.init = diag_net_init,
.exit = diag_net_exit,
+ .async = true,
};
static int __init sock_diag_init(void)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
static int lowpan_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct wpan_dev *wpan_dev;
- if (wdev->type != ARPHRD_IEEE802154)
+ if (ndev->type != ARPHRD_IEEE802154)
+ return NOTIFY_DONE;
+ wpan_dev = ndev->ieee802154_ptr;
+ if (!wpan_dev)
return NOTIFY_DONE;
switch (event) {
* also delete possible lowpan interfaces which belongs
* to the wpan interface.
*/
- if (wdev->ieee802154_ptr->lowpan_dev)
- lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
+ if (wpan_dev->lowpan_dev)
+ lowpan_dellink(wpan_dev->lowpan_dev, NULL);
break;
default:
return NOTIFY_DONE;
static struct pernet_operations ip_rt_proc_ops __net_initdata = {
.init = ip_rt_do_proc_init,
.exit = ip_rt_do_proc_exit,
+ .async = true,
};
static int __init ip_rt_proc_init(void)
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
rt->rt_pmtu = fnhe->fnhe_pmtu;
+ rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
rt->dst.expires = fnhe->fnhe_expires;
if (fnhe->fnhe_gw) {
}
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
- u32 pmtu, unsigned long expires)
+ u32 pmtu, bool lock, unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu)
+ if (pmtu) {
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
+ }
fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = expires;
/* Exception created; mark the cached routes for the nexthop
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, jiffies + ip_rt_gc_timeout);
+ 0, false,
+ jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
{
struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ bool lock = false;
- if (dst_metric_locked(dst, RTAX_MTU))
+ if (ip_mtu_locked(dst))
return;
if (ipv4_mtu(dst) < mtu)
return;
- if (mtu < ip_rt_min_pmtu)
+ if (mtu < ip_rt_min_pmtu) {
+ lock = true;
mtu = ip_rt_min_pmtu;
+ }
if (rt->rt_pmtu == mtu &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
- update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+ update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
mtu = READ_ONCE(dst->dev->mtu);
- if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+ if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
- static void rt_add_uncached_list(struct rtable *rt)
+ void rt_add_uncached_list(struct rtable *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
spin_unlock_bh(&ul->lock);
}
- static void ipv4_dst_destroy(struct dst_entry *dst)
+ void rt_del_uncached_list(struct rtable *rt)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
- struct rtable *rt = (struct rtable *) dst;
-
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
}
}
+ static void ipv4_dst_destroy(struct dst_entry *dst)
+ {
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+ struct rtable *rt = (struct rtable *)dst;
+
+ if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+ kfree(p);
+
+ rt_del_uncached_list(rt);
+ }
+
void rt_flush_dev(struct net_device *dev)
{
struct net *net = dev_net(dev);
rt->rt_is_input = 0;
rt->rt_iif = 0;
rt->rt_pmtu = 0;
+ rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
+ rt->rt_mtu_locked = ort->rt_mtu_locked;
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
if (rt->rt_pmtu && expires)
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+ if (rt->rt_mtu_locked && expires)
+ metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
static __net_initdata struct pernet_operations sysctl_route_ops = {
.init = sysctl_route_net_init,
.exit = sysctl_route_net_exit,
+ .async = true,
};
#endif
static __net_initdata struct pernet_operations rt_genid_ops = {
.init = rt_genid_init,
+ .async = true,
};
static int __net_init ipv4_inetpeer_init(struct net *net)
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
.init = ipv4_inetpeer_init,
.exit = ipv4_inetpeer_exit,
+ .async = true,
};
#ifdef CONFIG_IP_ROUTE_CLASSID
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 3 * nla_total_size(sizeof(u32)) +
- 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+ 5 * nla_total_size(sizeof(u32)) +
+ 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats;
}
bh_unlock_sock(sk);
local_bh_enable();
+ tcp_write_queue_purge(sk);
release_sock(sk);
return 0;
}
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
+ xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
+ rt_add_uncached_list(&xdst->u.rt);
return 0;
}
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
dst_destroy_metrics_generic(dst);
-
+ if (xdst->u.rt.rt_uncached_list)
+ rt_del_uncached_list(&xdst->u.rt);
xfrm_dst_destroy(xdst);
}
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
+ .async = true,
};
static void __init xfrm4_policy_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *daddr;
+ struct in6_addr *daddr, old_daddr;
+ __be32 fl6_flowlabel = 0;
+ __be32 old_fl6_flowlabel;
+ __be16 old_dport;
int addr_type;
int err;
- __be32 fl6_flowlabel = 0;
if (usin->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
}
}
+ /* save the current peer information before updating it */
+ old_daddr = sk->sk_v6_daddr;
+ old_fl6_flowlabel = np->flow_label;
+ old_dport = inet->inet_dport;
+
sk->sk_v6_daddr = *daddr;
np->flow_label = fl6_flowlabel;
-
inet->inet_dport = usin->sin6_port;
/*
err = ip6_datagram_dst_update(sk, true);
if (err) {
- /* Reset daddr and dport so that udp_v6_early_demux()
- * fails to find this socket
+ /* Restore the socket peer info, to keep it consistent with
+ * the old socket state
*/
- memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
- inet->inet_dport = 0;
+ sk->sk_v6_daddr = old_daddr;
+ np->flow_label = old_fl6_flowlabel;
+ inet->inet_dport = old_dport;
goto out;
}
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
struct ip6_tnl *t, *cand = NULL;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB) ||
- gre_proto == htons(ETH_P_ERSPAN)) ?
+ gre_proto == htons(ETH_P_ERSPAN) ||
+ gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
truncate = true;
}
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto tx_err;
+
t->parms.o_flags &= ~TUNNEL_KEY;
IPCB(skb)->flags = 0;
md->u.md2.dir,
get_hwid(&md->u.md2),
truncate, false);
+ } else {
+ goto tx_err;
}
} else {
switch (skb->protocol) {
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
.exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
+ .async = true,
};
static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
*(opt++) = (rd_len >> 3);
opt += 6;
- memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+ skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
+ rd_len - 8);
}
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
static struct pernet_operations ndisc_net_ops = {
.init = ndisc_net_init,
.exit = ndisc_net_exit,
+ .async = true,
};
int __init ndisc_init(void)
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
- static void rt6_uncached_list_add(struct rt6_info *rt)
+ void rt6_uncached_list_add(struct rt6_info *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
spin_unlock_bh(&ul->lock);
}
- static void rt6_uncached_list_del(struct rt6_info *rt)
+ void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
}
}
- static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
+ struct rt6_info *rt, int mtu)
+ {
+ /* If the new MTU is lower than the route PMTU, this new MTU will be the
+ * lowest MTU in the path: always allow updating the route PMTU to
+ * reflect PMTU decreases.
+ *
+ * If the new MTU is higher, and the route PMTU is equal to the local
+ * MTU, this means the old MTU is the lowest in the path, so allow
+ * updating it: if other nodes now have lower MTUs, PMTU discovery will
+ * handle this.
+ */
+
+ if (dst_mtu(&rt->dst) >= mtu)
+ return true;
+
+ if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
+ return true;
+
+ return false;
+ }
+
+ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
+ struct rt6_info *rt, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
- if (bucket) {
- for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
- struct rt6_info *entry = rt6_ex->rt6i;
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
- entry->rt6i_pmtu = mtu;
- }
- bucket++;
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
+ * route), the metrics of its rt->dst.from have already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu &&
+ rt6_mtu_change_route_allowed(idev, entry, mtu))
+ entry->rt6i_pmtu = mtu;
}
+ bucket++;
}
}
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (ip6_multipath_hash_policy(net)) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
- struct net_device *dev,
+ const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
return err;
}
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+ struct net_device **_dev, struct inet6_dev **idev,
+ struct netlink_ext_ack *extack)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ int gwa_type = ipv6_addr_type(gw_addr);
+ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+ const struct net_device *dev = *_dev;
+ bool need_addr_check = !dev;
+ int err = -EINVAL;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ if (dev &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+ /* IPv6 strictly inhibits using non-link-local
+ * addresses as a nexthop address.
+ * Otherwise, the router will not be able to send redirects.
+ * It is very good, but in some (rare!) circumstances

+ * (SIT, PtP, NBMA NOARP links) it is handy to allow
+ * some exceptions. --ANK
+ * We allow IPv4-mapped nexthops to support RFC4798-type
+ * addressing
+ */
+ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK)
+ err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+ else
+ err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+ if (err)
+ goto out;
+ }
+
+ /* reload in case device was changed */
+ dev = *_dev;
+
+ err = -EINVAL;
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
+
+ /* if we did not check gw_addr above, do so now that the
+ * egress device has been resolved.
+ */
+ if (need_addr_check &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_output = rt->dst.output;
- rt->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_input = rt->dst.input;
- rt->dst.input = lwtunnel_input;
- }
+ lwtunnel_set_redirect(&rt->dst);
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
}
if (cfg->fc_flags & RTF_GATEWAY) {
- const struct in6_addr *gw_addr;
- int gwa_type;
-
- gw_addr = &cfg->fc_gateway;
- gwa_type = ipv6_addr_type(gw_addr);
-
- /* if gw_addr is local we will fail to detect this in case
- * address is still TENTATIVE (DAD in progress). rt6_lookup()
- * will return already-added prefix route via interface that
- * prefix route was assigned to, which might be non-loopback.
- */
- err = -EINVAL;
- if (ipv6_chk_addr_and_flags(net, gw_addr,
- gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0)) {
- NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
goto out;
- }
- rt->rt6i_gateway = *gw_addr;
-
- if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- /* IPv6 strictly inhibits using not link-local
- addresses as nexthop address.
- Otherwise, router will not able to send redirects.
- It is very good, but in some (rare!) circumstances
- (SIT, PtP, NBMA NOARP links) it is handy to allow
- some exceptions. --ANK
- We allow IPv4-mapped nexthops to support RFC4798-type
- addressing
- */
- if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED))) {
- NL_SET_ERR_MSG(extack,
- "Invalid gateway address");
- goto out;
- }
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- err = ip6_route_check_nh_onlink(net, cfg, dev,
- extack);
- } else {
- err = ip6_route_check_nh(net, cfg, &dev, &idev);
- }
- if (err)
- goto out;
- }
- err = -EINVAL;
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Egress device not specified");
- goto out;
- } else if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack,
- "Egress device can not be loopback device for this route");
- goto out;
- }
+ rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
- /*
- If new MTU is less than route PMTU, this new MTU will be the
- lowest MTU in the path, update the route PMTU to reflect PMTU
- decreases; if new MTU is greater than route PMTU, and the
- old MTU is the lowest MTU in the path, update the route PMTU
- to reflect the increase. In this case if the other nodes' MTU
- also have the lowest MTU, TOO BIG MESSAGE will be lead to
- PMTU discovery.
- */
if (rt->dst.dev == arg->dev &&
- dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
spin_lock_bh(&rt6_exception_lock);
- if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
+ rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
- }
- rt6_exceptions_update_pmtu(rt, arg->mtu);
+ rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
spin_unlock_bh(&rt6_exception_lock);
}
return 0;
r_cfg.fc_encap_type = nla_get_u16(nla);
}
+ r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
rt = ip6_route_info_create(&r_cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
static struct pernet_operations ip6_route_net_ops = {
.init = ip6_route_net_init,
.exit = ip6_route_net_exit,
+ .async = true,
};
static int __net_init ipv6_inetpeer_init(struct net *net)
static struct pernet_operations ipv6_inetpeer_ops = {
.init = ipv6_inetpeer_init,
.exit = ipv6_inetpeer_exit,
+ .async = true,
};
static struct pernet_operations ip6_route_net_late_ops = {
.init = ip6_route_net_init_late,
.exit = ip6_route_net_exit_late,
+ .async = true,
};
static struct notifier_block ip6_route_dev_notifier = {
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
xdst->u.rt6.rt6i_src = rt->rt6i_src;
+ INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
+ rt6_uncached_list_add(&xdst->u.rt6);
+ atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
return 0;
}
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
+ if (xdst->u.rt6.rt6i_uncached_list)
+ rt6_uncached_list_del(&xdst->u.rt6);
xfrm_dst_destroy(xdst);
}
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
+ .async = true,
};
int __init xfrm6_init(void)
}
static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
addr->sa_family = AF_IUCV;
- *len = sizeof(struct sockaddr_iucv);
if (peer) {
memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
- return 0;
+ return sizeof(struct sockaddr_iucv);
}
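
iucv_sock_getname() above now follows the updated getname convention: the address length is returned directly instead of being written through a pointer. A minimal sketch of the pattern (the function and address family below are illustrative, not part of this patch):

static int example_getname(struct socket *sock, struct sockaddr *uaddr,
			   int peer)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

	sin->sin_family = AF_INET;
	/* ... fill in address/port for either the peer or the local end ... */
	return sizeof(*sin);	/* length is now the return value; errors are negative */
}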
/**
af_iucv_dev->driver = &af_iucv_driver;
err = device_register(af_iucv_dev);
if (err)
- goto out_driver;
+ goto out_iucv_dev;
return 0;
+ out_iucv_dev:
+ put_device(af_iucv_dev);
out_driver:
driver_unregister(&af_iucv_driver);
out_iucv:
.parse_msg = kcm_parse_func_strparser,
.read_sock_done = kcm_read_sock_done,
};
- int err;
+ int err = 0;
csk = csock->sk;
if (!csk)
return -EINVAL;
+ lock_sock(csk);
+
/* Only allow TCP sockets to be attached for now */
if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
- csk->sk_protocol != IPPROTO_TCP)
- return -EOPNOTSUPP;
+ csk->sk_protocol != IPPROTO_TCP) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
/* Don't allow listeners or closed sockets */
- if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
- return -EOPNOTSUPP;
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
- if (!psock)
- return -ENOMEM;
+ if (!psock) {
+ err = -ENOMEM;
+ goto out;
+ }
psock->mux = mux;
psock->sk = csk;
err = strp_init(&psock->strp, csk, &cb);
if (err) {
kmem_cache_free(kcm_psockp, psock);
- return err;
+ goto out;
}
write_lock_bh(&csk->sk_callback_lock);
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
+ strp_stop(&psock->strp);
strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
- return -EALREADY;
+ err = -EALREADY;
+ goto out;
}
psock->save_data_ready = csk->sk_data_ready;
/* Schedule RX work in case there are already bytes queued */
strp_check_rcv(&psock->strp);
- return 0;
+ out:
+ release_sock(csk);
+
+ return err;
}
static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
if (WARN_ON(psock->rx_kcm)) {
write_unlock_bh(&csk->sk_callback_lock);
+ release_sock(csk);
return;
}
.exit = kcm_exit_net,
.id = &kcm_net_id,
.size = sizeof(struct kcm_net),
+ .async = true,
};
static int __init kcm_init(void)
spinlock_t l2tp_session_hlist_lock;
};
+ #if IS_ENABLED(CONFIG_IPV6)
+ static bool l2tp_sk_is_v6(struct sock *sk)
+ {
+ return sk->sk_family == PF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr);
+ }
+ #endif
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
{
/* Queue the packet to IP for output */
skb->ignore_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (l2tp_sk_is_v6(tunnel->sock))
error = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
goto out_unlock;
}
+ /* The user-space may change the connection status for the user-space
+ * provided socket at run time: we must check it under the socket lock
+ */
+ if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) {
+ kfree_skb(skb);
+ ret = NET_XMIT_DROP;
+ goto out_unlock;
+ }
+
/* Get routing info from the tunnel socket */
skb_dst_drop(skb);
skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (l2tp_sk_is_v6(sk))
udp6_set_csum(udp_get_no_check6_tx(sk),
skb, &inet6_sk(sk)->saddr,
&sk->sk_v6_daddr, udp_len);
encap = cfg->encap;
/* Quick sanity checks */
+ err = -EPROTONOSUPPORT;
+ if (sk->sk_type != SOCK_DGRAM) {
+ pr_debug("tunl %hu: fd %d wrong socket type\n",
+ tunnel_id, fd);
+ goto err;
+ }
switch (encap) {
case L2TP_ENCAPTYPE_UDP:
- err = -EPROTONOSUPPORT;
if (sk->sk_protocol != IPPROTO_UDP) {
pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
}
break;
case L2TP_ENCAPTYPE_IP:
- err = -EPROTONOSUPPORT;
if (sk->sk_protocol != IPPROTO_L2TP) {
pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
if (cfg != NULL)
tunnel->debug = cfg->debug;
- #if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- if (ipv6_addr_v4mapped(&np->saddr) &&
- ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
- struct inet_sock *inet = inet_sk(sk);
-
- tunnel->v4mapped = true;
- inet->inet_saddr = np->saddr.s6_addr32[3];
- inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
- inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
- } else {
- tunnel->v4mapped = false;
- }
- }
- #endif
-
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
.exit = l2tp_exit_net,
.id = &l2tp_net_id,
.size = sizeof(struct l2tp_net),
+ .async = true,
};
static int __init l2tp_init(void)
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
FLAG(SUPPORTS_TDLS_BUFFER_STA),
+ FLAG(DEAUTH_NEED_MGD_TX_PREP),
+ FLAG(DOESNT_SUPPORT_QOS_NDP),
#undef FLAG
};
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
+ skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
+ !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
if (!skb)
return;
ieee80211_flush_queues(local, sdata, true);
/* deauthenticate/disassociate now */
- if (tx || frame_buf)
+ if (tx || frame_buf) {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+ /*
+ * In multi channel scenarios guarantee that the virtual
+ * interface is granted immediate airtime to transmit the
+ * deauthentication frame by calling mgd_prepare_tx, if the
+ * driver requested so.
+ */
+ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+ !ifmgd->have_beacon)
+ drv_mgd_prepare_tx(sdata->local, sdata);
+
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
reason, tx, frame_buf);
+ }
/* flush out frame - make sure the deauth was actually sent */
if (tx)
u16 tx_time)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
int ac = ieee80211_ac_from_tid(tid);
struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
unsigned long now = jiffies;
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
cancel_delayed_work_sync(&flowtable->data.gc_work);
+ kfree(flowtable->ops);
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
rhashtable_destroy(&flowtable->data.rhashtable);
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.exit = nf_tables_exit_net,
+ .async = true,
};
static int __init nf_tables_module_init(void)
return buf;
}
+ /**
+ * xt_check_proc_name - check that name is suitable for /proc file creation
+ *
+ * @name: file name candidate
+ * @size: length of buffer
+ *
+ * Some x_tables modules wish to create a file in /proc.
+ * This function makes sure that the name is suitable for this
+ * purpose: it checks that the name is NUL terminated and isn't a 'special'
+ * name, like "..".
+ *
+ * returns a negative number on error or 0 if the name is usable.
+ */
+ int xt_check_proc_name(const char *name, unsigned int size)
+ {
+ if (name[0] == '\0')
+ return -EINVAL;
+
+ if (strnlen(name, size) == size)
+ return -ENAMETOOLONG;
+
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0 ||
+ strchr(name, '/'))
+ return -EINVAL;
+
+ return 0;
+ }
+ EXPORT_SYMBOL(xt_check_proc_name);
+
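
A sketch of how a match's checkentry callback can use the new helper, mirroring the hashlimit and recent conversions later in this series (the struct and function names below are illustrative):

static int example_mt_check(const struct xt_mtchk_param *par)
{
	const struct example_mtinfo *info = par->matchinfo;
	int ret;

	/* rejects empty, unterminated, ".", ".." and '/'-containing names */
	ret = xt_check_proc_name(info->name, sizeof(info->name));
	if (ret)
		return ret;

	return 0;
}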
int xt_check_match(struct xt_mtchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
.exit = xt_net_exit,
+ .async = true,
};
static int __init xt_init(void)
struct hashlimit_cfg3 cfg = {};
int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
struct hashlimit_cfg3 cfg = {};
int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
static int hashlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
info->name, 3);
.exit = hashlimit_net_exit,
.id = &hashlimit_net_id,
.size = sizeof(struct hashlimit_net),
+ .async = true,
};
static int __init hashlimit_mt_init(void)
info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
return -EINVAL;
}
- if (info->name[0] == '\0' ||
- strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
.exit = recent_net_exit,
.id = &recent_net_id,
.size = sizeof(struct recent_net),
+ .async = true,
};
static struct xt_match recent_mt_reg[] __read_mostly = {
static struct pernet_operations genl_pernet_ops = {
.init = genl_pernet_init,
.exit = genl_pernet_exit,
+ .async = true,
};
static int __init genl_init(void)
if (!err)
delivered = true;
else if (err != -ESRCH)
- goto error;
+ return err;
return delivered ? 0 : -ESRCH;
error:
kfree_skb(skb);
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind)
+ int replace, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
return res;
out:
if (res == ACT_P_CREATED)
- tcf_idr_cleanup(*act, est);
+ tcf_idr_release(*act, bind);
return ret;
}
static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
.exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init bpf_init_module(void)
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_old, *params_new;
{
struct sctphdr *sctph;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)
+ if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
return 1;
sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph));
struct tcf_csum_params *params;
params = rcu_dereference_protected(p->params, 1);
- kfree_rcu(params, rcu);
+ if (params)
+ kfree_rcu(params, rcu);
}
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
.exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Checksum updating actions");
static void tcf_ipt_release(struct tc_action *a)
{
struct tcf_ipt *ipt = to_ipt(a);
- ipt_destroy_target(ipt->tcfi_t);
+
+ if (ipt->tcfi_t) {
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_t);
+ }
kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
}
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
kfree(tname);
err1:
if (ret == ACT_P_CREATED)
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return err;
}
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
bind);
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
bind);
static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
.exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
.exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
p = to_pedit(*a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
kfree(keys_ex);
return -ENOMEM;
}
static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
.exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_act_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return err;
}
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
.exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init police_init_module(void)
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
psample_group = rtnl_dereference(s->psample_group);
RCU_INIT_POINTER(s->psample_group, NULL);
- psample_group_put(psample_group);
+ if (psample_group)
+ psample_group_put(psample_group);
}
static bool tcf_sample_dev_ok_push(struct net_device *dev)
static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
.exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init sample_init_module(void)
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
d = to_defact(*a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return ret;
}
d->tcf_action = parm->action;
static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
.exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
ASSERT_RTNL();
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- if (ovr)
+ if (ret == ACT_P_CREATED)
tcf_idr_release(*a, bind);
return -ENOMEM;
}
struct tcf_skbmod_params *p;
p = rcu_dereference_protected(d->skbmod_p, 1);
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
.exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
break;
default:
+ ret = -EINVAL;
goto err_out;
}
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params, 1);
+ if (params) {
+ if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+ dst_release(&params->tcft_enc_metadata->dst);
- if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
- dst_release(&params->tcft_enc_metadata->dst);
-
- kfree_rcu(params, rcu);
+ kfree_rcu(params, rcu);
+ }
}
static int tunnel_key_dump_addresses(struct sk_buff *skb,
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init tunnel_key_init_module(void)
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
ASSERT_RTNL();
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- if (ovr)
+ if (ret == ACT_P_CREATED)
tcf_idr_release(*a, bind);
return -ENOMEM;
}
struct tcf_vlan_params *p;
p = rcu_dereference_protected(v->vlan_p, 1);
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
.exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init vlan_init_module(void)
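
Across the skbmod, tunnel_key and vlan hunks above, the init, walker and lookup callbacks gain a struct netlink_ext_ack parameter so failures can be reported to userspace via extended acks. As a minimal sketch of how an action wires up the new prototypes (the tcf_foo_* names and struct tcf_foo are hypothetical, not part of the patch), an ops table now looks roughly like this:

/* Illustrative only: hypothetical action "foo"; the commented parameters
 * mirror the signature changes shown in the hunks above.
 */
static struct tc_action_ops act_foo_ops = {
	.kind	= "foo",
	.owner	= THIS_MODULE,
	.act	= tcf_foo_act,
	.dump	= tcf_foo_dump,
	.init	= tcf_foo_init,		/* ..., struct netlink_ext_ack *extack */
	.walk	= tcf_foo_walker,	/* ..., struct netlink_ext_ack *extack */
	.lookup	= tcf_foo_search,	/* ..., struct netlink_ext_ack *extack */
	.size	= sizeof(struct tcf_foo),
};
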
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
- * - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
+static struct smc_hashinfo smc_v6_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
};
EXPORT_SYMBOL_GPL(smc_proto);
+struct proto smc_proto6 = {
+ .name = "SMC6",
+ .owner = THIS_MODULE,
+ .keepalive = smc_set_keepalive,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
sk_refcnt_debug_dec(sk);
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
{
struct smc_sock *smc;
+ struct proto *prot;
struct sock *sk;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
- sk->sk_protocol = SMCPROTO_SMC;
+ sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
goto out;
rc = -EAFNOSUPPORT;
+ if (addr->sin_family != AF_INET &&
+ addr->sin_family != AF_INET6 &&
+ addr->sin_family != AF_UNSPEC)
+ goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
- if ((addr->sin_family != AF_INET) &&
- ((addr->sin_family != AF_UNSPEC) ||
- (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+ if (addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
- int len;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
/* separate smc parameter checking to be safe */
if (alen < sizeof(addr->sa_family))
goto out_err;
- if (addr->sa_family != AF_INET)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
int rc;
release_sock(lsk);
- new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
if (!new_sk) {
rc = -ENOMEM;
lsk->sk_err = ENOMEM;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
- int rc = 0, len;
- __be32 subnet;
- u8 prefix_len;
+ int rc = 0;
u8 ibport;
/* check if peer is smc capable */
goto decline_rdma;
}
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
- if (rc) {
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
- goto decline_rdma;
- }
-
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (pclc_prfx->outgoing_subnet != subnet ||
- pclc_prfx->prefix_len != prefix_len) {
+
+ rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+ if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
lsmc->clcsock = NULL;
}
release_sock(lsk);
- /* no more listening, wake up smc_close_wait_listen_clcsock and
- * accept
- */
- lsk->sk_state_change(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
}
static int smc_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct smc_sock *smc;
smc = smc_sk(sock->sk);
- return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+ return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
static int smc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
struct sock *sk;
int rc;
goto out;
rc = -EPROTONOSUPPORT;
- if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+ if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
goto out;
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
- sk = smc_sock_alloc(net, sock);
+ sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
- rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
- IPPROTO_TCP, &smc->clcsock);
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
if (rc) {
sk_common_release(sk);
goto out;
rc = proto_register(&smc_proto, 1);
if (rc) {
- pr_err("%s: proto_register fails with %d\n", __func__, rc);
+ pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
goto out_pnet;
}
+ rc = proto_register(&smc_proto6, 1);
+ if (rc) {
+ pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+ goto out_proto;
+ }
+
rc = sock_register(&smc_sock_family_ops);
if (rc) {
pr_err("%s: sock_register fails with %d\n", __func__, rc);
- goto out_proto;
+ goto out_proto6;
}
INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+ INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
out_sock:
sock_unregister(PF_SMC);
+out_proto6:
+ proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
out_pnet:
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
+ cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
+ proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
}
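
With smc_proto6 registered and the internal CLC socket created in the address family selected by the protocol, user space can open AF_SMC sockets over IPv6 by passing SMCPROTO_SMC6. A minimal user-space sketch, using the constants from the kernel uapi (AF_SMC is 43, SMCPROTO_SMC is 0, SMCPROTO_SMC6 is 1):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>

#ifndef AF_SMC
#define AF_SMC		43	/* from include/linux/socket.h */
#endif
#define SMCPROTO_SMC	0	/* SMC with IPv4 CLC socket */
#define SMCPROTO_SMC6	1	/* SMC with IPv6 CLC socket */

int main(void)
{
	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);

	if (fd < 0) {
		perror("socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6)");
		return 1;
	}
	/* bind()/connect() then proceed with struct sockaddr_in6,
	 * exactly as for TCP over IPv6.
	 */
	close(fd);
	return 0;
}
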
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
-#include <linux/atalk.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
return __put_user(klen, ulen);
}
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
* what to do with it - that's up to the protocol still.
*/
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
}
+EXPORT_SYMBOL_GPL(get_net_ns);
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
goto out_fd;
if (upeer_sockaddr) {
- if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
- &len, 2) < 0) {
+ len = newsock->ops->getname(newsock,
+ (struct sockaddr *)&address, 2);
+ if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
}
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
- if (err)
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ if (err < 0)
goto out_put;
- err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
out_put:
fput_light(sock->file, fput_needed);
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
return err;
}
- err =
- sock->ops->getname(sock, (struct sockaddr *)&address, &len,
- 1);
- if (!err)
- err = move_addr_to_user(&address, len, usockaddr,
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ if (err >= 0)
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr,
usockaddr_len);
fput_light(sock->file, fput_needed);
}
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
}
EXPORT_SYMBOL(sock_unregister);
+ bool sock_is_registered(int family)
+ {
+ return family < NPROTO && rcu_access_pointer(net_families[family]);
+ }
+
static int __init sock_init(void)
{
int err;
}
EXPORT_SYMBOL(kernel_connect);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 0);
+ return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 1);
+ return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
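
As the updated callers above show, the getname() family no longer writes the address length through an int pointer; the method now returns the length (>= 0) on success and a negative errno on failure. A hypothetical in-kernel caller would follow the same pattern (sketch only; example_local_port() is not part of the patch):

#include <linux/net.h>
#include <linux/in.h>
#include <linux/errno.h>

/* Sketch of the new convention: the return value of kernel_getsockname()
 * is the address length on success, or a negative errno.
 */
static int example_local_port(struct socket *sock)
{
	struct sockaddr_storage ss;
	int len;

	len = kernel_getsockname(sock, (struct sockaddr *)&ss);
	if (len < 0)
		return len;	/* negative errno from ->getname() */

	if (ss.ss_family == AF_INET)
		return ntohs(((struct sockaddr_in *)&ss)->sin_port);

	return -EAFNOSUPPORT;
}
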
static int xfrm_get_tos(const struct flowi *fl, int family)
{
const struct xfrm_policy_afinfo *afinfo;
- int tos = 0;
+ int tos;
afinfo = xfrm_policy_get_afinfo(family);
- tos = afinfo ? afinfo->get_tos(fl) : 0;
+ if (!afinfo)
+ return 0;
+
+ tos = afinfo->get_tos(fl);
rcu_read_unlock();
spin_unlock(&pq->hold_queue.lock);
dst_hold(xfrm_dst_path(dst));
- dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
+ dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst))
goto purge_queue;
while (dst->xfrm) {
const struct xfrm_state *xfrm = dst->xfrm;
+ dst = xfrm_dst_child(dst);
+
if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
continue;
if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
daddr = xfrm->coaddr;
else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
daddr = &xfrm->id.daddr;
-
- dst = xfrm_dst_child(dst);
}
return daddr;
}
static struct pernet_operations __net_initdata xfrm_net_ops = {
.init = xfrm_net_init,
.exit = xfrm_net_exit,
+ .async = true,
};
void __init xfrm_init(void)
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
struct xfrm_replay_state_esn *rs;
- if (p->flags & XFRM_STATE_ESN) {
- if (!rt)
- return -EINVAL;
-
- rs = nla_data(rt);
+ if (!rt)
+ return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0;
- if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
- return -EINVAL;
+ rs = nla_data(rt);
- if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
- nla_len(rt) != sizeof(*rs))
- return -EINVAL;
- }
+ if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+ return -EINVAL;
- if (!rt)
- return 0;
+ if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
+ nla_len(rt) != sizeof(*rs))
+ return -EINVAL;
/* As only ESP and AH support ESN feature. */
if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
static struct pernet_operations xfrm_user_net_ops = {
.init = xfrm_user_net_init,
.exit_batch = xfrm_user_net_exit,
+ .async = true,
};
static int __init xfrm_user_init(void)