summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/filter.c81
-rw-r--r--net/core/page_pool.c6
-rw-r--r--net/core/sock_map.c21
-rw-r--r--net/core/xdp.c94
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/tcp_bpf.c27
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp.c1
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c1
-rw-r--r--net/mptcp/options.c109
-rw-r--r--net/mptcp/pm.c34
-rw-r--r--net/mptcp/pm_netlink.c197
-rw-r--r--net/mptcp/protocol.c307
-rw-r--r--net/mptcp/protocol.h64
-rw-r--r--net/mptcp/sockopt.c24
-rw-r--r--net/mptcp/subflow.c25
-rw-r--r--net/mptcp/token.c1
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/sched/sch_cake.c40
-rw-r--r--net/tls/tls_sw.c1
24 files changed, 670 insertions, 398 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 606ab5a98a1a..4603b7cd3cd1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3957,10 +3957,35 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_master_redirect);
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
+ struct net_device *dev,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ enum bpf_map_type map_type = ri->map_type;
+ void *fwd = ri->tgt_value;
+ u32 map_id = ri->map_id;
+ int err;
+
+ ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+
+ err = __xsk_map_redirect(fwd, xdp);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
+ return err;
+}
+
+static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
+ struct net_device *dev,
+ struct xdp_frame *xdpf,
+ struct bpf_prog *xdp_prog)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
@@ -3970,6 +3995,11 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ if (unlikely(!xdpf)) {
+ err = -EOVERFLOW;
+ goto err;
+ }
+
switch (map_type) {
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
@@ -3977,17 +4007,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
map = READ_ONCE(ri->map);
if (unlikely(map)) {
WRITE_ONCE(ri->map, NULL);
- err = dev_map_enqueue_multi(xdp, dev, map,
+ err = dev_map_enqueue_multi(xdpf, dev, map,
ri->flags & BPF_F_EXCLUDE_INGRESS);
} else {
- err = dev_map_enqueue(fwd, xdp, dev);
+ err = dev_map_enqueue(fwd, xdpf, dev);
}
break;
case BPF_MAP_TYPE_CPUMAP:
- err = cpu_map_enqueue(fwd, xdp, dev);
- break;
- case BPF_MAP_TYPE_XSKMAP:
- err = __xsk_map_redirect(fwd, xdp);
+ err = cpu_map_enqueue(fwd, xdpf, dev);
break;
case BPF_MAP_TYPE_UNSPEC:
if (map_id == INT_MAX) {
@@ -3996,7 +4023,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
err = -EINVAL;
break;
}
- err = dev_xdp_enqueue(fwd, xdp, dev);
+ err = dev_xdp_enqueue(fwd, xdpf, dev);
break;
}
fallthrough;
@@ -4013,8 +4040,34 @@ err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
return err;
}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+ return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
+ xdp_prog);
+}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
+int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
+ struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+ return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
+
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
@@ -4741,12 +4794,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case SO_RCVBUF:
val = min_t(u32, val, sysctl_rmem_max);
+ val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
WRITE_ONCE(sk->sk_rcvbuf,
max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(sk->sk_sndbuf,
max_t(int, val * 2, SOCK_MIN_SNDBUF));
@@ -4967,6 +5022,12 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
goto err_clear;
switch (optname) {
+ case SO_RCVBUF:
+ *((int *)optval) = sk->sk_rcvbuf;
+ break;
+ case SO_SNDBUF:
+ *((int *)optval) = sk->sk_sndbuf;
+ break;
case SO_MARK:
*((int *)optval) = sk->sk_mark;
break;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1a6978427d6c..7347d5c7dbe0 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -217,6 +217,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
{
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
+ if (pool->p.init_callback)
+ pool->p.init_callback(page, pool->p.init_arg);
}
static void page_pool_clear_pp_info(struct page *page)
@@ -691,10 +693,12 @@ static void page_pool_release_retry(struct work_struct *wq)
schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+ struct xdp_mem_info *mem)
{
refcount_inc(&pool->user_cnt);
pool->disconnect = disconnect;
+ pool->xdp_mem_id = mem->id;
}
void page_pool_destroy(struct page_pool *pool)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9618ab6d7cc9..1827669eedd6 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -292,15 +292,23 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
if (skb_verdict)
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ /* msg_* and stream_* programs references tracked in psock after this
+ * point. Reference dec and cleanup will occur through psock destructor
+ */
ret = sock_map_init_proto(sk, psock);
- if (ret < 0)
- goto out_drop;
+ if (ret < 0) {
+ sk_psock_put(sk, psock);
+ goto out;
+ }
write_lock_bh(&sk->sk_callback_lock);
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
ret = sk_psock_init_strp(sk, psock);
- if (ret)
- goto out_unlock_drop;
+ if (ret) {
+ write_unlock_bh(&sk->sk_callback_lock);
+ sk_psock_put(sk, psock);
+ goto out;
+ }
sk_psock_start_strp(sk, psock);
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
sk_psock_start_verdict(sk,psock);
@@ -309,10 +317,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
-out_unlock_drop:
- write_unlock_bh(&sk->sk_callback_lock);
-out_drop:
- sk_psock_put(sk, psock);
out_progs:
if (skb_verdict)
bpf_prog_put(skb_verdict);
@@ -325,6 +329,7 @@ out_put_stream_parser:
out_put_stream_verdict:
if (stream_verdict)
bpf_prog_put(stream_verdict);
+out:
return ret;
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7fe1df85f505..7aba35504986 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator)
mutex_unlock(&mem_id_lock);
}
-void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+void xdp_unreg_mem_model(struct xdp_mem_info *mem)
{
struct xdp_mem_allocator *xa;
- int type = xdp_rxq->mem.type;
- int id = xdp_rxq->mem.id;
+ int type = mem->type;
+ int id = mem->id;
/* Reset mem info to defaults */
- xdp_rxq->mem.id = 0;
- xdp_rxq->mem.type = 0;
-
- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
- WARN(1, "Missing register, driver bug");
- return;
- }
+ mem->id = 0;
+ mem->type = 0;
if (id == 0)
return;
@@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
rcu_read_unlock();
}
}
+EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
+
+void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+ WARN(1, "Missing register, driver bug");
+ return;
+ }
+
+ xdp_unreg_mem_model(&xdp_rxq->mem);
+}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
@@ -259,28 +265,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
return true;
}
-int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
- enum xdp_mem_type type, void *allocator)
+static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
+ enum xdp_mem_type type,
+ void *allocator)
{
struct xdp_mem_allocator *xdp_alloc;
gfp_t gfp = GFP_KERNEL;
int id, errno, ret;
void *ptr;
- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
- WARN(1, "Missing register, driver bug");
- return -EFAULT;
- }
-
if (!__is_supported_mem_type(type))
- return -EOPNOTSUPP;
+ return ERR_PTR(-EOPNOTSUPP);
- xdp_rxq->mem.type = type;
+ mem->type = type;
if (!allocator) {
if (type == MEM_TYPE_PAGE_POOL)
- return -EINVAL; /* Setup time check page_pool req */
- return 0;
+ return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
+ return NULL;
}
/* Delay init of rhashtable to save memory if feature isn't used */
@@ -290,13 +292,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
mutex_unlock(&mem_id_lock);
if (ret < 0) {
WARN_ON(1);
- return ret;
+ return ERR_PTR(ret);
}
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
if (!xdp_alloc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
mutex_lock(&mem_id_lock);
id = __mem_id_cyclic_get(gfp);
@@ -304,31 +306,61 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
errno = id;
goto err;
}
- xdp_rxq->mem.id = id;
- xdp_alloc->mem = xdp_rxq->mem;
+ mem->id = id;
+ xdp_alloc->mem = *mem;
xdp_alloc->allocator = allocator;
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
- ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
- xdp_rxq->mem.id = 0;
+ ida_simple_remove(&mem_id_pool, mem->id);
+ mem->id = 0;
errno = PTR_ERR(ptr);
goto err;
}
if (type == MEM_TYPE_PAGE_POOL)
- page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
+ page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem);
mutex_unlock(&mem_id_lock);
- trace_mem_connect(xdp_alloc, xdp_rxq);
- return 0;
+ return xdp_alloc;
err:
mutex_unlock(&mem_id_lock);
kfree(xdp_alloc);
- return errno;
+ return ERR_PTR(errno);
+}
+
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+ enum xdp_mem_type type, void *allocator)
+{
+ struct xdp_mem_allocator *xdp_alloc;
+
+ xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
+ if (IS_ERR(xdp_alloc))
+ return PTR_ERR(xdp_alloc);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_reg_mem_model);
+
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ enum xdp_mem_type type, void *allocator)
+{
+ struct xdp_mem_allocator *xdp_alloc;
+
+ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+ WARN(1, "Missing register, driver bug");
+ return -EFAULT;
+ }
+
+ xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
+ if (IS_ERR(xdp_alloc))
+ return PTR_ERR(xdp_alloc);
+
+ trace_mem_connect(xdp_alloc, xdp_rxq);
+ return 0;
}
+
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
/* XDP RX runs under NAPI protection, and in different delivery error
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f53184767ee7..9c465bac1eb0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -531,6 +531,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ if (sk->sk_prot->put_port)
+ sk->sk_prot->put_port(sk);
goto out_release_sock;
}
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e540b0dcf085..0e56df3a45e2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -994,6 +994,7 @@ struct proto ping_prot = {
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
+ .put_port = ping_unhash,
.obj_size = sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index f70aa0932bd6..9b9b02052fd3 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -196,12 +196,39 @@ msg_bytes_ready:
long timeo;
int data;
+ if (sock_flag(sk, SOCK_DONE))
+ goto out;
+
+ if (sk->sk_err) {
+ copied = sock_error(sk);
+ goto out;
+ }
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ goto out;
+
+ if (sk->sk_state == TCP_CLOSE) {
+ copied = -ENOTCONN;
+ goto out;
+ }
+
timeo = sock_rcvtimeo(sk, nonblock);
+ if (!timeo) {
+ copied = -EAGAIN;
+ goto out;
+ }
+
+ if (signal_pending(current)) {
+ copied = sock_intr_errno(timeo);
+ goto out;
+ }
+
data = tcp_msg_wait_data(sk, psock, timeo);
if (data && !sk_psock_queue_empty(psock))
goto msg_bytes_ready;
copied = -EAGAIN;
}
+out:
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac10e4cdd8d0..9861786b8336 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3076,6 +3076,7 @@ struct proto tcp_prot = {
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7b18a6f42f18..c2a4411d2b04 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2927,6 +2927,7 @@ struct proto udp_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,
.get_port = udp_v4_get_port,
+ .put_port = udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d1636425654e..8fe7900f1949 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -413,6 +413,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
if (err) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
+ if (sk->sk_prot->put_port)
+ sk->sk_prot->put_port(sk);
goto out;
}
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 6ac88fe24a8e..9256f6ba87ef 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -177,6 +177,7 @@ struct proto pingv6_prot = {
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
+ .put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1ac243d18c2b..075ee8a2df3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2181,6 +2181,7 @@ struct proto tcpv6_prot = {
.hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index df216268cb02..528b81ef19c9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1733,6 +1733,7 @@ struct proto udpv6_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
+ .put_port = udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index fe98e4f475ba..8ed2d9f4a84d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -768,6 +768,28 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu
return true;
}
+static bool mptcp_established_options_fastclose(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (likely(!subflow->send_fastclose))
+ return false;
+
+ if (remaining < TCPOLEN_MPTCP_FASTCLOSE)
+ return false;
+
+ *size = TCPOLEN_MPTCP_FASTCLOSE;
+ opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
+ opts->rcvr_key = msk->remote_key;
+
+ pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
+ return true;
+}
+
static bool mptcp_established_options_mp_fail(struct sock *sk,
unsigned int *size,
unsigned int remaining,
@@ -806,10 +828,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
- if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
+ mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
}
+ /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
@@ -1209,7 +1233,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
-static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
+u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1224,14 +1248,14 @@ static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum1
header.data_len = htons(data_len);
header.csum = 0;
- csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
+ csum = csum_partial(&header, sizeof(header), sum);
return (__force u16)csum_fold(csum);
}
static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
{
return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
- mpext->csum);
+ ~csum_unfold(mpext->csum));
}
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
@@ -1251,17 +1275,8 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 2;
}
- /* RST is mutually exclusive with everything else */
- if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
- *ptr++ = mptcp_option(MPTCPOPT_RST,
- TCPOLEN_MPTCP_RST,
- opts->reset_transient,
- opts->reset_reason);
- return;
- }
-
- /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
- * mptcp_established_options*()
+ /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
+ * see mptcp_established_options*()
*/
if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
struct mptcp_ext *mpext = &opts->ext_copy;
@@ -1361,7 +1376,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
__mptcp_make_csum(opts->data_seq,
opts->subflow_seq,
opts->data_len,
- opts->csum), ptr);
+ ~csum_unfold(opts->csum)), ptr);
} else {
put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1370,27 +1385,29 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
/* MPC is additionally mutually exclusive with MP_PRIO */
goto mp_capable_done;
- } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYN,
- opts->backup, opts->join_id);
- put_unaligned_be32(opts->token, ptr);
- ptr += 1;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYNACK,
- opts->backup, opts->join_id);
- put_unaligned_be64(opts->thmac, ptr);
- ptr += 2;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
- memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
- ptr += 5;
+ } else if (OPTIONS_MPTCP_MPJ & opts->suboptions) {
+ if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYN,
+ opts->backup, opts->join_id);
+ put_unaligned_be32(opts->token, ptr);
+ ptr += 1;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYNACK,
+ opts->backup, opts->join_id);
+ put_unaligned_be64(opts->thmac, ptr);
+ ptr += 2;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+ memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+ ptr += 5;
+ }
} else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
@@ -1447,6 +1464,24 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 1;
}
}
+ } else if (unlikely(OPTION_MPTCP_FASTCLOSE & opts->suboptions)) {
+ /* FASTCLOSE is mutually exclusive with others except RST */
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE,
+ TCPOLEN_MPTCP_FASTCLOSE,
+ 0, 0);
+ put_unaligned_be64(opts->rcvr_key, ptr);
+ ptr += 2;
+
+ if (OPTION_MPTCP_RST & opts->suboptions)
+ goto mp_rst;
+ return;
+ } else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+mp_rst:
+ *ptr++ = mptcp_option(MPTCPOPT_RST,
+ TCPOLEN_MPTCP_RST,
+ opts->reset_transient,
+ opts->reset_reason);
+ return;
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6ab386ff3294..696b2c4613a7 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -172,9 +172,28 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+ const struct mptcp_subflow_context *subflow)
{
- pr_debug("msk=%p", msk);
+ struct mptcp_pm_data *pm = &msk->pm;
+ bool update_subflows;
+
+ update_subflows = (ssk->sk_state == TCP_CLOSE) &&
+ (subflow->request_join || subflow->mp_join);
+ if (!READ_ONCE(pm->work_pending) && !update_subflows)
+ return;
+
+ spin_lock_bh(&pm->lock);
+ if (update_subflows)
+ pm->subflows--;
+
+ /* Even if this subflow is not really established, tell the PM to try
+ * to pick the next ones, if possible.
+ */
+ if (mptcp_pm_nl_check_work_pending(msk))
+ mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
+
+ spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
@@ -356,7 +375,7 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
-void mptcp_pm_data_init(struct mptcp_sock *msk)
+void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
msk->pm.add_addr_accepted = 0;
@@ -370,11 +389,16 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.accept_subflow, false);
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
+ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ mptcp_pm_nl_data_init(msk);
+}
+void mptcp_pm_data_init(struct mptcp_sock *msk)
+{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
-
- mptcp_pm_nl_data_init(msk);
+ mptcp_pm_data_reset(msk);
}
void __init mptcp_pm_init(void)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6cde58c259a8..75af1f701e1d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry {
u8 retrans_times;
};
-/* max value of mptcp_addr_info.id */
-#define MAX_ADDR_ID U8_MAX
-#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
-
struct pm_nl_pernet {
/* protects pernet updates */
spinlock_t lock;
@@ -53,14 +49,14 @@ struct pm_nl_pernet {
unsigned int local_addr_max;
unsigned int subflows_max;
unsigned int next_id;
- unsigned long id_bitmap[BITMAP_SZ];
+ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
};
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
static bool addresses_equal(const struct mptcp_addr_info *a,
- struct mptcp_addr_info *b, bool use_port)
+ const struct mptcp_addr_info *b, bool use_port)
{
bool addr_equals = false;
@@ -169,11 +165,13 @@ select_local_address(const struct pm_nl_pernet *pernet,
msk_owned_by_me(msk);
rcu_read_lock();
- __mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;
+ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+ continue;
+
if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if ((entry->addr.family == AF_INET &&
@@ -184,23 +182,17 @@ select_local_address(const struct pm_nl_pernet *pernet,
continue;
}
- /* avoid any address already in use by subflows and
- * pending join
- */
- if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
- ret = entry;
- break;
- }
+ ret = entry;
+ break;
}
rcu_read_unlock();
return ret;
}
static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
+select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
- int i = 0;
rcu_read_lock();
/* do not keep any additional per socket state, just signal
@@ -209,12 +201,14 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
* can lead to additional addresses not being announced.
*/
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+ continue;
+
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
- if (i++ == pos) {
- ret = entry;
- break;
- }
+
+ ret = entry;
+ break;
}
rcu_read_unlock();
return ret;
@@ -256,12 +250,17 @@ unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk)
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
-static void check_work_pending(struct mptcp_sock *msk)
+bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
- if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
- (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
- msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
+ struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+
+ if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
+ (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
+ MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) {
WRITE_ONCE(msk->pm.work_pending, false);
+ return false;
+ }
+ return true;
}
struct mptcp_pm_add_entry *
@@ -430,6 +429,7 @@ static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
struct mptcp_addr_info *addrs)
{
+ bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
struct sock *sk = (struct sock *)msk, *ssk;
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info remote = { 0 };
@@ -437,22 +437,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
int i = 0;
subflows_max = mptcp_pm_get_subflows_max(msk);
+ remote_address((struct sock_common *)sk, &remote);
/* Non-fullmesh endpoint, fill in the single entry
* corresponding to the primary MPC subflow remote address
*/
if (!fullmesh) {
- remote_address((struct sock_common *)sk, &remote);
+ if (deny_id0)
+ return 0;
+
msk->pm.subflows++;
addrs[i++] = remote;
} else {
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
- remote_address((struct sock_common *)ssk, &remote);
- if (!lookup_address_in_vec(addrs, i, &remote) &&
+ remote_address((struct sock_common *)ssk, &addrs[i]);
+ if (deny_id0 && addresses_equal(&addrs[i], &remote, false))
+ continue;
+
+ if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
- addrs[i++] = remote;
+ i++;
}
}
}
@@ -460,6 +466,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
return i;
}
+static struct mptcp_pm_addr_entry *
+__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if (entry->addr.id == id)
+ return entry;
+ }
+ return NULL;
+}
+
+static int
+lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+{
+ struct mptcp_pm_addr_entry *entry;
+ int ret = -1;
+
+ rcu_read_lock();
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
+ ret = entry->addr.id;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -475,6 +510,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
local_addr_max = mptcp_pm_get_local_addr_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
+ /* do lazy endpoint usage accounting for the MPC subflows */
+ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
+ struct mptcp_addr_info mpc_addr;
+ int mpc_id;
+
+ local_address((struct sock_common *)msk->first, &mpc_addr);
+ mpc_id = lookup_id_by_addr(pernet, &mpc_addr);
+ if (mpc_id >= 0)
+ __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
+
+ msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
+ }
+
pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
msk->pm.local_addr_used, local_addr_max,
msk->pm.add_addr_signaled, add_addr_signal_max,
@@ -482,47 +530,41 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
- local = select_signal_address(pernet,
- msk->pm.add_addr_signaled);
+ local = select_signal_address(pernet, msk);
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
mptcp_pm_nl_addr_send_ack(msk);
}
- } else {
- /* pick failed, avoid fourther attempts later */
- msk->pm.local_addr_used = add_addr_signal_max;
}
-
- check_work_pending(msk);
}
/* check if should create a new subflow */
- if (msk->pm.local_addr_used < local_addr_max &&
- msk->pm.subflows < subflows_max &&
- !READ_ONCE(msk->pm.remote_deny_join_id0)) {
+ while (msk->pm.local_addr_used < local_addr_max &&
+ msk->pm.subflows < subflows_max) {
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ bool fullmesh;
+ int i, nr;
+
local = select_local_address(pernet, msk);
- if (local) {
- bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
- struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
- int i, nr;
+ if (!local)
+ break;
- msk->pm.local_addr_used++;
- check_work_pending(msk);
- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
- spin_unlock_bh(&msk->pm.lock);
- for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
- spin_lock_bh(&msk->pm.lock);
- return;
- }
+ fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
- /* lookup failed, avoid fourther attempts later */
- msk->pm.local_addr_used = local_addr_max;
- check_work_pending(msk);
+ msk->pm.local_addr_used++;
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
+ if (nr)
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ spin_unlock_bh(&msk->pm.lock);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
+ spin_lock_bh(&msk->pm.lock);
}
+ mptcp_pm_nl_check_work_pending(msk);
}
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
@@ -552,7 +594,6 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
- __mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
@@ -641,7 +682,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
!mptcp_pm_should_rm_signal(msk))
return;
- __mptcp_flush_join_list(msk);
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
if (subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -711,6 +751,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
return;
for (i = 0; i < rm_list->nr; i++) {
+ bool removed = false;
+
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
@@ -727,18 +769,24 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
+
+ /* the following takes care of updating the subflows counter */
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
- if (rm_type == MPTCP_MIB_RMADDR) {
- msk->pm.add_addr_accepted--;
- WRITE_ONCE(msk->pm.accept_addr, true);
- } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
- msk->pm.local_addr_used--;
- }
- msk->pm.subflows--;
+ removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
+ __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ if (!removed)
+ continue;
+
+ if (rm_type == MPTCP_MIB_RMADDR) {
+ msk->pm.add_addr_accepted--;
+ WRITE_ONCE(msk->pm.accept_addr, true);
+ } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
+ msk->pm.local_addr_used--;
+ }
}
}
@@ -759,6 +807,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
msk_owned_by_me(msk);
+ if (!(pm->status & MPTCP_PM_WORK_MASK))
+ return;
+
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
@@ -804,7 +855,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
/* to keep the code simple, don't do IDR-like allocation for address ID,
* just bail when we exceed limits
*/
- if (pernet->next_id == MAX_ADDR_ID)
+ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
pernet->next_id = 1;
if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
goto out;
@@ -824,7 +875,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
if (!entry->addr.id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
- MAX_ADDR_ID + 1,
+ MPTCP_PM_MAX_ADDR_ID + 1,
pernet->next_id);
if (!entry->addr.id && pernet->next_id != 1) {
pernet->next_id = 1;
@@ -1191,18 +1242,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static struct mptcp_pm_addr_entry *
-__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
-{
- struct mptcp_pm_addr_entry *entry;
-
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (entry->addr.id == id)
- return entry;
- }
- return NULL;
-}
-
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
u8 *flags, int *ifindex)
{
@@ -1461,7 +1500,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
list_splice_init(&pernet->local_addr_list, &free_list);
__reset_counters(pernet);
pernet->next_id = 1;
- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
+ bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu();
@@ -1571,7 +1610,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
pernet = net_generic(net, pm_nl_pernet_id);
spin_lock_bh(&pernet->lock);
- for (i = id; i < MAX_ADDR_ID + 1; i++) {
+ for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i);
if (!entry)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index df5a0cf431c1..62d418813503 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -763,7 +763,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &msk->cb_flags);
}
/* If the moves have caught up with the DATA_FIN sequence number
@@ -808,47 +808,38 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
mptcp_data_unlock(sk);
}
-static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
+static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
{
- struct mptcp_subflow_context *subflow;
- bool ret = false;
+ struct sock *sk = (struct sock *)msk;
- if (likely(list_empty(&msk->join_list)))
+ if (sk->sk_state != TCP_ESTABLISHED)
return false;
- spin_lock_bh(&msk->join_list_lock);
- list_for_each_entry(subflow, &msk->join_list, node) {
- u32 sseq = READ_ONCE(subflow->setsockopt_seq);
-
- mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
- if (READ_ONCE(msk->setsockopt_seq) != sseq)
- ret = true;
- }
- list_splice_tail_init(&msk->join_list, &msk->conn_list);
- spin_unlock_bh(&msk->join_list_lock);
-
- return ret;
-}
-
-void __mptcp_flush_join_list(struct mptcp_sock *msk)
-{
- if (likely(!mptcp_do_flush_join_list(msk)))
- return;
+ /* attach to msk socket only after we are sure we will deal with it
+ * at close time
+ */
+ if (sk->sk_socket && !ssk->sk_socket)
+ mptcp_sock_graft(ssk, sk->sk_socket);
- if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
- mptcp_schedule_work((struct sock *)msk);
+ mptcp_propagate_sndbuf((struct sock *)msk, ssk);
+ mptcp_sockopt_sync_locked(msk, ssk);
+ return true;
}
-static void mptcp_flush_join_list(struct mptcp_sock *msk)
+static void __mptcp_flush_join_list(struct sock *sk)
{
- bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
-
- might_sleep();
+ struct mptcp_subflow_context *tmp, *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
- if (!mptcp_do_flush_join_list(msk) && !sync_needed)
- return;
+ list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast(ssk);
- mptcp_sockopt_sync_all(msk);
+ list_move_tail(&subflow->node, &msk->conn_list);
+ if (!__mptcp_finish_join(msk, ssk))
+ mptcp_subflow_reset(ssk);
+ unlock_sock_fast(ssk, slow);
+ }
}
static bool mptcp_timer_pending(struct sock *sk)
@@ -1526,9 +1517,8 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk) &&
- !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ if (mptcp_send_head(sk))
+ mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
@@ -1549,7 +1539,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
int ret = 0;
prev_ssk = ssk;
- __mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);
/* First check. If the ssk has changed since
@@ -1954,7 +1943,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
unsigned int moved = 0;
bool ret, done;
- mptcp_flush_join_list(msk);
do {
struct sock *ssk = mptcp_subflow_recv_lookup(msk);
bool slowpath;
@@ -2145,7 +2133,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
mptcp_schedule_work(sk);
} else {
/* delegate our work to tcp_release_cb() */
- set_bit(MPTCP_RETRANSMIT, &msk->flags);
+ __set_bit(MPTCP_RETRANSMIT, &msk->cb_flags);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -2253,6 +2241,10 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
return true;
}
+/* flags for __mptcp_close_ssk() */
+#define MPTCP_CF_PUSH BIT(1)
+#define MPTCP_CF_FASTCLOSE BIT(2)
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2262,22 +2254,37 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
* parent socket.
*/
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow)
+ struct mptcp_subflow_context *subflow,
+ unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push;
+ bool need_push, dispose_it;
- list_del(&subflow->node);
+ dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ if (dispose_it)
+ list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ if (flags & MPTCP_CF_FASTCLOSE)
+ subflow->send_fastclose = 1;
+
+ need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
+ if (!dispose_it) {
+ tcp_disconnect(ssk, 0);
+ msk->subflow->state = SS_UNCONNECTED;
+ mptcp_subflow_ctx_reset(subflow);
+ release_sock(ssk);
+
+ goto out;
+ }
+
/* if we are invoked by the msk cleanup code, the subflow is
* already orphaned
*/
if (ssk->sk_socket)
sock_orphan(ssk);
- need_push = __mptcp_retransmit_pending_data(sk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2297,14 +2304,12 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_put(ssk);
- if (ssk == msk->last_snd)
- msk->last_snd = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
- if (msk->subflow && ssk == msk->subflow->sk)
- mptcp_dispose_initial_subflow(msk);
+out:
+ if (ssk == msk->last_snd)
+ msk->last_snd = NULL;
if (need_push)
__mptcp_push_pending(sk, 0);
@@ -2315,7 +2320,13 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
{
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
- __mptcp_close_ssk(sk, ssk, subflow);
+
+ /* subflow aborted before reaching the fully_established status
+ * attempt the creation of the next subflow
+ */
+ mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow);
+
+ __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH);
}
static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
@@ -2467,12 +2478,10 @@ static void mptcp_worker(struct work_struct *work)
goto unlock;
mptcp_check_data_fin_ack(sk);
- mptcp_flush_join_list(msk);
mptcp_check_fastclose(msk);
- if (msk->pm.status)
- mptcp_pm_nl_work(msk);
+ mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
@@ -2506,8 +2515,6 @@ static int __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- spin_lock_init(&msk->join_list_lock);
-
INIT_LIST_HEAD(&msk->conn_list);
INIT_LIST_HEAD(&msk->join_list);
INIT_LIST_HEAD(&msk->rtx_queue);
@@ -2533,9 +2540,20 @@ static int __mptcp_init_sock(struct sock *sk)
return 0;
}
-static int mptcp_init_sock(struct sock *sk)
+static void mptcp_ca_reset(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_assign_congestion_control(sk);
+ strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+ /* no need to keep a reference to the ops, the name will suffice */
+ tcp_cleanup_congestion_control(sk);
+ icsk->icsk_ca_ops = NULL;
+}
+
+static int mptcp_init_sock(struct sock *sk)
+{
struct net *net = sock_net(sk);
int ret;
@@ -2556,12 +2574,7 @@ static int mptcp_init_sock(struct sock *sk)
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
* propagate the correct value
*/
- tcp_assign_congestion_control(sk);
- strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
-
- /* no need to keep a reference to the ops, the name will suffice */
- tcp_cleanup_congestion_control(sk);
- icsk->icsk_ca_ops = NULL;
+ mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2666,6 +2679,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
* state now
*/
if (__mptcp_check_fallback(msk)) {
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
@@ -2674,7 +2688,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
}
}
- mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
@@ -2707,21 +2720,20 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- /* be sure to always acquire the join list lock, to sync vs
- * mptcp_finish_join().
- */
- spin_lock_bh(&msk->join_list_lock);
- list_splice_tail_init(&msk->join_list, &msk->conn_list);
- spin_unlock_bh(&msk->join_list_lock);
+ /* join list will be eventually flushed (with rst) at sock lock release time*/
list_splice_init(&msk->conn_list, &conn_list);
sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ /* clears msk->subflow, allowing the following loop to close
+ * even the initial subflow
+ */
+ mptcp_dispose_initial_subflow(msk);
list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- __mptcp_close_ssk(sk, ssk, subflow);
+ __mptcp_close_ssk(sk, ssk, subflow, 0);
}
sk->sk_prot->destroy(sk);
@@ -2732,7 +2744,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
- mptcp_dispose_initial_subflow(msk);
sock_put(sk);
}
@@ -2768,6 +2779,9 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
do_cancel_work = true;
@@ -2778,9 +2792,6 @@ cleanup:
if (do_cancel_work)
mptcp_cancel_work(sk);
- if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
-
sock_put(sk);
}
@@ -2812,15 +2823,38 @@ static int mptcp_disconnect(struct sock *sk, int flags)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- mptcp_do_flush_join_list(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- lock_sock(ssk);
- tcp_disconnect(ssk, flags);
- release_sock(ssk);
+ __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
}
+
+ sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
+
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
+ mptcp_destroy_common(msk);
+ msk->last_snd = NULL;
+ WRITE_ONCE(msk->flags, 0);
+ msk->cb_flags = 0;
+ msk->push_pending = 0;
+ msk->recovery = false;
+ msk->can_ack = false;
+ msk->fully_established = false;
+ msk->rcv_data_fin = false;
+ msk->snd_data_fin_enable = false;
+ msk->rcv_fastclose = false;
+ msk->use_64bit_ack = false;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ mptcp_pm_data_reset(msk);
+ mptcp_ca_reset(sk);
+
+ sk->sk_shutdown = 0;
+ sk_error_report(sk);
return 0;
}
@@ -2960,9 +2994,11 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
__mptcp_clear_xmit(sk);
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
+ mptcp_data_lock(sk);
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
+ mptcp_data_unlock(sk);
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
@@ -2986,7 +3022,7 @@ void __mptcp_data_acked(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_clean_una(sk);
else
- set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
if (mptcp_pending_data_fin_ack(sk))
mptcp_schedule_work(sk);
@@ -3005,20 +3041,23 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
else if (xmit_ssk)
mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
} else {
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
}
}
+#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
+ BIT(MPTCP_RETRANSMIT) | \
+ BIT(MPTCP_FLUSH_JOIN_LIST))
+
/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)
+ __must_hold(&sk->sk_lock.slock)
{
- for (;;) {
- unsigned long flags = 0;
+ struct mptcp_sock *msk = mptcp_sk(sk);
- if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_PUSH_PENDING);
- if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_RETRANSMIT);
+ for (;;) {
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
+ msk->push_pending;
if (!flags)
break;
@@ -3029,8 +3068,11 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
-
+ msk->push_pending = 0;
+ msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
+ if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
+ __mptcp_flush_join_list(sk);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
@@ -3043,11 +3085,11 @@ static void mptcp_release_cb(struct sock *sk)
/* be sure to set the current sk state before tacking actions
* depending on sk_state
*/
- if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
__mptcp_set_connected(sk);
- if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
__mptcp_update_rmem(sk);
@@ -3089,7 +3131,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk);
else
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
@@ -3175,8 +3217,7 @@ bool mptcp_finish_join(struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct sock *parent = (void *)msk;
- struct socket *parent_sock;
- bool ret;
+ bool ret = true;
pr_debug("msk=%p, subflow=%p", msk, subflow);
@@ -3189,35 +3230,38 @@ bool mptcp_finish_join(struct sock *ssk)
if (!msk->pm.server_side)
goto out;
- if (!mptcp_pm_allow_new_subflow(msk)) {
- subflow->reset_reason = MPTCP_RST_EPROHIBIT;
- return false;
- }
+ if (!mptcp_pm_allow_new_subflow(msk))
+ goto err_prohibited;
+
+ if (WARN_ON_ONCE(!list_empty(&subflow->node)))
+ goto err_prohibited;
- /* active connections are already on conn_list, and we can't acquire
- * msk lock here.
- * use the join list lock as synchronization point and double-check
- * msk status to avoid racing with __mptcp_destroy_sock()
+ /* active connections are already on conn_list.
+ * If we can't acquire msk socket lock here, let the release callback
+ * handle it
*/
- spin_lock_bh(&msk->join_list_lock);
- ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
- if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) {
- list_add_tail(&subflow->node, &msk->join_list);
+ mptcp_data_lock(parent);
+ if (!sock_owned_by_user(parent)) {
+ ret = __mptcp_finish_join(msk, ssk);
+ if (ret) {
+ sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->conn_list);
+ }
+ } else {
sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->join_list);
+ __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags);
}
- spin_unlock_bh(&msk->join_list_lock);
+ mptcp_data_unlock(parent);
+
if (!ret) {
+err_prohibited:
subflow->reset_reason = MPTCP_RST_EPROHIBIT;
return false;
}
- /* attach to msk socket only after we are sure he will deal with us
- * at close time
- */
- parent_sock = READ_ONCE(parent->sk_socket);
- if (parent_sock && !ssk->sk_socket)
- mptcp_sock_graft(ssk, parent_sock);
subflow->map_seq = READ_ONCE(msk->ack_seq);
+
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
@@ -3352,9 +3396,20 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct mptcp_subflow_context *subflow;
struct socket *ssock;
- int err;
+ int err = -EINVAL;
lock_sock(sock->sk);
+ if (uaddr) {
+ if (addr_len < sizeof(uaddr->sa_family))
+ goto unlock;
+
+ if (uaddr->sa_family == AF_UNSPEC) {
+ err = mptcp_disconnect(sock->sk, flags);
+ sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+ goto unlock;
+ }
+ }
+
if (sock->state != SS_UNCONNECTED && msk->subflow) {
/* pending connection or invalid state, let existing subflow
* cope with that
@@ -3364,10 +3419,8 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
}
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (!ssock)
goto unlock;
- }
mptcp_token_destroy(msk);
inet_sk_state_store(sock->sk, TCP_SYN_SENT);
@@ -3441,17 +3494,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- lock_sock(sock->sk);
- if (sock->sk->sk_state != TCP_LISTEN)
- goto unlock_fail;
-
ssock = __mptcp_nmpc_socket(msk);
if (!ssock)
- goto unlock_fail;
-
- clear_bit(MPTCP_DATA_READY, &msk->flags);
- sock_hold(ssock->sk);
- release_sock(sock->sk);
+ return -EINVAL;
err = ssock->ops->accept(sock, newsock, flags, kern);
if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
@@ -3481,7 +3526,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
- mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -3491,14 +3535,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
release_sock(newsk);
}
- if (inet_csk_listen_poll(ssock->sk))
- set_bit(MPTCP_DATA_READY, &msk->flags);
- sock_put(ssock->sk);
return err;
-
-unlock_fail:
- release_sock(sock->sk);
- return -EINVAL;
}
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
@@ -3544,8 +3581,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
- if (state == TCP_LISTEN)
- return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0;
+ if (state == TCP_LISTEN) {
+ if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
+ return 0;
+
+ return inet_csk_listen_poll(msk->subflow->sk);
+ }
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0486c9f5b38b..0e6b42c76ea0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -110,19 +110,20 @@
/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT BIT(0)
-/* MPTCP socket flags */
-#define MPTCP_DATA_READY 0
+/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
-#define MPTCP_PUSH_PENDING 6
-#define MPTCP_CLEAN_UNA 7
-#define MPTCP_ERROR_REPORT 8
-#define MPTCP_RETRANSMIT 9
-#define MPTCP_WORK_SYNC_SETSOCKOPT 10
-#define MPTCP_CONNECTED 11
+
+/* MPTCP socket release cb flags */
+#define MPTCP_PUSH_PENDING 1
+#define MPTCP_CLEAN_UNA 2
+#define MPTCP_ERROR_REPORT 3
+#define MPTCP_RETRANSMIT 4
+#define MPTCP_FLUSH_JOIN_LIST 5
+#define MPTCP_CONNECTED 6
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -174,16 +175,25 @@ enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_SEND_ACK,
MPTCP_PM_RM_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED,
- MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_SUBFLOW_ESTABLISHED,
+ MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
+ MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
+ * accounted int id_avail_bitmap
+ */
};
+/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
+#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
+
enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO,
MPTCP_RM_ADDR_SIGNAL,
};
+/* max value of mptcp_addr_info.id */
+#define MPTCP_PM_MAX_ADDR_ID U8_MAX
+
struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
@@ -202,6 +212,7 @@ struct mptcp_pm_data {
u8 local_addr_used;
u8 subflows;
u8 status;
+ DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_rm_list rm_list_tx;
struct mptcp_rm_list rm_list_rx;
};
@@ -241,6 +252,8 @@ struct mptcp_sock {
u32 token;
int rmem_released;
unsigned long flags;
+ unsigned long cb_flags;
+ unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -252,7 +265,6 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
- spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -395,6 +407,9 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
+
+ char reset_start[0];
+
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
u64 remote_key;
@@ -423,6 +438,7 @@ struct mptcp_subflow_context {
backup : 1,
send_mp_prio : 1,
send_mp_fail : 1,
+ send_fastclose : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
@@ -441,6 +457,9 @@ struct mptcp_subflow_context {
u8 stale_count;
long delegated_status;
+
+ char reset_end[0];
+
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
u32 setsockopt_seq;
@@ -472,6 +491,13 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
return subflow->tcp_sock;
}
+static inline void
+mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
+{
+ memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ subflow->request_mptcp = 1;
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
@@ -486,15 +512,6 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}
-static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- sock_hold(mptcp_subflow_tcp_sock(subflow));
- spin_lock_bh(&msk->join_list_lock);
- list_add_tail(&subflow->node, &msk->join_list);
- spin_unlock_bh(&msk->join_list_lock);
-}
-
void mptcp_subflow_process_delegated(struct sock *ssk);
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
@@ -659,7 +676,6 @@ void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
-void __mptcp_flush_join_list(struct mptcp_sock *msk);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
return READ_ONCE(msk->snd_data_fin_enable) &&
@@ -709,9 +725,11 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
+u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_data_reset(struct mptcp_sock *msk);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
@@ -719,7 +737,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
-void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
+bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+ const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
@@ -816,7 +836,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index aa3fcd86dbe2..dacf3cee0027 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1285,27 +1285,15 @@ void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
}
}
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- u32 seq;
-
- seq = sockopt_seq_reset(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- u32 sseq = READ_ONCE(subflow->setsockopt_seq);
+ msk_owned_by_me(msk);
- if (sseq != msk->setsockopt_seq) {
- __mptcp_sockopt_sync(msk, ssk);
- WRITE_ONCE(subflow->setsockopt_seq, seq);
- } else if (sseq != seq) {
- WRITE_ONCE(subflow->setsockopt_seq, seq);
- }
+ if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+ sync_socket_options(msk, ssk);
- cond_resched();
+ subflow->setsockopt_seq = msk->setsockopt_seq;
}
-
- msk->setsockopt_seq = seq;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 24bc9d5e87be..bea47a1180dc 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_set_connected(sk);
else
- set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -845,9 +845,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
bool csum_reqd)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- struct csum_pseudo_header header;
u32 offset, seq, delta;
- __wsum csum;
+ u16 csum;
int len;
if (!csum_reqd)
@@ -908,13 +907,11 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
* while the pseudo header requires the original DSS data len,
* including that
*/
- header.data_seq = cpu_to_be64(subflow->map_seq);
- header.subflow_seq = htonl(subflow->map_subflow_seq);
- header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
- header.csum = 0;
-
- csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
- if (unlikely(csum_fold(csum))) {
+ csum = __mptcp_make_csum(subflow->map_seq,
+ subflow->map_subflow_seq,
+ subflow->map_data_len + subflow->map_data_fin,
+ subflow->map_data_csum);
+ if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
subflow->send_mp_fail = 1;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
@@ -1274,7 +1271,7 @@ static void subflow_error_report(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -1293,7 +1290,6 @@ static void subflow_data_ready(struct sock *sk)
if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
return;
- set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
@@ -1442,7 +1438,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
- mptcp_add_pending_subflow(msk, subflow);
+ sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
@@ -1453,9 +1450,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
return err;
failed_unlink:
- spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
- spin_unlock_bh(&msk->join_list_lock);
sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index e581b341c5be..f52ee7b26aed 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -384,6 +384,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk)
bucket->chain_len--;
}
spin_unlock_bh(&bucket->lock);
+ WRITE_ONCE(msk->token, 0);
}
void __init mptcp_token_init(void)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9bbe7282efb6..5bd409ab4cc2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3162,12 +3162,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
__be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
- struct net_device *dev_curr;
- __be16 proto_curr;
- bool need_rehook;
struct net_device *dev = NULL;
- int ret = 0;
bool unlisted = false;
+ bool need_rehook;
+ int ret = 0;
lock_sock(sk);
spin_lock(&po->bind_lock);
@@ -3192,14 +3190,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
}
- dev_hold(dev);
-
- proto_curr = po->prot_hook.type;
- dev_curr = po->prot_hook.dev;
-
- need_rehook = proto_curr != proto || dev_curr != dev;
+ need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev;
if (need_rehook) {
+ dev_hold(dev);
if (po->running) {
rcu_read_unlock();
/* prevents packet_notifier() from calling
@@ -3208,7 +3202,6 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, true);
rcu_read_lock();
- dev_curr = po->prot_hook.dev;
if (dev)
unlisted = !dev_get_by_index_rcu(sock_net(sk),
dev->ifindex);
@@ -3218,25 +3211,21 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
- dev_put_track(dev_curr, &po->prot_hook.dev_tracker);
- dev_curr = NULL;
+ dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker);
if (unlikely(unlisted)) {
- dev_put(dev);
po->prot_hook.dev = NULL;
WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
- if (dev)
- netdev_tracker_alloc(dev,
- &po->prot_hook.dev_tracker,
- GFP_ATOMIC);
+ dev_hold_track(dev, &po->prot_hook.dev_tracker,
+ GFP_ATOMIC);
po->prot_hook.dev = dev;
WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
+ dev_put(dev);
}
- dev_put_track(dev_curr, &po->prot_hook.dev_tracker);
if (proto == 0 || !need_rehook)
goto out_unlock;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 857aaebd49f4..a43a58a73d09 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2342,9 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch)
/* List of known Diffserv codepoints:
*
- * Least Effort (CS1, LE)
- * Best Effort (CS0)
- * Max Reliability & LLT "Lo" (TOS1)
+ * Default Forwarding (DF/CS0) - Best Effort
* Max Throughput (TOS2)
* Min Delay (TOS4)
* LLT "La" (TOS5)
@@ -2352,6 +2350,7 @@ static int cake_config_precedence(struct Qdisc *sch)
* Assured Forwarding 2 (AF2x) - x3
* Assured Forwarding 3 (AF3x) - x3
* Assured Forwarding 4 (AF4x) - x3
+ * Precedence Class 1 (CS1)
* Precedence Class 2 (CS2)
* Precedence Class 3 (CS3)
* Precedence Class 4 (CS4)
@@ -2360,8 +2359,9 @@ static int cake_config_precedence(struct Qdisc *sch)
* Precedence Class 7 (CS7)
* Voice Admit (VA)
* Expedited Forwarding (EF)
-
- * Total 25 codepoints.
+ * Lower Effort (LE)
+ *
+ * Total 26 codepoints.
*/
/* List of traffic classes in RFC 4594, updated by RFC 8622:
@@ -2375,12 +2375,12 @@ static int cake_config_precedence(struct Qdisc *sch)
* Realtime Interactive (CS4) - eg. games
* Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch
* Broadcast Video (CS3)
- * Low Latency Data (AF2x,TOS4) - eg. database
- * Ops, Admin, Management (CS2,TOS1) - eg. ssh
- * Standard Service (CS0 & unrecognised codepoints)
- * High Throughput Data (AF1x,TOS2) - eg. web traffic
- * Low Priority Data (CS1,LE) - eg. BitTorrent
-
+ * Low-Latency Data (AF2x,TOS4) - eg. database
+ * Ops, Admin, Management (CS2) - eg. ssh
+ * Standard Service (DF & unrecognised codepoints)
+ * High-Throughput Data (AF1x,TOS2) - eg. web traffic
+ * Low-Priority Data (LE,CS1) - eg. BitTorrent
+ *
* Total 12 traffic classes.
*/
@@ -2390,12 +2390,12 @@ static int cake_config_diffserv8(struct Qdisc *sch)
*
* Network Control (CS6, CS7)
* Minimum Latency (EF, VA, CS5, CS4)
- * Interactive Shell (CS2, TOS1)
+ * Interactive Shell (CS2)
* Low Latency Transactions (AF2x, TOS4)
* Video Streaming (AF4x, AF3x, CS3)
- * Bog Standard (CS0 etc.)
- * High Throughput (AF1x, TOS2)
- * Background Traffic (CS1, LE)
+ * Bog Standard (DF etc.)
+ * High Throughput (AF1x, TOS2, CS1)
+ * Background Traffic (LE)
*
* Total 8 traffic classes.
*/
@@ -2437,9 +2437,9 @@ static int cake_config_diffserv4(struct Qdisc *sch)
/* Further pruned list of traffic classes for four-class system:
*
* Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4)
- * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
- * Best Effort (CS0, AF1x, TOS2, and those not specified)
- * Background Traffic (CS1, LE)
+ * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2)
+ * Best Effort (DF, AF1x, TOS2, and those not specified)
+ * Background Traffic (LE, CS1)
*
* Total 4 traffic classes.
*/
@@ -2477,9 +2477,9 @@ static int cake_config_diffserv4(struct Qdisc *sch)
static int cake_config_diffserv3(struct Qdisc *sch)
{
/* Simplified Diffserv structure with 3 tins.
- * Low Priority (CS1, LE)
+ * Latency Sensitive (CS7, CS6, EF, VA, TOS4)
* Best Effort
- * Latency Sensitive (TOS4, VA, EF, CS6, CS7)
+ * Low Priority (LE, CS1)
*/
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 3f271e29812f..95e774f1b91f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1990,6 +1990,7 @@ recv_end:
end:
release_sock(sk);
+ sk_defer_free_flush(sk);
if (psock)
sk_psock_put(sk, psock);
return copied ? : err;