Commit | Line | Data |
---|---|---|
1b1c7a0e PK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2019, Intel Corporation. | |
5 | */ | |
c85adced GT |
6 | #define pr_fmt(fmt) "MPTCP: " fmt |
7 | ||
1b1c7a0e PK |
8 | #include <linux/kernel.h> |
9 | #include <net/tcp.h> | |
10 | #include <net/mptcp.h> | |
11 | #include "protocol.h" | |
12 | ||
fc1b4e3b PA |
13 | #include "mib.h" |
14 | ||
1b1c7a0e PK |
15 | /* path manager command handlers */ |
16 | ||
17 | int mptcp_pm_announce_addr(struct mptcp_sock *msk, | |
6a6c05a8 | 18 | const struct mptcp_addr_info *addr, |
f7efc777 | 19 | bool echo) |
1b1c7a0e | 20 | { |
13ad9f01 | 21 | u8 add_addr = READ_ONCE(msk->pm.addr_signal); |
d91d322a | 22 | |
18fc1a92 | 23 | pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); |
926bdeab | 24 | |
3abc05d9 FW |
25 | lockdep_assert_held(&msk->pm.lock); |
26 | ||
18fc1a92 YL |
27 | if (add_addr & |
28 | (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { | |
29 | pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); | |
42842a42 GT |
30 | return -EINVAL; |
31 | } | |
32 | ||
18fc1a92 YL |
33 | if (echo) { |
34 | msk->pm.remote = *addr; | |
d91d322a | 35 | add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); |
18fc1a92 YL |
36 | } else { |
37 | msk->pm.local = *addr; | |
38 | add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); | |
39 | } | |
13ad9f01 | 40 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab | 41 | return 0; |
1b1c7a0e PK |
42 | } |
43 | ||
cbde2787 | 44 | int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 45 | { |
13ad9f01 | 46 | u8 rm_addr = READ_ONCE(msk->pm.addr_signal); |
42842a42 | 47 | |
cbde2787 | 48 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
b6c08380 | 49 | |
42842a42 GT |
50 | if (rm_addr) { |
51 | pr_warn("addr_signal error, rm_addr=%d", rm_addr); | |
52 | return -EINVAL; | |
53 | } | |
54 | ||
cbde2787 | 55 | msk->pm.rm_list_tx = *rm_list; |
42842a42 | 56 | rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); |
13ad9f01 | 57 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
b46a0238 | 58 | mptcp_pm_nl_addr_send_ack(msk); |
b6c08380 | 59 | return 0; |
1b1c7a0e PK |
60 | } |
61 | ||
ddd14bb8 | 62 | int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 63 | { |
ddd14bb8 | 64 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
0ee4261a GT |
65 | |
66 | spin_lock_bh(&msk->pm.lock); | |
ddd14bb8 | 67 | mptcp_pm_nl_rm_subflow_received(msk, rm_list); |
0ee4261a GT |
68 | spin_unlock_bh(&msk->pm.lock); |
69 | return 0; | |
1b1c7a0e PK |
70 | } |
71 | ||
72 | /* path manager event handlers */ | |
73 | ||
6c714f1b | 74 | void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) |
1b1c7a0e PK |
75 | { |
76 | struct mptcp_pm_data *pm = &msk->pm; | |
77 | ||
78 | pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); | |
79 | ||
80 | WRITE_ONCE(pm->server_side, server_side); | |
b911c97c | 81 | mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); |
1b1c7a0e PK |
82 | } |
83 | ||
84 | bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) | |
85 | { | |
926bdeab | 86 | struct mptcp_pm_data *pm = &msk->pm; |
a914e586 | 87 | unsigned int subflows_max; |
f58f065a | 88 | int ret = 0; |
926bdeab | 89 | |
a914e586 GT |
90 | subflows_max = mptcp_pm_get_subflows_max(msk); |
91 | ||
926bdeab | 92 | pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, |
a914e586 | 93 | subflows_max, READ_ONCE(pm->accept_subflow)); |
926bdeab PK |
94 | |
95 | /* try to avoid acquiring the lock below */ | |
96 | if (!READ_ONCE(pm->accept_subflow)) | |
97 | return false; | |
98 | ||
99 | spin_lock_bh(&pm->lock); | |
f58f065a | 100 | if (READ_ONCE(pm->accept_subflow)) { |
a914e586 GT |
101 | ret = pm->subflows < subflows_max; |
102 | if (ret && ++pm->subflows == subflows_max) | |
f58f065a GT |
103 | WRITE_ONCE(pm->accept_subflow, false); |
104 | } | |
926bdeab PK |
105 | spin_unlock_bh(&pm->lock); |
106 | ||
107 | return ret; | |
108 | } | |
109 | ||
110 | /* return true if the new status bit is currently cleared, that is, this event | |
111 | * can be server, eventually by an already scheduled work | |
112 | */ | |
113 | static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, | |
114 | enum mptcp_pm_status new_status) | |
115 | { | |
116 | pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, | |
117 | BIT(new_status)); | |
118 | if (msk->pm.status & BIT(new_status)) | |
119 | return false; | |
120 | ||
121 | msk->pm.status |= BIT(new_status); | |
ba8f48f7 | 122 | mptcp_schedule_work((struct sock *)msk); |
926bdeab | 123 | return true; |
1b1c7a0e PK |
124 | } |
125 | ||
6c714f1b | 126 | void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) |
1b1c7a0e | 127 | { |
926bdeab | 128 | struct mptcp_pm_data *pm = &msk->pm; |
b911c97c | 129 | bool announce = false; |
926bdeab | 130 | |
1b1c7a0e | 131 | pr_debug("msk=%p", msk); |
926bdeab | 132 | |
926bdeab PK |
133 | spin_lock_bh(&pm->lock); |
134 | ||
5b950ff4 PA |
135 | /* mptcp_pm_fully_established() can be invoked by multiple |
136 | * racing paths - accept() and check_fully_established() | |
137 | * be sure to serve this event only once. | |
138 | */ | |
139 | if (READ_ONCE(pm->work_pending) && | |
140 | !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) | |
926bdeab PK |
141 | mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); |
142 | ||
b911c97c FW |
143 | if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) |
144 | announce = true; | |
145 | ||
146 | msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); | |
926bdeab | 147 | spin_unlock_bh(&pm->lock); |
b911c97c FW |
148 | |
149 | if (announce) | |
150 | mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); | |
1b1c7a0e PK |
151 | } |
152 | ||
/* Connection closed event: currently only emits a debug message. */
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p\n", msk);
}
157 | ||
62535200 | 158 | void mptcp_pm_subflow_established(struct mptcp_sock *msk) |
1b1c7a0e | 159 | { |
926bdeab PK |
160 | struct mptcp_pm_data *pm = &msk->pm; |
161 | ||
1b1c7a0e | 162 | pr_debug("msk=%p", msk); |
926bdeab PK |
163 | |
164 | if (!READ_ONCE(pm->work_pending)) | |
165 | return; | |
166 | ||
167 | spin_lock_bh(&pm->lock); | |
168 | ||
169 | if (READ_ONCE(pm->work_pending)) | |
170 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
171 | ||
172 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
173 | } |
174 | ||
175 | void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id) | |
176 | { | |
177 | pr_debug("msk=%p", msk); | |
178 | } | |
179 | ||
180 | void mptcp_pm_add_addr_received(struct mptcp_sock *msk, | |
181 | const struct mptcp_addr_info *addr) | |
182 | { | |
926bdeab PK |
183 | struct mptcp_pm_data *pm = &msk->pm; |
184 | ||
185 | pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, | |
186 | READ_ONCE(pm->accept_addr)); | |
187 | ||
b911c97c FW |
188 | mptcp_event_addr_announced(msk, addr); |
189 | ||
926bdeab PK |
190 | spin_lock_bh(&pm->lock); |
191 | ||
84dfe367 | 192 | if (!READ_ONCE(pm->accept_addr)) { |
f7efc777 | 193 | mptcp_pm_announce_addr(msk, addr, true); |
84dfe367 GT |
194 | mptcp_pm_add_addr_send_ack(msk); |
195 | } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { | |
926bdeab | 196 | pm->remote = *addr; |
84dfe367 | 197 | } |
926bdeab PK |
198 | |
199 | spin_unlock_bh(&pm->lock); | |
84dfe367 GT |
200 | } |
201 | ||
557963c3 GT |
202 | void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, |
203 | struct mptcp_addr_info *addr) | |
204 | { | |
205 | struct mptcp_pm_data *pm = &msk->pm; | |
206 | ||
207 | pr_debug("msk=%p", msk); | |
208 | ||
209 | spin_lock_bh(&pm->lock); | |
210 | ||
211 | if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) | |
212 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
213 | ||
214 | spin_unlock_bh(&pm->lock); | |
215 | } | |
216 | ||
84dfe367 GT |
217 | void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) |
218 | { | |
b5a7acd3 | 219 | if (!mptcp_pm_should_add_signal(msk)) |
84dfe367 GT |
220 | return; |
221 | ||
222 | mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); | |
1b1c7a0e PK |
223 | } |
224 | ||
5c4a824d GT |
225 | void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, |
226 | const struct mptcp_rm_list *rm_list) | |
d0876b22 GT |
227 | { |
228 | struct mptcp_pm_data *pm = &msk->pm; | |
5c4a824d | 229 | u8 i; |
d0876b22 | 230 | |
5c4a824d | 231 | pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); |
d0876b22 | 232 | |
5c4a824d GT |
233 | for (i = 0; i < rm_list->nr; i++) |
234 | mptcp_event_addr_removed(msk, rm_list->ids[i]); | |
b911c97c | 235 | |
d0876b22 GT |
236 | spin_lock_bh(&pm->lock); |
237 | mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); | |
b5c55f33 | 238 | pm->rm_list_rx = *rm_list; |
d0876b22 GT |
239 | spin_unlock_bh(&pm->lock); |
240 | } | |
241 | ||
40453a5c GT |
242 | void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) |
243 | { | |
244 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | |
245 | ||
246 | pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); | |
247 | subflow->backup = bkup; | |
b911c97c FW |
248 | |
249 | mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); | |
40453a5c GT |
250 | } |
251 | ||
1b1c7a0e PK |
252 | /* path manager helpers */ |
253 | ||
1f5e9e2f YL |
254 | bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, |
255 | unsigned int opt_size, unsigned int remaining, | |
f462a446 | 256 | struct mptcp_addr_info *addr, bool *echo, |
1f5e9e2f | 257 | bool *port, bool *drop_other_suboptions) |
1b1c7a0e | 258 | { |
926bdeab | 259 | int ret = false; |
119c0220 | 260 | u8 add_addr; |
f462a446 | 261 | u8 family; |
926bdeab PK |
262 | |
263 | spin_lock_bh(&msk->pm.lock); | |
264 | ||
265 | /* double check after the lock is acquired */ | |
f643b803 | 266 | if (!mptcp_pm_should_add_signal(msk)) |
926bdeab PK |
267 | goto out_unlock; |
268 | ||
1f5e9e2f YL |
269 | /* always drop every other options for pure ack ADD_ADDR; this is a |
270 | * plain dup-ack from TCP perspective. The other MPTCP-relevant info, | |
271 | * if any, will be carried by the 'original' TCP ack | |
272 | */ | |
273 | if (skb && skb_is_tcp_pure_ack(skb)) { | |
274 | remaining += opt_size; | |
275 | *drop_other_suboptions = true; | |
276 | } | |
277 | ||
d91d322a | 278 | *echo = mptcp_pm_should_add_signal_echo(msk); |
c233ef13 | 279 | *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); |
456afe01 | 280 | |
f462a446 YL |
281 | family = *echo ? msk->pm.remote.family : msk->pm.local.family; |
282 | if (remaining < mptcp_add_addr_len(family, *echo, *port)) | |
926bdeab PK |
283 | goto out_unlock; |
284 | ||
f462a446 YL |
285 | if (*echo) { |
286 | *addr = msk->pm.remote; | |
119c0220 | 287 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); |
f462a446 YL |
288 | } else { |
289 | *addr = msk->pm.local; | |
119c0220 | 290 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); |
f462a446 | 291 | } |
119c0220 | 292 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab PK |
293 | ret = true; |
294 | ||
295 | out_unlock: | |
296 | spin_unlock_bh(&msk->pm.lock); | |
297 | return ret; | |
1b1c7a0e PK |
298 | } |
299 | ||
5cb104ae | 300 | bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, |
6445e17a | 301 | struct mptcp_rm_list *rm_list) |
5cb104ae | 302 | { |
cbde2787 | 303 | int ret = false, len; |
119c0220 | 304 | u8 rm_addr; |
5cb104ae GT |
305 | |
306 | spin_lock_bh(&msk->pm.lock); | |
307 | ||
308 | /* double check after the lock is acquired */ | |
309 | if (!mptcp_pm_should_rm_signal(msk)) | |
310 | goto out_unlock; | |
311 | ||
119c0220 | 312 | rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); |
cbde2787 GT |
313 | len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); |
314 | if (len < 0) { | |
119c0220 | 315 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
cbde2787 GT |
316 | goto out_unlock; |
317 | } | |
318 | if (remaining < len) | |
5cb104ae GT |
319 | goto out_unlock; |
320 | ||
cbde2787 | 321 | *rm_list = msk->pm.rm_list_tx; |
119c0220 | 322 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
5cb104ae GT |
323 | ret = true; |
324 | ||
325 | out_unlock: | |
326 | spin_unlock_bh(&msk->pm.lock); | |
327 | return ret; | |
328 | } | |
329 | ||
1b1c7a0e PK |
/* Thin wrapper: forwards @msk and @skc to mptcp_pm_nl_get_local_id() and
 * returns its result unchanged.
 */
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	int local_id = mptcp_pm_nl_get_local_id(msk, skc);

	return local_id;
}
334 | ||
71b7dec2 PA |
335 | void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) |
336 | { | |
337 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); | |
338 | u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); | |
339 | ||
340 | /* keep track of rtx periods with no progress */ | |
341 | if (!subflow->stale_count) { | |
342 | subflow->stale_rcv_tstamp = rcv_tstamp; | |
343 | subflow->stale_count++; | |
344 | } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { | |
345 | if (subflow->stale_count < U8_MAX) | |
346 | subflow->stale_count++; | |
ff5a0b42 | 347 | mptcp_pm_nl_subflow_chk_stale(msk, ssk); |
71b7dec2 PA |
348 | } else { |
349 | subflow->stale_count = 0; | |
ff5a0b42 | 350 | mptcp_subflow_set_active(subflow); |
71b7dec2 PA |
351 | } |
352 | } | |
353 | ||
1b1c7a0e PK |
354 | void mptcp_pm_data_init(struct mptcp_sock *msk) |
355 | { | |
356 | msk->pm.add_addr_signaled = 0; | |
357 | msk->pm.add_addr_accepted = 0; | |
358 | msk->pm.local_addr_used = 0; | |
359 | msk->pm.subflows = 0; | |
cbde2787 | 360 | msk->pm.rm_list_tx.nr = 0; |
b5c55f33 | 361 | msk->pm.rm_list_rx.nr = 0; |
1b1c7a0e | 362 | WRITE_ONCE(msk->pm.work_pending, false); |
13ad9f01 | 363 | WRITE_ONCE(msk->pm.addr_signal, 0); |
1b1c7a0e PK |
364 | WRITE_ONCE(msk->pm.accept_addr, false); |
365 | WRITE_ONCE(msk->pm.accept_subflow, false); | |
df377be3 | 366 | WRITE_ONCE(msk->pm.remote_deny_join_id0, false); |
1b1c7a0e PK |
367 | msk->pm.status = 0; |
368 | ||
369 | spin_lock_init(&msk->pm.lock); | |
b6c08380 | 370 | INIT_LIST_HEAD(&msk->pm.anno_list); |
01cacb00 PA |
371 | |
372 | mptcp_pm_nl_data_init(msk); | |
1b1c7a0e PK |
373 | } |
374 | ||
d39dceca | 375 | void __init mptcp_pm_init(void) |
1b1c7a0e | 376 | { |
01cacb00 | 377 | mptcp_pm_nl_init(); |
1b1c7a0e | 378 | } |