Commit | Line | Data |
---|---|---|
1b1c7a0e PK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2019, Intel Corporation. | |
5 | */ | |
c85adced GT |
6 | #define pr_fmt(fmt) "MPTCP: " fmt |
7 | ||
1b1c7a0e PK |
8 | #include <linux/kernel.h> |
9 | #include <net/tcp.h> | |
10 | #include <net/mptcp.h> | |
11 | #include "protocol.h" | |
12 | ||
fc1b4e3b PA |
13 | #include "mib.h" |
14 | ||
1b1c7a0e PK |
15 | /* path manager command handlers */ |
16 | ||
17 | int mptcp_pm_announce_addr(struct mptcp_sock *msk, | |
6a6c05a8 | 18 | const struct mptcp_addr_info *addr, |
f7efc777 | 19 | bool echo) |
1b1c7a0e | 20 | { |
13ad9f01 | 21 | u8 add_addr = READ_ONCE(msk->pm.addr_signal); |
d91d322a | 22 | |
18fc1a92 | 23 | pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); |
926bdeab | 24 | |
3abc05d9 FW |
25 | lockdep_assert_held(&msk->pm.lock); |
26 | ||
18fc1a92 YL |
27 | if (add_addr & |
28 | (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { | |
29 | pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); | |
42842a42 GT |
30 | return -EINVAL; |
31 | } | |
32 | ||
18fc1a92 YL |
33 | if (echo) { |
34 | msk->pm.remote = *addr; | |
d91d322a | 35 | add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); |
18fc1a92 YL |
36 | } else { |
37 | msk->pm.local = *addr; | |
38 | add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); | |
39 | } | |
13ad9f01 | 40 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab | 41 | return 0; |
1b1c7a0e PK |
42 | } |
43 | ||
cbde2787 | 44 | int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 45 | { |
13ad9f01 | 46 | u8 rm_addr = READ_ONCE(msk->pm.addr_signal); |
42842a42 | 47 | |
cbde2787 | 48 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
b6c08380 | 49 | |
42842a42 GT |
50 | if (rm_addr) { |
51 | pr_warn("addr_signal error, rm_addr=%d", rm_addr); | |
52 | return -EINVAL; | |
53 | } | |
54 | ||
cbde2787 | 55 | msk->pm.rm_list_tx = *rm_list; |
42842a42 | 56 | rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); |
13ad9f01 | 57 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
b46a0238 | 58 | mptcp_pm_nl_addr_send_ack(msk); |
b6c08380 | 59 | return 0; |
1b1c7a0e PK |
60 | } |
61 | ||
ddd14bb8 | 62 | int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 63 | { |
ddd14bb8 | 64 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
0ee4261a GT |
65 | |
66 | spin_lock_bh(&msk->pm.lock); | |
ddd14bb8 | 67 | mptcp_pm_nl_rm_subflow_received(msk, rm_list); |
0ee4261a GT |
68 | spin_unlock_bh(&msk->pm.lock); |
69 | return 0; | |
1b1c7a0e PK |
70 | } |
71 | ||
72 | /* path manager event handlers */ | |
73 | ||
6c714f1b | 74 | void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) |
1b1c7a0e PK |
75 | { |
76 | struct mptcp_pm_data *pm = &msk->pm; | |
77 | ||
78 | pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); | |
79 | ||
80 | WRITE_ONCE(pm->server_side, server_side); | |
b911c97c | 81 | mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); |
1b1c7a0e PK |
82 | } |
83 | ||
84 | bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) | |
85 | { | |
926bdeab | 86 | struct mptcp_pm_data *pm = &msk->pm; |
a914e586 | 87 | unsigned int subflows_max; |
f58f065a | 88 | int ret = 0; |
926bdeab | 89 | |
a914e586 GT |
90 | subflows_max = mptcp_pm_get_subflows_max(msk); |
91 | ||
926bdeab | 92 | pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, |
a914e586 | 93 | subflows_max, READ_ONCE(pm->accept_subflow)); |
926bdeab PK |
94 | |
95 | /* try to avoid acquiring the lock below */ | |
96 | if (!READ_ONCE(pm->accept_subflow)) | |
97 | return false; | |
98 | ||
99 | spin_lock_bh(&pm->lock); | |
f58f065a | 100 | if (READ_ONCE(pm->accept_subflow)) { |
a914e586 GT |
101 | ret = pm->subflows < subflows_max; |
102 | if (ret && ++pm->subflows == subflows_max) | |
f58f065a GT |
103 | WRITE_ONCE(pm->accept_subflow, false); |
104 | } | |
926bdeab PK |
105 | spin_unlock_bh(&pm->lock); |
106 | ||
107 | return ret; | |
108 | } | |
109 | ||
110 | /* return true if the new status bit is currently cleared, that is, this event | |
111 | * can be server, eventually by an already scheduled work | |
112 | */ | |
113 | static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, | |
114 | enum mptcp_pm_status new_status) | |
115 | { | |
116 | pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, | |
117 | BIT(new_status)); | |
118 | if (msk->pm.status & BIT(new_status)) | |
119 | return false; | |
120 | ||
121 | msk->pm.status |= BIT(new_status); | |
ba8f48f7 | 122 | mptcp_schedule_work((struct sock *)msk); |
926bdeab | 123 | return true; |
1b1c7a0e PK |
124 | } |
125 | ||
6c714f1b | 126 | void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) |
1b1c7a0e | 127 | { |
926bdeab | 128 | struct mptcp_pm_data *pm = &msk->pm; |
b911c97c | 129 | bool announce = false; |
926bdeab | 130 | |
1b1c7a0e | 131 | pr_debug("msk=%p", msk); |
926bdeab | 132 | |
926bdeab PK |
133 | spin_lock_bh(&pm->lock); |
134 | ||
5b950ff4 PA |
135 | /* mptcp_pm_fully_established() can be invoked by multiple |
136 | * racing paths - accept() and check_fully_established() | |
137 | * be sure to serve this event only once. | |
138 | */ | |
139 | if (READ_ONCE(pm->work_pending) && | |
140 | !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) | |
926bdeab PK |
141 | mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); |
142 | ||
b911c97c FW |
143 | if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) |
144 | announce = true; | |
145 | ||
146 | msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); | |
926bdeab | 147 | spin_unlock_bh(&pm->lock); |
b911c97c FW |
148 | |
149 | if (announce) | |
150 | mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); | |
1b1c7a0e PK |
151 | } |
152 | ||
/* Connection-closed event: only emits a debug message for @msk. */
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p\n", msk);
}
157 | ||
62535200 | 158 | void mptcp_pm_subflow_established(struct mptcp_sock *msk) |
1b1c7a0e | 159 | { |
926bdeab PK |
160 | struct mptcp_pm_data *pm = &msk->pm; |
161 | ||
1b1c7a0e | 162 | pr_debug("msk=%p", msk); |
926bdeab PK |
163 | |
164 | if (!READ_ONCE(pm->work_pending)) | |
165 | return; | |
166 | ||
167 | spin_lock_bh(&pm->lock); | |
168 | ||
169 | if (READ_ONCE(pm->work_pending)) | |
170 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
171 | ||
172 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
173 | } |
174 | ||
a88c9e49 PA |
175 | void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, |
176 | const struct mptcp_subflow_context *subflow) | |
1b1c7a0e | 177 | { |
a88c9e49 PA |
178 | struct mptcp_pm_data *pm = &msk->pm; |
179 | bool update_subflows; | |
180 | ||
181 | update_subflows = (ssk->sk_state == TCP_CLOSE) && | |
182 | (subflow->request_join || subflow->mp_join); | |
183 | if (!READ_ONCE(pm->work_pending) && !update_subflows) | |
184 | return; | |
185 | ||
186 | spin_lock_bh(&pm->lock); | |
187 | if (update_subflows) | |
188 | pm->subflows--; | |
189 | ||
190 | /* Even if this subflow is not really established, tell the PM to try | |
191 | * to pick the next ones, if possible. | |
192 | */ | |
193 | if (mptcp_pm_nl_check_work_pending(msk)) | |
194 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
195 | ||
196 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
197 | } |
198 | ||
199 | void mptcp_pm_add_addr_received(struct mptcp_sock *msk, | |
200 | const struct mptcp_addr_info *addr) | |
201 | { | |
926bdeab PK |
202 | struct mptcp_pm_data *pm = &msk->pm; |
203 | ||
204 | pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, | |
205 | READ_ONCE(pm->accept_addr)); | |
206 | ||
b911c97c FW |
207 | mptcp_event_addr_announced(msk, addr); |
208 | ||
926bdeab PK |
209 | spin_lock_bh(&pm->lock); |
210 | ||
84dfe367 | 211 | if (!READ_ONCE(pm->accept_addr)) { |
f7efc777 | 212 | mptcp_pm_announce_addr(msk, addr, true); |
84dfe367 GT |
213 | mptcp_pm_add_addr_send_ack(msk); |
214 | } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { | |
926bdeab | 215 | pm->remote = *addr; |
f73c1194 PA |
216 | } else { |
217 | __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); | |
84dfe367 | 218 | } |
926bdeab PK |
219 | |
220 | spin_unlock_bh(&pm->lock); | |
84dfe367 GT |
221 | } |
222 | ||
557963c3 | 223 | void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, |
90d93088 | 224 | const struct mptcp_addr_info *addr) |
557963c3 GT |
225 | { |
226 | struct mptcp_pm_data *pm = &msk->pm; | |
227 | ||
228 | pr_debug("msk=%p", msk); | |
229 | ||
230 | spin_lock_bh(&pm->lock); | |
231 | ||
232 | if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) | |
233 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
234 | ||
235 | spin_unlock_bh(&pm->lock); | |
236 | } | |
237 | ||
84dfe367 GT |
238 | void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) |
239 | { | |
b5a7acd3 | 240 | if (!mptcp_pm_should_add_signal(msk)) |
84dfe367 GT |
241 | return; |
242 | ||
243 | mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); | |
1b1c7a0e PK |
244 | } |
245 | ||
5c4a824d GT |
246 | void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, |
247 | const struct mptcp_rm_list *rm_list) | |
d0876b22 GT |
248 | { |
249 | struct mptcp_pm_data *pm = &msk->pm; | |
5c4a824d | 250 | u8 i; |
d0876b22 | 251 | |
5c4a824d | 252 | pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); |
d0876b22 | 253 | |
5c4a824d GT |
254 | for (i = 0; i < rm_list->nr; i++) |
255 | mptcp_event_addr_removed(msk, rm_list->ids[i]); | |
b911c97c | 256 | |
d0876b22 | 257 | spin_lock_bh(&pm->lock); |
f73c1194 PA |
258 | if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED)) |
259 | pm->rm_list_rx = *rm_list; | |
260 | else | |
261 | __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP); | |
d0876b22 GT |
262 | spin_unlock_bh(&pm->lock); |
263 | } | |
264 | ||
43f5b111 | 265 | void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup) |
40453a5c | 266 | { |
43f5b111 PA |
267 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
268 | struct sock *sk = subflow->conn; | |
269 | struct mptcp_sock *msk; | |
40453a5c GT |
270 | |
271 | pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); | |
43f5b111 PA |
272 | msk = mptcp_sk(sk); |
273 | if (subflow->backup != bkup) { | |
274 | subflow->backup = bkup; | |
275 | mptcp_data_lock(sk); | |
276 | if (!sock_owned_by_user(sk)) | |
277 | msk->last_snd = NULL; | |
278 | else | |
279 | __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags); | |
280 | mptcp_data_unlock(sk); | |
281 | } | |
b911c97c | 282 | |
43f5b111 | 283 | mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC); |
40453a5c GT |
284 | } |
285 | ||
5580d41b GT |
286 | void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) |
287 | { | |
1e39e5a3 GT |
288 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); |
289 | struct mptcp_sock *msk = mptcp_sk(subflow->conn); | |
290 | ||
5580d41b | 291 | pr_debug("fail_seq=%llu", fail_seq); |
1e39e5a3 GT |
292 | |
293 | if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback)) | |
294 | subflow->send_infinite_map = 1; | |
5580d41b GT |
295 | } |
296 | ||
1b1c7a0e PK |
297 | /* path manager helpers */ |
298 | ||
90d93088 | 299 | bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, |
1f5e9e2f | 300 | unsigned int opt_size, unsigned int remaining, |
f462a446 | 301 | struct mptcp_addr_info *addr, bool *echo, |
af7939f3 | 302 | bool *drop_other_suboptions) |
1b1c7a0e | 303 | { |
926bdeab | 304 | int ret = false; |
119c0220 | 305 | u8 add_addr; |
f462a446 | 306 | u8 family; |
af7939f3 | 307 | bool port; |
926bdeab PK |
308 | |
309 | spin_lock_bh(&msk->pm.lock); | |
310 | ||
311 | /* double check after the lock is acquired */ | |
f643b803 | 312 | if (!mptcp_pm_should_add_signal(msk)) |
926bdeab PK |
313 | goto out_unlock; |
314 | ||
1f5e9e2f YL |
315 | /* always drop every other options for pure ack ADD_ADDR; this is a |
316 | * plain dup-ack from TCP perspective. The other MPTCP-relevant info, | |
317 | * if any, will be carried by the 'original' TCP ack | |
318 | */ | |
319 | if (skb && skb_is_tcp_pure_ack(skb)) { | |
320 | remaining += opt_size; | |
321 | *drop_other_suboptions = true; | |
322 | } | |
323 | ||
d91d322a | 324 | *echo = mptcp_pm_should_add_signal_echo(msk); |
af7939f3 | 325 | port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); |
456afe01 | 326 | |
f462a446 | 327 | family = *echo ? msk->pm.remote.family : msk->pm.local.family; |
af7939f3 | 328 | if (remaining < mptcp_add_addr_len(family, *echo, port)) |
926bdeab PK |
329 | goto out_unlock; |
330 | ||
f462a446 YL |
331 | if (*echo) { |
332 | *addr = msk->pm.remote; | |
119c0220 | 333 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); |
f462a446 YL |
334 | } else { |
335 | *addr = msk->pm.local; | |
119c0220 | 336 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); |
f462a446 | 337 | } |
119c0220 | 338 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab PK |
339 | ret = true; |
340 | ||
341 | out_unlock: | |
342 | spin_unlock_bh(&msk->pm.lock); | |
343 | return ret; | |
1b1c7a0e PK |
344 | } |
345 | ||
5cb104ae | 346 | bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, |
6445e17a | 347 | struct mptcp_rm_list *rm_list) |
5cb104ae | 348 | { |
cbde2787 | 349 | int ret = false, len; |
119c0220 | 350 | u8 rm_addr; |
5cb104ae GT |
351 | |
352 | spin_lock_bh(&msk->pm.lock); | |
353 | ||
354 | /* double check after the lock is acquired */ | |
355 | if (!mptcp_pm_should_rm_signal(msk)) | |
356 | goto out_unlock; | |
357 | ||
119c0220 | 358 | rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); |
cbde2787 GT |
359 | len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); |
360 | if (len < 0) { | |
119c0220 | 361 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
cbde2787 GT |
362 | goto out_unlock; |
363 | } | |
364 | if (remaining < len) | |
5cb104ae GT |
365 | goto out_unlock; |
366 | ||
cbde2787 | 367 | *rm_list = msk->pm.rm_list_tx; |
119c0220 | 368 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
5cb104ae GT |
369 | ret = true; |
370 | ||
371 | out_unlock: | |
372 | spin_unlock_bh(&msk->pm.lock); | |
373 | return ret; | |
374 | } | |
375 | ||
1b1c7a0e PK |
/* Return whatever mptcp_pm_nl_get_local_id() yields for @msk and @skc. */
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	int local_id = mptcp_pm_nl_get_local_id(msk, skc);

	return local_id;
}
380 | ||
71b7dec2 PA |
381 | void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) |
382 | { | |
383 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); | |
384 | u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); | |
385 | ||
386 | /* keep track of rtx periods with no progress */ | |
387 | if (!subflow->stale_count) { | |
388 | subflow->stale_rcv_tstamp = rcv_tstamp; | |
389 | subflow->stale_count++; | |
390 | } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { | |
391 | if (subflow->stale_count < U8_MAX) | |
392 | subflow->stale_count++; | |
ff5a0b42 | 393 | mptcp_pm_nl_subflow_chk_stale(msk, ssk); |
71b7dec2 PA |
394 | } else { |
395 | subflow->stale_count = 0; | |
ff5a0b42 | 396 | mptcp_subflow_set_active(subflow); |
71b7dec2 PA |
397 | } |
398 | } | |
399 | ||
b29fcfb5 | 400 | void mptcp_pm_data_reset(struct mptcp_sock *msk) |
1b1c7a0e PK |
401 | { |
402 | msk->pm.add_addr_signaled = 0; | |
403 | msk->pm.add_addr_accepted = 0; | |
404 | msk->pm.local_addr_used = 0; | |
405 | msk->pm.subflows = 0; | |
cbde2787 | 406 | msk->pm.rm_list_tx.nr = 0; |
b5c55f33 | 407 | msk->pm.rm_list_rx.nr = 0; |
1b1c7a0e | 408 | WRITE_ONCE(msk->pm.work_pending, false); |
13ad9f01 | 409 | WRITE_ONCE(msk->pm.addr_signal, 0); |
1b1c7a0e PK |
410 | WRITE_ONCE(msk->pm.accept_addr, false); |
411 | WRITE_ONCE(msk->pm.accept_subflow, false); | |
df377be3 | 412 | WRITE_ONCE(msk->pm.remote_deny_join_id0, false); |
1b1c7a0e | 413 | msk->pm.status = 0; |
86e39e04 | 414 | bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); |
1b1c7a0e | 415 | |
b29fcfb5 PA |
416 | mptcp_pm_nl_data_init(msk); |
417 | } | |
418 | ||
419 | void mptcp_pm_data_init(struct mptcp_sock *msk) | |
420 | { | |
1b1c7a0e | 421 | spin_lock_init(&msk->pm.lock); |
b6c08380 | 422 | INIT_LIST_HEAD(&msk->pm.anno_list); |
b29fcfb5 | 423 | mptcp_pm_data_reset(msk); |
1b1c7a0e PK |
424 | } |
425 | ||
d39dceca | 426 | void __init mptcp_pm_init(void) |
1b1c7a0e | 427 | { |
01cacb00 | 428 | mptcp_pm_nl_init(); |
1b1c7a0e | 429 | } |