Commit | Line | Data |
---|---|---|
1b1c7a0e PK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2019, Intel Corporation. | |
5 | */ | |
c85adced GT |
6 | #define pr_fmt(fmt) "MPTCP: " fmt |
7 | ||
1b1c7a0e PK |
8 | #include <linux/kernel.h> |
9 | #include <net/tcp.h> | |
10 | #include <net/mptcp.h> | |
11 | #include "protocol.h" | |
12 | ||
fc1b4e3b PA |
13 | #include "mib.h" |
14 | ||
1b1c7a0e PK |
15 | /* path manager command handlers */ |
16 | ||
17 | int mptcp_pm_announce_addr(struct mptcp_sock *msk, | |
6a6c05a8 | 18 | const struct mptcp_addr_info *addr, |
f7efc777 | 19 | bool echo) |
1b1c7a0e | 20 | { |
13ad9f01 | 21 | u8 add_addr = READ_ONCE(msk->pm.addr_signal); |
d91d322a | 22 | |
18fc1a92 | 23 | pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); |
926bdeab | 24 | |
3abc05d9 FW |
25 | lockdep_assert_held(&msk->pm.lock); |
26 | ||
18fc1a92 YL |
27 | if (add_addr & |
28 | (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { | |
29 | pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); | |
42842a42 GT |
30 | return -EINVAL; |
31 | } | |
32 | ||
18fc1a92 YL |
33 | if (echo) { |
34 | msk->pm.remote = *addr; | |
d91d322a | 35 | add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); |
18fc1a92 YL |
36 | } else { |
37 | msk->pm.local = *addr; | |
38 | add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); | |
39 | } | |
13ad9f01 | 40 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab | 41 | return 0; |
1b1c7a0e PK |
42 | } |
43 | ||
cbde2787 | 44 | int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 45 | { |
13ad9f01 | 46 | u8 rm_addr = READ_ONCE(msk->pm.addr_signal); |
42842a42 | 47 | |
cbde2787 | 48 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
b6c08380 | 49 | |
42842a42 GT |
50 | if (rm_addr) { |
51 | pr_warn("addr_signal error, rm_addr=%d", rm_addr); | |
52 | return -EINVAL; | |
53 | } | |
54 | ||
cbde2787 | 55 | msk->pm.rm_list_tx = *rm_list; |
42842a42 | 56 | rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); |
13ad9f01 | 57 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
b46a0238 | 58 | mptcp_pm_nl_addr_send_ack(msk); |
b6c08380 | 59 | return 0; |
1b1c7a0e PK |
60 | } |
61 | ||
ddd14bb8 | 62 | int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 63 | { |
ddd14bb8 | 64 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
0ee4261a GT |
65 | |
66 | spin_lock_bh(&msk->pm.lock); | |
ddd14bb8 | 67 | mptcp_pm_nl_rm_subflow_received(msk, rm_list); |
0ee4261a GT |
68 | spin_unlock_bh(&msk->pm.lock); |
69 | return 0; | |
1b1c7a0e PK |
70 | } |
71 | ||
72 | /* path manager event handlers */ | |
73 | ||
6c714f1b | 74 | void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) |
1b1c7a0e PK |
75 | { |
76 | struct mptcp_pm_data *pm = &msk->pm; | |
77 | ||
78 | pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); | |
79 | ||
80 | WRITE_ONCE(pm->server_side, server_side); | |
b911c97c | 81 | mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); |
1b1c7a0e PK |
82 | } |
83 | ||
84 | bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) | |
85 | { | |
926bdeab | 86 | struct mptcp_pm_data *pm = &msk->pm; |
a914e586 | 87 | unsigned int subflows_max; |
f58f065a | 88 | int ret = 0; |
926bdeab | 89 | |
a914e586 GT |
90 | subflows_max = mptcp_pm_get_subflows_max(msk); |
91 | ||
926bdeab | 92 | pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, |
a914e586 | 93 | subflows_max, READ_ONCE(pm->accept_subflow)); |
926bdeab PK |
94 | |
95 | /* try to avoid acquiring the lock below */ | |
96 | if (!READ_ONCE(pm->accept_subflow)) | |
97 | return false; | |
98 | ||
99 | spin_lock_bh(&pm->lock); | |
f58f065a | 100 | if (READ_ONCE(pm->accept_subflow)) { |
a914e586 GT |
101 | ret = pm->subflows < subflows_max; |
102 | if (ret && ++pm->subflows == subflows_max) | |
f58f065a GT |
103 | WRITE_ONCE(pm->accept_subflow, false); |
104 | } | |
926bdeab PK |
105 | spin_unlock_bh(&pm->lock); |
106 | ||
107 | return ret; | |
108 | } | |
109 | ||
110 | /* return true if the new status bit is currently cleared, that is, this event | |
111 | * can be server, eventually by an already scheduled work | |
112 | */ | |
113 | static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, | |
114 | enum mptcp_pm_status new_status) | |
115 | { | |
116 | pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, | |
117 | BIT(new_status)); | |
118 | if (msk->pm.status & BIT(new_status)) | |
119 | return false; | |
120 | ||
121 | msk->pm.status |= BIT(new_status); | |
ba8f48f7 | 122 | mptcp_schedule_work((struct sock *)msk); |
926bdeab | 123 | return true; |
1b1c7a0e PK |
124 | } |
125 | ||
6c714f1b | 126 | void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) |
1b1c7a0e | 127 | { |
926bdeab | 128 | struct mptcp_pm_data *pm = &msk->pm; |
b911c97c | 129 | bool announce = false; |
926bdeab | 130 | |
1b1c7a0e | 131 | pr_debug("msk=%p", msk); |
926bdeab | 132 | |
926bdeab PK |
133 | spin_lock_bh(&pm->lock); |
134 | ||
5b950ff4 PA |
135 | /* mptcp_pm_fully_established() can be invoked by multiple |
136 | * racing paths - accept() and check_fully_established() | |
137 | * be sure to serve this event only once. | |
138 | */ | |
139 | if (READ_ONCE(pm->work_pending) && | |
140 | !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) | |
926bdeab PK |
141 | mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); |
142 | ||
b911c97c FW |
143 | if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) |
144 | announce = true; | |
145 | ||
146 | msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); | |
926bdeab | 147 | spin_unlock_bh(&pm->lock); |
b911c97c FW |
148 | |
149 | if (announce) | |
150 | mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); | |
1b1c7a0e PK |
151 | } |
152 | ||
/* Connection-closed event hook; it only emits a debug message. */
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p\n", msk);
}
157 | ||
62535200 | 158 | void mptcp_pm_subflow_established(struct mptcp_sock *msk) |
1b1c7a0e | 159 | { |
926bdeab PK |
160 | struct mptcp_pm_data *pm = &msk->pm; |
161 | ||
1b1c7a0e | 162 | pr_debug("msk=%p", msk); |
926bdeab PK |
163 | |
164 | if (!READ_ONCE(pm->work_pending)) | |
165 | return; | |
166 | ||
167 | spin_lock_bh(&pm->lock); | |
168 | ||
169 | if (READ_ONCE(pm->work_pending)) | |
170 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
171 | ||
172 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
173 | } |
174 | ||
a88c9e49 PA |
175 | void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, |
176 | const struct mptcp_subflow_context *subflow) | |
1b1c7a0e | 177 | { |
a88c9e49 PA |
178 | struct mptcp_pm_data *pm = &msk->pm; |
179 | bool update_subflows; | |
180 | ||
181 | update_subflows = (ssk->sk_state == TCP_CLOSE) && | |
182 | (subflow->request_join || subflow->mp_join); | |
183 | if (!READ_ONCE(pm->work_pending) && !update_subflows) | |
184 | return; | |
185 | ||
186 | spin_lock_bh(&pm->lock); | |
187 | if (update_subflows) | |
188 | pm->subflows--; | |
189 | ||
190 | /* Even if this subflow is not really established, tell the PM to try | |
191 | * to pick the next ones, if possible. | |
192 | */ | |
193 | if (mptcp_pm_nl_check_work_pending(msk)) | |
194 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
195 | ||
196 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
197 | } |
198 | ||
199 | void mptcp_pm_add_addr_received(struct mptcp_sock *msk, | |
200 | const struct mptcp_addr_info *addr) | |
201 | { | |
926bdeab PK |
202 | struct mptcp_pm_data *pm = &msk->pm; |
203 | ||
204 | pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, | |
205 | READ_ONCE(pm->accept_addr)); | |
206 | ||
b911c97c FW |
207 | mptcp_event_addr_announced(msk, addr); |
208 | ||
926bdeab PK |
209 | spin_lock_bh(&pm->lock); |
210 | ||
84dfe367 | 211 | if (!READ_ONCE(pm->accept_addr)) { |
f7efc777 | 212 | mptcp_pm_announce_addr(msk, addr, true); |
84dfe367 GT |
213 | mptcp_pm_add_addr_send_ack(msk); |
214 | } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { | |
926bdeab | 215 | pm->remote = *addr; |
f73c1194 PA |
216 | } else { |
217 | __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); | |
84dfe367 | 218 | } |
926bdeab PK |
219 | |
220 | spin_unlock_bh(&pm->lock); | |
84dfe367 GT |
221 | } |
222 | ||
557963c3 | 223 | void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, |
90d93088 | 224 | const struct mptcp_addr_info *addr) |
557963c3 GT |
225 | { |
226 | struct mptcp_pm_data *pm = &msk->pm; | |
227 | ||
228 | pr_debug("msk=%p", msk); | |
229 | ||
230 | spin_lock_bh(&pm->lock); | |
231 | ||
232 | if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) | |
233 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
234 | ||
235 | spin_unlock_bh(&pm->lock); | |
236 | } | |
237 | ||
84dfe367 GT |
238 | void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) |
239 | { | |
b5a7acd3 | 240 | if (!mptcp_pm_should_add_signal(msk)) |
84dfe367 GT |
241 | return; |
242 | ||
243 | mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); | |
1b1c7a0e PK |
244 | } |
245 | ||
5c4a824d GT |
246 | void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, |
247 | const struct mptcp_rm_list *rm_list) | |
d0876b22 GT |
248 | { |
249 | struct mptcp_pm_data *pm = &msk->pm; | |
5c4a824d | 250 | u8 i; |
d0876b22 | 251 | |
5c4a824d | 252 | pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); |
d0876b22 | 253 | |
5c4a824d GT |
254 | for (i = 0; i < rm_list->nr; i++) |
255 | mptcp_event_addr_removed(msk, rm_list->ids[i]); | |
b911c97c | 256 | |
d0876b22 | 257 | spin_lock_bh(&pm->lock); |
f73c1194 PA |
258 | if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED)) |
259 | pm->rm_list_rx = *rm_list; | |
260 | else | |
261 | __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP); | |
d0876b22 GT |
262 | spin_unlock_bh(&pm->lock); |
263 | } | |
264 | ||
43f5b111 | 265 | void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup) |
40453a5c | 266 | { |
43f5b111 PA |
267 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
268 | struct sock *sk = subflow->conn; | |
269 | struct mptcp_sock *msk; | |
40453a5c GT |
270 | |
271 | pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); | |
43f5b111 PA |
272 | msk = mptcp_sk(sk); |
273 | if (subflow->backup != bkup) { | |
274 | subflow->backup = bkup; | |
275 | mptcp_data_lock(sk); | |
276 | if (!sock_owned_by_user(sk)) | |
277 | msk->last_snd = NULL; | |
278 | else | |
279 | __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags); | |
280 | mptcp_data_unlock(sk); | |
281 | } | |
b911c97c | 282 | |
43f5b111 | 283 | mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC); |
40453a5c GT |
284 | } |
285 | ||
5580d41b GT |
286 | void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) |
287 | { | |
288 | pr_debug("fail_seq=%llu", fail_seq); | |
289 | } | |
290 | ||
1b1c7a0e PK |
291 | /* path manager helpers */ |
292 | ||
90d93088 | 293 | bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, |
1f5e9e2f | 294 | unsigned int opt_size, unsigned int remaining, |
f462a446 | 295 | struct mptcp_addr_info *addr, bool *echo, |
af7939f3 | 296 | bool *drop_other_suboptions) |
1b1c7a0e | 297 | { |
926bdeab | 298 | int ret = false; |
119c0220 | 299 | u8 add_addr; |
f462a446 | 300 | u8 family; |
af7939f3 | 301 | bool port; |
926bdeab PK |
302 | |
303 | spin_lock_bh(&msk->pm.lock); | |
304 | ||
305 | /* double check after the lock is acquired */ | |
f643b803 | 306 | if (!mptcp_pm_should_add_signal(msk)) |
926bdeab PK |
307 | goto out_unlock; |
308 | ||
1f5e9e2f YL |
309 | /* always drop every other options for pure ack ADD_ADDR; this is a |
310 | * plain dup-ack from TCP perspective. The other MPTCP-relevant info, | |
311 | * if any, will be carried by the 'original' TCP ack | |
312 | */ | |
313 | if (skb && skb_is_tcp_pure_ack(skb)) { | |
314 | remaining += opt_size; | |
315 | *drop_other_suboptions = true; | |
316 | } | |
317 | ||
d91d322a | 318 | *echo = mptcp_pm_should_add_signal_echo(msk); |
af7939f3 | 319 | port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); |
456afe01 | 320 | |
f462a446 | 321 | family = *echo ? msk->pm.remote.family : msk->pm.local.family; |
af7939f3 | 322 | if (remaining < mptcp_add_addr_len(family, *echo, port)) |
926bdeab PK |
323 | goto out_unlock; |
324 | ||
f462a446 YL |
325 | if (*echo) { |
326 | *addr = msk->pm.remote; | |
119c0220 | 327 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); |
f462a446 YL |
328 | } else { |
329 | *addr = msk->pm.local; | |
119c0220 | 330 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); |
f462a446 | 331 | } |
119c0220 | 332 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab PK |
333 | ret = true; |
334 | ||
335 | out_unlock: | |
336 | spin_unlock_bh(&msk->pm.lock); | |
337 | return ret; | |
1b1c7a0e PK |
338 | } |
339 | ||
5cb104ae | 340 | bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, |
6445e17a | 341 | struct mptcp_rm_list *rm_list) |
5cb104ae | 342 | { |
cbde2787 | 343 | int ret = false, len; |
119c0220 | 344 | u8 rm_addr; |
5cb104ae GT |
345 | |
346 | spin_lock_bh(&msk->pm.lock); | |
347 | ||
348 | /* double check after the lock is acquired */ | |
349 | if (!mptcp_pm_should_rm_signal(msk)) | |
350 | goto out_unlock; | |
351 | ||
119c0220 | 352 | rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); |
cbde2787 GT |
353 | len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); |
354 | if (len < 0) { | |
119c0220 | 355 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
cbde2787 GT |
356 | goto out_unlock; |
357 | } | |
358 | if (remaining < len) | |
5cb104ae GT |
359 | goto out_unlock; |
360 | ||
cbde2787 | 361 | *rm_list = msk->pm.rm_list_tx; |
119c0220 | 362 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
5cb104ae GT |
363 | ret = true; |
364 | ||
365 | out_unlock: | |
366 | spin_unlock_bh(&msk->pm.lock); | |
367 | return ret; | |
368 | } | |
369 | ||
1b1c7a0e PK |
/* Forward the local-id lookup for @skc to mptcp_pm_nl_get_local_id() and
 * return its result unchanged.
 */
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	int local_id = mptcp_pm_nl_get_local_id(msk, skc);

	return local_id;
}
374 | ||
71b7dec2 PA |
375 | void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) |
376 | { | |
377 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); | |
378 | u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); | |
379 | ||
380 | /* keep track of rtx periods with no progress */ | |
381 | if (!subflow->stale_count) { | |
382 | subflow->stale_rcv_tstamp = rcv_tstamp; | |
383 | subflow->stale_count++; | |
384 | } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { | |
385 | if (subflow->stale_count < U8_MAX) | |
386 | subflow->stale_count++; | |
ff5a0b42 | 387 | mptcp_pm_nl_subflow_chk_stale(msk, ssk); |
71b7dec2 PA |
388 | } else { |
389 | subflow->stale_count = 0; | |
ff5a0b42 | 390 | mptcp_subflow_set_active(subflow); |
71b7dec2 PA |
391 | } |
392 | } | |
393 | ||
b29fcfb5 | 394 | void mptcp_pm_data_reset(struct mptcp_sock *msk) |
1b1c7a0e PK |
395 | { |
396 | msk->pm.add_addr_signaled = 0; | |
397 | msk->pm.add_addr_accepted = 0; | |
398 | msk->pm.local_addr_used = 0; | |
399 | msk->pm.subflows = 0; | |
cbde2787 | 400 | msk->pm.rm_list_tx.nr = 0; |
b5c55f33 | 401 | msk->pm.rm_list_rx.nr = 0; |
1b1c7a0e | 402 | WRITE_ONCE(msk->pm.work_pending, false); |
13ad9f01 | 403 | WRITE_ONCE(msk->pm.addr_signal, 0); |
1b1c7a0e PK |
404 | WRITE_ONCE(msk->pm.accept_addr, false); |
405 | WRITE_ONCE(msk->pm.accept_subflow, false); | |
df377be3 | 406 | WRITE_ONCE(msk->pm.remote_deny_join_id0, false); |
1b1c7a0e | 407 | msk->pm.status = 0; |
86e39e04 | 408 | bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); |
1b1c7a0e | 409 | |
b29fcfb5 PA |
410 | mptcp_pm_nl_data_init(msk); |
411 | } | |
412 | ||
413 | void mptcp_pm_data_init(struct mptcp_sock *msk) | |
414 | { | |
1b1c7a0e | 415 | spin_lock_init(&msk->pm.lock); |
b6c08380 | 416 | INIT_LIST_HEAD(&msk->pm.anno_list); |
b29fcfb5 | 417 | mptcp_pm_data_reset(msk); |
1b1c7a0e PK |
418 | } |
419 | ||
d39dceca | 420 | void __init mptcp_pm_init(void) |
1b1c7a0e | 421 | { |
01cacb00 | 422 | mptcp_pm_nl_init(); |
1b1c7a0e | 423 | } |