Commit | Line | Data |
---|---|---|
1b1c7a0e PK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2019, Intel Corporation. | |
5 | */ | |
c85adced GT |
6 | #define pr_fmt(fmt) "MPTCP: " fmt |
7 | ||
1b1c7a0e PK |
8 | #include <linux/kernel.h> |
9 | #include <net/tcp.h> | |
10 | #include <net/mptcp.h> | |
11 | #include "protocol.h" | |
12 | ||
fc1b4e3b PA |
13 | #include "mib.h" |
14 | ||
1b1c7a0e PK |
15 | /* path manager command handlers */ |
16 | ||
17 | int mptcp_pm_announce_addr(struct mptcp_sock *msk, | |
6a6c05a8 | 18 | const struct mptcp_addr_info *addr, |
f7efc777 | 19 | bool echo) |
1b1c7a0e | 20 | { |
13ad9f01 | 21 | u8 add_addr = READ_ONCE(msk->pm.addr_signal); |
d91d322a | 22 | |
18fc1a92 | 23 | pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); |
926bdeab | 24 | |
3abc05d9 FW |
25 | lockdep_assert_held(&msk->pm.lock); |
26 | ||
18fc1a92 YL |
27 | if (add_addr & |
28 | (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { | |
29 | pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); | |
42842a42 GT |
30 | return -EINVAL; |
31 | } | |
32 | ||
18fc1a92 YL |
33 | if (echo) { |
34 | msk->pm.remote = *addr; | |
d91d322a | 35 | add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); |
18fc1a92 YL |
36 | } else { |
37 | msk->pm.local = *addr; | |
38 | add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); | |
39 | } | |
13ad9f01 | 40 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab | 41 | return 0; |
1b1c7a0e PK |
42 | } |
43 | ||
cbde2787 | 44 | int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 45 | { |
13ad9f01 | 46 | u8 rm_addr = READ_ONCE(msk->pm.addr_signal); |
42842a42 | 47 | |
cbde2787 | 48 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
b6c08380 | 49 | |
42842a42 GT |
50 | if (rm_addr) { |
51 | pr_warn("addr_signal error, rm_addr=%d", rm_addr); | |
52 | return -EINVAL; | |
53 | } | |
54 | ||
cbde2787 | 55 | msk->pm.rm_list_tx = *rm_list; |
42842a42 | 56 | rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); |
13ad9f01 | 57 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
b46a0238 | 58 | mptcp_pm_nl_addr_send_ack(msk); |
b6c08380 | 59 | return 0; |
1b1c7a0e PK |
60 | } |
61 | ||
ddd14bb8 | 62 | int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) |
1b1c7a0e | 63 | { |
ddd14bb8 | 64 | pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); |
0ee4261a GT |
65 | |
66 | spin_lock_bh(&msk->pm.lock); | |
ddd14bb8 | 67 | mptcp_pm_nl_rm_subflow_received(msk, rm_list); |
0ee4261a GT |
68 | spin_unlock_bh(&msk->pm.lock); |
69 | return 0; | |
1b1c7a0e PK |
70 | } |
71 | ||
72 | /* path manager event handlers */ | |
73 | ||
6c714f1b | 74 | void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) |
1b1c7a0e PK |
75 | { |
76 | struct mptcp_pm_data *pm = &msk->pm; | |
77 | ||
78 | pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); | |
79 | ||
80 | WRITE_ONCE(pm->server_side, server_side); | |
b911c97c | 81 | mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); |
1b1c7a0e PK |
82 | } |
83 | ||
84 | bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) | |
85 | { | |
926bdeab | 86 | struct mptcp_pm_data *pm = &msk->pm; |
a914e586 | 87 | unsigned int subflows_max; |
f58f065a | 88 | int ret = 0; |
926bdeab | 89 | |
a914e586 GT |
90 | subflows_max = mptcp_pm_get_subflows_max(msk); |
91 | ||
926bdeab | 92 | pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, |
a914e586 | 93 | subflows_max, READ_ONCE(pm->accept_subflow)); |
926bdeab PK |
94 | |
95 | /* try to avoid acquiring the lock below */ | |
96 | if (!READ_ONCE(pm->accept_subflow)) | |
97 | return false; | |
98 | ||
99 | spin_lock_bh(&pm->lock); | |
f58f065a | 100 | if (READ_ONCE(pm->accept_subflow)) { |
a914e586 GT |
101 | ret = pm->subflows < subflows_max; |
102 | if (ret && ++pm->subflows == subflows_max) | |
f58f065a GT |
103 | WRITE_ONCE(pm->accept_subflow, false); |
104 | } | |
926bdeab PK |
105 | spin_unlock_bh(&pm->lock); |
106 | ||
107 | return ret; | |
108 | } | |
109 | ||
110 | /* return true if the new status bit is currently cleared, that is, this event | |
111 | * can be server, eventually by an already scheduled work | |
112 | */ | |
113 | static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, | |
114 | enum mptcp_pm_status new_status) | |
115 | { | |
116 | pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, | |
117 | BIT(new_status)); | |
118 | if (msk->pm.status & BIT(new_status)) | |
119 | return false; | |
120 | ||
121 | msk->pm.status |= BIT(new_status); | |
ba8f48f7 | 122 | mptcp_schedule_work((struct sock *)msk); |
926bdeab | 123 | return true; |
1b1c7a0e PK |
124 | } |
125 | ||
6c714f1b | 126 | void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) |
1b1c7a0e | 127 | { |
926bdeab | 128 | struct mptcp_pm_data *pm = &msk->pm; |
b911c97c | 129 | bool announce = false; |
926bdeab | 130 | |
1b1c7a0e | 131 | pr_debug("msk=%p", msk); |
926bdeab | 132 | |
926bdeab PK |
133 | spin_lock_bh(&pm->lock); |
134 | ||
5b950ff4 PA |
135 | /* mptcp_pm_fully_established() can be invoked by multiple |
136 | * racing paths - accept() and check_fully_established() | |
137 | * be sure to serve this event only once. | |
138 | */ | |
139 | if (READ_ONCE(pm->work_pending) && | |
140 | !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) | |
926bdeab PK |
141 | mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); |
142 | ||
b911c97c FW |
143 | if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) |
144 | announce = true; | |
145 | ||
146 | msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); | |
926bdeab | 147 | spin_unlock_bh(&pm->lock); |
b911c97c FW |
148 | |
149 | if (announce) | |
150 | mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); | |
1b1c7a0e PK |
151 | } |
152 | ||
/* Connection-closed hook: currently only emits a debug message. */
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p\n", msk);
}
157 | ||
62535200 | 158 | void mptcp_pm_subflow_established(struct mptcp_sock *msk) |
1b1c7a0e | 159 | { |
926bdeab PK |
160 | struct mptcp_pm_data *pm = &msk->pm; |
161 | ||
1b1c7a0e | 162 | pr_debug("msk=%p", msk); |
926bdeab PK |
163 | |
164 | if (!READ_ONCE(pm->work_pending)) | |
165 | return; | |
166 | ||
167 | spin_lock_bh(&pm->lock); | |
168 | ||
169 | if (READ_ONCE(pm->work_pending)) | |
170 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
171 | ||
172 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
173 | } |
174 | ||
a88c9e49 PA |
175 | void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, |
176 | const struct mptcp_subflow_context *subflow) | |
1b1c7a0e | 177 | { |
a88c9e49 PA |
178 | struct mptcp_pm_data *pm = &msk->pm; |
179 | bool update_subflows; | |
180 | ||
181 | update_subflows = (ssk->sk_state == TCP_CLOSE) && | |
182 | (subflow->request_join || subflow->mp_join); | |
183 | if (!READ_ONCE(pm->work_pending) && !update_subflows) | |
184 | return; | |
185 | ||
186 | spin_lock_bh(&pm->lock); | |
187 | if (update_subflows) | |
188 | pm->subflows--; | |
189 | ||
190 | /* Even if this subflow is not really established, tell the PM to try | |
191 | * to pick the next ones, if possible. | |
192 | */ | |
193 | if (mptcp_pm_nl_check_work_pending(msk)) | |
194 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
195 | ||
196 | spin_unlock_bh(&pm->lock); | |
1b1c7a0e PK |
197 | } |
198 | ||
199 | void mptcp_pm_add_addr_received(struct mptcp_sock *msk, | |
200 | const struct mptcp_addr_info *addr) | |
201 | { | |
926bdeab PK |
202 | struct mptcp_pm_data *pm = &msk->pm; |
203 | ||
204 | pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, | |
205 | READ_ONCE(pm->accept_addr)); | |
206 | ||
b911c97c FW |
207 | mptcp_event_addr_announced(msk, addr); |
208 | ||
926bdeab PK |
209 | spin_lock_bh(&pm->lock); |
210 | ||
84dfe367 | 211 | if (!READ_ONCE(pm->accept_addr)) { |
f7efc777 | 212 | mptcp_pm_announce_addr(msk, addr, true); |
84dfe367 GT |
213 | mptcp_pm_add_addr_send_ack(msk); |
214 | } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { | |
926bdeab | 215 | pm->remote = *addr; |
84dfe367 | 216 | } |
926bdeab PK |
217 | |
218 | spin_unlock_bh(&pm->lock); | |
84dfe367 GT |
219 | } |
220 | ||
557963c3 GT |
221 | void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, |
222 | struct mptcp_addr_info *addr) | |
223 | { | |
224 | struct mptcp_pm_data *pm = &msk->pm; | |
225 | ||
226 | pr_debug("msk=%p", msk); | |
227 | ||
228 | spin_lock_bh(&pm->lock); | |
229 | ||
230 | if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) | |
231 | mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); | |
232 | ||
233 | spin_unlock_bh(&pm->lock); | |
234 | } | |
235 | ||
84dfe367 GT |
236 | void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) |
237 | { | |
b5a7acd3 | 238 | if (!mptcp_pm_should_add_signal(msk)) |
84dfe367 GT |
239 | return; |
240 | ||
241 | mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); | |
1b1c7a0e PK |
242 | } |
243 | ||
5c4a824d GT |
244 | void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, |
245 | const struct mptcp_rm_list *rm_list) | |
d0876b22 GT |
246 | { |
247 | struct mptcp_pm_data *pm = &msk->pm; | |
5c4a824d | 248 | u8 i; |
d0876b22 | 249 | |
5c4a824d | 250 | pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); |
d0876b22 | 251 | |
5c4a824d GT |
252 | for (i = 0; i < rm_list->nr; i++) |
253 | mptcp_event_addr_removed(msk, rm_list->ids[i]); | |
b911c97c | 254 | |
d0876b22 GT |
255 | spin_lock_bh(&pm->lock); |
256 | mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); | |
b5c55f33 | 257 | pm->rm_list_rx = *rm_list; |
d0876b22 GT |
258 | spin_unlock_bh(&pm->lock); |
259 | } | |
260 | ||
40453a5c GT |
261 | void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) |
262 | { | |
263 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | |
264 | ||
265 | pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); | |
266 | subflow->backup = bkup; | |
b911c97c FW |
267 | |
268 | mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); | |
40453a5c GT |
269 | } |
270 | ||
5580d41b GT |
271 | void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) |
272 | { | |
273 | pr_debug("fail_seq=%llu", fail_seq); | |
274 | } | |
275 | ||
1b1c7a0e PK |
276 | /* path manager helpers */ |
277 | ||
1f5e9e2f YL |
278 | bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, |
279 | unsigned int opt_size, unsigned int remaining, | |
f462a446 | 280 | struct mptcp_addr_info *addr, bool *echo, |
af7939f3 | 281 | bool *drop_other_suboptions) |
1b1c7a0e | 282 | { |
926bdeab | 283 | int ret = false; |
119c0220 | 284 | u8 add_addr; |
f462a446 | 285 | u8 family; |
af7939f3 | 286 | bool port; |
926bdeab PK |
287 | |
288 | spin_lock_bh(&msk->pm.lock); | |
289 | ||
290 | /* double check after the lock is acquired */ | |
f643b803 | 291 | if (!mptcp_pm_should_add_signal(msk)) |
926bdeab PK |
292 | goto out_unlock; |
293 | ||
1f5e9e2f YL |
294 | /* always drop every other options for pure ack ADD_ADDR; this is a |
295 | * plain dup-ack from TCP perspective. The other MPTCP-relevant info, | |
296 | * if any, will be carried by the 'original' TCP ack | |
297 | */ | |
298 | if (skb && skb_is_tcp_pure_ack(skb)) { | |
299 | remaining += opt_size; | |
300 | *drop_other_suboptions = true; | |
301 | } | |
302 | ||
d91d322a | 303 | *echo = mptcp_pm_should_add_signal_echo(msk); |
af7939f3 | 304 | port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); |
456afe01 | 305 | |
f462a446 | 306 | family = *echo ? msk->pm.remote.family : msk->pm.local.family; |
af7939f3 | 307 | if (remaining < mptcp_add_addr_len(family, *echo, port)) |
926bdeab PK |
308 | goto out_unlock; |
309 | ||
f462a446 YL |
310 | if (*echo) { |
311 | *addr = msk->pm.remote; | |
119c0220 | 312 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); |
f462a446 YL |
313 | } else { |
314 | *addr = msk->pm.local; | |
119c0220 | 315 | add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); |
f462a446 | 316 | } |
119c0220 | 317 | WRITE_ONCE(msk->pm.addr_signal, add_addr); |
926bdeab PK |
318 | ret = true; |
319 | ||
320 | out_unlock: | |
321 | spin_unlock_bh(&msk->pm.lock); | |
322 | return ret; | |
1b1c7a0e PK |
323 | } |
324 | ||
5cb104ae | 325 | bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, |
6445e17a | 326 | struct mptcp_rm_list *rm_list) |
5cb104ae | 327 | { |
cbde2787 | 328 | int ret = false, len; |
119c0220 | 329 | u8 rm_addr; |
5cb104ae GT |
330 | |
331 | spin_lock_bh(&msk->pm.lock); | |
332 | ||
333 | /* double check after the lock is acquired */ | |
334 | if (!mptcp_pm_should_rm_signal(msk)) | |
335 | goto out_unlock; | |
336 | ||
119c0220 | 337 | rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); |
cbde2787 GT |
338 | len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); |
339 | if (len < 0) { | |
119c0220 | 340 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
cbde2787 GT |
341 | goto out_unlock; |
342 | } | |
343 | if (remaining < len) | |
5cb104ae GT |
344 | goto out_unlock; |
345 | ||
cbde2787 | 346 | *rm_list = msk->pm.rm_list_tx; |
119c0220 | 347 | WRITE_ONCE(msk->pm.addr_signal, rm_addr); |
5cb104ae GT |
348 | ret = true; |
349 | ||
350 | out_unlock: | |
351 | spin_unlock_bh(&msk->pm.lock); | |
352 | return ret; | |
353 | } | |
354 | ||
1b1c7a0e PK |
/* Resolve the local address id for @skc by delegating to
 * mptcp_pm_nl_get_local_id(); its result is returned unchanged.
 */
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	int local_id = mptcp_pm_nl_get_local_id(msk, skc);

	return local_id;
}
359 | ||
71b7dec2 PA |
360 | void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) |
361 | { | |
362 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); | |
363 | u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); | |
364 | ||
365 | /* keep track of rtx periods with no progress */ | |
366 | if (!subflow->stale_count) { | |
367 | subflow->stale_rcv_tstamp = rcv_tstamp; | |
368 | subflow->stale_count++; | |
369 | } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { | |
370 | if (subflow->stale_count < U8_MAX) | |
371 | subflow->stale_count++; | |
ff5a0b42 | 372 | mptcp_pm_nl_subflow_chk_stale(msk, ssk); |
71b7dec2 PA |
373 | } else { |
374 | subflow->stale_count = 0; | |
ff5a0b42 | 375 | mptcp_subflow_set_active(subflow); |
71b7dec2 PA |
376 | } |
377 | } | |
378 | ||
b29fcfb5 | 379 | void mptcp_pm_data_reset(struct mptcp_sock *msk) |
1b1c7a0e PK |
380 | { |
381 | msk->pm.add_addr_signaled = 0; | |
382 | msk->pm.add_addr_accepted = 0; | |
383 | msk->pm.local_addr_used = 0; | |
384 | msk->pm.subflows = 0; | |
cbde2787 | 385 | msk->pm.rm_list_tx.nr = 0; |
b5c55f33 | 386 | msk->pm.rm_list_rx.nr = 0; |
1b1c7a0e | 387 | WRITE_ONCE(msk->pm.work_pending, false); |
13ad9f01 | 388 | WRITE_ONCE(msk->pm.addr_signal, 0); |
1b1c7a0e PK |
389 | WRITE_ONCE(msk->pm.accept_addr, false); |
390 | WRITE_ONCE(msk->pm.accept_subflow, false); | |
df377be3 | 391 | WRITE_ONCE(msk->pm.remote_deny_join_id0, false); |
1b1c7a0e | 392 | msk->pm.status = 0; |
86e39e04 | 393 | bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); |
1b1c7a0e | 394 | |
b29fcfb5 PA |
395 | mptcp_pm_nl_data_init(msk); |
396 | } | |
397 | ||
398 | void mptcp_pm_data_init(struct mptcp_sock *msk) | |
399 | { | |
1b1c7a0e | 400 | spin_lock_init(&msk->pm.lock); |
b6c08380 | 401 | INIT_LIST_HEAD(&msk->pm.anno_list); |
b29fcfb5 | 402 | mptcp_pm_data_reset(msk); |
1b1c7a0e PK |
403 | } |
404 | ||
d39dceca | 405 | void __init mptcp_pm_init(void) |
1b1c7a0e | 406 | { |
01cacb00 | 407 | mptcp_pm_nl_init(); |
1b1c7a0e | 408 | } |