/* SPDX-License-Identifier: GPL-2.0 */
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#ifndef __MPTCP_PROTOCOL_H
#define __MPTCP_PROTOCOL_H

#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
#include <net/rstreason.h>

#define MPTCP_SUPPORTED_VERSION 1

/* MPTCP option bits */
#define OPTION_MPTCP_MPC_SYN BIT(0)
#define OPTION_MPTCP_MPC_SYNACK BIT(1)
#define OPTION_MPTCP_MPC_ACK BIT(2)
#define OPTION_MPTCP_MPJ_SYN BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK BIT(4)
#define OPTION_MPTCP_MPJ_ACK BIT(5)
#define OPTION_MPTCP_ADD_ADDR BIT(6)
#define OPTION_MPTCP_RM_ADDR BIT(7)
#define OPTION_MPTCP_FASTCLOSE BIT(8)
#define OPTION_MPTCP_PRIO BIT(9)
#define OPTION_MPTCP_RST BIT(10)
#define OPTION_MPTCP_DSS BIT(11)
#define OPTION_MPTCP_FAIL BIT(12)

#define OPTION_MPTCP_CSUMREQD BIT(13)

#define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
			   OPTION_MPTCP_MPC_ACK)
#define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
			   OPTION_MPTCP_MPJ_ACK)

/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
#define MPTCPOPT_MP_JOIN 1
#define MPTCPOPT_DSS 2
#define MPTCPOPT_ADD_ADDR 3
#define MPTCPOPT_RM_ADDR 4
#define MPTCPOPT_MP_PRIO 5
#define MPTCPOPT_MP_FAIL 6
#define MPTCPOPT_MP_FASTCLOSE 7
#define MPTCPOPT_RST 8

/* MPTCP suboption lengths */
#define TCPOLEN_MPTCP_MPC_SYN 4
#define TCPOLEN_MPTCP_MPC_SYNACK 12
#define TCPOLEN_MPTCP_MPC_ACK 20
#define TCPOLEN_MPTCP_MPC_ACK_DATA 22
#define TCPOLEN_MPTCP_MPJ_SYN 12
#define TCPOLEN_MPTCP_MPJ_SYNACK 16
#define TCPOLEN_MPTCP_MPJ_ACK 24
#define TCPOLEN_MPTCP_DSS_BASE 4
#define TCPOLEN_MPTCP_DSS_ACK32 4
#define TCPOLEN_MPTCP_DSS_ACK64 8
#define TCPOLEN_MPTCP_DSS_MAP32 10
#define TCPOLEN_MPTCP_DSS_MAP64 14
#define TCPOLEN_MPTCP_DSS_CHECKSUM 2
#define TCPOLEN_MPTCP_ADD_ADDR 16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE 8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10
#define TCPOLEN_MPTCP_ADD_ADDR6 28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22
#define TCPOLEN_MPTCP_PORT_LEN 2
#define TCPOLEN_MPTCP_PORT_ALIGN 2
#define TCPOLEN_MPTCP_RM_ADDR_BASE 3
#define TCPOLEN_MPTCP_PRIO 3
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
#define TCPOLEN_MPTCP_FAIL 12

#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)

/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
#define MPTCPOPT_THMAC_LEN 8

/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
#define MPTCP_CAP_FLAG_MASK (0x1F)

/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
#define MPTCP_DSS_DSN64 BIT(3)
#define MPTCP_DSS_HAS_MAP BIT(2)
#define MPTCP_DSS_ACK64 BIT(1)
#define MPTCP_DSS_HAS_ACK BIT(0)
#define MPTCP_DSS_FLAG_MASK (0x1F)

/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO BIT(0)

/* MPTCP MP_PRIO flags */
#define MPTCP_PRIO_BKUP BIT(0)

/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT BIT(0)

/* MPTCP socket atomic flags */
#define MPTCP_WORK_RTX 1
#define MPTCP_FALLBACK_DONE 2
#define MPTCP_WORK_CLOSE_SUBFLOW 3

/* MPTCP socket release cb flags */
#define MPTCP_PUSH_PENDING 1
#define MPTCP_CLEAN_UNA 2
#define MPTCP_ERROR_REPORT 3
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_SYNC_STATE 6
#define MPTCP_SYNC_SNDBUF 7
#define MPTCP_DEQUEUE 8

struct mptcp_skb_cb {
	u64 map_seq;
	u64 end_seq;
	u32 offset;
	u8 has_rxtstamp;
	u8 cant_coalesce;
};

#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
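
/* Illustrative usage sketch: the MPTCP-level mapping of an skb queued at
 * the msk is reached through the skb control block, e.g. the data sequence
 * number of the next unread byte would be:
 *
 *	u64 dsn = MPTCP_SKB_CB(skb)->map_seq + MPTCP_SKB_CB(skb)->offset;
 */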

static inline bool before64(__u64 seq1, __u64 seq2)
{
	return (__s64)(seq1 - seq2) < 0;
}

#define after64(seq2, seq1) before64(seq1, seq2)
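
/* Both helpers are wrap-safe thanks to the signed subtraction: e.g.
 * before64(U64_MAX, 0) is true, as U64_MAX is treated as one step behind
 * a sequence space that has just wrapped to 0.
 */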

struct mptcp_options_received {
	u64 sndr_key;
	u64 rcvr_key;
	u64 data_ack;
	u64 data_seq;
	u32 subflow_seq;
	u16 data_len;
	__sum16 csum;
	struct_group(status,
	u16 suboptions;
	u16 use_map:1,
	    dsn64:1,
	    data_fin:1,
	    use_ack:1,
	    ack64:1,
	    mpc_map:1,
	    reset_reason:4,
	    reset_transient:1,
	    echo:1,
	    backup:1,
	    deny_join_id0:1,
	    __unused:2;
	);
	u8 join_id;
	u32 token;
	u32 nonce;
	u64 thmac;
	u8 hmac[MPTCPOPT_HMAC_LEN];
	struct mptcp_addr_info addr;
	struct mptcp_rm_list rm_list;
	u64 ahmac;
	u64 fail_seq;
};

static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
		     ((nib & 0xF) << 8) | field);
}
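
/* For example, the leading 32 bits of an MP_CAPABLE SYN option could be
 * built as (illustrative sketch):
 *
 *	__be32 first = mptcp_option(MPTCPOPT_MP_CAPABLE,
 *				    TCPOLEN_MPTCP_MPC_SYN,
 *				    MPTCP_SUPPORTED_VERSION,
 *				    MPTCP_CAP_HMAC_SHA256);
 *
 * packing kind (TCPOPT_MPTCP), length, subtype, version nibble and flags
 * in network byte order.
 */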

enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ADD_ADDR_SEND_ACK,
	MPTCP_PM_RM_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
	MPTCP_PM_ALREADY_ESTABLISHED,	/* persistent status, set after ESTABLISHED event */
	MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
					 * accounted in id_avail_bitmap
					 */
};

enum mptcp_pm_type {
	MPTCP_PM_TYPE_KERNEL = 0,
	MPTCP_PM_TYPE_USERSPACE,

	__MPTCP_PM_TYPE_NR,
	__MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
};

/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
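
/* With the current enum layout MPTCP_PM_ALREADY_ESTABLISHED == 5, so the
 * mask evaluates to 0x1f, covering the five transient statuses above.
 */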

enum mptcp_addr_signal_status {
	MPTCP_ADD_ADDR_SIGNAL,
	MPTCP_ADD_ADDR_ECHO,
	MPTCP_RM_ADDR_SIGNAL,
};

/* max value of mptcp_addr_info.id */
#define MPTCP_PM_MAX_ADDR_ID U8_MAX

struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;
	struct list_head anno_list;
	struct list_head userspace_pm_local_addr_list;

	spinlock_t lock; /* protects the whole PM data */

	struct_group(reset,

	u8 addr_signal;
	bool server_side;
	bool work_pending;
	bool accept_addr;
	bool accept_subflow;
	bool remote_deny_join_id0;
	u8 add_addr_signaled;
	u8 add_addr_accepted;
	u8 local_addr_used;
	u8 pm_type;
	u8 subflows;
	u8 status;

	);

	DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	struct mptcp_rm_list rm_list_tx;
	struct mptcp_rm_list rm_list_rx;
};

struct mptcp_pm_local {
	struct mptcp_addr_info addr;
	u8 flags;
	int ifindex;
};

struct mptcp_pm_addr_entry {
	struct list_head list;
	struct mptcp_addr_info addr;
	u8 flags;
	int ifindex;
	struct socket *lsk;
};

struct mptcp_data_frag {
	struct list_head list;
	u64 data_seq;
	u16 data_len;
	u16 offset;
	u16 overhead;
	u16 already_sent;
	struct page *page;
};

/* MPTCP connection sock */
struct mptcp_sock {
	/* inet_connection_sock must be the first member */
	struct inet_connection_sock sk;
	u64 local_key; /* protected by the first subflow socket lock;
			* read access is lockless
			*/
	u64 remote_key; /* same as above */
	u64 write_seq;
	u64 bytes_sent;
	u64 snd_nxt;
	u64 bytes_received;
	u64 ack_seq;
	atomic64_t rcv_wnd_sent;
	u64 rcv_data_fin_seq;
	u64 bytes_retrans;
	u64 bytes_consumed;
	int snd_burst;
	int old_wspace;
	u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
			       * recovery related fields are under data_lock
			       * protection
			       */
	u64 bytes_acked;
	u64 snd_una;
	u64 wnd_end;
	u32 last_data_sent;
	u32 last_data_recv;
	u32 last_ack_recv;
	unsigned long timer_ival;
	u32 token;
	unsigned long flags;
	unsigned long cb_flags;
	bool recovery; /* closing subflow write queue reinjected */
	bool can_ack;
	bool fully_established;
	bool rcv_data_fin;
	bool snd_data_fin_enable;
	bool rcv_fastclose;
	bool use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool csum_enabled;
	bool allow_infinite_fallback;
	u8 pending_state; /* A subflow asked to set this sk_state,
			   * protected by the msk data lock
			   */
	u8 mpc_endpoint_id;
	u8 recvmsg_inq:1,
	   cork:1,
	   nodelay:1,
	   fastopening:1,
	   in_accept_queue:1,
	   free_first:1,
	   rcvspace_init:1;
	u32 notsent_lowat;
	int keepalive_cnt;
	int keepalive_idle;
	int keepalive_intvl;
	struct work_struct work;
	struct sk_buff *ooo_last_skb;
	struct rb_root out_of_order_queue;
	struct list_head conn_list;
	struct list_head rtx_queue;
	struct mptcp_data_frag *first_pending;
	struct list_head join_list;
	struct sock *first; /* The mptcp ops can safely dereference the first
			     * subflow outside the socket lock, using suitable
			     * ONCE annotation, as such sock is freed after
			     * close().
			     */
	struct mptcp_pm_data pm;
	struct mptcp_sched_ops *sched;
	struct {
		u32 space;  /* bytes copied in last measurement window */
		u32 copied; /* bytes copied in this measurement window */
		u64 time;   /* start time of measurement window */
		u64 rtt_us; /* last maximum rtt of subflows */
	} rcvq_space;
	u8 scaling_ratio;

	u32 subflow_id;
	u32 setsockopt_seq;
	char ca_name[TCP_CA_NAME_MAX];
};

#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)

#define mptcp_for_each_subflow(__msk, __subflow) \
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
#define mptcp_next_subflow(__msk, __subflow) \
	list_next_entry_circular(__subflow, &((__msk)->conn_list), node)
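
/* Usage sketch (msk socket lock held, keeping conn_list stable):
 *
 *	struct mptcp_subflow_context *subflow;
 *
 *	mptcp_for_each_subflow(msk, subflow) {
 *		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 *		...
 *	}
 *
 * The _safe variant must be used when subflows can be removed while
 * iterating.
 */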

extern struct genl_family mptcp_genl_family;

static inline void msk_owned_by_me(const struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);
}

#ifdef CONFIG_DEBUG_NET
/* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */
#undef tcp_sk
#define tcp_sk(ptr) ({								\
	typeof(ptr) _ptr = (ptr);						\
	WARN_ON(_ptr->sk_protocol != IPPROTO_TCP);				\
	container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk);	\
})
#define mptcp_sk(ptr) ({						\
	typeof(ptr) _ptr = (ptr);					\
	WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP);			\
	container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk);	\
})

#else /* !CONFIG_DEBUG_NET */
#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
#endif

static inline int mptcp_win_from_space(const struct sock *sk, int space)
{
	return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
}

static inline int mptcp_space_from_win(const struct sock *sk, int win)
{
	return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win);
}

static inline int __mptcp_space(const struct sock *sk)
{
	return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
				    sk_rmem_alloc_get(sk));
}

static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);

	return READ_ONCE(msk->first_pending);
}

static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_data_frag *cur;

	cur = msk->first_pending;
	return list_is_last(&cur->list, &msk->rtx_queue) ? NULL :
		list_next_entry(cur, list);
}

static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);

	if (!msk->first_pending)
		return NULL;

	if (WARN_ON_ONCE(list_empty(&msk->rtx_queue)))
		return NULL;

	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}

static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (msk->snd_una == msk->snd_nxt)
		return NULL;

	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}

struct csum_pseudo_header {
	__be64 data_seq;
	__be32 subflow_seq;
	__be16 data_len;
	__sum16 csum;
};
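
/* The DSS checksum (RFC 8684) is computed over a pseudo-header with this
 * layout plus the mapped data; note that __mptcp_make_csum(), declared
 * later in this file, takes the same fields as arguments.
 */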

struct mptcp_subflow_request_sock {
	struct tcp_request_sock sk;
	u16 mp_capable : 1,
	    mp_join : 1,
	    backup : 1,
	    request_bkup : 1,
	    csum_reqd : 1,
	    allow_join_id0 : 1;
	u8 local_id;
	u8 remote_id;
	u64 local_key;
	u64 idsn;
	u32 token;
	u32 ssn_offset;
	u64 thmac;
	u32 local_nonce;
	u32 remote_nonce;
	struct mptcp_sock *msk;
	struct hlist_nulls_node token_node;
};

static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	return (struct mptcp_subflow_request_sock *)rsk;
}

struct mptcp_delegated_action {
	struct napi_struct napi;
	local_lock_t bh_lock;
	struct list_head head;
};

DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);

#define MPTCP_DELEGATE_SCHEDULED 0
#define MPTCP_DELEGATE_SEND 1
#define MPTCP_DELEGATE_ACK 2
#define MPTCP_DELEGATE_SNDBUF 3

#define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED))
/* MPTCP subflow context */
struct mptcp_subflow_context {
	struct list_head node; /* conn_list of subflows */

	struct_group(reset,

	unsigned long avg_pacing_rate; /* protected by msk socket lock */
	u64 local_key;
	u64 remote_key;
	u64 idsn;
	u64 map_seq;
	u32 snd_isn;
	u32 token;
	u32 rel_write_seq;
	u32 map_subflow_seq;
	u32 ssn_offset;
	u32 map_data_len;
	__wsum map_data_csum;
	u32 map_csum_len;
	u32 request_mptcp : 1,  /* send MP_CAPABLE */
	    request_join : 1,   /* send MP_JOIN */
	    request_bkup : 1,
	    mp_capable : 1,     /* remote is MPTCP capable */
	    mp_join : 1,        /* remote is JOINing */
	    pm_notified : 1,    /* PM hook called for established status */
	    conn_finished : 1,
	    map_valid : 1,
	    map_csum_reqd : 1,
	    map_data_fin : 1,
	    mpc_map : 1,
	    backup : 1,
	    send_mp_prio : 1,
	    send_mp_fail : 1,
	    send_fastclose : 1,
	    send_infinite_map : 1,
	    remote_key_valid : 1, /* the peer key has been received */
	    disposable : 1,       /* ctx can be freed at ulp release time */
	    stale : 1,            /* unable to snd/rcv data, do not use for xmit */
	    valid_csum_seen : 1,  /* at least one csum validated */
	    is_mptfo : 1,         /* subflow is doing TFO */
	    close_event_done : 1, /* has done the post-closed part */
	    mpc_drop : 1,         /* the MPC option has been dropped in an rtx */
	    __unused : 9;
	bool data_avail;
	bool scheduled;
	bool pm_listener;       /* a listener managed by the kernel PM? */
	bool fully_established; /* path validated */
	u32 remote_nonce;
	u64 thmac;
	u32 local_nonce;
	u32 remote_token;
	union {
		u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
		u64 iasn; /* initial ack sequence number, MPC subflows only */
	};
	s16 local_id; /* if negative not initialized yet */
	u8 remote_id;
	u8 reset_seen:1;
	u8 reset_transient:1;
	u8 reset_reason:4;
	u8 stale_count;

	u32 subflow_id;

	long delegated_status;
	unsigned long fail_tout;

	);

	struct list_head delegated_node; /* link into delegated_action, protected by local BH */

	u32 setsockopt_seq;
	u32 stale_rcv_tstamp;
	int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf,
			    * protected by the msk socket lock
			    */

	struct sock *tcp_sock; /* tcp sk backpointer */
	struct sock *conn;     /* parent mptcp_sock */
	const struct inet_connection_sock_af_ops *icsk_af_ops;
	void (*tcp_state_change)(struct sock *sk);
	void (*tcp_error_report)(struct sock *sk);

	struct rcu_head rcu;
};

static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	/* Use RCU on icsk_ulp_data only for sock diag code */
	return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}

static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	return subflow->tcp_sock;
}

static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
	memset(&subflow->reset, 0, sizeof(subflow->reset));
	subflow->request_mptcp = 1;
	WRITE_ONCE(subflow->local_id, -1);
}

/* Convert reset reasons in MPTCP to enum sk_rst_reason type */
static inline enum sk_rst_reason
sk_rst_convert_mptcp_reason(u32 reason)
{
	switch (reason) {
	case MPTCP_RST_EUNSPEC:
		return SK_RST_REASON_MPTCP_RST_EUNSPEC;
	case MPTCP_RST_EMPTCP:
		return SK_RST_REASON_MPTCP_RST_EMPTCP;
	case MPTCP_RST_ERESOURCE:
		return SK_RST_REASON_MPTCP_RST_ERESOURCE;
	case MPTCP_RST_EPROHIBIT:
		return SK_RST_REASON_MPTCP_RST_EPROHIBIT;
	case MPTCP_RST_EWQ2BIG:
		return SK_RST_REASON_MPTCP_RST_EWQ2BIG;
	case MPTCP_RST_EBADPERF:
		return SK_RST_REASON_MPTCP_RST_EBADPERF;
	case MPTCP_RST_EMIDDLEBOX:
		return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX;
	default:
		/* This should not happen; if it does, errors may occur
		 * in the MPTCP layer
		 */
		return SK_RST_REASON_ERROR;
	}
}

static inline void
mptcp_send_active_reset_reason(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	enum sk_rst_reason reason;

	reason = sk_rst_convert_mptcp_reason(subflow->reset_reason);
	tcp_send_active_reset(sk, GFP_ATOMIC, reason);
}

static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
	return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq -
	       subflow->ssn_offset -
	       subflow->map_subflow_seq;
}

static inline u64
mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
{
	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}

void mptcp_subflow_process_delegated(struct sock *ssk, long actions);

static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{
	long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action);
	struct mptcp_delegated_action *delegated;
	bool schedule;

	/* the caller must hold the subflow bh socket lock */
	lockdep_assert_in_softirq();

	/* The implied barrier pairs with tcp_release_cb_override() and
	 * mptcp_napi_poll(), and ensures the below list check sees list
	 * updates done prior to the delegated status bits changes
	 */
	old = set_mask_bits(&subflow->delegated_status, 0, set_bits);
	if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) {
		if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node)))
			return;

		local_lock_nested_bh(&mptcp_delegated_actions.bh_lock);
		delegated = this_cpu_ptr(&mptcp_delegated_actions);
		schedule = list_empty(&delegated->head);
		list_add_tail(&subflow->delegated_node, &delegated->head);
		local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock);
		sock_hold(mptcp_subflow_tcp_sock(subflow));
		if (schedule)
			napi_schedule(&delegated->napi);
	}
}

static inline struct mptcp_subflow_context *
mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
{
	struct mptcp_subflow_context *ret;

	local_lock_nested_bh(&mptcp_delegated_actions.bh_lock);
	if (list_empty(&delegated->head)) {
		local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock);
		return NULL;
	}

	ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
	list_del_init(&ret->delegated_node);
	local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock);
	return ret;
}
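
/* Simplified consumer sketch, assuming a NAPI poll context; the real
 * handler additionally deals with socket lock ownership:
 *
 *	while ((subflow = mptcp_subflow_delegated_next(delegated))) {
 *		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 *
 *		mptcp_subflow_process_delegated(ssk,
 *				xchg(&subflow->delegated_status, 0));
 *		sock_put(ssk);
 *	}
 */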

int mptcp_is_enabled(const struct net *net);
unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_path_manager(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);

void mptcp_active_disable(struct sock *sk);
bool mptcp_active_should_disable(struct sock *ssk);
void mptcp_active_enable(struct sock *sk);

void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
				       struct mptcp_subflow_context *subflow,
				       const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
		     struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_state(struct sock *sk, int state);

bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
			   const struct mptcp_addr_info *b, bool use_port);
void mptcp_local_address(const struct sock_common *skc,
			 struct mptcp_addr_info *addr);
void mptcp_remote_address(const struct sock_common *skc,
			  struct mptcp_addr_info *addr);

/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
			    const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
				struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
			 struct sockaddr_storage *addr,
			 unsigned short family);
struct mptcp_sched_ops *mptcp_sched_find(const char *name);
int mptcp_validate_scheduler(struct mptcp_sched_ops *sched);
int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
void mptcp_sched_init(void);
int mptcp_init_sched(struct mptcp_sock *msk,
		     struct mptcp_sched_ops *sched);
void mptcp_release_sched(struct mptcp_sock *msk);
void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
				 bool scheduled);
struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
int mptcp_sched_get_send(struct mptcp_sock *msk);
int mptcp_sched_get_retrans(struct mptcp_sock *msk);

static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed);
}

static inline bool mptcp_epollin_ready(const struct sock *sk)
{
	u64 data_avail = mptcp_data_avail(mptcp_sk(sk));

	if (!data_avail)
		return false;

	/* mptcp doesn't have to deal with small skbs in the receive queue,
	 * as it can always coalesce them
	 */
	return (data_avail >= sk->sk_rcvlowat) ||
	       (mem_cgroup_sockets_enabled && sk->sk_memcg &&
		mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
	       READ_ONCE(tcp_memory_pressure);
}

int mptcp_set_rcvlowat(struct sock *sk, int val);

static inline bool __tcp_can_send(const struct sock *ssk)
{
	/* only send if our side has not closed yet */
	return ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
}

static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
	/* can't send if the JOIN hasn't completed yet, i.e. the subflow
	 * is not yet usable for mptcp
	 */
	if (subflow->request_join && !READ_ONCE(subflow->fully_established))
		return false;

	return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
}

void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);

bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);

void mptcp_subflow_drop_ctx(struct sock *ssk);

static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
					      struct mptcp_subflow_context *ctx)
{
	sk->sk_data_ready = sock_def_readable;
	sk->sk_state_change = ctx->tcp_state_change;
	sk->sk_write_space = sk_stream_write_space;
	sk->sk_error_report = ctx->tcp_error_report;

	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}

void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
#endif

struct sock *mptcp_sk_clone_init(const struct sock *sk,
				 const struct mptcp_options_received *mp_opt,
				 struct sock *ssk,
				 struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
		       struct mptcp_options_received *mp_opt);

void mptcp_finish_connect(struct sock *sk);
void __mptcp_sync_state(struct sock *sk, int state);
void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);

static inline void mptcp_stop_tout_timer(struct sock *sk)
{
	if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
		return;

	sk_stop_timer(sk, &sk->sk_timer);
	inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
}

static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
{
	/* avoid 0 timestamp, as that means no close timeout */
	inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
}

static inline void mptcp_start_tout_timer(struct sock *sk)
{
	mptcp_set_close_tout(sk, tcp_jiffies32);
	mptcp_reset_tout_timer(mptcp_sk(sk), 0);
}

static inline bool mptcp_is_fully_established(struct sock *sk)
{
	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
	       READ_ONCE(mptcp_sk(sk)->fully_established);
}

void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
int mptcp_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen);
int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option);

u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq);
static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
{
	if (use_64bit)
		return cur_seq;

	return __mptcp_expand_seq(old_seq, cur_seq);
}
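
/* Worked example: with old_seq == 0x100000000 and a 32-bit cur_seq of 0x10,
 * the expansion yields 0x100000010, i.e. the 64-bit value with the given
 * low 32 bits closest to old_seq.
 */
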
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->snd_data_fin_enable) &&
	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}

static inline u32 mptcp_notsent_lowat(const struct sock *sk)
{
	struct net *net = sock_net(sk);
	u32 val;

	val = READ_ONCE(mptcp_sk(sk)->notsent_lowat);
	return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}

static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);
	u32 notsent_bytes;

	notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt);
	return (notsent_bytes << wake) < mptcp_notsent_lowat(sk);
}

static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake)
{
	return mptcp_stream_memory_free(sk, wake) &&
	       __sk_stream_is_writeable(sk, wake);
}

static inline void mptcp_write_space(struct sock *sk)
{
	/* pairs with memory barrier in mptcp_poll */
	smp_mb();
	if (mptcp_stream_memory_free(sk, 1))
		sk_stream_write_space(sk);
}

static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
	struct mptcp_subflow_context *subflow;
	int ssk_sndbuf, new_sndbuf;

	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
		return;

	new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]);
	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
		ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf);

		subflow->cached_sndbuf = ssk_sndbuf;
		new_sndbuf += ssk_sndbuf;
	}

	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
	mptcp_write_space(sk);
}
/* The caller holds both the msk socket and the subflow socket locks,
 * possibly under BH
 */
static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);

	if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf)
		__mptcp_sync_sndbuf(sk);
}

/* the caller holds only the subflow socket lock, either in process or
 * BH context. Additionally this can be called under the msk data lock,
 * so we can't acquire such a lock here: let the delegated action acquire
 * the needed locks in a suitable order.
 */
static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);

	if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf))
		return;

	local_bh_disable();
	mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF);
	local_bh_enable();
}

void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);

#define MPTCP_TOKEN_MAX_RETRIES 4

void __init mptcp_token_init(void);
static inline void mptcp_token_init_request(struct request_sock *req)
{
	mptcp_subflow_rsk(req)->token_node.pprev = NULL;
}

int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(struct request_sock *req);
int mptcp_token_new_connect(struct sock *ssk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
			struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
					 long *s_num);
void mptcp_token_destroy(struct mptcp_sock *msk);

void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);

void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);

void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_data_reset(struct mptcp_sock *msk);
void mptcp_pm_destroy(struct mptcp_sock *msk);
int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
			struct mptcp_addr_info *addr);
int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
			 bool require_family,
			 struct mptcp_pm_addr_entry *entry);
bool mptcp_pm_addr_families_match(const struct sock *sk,
				  const struct mptcp_addr_info *loc,
				  const struct mptcp_addr_info *rem);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
				 const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(const struct sock *ssk,
				const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_send_ack(struct mptcp_sock *msk,
		       struct mptcp_subflow_context *subflow,
		       bool prio, bool backup);
void mptcp_pm_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id);
void mptcp_pm_rm_subflow(struct mptcp_sock *msk,
			 const struct mptcp_rm_list *rm_list);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
			       const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
			      struct mptcp_addr_info *addr,
			      struct mptcp_addr_info *rem,
			      u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
		       const struct mptcp_addr_info *addr, bool check_id);
bool mptcp_lookup_subflow_by_saddr(const struct list_head *list,
				   const struct mptcp_addr_info *saddr);
bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
				     const struct mptcp_addr_info *addr);
int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
			  struct genl_info *info);
int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
				 struct genl_info *info);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk,
				struct mptcp_pm_addr_entry *entry);

/* the default path manager, used in mptcp_pm_unregister */
extern struct mptcp_pm_ops mptcp_pm_kernel;

struct mptcp_pm_ops *mptcp_pm_find(const char *name);
int mptcp_pm_register(struct mptcp_pm_ops *pm_ops);
void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops);
int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops);
void mptcp_pm_get_available(char *buf, size_t maxlen);

void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk);

void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
		 const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
void mptcp_event_pm_listener(const struct sock *ssk,
			     enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);

void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
					      struct request_sock *req);
int mptcp_pm_genl_fill_addr(struct sk_buff *msg,
			    struct netlink_callback *cb,
			    struct mptcp_pm_addr_entry *entry);

static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) &
		(BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
}

static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
}

static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
}

static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}

static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
}

static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
}

static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
	u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;

	if (family == AF_INET6)
		len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
	if (!echo)
		len += MPTCPOPT_THMAC_LEN;
	/* account for 2 trailing 'nop' options */
	if (port)
		len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;

	return len;
}
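
/* Worked example: an IPv4 ADD_ADDR carrying an HMAC and a port takes
 * 8 (base) + 8 (HMAC) + 2 (port) + 2 (nop padding) = 20 bytes of option
 * space, for an 18-byte on-wire option (TCPOLEN_MPTCP_ADD_ADDR_PORT).
 */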

static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
{
	if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
		return -EINVAL;

	return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
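
/* Worked example: one address id yields 3 + roundup(0, 4) + 1 = 4 bytes,
 * three ids yield 3 + roundup(2, 4) + 1 = 8, keeping the option 32-bit
 * aligned.
 */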

bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
			      unsigned int opt_size, unsigned int remaining,
			      struct mptcp_addr_info *addr, bool *echo,
			      bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			     struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk,
			     struct mptcp_pm_addr_entry *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
				    struct mptcp_pm_addr_entry *skc);
bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
			  struct netlink_callback *cb);
int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
				 struct netlink_callback *cb);
int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
			 struct genl_info *info);
int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
				struct genl_info *info);

static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
{
	int local_id = READ_ONCE(subflow->local_id);

	if (local_id < 0)
		return 0;
	return local_id;
}

void __init mptcp_pm_kernel_register(void);
void __init mptcp_pm_userspace_register(void);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_worker(struct mptcp_sock *msk);
void __mptcp_pm_kernel_worker(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);

/* called under PM lock */
static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
{
	if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
		WRITE_ONCE(msk->pm.accept_subflow, true);
}

static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
{
	spin_lock_bh(&msk->pm.lock);
	__mptcp_pm_close_subflow(msk);
	spin_unlock_bh(&msk->pm.lock);
}

void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);

static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
}

void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);

static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
{
	return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline bool mptcp_check_fallback(const struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	return __mptcp_check_fallback(msk);
}

static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
	if (__mptcp_check_fallback(msk)) {
		pr_debug("TCP fallback already done (msk=%p)\n", msk);
		return;
	}
	if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback)))
		return;
	set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
{
	struct sock *ssk = READ_ONCE(msk->first);

	return ssk && ((1 << inet_sk_state_load(ssk)) &
		       (TCPF_ESTABLISHED | TCPF_SYN_SENT |
			TCPF_SYN_RECV | TCPF_LISTEN));
}

static inline void mptcp_do_fallback(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(sk);
	__mptcp_do_fallback(msk);
	if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
		gfp_t saved_allocation = ssk->sk_allocation;

		/* we are in an atomic (BH) scope, override the ssk default
		 * for data fin allocation
		 */
		ssk->sk_allocation = GFP_ATOMIC;
		ssk->sk_shutdown |= SEND_SHUTDOWN;
		tcp_shutdown(ssk, SEND_SHUTDOWN);
		ssk->sk_allocation = saved_allocation;
	}
}

#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)

static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
						struct mptcp_subflow_context *subflow)
{
	pr_fallback(msk);
	subflow->request_mptcp = 0;
	__mptcp_do_fallback(msk);
}

static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
	struct mptcp_ext *mpext;

	mpext = skb ? mptcp_get_ext(skb) : NULL;
	if (mpext && mpext->infinite_map)
		return true;

	return false;
}

static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
{
	return (subflow->request_mptcp || subflow->request_join);
}

static inline bool subflow_simultaneous_connect(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	return (1 << sk->sk_state) &
	       (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
	       is_active_ssk(subflow) &&
	       !subflow->conn_finished;
}

#ifdef CONFIG_SYN_COOKIES
void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				       struct sk_buff *skb);
bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
					struct sk_buff *skb);
void __init mptcp_join_cookie_init(void);
#else
static inline void
subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				  struct sk_buff *skb) {}
static inline bool
mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
				   struct sk_buff *skb)
{
	return false;
}

static inline void mptcp_join_cookie_init(void) {}
#endif

#endif /* __MPTCP_PROTOCOL_H */