| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 4 | * |
| 5 | * Socket Closing - normal and abnormal |
| 6 | * |
| 7 | * Copyright IBM Corp. 2016 |
| 8 | * |
| 9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
| 10 | */ |
| 11 | |
| 12 | #include <linux/workqueue.h> |
| 13 | #include <linux/sched/signal.h> |
| 14 | |
| 15 | #include <net/sock.h> |
| 16 | #include <net/tcp.h> |
| 17 | |
| 18 | #include "smc.h" |
| 19 | #include "smc_tx.h" |
| 20 | #include "smc_cdc.h" |
| 21 | #include "smc_close.h" |
| 22 | |
| 23 | /* release the clcsock that is assigned to the smc_sock */ |
| 24 | void smc_clcsock_release(struct smc_sock *smc) |
| 25 | { |
| 26 | struct socket *tcp; |
| 27 | |
| 28 | if (smc->listen_smc && current_work() != &smc->smc_listen_work) |
| 29 | cancel_work_sync(&smc->smc_listen_work); |
| 30 | mutex_lock(&smc->clcsock_release_lock); |
| 31 | if (smc->clcsock) { |
| 32 | tcp = smc->clcsock; |
| 33 | smc->clcsock = NULL; |
| 34 | sock_release(tcp); |
| 35 | } |
| 36 | mutex_unlock(&smc->clcsock_release_lock); |
| 37 | } |
| 38 | |
| 39 | static void smc_close_cleanup_listen(struct sock *parent) |
| 40 | { |
| 41 | struct sock *sk; |
| 42 | |
| 43 | /* Close non-accepted connections */ |
| 44 | while ((sk = smc_accept_dequeue(parent, NULL))) |
| 45 | smc_close_non_accepted(sk); |
| 46 | } |
| 47 | |
| 48 | /* wait for sndbuf data being transmitted */ |
| 49 | static void smc_close_stream_wait(struct smc_sock *smc, long timeout) |
| 50 | { |
| 51 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 52 | struct sock *sk = &smc->sk; |
| 53 | |
| 54 | if (!timeout) |
| 55 | return; |
| 56 | |
| 57 | if (!smc_tx_prepared_sends(&smc->conn)) |
| 58 | return; |
| 59 | |
| 60 | /* Send out corked data remaining in sndbuf */ |
| 61 | smc_tx_pending(&smc->conn); |
| 62 | |
| 63 | smc->wait_close_tx_prepared = 1; |
| 64 | add_wait_queue(sk_sleep(sk), &wait); |
| 65 | while (!signal_pending(current) && timeout) { |
| 66 | int rc; |
| 67 | |
| 68 | rc = sk_wait_event(sk, &timeout, |
| 69 | !smc_tx_prepared_sends(&smc->conn) || |
| 70 | READ_ONCE(sk->sk_err) == ECONNABORTED || |
| 71 | READ_ONCE(sk->sk_err) == ECONNRESET || |
| 72 | smc->conn.killed, |
| 73 | &wait); |
| 74 | if (rc) |
| 75 | break; |
| 76 | } |
| 77 | remove_wait_queue(sk_sleep(sk), &wait); |
| 78 | smc->wait_close_tx_prepared = 0; |
| 79 | } |
| 80 | |
| 81 | void smc_close_wake_tx_prepared(struct smc_sock *smc) |
| 82 | { |
| 83 | if (smc->wait_close_tx_prepared) |
| 84 | /* wake up socket closing */ |
| 85 | smc->sk.sk_state_change(&smc->sk); |
| 86 | } |
| 87 | |
| 88 | static int smc_close_wr(struct smc_connection *conn) |
| 89 | { |
| 90 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; |
| 91 | |
| 92 | return smc_cdc_get_slot_and_msg_send(conn); |
| 93 | } |
| 94 | |
| 95 | static int smc_close_final(struct smc_connection *conn) |
| 96 | { |
| 97 | if (atomic_read(&conn->bytes_to_rcv)) |
| 98 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 99 | else |
| 100 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; |
| 101 | if (conn->killed) |
| 102 | return -EPIPE; |
| 103 | |
| 104 | return smc_cdc_get_slot_and_msg_send(conn); |
| 105 | } |
| 106 | |
| 107 | int smc_close_abort(struct smc_connection *conn) |
| 108 | { |
| 109 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 110 | |
| 111 | return smc_cdc_get_slot_and_msg_send(conn); |
| 112 | } |
| 113 | |
| 114 | static void smc_close_cancel_work(struct smc_sock *smc) |
| 115 | { |
| 116 | struct sock *sk = &smc->sk; |
| 117 | |
| 118 | release_sock(sk); |
| 119 | if (cancel_work_sync(&smc->conn.close_work)) |
| 120 | sock_put(sk); |
| 121 | cancel_delayed_work_sync(&smc->conn.tx_work); |
| 122 | lock_sock(sk); |
| 123 | } |
| 124 | |
| 125 | /* terminate smc socket abnormally - active abort |
| 126 | * link group is terminated, i.e. RDMA communication no longer possible |
| 127 | */ |
| 128 | void smc_close_active_abort(struct smc_sock *smc) |
| 129 | { |
| 130 | struct sock *sk = &smc->sk; |
| 131 | bool release_clcsock = false; |
| 132 | |
| 133 | if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { |
| 134 | sk->sk_err = ECONNABORTED; |
| 135 | if (smc->clcsock && smc->clcsock->sk) |
| 136 | tcp_abort(smc->clcsock->sk, ECONNABORTED); |
| 137 | } |
| 138 | switch (sk->sk_state) { |
| 139 | case SMC_ACTIVE: |
| 140 | case SMC_APPCLOSEWAIT1: |
| 141 | case SMC_APPCLOSEWAIT2: |
| 142 | sk->sk_state = SMC_PEERABORTWAIT; |
| 143 | smc_close_cancel_work(smc); |
| 144 | if (sk->sk_state != SMC_PEERABORTWAIT) |
| 145 | break; |
| 146 | sk->sk_state = SMC_CLOSED; |
| 147 | sock_put(sk); /* (postponed) passive closing */ |
| 148 | break; |
| 149 | case SMC_PEERCLOSEWAIT1: |
| 150 | case SMC_PEERCLOSEWAIT2: |
| 151 | case SMC_PEERFINCLOSEWAIT: |
| 152 | sk->sk_state = SMC_PEERABORTWAIT; |
| 153 | smc_close_cancel_work(smc); |
| 154 | if (sk->sk_state != SMC_PEERABORTWAIT) |
| 155 | break; |
| 156 | sk->sk_state = SMC_CLOSED; |
| 157 | smc_conn_free(&smc->conn); |
| 158 | release_clcsock = true; |
| 159 | sock_put(sk); /* passive closing */ |
| 160 | break; |
| 161 | case SMC_PROCESSABORT: |
| 162 | case SMC_APPFINCLOSEWAIT: |
| 163 | sk->sk_state = SMC_PEERABORTWAIT; |
| 164 | smc_close_cancel_work(smc); |
| 165 | if (sk->sk_state != SMC_PEERABORTWAIT) |
| 166 | break; |
| 167 | sk->sk_state = SMC_CLOSED; |
| 168 | smc_conn_free(&smc->conn); |
| 169 | release_clcsock = true; |
| 170 | break; |
| 171 | case SMC_INIT: |
| 172 | case SMC_PEERABORTWAIT: |
| 173 | case SMC_CLOSED: |
| 174 | break; |
| 175 | } |
| 176 | |
| 177 | smc_sock_set_flag(sk, SOCK_DEAD); |
| 178 | sk->sk_state_change(sk); |
| 179 | |
| 180 | if (release_clcsock) { |
| 181 | release_sock(sk); |
| 182 | smc_clcsock_release(smc); |
| 183 | lock_sock(sk); |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | static inline bool smc_close_sent_any_close(struct smc_connection *conn) |
| 188 | { |
| 189 | return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || |
| 190 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; |
| 191 | } |
| 192 | |
| 193 | int smc_close_active(struct smc_sock *smc) |
| 194 | { |
| 195 | struct smc_cdc_conn_state_flags *txflags = |
| 196 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 197 | struct smc_connection *conn = &smc->conn; |
| 198 | struct sock *sk = &smc->sk; |
| 199 | int old_state; |
| 200 | long timeout; |
| 201 | int rc = 0; |
| 202 | int rc1 = 0; |
| 203 | |
| 204 | timeout = current->flags & PF_EXITING ? |
| 205 | 0 : sock_flag(sk, SOCK_LINGER) ? |
| 206 | sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; |
| 207 | |
| 208 | old_state = sk->sk_state; |
| 209 | again: |
| 210 | switch (sk->sk_state) { |
| 211 | case SMC_INIT: |
| 212 | sk->sk_state = SMC_CLOSED; |
| 213 | break; |
| 214 | case SMC_LISTEN: |
| 215 | sk->sk_state = SMC_CLOSED; |
| 216 | sk->sk_state_change(sk); /* wake up accept */ |
| 217 | if (smc->clcsock && smc->clcsock->sk) { |
| 218 | write_lock_bh(&smc->clcsock->sk->sk_callback_lock); |
| 219 | smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, |
| 220 | &smc->clcsk_data_ready); |
| 221 | smc->clcsock->sk->sk_user_data = NULL; |
| 222 | write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); |
| 223 | rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); |
| 224 | } |
| 225 | smc_close_cleanup_listen(sk); |
| 226 | release_sock(sk); |
| 227 | flush_work(&smc->tcp_listen_work); |
| 228 | lock_sock(sk); |
| 229 | break; |
| 230 | case SMC_ACTIVE: |
| 231 | smc_close_stream_wait(smc, timeout); |
| 232 | release_sock(sk); |
| 233 | cancel_delayed_work_sync(&conn->tx_work); |
| 234 | lock_sock(sk); |
| 235 | if (sk->sk_state == SMC_ACTIVE) { |
| 236 | /* send close request */ |
| 237 | rc = smc_close_final(conn); |
| 238 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
| 239 | |
| 240 | /* actively shutdown clcsock before peer close it, |
| 241 | * prevent peer from entering TIME_WAIT state. |
| 242 | */ |
| 243 | if (smc->clcsock && smc->clcsock->sk) { |
| 244 | rc1 = kernel_sock_shutdown(smc->clcsock, |
| 245 | SHUT_RDWR); |
| 246 | rc = rc ? rc : rc1; |
| 247 | } |
| 248 | } else { |
| 249 | /* peer event has changed the state */ |
| 250 | goto again; |
| 251 | } |
| 252 | break; |
| 253 | case SMC_APPFINCLOSEWAIT: |
| 254 | /* socket already shutdown wr or both (active close) */ |
| 255 | if (txflags->peer_done_writing && |
| 256 | !smc_close_sent_any_close(conn)) { |
| 257 | /* just shutdown wr done, send close request */ |
| 258 | rc = smc_close_final(conn); |
| 259 | } |
| 260 | sk->sk_state = SMC_CLOSED; |
| 261 | break; |
| 262 | case SMC_APPCLOSEWAIT1: |
| 263 | case SMC_APPCLOSEWAIT2: |
| 264 | if (!smc_cdc_rxed_any_close(conn)) |
| 265 | smc_close_stream_wait(smc, timeout); |
| 266 | release_sock(sk); |
| 267 | cancel_delayed_work_sync(&conn->tx_work); |
| 268 | lock_sock(sk); |
| 269 | if (sk->sk_state != SMC_APPCLOSEWAIT1 && |
| 270 | sk->sk_state != SMC_APPCLOSEWAIT2) |
| 271 | goto again; |
| 272 | /* confirm close from peer */ |
| 273 | rc = smc_close_final(conn); |
| 274 | if (smc_cdc_rxed_any_close(conn)) { |
| 275 | /* peer has closed the socket already */ |
| 276 | sk->sk_state = SMC_CLOSED; |
| 277 | sock_put(sk); /* postponed passive closing */ |
| 278 | } else { |
| 279 | /* peer has just issued a shutdown write */ |
| 280 | sk->sk_state = SMC_PEERFINCLOSEWAIT; |
| 281 | } |
| 282 | break; |
| 283 | case SMC_PEERCLOSEWAIT1: |
| 284 | case SMC_PEERCLOSEWAIT2: |
| 285 | if (txflags->peer_done_writing && |
| 286 | !smc_close_sent_any_close(conn)) { |
| 287 | /* just shutdown wr done, send close request */ |
| 288 | rc = smc_close_final(conn); |
| 289 | } |
| 290 | /* peer sending PeerConnectionClosed will cause transition */ |
| 291 | break; |
| 292 | case SMC_PEERFINCLOSEWAIT: |
| 293 | /* peer sending PeerConnectionClosed will cause transition */ |
| 294 | break; |
| 295 | case SMC_PROCESSABORT: |
| 296 | rc = smc_close_abort(conn); |
| 297 | sk->sk_state = SMC_CLOSED; |
| 298 | break; |
| 299 | case SMC_PEERABORTWAIT: |
| 300 | sk->sk_state = SMC_CLOSED; |
| 301 | break; |
| 302 | case SMC_CLOSED: |
| 303 | /* nothing to do, add tracing in future patch */ |
| 304 | break; |
| 305 | } |
| 306 | |
| 307 | if (old_state != sk->sk_state) |
| 308 | sk->sk_state_change(sk); |
| 309 | return rc; |
| 310 | } |
| 311 | |
| 312 | static void smc_close_passive_abort_received(struct smc_sock *smc) |
| 313 | { |
| 314 | struct smc_cdc_conn_state_flags *txflags = |
| 315 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 316 | struct sock *sk = &smc->sk; |
| 317 | |
| 318 | switch (sk->sk_state) { |
| 319 | case SMC_INIT: |
| 320 | case SMC_ACTIVE: |
| 321 | case SMC_APPCLOSEWAIT1: |
| 322 | sk->sk_state = SMC_PROCESSABORT; |
| 323 | sock_put(sk); /* passive closing */ |
| 324 | break; |
| 325 | case SMC_APPFINCLOSEWAIT: |
| 326 | sk->sk_state = SMC_PROCESSABORT; |
| 327 | break; |
| 328 | case SMC_PEERCLOSEWAIT1: |
| 329 | case SMC_PEERCLOSEWAIT2: |
| 330 | if (txflags->peer_done_writing && |
| 331 | !smc_close_sent_any_close(&smc->conn)) |
| 332 | /* just shutdown, but not yet closed locally */ |
| 333 | sk->sk_state = SMC_PROCESSABORT; |
| 334 | else |
| 335 | sk->sk_state = SMC_CLOSED; |
| 336 | sock_put(sk); /* passive closing */ |
| 337 | break; |
| 338 | case SMC_APPCLOSEWAIT2: |
| 339 | case SMC_PEERFINCLOSEWAIT: |
| 340 | sk->sk_state = SMC_CLOSED; |
| 341 | sock_put(sk); /* passive closing */ |
| 342 | break; |
| 343 | case SMC_PEERABORTWAIT: |
| 344 | sk->sk_state = SMC_CLOSED; |
| 345 | break; |
| 346 | case SMC_PROCESSABORT: |
| 347 | /* nothing to do, add tracing in future patch */ |
| 348 | break; |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | /* Either some kind of closing has been received: peer_conn_closed, |
| 353 | * peer_conn_abort, or peer_done_writing |
| 354 | * or the link group of the connection terminates abnormally. |
| 355 | */ |
| 356 | static void smc_close_passive_work(struct work_struct *work) |
| 357 | { |
| 358 | struct smc_connection *conn = container_of(work, |
| 359 | struct smc_connection, |
| 360 | close_work); |
| 361 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
| 362 | struct smc_cdc_conn_state_flags *rxflags; |
| 363 | bool release_clcsock = false; |
| 364 | struct sock *sk = &smc->sk; |
| 365 | int old_state; |
| 366 | |
| 367 | lock_sock(sk); |
| 368 | old_state = sk->sk_state; |
| 369 | |
| 370 | rxflags = &conn->local_rx_ctrl.conn_state_flags; |
| 371 | if (rxflags->peer_conn_abort) { |
| 372 | /* peer has not received all data */ |
| 373 | smc_close_passive_abort_received(smc); |
| 374 | release_sock(sk); |
| 375 | cancel_delayed_work_sync(&conn->tx_work); |
| 376 | lock_sock(sk); |
| 377 | goto wakeup; |
| 378 | } |
| 379 | |
| 380 | switch (sk->sk_state) { |
| 381 | case SMC_INIT: |
| 382 | sk->sk_state = SMC_APPCLOSEWAIT1; |
| 383 | break; |
| 384 | case SMC_ACTIVE: |
| 385 | sk->sk_state = SMC_APPCLOSEWAIT1; |
| 386 | /* postpone sock_put() for passive closing to cover |
| 387 | * received SEND_SHUTDOWN as well |
| 388 | */ |
| 389 | break; |
| 390 | case SMC_PEERCLOSEWAIT1: |
| 391 | if (rxflags->peer_done_writing) |
| 392 | sk->sk_state = SMC_PEERCLOSEWAIT2; |
| 393 | fallthrough; |
| 394 | /* to check for closing */ |
| 395 | case SMC_PEERCLOSEWAIT2: |
| 396 | if (!smc_cdc_rxed_any_close(conn)) |
| 397 | break; |
| 398 | if (sock_flag(sk, SOCK_DEAD) && |
| 399 | smc_close_sent_any_close(conn)) { |
| 400 | /* smc_release has already been called locally */ |
| 401 | sk->sk_state = SMC_CLOSED; |
| 402 | } else { |
| 403 | /* just shutdown, but not yet closed locally */ |
| 404 | sk->sk_state = SMC_APPFINCLOSEWAIT; |
| 405 | } |
| 406 | sock_put(sk); /* passive closing */ |
| 407 | break; |
| 408 | case SMC_PEERFINCLOSEWAIT: |
| 409 | if (smc_cdc_rxed_any_close(conn)) { |
| 410 | sk->sk_state = SMC_CLOSED; |
| 411 | sock_put(sk); /* passive closing */ |
| 412 | } |
| 413 | break; |
| 414 | case SMC_APPCLOSEWAIT1: |
| 415 | case SMC_APPCLOSEWAIT2: |
| 416 | /* postpone sock_put() for passive closing to cover |
| 417 | * received SEND_SHUTDOWN as well |
| 418 | */ |
| 419 | break; |
| 420 | case SMC_APPFINCLOSEWAIT: |
| 421 | case SMC_PEERABORTWAIT: |
| 422 | case SMC_PROCESSABORT: |
| 423 | case SMC_CLOSED: |
| 424 | /* nothing to do, add tracing in future patch */ |
| 425 | break; |
| 426 | } |
| 427 | |
| 428 | wakeup: |
| 429 | sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ |
| 430 | sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ |
| 431 | |
| 432 | if (old_state != sk->sk_state) { |
| 433 | sk->sk_state_change(sk); |
| 434 | if ((sk->sk_state == SMC_CLOSED) && |
| 435 | (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { |
| 436 | smc_conn_free(conn); |
| 437 | if (smc->clcsock) |
| 438 | release_clcsock = true; |
| 439 | } |
| 440 | } |
| 441 | release_sock(sk); |
| 442 | if (release_clcsock) |
| 443 | smc_clcsock_release(smc); |
| 444 | sock_put(sk); /* sock_hold done by schedulers of close_work */ |
| 445 | } |
| 446 | |
| 447 | int smc_close_shutdown_write(struct smc_sock *smc) |
| 448 | { |
| 449 | struct smc_connection *conn = &smc->conn; |
| 450 | struct sock *sk = &smc->sk; |
| 451 | int old_state; |
| 452 | long timeout; |
| 453 | int rc = 0; |
| 454 | |
| 455 | timeout = current->flags & PF_EXITING ? |
| 456 | 0 : sock_flag(sk, SOCK_LINGER) ? |
| 457 | sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; |
| 458 | |
| 459 | old_state = sk->sk_state; |
| 460 | again: |
| 461 | switch (sk->sk_state) { |
| 462 | case SMC_ACTIVE: |
| 463 | smc_close_stream_wait(smc, timeout); |
| 464 | release_sock(sk); |
| 465 | cancel_delayed_work_sync(&conn->tx_work); |
| 466 | lock_sock(sk); |
| 467 | if (sk->sk_state != SMC_ACTIVE) |
| 468 | goto again; |
| 469 | /* send close wr request */ |
| 470 | rc = smc_close_wr(conn); |
| 471 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
| 472 | break; |
| 473 | case SMC_APPCLOSEWAIT1: |
| 474 | /* passive close */ |
| 475 | if (!smc_cdc_rxed_any_close(conn)) |
| 476 | smc_close_stream_wait(smc, timeout); |
| 477 | release_sock(sk); |
| 478 | cancel_delayed_work_sync(&conn->tx_work); |
| 479 | lock_sock(sk); |
| 480 | if (sk->sk_state != SMC_APPCLOSEWAIT1) |
| 481 | goto again; |
| 482 | /* confirm close from peer */ |
| 483 | rc = smc_close_wr(conn); |
| 484 | sk->sk_state = SMC_APPCLOSEWAIT2; |
| 485 | break; |
| 486 | case SMC_APPCLOSEWAIT2: |
| 487 | case SMC_PEERFINCLOSEWAIT: |
| 488 | case SMC_PEERCLOSEWAIT1: |
| 489 | case SMC_PEERCLOSEWAIT2: |
| 490 | case SMC_APPFINCLOSEWAIT: |
| 491 | case SMC_PROCESSABORT: |
| 492 | case SMC_PEERABORTWAIT: |
| 493 | /* nothing to do, add tracing in future patch */ |
| 494 | break; |
| 495 | } |
| 496 | |
| 497 | if (old_state != sk->sk_state) |
| 498 | sk->sk_state_change(sk); |
| 499 | return rc; |
| 500 | } |
| 501 | |
| 502 | /* Initialize close properties on connection establishment. */ |
| 503 | void smc_close_init(struct smc_sock *smc) |
| 504 | { |
| 505 | INIT_WORK(&smc->conn.close_work, smc_close_passive_work); |
| 506 | } |