net/smc: check for healthy link group resp. connections
author: Ursula Braun <ubraun@linux.vnet.ibm.com>
Thu, 25 Jan 2018 10:15:36 +0000 (11:15 +0100)
committer: David S. Miller <davem@davemloft.net>
Thu, 25 Jan 2018 21:10:42 +0000 (16:10 -0500)
If a problem for at least one connection of a link group is detected,
the whole link group and all its connections are terminated.
This patch adds a check for healthy link group when trying to reserve
a work request, and checks for healthy connections before starting
a tx worker.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/smc_cdc.c
net/smc/smc_diag.c
net/smc/smc_tx.c
net/smc/smc_wr.c

index 51805334e0013ddfb0062b1e5d5cedc333e2a332..6e8f5fbe0f0915c97501bbd3e92310f2c2e9c09a 100644 (file)
@@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
                          struct smc_cdc_tx_pend **pend)
 {
        struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+       int rc;
 
-       return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
-                                      (struct smc_wr_tx_pend_priv **)pend);
+       rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+                                    (struct smc_wr_tx_pend_priv **)pend);
+       if (!conn->alert_token_local)
+               /* abnormal termination */
+               rc = -EPIPE;
+       return rc;
 }
 
 static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
index d2d01cf7022445d3d55f7d791b07f36655726d0d..427b91c1c964f05e01c1cf221a397dc8d6d46628 100644 (file)
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
        if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
                goto errout;
 
-       if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+       if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+           smc->conn.alert_token_local) {
                struct smc_connection *conn = &smc->conn;
                struct smc_diag_conninfo cinfo = {
                        .token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                        goto errout;
        }
 
-       if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+       if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+           !list_empty(&smc->conn.lgr->list)) {
                struct smc_diag_lgrinfo linfo = {
                        .role = smc->conn.lgr->role,
                        .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
index 71b7d9f079f0a1805fa12ee4bb2477742cbe8b2c..838bce20c3610a16d2d5f6d1533f1f6300e7438b 100644 (file)
@@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
                                goto out_unlock;
                        }
                        rc = 0;
-                       schedule_delayed_work(&conn->tx_work,
-                                             SMC_TX_WORK_DELAY);
+                       if (conn->alert_token_local) /* connection healthy */
+                               schedule_delayed_work(&conn->tx_work,
+                                                     SMC_TX_WORK_DELAY);
                }
                goto out_unlock;
        }
@@ -440,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
        int rc;
 
        lock_sock(&smc->sk);
+       if (smc->sk.sk_err ||
+           !conn->alert_token_local ||
+           conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+               goto out;
+
        rc = smc_tx_sndbuf_nonempty(conn);
        if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
            !atomic_read(&conn->bytes_to_rcv))
                conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
        release_sock(&smc->sk);
 }
 
@@ -464,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
            ((to_confirm > conn->rmbe_update_limit) &&
             ((to_confirm > (conn->rmbe_size / 2)) ||
              conn->local_rx_ctrl.prod_flags.write_blocked))) {
-               if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
+               if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+                   conn->alert_token_local) { /* connection healthy */
                        schedule_delayed_work(&conn->tx_work,
                                              SMC_TX_WORK_DELAY);
                        return;
index 621c65850a18f93e1c33b37675f34bfe1ddf6161..1b8af23e6e2b96fc12fd1985eab5c8f16d407003 100644 (file)
@@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
                            struct smc_wr_tx_pend_priv **wr_pend_priv)
 {
        struct smc_wr_tx_pend *wr_pend;
+       u32 idx = link->wr_tx_cnt;
        struct ib_send_wr *wr_ib;
        u64 wr_id;
-       u32 idx;
        int rc;
 
        *wr_buf = NULL;
@@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
                if (rc)
                        return rc;
        } else {
+               struct smc_link_group *lgr;
+
+               lgr = container_of(link, struct smc_link_group,
+                                  lnk[SMC_SINGLE_LINK]);
                rc = wait_event_timeout(
                        link->wr_tx_wait,
+                       list_empty(&lgr->list) || /* lgr terminated */
                        (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
                        SMC_WR_TX_WAIT_FREE_SLOT_TIME);
                if (!rc) {
                        /* timeout - terminate connections */
-                       struct smc_link_group *lgr;
-
-                       lgr = container_of(link, struct smc_link_group,
-                                          lnk[SMC_SINGLE_LINK]);
                        smc_lgr_terminate(lgr);
                        return -EPIPE;
                }