net/smc: don't send CDC/LLC message if link not ready
author Dust Li <dust.li@linux.alibaba.com>
Tue, 28 Dec 2021 09:03:24 +0000 (17:03 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 5 Jan 2022 11:40:31 +0000 (12:40 +0100)
[ Upstream commit 90cee52f2e780345d3629e278291aea5ac74f40f ]

We found that smc_llc_send_link_delete_all() sometimes waits
for the full 2s timeout when testing with RDMA link up/down.
It is possible that, while an smc_link is in ACTIVATING state,
the underlying QP is still in RESET or RTR state, in which
it cannot send any messages out.

smc_llc_send_link_delete_all() uses smc_link_usable() to
check whether the link is usable. If the QP is still in
RESET or RTR state while the smc_link is in ACTIVATING, this
LLC message will always fail without any CQE entering the
CQ, and we will always wait 2s before timing out.

Since we cannot send any messages through the QP before
the QP enters RTS, this patch adds a wrapper, smc_link_sendable(),
which checks the state of the QP along with the link state,
and replaces smc_link_usable() with smc_link_sendable()
in all LLC & CDC message sending routines.

Fixes: 5f08318f617b ("smc: connection data control (CDC)")
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_llc.c
net/smc/smc_wr.c
net/smc/smc_wr.h

index 135949ef85b3cc00d4cb0cdea3595d77589fdc4e..fb4327a81a0f0af7af06521d609dae3b53dece8a 100644 (file)
@@ -226,7 +226,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];
 
-               if (smc_link_usable(lnk))
+               if (smc_link_sendable(lnk))
                        lnk->state = SMC_LNK_INACTIVE;
        }
        wake_up_all(&lgr->llc_msg_waiter);
index 4745a9a5a28f5d921d816bdfee0b74f33575a2d7..9364d0f35ccecf7352ce59a9ae26ef2b7d7df686 100644 (file)
@@ -359,6 +359,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
        return true;
 }
 
+static inline bool smc_link_sendable(struct smc_link *lnk)
+{
+       return smc_link_usable(lnk) &&
+               lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
+}
+
 static inline bool smc_link_active(struct smc_link *lnk)
 {
        return lnk->state == SMC_LNK_ACTIVE;
index f1d323439a2af3658e7997cc0e8e62319f719100..ee1f0fdba08558690b5f20e8756e9ab83f2138d9 100644 (file)
@@ -1358,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
        delllc.reason = htonl(rsn);
 
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-               if (!smc_link_usable(&lgr->lnk[i]))
+               if (!smc_link_sendable(&lgr->lnk[i]))
                        continue;
                if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
                        break;
index a71c9631f1ad3c0305528d25459730b8bf533a92..cae22d240e0a624641daf187e2dc1f3111a9ad0d 100644 (file)
@@ -169,7 +169,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
 static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
 {
        *idx = link->wr_tx_cnt;
-       if (!smc_link_usable(link))
+       if (!smc_link_sendable(link))
                return -ENOLINK;
        for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
                if (!test_and_set_bit(*idx, link->wr_tx_mask))
@@ -212,7 +212,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
        } else {
                rc = wait_event_interruptible_timeout(
                        link->wr_tx_wait,
-                       !smc_link_usable(link) ||
+                       !smc_link_sendable(link) ||
                        lgr->terminating ||
                        (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
                        SMC_WR_TX_WAIT_FREE_SLOT_TIME);
index 2bc626f230a56dca33fb5617972443c8aecf98ee..102d515757ee2633f9d298a08f5de4268cccd533 100644 (file)
@@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
 
 static inline bool smc_wr_tx_link_hold(struct smc_link *link)
 {
-       if (!smc_link_usable(link))
+       if (!smc_link_sendable(link))
                return false;
        atomic_inc(&link->wr_tx_refcnt);
        return true;