net/smc: log important pnetid and state change events
author Karsten Graul <kgraul@linux.ibm.com>
Tue, 5 May 2020 13:01:20 +0000 (15:01 +0200)
committer David S. Miller <davem@davemloft.net>
Tue, 5 May 2020 19:56:52 +0000 (12:56 -0700)
Print to the system log when SMC links become available or go down, when
the link group state changes, or when pnetids are applied to or removed
from devices.
The log entries are triggered by either user configuration actions or
adapter activation/deactivation events and are not expected to happen
often. The entries help SMC users keep track of the SMC link group
status and detect when action is needed (such as adding replacements
for failed adapters).

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/af_smc.c
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_ib.c
net/smc/smc_ism.c
net/smc/smc_llc.c
net/smc/smc_llc.h
net/smc/smc_pnet.c

index 4e4421c95ca17007b6b1a9084f80468b32b38795..9033215438384b8f4a8b9872184c082d58c5afeb 100644 (file)
@@ -378,8 +378,6 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
        struct smc_llc_qentry *qentry;
        int rc;
 
-       link->lgr->type = SMC_LGR_SINGLE;
-
        /* receive CONFIRM LINK request from server over RoCE fabric */
        qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
                              SMC_LLC_CONFIRM_LINK);
@@ -414,6 +412,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
                return SMC_CLC_DECL_TIMEOUT_CL;
 
        smc_llc_link_active(link);
+       smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
 
        /* optional 2nd link, receive ADD LINK request from server */
        qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
@@ -1037,8 +1036,6 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
        struct smc_llc_qentry *qentry;
        int rc;
 
-       link->lgr->type = SMC_LGR_SINGLE;
-
        if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
                return SMC_CLC_DECL_ERR_REGRMB;
 
@@ -1067,6 +1064,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
        smc->conn.rmb_desc->is_conf_rkey = true;
 
        smc_llc_link_active(link);
+       smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
 
        /* initial contact - try to establish second link */
        smc_llc_srv_add_link(link);
index fb5f685ff494649243c2428876c3951c3436d2f7..65de700e1f176c960530062b53501a07a5d59822 100644 (file)
@@ -369,7 +369,7 @@ dealloc_pd:
 free_link_mem:
        smc_wr_free_link_mem(lnk);
 clear_llc_lnk:
-       smc_llc_link_clear(lnk);
+       smc_llc_link_clear(lnk, false);
 out:
        put_device(&ini->ib_dev->ibdev->dev);
        memset(lnk, 0, sizeof(struct smc_link));
@@ -718,14 +718,14 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
 }
 
 /* must be called under lgr->llc_conf_mutex lock */
-void smcr_link_clear(struct smc_link *lnk)
+void smcr_link_clear(struct smc_link *lnk, bool log)
 {
        struct smc_ib_device *smcibdev;
 
        if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
                return;
        lnk->peer_qpn = 0;
-       smc_llc_link_clear(lnk);
+       smc_llc_link_clear(lnk, log);
        smcr_buf_unmap_lgr(lnk);
        smcr_rtoken_clear_link(lnk);
        smc_ib_modify_qp_reset(lnk);
@@ -812,7 +812,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
                mutex_lock(&lgr->llc_conf_mutex);
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (lgr->lnk[i].state != SMC_LNK_UNUSED)
-                               smcr_link_clear(&lgr->lnk[i]);
+                               smcr_link_clear(&lgr->lnk[i], false);
                }
                mutex_unlock(&lgr->llc_conf_mutex);
                smc_llc_lgr_clear(lgr);
@@ -1040,12 +1040,36 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 /* set new lgr type and clear all asymmetric link tagging */
 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
 {
+       char *lgr_type = "";
        int i;
 
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
                if (smc_link_usable(&lgr->lnk[i]))
                        lgr->lnk[i].link_is_asym = false;
+       if (lgr->type == new_type)
+               return;
        lgr->type = new_type;
+
+       switch (lgr->type) {
+       case SMC_LGR_NONE:
+               lgr_type = "NONE";
+               break;
+       case SMC_LGR_SINGLE:
+               lgr_type = "SINGLE";
+               break;
+       case SMC_LGR_SYMMETRIC:
+               lgr_type = "SYMMETRIC";
+               break;
+       case SMC_LGR_ASYMMETRIC_PEER:
+               lgr_type = "ASYMMETRIC_PEER";
+               break;
+       case SMC_LGR_ASYMMETRIC_LOCAL:
+               lgr_type = "ASYMMETRIC_LOCAL";
+               break;
+       }
+       pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
+                           "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
+                           lgr_type, lgr->pnet_id);
 }
 
 /* set new lgr type and tag a link as asymmetric */
@@ -1146,7 +1170,7 @@ static void smcr_link_down(struct smc_link *lnk)
        smc_ib_modify_qp_reset(lnk);
        to_lnk = smc_switch_conns(lgr, lnk, true);
        if (!to_lnk) { /* no backup link available */
-               smcr_link_clear(lnk);
+               smcr_link_clear(lnk, true);
                return;
        }
        smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
index 4ae76802214ff5c26f8608942d8094c441053a57..86d160f0d187b450426f0623d812acd1be37cab8 100644 (file)
@@ -383,7 +383,7 @@ void smc_core_exit(void);
 
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                   u8 link_idx, struct smc_init_info *ini);
-void smcr_link_clear(struct smc_link *lnk);
+void smcr_link_clear(struct smc_link *lnk, bool log);
 int smcr_buf_map_lgr(struct smc_link *lnk);
 int smcr_buf_reg_lgr(struct smc_link *lnk);
 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
index 2c743caad69a12a590be327ebc846a5cf557a5db..f0a5064bf9bd756d21c0bfaecdfafd63a4fa02b2 100644 (file)
@@ -575,6 +575,8 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
 
        /* trigger reading of the port attributes */
        port_cnt = smcibdev->ibdev->phys_port_cnt;
+       pr_warn_ratelimited("smc: adding ib device %s with port count %d\n",
+                           smcibdev->ibdev->name, port_cnt);
        for (i = 0;
             i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
             i++) {
@@ -583,6 +585,13 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
                if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
                                           smcibdev->pnetid[i]))
                        smc_pnetid_by_table_ib(smcibdev, i + 1);
+               pr_warn_ratelimited("smc:    ib device %s port %d has pnetid "
+                                   "%.16s%s\n",
+                                   smcibdev->ibdev->name, i + 1,
+                                   smcibdev->pnetid[i],
+                                   smcibdev->pnetid_by_user[i] ?
+                                    " (user defined)" :
+                                    "");
        }
        schedule_work(&smcibdev->port_event_work);
 }
@@ -599,6 +608,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
        spin_lock(&smc_ib_devices.lock);
        list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
        spin_unlock(&smc_ib_devices.lock);
+       pr_warn_ratelimited("smc: removing ib device %s\n",
+                           smcibdev->ibdev->name);
        smc_smcr_terminate_all(smcibdev);
        smc_ib_cleanup_per_ibdev(smcibdev);
        ib_unregister_event_handler(&smcibdev->event_handler);
index 32be2da2cb8524cf9ba82582c4efe087a88dbc56..91f85fc09fb8dacf2d87b118efedba9f6f558e0e 100644 (file)
@@ -321,12 +321,18 @@ int smcd_register_dev(struct smcd_dev *smcd)
        list_add_tail(&smcd->list, &smcd_dev_list.list);
        spin_unlock(&smcd_dev_list.lock);
 
+       pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
+                           dev_name(&smcd->dev), smcd->pnetid,
+                           smcd->pnetid_by_user ? " (user defined)" : "");
+
        return device_add(&smcd->dev);
 }
 EXPORT_SYMBOL_GPL(smcd_register_dev);
 
 void smcd_unregister_dev(struct smcd_dev *smcd)
 {
+       pr_warn_ratelimited("smc: removing smcd device %s\n",
+                           dev_name(&smcd->dev));
        spin_lock(&smcd_dev_list.lock);
        list_del_init(&smcd->list);
        spin_unlock(&smcd_dev_list.lock);
index 66ddc9cf5e2fd1db4e974c104e0dfa5aa5f45094..4cc583678ac7d8d261600e3072ca687f77dc82cf 100644 (file)
@@ -870,7 +870,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
        if (!rc)
                goto out;
 out_clear_lnk:
-       smcr_link_clear(lnk_new);
+       smcr_link_clear(lnk_new, false);
 out_reject:
        smc_llc_cli_add_link_reject(qentry);
 out:
@@ -977,7 +977,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
        }
        smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 out_free:
-       smcr_link_clear(lnk_asym);
+       smcr_link_clear(lnk_asym, true);
 }
 
 static int smc_llc_srv_rkey_exchange(struct smc_link *link,
@@ -1121,7 +1121,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
                goto out_err;
        return 0;
 out_err:
-       smcr_link_clear(link_new);
+       smcr_link_clear(link_new, false);
        return rc;
 }
 
@@ -1227,7 +1227,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
                smc_switch_conns(lgr, lnk_del, false);
                smc_wr_tx_wait_no_pending_sends(lnk_del);
        }
-       smcr_link_clear(lnk_del);
+       smcr_link_clear(lnk_del, true);
 
        active_links = smc_llc_active_link_count(lgr);
        if (lnk_del == lnk_asym) {
@@ -1320,7 +1320,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
                        }
                }
        }
-       smcr_link_clear(lnk_del);
+       smcr_link_clear(lnk_del, true);
 
        active_links = smc_llc_active_link_count(lgr);
        if (active_links == 1) {
@@ -1711,6 +1711,12 @@ int smc_llc_link_init(struct smc_link *link)
 
 void smc_llc_link_active(struct smc_link *link)
 {
+       pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, "
+                           "peerid %*phN, ibdev %s, ibport %d\n",
+                           SMC_LGR_ID_SIZE, &link->lgr->id,
+                           SMC_LGR_ID_SIZE, &link->link_uid,
+                           SMC_LGR_ID_SIZE, &link->peer_link_uid,
+                           link->smcibdev->ibdev->name, link->ibport);
        link->state = SMC_LNK_ACTIVE;
        if (link->lgr->llc_testlink_time) {
                link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
@@ -1720,8 +1726,15 @@ void smc_llc_link_active(struct smc_link *link)
 }
 
 /* called in worker context */
-void smc_llc_link_clear(struct smc_link *link)
+void smc_llc_link_clear(struct smc_link *link, bool log)
 {
+       if (log)
+               pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN"
+                                   ", peerid %*phN, ibdev %s, ibport %d\n",
+                                   SMC_LGR_ID_SIZE, &link->lgr->id,
+                                   SMC_LGR_ID_SIZE, &link->link_uid,
+                                   SMC_LGR_ID_SIZE, &link->peer_link_uid,
+                                   link->smcibdev->ibdev->name, link->ibport);
        complete(&link->llc_testlink_resp);
        cancel_delayed_work_sync(&link->llc_testlink_wrk);
        smc_wr_wakeup_reg_wait(link);
index 55287376112d974e6b1817c15515300203c88c97..a5d2fe3eea61ff475294583e609a03e5f94716cb 100644 (file)
@@ -82,7 +82,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
 void smc_llc_lgr_clear(struct smc_link_group *lgr);
 int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link);
-void smc_llc_link_clear(struct smc_link *link);
+void smc_llc_link_clear(struct smc_link *link, bool log);
 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
                            struct smc_buf_desc *rmb_desc);
 int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
index 50c96e843fab5fa7e03d63f5d4eceac5368af4f9..be03f1260d595a694ac9fc783fd1598897248c5b 100644 (file)
@@ -110,8 +110,14 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
                if (!pnet_name ||
                    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
                        list_del(&pnetelem->list);
-                       if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev)
+                       if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
                                dev_put(pnetelem->ndev);
+                               pr_warn_ratelimited("smc: net device %s "
+                                                   "erased user defined "
+                                                   "pnetid %.16s\n",
+                                                   pnetelem->eth_name,
+                                                   pnetelem->pnet_name);
+                       }
                        kfree(pnetelem);
                        rc = 0;
                }
@@ -130,6 +136,12 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
                            (!pnet_name ||
                             smc_pnet_match(pnet_name,
                                            ibdev->pnetid[ibport]))) {
+                               pr_warn_ratelimited("smc: ib device %s ibport "
+                                                   "%d erased user defined "
+                                                   "pnetid %.16s\n",
+                                                   ibdev->ibdev->name,
+                                                   ibport + 1,
+                                                   ibdev->pnetid[ibport]);
                                memset(ibdev->pnetid[ibport], 0,
                                       SMC_MAX_PNETID_LEN);
                                ibdev->pnetid_by_user[ibport] = false;
@@ -144,6 +156,10 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
                if (smcd_dev->pnetid_by_user &&
                    (!pnet_name ||
                     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+                       pr_warn_ratelimited("smc: smcd device %s "
+                                           "erased user defined pnetid "
+                                           "%.16s\n", dev_name(&smcd_dev->dev),
+                                           smcd_dev->pnetid);
                        memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
                        smcd_dev->pnetid_by_user = false;
                        rc = 0;
@@ -174,6 +190,10 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
                        dev_hold(ndev);
                        pnetelem->ndev = ndev;
                        rc = 0;
+                       pr_warn_ratelimited("smc: adding net device %s with "
+                                           "user defined pnetid %.16s\n",
+                                           pnetelem->eth_name,
+                                           pnetelem->pnet_name);
                        break;
                }
        }
@@ -201,6 +221,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
                        dev_put(pnetelem->ndev);
                        pnetelem->ndev = NULL;
                        rc = 0;
+                       pr_warn_ratelimited("smc: removing net device %s with "
+                                           "user defined pnetid %.16s\n",
+                                           pnetelem->eth_name,
+                                           pnetelem->pnet_name);
                        break;
                }
        }
@@ -357,6 +381,10 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
                kfree(new_pe);
                goto out_put;
        }
+       if (ndev)
+               pr_warn_ratelimited("smc: net device %s "
+                                   "applied user defined pnetid %.16s\n",
+                                   new_pe->eth_name, new_pe->pnet_name);
        return 0;
 
 out_put:
@@ -377,11 +405,24 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
 
        /* try to apply the pnetid to active devices */
        ib_dev = smc_pnet_find_ib(ib_name);
-       if (ib_dev)
+       if (ib_dev) {
                ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
+               if (ibdev_applied)
+                       pr_warn_ratelimited("smc: ib device %s ibport %d "
+                                           "applied user defined pnetid "
+                                           "%.16s\n", ib_dev->ibdev->name,
+                                           ib_port,
+                                           ib_dev->pnetid[ib_port - 1]);
+       }
        smcd_dev = smc_pnet_find_smcd(ib_name);
-       if (smcd_dev)
+       if (smcd_dev) {
                smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
+               if (smcddev_applied)
+                       pr_warn_ratelimited("smc: smcd device %s "
+                                           "applied user defined pnetid "
+                                           "%.16s\n", dev_name(&smcd_dev->dev),
+                                           smcd_dev->pnetid);
+       }
        /* Apply fails when a device has a hardware-defined pnetid set, do not
         * add a pnet table entry in that case.
         */