cifs: distribute channels across interfaces based on speed
author Shyam Prasad N <sprasad@microsoft.com>
Mon, 26 Dec 2022 11:24:56 +0000 (11:24 +0000)
committer Steve French <stfrench@microsoft.com>
Thu, 9 Nov 2023 16:25:17 +0000 (10:25 -0600)
Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and it makes little attempt to
distribute the connections.

This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.

Also make sure that we don't mix RDMA and non-RDMA interfaces for channels.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
fs/smb/client/cifs_debug.c
fs/smb/client/cifsglob.h
fs/smb/client/sess.c

index 772d4226957f94b1af313fb4dd9d47ad2bac1d09..5596c9f30ccb280f78834ee7dee05c437c265ce4 100644 (file)
@@ -284,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        struct cifs_ses *ses;
        struct cifs_tcon *tcon;
        struct cifs_server_iface *iface;
+       size_t iface_weight = 0, iface_min_speed = 0;
+       struct cifs_server_iface *last_iface = NULL;
        int c, i, j;
 
        seq_puts(m,
@@ -549,11 +551,25 @@ skip_rdma:
                                           "\tLast updated: %lu seconds ago",
                                           ses->iface_count,
                                           (jiffies - ses->iface_last_update) / HZ);
+
+                       last_iface = list_last_entry(&ses->iface_list,
+                                                    struct cifs_server_iface,
+                                                    iface_head);
+                       iface_min_speed = last_iface->speed;
+
                        j = 0;
                        list_for_each_entry(iface, &ses->iface_list,
                                                 iface_head) {
                                seq_printf(m, "\n\t%d)", ++j);
                                cifs_dump_iface(m, iface);
+
+                               iface_weight = iface->speed / iface_min_speed;
+                               seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+                                          "\n\t\tAllocated channels: %u\n",
+                                          iface->weight_fulfilled,
+                                          iface_weight,
+                                          iface->num_channels);
+
                                if (is_ses_using_iface(ses, iface))
                                        seq_puts(m, "\t\t[CONNECTED]\n");
                        }
index eae138040edf9bc548ff0cd69ad75ef801518520..30763c68cc39b24283b4f10b26bca644e052e553 100644 (file)
@@ -969,6 +969,8 @@ struct cifs_server_iface {
        struct list_head iface_head;
        struct kref refcount;
        size_t speed;
+       size_t weight_fulfilled;
+       unsigned int num_channels;
        unsigned int rdma_capable : 1;
        unsigned int rss_capable : 1;
        unsigned int is_active : 1; /* unset if non existent */
index d13a2461371051c9b9e6152a5bb232287dbd94c7..336b64d93e41cec9c45f0bb75545e31515dcfa03 100644 (file)
@@ -179,7 +179,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
        int left;
        int rc = 0;
        int tries = 0;
+       size_t iface_weight = 0, iface_min_speed = 0;
        struct cifs_server_iface *iface = NULL, *niface = NULL;
+       struct cifs_server_iface *last_iface = NULL;
 
        spin_lock(&ses->chan_lock);
 
@@ -207,21 +209,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
        }
        spin_unlock(&ses->chan_lock);
 
-       /*
-        * Keep connecting to same, fastest, iface for all channels as
-        * long as its RSS. Try next fastest one if not RSS or channel
-        * creation fails.
-        */
-       spin_lock(&ses->iface_lock);
-       iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
-                                iface_head);
-       spin_unlock(&ses->iface_lock);
-
        while (left > 0) {
 
                tries++;
                if (tries > 3*ses->chan_max) {
-                       cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+                       cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
                                 left);
                        break;
                }
@@ -229,17 +221,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
                spin_lock(&ses->iface_lock);
                if (!ses->iface_count) {
                        spin_unlock(&ses->iface_lock);
+                       cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+                                     ses->server->hostname);
                        break;
                }
 
+               if (!iface)
+                       iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+                                                iface_head);
+               last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+                                            iface_head);
+               iface_min_speed = last_iface->speed;
+
                list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
                                    iface_head) {
+                       /* do not mix rdma and non-rdma interfaces */
+                       if (iface->rdma_capable != ses->server->rdma)
+                               continue;
+
                        /* skip ifaces that are unusable */
                        if (!iface->is_active ||
                            (is_ses_using_iface(ses, iface) &&
-                            !iface->rss_capable)) {
+                            !iface->rss_capable))
+                               continue;
+
+                       /* check if we already allocated enough channels */
+                       iface_weight = iface->speed / iface_min_speed;
+
+                       if (iface->weight_fulfilled >= iface_weight)
                                continue;
-                       }
 
                        /* take ref before unlock */
                        kref_get(&iface->refcount);
@@ -256,10 +266,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
                                continue;
                        }
 
-                       cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+                       iface->num_channels++;
+                       iface->weight_fulfilled++;
+                       cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
                                 &iface->sockaddr);
                        break;
                }
+
+               /* reached end of list. reset weight_fulfilled and start over */
+               if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+                       list_for_each_entry(iface, &ses->iface_list, iface_head)
+                               iface->weight_fulfilled = 0;
+                       spin_unlock(&ses->iface_lock);
+                       iface = NULL;
+                       continue;
+               }
                spin_unlock(&ses->iface_lock);
 
                left--;
@@ -278,8 +299,10 @@ int
 cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
 {
        unsigned int chan_index;
+       size_t iface_weight = 0, iface_min_speed = 0;
        struct cifs_server_iface *iface = NULL;
        struct cifs_server_iface *old_iface = NULL;
+       struct cifs_server_iface *last_iface = NULL;
        int rc = 0;
 
        spin_lock(&ses->chan_lock);
@@ -299,13 +322,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
        spin_unlock(&ses->chan_lock);
 
        spin_lock(&ses->iface_lock);
+       if (!ses->iface_count) {
+               spin_unlock(&ses->iface_lock);
+               cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+               return 0;
+       }
+
+       last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+                                    iface_head);
+       iface_min_speed = last_iface->speed;
+
        /* then look for a new one */
        list_for_each_entry(iface, &ses->iface_list, iface_head) {
+               /* do not mix rdma and non-rdma interfaces */
+               if (iface->rdma_capable != server->rdma)
+                       continue;
+
                if (!iface->is_active ||
                    (is_ses_using_iface(ses, iface) &&
                     !iface->rss_capable)) {
                        continue;
                }
+
+               /* check if we already allocated enough channels */
+               iface_weight = iface->speed / iface_min_speed;
+
+               if (iface->weight_fulfilled >= iface_weight)
+                       continue;
+
                kref_get(&iface->refcount);
                break;
        }
@@ -321,10 +365,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
                cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
                         &old_iface->sockaddr,
                         &iface->sockaddr);
+
+               old_iface->num_channels--;
+               if (old_iface->weight_fulfilled)
+                       old_iface->weight_fulfilled--;
+               iface->num_channels++;
+               iface->weight_fulfilled++;
+
                kref_put(&old_iface->refcount, release_iface);
        } else if (old_iface) {
                cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
                         &old_iface->sockaddr);
+
+               old_iface->num_channels--;
+               if (old_iface->weight_fulfilled)
+                       old_iface->weight_fulfilled--;
+
                kref_put(&old_iface->refcount, release_iface);
        } else {
                WARN_ON(!iface);