From: Shyam Prasad N <nspmangalore@gmail.com>
To: smfrench@gmail.com, bharathsm.hsk@gmail.com, pc@cjr.nz,
tom@talpey.com, linux-cifs@vger.kernel.org
Cc: Shyam Prasad N <sprasad@microsoft.com>
Subject: [PATCH 08/11] cifs: distribute channels across interfaces based on speed
Date: Fri, 10 Mar 2023 15:32:07 +0000 [thread overview]
Message-ID: <20230310153211.10982-8-sprasad@microsoft.com> (raw)
In-Reply-To: <20230310153211.10982-1-sprasad@microsoft.com>
Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and it makes little attempt to
distribute the connections.
This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.
Also make sure that we don't mix RDMA and non-RDMA interfaces for channels.
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
fs/cifs/cifs_debug.c | 16 +++++++++++
fs/cifs/cifsglob.h | 2 ++
fs/cifs/sess.c | 67 +++++++++++++++++++++++++++++++++++---------
3 files changed, 71 insertions(+), 14 deletions(-)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 4391c7aac3cb..cee3af02e2c3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -219,6 +219,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_server_iface *iface;
+ size_t iface_weight = 0, iface_min_speed = 0;
+ struct cifs_server_iface *last_iface = NULL;
int c, i, j;
seq_puts(m,
@@ -465,11 +467,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"\tLast updated: %lu seconds ago",
ses->iface_count,
(jiffies - ses->iface_last_update) / HZ);
+
+ last_iface = list_last_entry(&ses->iface_list,
+ struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
j = 0;
list_for_each_entry(iface, &ses->iface_list,
iface_head) {
seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface);
+
+ iface_weight = iface->speed / iface_min_speed;
+ seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+ "\n\t\tAllocated channels: %u\n",
+ iface->weight_fulfilled,
+ iface_weight,
+ iface->num_channels);
+
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a11e7b10f607..e3ba5c979832 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -948,6 +948,8 @@ struct cifs_server_iface {
struct list_head iface_head;
struct kref refcount;
size_t speed;
+ size_t weight_fulfilled;
+ unsigned int num_channels;
unsigned int rdma_capable : 1;
unsigned int rss_capable : 1;
unsigned int is_active : 1; /* unset if non existent */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index b8bfebe4498e..78a7cfa75e91 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -167,7 +167,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
int left;
int rc = 0;
int tries = 0;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL, *niface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
spin_lock(&ses->chan_lock);
@@ -196,21 +198,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
}
spin_unlock(&ses->chan_lock);
- /*
- * Keep connecting to same, fastest, iface for all channels as
- * long as its RSS. Try next fastest one if not RSS or channel
- * creation fails.
- */
- spin_lock(&ses->iface_lock);
- iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
- iface_head);
- spin_unlock(&ses->iface_lock);
-
while (left > 0) {
tries++;
if (tries > 3*ses->chan_max) {
- cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+ cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
left);
break;
}
@@ -218,17 +210,34 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
break;
}
+ if (!iface)
+ iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != ses->server->rdma)
+ continue;
+
/* skip ifaces that are unusable */
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
- !iface->rss_capable)) {
+ !iface->rss_capable))
+ continue;
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
continue;
- }
/* take ref before unlock */
kref_get(&iface->refcount);
@@ -245,10 +254,17 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
continue;
}
- cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+ iface->num_channels++;
+ iface->weight_fulfilled++;
+ cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
&iface->sockaddr);
break;
}
+
+ /* reached end of list. reset weight_fulfilled */
+ if (list_entry_is_head(iface, &ses->iface_list, iface_head))
+ list_for_each_entry(iface, &ses->iface_list, iface_head)
+ iface->weight_fulfilled = 0;
spin_unlock(&ses->iface_lock);
left--;
@@ -267,8 +283,10 @@ int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
{
unsigned int chan_index;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL;
struct cifs_server_iface *old_iface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
int rc = 0;
spin_lock(&ses->chan_lock);
@@ -288,13 +306,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_unlock(&ses->chan_lock);
spin_lock(&ses->iface_lock);
+ if (!ses->iface_count) {
+ spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ return 0;
+ }
+
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
/* then look for a new one */
list_for_each_entry(iface, &ses->iface_list, iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != server->rdma)
+ continue;
+
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) {
continue;
}
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
+ continue;
+
kref_get(&iface->refcount);
break;
}
--
2.34.1
next prev parent reply other threads:[~2023-03-10 15:45 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-10 15:32 [PATCH 01/11] cifs: fix tcon status change after tree connect Shyam Prasad N
2023-03-10 15:32 ` [PATCH 02/11] cifs: generate signkey for the channel that's reconnecting Shyam Prasad N
2023-03-10 15:32 ` [PATCH 03/11] cifs: avoid race conditions with parallel reconnects Shyam Prasad N
2023-03-10 15:32 ` [PATCH 04/11] cifs: serialize channel reconnects Shyam Prasad N
2023-03-10 22:40 ` Steve French
2023-03-10 15:32 ` [PATCH 05/11] cifs: lock chan_lock outside match_session Shyam Prasad N
2023-03-10 15:32 ` [PATCH 06/11] cifs: fix sockaddr comparison in iface_cmp Shyam Prasad N
2023-03-11 4:51 ` kernel test robot
2023-03-10 15:32 ` [PATCH 07/11] cifs: do not poll server interfaces too regularly Shyam Prasad N
2023-03-10 15:32 ` Shyam Prasad N [this message]
2024-02-27 11:16 ` [PATCH 08/11] cifs: distribute channels across interfaces based on speed Jan Čermák
2024-02-27 16:17 ` Shyam Prasad N
2024-02-28 9:22 ` Jan Čermák
2024-03-05 14:56 ` Shyam Prasad N
2024-03-06 15:43 ` Paulo Alcantara
2024-03-11 10:01 ` Jan Čermák
2024-03-11 11:14 ` Shyam Prasad N
2024-03-12 14:20 ` Jan Čermák
2024-03-13 10:45 ` Shyam Prasad N
2024-03-26 14:10 ` Jan Čermák
2023-03-10 15:32 ` [PATCH 09/11] cifs: account for primary channel in the interface list Shyam Prasad N
2023-03-13 5:27 ` kernel test robot
2023-03-10 15:32 ` [PATCH 10/11] cifs: handle when server stops supporting multichannel Shyam Prasad N
2023-03-13 6:09 ` kernel test robot
2023-03-10 15:32 ` [PATCH 11/11] cifs: empty interface list when server doesn't support query interfaces Shyam Prasad N
2023-03-14 22:19 ` [PATCH 01/11] cifs: fix tcon status change after tree connect Paulo Alcantara
2023-03-16 10:57 ` Shyam Prasad N
2023-03-16 20:59 ` Paulo Alcantara
2023-03-17 10:48 ` Shyam Prasad N
2023-03-17 12:35 ` Paulo Alcantara
2023-03-17 18:25 ` Steve French
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230310153211.10982-8-sprasad@microsoft.com \
--to=nspmangalore@gmail.com \
--cc=bharathsm.hsk@gmail.com \
--cc=linux-cifs@vger.kernel.org \
--cc=pc@cjr.nz \
--cc=smfrench@gmail.com \
--cc=sprasad@microsoft.com \
--cc=tom@talpey.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.