From: Shyam Prasad N <nspmangalore@gmail.com>
To: smfrench@gmail.com, bharathsm.hsk@gmail.com, pc@cjr.nz,
tom@talpey.com, linux-cifs@vger.kernel.org
Cc: Shyam Prasad N <sprasad@microsoft.com>
Subject: [PATCH 08/11] cifs: distribute channels across interfaces based on speed
Date: Fri, 10 Mar 2023 15:32:07 +0000 [thread overview]
Message-ID: <20230310153211.10982-8-sprasad@microsoft.com> (raw)
In-Reply-To: <20230310153211.10982-1-sprasad@microsoft.com>
Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and does not make much of an attempt to
distribute the connections.
This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.
Also make sure that we don't mix RDMA and non-RDMA interfaces for channels.
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
fs/cifs/cifs_debug.c | 16 +++++++++++
fs/cifs/cifsglob.h | 2 ++
fs/cifs/sess.c | 67 +++++++++++++++++++++++++++++++++++---------
3 files changed, 71 insertions(+), 14 deletions(-)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 4391c7aac3cb..cee3af02e2c3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -219,6 +219,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_server_iface *iface;
+ size_t iface_weight = 0, iface_min_speed = 0;
+ struct cifs_server_iface *last_iface = NULL;
int c, i, j;
seq_puts(m,
@@ -465,11 +467,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"\tLast updated: %lu seconds ago",
ses->iface_count,
(jiffies - ses->iface_last_update) / HZ);
+
+ last_iface = list_last_entry(&ses->iface_list,
+ struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
j = 0;
list_for_each_entry(iface, &ses->iface_list,
iface_head) {
seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface);
+
+ iface_weight = iface->speed / iface_min_speed;
+ seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+ "\n\t\tAllocated channels: %u\n",
+ iface->weight_fulfilled,
+ iface_weight,
+ iface->num_channels);
+
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a11e7b10f607..e3ba5c979832 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -948,6 +948,8 @@ struct cifs_server_iface {
struct list_head iface_head;
struct kref refcount;
size_t speed;
+ size_t weight_fulfilled;
+ unsigned int num_channels;
unsigned int rdma_capable : 1;
unsigned int rss_capable : 1;
unsigned int is_active : 1; /* unset if non existent */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index b8bfebe4498e..78a7cfa75e91 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -167,7 +167,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
int left;
int rc = 0;
int tries = 0;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL, *niface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
spin_lock(&ses->chan_lock);
@@ -196,21 +198,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
}
spin_unlock(&ses->chan_lock);
- /*
- * Keep connecting to same, fastest, iface for all channels as
- * long as its RSS. Try next fastest one if not RSS or channel
- * creation fails.
- */
- spin_lock(&ses->iface_lock);
- iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
- iface_head);
- spin_unlock(&ses->iface_lock);
-
while (left > 0) {
tries++;
if (tries > 3*ses->chan_max) {
- cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+ cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
left);
break;
}
@@ -218,17 +210,34 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
break;
}
+ if (!iface)
+ iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != ses->server->rdma)
+ continue;
+
/* skip ifaces that are unusable */
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
- !iface->rss_capable)) {
+ !iface->rss_capable))
+ continue;
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
continue;
- }
/* take ref before unlock */
kref_get(&iface->refcount);
@@ -245,10 +254,17 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
continue;
}
- cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+ iface->num_channels++;
+ iface->weight_fulfilled++;
+ cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
&iface->sockaddr);
break;
}
+
+ /* reached end of list. reset weight_fulfilled */
+ if (list_entry_is_head(iface, &ses->iface_list, iface_head))
+ list_for_each_entry(iface, &ses->iface_list, iface_head)
+ iface->weight_fulfilled = 0;
spin_unlock(&ses->iface_lock);
left--;
@@ -267,8 +283,10 @@ int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
{
unsigned int chan_index;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL;
struct cifs_server_iface *old_iface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
int rc = 0;
spin_lock(&ses->chan_lock);
@@ -288,13 +306,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_unlock(&ses->chan_lock);
spin_lock(&ses->iface_lock);
+ if (!ses->iface_count) {
+ spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ return 0;
+ }
+
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
/* then look for a new one */
list_for_each_entry(iface, &ses->iface_list, iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != server->rdma)
+ continue;
+
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) {
continue;
}
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
+ continue;
+
kref_get(&iface->refcount);
break;
}
--
2.34.1
next prev parent reply other threads:[~2023-03-10 15:45 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-10 15:32 [PATCH 01/11] cifs: fix tcon status change after tree connect Shyam Prasad N
2023-03-10 15:32 ` [PATCH 02/11] cifs: generate signkey for the channel that's reconnecting Shyam Prasad N
2023-03-10 15:32 ` [PATCH 03/11] cifs: avoid race conditions with parallel reconnects Shyam Prasad N
2023-03-10 15:32 ` [PATCH 04/11] cifs: serialize channel reconnects Shyam Prasad N
2023-03-10 22:40 ` Steve French
2023-03-10 15:32 ` [PATCH 05/11] cifs: lock chan_lock outside match_session Shyam Prasad N
2023-03-10 15:32 ` [PATCH 06/11] cifs: fix sockaddr comparison in iface_cmp Shyam Prasad N
2023-03-11 4:51 ` kernel test robot
2023-03-10 15:32 ` [PATCH 07/11] cifs: do not poll server interfaces too regularly Shyam Prasad N
2023-03-10 15:32 ` Shyam Prasad N [this message]
2024-02-27 11:16 ` [PATCH 08/11] cifs: distribute channels across interfaces based on speed Jan Čermák
2024-02-27 16:17 ` Shyam Prasad N
2024-02-28 9:22 ` Jan Čermák
2024-03-05 14:56 ` Shyam Prasad N
2024-03-06 15:43 ` Paulo Alcantara
2024-03-11 10:01 ` Jan Čermák
2024-03-11 11:14 ` Shyam Prasad N
2024-03-12 14:20 ` Jan Čermák
2024-03-13 10:45 ` Shyam Prasad N
2024-03-26 14:10 ` Jan Čermák
2023-03-10 15:32 ` [PATCH 09/11] cifs: account for primary channel in the interface list Shyam Prasad N
2023-03-13 5:27 ` kernel test robot
2023-03-10 15:32 ` [PATCH 10/11] cifs: handle when server stops supporting multichannel Shyam Prasad N
2023-03-13 6:09 ` kernel test robot
2023-03-10 15:32 ` [PATCH 11/11] cifs: empty interface list when server doesn't support query interfaces Shyam Prasad N
2023-03-14 22:19 ` [PATCH 01/11] cifs: fix tcon status change after tree connect Paulo Alcantara
2023-03-16 10:57 ` Shyam Prasad N
2023-03-16 20:59 ` Paulo Alcantara
2023-03-17 10:48 ` Shyam Prasad N
2023-03-17 12:35 ` Paulo Alcantara
2023-03-17 18:25 ` Steve French
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230310153211.10982-8-sprasad@microsoft.com \
--to=nspmangalore@gmail.com \
--cc=bharathsm.hsk@gmail.com \
--cc=linux-cifs@vger.kernel.org \
--cc=pc@cjr.nz \
--cc=smfrench@gmail.com \
--cc=sprasad@microsoft.com \
--cc=tom@talpey.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox