public inbox for linux-cifs@vger.kernel.org
 help / color / mirror / Atom feed
From: nspmangalore@gmail.com
To: smfrench@gmail.com, pc@manguebit.com, bharathsm.hsk@gmail.com,
	linux-cifs@vger.kernel.org
Cc: Shyam Prasad N <sprasad@microsoft.com>
Subject: [PATCH 07/14] cifs: distribute channels across interfaces based on speed
Date: Mon, 30 Oct 2023 11:00:13 +0000	[thread overview]
Message-ID: <20231030110020.45627-7-sprasad@microsoft.com> (raw)
In-Reply-To: <20231030110020.45627-1-sprasad@microsoft.com>

From: Shyam Prasad N <sprasad@microsoft.com>

Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and makes little attempt to
distribute the connections.

This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.

Also make sure that we don't mix RDMA and non-RDMA interfaces for channels.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cifs_debug.c | 16 ++++++++
 fs/smb/client/cifsglob.h   |  2 +
 fs/smb/client/sess.c       | 83 +++++++++++++++++++++++++++++++-------
 3 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 9fca09539728..e23fcabb78d6 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -284,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
 	struct cifs_server_iface *iface;
+	size_t iface_weight = 0, iface_min_speed = 0;
+	struct cifs_server_iface *last_iface = NULL;
 	int c, i, j;
 
 	seq_puts(m,
@@ -543,11 +545,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 					   "\tLast updated: %lu seconds ago",
 					   ses->iface_count,
 					   (jiffies - ses->iface_last_update) / HZ);
+
+			last_iface = list_last_entry(&ses->iface_list,
+						     struct cifs_server_iface,
+						     iface_head);
+			iface_min_speed = last_iface->speed;
+
 			j = 0;
 			list_for_each_entry(iface, &ses->iface_list,
 						 iface_head) {
 				seq_printf(m, "\n\t%d)", ++j);
 				cifs_dump_iface(m, iface);
+
+				iface_weight = iface->speed / iface_min_speed;
+				seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+					   "\n\t\tAllocated channels: %u\n",
+					   iface->weight_fulfilled,
+					   iface_weight,
+					   iface->num_channels);
+
 				if (is_ses_using_iface(ses, iface))
 					seq_puts(m, "\t\t[CONNECTED]\n");
 			}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 552ed441281a..81e7a45f413d 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -969,6 +969,8 @@ struct cifs_server_iface {
 	struct list_head iface_head;
 	struct kref refcount;
 	size_t speed;
+	size_t weight_fulfilled;
+	unsigned int num_channels;
 	unsigned int rdma_capable : 1;
 	unsigned int rss_capable : 1;
 	unsigned int is_active : 1; /* unset if non existent */
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 9d2228c2d7e5..d009994f82cf 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -178,7 +178,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 	int left;
 	int rc = 0;
 	int tries = 0;
+	size_t iface_weight = 0, iface_min_speed = 0;
 	struct cifs_server_iface *iface = NULL, *niface = NULL;
+	struct cifs_server_iface *last_iface = NULL;
 
 	spin_lock(&ses->chan_lock);
 
@@ -206,21 +208,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 	}
 	spin_unlock(&ses->chan_lock);
 
-	/*
-	 * Keep connecting to same, fastest, iface for all channels as
-	 * long as its RSS. Try next fastest one if not RSS or channel
-	 * creation fails.
-	 */
-	spin_lock(&ses->iface_lock);
-	iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
-				 iface_head);
-	spin_unlock(&ses->iface_lock);
-
 	while (left > 0) {
 
 		tries++;
 		if (tries > 3*ses->chan_max) {
-			cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+			cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
 				 left);
 			break;
 		}
@@ -228,17 +220,34 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 		spin_lock(&ses->iface_lock);
 		if (!ses->iface_count) {
 			spin_unlock(&ses->iface_lock);
+			cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
 			break;
 		}
 
+		if (!iface)
+			iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+						 iface_head);
+		last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+					     iface_head);
+		iface_min_speed = last_iface->speed;
+
 		list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
 				    iface_head) {
+			/* do not mix rdma and non-rdma interfaces */
+			if (iface->rdma_capable != ses->server->rdma)
+				continue;
+
 			/* skip ifaces that are unusable */
 			if (!iface->is_active ||
 			    (is_ses_using_iface(ses, iface) &&
-			     !iface->rss_capable)) {
+			     !iface->rss_capable))
+				continue;
+
+			/* check if we already allocated enough channels */
+			iface_weight = iface->speed / iface_min_speed;
+
+			if (iface->weight_fulfilled >= iface_weight)
 				continue;
-			}
 
 			/* take ref before unlock */
 			kref_get(&iface->refcount);
@@ -255,10 +264,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 				continue;
 			}
 
-			cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+			iface->num_channels++;
+			iface->weight_fulfilled++;
+			cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
 				 &iface->sockaddr);
 			break;
 		}
+
+		/* reached end of list. reset weight_fulfilled and start over */
+		if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+			list_for_each_entry(iface, &ses->iface_list, iface_head)
+				iface->weight_fulfilled = 0;
+			spin_unlock(&ses->iface_lock);
+			iface = NULL;
+			continue;
+		}
 		spin_unlock(&ses->iface_lock);
 
 		left--;
@@ -277,8 +297,10 @@ int
 cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
 {
 	unsigned int chan_index;
+	size_t iface_weight = 0, iface_min_speed = 0;
 	struct cifs_server_iface *iface = NULL;
 	struct cifs_server_iface *old_iface = NULL;
+	struct cifs_server_iface *last_iface = NULL;
 	int rc = 0;
 
 	spin_lock(&ses->chan_lock);
@@ -298,13 +320,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
 	spin_unlock(&ses->chan_lock);
 
 	spin_lock(&ses->iface_lock);
+	if (!ses->iface_count) {
+		spin_unlock(&ses->iface_lock);
+		cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+		return 0;
+	}
+
+	last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+				     iface_head);
+	iface_min_speed = last_iface->speed;
+
 	/* then look for a new one */
 	list_for_each_entry(iface, &ses->iface_list, iface_head) {
+		/* do not mix rdma and non-rdma interfaces */
+		if (iface->rdma_capable != server->rdma)
+			continue;
+
 		if (!iface->is_active ||
 		    (is_ses_using_iface(ses, iface) &&
 		     !iface->rss_capable)) {
 			continue;
 		}
+
+		/* check if we already allocated enough channels */
+		iface_weight = iface->speed / iface_min_speed;
+
+		if (iface->weight_fulfilled >= iface_weight)
+			continue;
+
 		kref_get(&iface->refcount);
 		break;
 	}
@@ -320,10 +363,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
 		cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
 			 &old_iface->sockaddr,
 			 &iface->sockaddr);
+
+		old_iface->num_channels--;
+		if (old_iface->weight_fulfilled)
+			old_iface->weight_fulfilled--;
+		iface->num_channels++;
+		iface->weight_fulfilled++;
+
 		kref_put(&old_iface->refcount, release_iface);
 	} else if (old_iface) {
 		cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
 			 &old_iface->sockaddr);
+
+		old_iface->num_channels--;
+		if (old_iface->weight_fulfilled)
+			old_iface->weight_fulfilled--;
+
 		kref_put(&old_iface->refcount, release_iface);
 	} else {
 		WARN_ON(!iface);
-- 
2.34.1


  parent reply	other threads:[~2023-10-30 11:00 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-30 11:00 [PATCH 01/14] cifs: print server capabilities in DebugData nspmangalore
2023-10-30 11:00 ` [PATCH 02/14] cifs: add xid to query server interface call nspmangalore
2023-10-31  5:35   ` Bharath SM
2023-10-30 11:00 ` [PATCH 03/14] cifs: reconnect helper should set reconnect for the right channel nspmangalore
2023-10-31 15:27   ` Paulo Alcantara
2023-10-31 18:29     ` Steve French
2023-10-30 11:00 ` [PATCH 04/14] cifs: do not reset chan_max if multichannel is not supported at mount nspmangalore
2023-11-01  2:57   ` Steve French
2023-11-01  3:14   ` Steve French
2023-10-30 11:00 ` [PATCH 05/14] cifs: force interface update before a fresh session setup nspmangalore
2023-11-01  3:14   ` Steve French
2023-10-30 11:00 ` [PATCH 06/14] cifs: handle cases where a channel is closed nspmangalore
2023-11-01  3:09   ` Steve French
2023-11-02 12:26     ` Shyam Prasad N
2023-10-30 11:00 ` nspmangalore [this message]
2023-10-30 11:00 ` [PATCH 08/14] cifs: account for primary channel in the interface list nspmangalore
2023-11-08 15:44   ` Paulo Alcantara
2023-11-08 18:16     ` Steve French
2023-11-08 19:03       ` Paulo Alcantara
2023-10-30 11:00 ` [PATCH 09/14] cifs: add a back pointer to cifs_sb from tcon nspmangalore
2023-11-01  3:30   ` Steve French
2023-11-03 21:03   ` Paulo Alcantara
2023-11-06 16:12     ` Shyam Prasad N
2023-11-06 17:04       ` Shyam Prasad N
     [not found]         ` <CAH2r5msQLTcdiHBrOKd+q6LPPHW_Jj3QbpFZyZ48CJbrtDqC5w@mail.gmail.com>
     [not found]           ` <CAH2r5mt4hC5x2w2D46y13j_OtjkJk9_ZaeGXbb7YKukffBk2LQ@mail.gmail.com>
2023-11-06 19:36             ` Fwd: " Steve French
2023-11-08 15:24         ` Paulo Alcantara
2023-11-08 16:11           ` Steve French
2023-10-30 11:00 ` [PATCH 10/14] cifs: reconnect work should have reference on server struct nspmangalore
2023-11-16 17:10   ` Paulo Alcantara
     [not found]     ` <CAH2r5mtDeP323Z8=9WjCCYVVb9B2AmO5Q4PDtcMz8wxVUCVRBA@mail.gmail.com>
2023-11-16 19:35       ` Paulo Alcantara
2023-10-30 11:00 ` [PATCH 11/14] cifs: handle when server starts supporting multichannel nspmangalore
2023-11-01  3:30   ` Steve French
2023-11-01 15:52   ` Paulo Alcantara
2023-11-04  7:50     ` Shyam Prasad N
2023-11-02 20:28   ` Paulo Alcantara
2023-11-03  0:43     ` Steve French
2023-11-03 20:32       ` Paulo Alcantara
     [not found]       ` <notmuch-sha1-c3bfa7f4ae0bb24c5ee7cfddb408c2fbeca5d8f7>
2023-11-08 16:02         ` Paulo Alcantara
2023-11-08 19:25           ` Steve French
2023-11-08 19:31             ` Paulo Alcantara
2023-10-30 11:00 ` [PATCH 12/14] cifs: handle when server stops " nspmangalore
2023-11-08 16:35   ` Paulo Alcantara
     [not found]   ` <notmuch-sha1-9ed0289358ca5c90903408ad9c0ac0310afee598>
2023-11-08 19:13     ` Paulo Alcantara
2023-11-08 19:41       ` Paulo Alcantara
2023-11-09 11:44         ` Shyam Prasad N
2023-11-09 13:28           ` Paulo Alcantara
2023-11-09 13:49             ` Shyam Prasad N
2023-11-10  4:09               ` Shyam Prasad N
2023-11-11 17:23                 ` Paulo Alcantara
2023-11-12 18:52                   ` Steve French
     [not found]                   ` <CAH2r5mvG3zLBxknPOuaz9=GarZO6n6bhcduiZHHfiqVYZYJiVQ@mail.gmail.com>
2023-11-12 19:32                     ` Paulo Alcantara
2023-10-30 11:00 ` [PATCH 13/14] cifs: display the endpoint IP details in DebugData nspmangalore
2023-10-31 15:18   ` Paulo Alcantara
     [not found]   ` <notmuch-sha1-260ef7fe7af7face0e1486229c0fda5149fe14e2>
2023-11-01 14:12     ` Paulo Alcantara
2023-11-01 14:19       ` Steve French
2023-11-04  7:44       ` Shyam Prasad N
2023-11-04 19:00         ` Paulo Alcantara
2023-10-30 12:34 ` [PATCH 01/14] cifs: print server capabilities " Bharath SM
2023-10-30 12:40   ` Shyam Prasad N
2023-10-30 12:51     ` Shyam Prasad N
2023-10-30 14:54 ` Steve French

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231030110020.45627-7-sprasad@microsoft.com \
    --to=nspmangalore@gmail.com \
    --cc=bharathsm.hsk@gmail.com \
    --cc=linux-cifs@vger.kernel.org \
    --cc=pc@manguebit.com \
    --cc=smfrench@gmail.com \
    --cc=sprasad@microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox