From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Shyam Prasad N <sprasad@microsoft.com>,
Steve French <stfrench@microsoft.com>,
Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.1 35/82] cifs: distribute channels across interfaces based on speed
Date: Thu, 30 Nov 2023 16:22:06 +0000 [thread overview]
Message-ID: <20231130162137.070386121@linuxfoundation.org> (raw)
In-Reply-To: <20231130162135.977485944@linuxfoundation.org>
6.1-stable review patch. If anyone has any objections, please let me know.
------------------
From: Shyam Prasad N <sprasad@microsoft.com>
[ Upstream commit a6d8fb54a515f0546ffdb7870102b1238917e567 ]
Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and it makes little attempt to
distribute the connections.
This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.
Also make sure that we don't mix RDMA and non-RDMA interfaces for channels.
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Stable-dep-of: fa1d0508bdd4 ("cifs: account for primary channel in the interface list")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/smb/client/cifs_debug.c | 16 ++++++++
fs/smb/client/cifsglob.h | 2 +
fs/smb/client/sess.c | 84 +++++++++++++++++++++++++++++++-------
3 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 8233fb2f0ca63..0acb455368f23 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -220,6 +220,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_server_iface *iface;
+ size_t iface_weight = 0, iface_min_speed = 0;
+ struct cifs_server_iface *last_iface = NULL;
int c, i, j;
seq_puts(m,
@@ -461,11 +463,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"\tLast updated: %lu seconds ago",
ses->iface_count,
(jiffies - ses->iface_last_update) / HZ);
+
+ last_iface = list_last_entry(&ses->iface_list,
+ struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
j = 0;
list_for_each_entry(iface, &ses->iface_list,
iface_head) {
seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface);
+
+ iface_weight = iface->speed / iface_min_speed;
+ seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+ "\n\t\tAllocated channels: %u\n",
+ iface->weight_fulfilled,
+ iface_weight,
+ iface->num_channels);
+
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 6c8a55608c9bd..2e814eadd6aef 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -956,6 +956,8 @@ struct cifs_server_iface {
struct list_head iface_head;
struct kref refcount;
size_t speed;
+ size_t weight_fulfilled;
+ unsigned int num_channels;
unsigned int rdma_capable : 1;
unsigned int rss_capable : 1;
unsigned int is_active : 1; /* unset if non existent */
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index f0d164873500b..33e724545c5b4 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -164,7 +164,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
int left;
int rc = 0;
int tries = 0;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL, *niface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
spin_lock(&ses->chan_lock);
@@ -192,21 +194,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
}
spin_unlock(&ses->chan_lock);
- /*
- * Keep connecting to same, fastest, iface for all channels as
- * long as its RSS. Try next fastest one if not RSS or channel
- * creation fails.
- */
- spin_lock(&ses->iface_lock);
- iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
- iface_head);
- spin_unlock(&ses->iface_lock);
-
while (left > 0) {
tries++;
if (tries > 3*ses->chan_max) {
- cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+ cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
left);
break;
}
@@ -214,17 +206,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+ ses->server->hostname);
break;
}
+ if (!iface)
+ iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != ses->server->rdma)
+ continue;
+
/* skip ifaces that are unusable */
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
- !iface->rss_capable)) {
+ !iface->rss_capable))
+ continue;
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
continue;
- }
/* take ref before unlock */
kref_get(&iface->refcount);
@@ -241,10 +251,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
continue;
}
- cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+ iface->num_channels++;
+ iface->weight_fulfilled++;
+ cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
&iface->sockaddr);
break;
}
+
+ /* reached end of list. reset weight_fulfilled and start over */
+ if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+ list_for_each_entry(iface, &ses->iface_list, iface_head)
+ iface->weight_fulfilled = 0;
+ spin_unlock(&ses->iface_lock);
+ iface = NULL;
+ continue;
+ }
spin_unlock(&ses->iface_lock);
left--;
@@ -263,8 +284,10 @@ int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
{
unsigned int chan_index;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL;
struct cifs_server_iface *old_iface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
int rc = 0;
spin_lock(&ses->chan_lock);
@@ -284,13 +307,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_unlock(&ses->chan_lock);
spin_lock(&ses->iface_lock);
+ if (!ses->iface_count) {
+ spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ return 0;
+ }
+
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
/* then look for a new one */
list_for_each_entry(iface, &ses->iface_list, iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != server->rdma)
+ continue;
+
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) {
continue;
}
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
+ continue;
+
kref_get(&iface->refcount);
break;
}
@@ -306,10 +350,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
&old_iface->sockaddr,
&iface->sockaddr);
+
+ old_iface->num_channels--;
+ if (old_iface->weight_fulfilled)
+ old_iface->weight_fulfilled--;
+ iface->num_channels++;
+ iface->weight_fulfilled++;
+
kref_put(&old_iface->refcount, release_iface);
} else if (old_iface) {
cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
&old_iface->sockaddr);
+
+ old_iface->num_channels--;
+ if (old_iface->weight_fulfilled)
+ old_iface->weight_fulfilled--;
+
kref_put(&old_iface->refcount, release_iface);
} else {
WARN_ON(!iface);
--
2.42.0
next prev parent reply other threads:[~2023-11-30 16:29 UTC|newest]
Thread overview: 91+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-30 16:21 [PATCH 6.1 00/82] 6.1.65-rc1 review Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 01/82] afs: Fix afs_server_list to be cleaned up with RCU Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 02/82] afs: Make error on cell lookup failure consistent with OpenAFS Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 03/82] drm/panel: boe-tv101wum-nl6: Fine tune the panel power sequence Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 04/82] drm/panel: auo,b101uan08.3: " Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 05/82] drm/panel: simple: Fix Innolux G101ICE-L01 bus flags Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 06/82] drm/panel: simple: Fix Innolux G101ICE-L01 timings Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 07/82] wireguard: use DEV_STATS_INC() Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 08/82] octeontx2-pf: Fix memory leak during interface down Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 09/82] ata: pata_isapnp: Add missing error check for devm_ioport_map() Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 10/82] drm/i915: do not clean GT table on error path Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 11/82] drm/rockchip: vop: Fix color for RGB888/BGR888 format on VOP full Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 12/82] HID: fix HID device resource race between HID core and debugging support Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 13/82] ipv4: Correct/silence an endian warning in __ip_do_redirect Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 14/82] net: usb: ax88179_178a: fix failed operations during ax88179_reset Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 15/82] net/smc: avoid data corruption caused by decline Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 16/82] arm/xen: fix xen_vcpu_info allocation alignment Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 17/82] octeontx2-pf: Fix ntuple rule creation to direct packet to VF with higher Rx queue than its PF Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 18/82] amd-xgbe: handle corner-case during sfp hotplug Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 19/82] amd-xgbe: handle the corner-case during tx completion Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 20/82] amd-xgbe: propagate the correct speed and duplex status Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 21/82] net: axienet: Fix check for partial TX checksum Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 22/82] afs: Return ENOENT if no cell DNS record can be found Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 23/82] afs: Fix file locking on R/O volumes to operate in local mode Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 24/82] mm,kfence: decouple kfence from page granularity mapping judgement Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 25/82] arm64: mm: Fix "rodata=on" when CONFIG_RODATA_FULL_DEFAULT_ENABLED=y Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 26/82] i40e: use ERR_PTR error print in i40e messages Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 27/82] i40e: Fix adding unsupported cloud filters Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 28/82] nvmet: nul-terminate the NQNs passed in the connect command Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 29/82] USB: dwc3: qcom: fix resource leaks on probe deferral Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 30/82] USB: dwc3: qcom: fix ACPI platform device leak Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 31/82] lockdep: Fix block chain corruption Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 32/82] cifs: minor cleanup of some headers Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 33/82] smb3: allow dumping session and tcon id to improve stats analysis and debugging Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 34/82] cifs: print last update time for interface list Greg Kroah-Hartman
2023-11-30 16:22 ` Greg Kroah-Hartman [this message]
2023-11-30 16:22 ` [PATCH 6.1 36/82] cifs: account for primary channel in the " Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 37/82] cifs: fix leak of iface for primary channel Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 38/82] MIPS: KVM: Fix a build warning about variable set but not used Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 39/82] media: camss: Split power domain management Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 40/82] media: camss: Convert to platform remove callback returning void Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 41/82] media: qcom: Initialise V4L2 async notifier later Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 42/82] media: qcom: camss: Fix V4L2 async notifier error path Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 43/82] media: qcom: camss: Fix genpd cleanup Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 44/82] ext4: add a new helper to check if es must be kept Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 45/82] ext4: factor out __es_alloc_extent() and __es_free_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 46/82] ext4: use pre-allocated es in __es_insert_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 47/82] ext4: use pre-allocated es in __es_remove_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 48/82] ext4: using nofail preallocation in ext4_es_remove_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 49/82] ext4: using nofail preallocation in ext4_es_insert_delayed_block() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 50/82] ext4: using nofail preallocation in ext4_es_insert_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 51/82] ext4: fix slab-use-after-free " Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 52/82] ext4: make sure allocate pending entry not fail Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 53/82] NFSD: Fix "start of NFS reply" pointer passed to nfsd_cache_update() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 54/82] NFSD: Fix checksum mismatches in the duplicate reply cache Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 55/82] arm64: dts: imx8mn-var-som: add 20ms delay to ethernet regulator enable Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 56/82] ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CVA Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 57/82] swiotlb-xen: provide the "max_mapping_size" method Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 58/82] bcache: replace a mistaken IS_ERR() by IS_ERR_OR_NULL() in btree_gc_coalesce() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 59/82] md: fix bi_status reporting in md_end_clone_io Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 60/82] bcache: fixup multi-threaded bch_sectors_dirty_init() wake-up race Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 61/82] io_uring/fs: consider link->flags when getting path for LINKAT Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 62/82] s390/dasd: protect device queue against concurrent access Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 63/82] USB: serial: option: add Luat Air72*U series products Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 64/82] hv_netvsc: fix race of netvsc and VF register_netdevice Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 65/82] hv_netvsc: Fix race of register_netdevice_notifier and VF register Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 66/82] hv_netvsc: Mark VF as slave before exposing it to user-mode Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 67/82] dm-delay: fix a race between delay_presuspend and delay_bio Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 68/82] bcache: check return value from btree_node_alloc_replacement() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 69/82] bcache: prevent potential division by zero error Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 70/82] bcache: fixup init dirty data errors Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 71/82] bcache: fixup lock c->root error Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 72/82] usb: cdnsp: Fix deadlock issue during using NCM gadget Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 73/82] USB: serial: option: add Fibocom L7xx modules Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 74/82] USB: serial: option: fix FM101R-GL defines Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 75/82] USB: serial: option: dont claim interface 4 for ZTE MF290 Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 76/82] usb: typec: tcpm: Skip hard reset when in error recovery Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 77/82] USB: dwc2: write HCINT with INTMASK applied Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 78/82] usb: dwc3: Fix default mode initialization Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 79/82] usb: dwc3: set the dma max_seg_size Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 80/82] USB: dwc3: qcom: fix software node leak on probe errors Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 81/82] USB: dwc3: qcom: fix wakeup after probe deferral Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 82/82] io_uring: fix off-by one bvec index Greg Kroah-Hartman
2023-11-30 19:10 ` [PATCH 6.1 00/82] 6.1.65-rc1 review Florian Fainelli
2023-12-01 0:09 ` Shuah Khan
2023-12-01 10:54 ` Jon Hunter
2023-12-01 11:01 ` Conor Dooley
2023-12-01 13:41 ` Naresh Kamboju
2023-12-01 20:30 ` Guenter Roeck
2023-12-02 0:40 ` SeongJae Park
2023-12-02 2:40 ` Ron Economos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231130162137.070386121@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=patches@lists.linux.dev \
--cc=sashal@kernel.org \
--cc=sprasad@microsoft.com \
--cc=stable@vger.kernel.org \
--cc=stfrench@microsoft.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.