From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Shyam Prasad N <sprasad@microsoft.com>,
Steve French <stfrench@microsoft.com>,
Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.1 35/82] cifs: distribute channels across interfaces based on speed
Date: Thu, 30 Nov 2023 16:22:06 +0000 [thread overview]
Message-ID: <20231130162137.070386121@linuxfoundation.org> (raw)
In-Reply-To: <20231130162135.977485944@linuxfoundation.org>
6.1-stable review patch. If anyone has any objections, please let me know.
------------------
From: Shyam Prasad N <sprasad@microsoft.com>
[ Upstream commit a6d8fb54a515f0546ffdb7870102b1238917e567 ]
Today, if the server interfaces RSS capable, we simply
choose the fastest interface to setup a channel. This is not
a scalable approach, and does not make a lot of attempt to
distribute the connections.
This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.
Also make sure that we don't mix rdma and non-rdma for channels.
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Stable-dep-of: fa1d0508bdd4 ("cifs: account for primary channel in the interface list")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/smb/client/cifs_debug.c | 16 ++++++++
fs/smb/client/cifsglob.h | 2 +
fs/smb/client/sess.c | 84 +++++++++++++++++++++++++++++++-------
3 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 8233fb2f0ca63..0acb455368f23 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -220,6 +220,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_server_iface *iface;
+ size_t iface_weight = 0, iface_min_speed = 0;
+ struct cifs_server_iface *last_iface = NULL;
int c, i, j;
seq_puts(m,
@@ -461,11 +463,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"\tLast updated: %lu seconds ago",
ses->iface_count,
(jiffies - ses->iface_last_update) / HZ);
+
+ last_iface = list_last_entry(&ses->iface_list,
+ struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
j = 0;
list_for_each_entry(iface, &ses->iface_list,
iface_head) {
seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface);
+
+ iface_weight = iface->speed / iface_min_speed;
+ seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+ "\n\t\tAllocated channels: %u\n",
+ iface->weight_fulfilled,
+ iface_weight,
+ iface->num_channels);
+
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 6c8a55608c9bd..2e814eadd6aef 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -956,6 +956,8 @@ struct cifs_server_iface {
struct list_head iface_head;
struct kref refcount;
size_t speed;
+ size_t weight_fulfilled;
+ unsigned int num_channels;
unsigned int rdma_capable : 1;
unsigned int rss_capable : 1;
unsigned int is_active : 1; /* unset if non existent */
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index f0d164873500b..33e724545c5b4 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -164,7 +164,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
int left;
int rc = 0;
int tries = 0;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL, *niface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
spin_lock(&ses->chan_lock);
@@ -192,21 +194,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
}
spin_unlock(&ses->chan_lock);
- /*
- * Keep connecting to same, fastest, iface for all channels as
- * long as its RSS. Try next fastest one if not RSS or channel
- * creation fails.
- */
- spin_lock(&ses->iface_lock);
- iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
- iface_head);
- spin_unlock(&ses->iface_lock);
-
while (left > 0) {
tries++;
if (tries > 3*ses->chan_max) {
- cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+ cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
left);
break;
}
@@ -214,17 +206,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+ ses->server->hostname);
break;
}
+ if (!iface)
+ iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != ses->server->rdma)
+ continue;
+
/* skip ifaces that are unusable */
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
- !iface->rss_capable)) {
+ !iface->rss_capable))
+ continue;
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
continue;
- }
/* take ref before unlock */
kref_get(&iface->refcount);
@@ -241,10 +251,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
continue;
}
- cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+ iface->num_channels++;
+ iface->weight_fulfilled++;
+ cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
&iface->sockaddr);
break;
}
+
+ /* reached end of list. reset weight_fulfilled and start over */
+ if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+ list_for_each_entry(iface, &ses->iface_list, iface_head)
+ iface->weight_fulfilled = 0;
+ spin_unlock(&ses->iface_lock);
+ iface = NULL;
+ continue;
+ }
spin_unlock(&ses->iface_lock);
left--;
@@ -263,8 +284,10 @@ int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
{
unsigned int chan_index;
+ size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL;
struct cifs_server_iface *old_iface = NULL;
+ struct cifs_server_iface *last_iface = NULL;
int rc = 0;
spin_lock(&ses->chan_lock);
@@ -284,13 +307,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_unlock(&ses->chan_lock);
spin_lock(&ses->iface_lock);
+ if (!ses->iface_count) {
+ spin_unlock(&ses->iface_lock);
+ cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ return 0;
+ }
+
+ last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ iface_min_speed = last_iface->speed;
+
/* then look for a new one */
list_for_each_entry(iface, &ses->iface_list, iface_head) {
+ /* do not mix rdma and non-rdma interfaces */
+ if (iface->rdma_capable != server->rdma)
+ continue;
+
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) {
continue;
}
+
+ /* check if we already allocated enough channels */
+ iface_weight = iface->speed / iface_min_speed;
+
+ if (iface->weight_fulfilled >= iface_weight)
+ continue;
+
kref_get(&iface->refcount);
break;
}
@@ -306,10 +350,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
&old_iface->sockaddr,
&iface->sockaddr);
+
+ old_iface->num_channels--;
+ if (old_iface->weight_fulfilled)
+ old_iface->weight_fulfilled--;
+ iface->num_channels++;
+ iface->weight_fulfilled++;
+
kref_put(&old_iface->refcount, release_iface);
} else if (old_iface) {
cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
&old_iface->sockaddr);
+
+ old_iface->num_channels--;
+ if (old_iface->weight_fulfilled)
+ old_iface->weight_fulfilled--;
+
kref_put(&old_iface->refcount, release_iface);
} else {
WARN_ON(!iface);
--
2.42.0
next prev parent reply other threads:[~2023-11-30 16:29 UTC|newest]
Thread overview: 91+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-30 16:21 [PATCH 6.1 00/82] 6.1.65-rc1 review Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 01/82] afs: Fix afs_server_list to be cleaned up with RCU Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 02/82] afs: Make error on cell lookup failure consistent with OpenAFS Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 03/82] drm/panel: boe-tv101wum-nl6: Fine tune the panel power sequence Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 04/82] drm/panel: auo,b101uan08.3: " Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 05/82] drm/panel: simple: Fix Innolux G101ICE-L01 bus flags Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 06/82] drm/panel: simple: Fix Innolux G101ICE-L01 timings Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 07/82] wireguard: use DEV_STATS_INC() Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 08/82] octeontx2-pf: Fix memory leak during interface down Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 09/82] ata: pata_isapnp: Add missing error check for devm_ioport_map() Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 10/82] drm/i915: do not clean GT table on error path Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 11/82] drm/rockchip: vop: Fix color for RGB888/BGR888 format on VOP full Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 12/82] HID: fix HID device resource race between HID core and debugging support Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 13/82] ipv4: Correct/silence an endian warning in __ip_do_redirect Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 14/82] net: usb: ax88179_178a: fix failed operations during ax88179_reset Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 15/82] net/smc: avoid data corruption caused by decline Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 16/82] arm/xen: fix xen_vcpu_info allocation alignment Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 17/82] octeontx2-pf: Fix ntuple rule creation to direct packet to VF with higher Rx queue than its PF Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 18/82] amd-xgbe: handle corner-case during sfp hotplug Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 19/82] amd-xgbe: handle the corner-case during tx completion Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 20/82] amd-xgbe: propagate the correct speed and duplex status Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 21/82] net: axienet: Fix check for partial TX checksum Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 22/82] afs: Return ENOENT if no cell DNS record can be found Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 23/82] afs: Fix file locking on R/O volumes to operate in local mode Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 24/82] mm,kfence: decouple kfence from page granularity mapping judgement Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 25/82] arm64: mm: Fix "rodata=on" when CONFIG_RODATA_FULL_DEFAULT_ENABLED=y Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 26/82] i40e: use ERR_PTR error print in i40e messages Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 27/82] i40e: Fix adding unsupported cloud filters Greg Kroah-Hartman
2023-11-30 16:21 ` [PATCH 6.1 28/82] nvmet: nul-terminate the NQNs passed in the connect command Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 29/82] USB: dwc3: qcom: fix resource leaks on probe deferral Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 30/82] USB: dwc3: qcom: fix ACPI platform device leak Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 31/82] lockdep: Fix block chain corruption Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 32/82] cifs: minor cleanup of some headers Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 33/82] smb3: allow dumping session and tcon id to improve stats analysis and debugging Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 34/82] cifs: print last update time for interface list Greg Kroah-Hartman
2023-11-30 16:22 ` Greg Kroah-Hartman [this message]
2023-11-30 16:22 ` [PATCH 6.1 36/82] cifs: account for primary channel in the " Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 37/82] cifs: fix leak of iface for primary channel Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 38/82] MIPS: KVM: Fix a build warning about variable set but not used Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 39/82] media: camss: Split power domain management Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 40/82] media: camss: Convert to platform remove callback returning void Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 41/82] media: qcom: Initialise V4L2 async notifier later Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 42/82] media: qcom: camss: Fix V4L2 async notifier error path Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 43/82] media: qcom: camss: Fix genpd cleanup Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 44/82] ext4: add a new helper to check if es must be kept Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 45/82] ext4: factor out __es_alloc_extent() and __es_free_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 46/82] ext4: use pre-allocated es in __es_insert_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 47/82] ext4: use pre-allocated es in __es_remove_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 48/82] ext4: using nofail preallocation in ext4_es_remove_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 49/82] ext4: using nofail preallocation in ext4_es_insert_delayed_block() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 50/82] ext4: using nofail preallocation in ext4_es_insert_extent() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 51/82] ext4: fix slab-use-after-free " Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 52/82] ext4: make sure allocate pending entry not fail Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 53/82] NFSD: Fix "start of NFS reply" pointer passed to nfsd_cache_update() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 54/82] NFSD: Fix checksum mismatches in the duplicate reply cache Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 55/82] arm64: dts: imx8mn-var-som: add 20ms delay to ethernet regulator enable Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 56/82] ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CVA Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 57/82] swiotlb-xen: provide the "max_mapping_size" method Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 58/82] bcache: replace a mistaken IS_ERR() by IS_ERR_OR_NULL() in btree_gc_coalesce() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 59/82] md: fix bi_status reporting in md_end_clone_io Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 60/82] bcache: fixup multi-threaded bch_sectors_dirty_init() wake-up race Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 61/82] io_uring/fs: consider link->flags when getting path for LINKAT Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 62/82] s390/dasd: protect device queue against concurrent access Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 63/82] USB: serial: option: add Luat Air72*U series products Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 64/82] hv_netvsc: fix race of netvsc and VF register_netdevice Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 65/82] hv_netvsc: Fix race of register_netdevice_notifier and VF register Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 66/82] hv_netvsc: Mark VF as slave before exposing it to user-mode Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 67/82] dm-delay: fix a race between delay_presuspend and delay_bio Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 68/82] bcache: check return value from btree_node_alloc_replacement() Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 69/82] bcache: prevent potential division by zero error Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 70/82] bcache: fixup init dirty data errors Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 71/82] bcache: fixup lock c->root error Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 72/82] usb: cdnsp: Fix deadlock issue during using NCM gadget Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 73/82] USB: serial: option: add Fibocom L7xx modules Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 74/82] USB: serial: option: fix FM101R-GL defines Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 75/82] USB: serial: option: dont claim interface 4 for ZTE MF290 Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 76/82] usb: typec: tcpm: Skip hard reset when in error recovery Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 77/82] USB: dwc2: write HCINT with INTMASK applied Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 78/82] usb: dwc3: Fix default mode initialization Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 79/82] usb: dwc3: set the dma max_seg_size Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 80/82] USB: dwc3: qcom: fix software node leak on probe errors Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 81/82] USB: dwc3: qcom: fix wakeup after probe deferral Greg Kroah-Hartman
2023-11-30 16:22 ` [PATCH 6.1 82/82] io_uring: fix off-by one bvec index Greg Kroah-Hartman
2023-11-30 19:10 ` [PATCH 6.1 00/82] 6.1.65-rc1 review Florian Fainelli
2023-12-01 0:09 ` Shuah Khan
2023-12-01 10:54 ` Jon Hunter
2023-12-01 11:01 ` Conor Dooley
2023-12-01 13:41 ` Naresh Kamboju
2023-12-01 20:30 ` Guenter Roeck
2023-12-02 0:40 ` SeongJae Park
2023-12-02 2:40 ` Ron Economos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231130162137.070386121@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=patches@lists.linux.dev \
--cc=sashal@kernel.org \
--cc=sprasad@microsoft.com \
--cc=stable@vger.kernel.org \
--cc=stfrench@microsoft.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox