[RFC PATCH for-next] smb: client: parallelize multichannel write issue

All of lore.kernel.org
 help / color / mirror / Atom feed

* [RFC PATCH for-next] smb: client: parallelize multichannel write issue
@ 2026-06-26 16:19 Henrique Carvalho
  2026-06-26 19:54 ` Enzo Matsumiya
  0 siblings, 1 reply; 2+ messages in thread
From: Henrique Carvalho @ 2026-06-26 16:19 UTC (permalink / raw)
  To: sfrench; +Cc: pc, ronniesahlberg, sprasad, tom, bharathsm, ematsumiya,
	linux-cifs

I'm sending this as an RFC PATCH first so the approach and results can
be sanity checked more broadly.

The netfs writeback path issues write subrequests through the filesystem
issue_write() callback. For multichannel, those subrequests may target
different channels, but the issue callback is still entered serially by
the netfs issuing context.

As a result, while one channel is running the write issue path, write
subrequests for other channels may be left waiting to be issued. This
can limit multichannel writeback throughput because the channels are not
kept busy independently.

For multichannel sessions, queue the existing write issue path to a
workqueue. This lets the netfs issuing context return quickly and
continue issuing subsequent write subrequests for other channels.
Single-channel sessions keep the existing synchronous issue path.

Preliminary fio testing showed throughput improvements of up to 2.5x
for 4MiB writes with larger dirty limits (1g/256m) and 1.4x for 1GiB
writes with larger dirty limits, and neutral results when dirty limits
keep the pipeline shallow (4m/1m).

Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
---
 fs/smb/client/cifsfs.c    | 14 +++++++++-
 fs/smb/client/cifsglob.h  |  1 +
 fs/smb/client/cifsproto.h |  1 +
 fs/smb/client/file.c      | 57 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index ce23924f01b3..2ade5b47ef12 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -163,6 +163,7 @@ struct workqueue_struct	*cifsoplockd_wq;
 struct workqueue_struct	*deferredclose_wq;
 struct workqueue_struct	*serverclose_wq;
 struct workqueue_struct	*cfid_put_wq;
+struct workqueue_struct	*cifs_write_issue_wq;
 __u32 cifs_lock_secret;
 
 /*
@@ -2009,9 +2010,17 @@ init_cifs(void)
 		goto out_destroy_serverclose_wq;
 	}
 
+	cifs_write_issue_wq = alloc_workqueue("cifs_write_issue",
+					      WQ_UNBOUND | WQ_MEM_RECLAIM,
+					      0);
+	if (!cifs_write_issue_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cfid_put_wq;
+	}
+
 	rc = cifs_init_inodecache();
 	if (rc)
-		goto out_destroy_cfid_put_wq;
+		goto out_destroy_write_issue_wq;
 
 	rc = cifs_init_netfs();
 	if (rc)
@@ -2079,6 +2088,8 @@ init_cifs(void)
 	cifs_destroy_netfs();
 out_destroy_inodecache:
 	cifs_destroy_inodecache();
+out_destroy_write_issue_wq:
+	destroy_workqueue(cifs_write_issue_wq);
 out_destroy_cfid_put_wq:
 	destroy_workqueue(cfid_put_wq);
 out_destroy_serverclose_wq:
@@ -2119,6 +2130,7 @@ exit_cifs(void)
 	destroy_mids();
 	cifs_destroy_netfs();
 	cifs_destroy_inodecache();
+	destroy_workqueue(cifs_write_issue_wq);
 	destroy_workqueue(deferredclose_wq);
 	destroy_workqueue(cifsoplockd_wq);
 	destroy_workqueue(decrypt_wq);
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 82e0adc1dabd..591353f261a8 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -2137,6 +2137,7 @@ extern struct workqueue_struct *cifsoplockd_wq;
 extern struct workqueue_struct *deferredclose_wq;
 extern struct workqueue_struct *serverclose_wq;
 extern struct workqueue_struct *cfid_put_wq;
+extern struct workqueue_struct *cifs_write_issue_wq;
 extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_sm_req_poolp;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 79d891f7df1a..113ffeec0e2f 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -59,6 +59,7 @@ int init_cifs_idmap(void);
 void exit_cifs_idmap(void);
 int init_cifs_spnego(void);
 void exit_cifs_spnego(void);
+int cifs_init_write_issue_wq(void);
 const char *build_path_from_dentry(struct dentry *direntry, void *page);
 char *__build_path_from_dentry_optional_prefix(struct dentry *direntry,
 					       void *page, const char *tree,
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index b60344125f27..3a070962a1f5 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -38,6 +38,46 @@
 #include <trace/events/netfs.h>
 
 static int cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush);
+static void __cifs_issue_write(struct netfs_io_subrequest *subreq);
+
+struct cifs_issue_write_work {
+	struct work_struct work;
+	struct netfs_io_subrequest *subreq;
+};
+
+static bool cifs_write_is_mchan(struct cifs_ses *ses)
+{
+	bool is_mchan;
+
+	spin_lock(&ses->chan_lock);
+	is_mchan = ses->chan_count > 1;
+	spin_unlock(&ses->chan_lock);
+
+	return is_mchan;
+}
+
+static void cifs_issue_write_work_fn(struct work_struct *work)
+{
+	struct cifs_issue_write_work *w = container_of(work, struct cifs_issue_write_work, work);
+
+	__cifs_issue_write(w->subreq);
+	kfree(w);
+}
+
+static int cifs_issue_parallel_write(struct cifs_ses *ses,
+				      struct netfs_io_subrequest *subreq)
+{
+	struct cifs_issue_write_work *w = kmalloc_obj(*w, GFP_NOFS);
+
+	if (!w)
+		return -ENOMEM;
+
+	w->subreq = subreq;
+	INIT_WORK(&w->work, cifs_issue_write_work_fn);
+	queue_work(cifs_write_issue_wq, &w->work);
+
+	return 0;
+}
 
 /*
  * Prepare a subrequest to upload to the server.  We need to allocate credits
@@ -108,7 +148,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq)
 /*
  * Issue a subrequest to upload to the server.
  */
-static void cifs_issue_write(struct netfs_io_subrequest *subreq)
+static void __cifs_issue_write(struct netfs_io_subrequest *subreq)
 {
 	struct cifs_io_subrequest *wdata =
 		container_of(subreq, struct cifs_io_subrequest, subreq);
@@ -142,6 +182,21 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq)
 	goto out;
 }
 
+static void cifs_issue_write(struct netfs_io_subrequest *subreq)
+{
+	struct cifs_io_subrequest *wdata = container_of(subreq, struct cifs_io_subrequest, subreq);
+	struct cifs_ses *ses = tlink_tcon(wdata->req->cfile->tlink)->ses;
+
+	if (cifs_write_is_mchan(ses)) {
+		int err = cifs_issue_parallel_write(ses, subreq);
+
+		if (!err)
+			return;
+	}
+
+	__cifs_issue_write(subreq);
+}
+
 static void cifs_netfs_invalidate_cache(struct netfs_io_request *wreq)
 {
 	cifs_invalidate_cache(wreq->inode, 0);
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [RFC PATCH for-next] smb: client: parallelize multichannel write issue
  2026-06-26 16:19 [RFC PATCH for-next] smb: client: parallelize multichannel write issue Henrique Carvalho
@ 2026-06-26 19:54 ` Enzo Matsumiya
  0 siblings, 0 replies; 2+ messages in thread
From: Enzo Matsumiya @ 2026-06-26 19:54 UTC (permalink / raw)
  To: Henrique Carvalho
  Cc: sfrench, pc, ronniesahlberg, sprasad, tom, bharathsm, linux-cifs

On 06/26, Henrique Carvalho wrote:
>I'm sending this as an RFC PATCH first so the approach and results can
>be sanity checked more broadly.
>
>The netfs writeback path issues write subrequests through the filesystem
>issue_write() callback. For multichannel, those subrequests may target
>different channels, but the issue callback is still entered serially by
>the netfs issuing context.
>
>As a result, while one channel is running the write issue path, write
>subrequests for other channels may be left waiting to be issued. This
>can limit multichannel writeback throughput because the channels are not
>kept busy independently.
>
>For multichannel sessions, queue the existing write issue path to a
>workqueue. This lets the netfs issuing context return quickly and
>continue issuing subsequent write subrequests for other channels.
>Single-channel sessions keep the existing synchronous issue path.
>
>Preliminary fio testing showed throughput improvements of up to 2.5x
>for 4MiB writes with larger dirty limits (1g/256m) and 1.4x for 1GiB
>writes with larger dirty limits, and neutral results when dirty limits
>keep the pipeline shallow (4m/1m).

Works great.

A few concerns for a next version inlined below.

>+	destroy_workqueue(cifs_write_issue_wq);

Make sure to flush the workqueue at the appropriate time for a clean
destroy.

>+static void __cifs_issue_write(struct netfs_io_subrequest *subreq);
>+
>+struct cifs_issue_write_work {
>+	struct work_struct work;
>+	struct netfs_io_subrequest *subreq;
>+};
>+
>+static bool cifs_write_is_mchan(struct cifs_ses *ses)
>+{
>+	bool is_mchan;
>+
>+	spin_lock(&ses->chan_lock);
>+	is_mchan = ses->chan_count > 1;
>+	spin_unlock(&ses->chan_lock);
>+
>+	return is_mchan;
>+}

Not really a concern here, but why limit it to multichannel?
AFAICS single channel mounts would also benefit from this, no?

>+static void cifs_issue_write_work_fn(struct work_struct *work)
>+{
>+	struct cifs_issue_write_work *w = container_of(work, struct cifs_issue_write_work, work);
>+
>+	__cifs_issue_write(w->subreq);
>+	kfree(w);
>+}
>+
>+static int cifs_issue_parallel_write(struct cifs_ses *ses,
>+				      struct netfs_io_subrequest *subreq)
>+{
>+	struct cifs_issue_write_work *w = kmalloc_obj(*w, GFP_NOFS);
>+
>+	if (!w)
>+		return -ENOMEM;
>+
>+	w->subreq = subreq;
>+	INIT_WORK(&w->work, cifs_issue_write_work_fn);
>+	queue_work(cifs_write_issue_wq, &w->work);
>+
>+	return 0;
>+}

I think you need to find a way to track these work items somehow, so you
can e.g.:
- capture/propagate -ERESTARTSYS/-EINTR
- (and thus) properly cancel_work() when needed

I tested the patch with 4 channels and thousands of writer processes and
it worked great in a healthy scenario.

Dropping the network mid-operation shows that, after reconnect is
successful, if I kill my writers and try to umount, there are several
cifs_write_issue kworkers hanging (didn't investigate further).

>+static void cifs_issue_write(struct netfs_io_subrequest *subreq)
>+{
>+	struct cifs_io_subrequest *wdata = container_of(subreq, struct cifs_io_subrequest, subreq);
>+	struct cifs_ses *ses = tlink_tcon(wdata->req->cfile->tlink)->ses;
>+
>+	if (cifs_write_is_mchan(ses)) {
>+		int err = cifs_issue_parallel_write(ses, subreq);
>+
>+		if (!err)
>+			return;
>+	}
>+
>+	__cifs_issue_write(subreq);
>+}

cifs_issue_parallel_write() returns -ENOMEM or 0.  If it returns
-ENOMEM you really shouldn't fall back to __cifs_issue_write(), but
rather follow the __cifs_issue_write() "fail" case (to make netfs aware
of the error).


Cheers,

Enzo

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-26 19:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-26 16:19 [RFC PATCH for-next] smb: client: parallelize multichannel write issue Henrique Carvalho
2026-06-26 19:54 ` Enzo Matsumiya

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.