public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Viacheslav Dubeyko <slava@dubeyko.com>,
	Alex Markuze <amarkuze@redhat.com>
Cc: David Howells <dhowells@redhat.com>,
	Ilya Dryomov <idryomov@gmail.com>,
	Jeff Layton <jlayton@kernel.org>,
	Dongsheng Yang <dongsheng.yang@easystack.cn>,
	ceph-devel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH 28/35] netfs: Adjust group handling
Date: Thu, 13 Mar 2025 23:33:20 +0000	[thread overview]
Message-ID: <20250313233341.1675324-29-dhowells@redhat.com> (raw)
In-Reply-To: <20250313233341.1675324-1-dhowells@redhat.com>

Make some adjustments to the handling of netfs groups so that ceph can use
them for snap contexts:

 - Move netfs_get_group(), netfs_put_group() and netfs_put_group_many() to
   linux/netfs.h so that ceph can build its snap context on netfs groups.

 - Move netfs_set_group() and __netfs_set_group() to linux/netfs.h so that
   ceph_dirty_folio() can call them from inside of the locked section in
   which it finds the snap context to attach.

 - Provide a netfs_writepages_group() that takes a group as a parameter and
   attaches it to the request and make netfs_free_request() drop the ref on
   it.  netfs_writepages() then becomes a wrapper that passes in a NULL
   group.

 - In netfs_perform_write(), only consider a folio to have a conflicting
   group if the folio's group pointer isn't NULL and if the folio is dirty.

 - In netfs_perform_write(), interject a small 10ms sleep after every 16
   attempts to flush a folio within a single call.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: Viacheslav Dubeyko <slava@dubeyko.com>
cc: Alex Markuze <amarkuze@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: ceph-devel@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
---
 fs/netfs/buffered_write.c | 25 ++++-------------
 fs/netfs/internal.h       | 32 ---------------------
 fs/netfs/objects.c        |  1 +
 fs/netfs/write_issue.c    | 38 +++++++++++++++++++++----
 include/linux/netfs.h     | 59 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 98 insertions(+), 57 deletions(-)

diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 0245449b93e3..12ddbe9bc78b 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -11,26 +11,9 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/pagevec.h>
+#include <linux/delay.h>
 #include "internal.h"
 
-static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
-	if (netfs_group)
-		folio_attach_private(folio, netfs_get_group(netfs_group));
-}
-
-static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
-	void *priv = folio_get_private(folio);
-
-	if (unlikely(priv != netfs_group)) {
-		if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
-			folio_attach_private(folio, netfs_get_group(netfs_group));
-		else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
-			folio_detach_private(folio);
-	}
-}
-
 /*
  * Grab a folio for writing and lock it.  Attempt to allocate as large a folio
  * as possible to hold as much of the remaining length as possible in one go.
@@ -113,6 +96,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	};
 	struct netfs_io_request *wreq = NULL;
 	struct folio *folio = NULL, *writethrough = NULL;
+	unsigned int flush_counter = 0;
 	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
 	ssize_t written = 0, ret, ret2;
 	loff_t i_size, pos = iocb->ki_pos;
@@ -208,7 +192,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 		group = netfs_folio_group(folio);
 
 		if (unlikely(group != netfs_group) &&
-		    group != NETFS_FOLIO_COPY_TO_CACHE)
+		    group != NETFS_FOLIO_COPY_TO_CACHE &&
+		    (group || folio_test_dirty(folio)))
 			goto flush_content;
 
 		if (folio_test_uptodate(folio)) {
@@ -341,6 +326,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 		trace_netfs_folio(folio, netfs_flush_content);
 		folio_unlock(folio);
 		folio_put(folio);
+		if ((++flush_counter & 0xf) == 0xf)
+			msleep(10);
 		ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1);
 		if (ret < 0)
 			goto error_folio_unlock;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eebb4f0f660e..2a6123c4da35 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -261,38 +261,6 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
 #endif
 }
 
-/*
- * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
-{
-	if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
-		refcount_inc(&netfs_group->ref);
-	return netfs_group;
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group(struct netfs_group *netfs_group)
-{
-	if (netfs_group &&
-	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
-	    refcount_dec_and_test(&netfs_group->ref))
-		netfs_group->free(netfs_group);
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
-{
-	if (netfs_group &&
-	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
-	    refcount_sub_and_test(nr, &netfs_group->ref))
-		netfs_group->free(netfs_group);
-}
-
 /*
  * Check to see if a buffer aligns with the crypto block size.  If it doesn't
  * the crypto layer is going to copy all the data - in which case relying on
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 52d6fce70837..7fdbaa5c5cab 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -153,6 +153,7 @@ static void netfs_free_request(struct work_struct *work)
 		kvfree(rreq->direct_bv);
 	}
 
+	netfs_put_group(rreq->group);
 	rolling_buffer_clear(&rreq->buffer);
 	rolling_buffer_clear(&rreq->bounce);
 	if (test_bit(NETFS_RREQ_PUT_RMW_TAIL, &rreq->flags))
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 93601033ba08..3921fcf4f859 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -418,7 +418,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
 		netfs_issue_write(wreq, upload);
 	} else if (fgroup != wreq->group) {
 		/* We can't write this page to the server yet. */
-		kdebug("wrong group");
+		kdebug("wrong group %px != %px", fgroup, wreq->group);
 		folio_redirty_for_writepage(wbc, folio);
 		folio_unlock(folio);
 		netfs_issue_write(wreq, upload);
@@ -593,11 +593,19 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
 		netfs_wake_write_collector(wreq, false);
 }
 
-/*
- * Write some of the pending data back to the server
+/**
+ * netfs_writepages_group - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ * @group: The write grouping to flush (or NULL)
+ *
+ * Start asynchronous write back operations to flush dirty data belonging to a
+ * particular group in a file's pagecache back to the server and to the local
+ * cache.
  */
-int netfs_writepages(struct address_space *mapping,
-		     struct writeback_control *wbc)
+int netfs_writepages_group(struct address_space *mapping,
+			   struct writeback_control *wbc,
+			   struct netfs_group *group)
 {
 	struct netfs_inode *ictx = netfs_inode(mapping->host);
 	struct netfs_io_request *wreq = NULL;
@@ -618,12 +626,15 @@ int netfs_writepages(struct address_space *mapping,
 	if (!folio)
 		goto out;
 
-	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
+	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio),
+				      NETFS_WRITEBACK);
 	if (IS_ERR(wreq)) {
 		error = PTR_ERR(wreq);
 		goto couldnt_start;
 	}
 
+	wreq->group = netfs_get_group(group);
+
 	trace_netfs_write(wreq, netfs_write_trace_writeback);
 	netfs_stat(&netfs_n_wh_writepages);
 
@@ -659,6 +670,21 @@ int netfs_writepages(struct address_space *mapping,
 	_leave(" = %d", error);
 	return error;
 }
+EXPORT_SYMBOL(netfs_writepages_group);
+
+/**
+ * netfs_writepages - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ *
+ * Start asynchronous write back operations to flush dirty data in a file's
+ * pagecache back to the server and to the local cache.
+ */
+int netfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc)
+{
+	return netfs_writepages_group(mapping, wbc, NULL);
+}
 EXPORT_SYMBOL(netfs_writepages);
 
 /*
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index a67297de8a20..69052ac47ab1 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -457,6 +457,9 @@ int netfs_read_folio(struct file *, struct folio *);
 int netfs_write_begin(struct netfs_inode *, struct file *,
 		      struct address_space *, loff_t pos, unsigned int len,
 		      struct folio **, void **fsdata);
+int netfs_writepages_group(struct address_space *mapping,
+			   struct writeback_control *wbc,
+			   struct netfs_group *group);
 int netfs_writepages(struct address_space *mapping,
 		     struct writeback_control *wbc);
 bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
@@ -597,4 +600,60 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
 	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
 }
 
+/*
+ * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+	if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
+		refcount_inc(&netfs_group->ref);
+	return netfs_group;
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+	if (netfs_group &&
+	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+	    refcount_dec_and_test(&netfs_group->ref))
+		netfs_group->free(netfs_group);
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+	if (netfs_group &&
+	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+	    refcount_sub_and_test(nr, &netfs_group->ref))
+		netfs_group->free(netfs_group);
+}
+
+/*
+ * Set the group pointer directly on a folio.
+ */
+static inline void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+	if (netfs_group)
+		folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+/*
+ * Set the group pointer on a folio or the folio info record.
+ */
+static inline void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+	void *priv = folio_get_private(folio);
+
+	if (unlikely(priv != netfs_group)) {
+		if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
+			folio_attach_private(folio, netfs_get_group(netfs_group));
+		else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+			folio_detach_private(folio);
+	}
+}
+
 #endif /* _LINUX_NETFS_H */


  parent reply	other threads:[~2025-03-13 23:35 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-13 23:32 [RFC PATCH 00/35] ceph, rbd, netfs: Make ceph fully use netfslib David Howells
2025-03-13 23:32 ` [RFC PATCH 01/35] ceph: Fix incorrect flush end position calculation David Howells
2025-03-13 23:32 ` [RFC PATCH 02/35] libceph: Rename alignment to offset David Howells
2025-03-14 19:04   ` Viacheslav Dubeyko
2025-03-14 20:01     ` David Howells
2025-03-13 23:32 ` [RFC PATCH 03/35] libceph: Add a new data container type, ceph_databuf David Howells
2025-03-14 20:06   ` Viacheslav Dubeyko
2025-03-17 11:27     ` David Howells
2025-03-13 23:32 ` [RFC PATCH 04/35] ceph: Convert ceph_mds_request::r_pagelist to a databuf David Howells
2025-03-14 22:27   ` slava
2025-03-17 11:52     ` David Howells
2025-03-20 20:34       ` Viacheslav Dubeyko
2025-03-20 22:01         ` David Howells
2025-03-13 23:32 ` [RFC PATCH 05/35] libceph: Add functions to add ceph_databufs to requests David Howells
2025-03-13 23:32 ` [RFC PATCH 06/35] rbd: Use ceph_databuf for rbd_obj_read_sync() David Howells
2025-03-17 19:08   ` Viacheslav Dubeyko
2025-04-11 13:48     ` David Howells
2025-03-13 23:32 ` [RFC PATCH 07/35] libceph: Change ceph_osdc_call()'s reply to a ceph_databuf David Howells
2025-03-17 19:41   ` Viacheslav Dubeyko
2025-03-17 22:12     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 08/35] libceph: Unexport osd_req_op_cls_request_data_pages() David Howells
2025-03-13 23:33 ` [RFC PATCH 09/35] libceph: Remove osd_req_op_cls_response_data_pages() David Howells
2025-03-13 23:33 ` [RFC PATCH 10/35] libceph: Convert notify_id_pages to a ceph_databuf David Howells
2025-03-13 23:33 ` [RFC PATCH 11/35] ceph: Use ceph_databuf in DIO David Howells
2025-03-17 20:03   ` Viacheslav Dubeyko
2025-03-17 22:26     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 12/35] libceph: Bypass the messenger-v1 Tx loop for databuf/iter data blobs David Howells
2025-03-13 23:33 ` [RFC PATCH 13/35] rbd: Switch from using bvec_iter to iov_iter David Howells
2025-03-18 19:38   ` Viacheslav Dubeyko
2025-03-18 22:13     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 14/35] libceph: Remove bvec and bio data container types David Howells
2025-03-13 23:33 ` [RFC PATCH 15/35] libceph: Make osd_req_op_cls_init() use a ceph_databuf and map it David Howells
2025-03-13 23:33 ` [RFC PATCH 16/35] libceph: Convert req_page of ceph_osdc_call() to ceph_databuf David Howells
2025-03-13 23:33 ` [RFC PATCH 17/35] libceph, rbd: Use ceph_databuf encoding start/stop David Howells
2025-03-18 19:59   ` Viacheslav Dubeyko
2025-03-18 22:19     ` David Howells
2025-03-20 21:45       ` Viacheslav Dubeyko
2025-03-13 23:33 ` [RFC PATCH 18/35] libceph, rbd: Convert some page arrays to ceph_databuf David Howells
2025-03-18 20:02   ` Viacheslav Dubeyko
2025-03-18 22:25     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 19/35] libceph, ceph: Convert users of ceph_pagelist " David Howells
2025-03-18 20:09   ` Viacheslav Dubeyko
2025-03-18 22:27     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 20/35] libceph: Remove ceph_pagelist David Howells
2025-03-13 23:33 ` [RFC PATCH 21/35] libceph: Make notify code use ceph_databuf_enc_start/stop David Howells
2025-03-18 20:12   ` Viacheslav Dubeyko
2025-03-18 22:36     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 22/35] libceph, rbd: Convert ceph_osdc_notify() reply to ceph_databuf David Howells
2025-03-19  0:08   ` Viacheslav Dubeyko
2025-03-20 14:44     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 23/35] rbd: Use ceph_databuf_enc_start/stop() David Howells
2025-03-19  0:32   ` Viacheslav Dubeyko
2025-03-20 14:59     ` Why use plain numbers and totals rather than predef'd constants for RPC sizes? David Howells
2025-03-20 21:48       ` Viacheslav Dubeyko
2025-03-13 23:33 ` [RFC PATCH 24/35] ceph: Make ceph_calc_file_object_mapping() return size as size_t David Howells
2025-03-13 23:33 ` [RFC PATCH 25/35] ceph: Wrap POSIX_FADV_WILLNEED to get caps David Howells
2025-03-13 23:33 ` [RFC PATCH 26/35] ceph: Kill ceph_rw_context David Howells
2025-03-13 23:33 ` [RFC PATCH 27/35] netfs: Pass extra write context to write functions David Howells
2025-03-13 23:33 ` David Howells [this message]
2025-03-19 18:57   ` [RFC PATCH 28/35] netfs: Adjust group handling Viacheslav Dubeyko
2025-03-20 15:22     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 29/35] netfs: Allow fs-private data to be handed through to request alloc David Howells
2025-03-13 23:33 ` [RFC PATCH 30/35] netfs: Make netfs_page_mkwrite() use folio_mkwrite_check_truncate() David Howells
2025-03-13 23:33 ` [RFC PATCH 31/35] netfs: Fix netfs_unbuffered_read() to return ssize_t rather than int David Howells
2025-03-13 23:33 ` [RFC PATCH 32/35] netfs: Add some more RMW support for ceph David Howells
2025-03-19 19:14   ` Viacheslav Dubeyko
2025-03-20 15:25     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 33/35] ceph: Use netfslib [INCOMPLETE] David Howells
2025-03-19 19:54   ` Viacheslav Dubeyko
2025-03-20 15:38     ` David Howells
2025-03-13 23:33 ` [RFC PATCH 34/35] ceph: Enable multipage folios for ceph files David Howells
2025-03-13 23:33 ` [RFC PATCH 35/35] ceph: Remove old I/O API bits David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250313233341.1675324-29-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=amarkuze@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=dongsheng.yang@easystack.cn \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=slava@dubeyko.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox