From: David Howells <dhowells@redhat.com>
To: Viacheslav Dubeyko <slava@dubeyko.com>,
Alex Markuze <amarkuze@redhat.com>
Cc: David Howells <dhowells@redhat.com>,
Ilya Dryomov <idryomov@gmail.com>,
Jeff Layton <jlayton@kernel.org>,
Dongsheng Yang <dongsheng.yang@easystack.cn>,
ceph-devel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH 28/35] netfs: Adjust group handling
Date: Thu, 13 Mar 2025 23:33:20 +0000 [thread overview]
Message-ID: <20250313233341.1675324-29-dhowells@redhat.com> (raw)
In-Reply-To: <20250313233341.1675324-1-dhowells@redhat.com>
Make some adjustments to the handling of netfs groups so that ceph can use
them for snap contexts:
- Move netfs_get_group(), netfs_put_group() and netfs_put_group_many() to
linux/netfs.h so that ceph can build its snap context on netfs groups.
- Move netfs_set_group() and __netfs_set_group() to linux/netfs.h so that
ceph_dirty_folio() can call them from inside of the locked section in
which it finds the snap context to attach.
- Provide a netfs_writepages_group() that takes a group as a parameter and
attaches it to the request and make netfs_free_request() drop the ref on
it. netfs_writepages() then becomes a wrapper that passes in a NULL
group.
- In netfs_perform_write(), only consider a folio to have a conflicting
group if the folio's group pointer isn't NULL and if the folio is dirty.
- In netfs_perform_write(), interject a small 10ms sleep after every 16
attempts to flush a folio within a single call.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: Viacheslav Dubeyko <slava@dubeyko.com>
cc: Alex Markuze <amarkuze@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: ceph-devel@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
---
fs/netfs/buffered_write.c | 25 ++++-------------
fs/netfs/internal.h | 32 ---------------------
fs/netfs/objects.c | 1 +
fs/netfs/write_issue.c | 38 +++++++++++++++++++++----
include/linux/netfs.h | 59 +++++++++++++++++++++++++++++++++++++++
5 files changed, 98 insertions(+), 57 deletions(-)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 0245449b93e3..12ddbe9bc78b 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -11,26 +11,9 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
+#include <linux/delay.h>
#include "internal.h"
-static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- if (netfs_group)
- folio_attach_private(folio, netfs_get_group(netfs_group));
-}
-
-static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- void *priv = folio_get_private(folio);
-
- if (unlikely(priv != netfs_group)) {
- if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
- folio_attach_private(folio, netfs_get_group(netfs_group));
- else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
- folio_detach_private(folio);
- }
-}
-
/*
* Grab a folio for writing and lock it. Attempt to allocate as large a folio
* as possible to hold as much of the remaining length as possible in one go.
@@ -113,6 +96,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
};
struct netfs_io_request *wreq = NULL;
struct folio *folio = NULL, *writethrough = NULL;
+ unsigned int flush_counter = 0;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos;
@@ -208,7 +192,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
group = netfs_folio_group(folio);
if (unlikely(group != netfs_group) &&
- group != NETFS_FOLIO_COPY_TO_CACHE)
+ group != NETFS_FOLIO_COPY_TO_CACHE &&
+ (group || folio_test_dirty(folio)))
goto flush_content;
if (folio_test_uptodate(folio)) {
@@ -341,6 +326,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
trace_netfs_folio(folio, netfs_flush_content);
folio_unlock(folio);
folio_put(folio);
+ if ((++flush_counter & 0xf) == 0xf)
+ msleep(10);
ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1);
if (ret < 0)
goto error_folio_unlock;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eebb4f0f660e..2a6123c4da35 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -261,38 +261,6 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
#endif
}
-/*
- * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
-{
- if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
- refcount_inc(&netfs_group->ref);
- return netfs_group;
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group(struct netfs_group *netfs_group)
-{
- if (netfs_group &&
- netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
- refcount_dec_and_test(&netfs_group->ref))
- netfs_group->free(netfs_group);
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
-{
- if (netfs_group &&
- netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
- refcount_sub_and_test(nr, &netfs_group->ref))
- netfs_group->free(netfs_group);
-}
-
/*
* Check to see if a buffer aligns with the crypto block size. If it doesn't
* the crypto layer is going to copy all the data - in which case relying on
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 52d6fce70837..7fdbaa5c5cab 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -153,6 +153,7 @@ static void netfs_free_request(struct work_struct *work)
kvfree(rreq->direct_bv);
}
+ netfs_put_group(rreq->group);
rolling_buffer_clear(&rreq->buffer);
rolling_buffer_clear(&rreq->bounce);
if (test_bit(NETFS_RREQ_PUT_RMW_TAIL, &rreq->flags))
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 93601033ba08..3921fcf4f859 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -418,7 +418,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
netfs_issue_write(wreq, upload);
} else if (fgroup != wreq->group) {
/* We can't write this page to the server yet. */
- kdebug("wrong group");
+ kdebug("wrong group %px != %px", fgroup, wreq->group);
folio_redirty_for_writepage(wbc, folio);
folio_unlock(folio);
netfs_issue_write(wreq, upload);
@@ -593,11 +593,19 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
netfs_wake_write_collector(wreq, false);
}
-/*
- * Write some of the pending data back to the server
+/**
+ * netfs_writepages_group - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ * @group: The write grouping to flush (or NULL)
+ *
+ * Start asynchronous write back operations to flush dirty data belonging to a
+ * particular group in a file's pagecache back to the server and to the local
+ * cache.
*/
-int netfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+int netfs_writepages_group(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group)
{
struct netfs_inode *ictx = netfs_inode(mapping->host);
struct netfs_io_request *wreq = NULL;
@@ -618,12 +626,15 @@ int netfs_writepages(struct address_space *mapping,
if (!folio)
goto out;
- wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
+ wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio),
+ NETFS_WRITEBACK);
if (IS_ERR(wreq)) {
error = PTR_ERR(wreq);
goto couldnt_start;
}
+ wreq->group = netfs_get_group(group);
+
trace_netfs_write(wreq, netfs_write_trace_writeback);
netfs_stat(&netfs_n_wh_writepages);
@@ -659,6 +670,21 @@ int netfs_writepages(struct address_space *mapping,
_leave(" = %d", error);
return error;
}
+EXPORT_SYMBOL(netfs_writepages_group);
+
+/**
+ * netfs_writepages - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ *
+ * Start asynchronous write back operations to flush dirty data in a file's
+ * pagecache back to the server and to the local cache.
+ */
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return netfs_writepages_group(mapping, wbc, NULL);
+}
EXPORT_SYMBOL(netfs_writepages);
/*
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index a67297de8a20..69052ac47ab1 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -457,6 +457,9 @@ int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
struct address_space *, loff_t pos, unsigned int len,
struct folio **, void **fsdata);
+int netfs_writepages_group(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group);
int netfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
@@ -597,4 +600,60 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
}
+/*
+ * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+ if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
+ refcount_inc(&netfs_group->ref);
+ return netfs_group;
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_dec_and_test(&netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_sub_and_test(nr, &netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Set the group pointer directly on a folio.
+ */
+static inline void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ if (netfs_group)
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+/*
+ * Set the group pointer on a folio or the folio info record.
+ */
+static inline void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ void *priv = folio_get_private(folio);
+
+ if (unlikely(priv != netfs_group)) {
+ if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+ else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+ folio_detach_private(folio);
+ }
+}
+
#endif /* _LINUX_NETFS_H */
next prev parent reply other threads:[~2025-03-13 23:35 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-13 23:32 [RFC PATCH 00/35] ceph, rbd, netfs: Make ceph fully use netfslib David Howells
2025-03-13 23:32 ` [RFC PATCH 01/35] ceph: Fix incorrect flush end position calculation David Howells
2025-03-13 23:32 ` [RFC PATCH 02/35] libceph: Rename alignment to offset David Howells
2025-03-14 19:04 ` Viacheslav Dubeyko
2025-03-14 20:01 ` David Howells
2025-03-13 23:32 ` [RFC PATCH 03/35] libceph: Add a new data container type, ceph_databuf David Howells
2025-03-14 20:06 ` Viacheslav Dubeyko
2025-03-17 11:27 ` David Howells
2025-03-13 23:32 ` [RFC PATCH 04/35] ceph: Convert ceph_mds_request::r_pagelist to a databuf David Howells
2025-03-14 22:27 ` slava
2025-03-17 11:52 ` David Howells
2025-03-20 20:34 ` Viacheslav Dubeyko
2025-03-20 22:01 ` David Howells
2025-03-13 23:32 ` [RFC PATCH 05/35] libceph: Add functions to add ceph_databufs to requests David Howells
2025-03-13 23:32 ` [RFC PATCH 06/35] rbd: Use ceph_databuf for rbd_obj_read_sync() David Howells
2025-03-17 19:08 ` Viacheslav Dubeyko
2025-04-11 13:48 ` David Howells
2025-03-13 23:32 ` [RFC PATCH 07/35] libceph: Change ceph_osdc_call()'s reply to a ceph_databuf David Howells
2025-03-17 19:41 ` Viacheslav Dubeyko
2025-03-17 22:12 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 08/35] libceph: Unexport osd_req_op_cls_request_data_pages() David Howells
2025-03-13 23:33 ` [RFC PATCH 09/35] libceph: Remove osd_req_op_cls_response_data_pages() David Howells
2025-03-13 23:33 ` [RFC PATCH 10/35] libceph: Convert notify_id_pages to a ceph_databuf David Howells
2025-03-13 23:33 ` [RFC PATCH 11/35] ceph: Use ceph_databuf in DIO David Howells
2025-03-17 20:03 ` Viacheslav Dubeyko
2025-03-17 22:26 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 12/35] libceph: Bypass the messenger-v1 Tx loop for databuf/iter data blobs David Howells
2025-03-13 23:33 ` [RFC PATCH 13/35] rbd: Switch from using bvec_iter to iov_iter David Howells
2025-03-18 19:38 ` Viacheslav Dubeyko
2025-03-18 22:13 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 14/35] libceph: Remove bvec and bio data container types David Howells
2025-03-13 23:33 ` [RFC PATCH 15/35] libceph: Make osd_req_op_cls_init() use a ceph_databuf and map it David Howells
2025-03-13 23:33 ` [RFC PATCH 16/35] libceph: Convert req_page of ceph_osdc_call() to ceph_databuf David Howells
2025-03-13 23:33 ` [RFC PATCH 17/35] libceph, rbd: Use ceph_databuf encoding start/stop David Howells
2025-03-18 19:59 ` Viacheslav Dubeyko
2025-03-18 22:19 ` David Howells
2025-03-20 21:45 ` Viacheslav Dubeyko
2025-03-13 23:33 ` [RFC PATCH 18/35] libceph, rbd: Convert some page arrays to ceph_databuf David Howells
2025-03-18 20:02 ` Viacheslav Dubeyko
2025-03-18 22:25 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 19/35] libceph, ceph: Convert users of ceph_pagelist " David Howells
2025-03-18 20:09 ` Viacheslav Dubeyko
2025-03-18 22:27 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 20/35] libceph: Remove ceph_pagelist David Howells
2025-03-13 23:33 ` [RFC PATCH 21/35] libceph: Make notify code use ceph_databuf_enc_start/stop David Howells
2025-03-18 20:12 ` Viacheslav Dubeyko
2025-03-18 22:36 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 22/35] libceph, rbd: Convert ceph_osdc_notify() reply to ceph_databuf David Howells
2025-03-19 0:08 ` Viacheslav Dubeyko
2025-03-20 14:44 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 23/35] rbd: Use ceph_databuf_enc_start/stop() David Howells
2025-03-19 0:32 ` Viacheslav Dubeyko
2025-03-20 14:59 ` Why use plain numbers and totals rather than predef'd constants for RPC sizes? David Howells
2025-03-20 21:48 ` Viacheslav Dubeyko
2025-03-13 23:33 ` [RFC PATCH 24/35] ceph: Make ceph_calc_file_object_mapping() return size as size_t David Howells
2025-03-13 23:33 ` [RFC PATCH 25/35] ceph: Wrap POSIX_FADV_WILLNEED to get caps David Howells
2025-03-13 23:33 ` [RFC PATCH 26/35] ceph: Kill ceph_rw_context David Howells
2025-03-13 23:33 ` [RFC PATCH 27/35] netfs: Pass extra write context to write functions David Howells
2025-03-13 23:33 ` David Howells [this message]
2025-03-19 18:57 ` [RFC PATCH 28/35] netfs: Adjust group handling Viacheslav Dubeyko
2025-03-20 15:22 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 29/35] netfs: Allow fs-private data to be handed through to request alloc David Howells
2025-03-13 23:33 ` [RFC PATCH 30/35] netfs: Make netfs_page_mkwrite() use folio_mkwrite_check_truncate() David Howells
2025-03-13 23:33 ` [RFC PATCH 31/35] netfs: Fix netfs_unbuffered_read() to return ssize_t rather than int David Howells
2025-03-13 23:33 ` [RFC PATCH 32/35] netfs: Add some more RMW support for ceph David Howells
2025-03-19 19:14 ` Viacheslav Dubeyko
2025-03-20 15:25 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 33/35] ceph: Use netfslib [INCOMPLETE] David Howells
2025-03-19 19:54 ` Viacheslav Dubeyko
2025-03-20 15:38 ` David Howells
2025-03-13 23:33 ` [RFC PATCH 34/35] ceph: Enable multipage folios for ceph files David Howells
2025-03-13 23:33 ` [RFC PATCH 35/35] ceph: Remove old I/O API bits David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250313233341.1675324-29-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=amarkuze@redhat.com \
--cc=ceph-devel@vger.kernel.org \
--cc=dongsheng.yang@easystack.cn \
--cc=idryomov@gmail.com \
--cc=jlayton@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=slava@dubeyko.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox