From: Valerie Aurora <val@versity.com>
To: rpdfs-devel@lists.linux.dev
Subject: [PATCH 6/6] rpdfs: add read_folio, dirty_folio, write_begin, write_end
Date: Thu, 7 May 2026 15:21:53 +0200 [thread overview]
Message-ID: <20260507132153.1161324-7-val@versity.com> (raw)
In-Reply-To: <20260507132153.1161324-1-val@versity.com>
This commit completes basic file data read/write support, including
avoidance of deadlock in read_folio. It does not include
->writepages() or readahead().
Signed-off-by: Valerie Aurora <val@versity.com>
---
fs/rpdfs/data.c | 245 +++++++++++++++++++++++++++++++++++++++++++++++
fs/rpdfs/inode.c | 1 +
2 files changed, 246 insertions(+)
diff --git a/fs/rpdfs/data.c b/fs/rpdfs/data.c
index b1ccf77ced7a..b3647e1ff42f 100644
--- a/fs/rpdfs/data.c
+++ b/fs/rpdfs/data.c
@@ -351,3 +351,248 @@ static int get_or_alloc_data_block(struct rpdfs_fs_info *rfi, struct rpdfs_trans
ret, rpdfs_inode_ino(inode), lblk, refs ? le64_to_cpu(refs[0].bnr) : 0, *hnd_ret);
return ret;
}
+
+/*
+ * We have to avoid a potential deadlock between the kernel's lock on
+ * each folio and our cache consistency algorithm. The order of
+ * acquisition on a read or write or similar operation is:
+ *
+ * 1. kernel grabs folio lock, calls file system routine
+ * 2. local node attempts to get access to a block
+ *
+ * But invalidation of a page has this order:
+ *
+ * 1. remote node requests exclusive access to a block
+ * 2. local node receives cache invalidate message and tries to get folio lock
+ *
+ * If the local node is trying to get access to a block while another
+ * node tries to get exclusive access, we could end up with:
+ *
+ * 1. local node holds folio lock, can't get access to block
+ * 2. local node attempts to service invalidate request but can't get folio lock
+ *
+ * The solution is to acquire all blocks in non-blocking mode. If that
+ * fails, drop the folio lock and acquire the block in blocking mode,
+ * then release it and return AOP_TRUNCATED_PAGE. This return code means
+ * "the page was truncated away beneath me, please retry." The page
+ * cache will restart the read_folio operation, which will likely
+ * succeed (as long as no other node has requested the block since then).
+ *
+ * Note that readahead() should satisfy most read requests
+ * asynchronously, leaving us to do any remaining synchronous requests
+ * in read_folio().
+ */
+static int rpdfs_read_folio(struct file *file, struct folio *folio)
+{
+	struct inode *inode = folio->mapping->host;
+	struct rpdfs_fs_info *rfi = RPDFS_INODE_FS(inode);
+	loff_t pos = folio_pos(folio);
+	size_t len = folio_size(folio);
+	struct rpdfs_block_handle *inode_hnd = NULL;
+	struct rpdfs_block_handle *blk_hnd = NULL;
+	rbaf_t rbaf = RBAF_NONBLOCK_MODE;
+	u64 lblk;
+	int ret;
+
+	lblk = lblk_from_offset(pos);
+
+	rpdfs_prd("ino %llu lblk %llu pos %lld len %zu",
+		  rpdfs_inode_ino(inode), lblk, pos, len);
+
+	/* we turn off atime always, inode will not be written */
+	ret = rpdfs_inode_acquire(rfi, NULL, inode, &inode_hnd, RBAF_NONBLOCK_MODE);
+	if (ret == -EAGAIN) {
+		/*
+		 * Deadlock avoidance (see comment above): drop the folio
+		 * lock, acquire blocking, release, and ask the page cache
+		 * to retry via AOP_TRUNCATED_PAGE.
+		 */
+		folio_unlock(folio);
+
+		rpdfs_prd("could not acquire ino %llu non-blocking, ret %d, retrying",
+			  rpdfs_inode_ino(inode), ret);
+
+		ret = rpdfs_inode_acquire(rfi, NULL, inode, &inode_hnd, 0);
+		if (ret == 0) {
+			rpdfs_block_release(rfi, &inode_hnd);
+			ret = AOP_TRUNCATED_PAGE;
+		}
+		goto out;
+	}
+	if (ret < 0)
+		goto out_unlock;
+
+	ret = get_or_alloc_data_block(rfi, NULL, inode, inode_hnd, lblk, rbaf, &blk_hnd);
+	if (ret == -EAGAIN) {
+		/* same retry dance for the data block itself */
+		folio_unlock(folio);
+
+		rpdfs_prd("could not acquire ino %llu lblk %llu non-blocking, ret %d, retrying",
+			  rpdfs_inode_ino(inode), lblk, ret);
+
+		rbaf &= ~RBAF_NONBLOCK_MODE;
+		ret = get_or_alloc_data_block(rfi, NULL, inode, inode_hnd, lblk, rbaf, &blk_hnd);
+		if (ret == 0) {
+			rpdfs_block_release(rfi, &blk_hnd);
+			ret = AOP_TRUNCATED_PAGE;
+		}
+		goto out;
+	}
+	if (ret < 0)
+		goto out_unlock;
+
+	/*
+	 * Copy the block into the folio; an unmapped block reads as
+	 * zeroes.  The debug print must stay inside the blk_hnd branch:
+	 * dereferencing blk_hnd->data unconditionally would be a NULL
+	 * pointer dereference for the hole case.
+	 */
+	if (blk_hnd) {
+		memcpy(folio_address(folio), blk_hnd->data, len);
+		rpdfs_prd("copied %4s len %zu to %4s", (char *) blk_hnd->data, len,
+			  (char *) folio_address(folio));
+	} else {
+		memset(folio_address(folio), 0, len);
+	}
+
+	folio_mark_uptodate(folio);
+out_unlock:
+	folio_unlock(folio);
+out:
+	/* release handles; block_release is presumably a no-op on NULL handles */
+	rpdfs_block_release(rfi, &blk_hnd);
+	rpdfs_block_release(rfi, &inode_hnd);
+
+	rpdfs_prd("ret %d", ret);
+
+	return ret;
+}
+
+/*
+ * Mark a folio dirty for later writeback.  Use the mapping the VFS
+ * passed in rather than re-deriving it with folio_mapping(): the
+ * caller already holds the folio locked against its mapping, and the
+ * passed-in mapping is what the print above reports on.
+ */
+static bool rpdfs_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+	rpdfs_prd("ino %lu index %lu", mapping->host->i_ino, folio->index);
+
+	return filemap_dirty_folio(mapping, folio);
+}
+
+
+/*
+ * Info to be passed from write_begin to write_end to complete the write
+ * within a transaction.
+ */
+struct rpdfs_write_cb {
+	/*
+	 * Inode block handle.  NOTE(review): zeroed by kzalloc in
+	 * write_begin and never assigned there, yet write_end passes it
+	 * to rpdfs_inode_update()/rpdfs_block_release() — confirm the
+	 * inode block is acquired elsewhere or NULL handles are valid.
+	 */
+	struct rpdfs_block_handle *inode_hnd;
+	/* data block acquired in write_begin, released in write_end */
+	struct rpdfs_block_handle *blk_hnd;
+	/* transaction spanning write_begin .. write_end */
+	struct rpdfs_transaction txn;
+};
+
+/*
+ * Do whatever preparation is necessary to allocate space for a
+ * write. In the future it might check quotas, file system error state,
+ * etc.
+ *
+ * Called with the inode block already acquired read/write. Returns a
+ * locked folio on success.
+ */
+static int rpdfs_write_begin(struct file *file, struct address_space *mapping,
+			     loff_t pos, unsigned len,
+			     struct folio **foliop, void **fsdata)
+{
+	struct inode *inode = mapping->host;
+	struct rpdfs_fs_info *rfi = RPDFS_INODE_FS(inode);
+	struct folio *folio = NULL;
+	struct rpdfs_write_cb *cb;
+	rbaf_t rbaf;
+	u64 lblk;
+	int ret;
+	unsigned offset;
+
+	lblk = lblk_from_offset(pos);
+
+	rpdfs_prd("ino %llu lblk %llu pos %lld len %u", rpdfs_inode_ino(inode), lblk, pos, len);
+
+	/* allocate txn and pass to write_end for updating inode i_size/times */
+	cb = kzalloc(sizeof(struct rpdfs_write_cb), GFP_NOFS);
+	if (!cb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* FGP_WRITEBEGIN returns the folio locked; unlocked in write_end or below on error */
+	folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
+				    mapping_gfp_mask(mapping));
+	if (IS_ERR(folio)) {
+		ret = PTR_ERR(folio);
+		goto out;
+	}
+
+	offset = offset_in_folio(folio, pos);
+	/* XXX use pos/len/offset to figure out when it is an overwrite */
+	rbaf = RBAF_WRITE;
+
+	/*
+	 * NOTE(review): cb->inode_hnd is still NULL here (kzalloc above,
+	 * never assigned in this function), yet it is passed as the inode
+	 * handle and later consumed by write_end via rpdfs_inode_update().
+	 * The header comment says "Called with the inode block already
+	 * acquired" — confirm who stores the handle into cb->inode_hnd.
+	 */
+	ret = get_or_alloc_data_block(rfi, &cb->txn, inode, cb->inode_hnd, lblk, rbaf, &cb->blk_hnd);
+	if (ret < 0)
+		goto out;
+
+	*foliop = folio;
+	*fsdata = cb;
+	ret = len;
+out:
+	/* single error path: tear down whichever of txn/cb/folio were set up */
+	if (ret < 0) {
+		if (cb) {
+			/* txn was zeroed by kzalloc; finish is presumably safe on an unused txn */
+			rpdfs_txn_finish(rfi, &cb->txn);
+			kfree(cb);
+		}
+		if (!IS_ERR_OR_NULL(folio)) {
+			folio_unlock(folio);
+			folio_put(folio);
+		}
+		*foliop = NULL;
+	}
+	return ret;
+}
+
+static int rpdfs_write_end(struct file *file, struct address_space *mapping,
+			   loff_t pos, unsigned len, unsigned copied,
+			   struct folio *folio, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	struct rpdfs_fs_info *rfi = RPDFS_INODE_FS(inode);
+	struct rpdfs_write_cb *cb = fsdata;
+	loff_t old_size = inode->i_size;
+	bool i_size_changed = false;
+	unsigned offset;
+
+	offset = offset_in_folio(folio, pos);
+	/*
+	 * Copy the data from the folio into the block.  Copy only the
+	 * 'copied' bytes that were actually transferred: on a short copy
+	 * (copied < len) the folio bytes beyond 'copied' were never
+	 * written by the caller and must not be propagated into the
+	 * block.
+	 *
+	 * TODO: replace with writepages.
+	 */
+	memcpy(cb->blk_hnd->data + offset, folio_address(folio) + offset, copied);
+
+	rpdfs_prd("copied %4s len %u to %4s", (char *) folio_address(folio) + offset, copied,
+		  (char *) cb->blk_hnd->data + offset);
+
+	/* extend i_size if this write went past the current EOF */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = true;
+	}
+
+	folio_mark_dirty(folio);
+	folio_unlock(folio);
+	folio_put(folio);
+
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
+
+	/* mark inode dirty outside of folio lock for performance reasons */
+	if (i_size_changed)
+		mark_inode_dirty(inode);
+
+	/* finalize changes to the inode and block */
+	rpdfs_block_release(rfi, &cb->blk_hnd);
+	rpdfs_inode_update(rfi, inode, cb->inode_hnd);
+	rpdfs_block_release(rfi, &cb->inode_hnd);
+
+	rpdfs_txn_finish(rfi, &cb->txn);
+	kfree(cb);
+
+	rpdfs_prd("i_size %lld copied %u", i_size_read(inode), copied);
+
+	return copied;
+}
+
+/*
+ * Basic buffered-I/O address space operations.  No ->writepages() or
+ * ->readahead() yet (see commit message); read_folio handles all
+ * synchronous reads and write_begin/write_end handle buffered writes.
+ */
+const struct address_space_operations rpdfs_aops = {
+	.read_folio = rpdfs_read_folio,
+	.dirty_folio = rpdfs_dirty_folio,
+	.write_begin = rpdfs_write_begin,
+	.write_end = rpdfs_write_end,
+};
diff --git a/fs/rpdfs/inode.c b/fs/rpdfs/inode.c
index fd0913e0e4b6..ec7f1e64ba18 100644
--- a/fs/rpdfs/inode.c
+++ b/fs/rpdfs/inode.c
@@ -163,6 +163,7 @@ void rpdfs_inode_init_ops(struct inode *inode)
case S_IFREG:
inode->i_op = &rpdfs_file_iops;
inode->i_fop = &rpdfs_file_fops;
+ inode->i_mapping->a_ops = &rpdfs_aops;
break;
case S_IFDIR:
inode->i_op = &rpdfs_dir_iops;
--
2.49.0
prev parent reply other threads:[~2026-05-07 13:22 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-07 13:21 [PATCH 0/6] File data read/write version 2 Valerie Aurora
2026-05-07 13:21 ` [PATCH 1/6] rpdfs: add rpdfs_file_llseek Valerie Aurora
2026-05-07 13:21 ` [PATCH 2/6] rpdfs: add inode change debugging routine Valerie Aurora
2026-05-07 13:21 ` [PATCH 3/6] rpdfs: add basic file data initialization Valerie Aurora
2026-05-07 13:21 ` [PATCH 4/6] rpdfs: add file data allocation and lookup routines Valerie Aurora
2026-05-07 13:21 ` [PATCH 5/6] rpdfs: add rpdfs_write_iter/rpdfs_read_iter Valerie Aurora
2026-05-07 13:21 ` Valerie Aurora [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260507132153.1161324-7-val@versity.com \
--to=val@versity.com \
--cc=rpdfs-devel@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.