linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeffle Xu <jefflexu@linux.alibaba.com>
To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org,
	chao@kernel.org, linux-erofs@lists.ozlabs.org
Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org,
	willy@infradead.org, linux-fsdevel@vger.kernel.org,
	joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com,
	tao.peng@linux.alibaba.com, gerry@linux.alibaba.com,
	eguan@linux.alibaba.com, linux-kernel@vger.kernel.org
Subject: [PATCH v4 05/21] cachefiles: implement on-demand read
Date: Mon,  7 Mar 2022 20:32:49 +0800	[thread overview]
Message-ID: <20220307123305.79520-6-jefflexu@linux.alibaba.com> (raw)
In-Reply-To: <20220307123305.79520-1-jefflexu@linux.alibaba.com>

Implement the data plane of on-demand read mode.

A new NETFS_READ_HOLE_ONDEMAND flag is introduced to indicate that an
on-demand read should be done when a cache miss is encountered. In this
case, the read routine sends a READ request to the user daemon, along
with the anonymous fd and the file range that shall be read. The user
daemon is then responsible for fetching the data in the given file range
and writing the fetched data into the cache file through the given
anonymous fd.

After sending the READ request, the read routine blocks until the READ
request has been handled by the user daemon. It then retries the read on
the same file range. If a cache miss is encountered again on the same
file range, the read fails.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 fs/cachefiles/daemon.c          | 98 +++++++++++++++++++++++++++++++++
 fs/cachefiles/internal.h        |  8 +++
 fs/cachefiles/io.c              | 11 ++++
 include/linux/netfs.h           |  1 +
 include/uapi/linux/cachefiles.h |  7 +++
 5 files changed, 125 insertions(+)

diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 72a21942aaf6..36ddf64d5e62 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -46,6 +46,7 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *, char *);
 static void cachefiles_daemon_unbind(struct cachefiles_cache *);
 #ifdef CONFIG_CACHEFILES_ONDEMAND
 static int cachefiles_ondemand_cinit(struct cachefiles_cache *, char *);
+static int cachefiles_ondemand_cread(struct cachefiles_cache *, char *);
 #endif
 
 static unsigned long cachefiles_open;
@@ -81,6 +82,7 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = {
 	{ "tag",	cachefiles_daemon_tag		},
 #ifdef CONFIG_CACHEFILES_ONDEMAND
 	{ "cinit",	cachefiles_ondemand_cinit	},
+	{ "cread",	cachefiles_ondemand_cread	},
 #endif
 	{ "",		NULL				}
 };
@@ -139,6 +141,9 @@ bool cachefiles_ondemand_daemon_bind(struct cachefiles_cache *cache, char *args)
 static int cachefiles_ondemand_fd_release(struct inode *inode, struct file *file)
 {
 	struct cachefiles_object *object = file->private_data;
+	struct cachefiles_cache *cache = object->volume->cache;
+	struct cachefiles_req *req;
+	unsigned long index;
 
 	/*
 	 * Uninstall anon_fd to the cachefiles object, so that no further
@@ -146,6 +151,15 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, struct file *file
 	 */
 	object->fd = -1;
 
+	/* complete all associated pending requests */
+	xa_for_each(&cache->reqs, index, req) {
+		if (req->object == object &&
+		    req->msg.opcode == CACHEFILES_OP_READ) {
+			req->error = -EIO;
+			complete(&req->done);
+		}
+	}
+
 	cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd);
 	return 0;
 }
@@ -261,6 +275,36 @@ static int cachefiles_ondemand_cinit(struct cachefiles_cache *cache, char *args)
 	return ret;
 }
 
+/*
+ * Read request completion
+ * - command: "cread <id>"
+ */
+static int cachefiles_ondemand_cread(struct cachefiles_cache *cache, char *args)
+{
+	struct cachefiles_req *req;
+	unsigned long id;
+	int ret;
+
+	if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
+		return -EOPNOTSUPP;
+
+	if (!*args) {
+		pr_err("Empty id specified\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtoul(args, 0, &id);
+	if (ret)
+		return ret;
+
+	req = xa_erase(&cache->reqs, id);
+	if (!req)
+		return -EINVAL;
+
+	complete(&req->done);
+	return 0;
+}
+
 static int cachefiles_ondemand_get_fd(struct cachefiles_req *req)
 {
 	struct cachefiles_init *init;
@@ -460,6 +504,60 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
 	return ret;
 }
 
+static struct cachefiles_req *
+cachefiles_alloc_read_req(struct cachefiles_object *object,
+			  loff_t pos, size_t len)
+{
+	struct cachefiles_req *req;
+	struct cachefiles_read *read;
+	int fd = object->fd;
+
+	/* Stop enqueuing requests when the daemon closes anon_fd prematurely. */
+	if (WARN_ON_ONCE(fd == -1))
+		return NULL;
+
+	req = cachefiles_alloc_req(object, CACHEFILES_OP_READ, sizeof(*read));
+	if (!req)
+		return NULL;
+
+	read = (void *)&req->msg.data;
+	read->off = pos;
+	read->len = len;
+	read->fd  = fd;
+
+	return req;
+}
+
+int cachefiles_ondemand_read(struct cachefiles_object *object,
+			     loff_t pos, size_t len)
+{
+	struct cachefiles_cache *cache = object->volume->cache;
+	struct cachefiles_req *req;
+	int ret;
+
+	ret = cachefiles_ondemand_check(cache);
+	if (ret)
+		return ret;
+
+	req = cachefiles_alloc_read_req(object, pos, len);
+	if (!req)
+		return -ENOMEM;
+
+	/*
+	 * 1) Checking object->fd and 2) enqueuing request into xarray, is not
+	 * atomic as a whole here. Thus similarly, when anon_fd is closed, it's
+	 * possible that a new request may be enqueued into xarray, after
+	 * associated requests in xarray have already been flushed. But it won't
+	 * cause infinite hang since user daemon will still fetch and handle
+	 * this request. And since the anon_fd has already been closed, any
+	 * following file operation with this anon_fd will fail in this case.
+	 */
+	ret = cachefiles_ondemand_send_req(cache, req);
+
+	kfree(req);
+	return ret;
+}
+
 #else
 static inline void cachefiles_ondemand_open(struct cachefiles_cache *cache) {}
 static inline void cachefiles_ondemand_release(struct cachefiles_cache *cache) {}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 8450ebd77949..5f336ec15cea 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -167,6 +167,8 @@ extern const struct file_operations cachefiles_daemon_fops;
 
 #ifdef CONFIG_CACHEFILES_ONDEMAND
 extern int cachefiles_ondemand_init_object(struct cachefiles_object *object);
+extern int cachefiles_ondemand_read(struct cachefiles_object *object,
+				    loff_t pos, size_t len);
 
 #else
 static inline
@@ -174,6 +176,12 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
 {
 	return 0;
 }
+
+static inline int cachefiles_ondemand_read(struct cachefiles_object *object,
+					   loff_t pos, size_t len)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 /*
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 8dbc1eb254a3..ee1283ba7a2c 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -95,6 +95,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
 	       file, file_inode(file)->i_ino, start_pos, len,
 	       i_size_read(file_inode(file)));
 
+retry:
 	/* If the caller asked us to seek for data before doing the read, then
 	 * we should do that now.  If we find a gap, we fill it with zeros.
 	 */
@@ -119,6 +120,16 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
 			if (read_hole == NETFS_READ_HOLE_FAIL)
 				goto presubmission_error;
 
+			if (read_hole == NETFS_READ_HOLE_ONDEMAND) {
+				if (!cachefiles_ondemand_read(object, off, len)) {
+					/* fail the read if no progress achieved */
+					read_hole = NETFS_READ_HOLE_FAIL;
+					goto retry;
+				}
+
+				goto presubmission_error;
+			}
+
 			iov_iter_zero(len, iter);
 			skipped = len;
 			ret = 0;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 614f22213e21..2a9c50d3a928 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -203,6 +203,7 @@ enum netfs_read_from_hole {
 	NETFS_READ_HOLE_IGNORE,
 	NETFS_READ_HOLE_CLEAR,
 	NETFS_READ_HOLE_FAIL,
+	NETFS_READ_HOLE_ONDEMAND,
 };
 
 /*
diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h
index 759fb6693d75..88a78e9d001f 100644
--- a/include/uapi/linux/cachefiles.h
+++ b/include/uapi/linux/cachefiles.h
@@ -8,6 +8,7 @@
 
 enum cachefiles_opcode {
 	CACHEFILES_OP_INIT,
+	CACHEFILES_OP_READ,
 };
 
 /*
@@ -38,4 +39,10 @@ enum cachefiles_init_flags {
 
 #define CACHEFILES_INIT_FL_WANT_CACHE_SIZE	(1 << CACHEFILES_INIT_WANT_CACHE_SIZE)
 
+struct cachefiles_read {
+	__u64 off;
+	__u64 len;
+	__u32 fd;
+};
+
 #endif
-- 
2.27.0


  parent reply	other threads:[~2022-03-07 12:33 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-07 12:32 [PATCH v4 00/21] fscache,erofs: fscache-based on-demand read semantics Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 01/21] fscache: export fscache_end_operation() Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 02/21] cachefiles: export write routine Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 03/21] cachefiles: introduce on-demand read mode Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 04/21] cachefiles: notify user daemon with anon_fd when opening cache file Jeffle Xu
2022-03-07 12:32 ` Jeffle Xu [this message]
2022-03-07 12:32 ` [PATCH v4 06/21] cachefiles: document on-demand read mode Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 07/21] erofs: use meta buffers for erofs_read_superblock() Jeffle Xu
2022-03-11  7:36   ` Chao Yu
2022-03-07 12:32 ` [PATCH v4 08/21] erofs: export erofs_map_blocks() Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 09/21] erofs: add mode checking helper Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 10/21] erofs: register global fscache volume Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 11/21] erofs: add cookie context helper functions Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 12/21] erofs: add anonymous inode managing page cache of blob file Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 13/21] erofs: add erofs_fscache_read_pages() helper Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 14/21] erofs: register cookie context for bootstrap blob Jeffle Xu
2022-03-07 12:32 ` [PATCH v4 15/21] erofs: implement fscache-based metadata read Jeffle Xu
2022-03-07 12:33 ` [PATCH v4 16/21] erofs: implement fscache-based data read for non-inline layout Jeffle Xu
2022-03-07 12:33 ` [PATCH v4 17/21] erofs: implement fscache-based data read for inline layout Jeffle Xu
2022-03-07 12:33 ` [PATCH v4 18/21] erofs: register cookie context for data blobs Jeffle Xu
2022-03-07 12:33 ` [PATCH v4 19/21] erofs: implement fscache-based data read " Jeffle Xu
2022-03-07 12:33 ` [PATCH v4 20/21] erofs: implement fscache-based data readahead Jeffle Xu
2022-03-07 12:33 ` [PATCH v4 21/21] erofs: add 'uuid' mount option Jeffle Xu
2022-03-14  9:24 ` [PATCH v4 00/21] fscache,erofs: fscache-based on-demand read semantics luodaowen.backend
2022-03-18  9:42 ` Fan,Naihao
2022-03-18 11:48 ` JeffleXu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220307123305.79520-6-jefflexu@linux.alibaba.com \
    --to=jefflexu@linux.alibaba.com \
    --cc=bo.liu@linux.alibaba.com \
    --cc=chao@kernel.org \
    --cc=dhowells@redhat.com \
    --cc=eguan@linux.alibaba.com \
    --cc=gerry@linux.alibaba.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=joseph.qi@linux.alibaba.com \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-erofs@lists.ozlabs.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tao.peng@linux.alibaba.com \
    --cc=torvalds@linux-foundation.org \
    --cc=willy@infradead.org \
    --cc=xiang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).