Linux CXL
 help / color / mirror / Atom feed
From: John Groves <John@Groves.net>
To: John Groves <John@Groves.net>,
	Dan Williams <dan.j.williams@intel.com>,
	Miklos Szeredi <miklos@szeredb.hu>,
	Bernd Schubert <bschubert@ddn.com>
Cc: John Groves <jgroves@micron.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Vishal Verma <vishal.l.verma@intel.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>,
	"Darrick J . Wong" <djwong@kernel.org>,
	Luis Henriques <luis@igalia.com>,
	Randy Dunlap <rdunlap@infradead.org>,
	Jeff Layton <jlayton@kernel.org>,
	Kent Overstreet <kent.overstreet@linux.dev>,
	Petr Vorel <pvorel@suse.cz>, Brian Foster <bfoster@redhat.com>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	nvdimm@lists.linux.dev, linux-cxl@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,
	Amir Goldstein <amir73il@gmail.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Stefan Hajnoczi <shajnocz@redhat.com>,
	Joanne Koong <joannelkoong@gmail.com>,
	Josef Bacik <josef@toxicpanda.com>,
	Aravind Ramesh <arramesh@micron.com>,
	Ajay Joshi <ajayjoshi@micron.com>, John Groves <john@groves.net>
Subject: [RFC PATCH 12/19] famfs_fuse: Plumb the GET_FMAP message/response
Date: Sun, 20 Apr 2025 20:33:39 -0500	[thread overview]
Message-ID: <20250421013346.32530-13-john@groves.net> (raw)
In-Reply-To: <20250421013346.32530-1-john@groves.net>

Upon completion of a LOOKUP, if we're in famfs-mode we do a GET_FMAP to
retrieve and cache up the file-to-dax map in the kernel. If this
succeeds, read/write/mmap are resolved direct-to-dax with no upcalls.

Signed-off-by: John Groves <john@groves.net>
---
 fs/fuse/dir.c             | 69 +++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h          | 36 +++++++++++++++++++-
 fs/fuse/inode.c           | 15 +++++++++
 include/uapi/linux/fuse.h |  4 +++
 4 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bc29db0117f4..ae135c55b9f6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -359,6 +359,56 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
 }
 
+#define FMAP_BUFSIZE 4096
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+static void
+fuse_get_fmap_init(
+	struct fuse_conn *fc,
+	struct fuse_args *args,
+	u64 nodeid,
+	void *outbuf,
+	size_t outbuf_size)
+{
+	memset(outbuf, 0, outbuf_size);
+	args->opcode = FUSE_GET_FMAP;
+	args->nodeid = nodeid;
+
+	args->in_numargs = 0;
+
+	args->out_numargs = 1;
+	args->out_args[0].size = FMAP_BUFSIZE;
+	args->out_args[0].value = outbuf;
+}
+
+static int
+fuse_get_fmap(struct fuse_mount *fm, struct inode *inode, u64 nodeid)
+{
+	size_t fmap_size;
+	void *fmap_buf;
+	int err;
+
+	pr_notice("%s: nodeid=%lld, inode=%llx\n", __func__,
+		  nodeid, (u64)inode);
+	fmap_buf = kcalloc(1, FMAP_BUFSIZE, GFP_KERNEL);
+	FUSE_ARGS(args);
+	fuse_get_fmap_init(fm->fc, &args, nodeid, fmap_buf, FMAP_BUFSIZE);
+
+	/* Send GET_FMAP command */
+	err = fuse_simple_request(fm, &args);
+	if (err) {
+		pr_err("%s: err=%d from fuse_simple_request()\n",
+		       __func__, err);
+		return err;
+	}
+
+	fmap_size = args.out_args[0].size;
+	pr_notice("%s: nodei=%lld fmap_size=%ld\n", __func__, nodeid, fmap_size);
+
+	return 0;
+}
+#endif
+
 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
 		     struct fuse_entry_out *outarg, struct inode **inode)
 {
@@ -404,6 +454,25 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
 		goto out;
 	}
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	if (fm->fc->famfs_iomap) {
+		if (S_ISREG((*inode)->i_mode)) {
+			/* Note Lookup returns the looked-up inode in the attr
+			 * struct, but not in outarg->nodeid !
+			 */
+			pr_notice("%s: outarg: size=%d nodeid=%lld attr.ino=%lld\n",
+				 __func__, args.out_args[0].size, outarg->nodeid,
+				 outarg->attr.ino);
+			/* Get the famfs fmap */
+			fuse_get_fmap(fm, *inode, outarg->attr.ino);
+		} else
+			pr_notice("%s: no get_fmap for non-regular file\n",
+				 __func__);
+	} else
+		pr_notice("%s: fc->dax_iomap is not set\n", __func__);
+#endif
+
 	err = 0;
 
  out_put_forget:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 931613102d32..437177c2f092 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -193,6 +193,10 @@ struct fuse_inode {
 	/** Reference to backing file in passthrough mode */
 	struct fuse_backing *fb;
 #endif
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	void *famfs_meta;
+#endif
 };
 
 /** FUSE inode state bits */
@@ -942,6 +946,8 @@ struct fuse_conn {
 #endif
 
 #if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	struct rw_semaphore famfs_devlist_sem;
+	struct famfs_dax_devlist *dax_devlist;
 	char *shadow;
 #endif
 };
@@ -1432,11 +1438,14 @@ void fuse_free_conn(struct fuse_conn *fc);
 
 /* dax.c */
 
+static inline int fuse_file_famfs(struct fuse_inode *fi); /* forward */
+
 /* This macro is used by virtio_fs, but now it also needs to filter for
  * "not famfs"
  */
 #define FUSE_IS_VIRTIO_DAX(fuse_inode) (IS_ENABLED(CONFIG_FUSE_DAX)	\
-					&& IS_DAX(&fuse_inode->inode))
+					&& IS_DAX(&fuse_inode->inode)	\
+					&& !fuse_file_famfs(fuse_inode))
 
 ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
 ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
@@ -1547,4 +1556,29 @@ extern void fuse_sysctl_unregister(void);
 #define fuse_sysctl_unregister()	do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+/* famfs.c */
+static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
+						       void *meta)
+{
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	return xchg(&fi->famfs_meta, meta);
+#else
+	return NULL;
+#endif
+}
+
+static inline void famfs_meta_free(struct fuse_inode *fi)
+{
+	/* Stub wil be connected in a subsequent commit */
+}
+
+static inline int fuse_file_famfs(struct fuse_inode *fi)
+{
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	return (fi->famfs_meta != NULL);
+#else
+	return 0;
+#endif
+}
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7f4b73e739cb..848c8818e6f7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -117,6 +117,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_inode_backing_set(fi, NULL);
 
+	if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX))
+		famfs_meta_set(fi, NULL);
+
 	return &fi->inode;
 
 out_free_forget:
@@ -138,6 +141,13 @@ static void fuse_free_inode(struct inode *inode)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_backing_put(fuse_inode_backing(fi));
 
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	if (S_ISREG(inode->i_mode) && fi->famfs_meta) {
+		famfs_meta_free(fi);
+		famfs_meta_set(fi, NULL);
+	}
+#endif
+
 	kmem_cache_free(fuse_inode_cachep, fi);
 }
 
@@ -1002,6 +1012,11 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_backing_files_init(fc);
 
+	if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)) {
+		pr_notice("%s: Kernel is FUSE_FAMFS_DAX capable\n", __func__);
+		init_rwsem(&fc->famfs_devlist_sem);
+	}
+
 	INIT_LIST_HEAD(&fc->mounts);
 	list_add(&fm->fc_entry, &fc->mounts);
 	fm->fc = fc;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index f9e14180367a..d85fb692cf3b 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -652,6 +652,10 @@ enum fuse_opcode {
 	FUSE_TMPFILE		= 51,
 	FUSE_STATX		= 52,
 
+	/* Famfs / devdax opcodes */
+	FUSE_GET_FMAP           = 53,
+	FUSE_GET_DAXDEV         = 54,
+
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
 
-- 
2.49.0


  parent reply	other threads:[~2025-04-21  1:34 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-21  1:33 [RFC PATCH 00/19] famfs: port into fuse John Groves
2025-04-21  1:33 ` [RFC PATCH 01/19] dev_dax_iomap: Move dax_pgoff_to_phys() from device.c to bus.c John Groves
2025-04-21  1:33 ` [RFC PATCH 02/19] dev_dax_iomap: Add fs_dax_get() func to prepare dax for fs-dax usage John Groves
2025-04-21  1:33 ` [RFC PATCH 03/19] dev_dax_iomap: Save the kva from memremap John Groves
2025-04-21  1:33 ` [RFC PATCH 04/19] dev_dax_iomap: Add dax_operations for use by fs-dax on devdax John Groves
2025-04-21  1:33 ` [RFC PATCH 05/19] dev_dax_iomap: export dax_dev_get() John Groves
2025-04-21  1:33 ` [RFC PATCH 06/19] dev_dax_iomap: (ignore!) Drop poisoned page warning in fs/dax.c John Groves
2025-04-21  1:33 ` [RFC PATCH 07/19] famfs_fuse: magic.h: Add famfs magic numbers John Groves
2025-04-21  1:33 ` [RFC PATCH 08/19] famfs_fuse: Kconfig John Groves
2025-04-21  1:33 ` [RFC PATCH 09/19] famfs_fuse: Update macro s/FUSE_IS_DAX/FUSE_IS_VIRTIO_DAX/ John Groves
2025-04-21  1:33 ` [RFC PATCH 10/19] famfs_fuse: Basic fuse kernel ABI enablement for famfs John Groves
2025-04-23  1:36   ` Joanne Koong
2025-04-23 20:23     ` John Groves
2025-04-21  1:33 ` [RFC PATCH 11/19] famfs_fuse: Basic famfs mount opts John Groves
2025-04-23  1:51   ` Joanne Koong
2025-04-23 20:19     ` John Groves
2025-04-21  1:33 ` John Groves [this message]
2025-05-02  5:48   ` [RFC PATCH 12/19] famfs_fuse: Plumb the GET_FMAP message/response Joanne Koong
2025-05-02 20:35     ` Darrick J. Wong
2025-05-12 16:28     ` John Groves
2025-05-22 15:45       ` Amir Goldstein
2025-05-23  0:30         ` John Groves
2025-04-21  1:33 ` [RFC PATCH 13/19] famfs_fuse: Create files with famfs fmaps John Groves
2025-04-21 21:57   ` Darrick J. Wong
2025-04-21 22:31     ` John Groves
2025-04-24 13:43   ` John Groves
2025-04-24 14:38     ` Darrick J. Wong
2025-04-28  1:48       ` John Groves
2025-04-28 19:00         ` Darrick J. Wong
2025-05-06 16:56           ` Miklos Szeredi
2025-05-08 15:56             ` Darrick J. Wong
2025-05-13  9:14               ` Miklos Szeredi
2025-05-15  2:06                 ` Darrick J. Wong
2025-05-16 10:06                   ` Miklos Szeredi
2025-05-16 23:17                     ` Darrick J. Wong
2025-05-12 19:51             ` John Groves
2025-05-13  4:03               ` Darrick J. Wong
2025-04-21  1:33 ` [RFC PATCH 14/19] famfs_fuse: GET_DAXDEV message and daxdev_table John Groves
2025-04-21  3:43   ` Randy Dunlap
2025-04-21 20:57     ` John Groves
2025-04-21  1:33 ` [RFC PATCH 15/19] famfs_fuse: Plumb dax iomap and fuse read/write/mmap John Groves
2025-04-21  1:33 ` [RFC PATCH 16/19] famfs_fuse: Add holder_operations for dax notify_failure() John Groves
2025-04-21  1:33 ` [RFC PATCH 17/19] famfs_fuse: Add famfs metadata documentation John Groves
2025-04-21  3:51   ` Randy Dunlap
2025-04-21 21:00     ` John Groves
2025-04-21  1:33 ` [RFC PATCH 18/19] famfs_fuse: Add documentation John Groves
2025-04-22  2:10   ` Randy Dunlap
2025-04-28  1:50     ` John Groves
2025-04-21  1:33 ` [RFC PATCH 19/19] famfs_fuse: (ignore) debug cruft John Groves
2025-04-21 18:27 ` [RFC PATCH 00/19] famfs: port into fuse Darrick J. Wong
2025-04-21 22:00   ` John Groves
2025-04-22  1:25     ` Darrick J. Wong
2025-04-22 11:50       ` John Groves
2025-04-30 14:42 ` Alireza Sanaee
2025-05-01  2:13   ` John Groves
2025-05-21 22:30 ` John Groves
2025-05-21 23:11   ` Darrick J. Wong
2025-05-22 15:55   ` Amir Goldstein

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250421013346.32530-13-john@groves.net \
    --to=john@groves.net \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=ajayjoshi@micron.com \
    --cc=amir73il@gmail.com \
    --cc=arramesh@micron.com \
    --cc=bfoster@redhat.com \
    --cc=brauner@kernel.org \
    --cc=bschubert@ddn.com \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=djwong@kernel.org \
    --cc=jack@suse.cz \
    --cc=jgroves@micron.com \
    --cc=jlayton@kernel.org \
    --cc=joannelkoong@gmail.com \
    --cc=josef@toxicpanda.com \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luis@igalia.com \
    --cc=miklos@szeredb.hu \
    --cc=nvdimm@lists.linux.dev \
    --cc=pvorel@suse.cz \
    --cc=rdunlap@infradead.org \
    --cc=shajnocz@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vishal.l.verma@intel.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox