public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: "John Groves" <john@jagalactic.com>
To: "John Groves" <John@Groves.net>,
	"Miklos Szeredi" <miklos@szeredi.hu>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"Bernd Schubert" <bschubert@ddn.com>,
	"Alison Schofield" <alison.schofield@intel.com>
Cc: "John Groves" <jgroves@micron.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Shuah Khan" <skhan@linuxfoundation.org>,
	"Vishal Verma" <vishal.l.verma@intel.com>,
	"Dave Jiang" <dave.jiang@intel.com>,
	"Matthew Wilcox" <willy@infradead.org>, "Jan Kara" <jack@suse.cz>,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"David Hildenbrand" <david@kernel.org>,
	"Christian Brauner" <brauner@kernel.org>,
	"Darrick J . Wong" <djwong@kernel.org>,
	"Randy Dunlap" <rdunlap@infradead.org>,
	"Jeff Layton" <jlayton@kernel.org>,
	"Amir Goldstein" <amir73il@gmail.com>,
	"Jonathan Cameron" <Jonathan.Cameron@huawei.com>,
	"Stefan Hajnoczi" <shajnocz@redhat.com>,
	"Joanne Koong" <joannelkoong@gmail.com>,
	"Josef Bacik" <josef@toxicpanda.com>,
	"Bagas Sanjaya" <bagasdotme@gmail.com>,
	"Chen Linxuan" <chenlinxuan@uniontech.com>,
	"James Morse" <james.morse@arm.com>,
	"Fuad Tabba" <tabba@google.com>,
	"Sean Christopherson" <seanjc@google.com>,
	"Shivank Garg" <shivankg@amd.com>,
	"Ackerley Tng" <ackerleytng@google.com>,
	"Gregory Price" <gourry@gourry.net>,
	"Aravind Ramesh" <arramesh@micron.com>,
	"Ajay Joshi" <ajayjoshi@micron.com>,
	"venkataravis@micron.com" <venkataravis@micron.com>,
	"linux-doc@vger.kernel.org" <linux-doc@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"nvdimm@lists.linux.dev" <nvdimm@lists.linux.dev>,
	"linux-cxl@vger.kernel.org" <linux-cxl@vger.kernel.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"John Groves" <john@groves.net>
Subject: [PATCH V9 03/10] famfs_fuse: Plumb the GET_FMAP message/response
Date: Tue, 24 Mar 2026 00:40:57 +0000	[thread overview]
Message-ID: <0100019d1d499f55-8614dc64-8a4d-432f-a31e-1aa8b91ff8c3-000000@email.amazonses.com> (raw)
In-Reply-To: <0100019d1d48b7e8-4468329f-b446-43f1-87db-3c7e1ff6f28b-000000@email.amazonses.com>

From: John Groves <john@groves.net>

Upon completion of an OPEN, if we're in famfs-mode we do a GET_FMAP to
retrieve and cache the file-to-dax map in the kernel. If this
succeeds, read/write/mmap are resolved direct-to-dax with no upcalls.

Signed-off-by: John Groves <john@groves.net>
---
 MAINTAINERS               |  8 +++++
 fs/fuse/Makefile          |  1 +
 fs/fuse/famfs.c           | 73 +++++++++++++++++++++++++++++++++++++++
 fs/fuse/file.c            | 14 +++++++-
 fs/fuse/fuse_i.h          | 70 ++++++++++++++++++++++++++++++++++---
 fs/fuse/inode.c           |  8 ++++-
 fs/fuse/iomode.c          |  2 +-
 include/uapi/linux/fuse.h |  7 ++++
 8 files changed, 175 insertions(+), 8 deletions(-)
 create mode 100644 fs/fuse/famfs.c

diff --git a/MAINTAINERS b/MAINTAINERS
index eedf4cce56ed..71040a6494a3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10512,6 +10512,14 @@ F:	fs/fuse/
 F:	include/uapi/linux/fuse.h
 F:	tools/testing/selftests/filesystems/fuse/
 
+FUSE [FAMFS Fabric-Attached Memory File System]
+M:	John Groves <jgroves@micron.com>
+M:	John Groves <John@Groves.net>
+L:	linux-cxl@vger.kernel.org
+L:	linux-fsdevel@vger.kernel.org
+S:	Supported
+F:	fs/fuse/famfs.c
+
 FUTEX SUBSYSTEM
 M:	Thomas Gleixner <tglx@kernel.org>
 M:	Ingo Molnar <mingo@redhat.com>
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 22ad9538dfc4..3f8dcc8cbbd0 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -17,5 +17,6 @@ fuse-$(CONFIG_FUSE_DAX) += dax.o
 fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o
 fuse-$(CONFIG_SYSCTL) += sysctl.o
 fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
+fuse-$(CONFIG_FUSE_FAMFS_DAX) += famfs.o
 
 virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
new file mode 100644
index 000000000000..d238d853afa8
--- /dev/null
+++ b/fs/fuse/famfs.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * famfs - dax file system for shared fabric-attached memory
+ *
+ * Copyright 2023-2026 Micron Technology, Inc.
+ *
+ * This file system, originally based on ramfs and the dax support from xfs,
+ * is intended to allow multiple host systems to mount a common file system
+ * view of dax files that map to shared memory.
+ */
+
+#include <linux/cleanup.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/dax.h>
+#include <linux/iomap.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/string.h>
+
+#include "fuse_i.h"
+
+
+#define FMAP_BUFSIZE PAGE_SIZE
+
+int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	size_t fmap_bufsize = FMAP_BUFSIZE;
+	u64 nodeid = get_node_id(inode);
+	ssize_t fmap_size;
+	int rc;
+
+	FUSE_ARGS(args);
+
+	/* Don't retrieve if we already have the famfs metadata */
+	if (fi->famfs_meta)
+		return 0;
+
+	void *fmap_buf __free(kfree) = kzalloc(FMAP_BUFSIZE, GFP_KERNEL);
+
+	if (!fmap_buf)
+		return -ENOMEM;
+
+	args.opcode = FUSE_GET_FMAP;
+	args.nodeid = nodeid;
+
+	/* Variable-sized output buffer
+	 * this causes fuse_simple_request() to return the size of the
+	 * output payload
+	 */
+	args.out_argvar = true;
+	args.out_numargs = 1;
+	args.out_args[0].size = fmap_bufsize;
+	args.out_args[0].value = fmap_buf;
+
+	/* Send GET_FMAP command */
+	rc = fuse_simple_request(fm, &args);
+	if (rc < 0) {
+		pr_err("%s: err=%d from fuse_simple_request()\n",
+		       __func__, rc);
+		return rc;
+	}
+	fmap_size = rc;
+
+	/* We retrieved the "fmap" (the file's map to memory), but
+	 * we haven't used it yet. A call to famfs_file_init_dax() will be added
+	 * here in a subsequent patch, when we add the ability to attach
+	 * fmaps to files.
+	 */
+
+	return 0;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4ee5065737d8..d7fd934f4412 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -277,6 +277,16 @@ static int fuse_open(struct inode *inode, struct file *file)
 	err = fuse_do_open(fm, get_node_id(inode), file, false);
 	if (!err) {
 		ff = file->private_data;
+
+		if ((fm->fc->famfs_iomap) && (S_ISREG(inode->i_mode))) {
+			/* Get the famfs fmap - failure is fatal */
+			err = fuse_get_fmap(fm, inode);
+			if (err) {
+				fuse_sync_release(fi, ff, file->f_flags);
+				goto out_nowrite;
+			}
+		}
+
 		err = fuse_finish_open(inode, file);
 		if (err)
 			fuse_sync_release(fi, ff, file->f_flags);
@@ -284,12 +294,14 @@ static int fuse_open(struct inode *inode, struct file *file)
 			fuse_truncate_update_attr(inode, file);
 	}
 
+out_nowrite:
 	if (is_wb_truncate || dax_truncate)
 		fuse_release_nowrite(inode);
 	if (!err) {
 		if (is_truncate)
 			truncate_pagecache(inode, 0);
-		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+		else if (!(ff->open_flags & FOPEN_KEEP_CACHE) &&
+			 !fuse_file_famfs(fi))
 			invalidate_inode_pages2(inode->i_mapping);
 	}
 	if (dax_truncate)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 2839efb219a9..b66b5ca0bc11 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -223,6 +223,14 @@ struct fuse_inode {
 	 * so preserve the blocksize specified by the server.
 	 */
 	u8 cached_i_blkbits;
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	/* Pointer to the file's famfs metadata. Primary content is the
+	 * in-memory version of the fmap - the map from the file's offset range
+	 * to DAX memory.
+	 */
+	void *famfs_meta;
+#endif
 };
 
 /** FUSE inode state bits */
@@ -1511,11 +1519,8 @@ void fuse_free_conn(struct fuse_conn *fc);
 
 /* dax.c */
 
-static inline bool fuse_file_famfs(struct fuse_inode *fuse_inode) /* Will be superseded */
-{
-	(void)fuse_inode;
-	return false;
-}
+static inline int fuse_file_famfs(struct fuse_inode *fi); /* forward */
+
 #define FUSE_IS_VIRTIO_DAX(fuse_inode) (IS_ENABLED(CONFIG_FUSE_DAX)	\
 					&& IS_DAX(&fuse_inode->inode)  \
 					&& !fuse_file_famfs(fuse_inode))
@@ -1634,4 +1639,59 @@ extern void fuse_sysctl_unregister(void);
 #define fuse_sysctl_unregister()	do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+/* famfs.c */
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+void __famfs_meta_free(void *map);
+
+/* Set fi->famfs_meta = NULL regardless of prior value */
+static inline void famfs_meta_init(struct fuse_inode *fi)
+{
+	fi->famfs_meta = NULL;
+}
+
+/* Set fi->famfs_meta iff the current value is NULL */
+static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
+						  void *meta)
+{
+	return cmpxchg(&fi->famfs_meta, NULL, meta);
+}
+
+static inline void famfs_meta_free(struct fuse_inode *fi)
+{
+	famfs_meta_set(fi, NULL);
+}
+
+static inline int fuse_file_famfs(struct fuse_inode *fi)
+{
+	return (READ_ONCE(fi->famfs_meta) != NULL);
+}
+
+int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode);
+
+#else /* !CONFIG_FUSE_FAMFS_DAX */
+
+static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
+						  void *meta)
+{
+	return NULL;
+}
+
+static inline void famfs_meta_free(struct fuse_inode *fi)
+{
+}
+
+static inline int fuse_file_famfs(struct fuse_inode *fi)
+{
+	return 0;
+}
+
+static inline int
+fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
+{
+	return 0;
+}
+
+#endif /* CONFIG_FUSE_FAMFS_DAX */
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fa77add7d9f8..e39a00c79085 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -120,6 +120,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_inode_backing_set(fi, NULL);
 
+	if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX))
+		famfs_meta_set(fi, NULL);
+
 	return &fi->inode;
 
 out_free_forget:
@@ -141,6 +144,9 @@ static void fuse_free_inode(struct inode *inode)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_backing_put(fuse_inode_backing(fi));
 
+	if (S_ISREG(inode->i_mode) && fuse_file_famfs(fi))
+		famfs_meta_free(fi);
+
 	kmem_cache_free(fuse_inode_cachep, fi);
 }
 
@@ -162,7 +168,7 @@ static void fuse_evict_inode(struct inode *inode)
 	/* Will write inode on close/munmap and in all other dirtiers */
 	WARN_ON(inode_state_read_once(inode) & I_DIRTY_INODE);
 
-	if (FUSE_IS_VIRTIO_DAX(fi))
+	if (FUSE_IS_VIRTIO_DAX(fi) || fuse_file_famfs(fi))
 		dax_break_layout_final(inode);
 
 	truncate_inode_pages_final(&inode->i_data);
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
index 31ee7f3304c6..948148316ef0 100644
--- a/fs/fuse/iomode.c
+++ b/fs/fuse/iomode.c
@@ -203,7 +203,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode)
 	 * io modes are not relevant with DAX and with server that does not
 	 * implement open.
 	 */
-	if (FUSE_IS_VIRTIO_DAX(fi) || !ff->args)
+	if (FUSE_IS_VIRTIO_DAX(fi) || fuse_file_famfs(fi) || !ff->args)
 		return 0;
 
 	/*
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 25686f088e6a..9eff9083d3b5 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -669,6 +669,9 @@ enum fuse_opcode {
 	FUSE_STATX		= 52,
 	FUSE_COPY_FILE_RANGE_64	= 53,
 
+	/* Famfs / devdax opcodes */
+	FUSE_GET_FMAP           = 54,
+
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
 
@@ -1313,4 +1316,8 @@ struct fuse_uring_cmd_req {
 	uint8_t padding[6];
 };
 
+/* Famfs fmap message components */
+
+#define FAMFS_FMAP_MAX 32768 /* Largest supported fmap message */
+
 #endif /* _LINUX_FUSE_H */
-- 
2.53.0



  parent reply	other threads:[~2026-03-24  0:40 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20260324003630.4930-1-john@jagalactic.com>
2026-03-24  0:36 ` [PATCH BUNDLE v9] famfs: Fabric-Attached Memory File System John Groves
2026-03-24  0:37   ` [PATCH V9 0/8] dax: prepare for famfs John Groves
2026-03-24  0:37     ` [PATCH V9 1/8] dax: move dax_pgoff_to_phys from [drivers/dax/] device.c to bus.c John Groves
2026-03-24 14:18       ` Jonathan Cameron
2026-03-24 23:44         ` Ira Weiny
2026-03-25 11:55           ` Jonathan Cameron
2026-03-24  0:38     ` [PATCH V9 2/8] dax: Factor out dax_folio_reset_order() helper John Groves
2026-03-24 14:23       ` Jonathan Cameron
2026-03-24  0:38     ` [PATCH V9 3/8] dax: add fsdev.c driver for fs-dax on character dax John Groves
2026-03-24 14:39       ` Jonathan Cameron
2026-03-25 12:43         ` John Groves
2026-03-25 16:04           ` Ira Weiny
2026-03-26 14:33             ` John Groves
2026-03-24 15:19       ` Dave Jiang
2026-03-25  4:48       ` Ira Weiny
2026-03-24  0:38     ` [PATCH V9 4/8] dax: Save the kva from memremap John Groves
2026-03-24 14:40       ` Jonathan Cameron
2026-03-24  0:39     ` [PATCH V9 5/8] dax: Add dax_operations for use by fs-dax on fsdev dax John Groves
2026-03-24 14:51       ` Jonathan Cameron
2026-03-24 15:23       ` Dave Jiang
2026-03-25 21:28       ` Dave Jiang
2026-03-25 22:40       ` Dave Jiang
2026-03-24  0:39     ` [PATCH V9 6/8] dax: Add dax_set_ops() for setting dax_operations at bind time John Groves
2026-03-24 14:53       ` Jonathan Cameron
2026-03-24  0:39     ` [PATCH V9 7/8] dax: Add fs_dax_get() func to prepare dax for fs-dax usage John Groves
2026-03-24 15:05       ` Jonathan Cameron
2026-03-24 15:25       ` Dave Jiang
2026-03-24  0:39     ` [PATCH V9 8/8] dax: export dax_dev_get() John Groves
2026-03-24 15:06       ` Jonathan Cameron
2026-03-24  0:39   ` [PATCH V9 00/10] famfs: port into fuse John Groves
2026-03-24  0:40     ` [PATCH V9 01/10] famfs_fuse: Update macro s/FUSE_IS_DAX/FUSE_IS_VIRTIO_DAX/ John Groves
2026-03-24 15:12       ` Jonathan Cameron
2026-03-24  0:40     ` [PATCH V9 02/10] famfs_fuse: Basic fuse kernel ABI enablement for famfs John Groves
2026-03-24  0:40     ` John Groves [this message]
2026-03-24  0:41     ` [PATCH V9 04/10] famfs_fuse: Create files with famfs fmaps John Groves
2026-03-24  0:41     ` [PATCH V9 05/10] famfs_fuse: GET_DAXDEV message and daxdev_table John Groves
2026-03-24  0:41     ` [PATCH V9 06/10] famfs_fuse: Plumb dax iomap and fuse read/write/mmap John Groves
2026-03-24  0:41     ` [PATCH V9 07/10] famfs_fuse: Add holder_operations for dax notify_failure() John Groves
2026-03-24  0:41     ` [PATCH V9 08/10] famfs_fuse: Add DAX address_space_operations with noop_dirty_folio John Groves
2026-03-24  0:42     ` [PATCH V9 09/10] famfs_fuse: Add famfs fmap metadata documentation John Groves
2026-03-24  0:42     ` [PATCH V9 10/10] famfs_fuse: Add documentation John Groves

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0100019d1d499f55-8614dc64-8a4d-432f-a31e-1aa8b91ff8c3-000000@email.amazonses.com \
    --to=john@jagalactic.com \
    --cc=John@Groves.net \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=ackerleytng@google.com \
    --cc=ajayjoshi@micron.com \
    --cc=alison.schofield@intel.com \
    --cc=amir73il@gmail.com \
    --cc=arramesh@micron.com \
    --cc=bagasdotme@gmail.com \
    --cc=brauner@kernel.org \
    --cc=bschubert@ddn.com \
    --cc=chenlinxuan@uniontech.com \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=david@kernel.org \
    --cc=djwong@kernel.org \
    --cc=gourry@gourry.net \
    --cc=jack@suse.cz \
    --cc=james.morse@arm.com \
    --cc=jgroves@micron.com \
    --cc=jlayton@kernel.org \
    --cc=joannelkoong@gmail.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=nvdimm@lists.linux.dev \
    --cc=rdunlap@infradead.org \
    --cc=seanjc@google.com \
    --cc=shajnocz@redhat.com \
    --cc=shivankg@amd.com \
    --cc=skhan@linuxfoundation.org \
    --cc=tabba@google.com \
    --cc=venkataravis@micron.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vishal.l.verma@intel.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox