All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
@ 2026-05-29 13:43 Miklos Szeredi
  2026-05-31 17:36 ` Amir Goldstein
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Miklos Szeredi @ 2026-05-29 13:43 UTC (permalink / raw)
  To: John Groves; +Cc: fuse-devel, linux-fsdevel

The difference is that now the server needs to look at the fmap and check
if any new device needs registering, the device index is now allocated by
the kernel and returned by the ioctl().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
This is the first part of what we were talking about: using the backing
file API for dax devices.

It's on top of the famfs-v10 patchset.

Totally untested, haven't even tried booting it.  Obviously needs
nontrivial conversion in the server code as well.

Thanks,
Miklos

fs/fuse/Kconfig           |   1 +
 fs/fuse/backing.c         |  28 ++-
 fs/fuse/famfs.c           | 346 ++++++++++----------------------------
 fs/fuse/famfs_kfmap.h     |   2 -
 fs/fuse/fuse_i.h          |  25 +--
 fs/fuse/inode.c           |   7 +-
 include/uapi/linux/fuse.h |   8 +-
 7 files changed, 123 insertions(+), 294 deletions(-)

diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 17fe1f490cbd..8dbd6663808e 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -79,6 +79,7 @@ config FUSE_IO_URING
 
 config FUSE_FAMFS_DAX
 	bool "FUSE support for fs-dax filesystems backed by devdax"
+	select FUSE_PASSTHROUGH
 	depends on FUSE_FS
 	depends on DEV_DAX_FSDEV
 	default FUSE_FS
diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
index d95dfa48483f..e56b7f8d5d2b 100644
--- a/fs/fuse/backing.c
+++ b/fs/fuse/backing.c
@@ -37,14 +37,14 @@ void fuse_backing_files_init(struct fuse_conn *fc)
 	idr_init(&fc->backing_files_map);
 }
 
-static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
+int fuse_backing_id_alloc(struct fuse_conn *fc, void *obj)
 {
 	int id;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&fc->lock);
 	/* FIXME: xarray might be space inefficient */
-	id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
+	id = idr_alloc_cyclic(&fc->backing_files_map, obj, 1, 0, GFP_ATOMIC);
 	spin_unlock(&fc->lock);
 	idr_preload_end();
 
@@ -75,7 +75,10 @@ static int fuse_backing_id_free(int id, void *p, void *data)
 
 void fuse_backing_files_free(struct fuse_conn *fc)
 {
-	idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
+	if (fc->famfs_iomap)
+		famfs_teardown(fc);
+	else
+		idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
 	idr_destroy(&fc->backing_files_map);
 }
 
@@ -88,13 +91,23 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
 
 	pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
 
+	if (map->padding)
+		return -EINVAL;
+
+	if (fc->famfs_iomap) {
+		if (map->flags != FUSE_BACKING_DAXDEV)
+			return -EINVAL;
+
+		return famfs_daxdev_open(fc, map);
+	}
+
 	/* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
 	res = -EPERM;
 	if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
 		goto out;
 
 	res = -EINVAL;
-	if (map->flags || map->padding)
+	if (map->flags)
 		goto out;
 
 	file = fget_raw(map->fd);
@@ -144,6 +157,10 @@ int fuse_backing_close(struct fuse_conn *fc, int backing_id)
 
 	pr_debug("%s: backing_id=%d\n", __func__, backing_id);
 
+	/* No close for dax device (yet) */
+	if (fc->famfs_iomap)
+		return -EINVAL;
+
 	/* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
 	err = -EPERM;
 	if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
@@ -170,6 +187,9 @@ struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id)
 {
 	struct fuse_backing *fb;
 
+	if (fc->famfs_iomap)
+		return NULL;
+
 	rcu_read_lock();
 	fb = idr_find(&fc->backing_files_map, backing_id);
 	fb = fuse_backing_get(fb);
diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
index 121ed74e9727..c49362d19b36 100644
--- a/fs/fuse/famfs.c
+++ b/fs/fuse/famfs.c
@@ -23,15 +23,15 @@
 #include "fuse_i.h"
 
 static void famfs_set_daxdev_err(
-	struct fuse_conn *fc, struct dax_device *dax_devp);
+	struct famfs_daxdev *dd, struct dax_device *dax_devp);
 
 static int
 famfs_dax_notify_failure(struct dax_device *dax_devp, u64 offset,
 			u64 len, int mf_flags)
 {
-	struct fuse_conn *fc = dax_holder(dax_devp);
+	struct famfs_daxdev *dd = dax_holder(dax_devp);
 
-	famfs_set_daxdev_err(fc, dax_devp);
+	famfs_set_daxdev_err(dd, dax_devp);
 
 	return 0;
 }
@@ -51,6 +51,26 @@ static const struct address_space_operations famfs_dax_aops = {
 
 /*****************************************************************************/
 
+static void famfs_daxdev_free(struct famfs_daxdev *dd)
+{
+	/* Only call fs_put_dax if fs_dax_get succeeded */
+	if (dd->devp) {
+		if (!dd->dax_err)
+			fs_put_dax(dd->devp, dd);
+		put_dax(dd->devp);
+	}
+	kfree(dd->name);
+	kfree(dd);
+}
+
+DEFINE_FREE(famfs_daxdev_free, struct famfs_daxdev *, if (_T) famfs_daxdev_free(_T));
+
+static int famfs_devidx_free(int id, void *p, void *data)
+{
+	famfs_daxdev_free(p);
+	return 0;
+}
+
 /*
  * famfs_teardown()
  *
@@ -59,263 +79,90 @@ static const struct address_space_operations famfs_dax_aops = {
 void
 famfs_teardown(struct fuse_conn *fc)
 {
-	struct famfs_dax_devlist *devlist __free(kfree) = fc->dax_devlist;
-	int i;
-
-	fc->dax_devlist = NULL;
-
-	if (!devlist)
-		return;
-
-	if (!devlist->devlist)
-		return;
-
-	/* Close & release all the daxdevs in our table */
-	for (i = 0; i < devlist->nslots; i++) {
-		struct famfs_daxdev *dd = &devlist->devlist[i];
-
-		if (!dd->valid)
-			continue;
-
-		/* Only call fs_put_dax if fs_dax_get succeeded */
-		if (dd->devp) {
-			if (!dd->dax_err)
-				fs_put_dax(dd->devp, fc);
-			put_dax(dd->devp);
-		}
-
-		kfree(dd->name);
-	}
-	kfree(devlist->devlist);
+	idr_for_each(&fc->backing_files_map, famfs_devidx_free,  NULL);
 }
 
 static int
-famfs_verify_daxdev(const char *pathname, dev_t *devno)
+famfs_verify_daxdev(const struct path *path, dev_t *devno)
 {
-	struct inode *inode;
-	struct path path;
-	int err;
+	struct inode *inode = d_inode(path->dentry);
 
-	if (!pathname || !*pathname)
+	if (!S_ISCHR(inode->i_mode))
 		return -EINVAL;
 
-	err = kern_path(pathname, LOOKUP_FOLLOW, &path);
-	if (err)
-		return err;
-
-	inode = d_backing_inode(path.dentry);
-	if (!S_ISCHR(inode->i_mode)) {
-		err = -EINVAL;
-		goto out_path_put;
-	}
-
-	if (!may_open_dev(&path)) { /* had to export this */
-		err = -EACCES;
-		goto out_path_put;
-	}
+	if (!may_open_dev(path)) /* had to export this */
+		return -EACCES;
 
 	*devno = inode->i_rdev;
 
-out_path_put:
-	path_put(&path);
-	return err;
+	return 0;
 }
 
-/**
- * famfs_fuse_get_daxdev() - Retrieve info for a DAX device from fuse server
- *
- * Send a GET_DAXDEV message to the fuse server to retrieve info on a
- * dax device.
- *
- * @fm:     fuse_mount
- * @index:  the index of the dax device; daxdevs are referred to by index
- *          in fmaps, and the server resolves the index to a particular daxdev
- *
- * Returns: 0=success
- *          -errno=failure
- */
-static int
-famfs_fuse_get_daxdev(struct fuse_mount *fm, const u64 index)
+static int famfs_fuse_get_daxdev(const struct path *path, struct famfs_daxdev *dd)
 {
-	struct fuse_daxdev_out daxdev_out = { 0 };
-	struct fuse_conn *fc = fm->fc;
-	struct famfs_daxdev *daxdev;
 	int rc;
 
-	FUSE_ARGS(args);
-
-	/* Store the daxdev in our table */
-	if (index >= fc->dax_devlist->nslots) {
-		pr_err("%s: index(%lld) > nslots(%d)\n",
-		       __func__, index, fc->dax_devlist->nslots);
-		return -EINVAL;
-	}
-
-	args.opcode = FUSE_GET_DAXDEV;
-	args.nodeid = index;
-
-	args.in_numargs = 0;
-
-	args.out_numargs = 1;
-	args.out_args[0].size = sizeof(daxdev_out);
-	args.out_args[0].value = &daxdev_out;
-
-	/* Send GET_DAXDEV command */
-	rc = fuse_simple_request(fm, &args);
+	/* Verify dev is valid and can be opened and gets the devno */
+	rc = famfs_verify_daxdev(path, &dd->devno);
 	if (rc) {
-		pr_err("%s: rc=%d from fuse_simple_request()\n",
-		       __func__, rc);
-		/* Error will be that the payload is smaller than FMAP_BUFSIZE,
-		 * which is the max we can handle. Empty payload handled below.
-		 */
+		pr_err("%s: rc=%d from famfs_verify_daxdev()\n", __func__, rc);
 		return rc;
 	}
 
-	scoped_guard(rwsem_write, &fc->famfs_devlist_sem) {
-		daxdev = &fc->dax_devlist->devlist[index];
-
-		/* Abort if daxdev is now valid (races are possible here) */
-		if (daxdev->valid) {
-			pr_debug("%s: daxdev already known\n", __func__);
-			return 0;
-		}
-
-		/* Verify dev is valid and can be opened and gets the devno */
-		rc = famfs_verify_daxdev(daxdev_out.name, &daxdev->devno);
-		if (rc) {
-			pr_err("%s: rc=%d from famfs_verify_daxdev()\n",
-			       __func__, rc);
-			return rc;
-		}
-
-		daxdev->name = kstrdup(daxdev_out.name, GFP_KERNEL);
-		if (!daxdev->name)
-			return -ENOMEM;
+	dd->name = kasprintf(GFP_KERNEL, "%pd4", path->dentry);
+	if (!dd->name)
+		return -ENOMEM;
 
-		/* This will fail if it's not a dax device */
-		daxdev->devp = dax_dev_get(daxdev->devno);
-		if (!daxdev->devp) {
-			pr_warn("%s: device %s not found or not dax\n",
-				__func__, daxdev_out.name);
-			kfree(daxdev->name);
-			daxdev->name = NULL;
+	/* This will fail if it's not a dax device */
+	dd->devp = dax_dev_get(dd->devno);
+	if (!dd->devp) {
+		pr_warn("%s: device %s not found or not dax\n",__func__, dd->name);
+			kfree(dd->name);
+			dd->name = NULL;
 			return -ENODEV;
 		}
 
-		rc = fs_dax_get(daxdev->devp, fc, &famfs_fuse_dax_holder_ops);
-		if (rc) {
-			/* Mark as valid with dax_err to prevent retry loop.
-			 * famfs_dax_err() will return -EIO on access attempts.
-			 * Teardown handles this case: skips fs_put_dax, calls put_dax.
-			 */
-			daxdev->dax_err = 1;
-			pr_err("%s: fs_dax_get(%lld) failed\n",
-			       __func__, (u64)daxdev->devno);
-		}
-
-		wmb(); /* All other fields must be visible before valid */
-		daxdev->valid = 1;
+	rc = fs_dax_get(dd->devp, dd, &famfs_fuse_dax_holder_ops);
+	if (rc) {
+		/* Mark as valid with dax_err to prevent retry loop.
+		 * famfs_dax_err() will return -EIO on access attempts.
+		 * Teardown handles this case: skips fs_put_dax, calls put_dax.
+		 */
+		dd->dax_err = 1;
+		pr_err("%s: fs_dax_get(%lld) failed\n", __func__, (u64)dd->devno);
 	}
 
 	return 0;
 }
 
-/**
- * famfs_update_daxdev_table() - Update the daxdev table
- * @fm:   fuse_mount
- * @meta: famfs_file_meta, in-memory format, built from a GET_FMAP response
- *
- * This function is called for each new file fmap, to verify whether all
- * referenced daxdevs are already known (i.e. in the table). Any daxdev
- * indices referenced in @meta but not in the table will be retrieved via
- * famfs_fuse_get_daxdev() and added to the table
- *
- * Return: 0=success
- *         -errno=failure
- */
-static int
-famfs_update_daxdev_table(
-	struct fuse_mount *fm,
-	const struct famfs_file_meta *meta)
+int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map)
 {
-	struct famfs_dax_devlist *local_devlist;
-	struct fuse_conn *fc = fm->fc;
-	int indices_to_fetch[MAX_DAXDEVS];
-	int n_to_fetch = 0;
-	int err;
-
-	/* First time through we will need to allocate the dax_devlist */
-	if (!fc->dax_devlist) {
-		local_devlist = kcalloc(1, sizeof(*fc->dax_devlist), GFP_KERNEL);
-		if (!local_devlist)
-			return -ENOMEM;
-
-		local_devlist->nslots = MAX_DAXDEVS;
-
-		local_devlist->devlist = kcalloc(MAX_DAXDEVS,
-						 sizeof(struct famfs_daxdev),
-						 GFP_KERNEL);
-		if (!local_devlist->devlist) {
-			kfree(local_devlist);
-			return -ENOMEM;
-		}
-
-		/* We don't need famfs_devlist_sem here because we use cmpxchg */
-		if (cmpxchg(&fc->dax_devlist, NULL, local_devlist) != NULL) {
-			kfree(local_devlist->devlist);
-			kfree(local_devlist); /* another thread beat us to it */
-		}
-	}
+	struct famfs_daxdev *dd __free(famfs_daxdev_free) = kzalloc_obj(*dd);
+	int rc;
 
-	/* Collect indices that need fetching while holding read lock */
-	scoped_guard(rwsem_read, &fc->famfs_devlist_sem) {
-		unsigned long i;
+	CLASS(fd_raw, f)(map->fd);
+	if (fd_empty(f))
+		return -EBADF;
 
-		for_each_set_bit(i, (unsigned long *)&meta->dev_bitmap, MAX_DAXDEVS) {
-			if (!(fc->dax_devlist->devlist[i].valid))
-				indices_to_fetch[n_to_fetch++] = i;
-		}
-	}
+	rc = famfs_fuse_get_daxdev(&fd_file(f)->f_path, dd);
+	if (rc)
+		return rc;
 
-	/* Fetch needed daxdevs outside the read lock */
-	for (int j = 0; j < n_to_fetch; j++) {
-		err = famfs_fuse_get_daxdev(fm, indices_to_fetch[j]);
-		if (err)
-			pr_err("%s: failed to get daxdev=%d\n",
-			       __func__, indices_to_fetch[j]);
-	}
+	rc = fuse_backing_id_alloc(fc, dd);
+	if (rc >= 0)
+		retain_and_null_ptr(dd);
 
-	return 0;
+	return rc;
 }
 
 static void
 famfs_set_daxdev_err(
-	struct fuse_conn *fc,
-	struct dax_device *dax_devp)
+	struct famfs_daxdev *dd, struct dax_device *dax_devp)
 {
-	int i;
-
-	/* Gotta search the list by dax_devp;
-	 * read lock because we're not adding or removing daxdev entries
-	 */
-	scoped_guard(rwsem_write, &fc->famfs_devlist_sem) {
-		for (i = 0; i < fc->dax_devlist->nslots; i++) {
-			if (fc->dax_devlist->devlist[i].valid) {
-				struct famfs_daxdev *dd;
-
-				dd = &fc->dax_devlist->devlist[i];
-				if (dd->devp != dax_devp)
-					continue;
 
-				dd->error = true;
+	dd->error = true;
 
-				pr_err("%s: memory error on daxdev %s (%d)\n",
-				       __func__, dd->name, i);
-				return;
-			}
-		}
-	}
-	pr_err("%s: memory err on unrecognized daxdev\n", __func__);
+	pr_err("%s: memory error on daxdev %s\n", __func__, dd->name);
 }
 
 /***************************************************************************/
@@ -447,9 +294,6 @@ famfs_fuse_meta_alloc(
 			meta->se[i].ext_offset = se_in[i].se_offset;
 			meta->se[i].ext_len    = se_in[i].se_len;
 
-			/* Record bitmap of referenced daxdev indices */
-			meta->dev_bitmap |= (1 << meta->se[i].dev_index);
-
 			errs += famfs_check_ext_alignment(&meta->se[i]);
 
 			extent_total += meta->se[i].ext_len;
@@ -545,9 +389,6 @@ famfs_fuse_meta_alloc(
 				strips_out[j].ext_offset = offset;
 				strips_out[j].ext_len    = len;
 
-				/* Record bitmap of referenced daxdev indices */
-				meta->dev_bitmap |= (1 << devindex);
-
 				extent_total += len;
 				errs += famfs_check_ext_alignment(&strips_out[j]);
 				size_remainder -= len;
@@ -603,7 +444,7 @@ famfs_fuse_meta_alloc(
  * Return: 0=success
  *          -errno=failure
  */
-int
+static int
 famfs_file_init_dax(
 	struct fuse_mount *fm,
 	struct inode *inode,
@@ -625,9 +466,6 @@ famfs_file_init_dax(
 	if (rc)
 		goto errout;
 
-	/* Make sure this fmap doesn't reference any unknown daxdevs */
-	famfs_update_daxdev_table(fm, meta);
-
 	/* Publish the famfs metadata on fi->famfs_meta */
 	inode_lock(inode);
 
@@ -662,11 +500,6 @@ static int famfs_file_bad(struct inode *inode);
 
 static int famfs_dax_err(struct famfs_daxdev *dd)
 {
-	if (!dd->valid) {
-		pr_err("%s: daxdev=%s invalid\n",
-		       __func__, dd->name);
-		return -EIO;
-	}
 	if (dd->dax_err) {
 		pr_err("%s: daxdev=%s dax_err\n",
 		       __func__, dd->name);
@@ -680,6 +513,18 @@ static int famfs_dax_err(struct famfs_daxdev *dd)
 	return 0;
 }
 
+static struct famfs_daxdev *
+famfs_daxdev_lookup(struct fuse_conn *fc, int devidx)
+{
+	struct famfs_daxdev *dd;
+
+	rcu_read_lock();
+	dd = idr_find(&fc->backing_files_map, devidx);
+	rcu_read_unlock();
+
+	return dd;
+}
+
 static int
 famfs_interleave_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
 			 loff_t file_offset, off_t len, unsigned int flags)
@@ -730,21 +575,13 @@ famfs_interleave_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
 			u64 strip_devidx = fei->ie_strips[strip_num].dev_index;
 			int rc;
 
-			if (strip_devidx >= fc->dax_devlist->nslots) {
-				pr_err("%s: strip_devidx %llu >= nslots %d\n",
-				       __func__, strip_devidx,
-				       fc->dax_devlist->nslots);
-				goto err_out;
-			}
-
-			if (!fc->dax_devlist->devlist[strip_devidx].valid) {
+			dd = famfs_daxdev_lookup(fc, strip_devidx);
+			if (!dd) {
 				pr_err("%s: daxdev=%lld invalid\n", __func__,
 					strip_devidx);
 				goto err_out;
 			}
 
-			dd = &fc->dax_devlist->devlist[strip_devidx];
-
 			rc = famfs_dax_err(dd);
 			if (rc) {
 				/* Shut down access to this file */
@@ -756,7 +593,7 @@ famfs_interleave_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
 			iomap->offset  = file_offset;
 			iomap->length  = min_t(loff_t, len, chunk_remainder);
 
-			iomap->dax_dev = fc->dax_devlist->devlist[strip_devidx].devp;
+			iomap->dax_dev = dd->devp;
 
 			iomap->type    = IOMAP_MAPPED;
 			iomap->flags   = flags;
@@ -815,11 +652,6 @@ famfs_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	loff_t local_offset = file_offset;
 
-	if (!fc->dax_devlist) {
-		pr_err("%s: null dax_devlist\n", __func__);
-		goto err_out;
-	}
-
 	if (famfs_file_bad(inode))
 		goto err_out;
 
@@ -851,15 +683,13 @@ famfs_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
 			struct famfs_daxdev *dd;
 			int rc;
 
-			if (daxdev_idx >= fc->dax_devlist->nslots) {
-				pr_err("%s: daxdev_idx %llu >= nslots %d\n",
-				       __func__, daxdev_idx,
-				       fc->dax_devlist->nslots);
+			dd = famfs_daxdev_lookup(fc, daxdev_idx);
+			if (!dd) {
+				pr_err("%s: daxdev=%lld invalid\n", __func__,
+					daxdev_idx);
 				goto err_out;
 			}
 
-			dd = &fc->dax_devlist->devlist[daxdev_idx];
-
 			rc = famfs_dax_err(dd);
 			if (rc) {
 				/* Shut down access to this file */
@@ -884,7 +714,7 @@ famfs_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
 			iomap->offset  = file_offset;
 			iomap->length  = min_t(loff_t, len, ext_len_remainder);
 
-			iomap->dax_dev = fc->dax_devlist->devlist[daxdev_idx].devp;
+			iomap->dax_dev = dd->devp;
 
 			iomap->type    = IOMAP_MAPPED;
 			iomap->flags   = flags;
diff --git a/fs/fuse/famfs_kfmap.h b/fs/fuse/famfs_kfmap.h
index 970ad802b492..3f72ae8e4696 100644
--- a/fs/fuse/famfs_kfmap.h
+++ b/fs/fuse/famfs_kfmap.h
@@ -124,7 +124,6 @@ struct famfs_file_meta {
 	enum famfs_file_type   file_type;
 	size_t                 file_size;
 	enum famfs_extent_type fm_extent_type;
-	u64 dev_bitmap; /* bitmap of referenced daxdevs by index */
 	union {
 		struct {
 			size_t         fm_nextents;
@@ -145,7 +144,6 @@ struct famfs_file_meta {
  */
 struct famfs_daxdev {
 	/* Include dev uuid? */
-	bool valid;
 	bool error; /* Dax has reported a memory error (probably poison) */
 	bool dax_err; /* fs_dax_get() failed */
 	dev_t devno;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index dcbeaceda918..adc88a8681e2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1006,11 +1006,6 @@ struct fuse_conn {
 		/* Request timeout (in jiffies). 0 = no timeout */
 		unsigned int req_timeout;
 	} timeout;
-
-#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
-	struct rw_semaphore famfs_devlist_sem;
-	struct famfs_dax_devlist *dax_devlist;
-#endif
 };
 
 /*
@@ -1572,6 +1567,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
 struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
 void fuse_backing_put(struct fuse_backing *fb);
 struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id);
+int fuse_backing_id_alloc(struct fuse_conn *fc, void *obj);
 #else
 
 static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
@@ -1647,22 +1643,14 @@ extern void fuse_sysctl_unregister(void);
 /* famfs.c */
 
 #if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
-int famfs_file_init_dax(struct fuse_mount *fm,
-			struct inode *inode, void *fmap_buf,
-			size_t fmap_size);
 ssize_t famfs_fuse_write_iter(struct kiocb *iocb, struct iov_iter *from);
 ssize_t famfs_fuse_read_iter(struct kiocb *iocb, struct iov_iter	*to);
 int famfs_fuse_mmap(struct file *file, struct vm_area_struct *vma);
 void __famfs_meta_free(void *map);
 
+int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map);
 void famfs_teardown(struct fuse_conn *fc);
 
-/* Set fi->famfs_meta = NULL regardless of prior value */
-static inline void famfs_meta_init(struct fuse_inode *fi)
-{
-	fi->famfs_meta = NULL;
-}
-
 /* Set fi->famfs_meta iff the current value is NULL */
 static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
 						  void *meta)
@@ -1678,11 +1666,6 @@ static inline void famfs_meta_free(struct fuse_inode *fi)
 	}
 }
 
-static inline void famfs_init_devlist_sem(struct fuse_conn *fc)
-{
-	init_rwsem(&fc->famfs_devlist_sem);
-}
-
 static inline int fuse_file_famfs(struct fuse_inode *fi)
 {
 	return (READ_ONCE(fi->famfs_meta) != NULL);
@@ -1736,6 +1719,10 @@ fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
 	return 0;
 }
 
+static inline int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_FUSE_FAMFS_DAX */
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 40e7ea5b6437..5e692fc84297 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1048,9 +1048,6 @@ void fuse_conn_put(struct fuse_conn *fc)
 		WARN_ON(atomic_read(&bucket->count) != 1);
 		kfree(bucket);
 	}
-	if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX))
-		famfs_teardown(fc);
-
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_backing_files_free(fc);
 	call_rcu(&fc->rcu, delayed_release);
@@ -1480,10 +1477,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 				u64 in_flags = FIELD_PREP(GENMASK_ULL(63, 32), ia->in.flags2)
 						| ia->in.flags;
 
-				if (in_flags & FUSE_DAX_FMAP) {
-					famfs_init_devlist_sem(fc);
+				if (in_flags & FUSE_DAX_FMAP)
 					fc->famfs_iomap = 1;
-				}
 			}
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 1b82895108be..d6e8dc75c8a1 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -248,7 +248,6 @@
  *    - struct fuse_famfs_iext
  *    - struct fuse_famfs_fmap_header
  *  - Add the following structs for the GET_DAXDEV message and reply
- *    - struct fuse_get_daxdev_in
  *    - struct fuse_get_daxdev_out
  *  - Add the following enumerated types
  *    - enum fuse_famfs_file_type
@@ -1145,6 +1144,9 @@ struct fuse_notify_prune_out {
 	uint64_t	spare;
 };
 
+/* Backing is a dax device */
+#define FUSE_BACKING_DAXDEV	(1 << 0)
+
 struct fuse_backing_map {
 	int32_t		fd;
 	uint32_t	flags;
@@ -1373,10 +1375,6 @@ struct fuse_famfs_fmap_header {
 	uint64_t reserved1;
 };
 
-struct fuse_get_daxdev_in {
-	uint32_t        daxdev_num;
-};
-
 #define DAXDEV_NAME_MAX 256
 
 /* fuse_daxdev_out has enough space for a uuid if we need it */
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-05-29 13:43 [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN Miklos Szeredi
@ 2026-05-31 17:36 ` Amir Goldstein
  2026-06-01  8:42   ` Miklos Szeredi
  2026-06-02  6:58 ` John Groves
  2026-06-03 22:09 ` Joanne Koong
  2 siblings, 1 reply; 9+ messages in thread
From: Amir Goldstein @ 2026-05-31 17:36 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: John Groves, fuse-devel, linux-fsdevel, Darrick J. Wong

On Fri, May 29, 2026 at 3:43 PM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> The difference is that now the server needs to look at the fmap and check
> if any new device needs registering, the device index is now allocated by
> the kernel and returned by the ioctl().
>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
> This is the first part of what we were talking about: using the backing
> file API for dax devices.
>
> It's on top of the famfs-v10 patchset.
>
> Totally untested, haven't even tried booting it.  Obviously needs
> nontrivial conversion in the server code as well.
>
> Thanks,
> Miklos
>
> fs/fuse/Kconfig           |   1 +
>  fs/fuse/backing.c         |  28 ++-
>  fs/fuse/famfs.c           | 346 ++++++++++----------------------------
>  fs/fuse/famfs_kfmap.h     |   2 -
>  fs/fuse/fuse_i.h          |  25 +--
>  fs/fuse/inode.c           |   7 +-
>  include/uapi/linux/fuse.h |   8 +-
>  7 files changed, 123 insertions(+), 294 deletions(-)

I like this diffstat :)
Code looks much simpler.
Could possibly use fuse_backing_ops that Darrick used for iomap
backing devices to generalize the backing id ioctls:

https://lore.kernel.org/fuse-devel/177922945327.2436837.2738315299316875094.stgit@frogsfrogsfrogs/

Although at this point, I would not do any dynamic resolve of backing_ops
and use a single ops struct per filesystem mode.

Thanks,
Amir.

>
> diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
> index 17fe1f490cbd..8dbd6663808e 100644
> --- a/fs/fuse/Kconfig
> +++ b/fs/fuse/Kconfig
> @@ -79,6 +79,7 @@ config FUSE_IO_URING
>
>  config FUSE_FAMFS_DAX
>         bool "FUSE support for fs-dax filesystems backed by devdax"
> +       select FUSE_PASSTHROUGH
>         depends on FUSE_FS
>         depends on DEV_DAX_FSDEV
>         default FUSE_FS
> diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
> index d95dfa48483f..e56b7f8d5d2b 100644
> --- a/fs/fuse/backing.c
> +++ b/fs/fuse/backing.c
> @@ -37,14 +37,14 @@ void fuse_backing_files_init(struct fuse_conn *fc)
>         idr_init(&fc->backing_files_map);
>  }
>
> -static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
> +int fuse_backing_id_alloc(struct fuse_conn *fc, void *obj)
>  {
>         int id;
>
>         idr_preload(GFP_KERNEL);
>         spin_lock(&fc->lock);
>         /* FIXME: xarray might be space inefficient */
> -       id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
> +       id = idr_alloc_cyclic(&fc->backing_files_map, obj, 1, 0, GFP_ATOMIC);
>         spin_unlock(&fc->lock);
>         idr_preload_end();
>
> @@ -75,7 +75,10 @@ static int fuse_backing_id_free(int id, void *p, void *data)
>
>  void fuse_backing_files_free(struct fuse_conn *fc)
>  {
> -       idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
> +       if (fc->famfs_iomap)
> +               famfs_teardown(fc);
> +       else
> +               idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
>         idr_destroy(&fc->backing_files_map);
>  }
>
> @@ -88,13 +91,23 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
>
>         pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
>
> +       if (map->padding)
> +               return -EINVAL;
> +
> +       if (fc->famfs_iomap) {
> +               if (map->flags != FUSE_BACKING_DAXDEV)
> +                       return -EINVAL;
> +
> +               return famfs_daxdev_open(fc, map);
> +       }
> +
>         /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
>         res = -EPERM;
>         if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
>                 goto out;
>
>         res = -EINVAL;
> -       if (map->flags || map->padding)
> +       if (map->flags)
>                 goto out;
>
>         file = fget_raw(map->fd);
> @@ -144,6 +157,10 @@ int fuse_backing_close(struct fuse_conn *fc, int backing_id)
>
>         pr_debug("%s: backing_id=%d\n", __func__, backing_id);
>
> +       /* No close for dax device (yet) */
> +       if (fc->famfs_iomap)
> +               return -EINVAL;
> +
>         /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
>         err = -EPERM;
>         if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
> @@ -170,6 +187,9 @@ struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id)
>  {
>         struct fuse_backing *fb;
>
> +       if (fc->famfs_iomap)
> +               return NULL;
> +
>         rcu_read_lock();
>         fb = idr_find(&fc->backing_files_map, backing_id);
>         fb = fuse_backing_get(fb);
> diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
> index 121ed74e9727..c49362d19b36 100644
> --- a/fs/fuse/famfs.c
> +++ b/fs/fuse/famfs.c
> @@ -23,15 +23,15 @@
>  #include "fuse_i.h"
>
>  static void famfs_set_daxdev_err(
> -       struct fuse_conn *fc, struct dax_device *dax_devp);
> +       struct famfs_daxdev *dd, struct dax_device *dax_devp);
>
>  static int
>  famfs_dax_notify_failure(struct dax_device *dax_devp, u64 offset,
>                         u64 len, int mf_flags)
>  {
> -       struct fuse_conn *fc = dax_holder(dax_devp);
> +       struct famfs_daxdev *dd = dax_holder(dax_devp);
>
> -       famfs_set_daxdev_err(fc, dax_devp);
> +       famfs_set_daxdev_err(dd, dax_devp);
>
>         return 0;
>  }
> @@ -51,6 +51,26 @@ static const struct address_space_operations famfs_dax_aops = {
>
>  /*****************************************************************************/
>
> +static void famfs_daxdev_free(struct famfs_daxdev *dd)
> +{
> +       /* Only call fs_put_dax if fs_dax_get succeeded */
> +       if (dd->devp) {
> +               if (!dd->dax_err)
> +                       fs_put_dax(dd->devp, dd);
> +               put_dax(dd->devp);
> +       }
> +       kfree(dd->name);
> +       kfree(dd);
> +}
> +
> +DEFINE_FREE(famfs_daxdev_free, struct famfs_daxdev *, if (_T) famfs_daxdev_free(_T));
> +
> +static int famfs_devidx_free(int id, void *p, void *data)
> +{
> +       famfs_daxdev_free(p);
> +       return 0;
> +}
> +
>  /*
>   * famfs_teardown()
>   *
> @@ -59,263 +79,90 @@ static const struct address_space_operations famfs_dax_aops = {
>  void
>  famfs_teardown(struct fuse_conn *fc)
>  {
> -       struct famfs_dax_devlist *devlist __free(kfree) = fc->dax_devlist;
> -       int i;
> -
> -       fc->dax_devlist = NULL;
> -
> -       if (!devlist)
> -               return;
> -
> -       if (!devlist->devlist)
> -               return;
> -
> -       /* Close & release all the daxdevs in our table */
> -       for (i = 0; i < devlist->nslots; i++) {
> -               struct famfs_daxdev *dd = &devlist->devlist[i];
> -
> -               if (!dd->valid)
> -                       continue;
> -
> -               /* Only call fs_put_dax if fs_dax_get succeeded */
> -               if (dd->devp) {
> -                       if (!dd->dax_err)
> -                               fs_put_dax(dd->devp, fc);
> -                       put_dax(dd->devp);
> -               }
> -
> -               kfree(dd->name);
> -       }
> -       kfree(devlist->devlist);
> +       idr_for_each(&fc->backing_files_map, famfs_devidx_free,  NULL);
>  }
>
>  static int
> -famfs_verify_daxdev(const char *pathname, dev_t *devno)
> +famfs_verify_daxdev(const struct path *path, dev_t *devno)
>  {
> -       struct inode *inode;
> -       struct path path;
> -       int err;
> +       struct inode *inode = d_inode(path->dentry);
>
> -       if (!pathname || !*pathname)
> +       if (!S_ISCHR(inode->i_mode))
>                 return -EINVAL;
>
> -       err = kern_path(pathname, LOOKUP_FOLLOW, &path);
> -       if (err)
> -               return err;
> -
> -       inode = d_backing_inode(path.dentry);
> -       if (!S_ISCHR(inode->i_mode)) {
> -               err = -EINVAL;
> -               goto out_path_put;
> -       }
> -
> -       if (!may_open_dev(&path)) { /* had to export this */
> -               err = -EACCES;
> -               goto out_path_put;
> -       }
> +       if (!may_open_dev(path)) /* had to export this */
> +               return -EACCES;
>
>         *devno = inode->i_rdev;
>
> -out_path_put:
> -       path_put(&path);
> -       return err;
> +       return 0;
>  }
>
> -/**
> - * famfs_fuse_get_daxdev() - Retrieve info for a DAX device from fuse server
> - *
> - * Send a GET_DAXDEV message to the fuse server to retrieve info on a
> - * dax device.
> - *
> - * @fm:     fuse_mount
> - * @index:  the index of the dax device; daxdevs are referred to by index
> - *          in fmaps, and the server resolves the index to a particular daxdev
> - *
> - * Returns: 0=success
> - *          -errno=failure
> - */
> -static int
> -famfs_fuse_get_daxdev(struct fuse_mount *fm, const u64 index)
> +static int famfs_fuse_get_daxdev(const struct path *path, struct famfs_daxdev *dd)
>  {
> -       struct fuse_daxdev_out daxdev_out = { 0 };
> -       struct fuse_conn *fc = fm->fc;
> -       struct famfs_daxdev *daxdev;
>         int rc;
>
> -       FUSE_ARGS(args);
> -
> -       /* Store the daxdev in our table */
> -       if (index >= fc->dax_devlist->nslots) {
> -               pr_err("%s: index(%lld) > nslots(%d)\n",
> -                      __func__, index, fc->dax_devlist->nslots);
> -               return -EINVAL;
> -       }
> -
> -       args.opcode = FUSE_GET_DAXDEV;
> -       args.nodeid = index;
> -
> -       args.in_numargs = 0;
> -
> -       args.out_numargs = 1;
> -       args.out_args[0].size = sizeof(daxdev_out);
> -       args.out_args[0].value = &daxdev_out;
> -
> -       /* Send GET_DAXDEV command */
> -       rc = fuse_simple_request(fm, &args);
> +       /* Verify dev is valid and can be opened and gets the devno */
> +       rc = famfs_verify_daxdev(path, &dd->devno);
>         if (rc) {
> -               pr_err("%s: rc=%d from fuse_simple_request()\n",
> -                      __func__, rc);
> -               /* Error will be that the payload is smaller than FMAP_BUFSIZE,
> -                * which is the max we can handle. Empty payload handled below.
> -                */
> +               pr_err("%s: rc=%d from famfs_verify_daxdev()\n", __func__, rc);
>                 return rc;
>         }
>
> -       scoped_guard(rwsem_write, &fc->famfs_devlist_sem) {
> -               daxdev = &fc->dax_devlist->devlist[index];
> -
> -               /* Abort if daxdev is now valid (races are possible here) */
> -               if (daxdev->valid) {
> -                       pr_debug("%s: daxdev already known\n", __func__);
> -                       return 0;
> -               }
> -
> -               /* Verify dev is valid and can be opened and gets the devno */
> -               rc = famfs_verify_daxdev(daxdev_out.name, &daxdev->devno);
> -               if (rc) {
> -                       pr_err("%s: rc=%d from famfs_verify_daxdev()\n",
> -                              __func__, rc);
> -                       return rc;
> -               }
> -
> -               daxdev->name = kstrdup(daxdev_out.name, GFP_KERNEL);
> -               if (!daxdev->name)
> -                       return -ENOMEM;
> +       dd->name = kasprintf(GFP_KERNEL, "%pd4", path->dentry);
> +       if (!dd->name)
> +               return -ENOMEM;
>
> -               /* This will fail if it's not a dax device */
> -               daxdev->devp = dax_dev_get(daxdev->devno);
> -               if (!daxdev->devp) {
> -                       pr_warn("%s: device %s not found or not dax\n",
> -                               __func__, daxdev_out.name);
> -                       kfree(daxdev->name);
> -                       daxdev->name = NULL;
> +       /* This will fail if it's not a dax device */
> +       dd->devp = dax_dev_get(dd->devno);
> +       if (!dd->devp) {
> +               pr_warn("%s: device %s not found or not dax\n",__func__, dd->name);
> +                       kfree(dd->name);
> +                       dd->name = NULL;
>                         return -ENODEV;
>                 }
>
> -               rc = fs_dax_get(daxdev->devp, fc, &famfs_fuse_dax_holder_ops);
> -               if (rc) {
> -                       /* Mark as valid with dax_err to prevent retry loop.
> -                        * famfs_dax_err() will return -EIO on access attempts.
> -                        * Teardown handles this case: skips fs_put_dax, calls put_dax.
> -                        */
> -                       daxdev->dax_err = 1;
> -                       pr_err("%s: fs_dax_get(%lld) failed\n",
> -                              __func__, (u64)daxdev->devno);
> -               }
> -
> -               wmb(); /* All other fields must be visible before valid */
> -               daxdev->valid = 1;
> +       rc = fs_dax_get(dd->devp, dd, &famfs_fuse_dax_holder_ops);
> +       if (rc) {
> +               /* Mark as valid with dax_err to prevent retry loop.
> +                * famfs_dax_err() will return -EIO on access attempts.
> +                * Teardown handles this case: skips fs_put_dax, calls put_dax.
> +                */
> +               dd->dax_err = 1;
> +               pr_err("%s: fs_dax_get(%lld) failed\n", __func__, (u64)dd->devno);
>         }
>
>         return 0;
>  }
>
> -/**
> - * famfs_update_daxdev_table() - Update the daxdev table
> - * @fm:   fuse_mount
> - * @meta: famfs_file_meta, in-memory format, built from a GET_FMAP response
> - *
> - * This function is called for each new file fmap, to verify whether all
> - * referenced daxdevs are already known (i.e. in the table). Any daxdev
> - * indices referenced in @meta but not in the table will be retrieved via
> - * famfs_fuse_get_daxdev() and added to the table
> - *
> - * Return: 0=success
> - *         -errno=failure
> - */
> -static int
> -famfs_update_daxdev_table(
> -       struct fuse_mount *fm,
> -       const struct famfs_file_meta *meta)
> +int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map)
>  {
> -       struct famfs_dax_devlist *local_devlist;
> -       struct fuse_conn *fc = fm->fc;
> -       int indices_to_fetch[MAX_DAXDEVS];
> -       int n_to_fetch = 0;
> -       int err;
> -
> -       /* First time through we will need to allocate the dax_devlist */
> -       if (!fc->dax_devlist) {
> -               local_devlist = kcalloc(1, sizeof(*fc->dax_devlist), GFP_KERNEL);
> -               if (!local_devlist)
> -                       return -ENOMEM;
> -
> -               local_devlist->nslots = MAX_DAXDEVS;
> -
> -               local_devlist->devlist = kcalloc(MAX_DAXDEVS,
> -                                                sizeof(struct famfs_daxdev),
> -                                                GFP_KERNEL);
> -               if (!local_devlist->devlist) {
> -                       kfree(local_devlist);
> -                       return -ENOMEM;
> -               }
> -
> -               /* We don't need famfs_devlist_sem here because we use cmpxchg */
> -               if (cmpxchg(&fc->dax_devlist, NULL, local_devlist) != NULL) {
> -                       kfree(local_devlist->devlist);
> -                       kfree(local_devlist); /* another thread beat us to it */
> -               }
> -       }
> +       struct famfs_daxdev *dd __free(famfs_daxdev_free) = kzalloc_obj(*dd);
> +       int rc;
>
> -       /* Collect indices that need fetching while holding read lock */
> -       scoped_guard(rwsem_read, &fc->famfs_devlist_sem) {
> -               unsigned long i;
> +       CLASS(fd_raw, f)(map->fd);
> +       if (fd_empty(f))
> +               return -EBADF;
>
> -               for_each_set_bit(i, (unsigned long *)&meta->dev_bitmap, MAX_DAXDEVS) {
> -                       if (!(fc->dax_devlist->devlist[i].valid))
> -                               indices_to_fetch[n_to_fetch++] = i;
> -               }
> -       }
> +       rc = famfs_fuse_get_daxdev(&fd_file(f)->f_path, dd);
> +       if (rc)
> +               return rc;
>
> -       /* Fetch needed daxdevs outside the read lock */
> -       for (int j = 0; j < n_to_fetch; j++) {
> -               err = famfs_fuse_get_daxdev(fm, indices_to_fetch[j]);
> -               if (err)
> -                       pr_err("%s: failed to get daxdev=%d\n",
> -                              __func__, indices_to_fetch[j]);
> -       }
> +       rc = fuse_backing_id_alloc(fc, dd);
> +       if (rc >= 0)
> +               retain_and_null_ptr(dd);
>
> -       return 0;
> +       return rc;
>  }
>
>  static void
>  famfs_set_daxdev_err(
> -       struct fuse_conn *fc,
> -       struct dax_device *dax_devp)
> +       struct famfs_daxdev *dd, struct dax_device *dax_devp)
>  {
> -       int i;
> -
> -       /* Gotta search the list by dax_devp;
> -        * read lock because we're not adding or removing daxdev entries
> -        */
> -       scoped_guard(rwsem_write, &fc->famfs_devlist_sem) {
> -               for (i = 0; i < fc->dax_devlist->nslots; i++) {
> -                       if (fc->dax_devlist->devlist[i].valid) {
> -                               struct famfs_daxdev *dd;
> -
> -                               dd = &fc->dax_devlist->devlist[i];
> -                               if (dd->devp != dax_devp)
> -                                       continue;
>
> -                               dd->error = true;
> +       dd->error = true;
>
> -                               pr_err("%s: memory error on daxdev %s (%d)\n",
> -                                      __func__, dd->name, i);
> -                               return;
> -                       }
> -               }
> -       }
> -       pr_err("%s: memory err on unrecognized daxdev\n", __func__);
> +       pr_err("%s: memory error on daxdev %s\n", __func__, dd->name);
>  }
>
>  /***************************************************************************/
> @@ -447,9 +294,6 @@ famfs_fuse_meta_alloc(
>                         meta->se[i].ext_offset = se_in[i].se_offset;
>                         meta->se[i].ext_len    = se_in[i].se_len;
>
> -                       /* Record bitmap of referenced daxdev indices */
> -                       meta->dev_bitmap |= (1 << meta->se[i].dev_index);
> -
>                         errs += famfs_check_ext_alignment(&meta->se[i]);
>
>                         extent_total += meta->se[i].ext_len;
> @@ -545,9 +389,6 @@ famfs_fuse_meta_alloc(
>                                 strips_out[j].ext_offset = offset;
>                                 strips_out[j].ext_len    = len;
>
> -                               /* Record bitmap of referenced daxdev indices */
> -                               meta->dev_bitmap |= (1 << devindex);
> -
>                                 extent_total += len;
>                                 errs += famfs_check_ext_alignment(&strips_out[j]);
>                                 size_remainder -= len;
> @@ -603,7 +444,7 @@ famfs_fuse_meta_alloc(
>   * Return: 0=success
>   *          -errno=failure
>   */
> -int
> +static int
>  famfs_file_init_dax(
>         struct fuse_mount *fm,
>         struct inode *inode,
> @@ -625,9 +466,6 @@ famfs_file_init_dax(
>         if (rc)
>                 goto errout;
>
> -       /* Make sure this fmap doesn't reference any unknown daxdevs */
> -       famfs_update_daxdev_table(fm, meta);
> -
>         /* Publish the famfs metadata on fi->famfs_meta */
>         inode_lock(inode);
>
> @@ -662,11 +500,6 @@ static int famfs_file_bad(struct inode *inode);
>
>  static int famfs_dax_err(struct famfs_daxdev *dd)
>  {
> -       if (!dd->valid) {
> -               pr_err("%s: daxdev=%s invalid\n",
> -                      __func__, dd->name);
> -               return -EIO;
> -       }
>         if (dd->dax_err) {
>                 pr_err("%s: daxdev=%s dax_err\n",
>                        __func__, dd->name);
> @@ -680,6 +513,18 @@ static int famfs_dax_err(struct famfs_daxdev *dd)
>         return 0;
>  }
>
> +static struct famfs_daxdev *
> +famfs_daxdev_lookup(struct fuse_conn *fc, int devidx)
> +{
> +       struct famfs_daxdev *dd;
> +
> +       rcu_read_lock();
> +       dd = idr_find(&fc->backing_files_map, devidx);
> +       rcu_read_unlock();
> +
> +       return dd;
> +}
> +
>  static int
>  famfs_interleave_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
>                          loff_t file_offset, off_t len, unsigned int flags)
> @@ -730,21 +575,13 @@ famfs_interleave_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
>                         u64 strip_devidx = fei->ie_strips[strip_num].dev_index;
>                         int rc;
>
> -                       if (strip_devidx >= fc->dax_devlist->nslots) {
> -                               pr_err("%s: strip_devidx %llu >= nslots %d\n",
> -                                      __func__, strip_devidx,
> -                                      fc->dax_devlist->nslots);
> -                               goto err_out;
> -                       }
> -
> -                       if (!fc->dax_devlist->devlist[strip_devidx].valid) {
> +                       dd = famfs_daxdev_lookup(fc, strip_devidx);
> +                       if (!dd) {
>                                 pr_err("%s: daxdev=%lld invalid\n", __func__,
>                                         strip_devidx);
>                                 goto err_out;
>                         }
>
> -                       dd = &fc->dax_devlist->devlist[strip_devidx];
> -
>                         rc = famfs_dax_err(dd);
>                         if (rc) {
>                                 /* Shut down access to this file */
> @@ -756,7 +593,7 @@ famfs_interleave_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
>                         iomap->offset  = file_offset;
>                         iomap->length  = min_t(loff_t, len, chunk_remainder);
>
> -                       iomap->dax_dev = fc->dax_devlist->devlist[strip_devidx].devp;
> +                       iomap->dax_dev = dd->devp;
>
>                         iomap->type    = IOMAP_MAPPED;
>                         iomap->flags   = flags;
> @@ -815,11 +652,6 @@ famfs_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
>         struct fuse_conn *fc = get_fuse_conn(inode);
>         loff_t local_offset = file_offset;
>
> -       if (!fc->dax_devlist) {
> -               pr_err("%s: null dax_devlist\n", __func__);
> -               goto err_out;
> -       }
> -
>         if (famfs_file_bad(inode))
>                 goto err_out;
>
> @@ -851,15 +683,13 @@ famfs_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
>                         struct famfs_daxdev *dd;
>                         int rc;
>
> -                       if (daxdev_idx >= fc->dax_devlist->nslots) {
> -                               pr_err("%s: daxdev_idx %llu >= nslots %d\n",
> -                                      __func__, daxdev_idx,
> -                                      fc->dax_devlist->nslots);
> +                       dd = famfs_daxdev_lookup(fc, daxdev_idx);
> +                       if (!dd) {
> +                               pr_err("%s: daxdev=%lld invalid\n", __func__,
> +                                       daxdev_idx);
>                                 goto err_out;
>                         }
>
> -                       dd = &fc->dax_devlist->devlist[daxdev_idx];
> -
>                         rc = famfs_dax_err(dd);
>                         if (rc) {
>                                 /* Shut down access to this file */
> @@ -884,7 +714,7 @@ famfs_fileofs_to_daxofs(struct inode *inode, struct iomap *iomap,
>                         iomap->offset  = file_offset;
>                         iomap->length  = min_t(loff_t, len, ext_len_remainder);
>
> -                       iomap->dax_dev = fc->dax_devlist->devlist[daxdev_idx].devp;
> +                       iomap->dax_dev = dd->devp;
>
>                         iomap->type    = IOMAP_MAPPED;
>                         iomap->flags   = flags;
> diff --git a/fs/fuse/famfs_kfmap.h b/fs/fuse/famfs_kfmap.h
> index 970ad802b492..3f72ae8e4696 100644
> --- a/fs/fuse/famfs_kfmap.h
> +++ b/fs/fuse/famfs_kfmap.h
> @@ -124,7 +124,6 @@ struct famfs_file_meta {
>         enum famfs_file_type   file_type;
>         size_t                 file_size;
>         enum famfs_extent_type fm_extent_type;
> -       u64 dev_bitmap; /* bitmap of referenced daxdevs by index */
>         union {
>                 struct {
>                         size_t         fm_nextents;
> @@ -145,7 +144,6 @@ struct famfs_file_meta {
>   */
>  struct famfs_daxdev {
>         /* Include dev uuid? */
> -       bool valid;
>         bool error; /* Dax has reported a memory error (probably poison) */
>         bool dax_err; /* fs_dax_get() failed */
>         dev_t devno;
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index dcbeaceda918..adc88a8681e2 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -1006,11 +1006,6 @@ struct fuse_conn {
>                 /* Request timeout (in jiffies). 0 = no timeout */
>                 unsigned int req_timeout;
>         } timeout;
> -
> -#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> -       struct rw_semaphore famfs_devlist_sem;
> -       struct famfs_dax_devlist *dax_devlist;
> -#endif
>  };
>
>  /*
> @@ -1572,6 +1567,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
>  struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
>  void fuse_backing_put(struct fuse_backing *fb);
>  struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id);
> +int fuse_backing_id_alloc(struct fuse_conn *fc, void *obj);
>  #else
>
>  static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
> @@ -1647,22 +1643,14 @@ extern void fuse_sysctl_unregister(void);
>  /* famfs.c */
>
>  #if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> -int famfs_file_init_dax(struct fuse_mount *fm,
> -                       struct inode *inode, void *fmap_buf,
> -                       size_t fmap_size);
>  ssize_t famfs_fuse_write_iter(struct kiocb *iocb, struct iov_iter *from);
>  ssize_t famfs_fuse_read_iter(struct kiocb *iocb, struct iov_iter       *to);
>  int famfs_fuse_mmap(struct file *file, struct vm_area_struct *vma);
>  void __famfs_meta_free(void *map);
>
> +int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map);
>  void famfs_teardown(struct fuse_conn *fc);
>
> -/* Set fi->famfs_meta = NULL regardless of prior value */
> -static inline void famfs_meta_init(struct fuse_inode *fi)
> -{
> -       fi->famfs_meta = NULL;
> -}
> -
>  /* Set fi->famfs_meta iff the current value is NULL */
>  static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
>                                                   void *meta)
> @@ -1678,11 +1666,6 @@ static inline void famfs_meta_free(struct fuse_inode *fi)
>         }
>  }
>
> -static inline void famfs_init_devlist_sem(struct fuse_conn *fc)
> -{
> -       init_rwsem(&fc->famfs_devlist_sem);
> -}
> -
>  static inline int fuse_file_famfs(struct fuse_inode *fi)
>  {
>         return (READ_ONCE(fi->famfs_meta) != NULL);
> @@ -1736,6 +1719,10 @@ fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
>         return 0;
>  }
>
> +static inline int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map)
> +{
> +       return -EINVAL;
> +}
>  #endif /* CONFIG_FUSE_FAMFS_DAX */
>
>  #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 40e7ea5b6437..5e692fc84297 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -1048,9 +1048,6 @@ void fuse_conn_put(struct fuse_conn *fc)
>                 WARN_ON(atomic_read(&bucket->count) != 1);
>                 kfree(bucket);
>         }
> -       if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX))
> -               famfs_teardown(fc);
> -
>         if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
>                 fuse_backing_files_free(fc);
>         call_rcu(&fc->rcu, delayed_release);
> @@ -1480,10 +1477,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
>                                 u64 in_flags = FIELD_PREP(GENMASK_ULL(63, 32), ia->in.flags2)
>                                                 | ia->in.flags;
>
> -                               if (in_flags & FUSE_DAX_FMAP) {
> -                                       famfs_init_devlist_sem(fc);
> +                               if (in_flags & FUSE_DAX_FMAP)
>                                         fc->famfs_iomap = 1;
> -                               }
>                         }
>                 } else {
>                         ra_pages = fc->max_read / PAGE_SIZE;
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 1b82895108be..d6e8dc75c8a1 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -248,7 +248,6 @@
>   *    - struct fuse_famfs_iext
>   *    - struct fuse_famfs_fmap_header
>   *  - Add the following structs for the GET_DAXDEV message and reply
> - *    - struct fuse_get_daxdev_in
>   *    - struct fuse_get_daxdev_out
>   *  - Add the following enumerated types
>   *    - enum fuse_famfs_file_type
> @@ -1145,6 +1144,9 @@ struct fuse_notify_prune_out {
>         uint64_t        spare;
>  };
>
> +/* Backing is a dax device */
> +#define FUSE_BACKING_DAXDEV    (1 << 0)
> +
>  struct fuse_backing_map {
>         int32_t         fd;
>         uint32_t        flags;
> @@ -1373,10 +1375,6 @@ struct fuse_famfs_fmap_header {
>         uint64_t reserved1;
>  };
>
> -struct fuse_get_daxdev_in {
> -       uint32_t        daxdev_num;
> -};
> -
>  #define DAXDEV_NAME_MAX 256
>
>  /* fuse_daxdev_out has enough space for a uuid if we need it */
> --
> 2.54.0
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-05-31 17:36 ` Amir Goldstein
@ 2026-06-01  8:42   ` Miklos Szeredi
  0 siblings, 0 replies; 9+ messages in thread
From: Miklos Szeredi @ 2026-06-01  8:42 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, John Groves, fuse-devel, linux-fsdevel,
	Darrick J. Wong

On Sun, 31 May 2026 at 19:36, Amir Goldstein <amir73il@gmail.com> wrote:

> Could possibly use fuse_backing_ops that Darrick used for iomap
> backing devices to generalize the backing id ioctls:
>
> https://lore.kernel.org/fuse-devel/177922945327.2436837.2738315299316875094.stgit@frogsfrogsfrogs/

Yeah, this is very much just the userspace API fix.  Cleaning up the
kAPI will happen but is lower priority.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-05-29 13:43 [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN Miklos Szeredi
  2026-05-31 17:36 ` Amir Goldstein
@ 2026-06-02  6:58 ` John Groves
  2026-06-02 13:01   ` Amir Goldstein
  2026-06-03  8:35   ` Miklos Szeredi
  2026-06-03 22:09 ` Joanne Koong
  2 siblings, 2 replies; 9+ messages in thread
From: John Groves @ 2026-06-02  6:58 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: fuse-devel, linux-fsdevel

On 26/05/29 03:43PM, Miklos Szeredi wrote:
> The difference is that now the server needs to look at the fmap and check
> if any new device needs registering, the device index is now allocated by
> the kernel and returned by the ioctl().
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
> This is the first part of what we were talking about: using the backing
> file API for dax devices.
> 
> It's on top of the famfs-v10 patchset.
> 
> Totally untested, haven't even tried booting it.  Obviously needs
> nontrivial conversion in the server code as well.
> 
> Thanks,
> Miklos

Miklos, thanks for looking at this.

First, a scope check. This patch touches only the daxdev registration -
GET_FMAP, struct fuse_famfs_fmap_header, and the simple/interleave
extent structs in include/uapi/linux/fuse.h are untouched. I'm reading
that as tacit agreement to GET_FMAP and its ABI. Can you confirm? If so
the large piece is settled and this is really just about how a daxdev
gets registered.

Device indices - the part that has to be right
----------------------------------------------

Everything below turns on what a famfs device index *is*, so let me make
it explicit, because the patch's model of it (kernel-allocated) isn't
workable for famfs.

Famfs daxdevs are shared memory. The same physical memory is mapped by
famfs on multiple nodes, and on each node it most likely has a different
name and dev_t. What is invariant across nodes is the daxdev's UUID. So
a daxdev's definitive identity is its UUID, not a path or a dev_t.

The index is assigned by famfs userspace, as the order of appearance of
daxdevs in the famfs metadata log:
  - the primary daxdev (which holds the superblock) is always index 0;
  - additional daxdevs are introduced by ADD_DAXDEV log entries and take
    1..n in order of appearance.
  - ADD_DAXDEV log entries establish the UUID-to-index mapping
  - any file may have extents referencing any or all daxdevs defined
    previously in the log - by index

Two properties make the index a userspace-owned, cluster-invariant value
the kernel cannot allocate:

  1. Bootstrap. The famfs metadata log establishes the indices - each
     ADD_DAXDEV entry defines the next - and an index can then appear in
     any later file fmap. But the log is itself a file on index 0 (with
     the superblock), so index 0 must resolve before the kernel can even
     read the log. The numbering is fixed in the log before the kernel
     sees any of it, so the kernel can't be the allocator.
     (idr_alloc_cyclic never hands out 0 anyway, so a kernel-assigned
     primary couldn't even be index 0.)

  2. Cluster-invariance. fmaps live in the shared-memory log, read
     identically by every node. A given dev_index must denote the same
     daxdev (the same UUID) on every node, even though it most likely has
     a different name and dev_t on each node. A kernel-allocated id is
     node-local and mount-local - the wrong kind of value to live in a
     shared fmap.

So the index must be supplied by userspace (which owns the log and
resolves each index's UUID to a local device), and the kernel looks
devices up by that index. Resolving UUID -> local device is inherently
userspace's job; the kernel trusts that resolution and never needs the
UUID.

This is also why the hand-rolled daxdev table is the right structure, not
incidental cruft: a small self-contained array in famfs.c, keyed directly
by the dense zero-based index, closing the discovery race (valid flag +
wmb) and giving O(1) lookup. That's exactly what this index namespace
calls for.

Identity by reference, not by path - this part is right
-------------------------------------------------------

The genuinely better idea here is getting the kernel out of resolving a
device pathname: the daemon hands over an already-resolved reference
(you do it with an fd, from which the kernel takes inode->i_rdev)
instead of a string the kernel has to kern_path(). I want to adopt that
- credit where it's due.

The minimal way to realize it is to have GET_DAXDEV return a dev_t: the
kernel uses dax_dev_find() and kern_path()/may_open_dev() drop out
entirely. No ABI change - struct fuse_daxdev_out already carries the
index and has reserved space (sized with room for a uuid), so it's just
filling a reserved field, and the index is already first-class in the
reply.

The fd delivers that same dev_t (the kernel already reads it from
f_path), so BACKING_OPEN can realize the same idea - but the dev_t is
the part that's actually the improvement, and the daxdev reply ABI
already models (index, dev_t) while fuse_backing_map models neither. So
realizing it is cleaner without the fd than with it; the costs of going
through BACKING_OPEN are below.

On BACKING_OPEN
---------------

If you feel strongly about FUSE_DEV_IOC_BACKING_OPEN, I'll make it work
- but I want to be honest that for famfs it's a shared-almost-nothing
approach:

  - famfs needs its own handler regardless (the famfs branch of
    fuse_backing_open), and that handler can keep populating the famfs
    devlist exactly as today - so the only things actually shared are the
    ioctl entry and the {fd, flags} struct.

  - it forces 'select FUSE_PASSTHROUGH', pulling in file retention, I/O
    forwarding, refcounting and close - none of which famfs uses. Famfs
    maps device memory directly via iomap/dax_direct_access; there is no
    backing file and no famfs close path (fuse_backing_close already
    returns -EINVAL when famfs_iomap is set).

  - the ABI doesn't fit. struct fuse_backing_map is {int32_t fd; uint32_t
    flags; uint64_t padding} - it's fd-shaped, with no dev_t field and no
    index field. To use it for famfs I'd have to carry the device by fd
    (which forces the daemon to open() the device just to register it -
    the daemon otherwise never opens it) and smuggle the native index
    through padding. And the fd does no real work: in famfs_daxdev_open()
    it's borrowed only to read f_path -> devno, then dropped; exclusivity
    comes from fs_dax_get()'s holder, not the fd.

On access control, in case it's a concern behind the fd: the famfs path
is already gated on the server holding CAP_SYS_RAWIO at init -
FUSE_DAX_FMAP is only offered under capable(CAP_SYS_RAWIO) in
fuse_send_init(), and famfs_iomap (hence GET_FMAP/GET_DAXDEV) is enabled
only if that flag comes back. So the registering daemon is already
raw-IO-trusted; there's no access-control gap an fd's open would close
(may_open_dev() isn't a capability check anyway, just a mount-nodev test).

Net
---

I'm glad to (a) confirm we're aligned on GET_FMAP and its ABI, and (b)
switch GET_DAXDEV to return a dev_t instead of a path. I can't move index
allocation into the kernel - the index is a userspace-owned,
cluster-invariant value for the reasons above - and I'd keep the famfs
daxdev table where it is. Lazy discovery has to keep working either way:
only the primary daxdev is known at mount; the rest are discovered as the
famfs metadata log is replayed and first referenced by an fmap.

If you still want BACKING_OPEN after the above, say so and I'll make it
work, ABI overloads and all - but I think GET_DAXDEV + dev_t is the
smaller and better-fitting change.

<snip>

Thanks,
John


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-06-02  6:58 ` John Groves
@ 2026-06-02 13:01   ` Amir Goldstein
  2026-06-03  8:35   ` Miklos Szeredi
  1 sibling, 0 replies; 9+ messages in thread
From: Amir Goldstein @ 2026-06-02 13:01 UTC (permalink / raw)
  To: John Groves; +Cc: Miklos Szeredi, fuse-devel, linux-fsdevel

On Tue, Jun 2, 2026 at 8:59 AM John Groves <john@groves.net> wrote:
>
> On 26/05/29 03:43PM, Miklos Szeredi wrote:
> > The difference is that now the server needs to look at the fmap and check
> > if any new device needs registering, the device index is now allocated by
> > the kernel and returned by the ioctl().
> >
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> > This is the first part of what we were talking about: using the backing
> > file API for dax devices.
> >
> > It's on top of the famfs-v10 patchset.
> >
> > Totally untested, haven't even tried booting it.  Obviously needs
> > nontrivial conversion in the server code as well.
> >
> > Thanks,
> > Miklos
>
> Miklos, thanks for looking at this.
>
> First, a scope check. This patch touches only the daxdev registration -
> GET_FMAP, struct fuse_famfs_fmap_header, and the simple/interleave
> extent structs in include/uapi/linux/fuse.h are untouched. I'm reading
> that as tacit agreement to GET_FMAP and its ABI. Can you confirm? If so
> the large piece is settled and this is really just about how a daxdev
> gets registered.
>
> Device indices - the part that has to be right
> ----------------------------------------------
>
> Everything below turns on what a famfs device index *is*, so let me make
> it explicit, because the patch's model of it (kernel-allocated) isn't
> workable for famfs.
>
> Famfs daxdevs are shared memory. The same physical memory is mapped by
> famfs on multiple nodes, and on each node it most likely has a different
> name and dev_t. What is invariant across nodes is the daxdev's UUID. So
> a daxdev's definitive identity is its UUID, not a path or a dev_t.
>
> The index is assigned by famfs userspace, as the order of appearance of
> daxdevs in the famfs metadata log:
>   - the primary daxdev (which holds the superblock) is always index 0;
>   - additional daxdevs are introduced by ADD_DAXDEV log entries and take
>     1..n in order of appearance.
>   - ADD_DAXDEV log entries establish the UUID-to-index mapping
>   - any file may have extents referencing any or all daxdevs defined
>     previously in the log - by index
>
> Two properties make the index a userspace-owned, cluster-invariant value
> the kernel cannot allocate:
>
>   1. Bootstrap. The famfs metadata log establishes the indices - each
>      ADD_DAXDEV entry defines the next - and an index can then appear in
>      any later file fmap. But the log is itself a file on index 0 (with
>      the superblock), so index 0 must resolve before the kernel can even
>      read the log. The numbering is fixed in the log before the kernel
>      sees any of it, so the kernel can't be the allocator.
>      (idr_alloc_cyclic never hands out 0 anyway, so a kernel-assigned
>      primary couldn't even be index 0.)
>
>   2. Cluster-invariance. fmaps live in the shared-memory log, read
>      identically by every node. A given dev_index must denote the same
>      daxdev (the same UUID) on every node, even though it most likely has
>      a different name and dev_t on each node. A kernel-allocated id is
>      node-local and mount-local - the wrong kind of value to live in a
>      shared fmap.
>
> So the index must be supplied by userspace (which owns the log and
> resolves each index's UUID to a local device), and the kernel looks
> devices up by that index. Resolving UUID -> local device is inherently
> userspace's job; the kernel trusts that resolution and never needs the
> UUID.
>
> This is also why the hand-rolled daxdev table is the right structure, not
> incidental cruft: a small self-contained array in famfs.c, keyed directly
> by the dense zero-based index, closing the discovery race (valid flag +
> wmb) and giving O(1) lookup. That's exactly what this index namespace
> calls for.
>
> Identity by reference, not by path - this part is right
> -------------------------------------------------------
>
> The genuinely better idea here is getting the kernel out of resolving a
> device pathname: the daemon hands over an already-resolved reference
> (you do it with an fd, from which the kernel takes inode->i_rdev)
> instead of a string the kernel has to kern_path(). I want to adopt that
> - credit where it's due.
>
> The minimal way to realize it is to have GET_DAXDEV return a dev_t: the
> kernel uses dax_dev_find() and kern_path()/may_open_dev() drop out
> entirely. No ABI change - struct fuse_daxdev_out already carries the
> index and has reserved space (sized with room for a uuid), so it's just
> filling a reserved field, and the index is already first-class in the
> reply.
>
> The fd delivers that same dev_t (the kernel already reads it from
> f_path), so BACKING_OPEN can realize the same idea - but the dev_t is
> the part that's actually the improvement, and the daxdev reply ABI
> already models (index, dev_t) while fuse_backing_map models neither. So
> realizing it is cleaner without the fd than with it; the costs of going
> through BACKING_OPEN are below.
>
> On BACKING_OPEN
> ---------------
>
> If you feel strongly about FUSE_DEV_IOC_BACKING_OPEN, I'll make it work
> - but I want to be honest that for famfs it's a shared-almost-nothing
> approach:
>
>   - famfs needs its own handler regardless (the famfs branch of
>     fuse_backing_open), and that handler can keep populating the famfs
>     devlist exactly as today - so the only things actually shared are the
>     ioctl entry and the {fd, flags} struct.
>
>   - it forces 'select FUSE_PASSTHROUGH', pulling in file retention, I/O
>     forwarding, refcounting and close - none of which famfs uses. Famfs
>     maps device memory directly via iomap/dax_direct_access; there is no
>     backing file and no famfs close path (fuse_backing_close already
>     returns -EINVAL when famfs_iomap is set).
>
>   - the ABI doesn't fit. struct fuse_backing_map is {int32_t fd; uint32_t
>     flags; uint64_t padding} - it's fd-shaped, with no dev_t field and no
>     index field. To use it for famfs I'd have to carry the device by fd
>     (which forces the daemon to open() the device just to register it -
>     the daemon otherwise never opens it) and smuggle the native index
>     through padding. And the fd does no real work: in famfs_daxdev_open()
>     it's borrowed only to read f_path -> devno, then dropped; exclusivity
>     comes from fs_dax_get()'s holder, not the fd.
>
> On access control, in case it's a concern behind the fd: the famfs path
> is already gated on the server holding CAP_SYS_RAWIO at init -
> FUSE_DAX_FMAP is only offered under capable(CAP_SYS_RAWIO) in
> fuse_send_init(), and famfs_iomap (hence GET_FMAP/GET_DAXDEV) is enabled
> only if that flag comes back. So the registering daemon is already
> raw-IO-trusted; there's no access-control gap an fd's open would close
> (may_open_dev() isn't a capability check anyway, just a mount-nodev test).
>
> Net
> ---
>
> I'm glad to (a) confirm we're aligned on GET_FMAP and its ABI, and (b)
> switch GET_DAXDEV to return a dev_t instead of a path. I can't move index
> allocation into the kernel - the index is a userspace-owned,
> cluster-invariant value for the reasons above - and I'd keep the famfs
> daxdev table where it is. Lazy discovery has to keep working either way:
> only the primary daxdev is known at mount; the rest are discovered as the
> famfs metadata log is replayed and first referenced by an fmap.
>
> If you still want BACKING_OPEN after the above, say so and I'll make it
> work, ABI overloads and all - but I think GET_DAXDEV + dev_t is the
> smaller and better-fitting change.

John,

Allow me to be blunt, because I find your email counter productive.

I insist on BACKING_OPEN. Please stop going in circles.

Miklos is working hard, doing all the work for you, basically preparing famfs
for merge nowish and what I hear when reading in this email is a very long rant.

Maybe I am slow, but from this very long email, the only thing that I find
technically relevant is "the index is a userspace-owned".

All the arguments like "famfs does not need to open the device" and alike
are, unless I am missing something, completely irrelevant and counter
produtive to your goal of merging famfs.

Yes, you will need to make changes to userspace, please don't list those
as technical arguments for how the kernel should be implemented.

Letting userspace assign the backing id is trivial.
Can either create a new ioctl DAXDEV_OPEN or reuse existing fields
and flags in fuse_backing_map, so that passthrough server could also
assign its own ids if it wants to.

It's all about sharing the infrastructure, not the ioctl number.

Are there any other technical problems that you can point out
with the patches that Miklos posted (including FUSE_DEV_IOC_STRIPE_OPEN)?

Sorry for the tone of my email.
I hope that the good intention of working together towards an acceptable
way to merge famfs speaks louder than my ranting.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-06-02  6:58 ` John Groves
  2026-06-02 13:01   ` Amir Goldstein
@ 2026-06-03  8:35   ` Miklos Szeredi
  2026-06-15 16:20     ` John Groves
  1 sibling, 1 reply; 9+ messages in thread
From: Miklos Szeredi @ 2026-06-03  8:35 UTC (permalink / raw)
  To: John Groves; +Cc: Miklos Szeredi, fuse-devel, linux-fsdevel

On Tue, 2 Jun 2026 at 09:04, John Groves <john@groves.net> wrote:

> Device indices - the part that has to be right
> ----------------------------------------------
>
> Everything below turns on what a famfs device index *is*, so let me make
> it explicit, because the patch's model of it (kernel-allocated) isn't
> workable for famfs.

The backing ID is a temporary reference for the lifetime of the fuse
filesystem.  If there's a persistent device index the server just
needs to map the persistent index to the temporary backing ID.

AFAICS the only case where this matters is debug messages, otherwise
the kernel couldn't care less about whether the device index is
persistent or not.

Am I missing something?

> Identity by reference, not by path - this part is right
> -------------------------------------------------------
>
> The genuinely better idea here is getting the kernel out of resolving a
> device pathname: the daemon hands over an already-resolved reference
> (you do it with an fd, from which the kernel takes inode->i_rdev)
> instead of a string the kernel has to kern_path(). I want to adopt that
> - credit where it's due.

Name, dev_t  or fd are are all just ways to reference the same thing:
the underlying DAX device.  They are completely interchangable, so if
we have an interface that accepts one, there's no point in making it
accept the others.

dev_t -> name:
   readlink -f /dev/char/$dev

name -> dev_t:
   stat -c %Hr:%Lr $name

name -> fd
  fd = open(name, O_PATH);

fd -> dev_t
  fstat(fd, &st);

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-05-29 13:43 [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN Miklos Szeredi
  2026-05-31 17:36 ` Amir Goldstein
  2026-06-02  6:58 ` John Groves
@ 2026-06-03 22:09 ` Joanne Koong
  2026-06-04 14:17   ` Miklos Szeredi
  2 siblings, 1 reply; 9+ messages in thread
From: Joanne Koong @ 2026-06-03 22:09 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: John Groves, fuse-devel, linux-fsdevel

On Fri, May 29, 2026 at 6:45 AM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> The difference is that now the server needs to look at the fmap and check
> if any new device needs registering, the device index is now allocated by
> the kernel and returned by the ioctl().
>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
> This is the first part of what we were talking about: using the backing
> file API for dax devices.

Nice, this looks cleaner to me.

>
> It's on top of the famfs-v10 patchset.
>
> Totally untested, haven't even tried booting it.  Obviously needs
> nontrivial conversion in the server code as well.
>
> Thanks,
> Miklos
>
> fs/fuse/Kconfig           |   1 +
>  fs/fuse/backing.c         |  28 ++-
>  fs/fuse/famfs.c           | 346 ++++++++++----------------------------
>  fs/fuse/famfs_kfmap.h     |   2 -
>  fs/fuse/fuse_i.h          |  25 +--
>  fs/fuse/inode.c           |   7 +-
>  include/uapi/linux/fuse.h |   8 +-
>  7 files changed, 123 insertions(+), 294 deletions(-)
>
> diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
> index d95dfa48483f..e56b7f8d5d2b 100644
> --- a/fs/fuse/backing.c
> +++ b/fs/fuse/backing.c
> @@ -37,14 +37,14 @@ void fuse_backing_files_init(struct fuse_conn *fc)
>         idr_init(&fc->backing_files_map);
>  }
>
> -static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
> +int fuse_backing_id_alloc(struct fuse_conn *fc, void *obj)

Would it be better to just have daxdev as a type of fuse_backing
instead of having daxdev be an entirely different type in
backing_files_map? From the struct definiton of famfs_daxdev, I see:

struct famfs_daxdev {
   bool error;
   bool dax_err;
   dev_t devno;
   struct dax_device *devp;
   char *name;
};

afaict, dax_err can be removed now (see comment below), name is only
used for diagnostic logging or could be found through
fb->file->f_path.dentry, and devno can be calculated/found from
fb->file (eg file_inode(fb->file)->i_rdev).

It seems like things would be a lot simpler if struct fuse_backing was
something like

struct fuse_backing {
        enum fuse_backing_type backing_type;
        struct file *file;
        bool error;

       union {
           const struct cred *cred;      /* FUSE_BACKING_FILE */
           struct dax_device *devp;   /*FUSE_BACKING_DAXDEV */
       };

        /* refcount */
        refcount_t count;
        struct rcu_head rcu;
};

where that would let the idr just be one type and would let us drop
the fc->famfs_iomap branches in
fuse_backing_{open/lookup/close/files_free} and the corresponding
famfs_{daxdev_lookup/daxdev_free/devidx_free/teardown} functions and
let daxdev piggyback off the refcount/rcu in fuse_backing. Otherwise I
think we will need to also add the recount and rcu to famfs_daxdev
separately once unregistering a daxdev backing is supported.

>  {
>         int id;
>
>         idr_preload(GFP_KERNEL);
>         spin_lock(&fc->lock);
>         /* FIXME: xarray might be space inefficient */
> -       id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
> +       id = idr_alloc_cyclic(&fc->backing_files_map, obj, 1, 0, GFP_ATOMIC);
>         spin_unlock(&fc->lock);
>         idr_preload_end();
>
> @@ -75,7 +75,10 @@ static int fuse_backing_id_free(int id, void *p, void *data)
>
>  void fuse_backing_files_free(struct fuse_conn *fc)
>  {
> -       idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
> +       if (fc->famfs_iomap)
> +               famfs_teardown(fc);
> +       else
> +               idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
>         idr_destroy(&fc->backing_files_map);
>  }
>
> @@ -88,13 +91,23 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
>
>         pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
>
> +       if (map->padding)
> +               return -EINVAL;
> +
> +       if (fc->famfs_iomap) {
> +               if (map->flags != FUSE_BACKING_DAXDEV)
> +                       return -EINVAL;
> +
> +               return famfs_daxdev_open(fc, map);
> +       }
> +
>         /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
>         res = -EPERM;
>         if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
>                 goto out;
>
>         res = -EINVAL;
> -       if (map->flags || map->padding)
> +       if (map->flags)
>                 goto out;
>
>         file = fget_raw(map->fd);
> @@ -144,6 +157,10 @@ int fuse_backing_close(struct fuse_conn *fc, int backing_id)
>
>         pr_debug("%s: backing_id=%d\n", __func__, backing_id);
>
> +       /* No close for dax device (yet) */
> +       if (fc->famfs_iomap)
> +               return -EINVAL;
> +
>         /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
>         err = -EPERM;
>         if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
> @@ -170,6 +187,9 @@ struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id)
>  {
>         struct fuse_backing *fb;
>
> +       if (fc->famfs_iomap)
> +               return NULL;
> +
>         rcu_read_lock();
>         fb = idr_find(&fc->backing_files_map, backing_id);
>         fb = fuse_backing_get(fb);
> diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
> index 121ed74e9727..c49362d19b36 100644
> --- a/fs/fuse/famfs.c
> +++ b/fs/fuse/famfs.c
> @@ -23,15 +23,15 @@
>  #include "fuse_i.h"
>
>  static void famfs_set_daxdev_err(
> -       struct fuse_conn *fc, struct dax_device *dax_devp);
> +       struct famfs_daxdev *dd, struct dax_device *dax_devp);
>
>  static int
>  famfs_dax_notify_failure(struct dax_device *dax_devp, u64 offset,
>                         u64 len, int mf_flags)
>  {
> -       struct fuse_conn *fc = dax_holder(dax_devp);
> +       struct famfs_daxdev *dd = dax_holder(dax_devp);
>
> -       famfs_set_daxdev_err(fc, dax_devp);
> +       famfs_set_daxdev_err(dd, dax_devp);
>
>         return 0;
>  }
> @@ -51,6 +51,26 @@ static const struct address_space_operations famfs_dax_aops = {
>
>  /*****************************************************************************/
>
> +static void famfs_daxdev_free(struct famfs_daxdev *dd)
> +{
> +       /* Only call fs_put_dax if fs_dax_get succeeded */
> +       if (dd->devp) {
> +               if (!dd->dax_err)
> +                       fs_put_dax(dd->devp, dd);
> +               put_dax(dd->devp);
> +       }
> +       kfree(dd->name);
> +       kfree(dd);
> +}
> +
> +DEFINE_FREE(famfs_daxdev_free, struct famfs_daxdev *, if (_T) famfs_daxdev_free(_T));
> +
> +static int famfs_devidx_free(int id, void *p, void *data)
> +{
> +       famfs_daxdev_free(p);
> +       return 0;
> +}
> +
>  /*
>   * famfs_teardown()
>   *
> @@ -59,263 +79,90 @@ static const struct address_space_operations famfs_dax_aops = {
>  void
>  famfs_teardown(struct fuse_conn *fc)
>  {
> -       struct famfs_dax_devlist *devlist __free(kfree) = fc->dax_devlist;
> -       int i;
> -
> -       fc->dax_devlist = NULL;
> -
> -       if (!devlist)
> -               return;
> -
> -       if (!devlist->devlist)
> -               return;
> -
> -       /* Close & release all the daxdevs in our table */
> -       for (i = 0; i < devlist->nslots; i++) {
> -               struct famfs_daxdev *dd = &devlist->devlist[i];
> -
> -               if (!dd->valid)
> -                       continue;
> -
> -               /* Only call fs_put_dax if fs_dax_get succeeded */
> -               if (dd->devp) {
> -                       if (!dd->dax_err)
> -                               fs_put_dax(dd->devp, fc);
> -                       put_dax(dd->devp);
> -               }
> -
> -               kfree(dd->name);
> -       }
> -       kfree(devlist->devlist);
> +       idr_for_each(&fc->backing_files_map, famfs_devidx_free,  NULL);
>  }
>
>  static int
> -famfs_verify_daxdev(const char *pathname, dev_t *devno)
> +famfs_verify_daxdev(const struct path *path, dev_t *devno)
>  {
> -       struct inode *inode;
> -       struct path path;
> -       int err;
> +       struct inode *inode = d_inode(path->dentry);
>
> -       if (!pathname || !*pathname)
> +       if (!S_ISCHR(inode->i_mode))
>                 return -EINVAL;
>
> -       err = kern_path(pathname, LOOKUP_FOLLOW, &path);
> -       if (err)
> -               return err;
> -
> -       inode = d_backing_inode(path.dentry);
> -       if (!S_ISCHR(inode->i_mode)) {
> -               err = -EINVAL;
> -               goto out_path_put;
> -       }
> -
> -       if (!may_open_dev(&path)) { /* had to export this */
> -               err = -EACCES;
> -               goto out_path_put;
> -       }
> +       if (!may_open_dev(path)) /* had to export this */
> +               return -EACCES;
>
>         *devno = inode->i_rdev;
>
> -out_path_put:
> -       path_put(&path);
> -       return err;
> +       return 0;
>  }
>
> -/**
> - * famfs_fuse_get_daxdev() - Retrieve info for a DAX device from fuse server
> - *
> - * Send a GET_DAXDEV message to the fuse server to retrieve info on a
> - * dax device.
> - *
> - * @fm:     fuse_mount
> - * @index:  the index of the dax device; daxdevs are referred to by index
> - *          in fmaps, and the server resolves the index to a particular daxdev
> - *
> - * Returns: 0=success
> - *          -errno=failure
> - */
> -static int
> -famfs_fuse_get_daxdev(struct fuse_mount *fm, const u64 index)
> +static int famfs_fuse_get_daxdev(const struct path *path, struct famfs_daxdev *dd)
>  {
> -       struct fuse_daxdev_out daxdev_out = { 0 };
> -       struct fuse_conn *fc = fm->fc;
> -       struct famfs_daxdev *daxdev;
>         int rc;
>
> -       FUSE_ARGS(args);
> -
> -       /* Store the daxdev in our table */
> -       if (index >= fc->dax_devlist->nslots) {
> -               pr_err("%s: index(%lld) > nslots(%d)\n",
> -                      __func__, index, fc->dax_devlist->nslots);
> -               return -EINVAL;
> -       }
> -
> -       args.opcode = FUSE_GET_DAXDEV;
> -       args.nodeid = index;
> -
> -       args.in_numargs = 0;
> -
> -       args.out_numargs = 1;
> -       args.out_args[0].size = sizeof(daxdev_out);
> -       args.out_args[0].value = &daxdev_out;
> -
> -       /* Send GET_DAXDEV command */
> -       rc = fuse_simple_request(fm, &args);
> +       /* Verify dev is valid and can be opened and gets the devno */
> +       rc = famfs_verify_daxdev(path, &dd->devno);
>         if (rc) {
> -               pr_err("%s: rc=%d from fuse_simple_request()\n",
> -                      __func__, rc);
> -               /* Error will be that the payload is smaller than FMAP_BUFSIZE,
> -                * which is the max we can handle. Empty payload handled below.
> -                */
> +               pr_err("%s: rc=%d from famfs_verify_daxdev()\n", __func__, rc);
>                 return rc;
>         }
>
> -       scoped_guard(rwsem_write, &fc->famfs_devlist_sem) {
> -               daxdev = &fc->dax_devlist->devlist[index];
> -
> -               /* Abort if daxdev is now valid (races are possible here) */
> -               if (daxdev->valid) {
> -                       pr_debug("%s: daxdev already known\n", __func__);
> -                       return 0;
> -               }
> -
> -               /* Verify dev is valid and can be opened and gets the devno */
> -               rc = famfs_verify_daxdev(daxdev_out.name, &daxdev->devno);
> -               if (rc) {
> -                       pr_err("%s: rc=%d from famfs_verify_daxdev()\n",
> -                              __func__, rc);
> -                       return rc;
> -               }
> -
> -               daxdev->name = kstrdup(daxdev_out.name, GFP_KERNEL);
> -               if (!daxdev->name)
> -                       return -ENOMEM;
> +       dd->name = kasprintf(GFP_KERNEL, "%pd4", path->dentry);
> +       if (!dd->name)
> +               return -ENOMEM;
>
> -               /* This will fail if it's not a dax device */
> -               daxdev->devp = dax_dev_get(daxdev->devno);
> -               if (!daxdev->devp) {
> -                       pr_warn("%s: device %s not found or not dax\n",
> -                               __func__, daxdev_out.name);
> -                       kfree(daxdev->name);
> -                       daxdev->name = NULL;
> +       /* This will fail if it's not a dax device */
> +       dd->devp = dax_dev_get(dd->devno);
> +       if (!dd->devp) {
> +               pr_warn("%s: device %s not found or not dax\n",__func__, dd->name);
> +                       kfree(dd->name);
> +                       dd->name = NULL;
>                         return -ENODEV;
>                 }
>
> -               rc = fs_dax_get(daxdev->devp, fc, &famfs_fuse_dax_holder_ops);
> -               if (rc) {
> -                       /* Mark as valid with dax_err to prevent retry loop.
> -                        * famfs_dax_err() will return -EIO on access attempts.
> -                        * Teardown handles this case: skips fs_put_dax, calls put_dax.
> -                        */
> -                       daxdev->dax_err = 1;
> -                       pr_err("%s: fs_dax_get(%lld) failed\n",
> -                              __func__, (u64)daxdev->devno);
> -               }
> -
> -               wmb(); /* All other fields must be visible before valid */
> -               daxdev->valid = 1;
> +       rc = fs_dax_get(dd->devp, dd, &famfs_fuse_dax_holder_ops);
> +       if (rc) {
> +               /* Mark as valid with dax_err to prevent retry loop.
> +                * famfs_dax_err() will return -EIO on access attempts.
> +                * Teardown handles this case: skips fs_put_dax, calls put_dax.
> +                */
> +               dd->dax_err = 1;

Seems like we could remove dax_err entirely now given that there's no
more retry loop anymore.

> +               pr_err("%s: fs_dax_get(%lld) failed\n", __func__, (u64)dd->devno);
>         }
>
>         return 0;

return rc;?
>  }
>
> +int famfs_daxdev_open(struct fuse_conn *fc, struct fuse_backing_map *map)
>  {
> -       struct famfs_dax_devlist *local_devlist;
> -       struct fuse_conn *fc = fm->fc;
> -       int indices_to_fetch[MAX_DAXDEVS];
> -       int n_to_fetch = 0;
> -       int err;
> -
> -       /* First time through we will need to allocate the dax_devlist */
> -       if (!fc->dax_devlist) {
> -               local_devlist = kcalloc(1, sizeof(*fc->dax_devlist), GFP_KERNEL);
> -               if (!local_devlist)
> -                       return -ENOMEM;
> -
> -               local_devlist->nslots = MAX_DAXDEVS;
> -
> -               local_devlist->devlist = kcalloc(MAX_DAXDEVS,
> -                                                sizeof(struct famfs_daxdev),
> -                                                GFP_KERNEL);
> -               if (!local_devlist->devlist) {
> -                       kfree(local_devlist);
> -                       return -ENOMEM;
> -               }
> -
> -               /* We don't need famfs_devlist_sem here because we use cmpxchg */
> -               if (cmpxchg(&fc->dax_devlist, NULL, local_devlist) != NULL) {
> -                       kfree(local_devlist->devlist);
> -                       kfree(local_devlist); /* another thread beat us to it */
> -               }
> -       }
> +       struct famfs_daxdev *dd __free(famfs_daxdev_free) = kzalloc_obj(*dd);
> +       int rc;
>
> -       /* Collect indices that need fetching while holding read lock */
> -       scoped_guard(rwsem_read, &fc->famfs_devlist_sem) {
> -               unsigned long i;
> +       CLASS(fd_raw, f)(map->fd);
> +       if (fd_empty(f))
> +               return -EBADF;
>
> -               for_each_set_bit(i, (unsigned long *)&meta->dev_bitmap, MAX_DAXDEVS) {
> -                       if (!(fc->dax_devlist->devlist[i].valid))
> -                               indices_to_fetch[n_to_fetch++] = i;
> -               }
> -       }
> +       rc = famfs_fuse_get_daxdev(&fd_file(f)->f_path, dd);

I think we need to check dd for null after allocation first before using.

> +       if (rc)
> +               return rc;
>
> -       /* Fetch needed daxdevs outside the read lock */
> -       for (int j = 0; j < n_to_fetch; j++) {
> -               err = famfs_fuse_get_daxdev(fm, indices_to_fetch[j]);
> -               if (err)
> -                       pr_err("%s: failed to get daxdev=%d\n",
> -                              __func__, indices_to_fetch[j]);
> -       }
> +       rc = fuse_backing_id_alloc(fc, dd);
> +       if (rc >= 0)
> +               retain_and_null_ptr(dd);
>
> -       return 0;
> +       return rc;
>  }
>
>  static void
>  famfs_set_daxdev_err(
> -       struct fuse_conn *fc,
> -       struct dax_device *dax_devp)
> +       struct famfs_daxdev *dd, struct dax_device *dax_devp)
>  {
> -       int i;
> -
> -       /* Gotta search the list by dax_devp;
> -        * read lock because we're not adding or removing daxdev entries
> -        */
> -       scoped_guard(rwsem_write, &fc->famfs_devlist_sem) {
> -               for (i = 0; i < fc->dax_devlist->nslots; i++) {
> -                       if (fc->dax_devlist->devlist[i].valid) {
> -                               struct famfs_daxdev *dd;
> -
> -                               dd = &fc->dax_devlist->devlist[i];
> -                               if (dd->devp != dax_devp)
> -                                       continue;
>
> -                               dd->error = true;
> +       dd->error = true;
>
> -                               pr_err("%s: memory error on daxdev %s (%d)\n",
> -                                      __func__, dd->name, i);
> -                               return;
> -                       }
> -               }
> -       }
> -       pr_err("%s: memory err on unrecognized daxdev\n", __func__);
> +       pr_err("%s: memory error on daxdev %s\n", __func__, dd->name);
>  }
>
>  /***************************************************************************/
> @@ -447,9 +294,6 @@ famfs_fuse_meta_alloc(
>                         meta->se[i].ext_offset = se_in[i].se_offset;
>                         meta->se[i].ext_len    = se_in[i].se_len;
>
> -                       /* Record bitmap of referenced daxdev indices */
> -                       meta->dev_bitmap |= (1 << meta->se[i].dev_index);
> -
>                         errs += famfs_check_ext_alignment(&meta->se[i]);
>
>                         extent_total += meta->se[i].ext_len;
> @@ -545,9 +389,6 @@ famfs_fuse_meta_alloc(
>                                 strips_out[j].ext_offset = offset;
>                                 strips_out[j].ext_len    = len;
>
> -                               /* Record bitmap of referenced daxdev indices */
> -                               meta->dev_bitmap |= (1 << devindex);
> -
>                                 extent_total += len;
>                                 errs += famfs_check_ext_alignment(&strips_out[j]);
>                                 size_remainder -= len;
> @@ -603,7 +444,7 @@ famfs_fuse_meta_alloc(
>   * Return: 0=success
>   *          -errno=failure
>   */
> -int
> +static int
>  famfs_file_init_dax(
>         struct fuse_mount *fm,
>         struct inode *inode,
> @@ -625,9 +466,6 @@ famfs_file_init_dax(
>         if (rc)
>                 goto errout;
>
> -       /* Make sure this fmap doesn't reference any unknown daxdevs */
> -       famfs_update_daxdev_table(fm, meta);
> -
>         /* Publish the famfs metadata on fi->famfs_meta */
>         inode_lock(inode);
>
> @@ -662,11 +500,6 @@ static int famfs_file_bad(struct inode *inode);
>
>  static int famfs_dax_err(struct famfs_daxdev *dd)
>  {
> -       if (!dd->valid) {
> -               pr_err("%s: daxdev=%s invalid\n",
> -                      __func__, dd->name);
> -               return -EIO;
> -       }
>         if (dd->dax_err) {
>                 pr_err("%s: daxdev=%s dax_err\n",
>                        __func__, dd->name);
> @@ -680,6 +513,18 @@ static int famfs_dax_err(struct famfs_daxdev *dd)
>         return 0;
>  }
>
> +static struct famfs_daxdev *
> +famfs_daxdev_lookup(struct fuse_conn *fc, int devidx)
> +{
> +       struct famfs_daxdev *dd;
> +
> +       rcu_read_lock();
> +       dd = idr_find(&fc->backing_files_map, devidx);
> +       rcu_read_unlock();
> +
> +       return dd;
> +}
> +
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 1b82895108be..d6e8dc75c8a1 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -248,7 +248,6 @@
>   *    - struct fuse_famfs_iext
>   *    - struct fuse_famfs_fmap_header
>   *  - Add the following structs for the GET_DAXDEV message and reply
> - *    - struct fuse_get_daxdev_in
>   *    - struct fuse_get_daxdev_out
>   *  - Add the following enumerated types
>   *    - enum fuse_famfs_file_type
> @@ -1145,6 +1144,9 @@ struct fuse_notify_prune_out {
>         uint64_t        spare;
>  };
>
> +/* Backing is a dax device */
> +#define FUSE_BACKING_DAXDEV    (1 << 0)
> +
>  struct fuse_backing_map {
>         int32_t         fd;
>         uint32_t        flags;
> @@ -1373,10 +1375,6 @@ struct fuse_famfs_fmap_header {
>         uint64_t reserved1;
>  };
>
> -struct fuse_get_daxdev_in {
> -       uint32_t        daxdev_num;
> -};
> -
>  #define DAXDEV_NAME_MAX 256
>
>  /* fuse_daxdev_out has enough space for a uuid if we need it */

Looks like enum FUSE_GET_DAXDEV, #define DAX_DEV_NAME_MAX, and struct
fuse_daxdev_out could get removed as well

Thanks,
Joanne


> --
> 2.54.0
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-06-03 22:09 ` Joanne Koong
@ 2026-06-04 14:17   ` Miklos Szeredi
  0 siblings, 0 replies; 9+ messages in thread
From: Miklos Szeredi @ 2026-06-04 14:17 UTC (permalink / raw)
  To: Joanne Koong; +Cc: Miklos Szeredi, John Groves, fuse-devel, linux-fsdevel

On Thu, 4 Jun 2026 at 00:13, Joanne Koong <joannelkoong@gmail.com> wrote:

> struct fuse_backing {
>         enum fuse_backing_type backing_type;
>         struct file *file;
>         bool error;
>
>        union {
>            const struct cred *cred;      /* FUSE_BACKING_FILE */
>            struct dax_device *devp;   /*FUSE_BACKING_DAXDEV */
>        };
>
>         /* refcount */
>         refcount_t count;
>         struct rcu_head rcu;
> };
>
> where that would let the idr just be one type and would let us drop
> the fc->famfs_iomap branches in
> fuse_backing_{open/lookup/close/files_free} and the corresponding
> famfs_{daxdev_lookup/daxdev_free/devidx_free/teardown} functions and
> let daxdev piggyback off the refcount/rcu in fuse_backing. Otherwise I
> think we will need to also add the recount and rcu to famfs_daxdev
> separately once unregistering a daxdev backing is supported.

Agree.  This is more of a PoC for the uAPI so John can experiment with
it.  Will clean up the kAPI once it gets some testing.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN
  2026-06-03  8:35   ` Miklos Szeredi
@ 2026-06-15 16:20     ` John Groves
  0 siblings, 0 replies; 9+ messages in thread
From: John Groves @ 2026-06-15 16:20 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Miklos Szeredi, fuse-devel, linux-fsdevel

On 26/06/03 10:35AM, Miklos Szeredi wrote:
> On Tue, 2 Jun 2026 at 09:04, John Groves <john@groves.net> wrote:
> 
> > Device indices - the part that has to be right
> > ----------------------------------------------
> >
> > Everything below turns on what a famfs device index *is*, so let me make
> > it explicit, because the patch's model of it (kernel-allocated) isn't
> > workable for famfs.
> 
> The backing ID is a temporary reference for the lifetime of the fuse
> filesystem.  If there's a persistent device index the server just
> needs to map the persistent index to the temporary backing ID.
> 
> AFAICS the only case where this matters is debug messages, otherwise
> the kernel couldn't care less about whether the device index is
> persistent or not.
> 
> Am I missing something?
> 
> > Identity by reference, not by path - this part is right
> > -------------------------------------------------------
> >
> > The genuinely better idea here is getting the kernel out of resolving a
> > device pathname: the daemon hands over an already-resolved reference
> > (you do it with an fd, from which the kernel takes inode->i_rdev)
> > instead of a string the kernel has to kern_path(). I want to adopt that
> > - credit where it's due.
> 
> Name, dev_t  or fd are are all just ways to reference the same thing:
> the underlying DAX device.  They are completely interchangable, so if
> we have an interface that accepts one, there's no point in making it
> accept the others.
> 
> dev_t -> name:
>    readlink -f /dev/char/$dev
> 
> name -> dev_t:
>    stat -c %Hr:%Lr $name
> 
> name -> fd
>   fd = open(name, O_PATH);
> 
> fd -> dev_t
>   fstat(fd, &st);
> 
> Thanks,
> Miklos

Miklos,

Apologies for the slow response - I've been traveling and juggling the
other parts of my job. (If anyone here has only one job, don't tell me;
I'll just be jealous.)

I'm on vacation this week, but I plan to join the Thursday call -- from a
beach, signal permitting.

And starting next week I plan to take a real run at an implementation on
top of FUSE_DEV_IOC_BACKING_OPEN. So... beware ;)

Thanks,   
John


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-06-15 16:20 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-29 13:43 [PATCH] famfs: FUSE_GET_DAXDEV -> FUSE_DEV_IOC_BACKING_OPEN Miklos Szeredi
2026-05-31 17:36 ` Amir Goldstein
2026-06-01  8:42   ` Miklos Szeredi
2026-06-02  6:58 ` John Groves
2026-06-02 13:01   ` Amir Goldstein
2026-06-03  8:35   ` Miklos Szeredi
2026-06-15 16:20     ` John Groves
2026-06-03 22:09 ` Joanne Koong
2026-06-04 14:17   ` Miklos Szeredi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.