* [PATCH 02/11] xfs: start creating infrastructure for health monitoring
2026-01-21 6:34 [PATCHSET v7 1/3] xfs: autonomous self healing of filesystems Darrick J. Wong
@ 2026-01-21 6:35 ` Darrick J. Wong
0 siblings, 0 replies; 11+ messages in thread
From: Darrick J. Wong @ 2026-01-21 6:35 UTC (permalink / raw)
To: djwong, cem; +Cc: hch, linux-fsdevel, linux-xfs, hch
From: Darrick J. Wong <djwong@kernel.org>
Start creating helper functions and infrastructure to pass filesystem
health events to a health monitoring file. Since this is an
administrative interface, we only support a single health monitor
process per filesystem, so we don't need to use anything fancy such as
notifier chains (== tons of indirect calls).
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_fs.h | 7 +
fs/xfs/xfs_healthmon.h | 36 +++++++
fs/xfs/xfs_mount.h | 4 +
fs/xfs/Makefile | 1
fs/xfs/xfs_health.c | 1
fs/xfs/xfs_healthmon.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_ioctl.c | 4 +
fs/xfs/xfs_mount.c | 2
8 files changed, 317 insertions(+)
create mode 100644 fs/xfs/xfs_healthmon.h
create mode 100644 fs/xfs/xfs_healthmon.c
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 12463ba766da05..c58e55b3df4099 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -1003,6 +1003,12 @@ struct xfs_rtgroup_geometry {
#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */
+struct xfs_health_monitor {
+ __u64 flags; /* flags */
+ __u8 format; /* output format */
+ __u8 pad[23]; /* zeroes */
+};
+
/*
* ioctl commands that are used by Linux filesystems
*/
@@ -1042,6 +1048,7 @@ struct xfs_rtgroup_geometry {
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
+#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h
new file mode 100644
index 00000000000000..218d5aac87b012
--- /dev/null
+++ b/fs/xfs/xfs_healthmon.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_HEALTHMON_H__
+#define __XFS_HEALTHMON_H__
+
+struct xfs_healthmon {
+ /*
+ * Weak reference to the xfs filesystem that is being monitored. It
+ * will be set to zero when the filesystem detaches from the monitor.
+ * Do not dereference this pointer.
+ */
+ uintptr_t mount_cookie;
+
+ /*
+ * Device number of the filesystem being monitored. This is for
+ * consistent tracing even after unmount.
+ */
+ dev_t dev;
+
+ /*
+ * Reference count of this structure. The open healthmon fd holds one
+ * ref, the xfs_mount holds another ref if it points to this object,
+ * and running event handlers hold their own refs.
+ */
+ refcount_t ref;
+};
+
+void xfs_healthmon_unmount(struct xfs_mount *mp);
+
+long xfs_ioc_health_monitor(struct file *file,
+ struct xfs_health_monitor __user *arg);
+
+#endif /* __XFS_HEALTHMON_H__ */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b871dfde372b52..61c71128d171cb 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -13,6 +13,7 @@ struct xfs_ail;
struct xfs_quotainfo;
struct xfs_da_geometry;
struct xfs_perag;
+struct xfs_healthmon;
/* dynamic preallocation free space thresholds, 5% down to 1% */
enum {
@@ -342,6 +343,9 @@ typedef struct xfs_mount {
/* Hook to feed dirent updates to an active online repair. */
struct xfs_hooks m_dir_update_hooks;
+
+ /* Private data referring to a health monitor object. */
+ struct xfs_healthmon *m_healthmon;
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5bf501cf827172..1b7385e23b3463 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \
xfs_globals.o \
xfs_handle.o \
xfs_health.o \
+ xfs_healthmon.o \
xfs_icache.o \
xfs_ioctl.o \
xfs_iomap.o \
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index fbb8886c72fe5e..3d50397f8f7c00 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -19,6 +19,7 @@
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"
#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
#include <linux/fserror.h>
diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c
new file mode 100644
index 00000000000000..b7095ea55897c5
--- /dev/null
+++ b/fs/xfs/xfs_healthmon.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/eventpoll.h>
+#include <linux/poll.h>
+
+/*
+ * Live Health Monitoring
+ * ======================
+ *
+ * Autonomous self-healing of XFS filesystems requires a means for the kernel
+ * to send filesystem health events to a monitoring daemon in userspace. To
+ * accomplish this, we establish a thread_with_file kthread object to handle
+ * translating internal events about filesystem health into a format that can
+ * be parsed easily by userspace. When those internal events occur, the core
+ * filesystem code calls this health monitor to convey the events to userspace.
+ * Userspace reads events from the file descriptor returned by the ioctl.
+ *
+ * The healthmon abstraction has a weak reference to the host filesystem mount
+ * so that the queueing and processing of the events do not pin the mount and
+ * cannot slow down the main filesystem. The healthmon object can exist past
+ * the end of the filesystem mount.
+ */
+
+/* sign of a detached health monitor */
+#define DETACHED_MOUNT_COOKIE ((uintptr_t)0)
+
+/* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */
+static DEFINE_SPINLOCK(xfs_healthmon_lock);
+
+/* Grab a reference to the healthmon object for a given mount, if any. */
+static struct xfs_healthmon *
+xfs_healthmon_get(
+ struct xfs_mount *mp)
+{
+ struct xfs_healthmon *hm;
+
+ rcu_read_lock();
+ hm = mp->m_healthmon;
+ if (hm && !refcount_inc_not_zero(&hm->ref))
+ hm = NULL;
+ rcu_read_unlock();
+
+ return hm;
+}
+
+/*
+ * Release the reference to a healthmon object. If there are no more holders,
+ * free the health monitor after an RCU grace period to eliminate possibility
+ * of races with xfs_healthmon_get.
+ */
+static void
+xfs_healthmon_put(
+ struct xfs_healthmon *hm)
+{
+ if (refcount_dec_and_test(&hm->ref))
+ kfree_rcu_mightsleep(hm);
+}
+
+/* Attach a health monitor to an xfs_mount. Only one allowed at a time. */
+STATIC int
+xfs_healthmon_attach(
+ struct xfs_mount *mp,
+ struct xfs_healthmon *hm)
+{
+ spin_lock(&xfs_healthmon_lock);
+ if (mp->m_healthmon != NULL) {
+ spin_unlock(&xfs_healthmon_lock);
+ return -EEXIST;
+ }
+
+ refcount_inc(&hm->ref);
+ mp->m_healthmon = hm;
+ hm->mount_cookie = (uintptr_t)mp->m_super;
+ spin_unlock(&xfs_healthmon_lock);
+
+ return 0;
+}
+
+/* Detach a xfs mount from a specific healthmon instance. */
+STATIC void
+xfs_healthmon_detach(
+ struct xfs_healthmon *hm)
+{
+ spin_lock(&xfs_healthmon_lock);
+ if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
+ spin_unlock(&xfs_healthmon_lock);
+ return;
+ }
+
+ XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL;
+ hm->mount_cookie = DETACHED_MOUNT_COOKIE;
+ spin_unlock(&xfs_healthmon_lock);
+
+ xfs_healthmon_put(hm);
+}
+
+/* Detach the xfs mount from this healthmon instance. */
+void
+xfs_healthmon_unmount(
+ struct xfs_mount *mp)
+{
+ struct xfs_healthmon *hm = xfs_healthmon_get(mp);
+
+ if (!hm)
+ return;
+
+ xfs_healthmon_detach(hm);
+ xfs_healthmon_put(hm);
+}
+
+STATIC ssize_t
+xfs_healthmon_read_iter(
+ struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ return -EIO;
+}
+
+/* Free the health monitoring information. */
+STATIC int
+xfs_healthmon_release(
+ struct inode *inode,
+ struct file *file)
+{
+ struct xfs_healthmon *hm = file->private_data;
+
+ /*
+ * We might be closing the healthmon file before the filesystem
+ * unmounts, because userspace processes can terminate at any time and
+ * for any reason. Null out xfs_mount::m_healthmon so that another
+ * process can create another health monitor file.
+ */
+ xfs_healthmon_detach(hm);
+
+ xfs_healthmon_put(hm);
+ return 0;
+}
+
+/* Validate ioctl parameters. */
+static inline bool
+xfs_healthmon_validate(
+ const struct xfs_health_monitor *hmo)
+{
+ if (hmo->flags)
+ return false;
+ if (hmo->format)
+ return false;
+ if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad)))
+ return false;
+ return true;
+}
+
+/* Emit some data about the health monitoring fd. */
+static void
+xfs_healthmon_show_fdinfo(
+ struct seq_file *m,
+ struct file *file)
+{
+ struct xfs_healthmon *hm = file->private_data;
+
+ seq_printf(m, "state:\t%s\ndev:\t%d:%d\n",
+ hm->mount_cookie == DETACHED_MOUNT_COOKIE ?
+ "dead" : "alive",
+ MAJOR(hm->dev), MINOR(hm->dev));
+}
+
+static const struct file_operations xfs_healthmon_fops = {
+ .owner = THIS_MODULE,
+ .show_fdinfo = xfs_healthmon_show_fdinfo,
+ .read_iter = xfs_healthmon_read_iter,
+ .release = xfs_healthmon_release,
+};
+
+/*
+ * Create a health monitoring file. Returns an index to the fd table or a
+ * negative errno.
+ */
+long
+xfs_ioc_health_monitor(
+ struct file *file,
+ struct xfs_health_monitor __user *arg)
+{
+ struct xfs_health_monitor hmo;
+ struct xfs_healthmon *hm;
+ struct xfs_inode *ip = XFS_I(file_inode(file));
+ struct xfs_mount *mp = ip->i_mount;
+ int ret;
+
+ /*
+ * The only intended user of the health monitoring system should be the
+ * xfs_healer daemon running on behalf of the whole filesystem in the
+ * initial user namespace. IOWs, we don't allow unprivileged userspace
+ * (they can use fsnotify) nor do we allow containers.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (ip->i_ino != mp->m_sb.sb_rootino)
+ return -EPERM;
+ if (current_user_ns() != &init_user_ns)
+ return -EPERM;
+
+ if (copy_from_user(&hmo, arg, sizeof(hmo)))
+ return -EFAULT;
+
+ if (!xfs_healthmon_validate(&hmo))
+ return -EINVAL;
+
+ hm = kzalloc(sizeof(*hm), GFP_KERNEL);
+ if (!hm)
+ return -ENOMEM;
+ hm->dev = mp->m_super->s_dev;
+ refcount_set(&hm->ref, 1);
+
+ /*
+ * Try to attach this health monitor to the xfs_mount. The monitor is
+ * considered live and will receive events if this succeeds.
+ */
+ ret = xfs_healthmon_attach(mp, hm);
+ if (ret)
+ goto out_hm;
+
+ /*
+ * Create the anonymous file and install a fd for it. If it succeeds,
+ * the file owns hm and can go away at any time, so we must not access
+ * it again. This must go last because we can't undo a fd table
+ * installation.
+ */
+ ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
+ O_CLOEXEC | O_RDONLY);
+ if (ret < 0)
+ goto out_mp;
+
+ return ret;
+
+out_mp:
+ xfs_healthmon_detach(hm);
+out_hm:
+ ASSERT(refcount_read(&hm->ref) == 1);
+ xfs_healthmon_put(hm);
+ return ret;
+}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 59eaad77437181..c04c41ca924e37 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -41,6 +41,7 @@
#include "xfs_exchrange.h"
#include "xfs_handle.h"
#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
#include <linux/mount.h>
#include <linux/fileattr.h>
@@ -1419,6 +1420,9 @@ xfs_file_ioctl(
case XFS_IOC_COMMIT_RANGE:
return xfs_ioc_commit_range(filp, arg);
+ case XFS_IOC_HEALTH_MONITOR:
+ return xfs_ioc_health_monitor(filp, arg);
+
default:
return -ENOTTY;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0953f6ae94abc8..ab67c91915384c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -41,6 +41,7 @@
#include "xfs_rtrefcount_btree.h"
#include "scrub/stats.h"
#include "xfs_zone_alloc.h"
+#include "xfs_healthmon.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -625,6 +626,7 @@ xfs_unmount_flush_inodes(
cancel_delayed_work_sync(&mp->m_reclaim_work);
xfs_reclaim_inodes(mp);
xfs_health_unmount(mp);
+ xfs_healthmon_unmount(mp);
}
static void
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v6.1 11/11] xfs: add media verification ioctl
[not found] ` <20260120180040.GU15551@frogsfrogsfrogs>
@ 2026-01-21 7:05 ` Christoph Hellwig
2026-01-21 19:58 ` Darrick J. Wong
0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2026-01-21 7:05 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Christoph Hellwig, cem, linux-xfs, linux-fsdevel
On Tue, Jan 20, 2026 at 10:00:40AM -0800, Darrick J. Wong wrote:
> On Tue, Jan 20, 2026 at 08:18:30AM +0100, Christoph Hellwig wrote:
> >
> > > + unsigned int bio_bbcount;
> > > + blk_status_t bio_status;
> > > +
> > > + bio_reset(bio, btp->bt_bdev, REQ_OP_READ);
> > > + bio->bi_iter.bi_sector = daddr;
> > > + bio_add_folio_nofail(bio, folio,
> > > + min(bbcount << SECTOR_SHIFT, folio_size(folio)),
> > > + 0);
> >
> > You could actually use bio_reuse as you implied in the previous mail here
> > and save the bio_add_folio_nofail call. Not really going to make much
> > of a difference, so:
>
> Hrm. Is that bio_reuse patch queued for upstream? Though maybe it'd be
> easier to make a mental note (ha!) to clean this up once both appear
> upstream.
It is queued up in the xfs for-next tree.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v6.1 11/11] xfs: add media verification ioctl
2026-01-21 7:05 ` [PATCH v6.1 11/11] xfs: add media verification ioctl Christoph Hellwig
@ 2026-01-21 19:58 ` Darrick J. Wong
0 siblings, 0 replies; 11+ messages in thread
From: Darrick J. Wong @ 2026-01-21 19:58 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: cem, linux-xfs, linux-fsdevel
On Wed, Jan 21, 2026 at 08:05:56AM +0100, Christoph Hellwig wrote:
> On Tue, Jan 20, 2026 at 10:00:40AM -0800, Darrick J. Wong wrote:
> > On Tue, Jan 20, 2026 at 08:18:30AM +0100, Christoph Hellwig wrote:
> > >
> > > > + unsigned int bio_bbcount;
> > > > + blk_status_t bio_status;
> > > > +
> > > > + bio_reset(bio, btp->bt_bdev, REQ_OP_READ);
> > > > + bio->bi_iter.bi_sector = daddr;
> > > > + bio_add_folio_nofail(bio, folio,
> > > > + min(bbcount << SECTOR_SHIFT, folio_size(folio)),
> > > > + 0);
> > >
> > > You could actually use bio_reuse as you implied in the previous mail here
> > > and save the bio_add_folio_nofail call. Not really going to make much
> > > of a difference, so:
> >
> > Hrm. Is that bio_reuse patch queued for upstream? Though maybe it'd be
> > easier to make a mental note (ha!) to clean this up once both appear
> > upstream.
>
> It is queued up in the xfs for-next tree.
Ah, heh. I'll see if cem merges the series atop his xfs-7.0-merge
branch and send a followup. As it is I'm already going to ask Linus if
I can remove the old fsnotify error function (and any new callers that
might pop up) right before -rc1.
--D
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v6.1 11/11] xfs: add media verification ioctl
[not found] ` <20260120041226.GJ15551@frogsfrogsfrogs>
[not found] ` <20260120071830.GA5686@lst.de>
@ 2026-02-06 3:01 ` Chris Mason
2026-02-06 4:53 ` Darrick J. Wong
1 sibling, 1 reply; 11+ messages in thread
From: Chris Mason @ 2026-02-06 3:01 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: cem, hch, linux-xfs, linux-fsdevel
"Darrick J. Wong" <djwong@kernel.org> wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Add a new privileged ioctl so that xfs_scrub can ask the kernel to
> verify the media of the devices backing an xfs filesystem, and have any
> resulting media errors reported to fsnotify and xfs_healer.
Hi everyone,
I'm trying out my AI review prompts on a few more trees, and I ran it
on the fuse-iomap-cache branch:
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/log/?h=fuse-iomap-cache
This was the latest email lore could find, apologies if I grabbed the wrong one.
Hopefully the review below is useful, please let me know if there are places it
has gone wildly wrong. I kept the header to make it obvious exactly which
commit I'd run through.
(This is another XFS commit that ended up in my rev-list range, low confidence
in the actual review, but I'm curious if AI got this one right)
commit 2e5c40a5b6ca722202bc0e99830b6ecf8755d19c
Author: Darrick J. Wong <djwong@kernel.org>
xfs: add media verification ioctl
[ ... ]
> diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
> --- /dev/null
> +++ b/fs/xfs/xfs_verify_media.c
> @@ -0,0 +1,445 @@
[ ... ]
> +/* Walk reverse mappings to look for all file data loss */
> +static int
> +xfs_verify_report_losses(
> + struct xfs_mount *mp,
> + enum xfs_group_type type,
> + xfs_daddr_t daddr,
> + u64 bblen)
> +{
> + struct xfs_group *xg = NULL;
> + struct xfs_trans *tp;
> + xfs_fsblock_t start_bno, end_bno;
> + uint32_t start_gno, end_gno;
> + int error;
> +
> + if (type == XG_TYPE_RTG) {
> + start_bno = xfs_daddr_to_rtb(mp, daddr);
> + end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
> + } else {
> + start_bno = XFS_DADDR_TO_FSB(mp, daddr);
> + end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
> + }
> +
> + tp = xfs_trans_alloc_empty(mp);
> + start_gno = xfs_fsb_to_gno(mp, start_bno, type);
> + end_gno = xfs_fsb_to_gno(mp, end_bno, type);
> + while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
> + struct xfs_buf *agf_bp = NULL;
> + struct xfs_rtgroup *rtg = NULL;
> + struct xfs_btree_cur *cur;
> + struct xfs_rmap_irec ri_low = { };
> + struct xfs_rmap_irec ri_high;
> + struct xfs_group_data_lost lost;
> +
> + if (type == XG_TYPE_AG) {
> + struct xfs_perag *pag = to_perag(xg);
> +
> + error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
> + if (error) {
> + xfs_perag_put(pag);
> + break;
> + }
> +
> + cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
> + } else {
> + rtg = to_rtg(xg);
> + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
> + cur = xfs_rtrmapbt_init_cursor(tp, rtg);
> + }
Does this code have a reference counting mismatch in the error paths?
The xfs_group returned by xfs_group_next_range() is acquired via
xfs_group_grab() which increments xg_active_ref.
In the error path at line 120, the code uses xfs_perag_put() to release
the reference:
error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
if (error) {
xfs_perag_put(pag);
break;
}
And later in the loop, line 144 uses xfs_group_put().
Both xfs_perag_put() and xfs_group_put() decrement xg_ref, not
xg_active_ref. Looking at the reference counting implementation:
xfs_group_grab() calls atomic_inc_not_zero(&xg->xg_active_ref)
xfs_group_rele() calls atomic_dec(&xg->xg_active_ref)
xfs_group_put() calls atomic_dec(&xg->xg_ref)
Should these error paths use xfs_group_rele() instead to match the
xfs_group_grab() in xfs_group_next_range()?
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v6.1 11/11] xfs: add media verification ioctl
2026-02-06 3:01 ` Chris Mason
@ 2026-02-06 4:53 ` Darrick J. Wong
0 siblings, 0 replies; 11+ messages in thread
From: Darrick J. Wong @ 2026-02-06 4:53 UTC (permalink / raw)
To: Chris Mason; +Cc: cem, hch, linux-xfs, linux-fsdevel
On Thu, Feb 05, 2026 at 07:01:32PM -0800, Chris Mason wrote:
> "Darrick J. Wong" <djwong@kernel.org> wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > Add a new privileged ioctl so that xfs_scrub can ask the kernel to
> > verify the media of the devices backing an xfs filesystem, and have any
> > resulting media errors reported to fsnotify and xfs_healer.
>
> Hi everyone,
>
> I'm trying out my AI review prompts on a few more trees, and I ran it
> on the fuse-iomap-cache branch:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/log/?h=fuse-iomap-cache
>
> This was the latest email lore could find, apologies if I grabbed the wrong one.
>
> Hopefully the review below is useful, please let me know if there are places it
> has gone wildly wrong. I kept the header to make it obvious exactly which
> commit I'd run through.
>
> (This is another XFS commit that ended up in my rev-list range, low confidence
> in the actual review, but I'm curious if AI got this one right)
>
> commit 2e5c40a5b6ca722202bc0e99830b6ecf8755d19c
> Author: Darrick J. Wong <djwong@kernel.org>
>
> xfs: add media verification ioctl
>
> [ ... ]
>
> > diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
> > --- /dev/null
> > +++ b/fs/xfs/xfs_verify_media.c
> > @@ -0,0 +1,445 @@
>
> [ ... ]
>
> > +/* Walk reverse mappings to look for all file data loss */
> > +static int
> > +xfs_verify_report_losses(
> > + struct xfs_mount *mp,
> > + enum xfs_group_type type,
> > + xfs_daddr_t daddr,
> > + u64 bblen)
> > +{
> > + struct xfs_group *xg = NULL;
> > + struct xfs_trans *tp;
> > + xfs_fsblock_t start_bno, end_bno;
> > + uint32_t start_gno, end_gno;
> > + int error;
> > +
> > + if (type == XG_TYPE_RTG) {
> > + start_bno = xfs_daddr_to_rtb(mp, daddr);
> > + end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
> > + } else {
> > + start_bno = XFS_DADDR_TO_FSB(mp, daddr);
> > + end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
> > + }
> > +
> > + tp = xfs_trans_alloc_empty(mp);
> > + start_gno = xfs_fsb_to_gno(mp, start_bno, type);
> > + end_gno = xfs_fsb_to_gno(mp, end_bno, type);
> > + while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
> > + struct xfs_buf *agf_bp = NULL;
> > + struct xfs_rtgroup *rtg = NULL;
> > + struct xfs_btree_cur *cur;
> > + struct xfs_rmap_irec ri_low = { };
> > + struct xfs_rmap_irec ri_high;
> > + struct xfs_group_data_lost lost;
> > +
> > + if (type == XG_TYPE_AG) {
> > + struct xfs_perag *pag = to_perag(xg);
> > +
> > + error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
> > + if (error) {
> > + xfs_perag_put(pag);
> > + break;
> > + }
> > +
> > + cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
> > + } else {
> > + rtg = to_rtg(xg);
> > + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
> > + cur = xfs_rtrmapbt_init_cursor(tp, rtg);
> > + }
>
> Does this code have a reference counting mismatch in the error paths?
>
> The xfs_group returned by xfs_group_next_range() is acquired via
> xfs_group_grab() which increments xg_active_ref.
>
> In the error path at line 120, the code uses xfs_perag_put() to release
> the reference:
>
> error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
> if (error) {
> xfs_perag_put(pag);
> break;
> }
>
> And later in the loop, line 144 uses xfs_group_put().
>
> Both xfs_perag_put() and xfs_group_put() decrement xg_ref, not
> xg_active_ref. Looking at the reference counting implementation:
>
> xfs_group_grab() calls atomic_inc_not_zero(&xg->xg_active_ref)
> xfs_group_rele() calls atomic_dec(&xg->xg_active_ref)
> xfs_group_put() calls atomic_dec(&xg->xg_ref)
>
> Should these error paths use xfs_group_rele() instead to match the
> xfs_group_grab() in xfs_group_next_range()?
Yep. Wouldn't it be nice if our type system could keep those things
straight... :/
--D
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 02/11] xfs: start creating infrastructure for health monitoring
[not found] ` <176852588582.2137143.1283636639551788931.stgit@frogsfrogsfrogs>
@ 2026-02-06 13:07 ` Pankaj Raghav (Samsung)
2026-02-06 17:47 ` Darrick J. Wong
0 siblings, 1 reply; 11+ messages in thread
From: Pankaj Raghav (Samsung) @ 2026-02-06 13:07 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: cem, hch, linux-xfs, linux-fsdevel, p.raghav
> +static DEFINE_SPINLOCK(xfs_healthmon_lock);
> +
> +/* Grab a reference to the healthmon object for a given mount, if any. */
> +static struct xfs_healthmon *
> +xfs_healthmon_get(
> + struct xfs_mount *mp)
> +{
> + struct xfs_healthmon *hm;
> +
> + rcu_read_lock();
> + hm = mp->m_healthmon;
Nit: Should we do a READ_ONCE(mp->m_healthmon) here to avoid any
compiler tricks that can result in an undefined behaviour? I am not sure
if I am being paranoid here.
> + if (hm && !refcount_inc_not_zero(&hm->ref))
> + hm = NULL;
> + rcu_read_unlock();
> +
> + return hm;
> +}
> +
> +/*
--
Pankaj
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 02/11] xfs: start creating infrastructure for health monitoring
2026-02-06 13:07 ` [PATCH 02/11] xfs: start creating infrastructure for health monitoring Pankaj Raghav (Samsung)
@ 2026-02-06 17:47 ` Darrick J. Wong
2026-02-06 18:54 ` Pankaj Raghav
0 siblings, 1 reply; 11+ messages in thread
From: Darrick J. Wong @ 2026-02-06 17:47 UTC (permalink / raw)
To: Pankaj Raghav (Samsung); +Cc: cem, hch, linux-xfs, linux-fsdevel, p.raghav
On Fri, Feb 06, 2026 at 02:07:56PM +0100, Pankaj Raghav (Samsung) wrote:
> > +static DEFINE_SPINLOCK(xfs_healthmon_lock);
> > +
> > +/* Grab a reference to the healthmon object for a given mount, if any. */
> > +static struct xfs_healthmon *
> > +xfs_healthmon_get(
> > + struct xfs_mount *mp)
> > +{
> > + struct xfs_healthmon *hm;
> > +
> > + rcu_read_lock();
> > + hm = mp->m_healthmon;
>
> Nit: Should we do a READ_ONCE(mp->m_healthmon) here to avoid any
> compiler tricks that can result in an undefined behaviour? I am not sure
> if I am being paranoid here.
Compiler tricks? We've taken the rcu read lock, which adds an
optimization barrier so that the mp->m_healthmon access can't be
reordered before the rcu_read_lock. I'm not sure if that answers your
question.
<confused>
--D
> > + if (hm && !refcount_inc_not_zero(&hm->ref))
> > + hm = NULL;
> > + rcu_read_unlock();
> > +
> > + return hm;
> > +}
> > +
> > +/*
> --
> Pankaj
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 02/11] xfs: start creating infrastructure for health monitoring
2026-02-06 17:47 ` Darrick J. Wong
@ 2026-02-06 18:54 ` Pankaj Raghav
2026-02-06 20:41 ` Darrick J. Wong
0 siblings, 1 reply; 11+ messages in thread
From: Pankaj Raghav @ 2026-02-06 18:54 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: cem, hch, linux-xfs, linux-fsdevel, p.raghav
On 2/6/26 18:47, Darrick J. Wong wrote:
> On Fri, Feb 06, 2026 at 02:07:56PM +0100, Pankaj Raghav (Samsung) wrote:
>>> +static DEFINE_SPINLOCK(xfs_healthmon_lock);
>>> +
>>> +/* Grab a reference to the healthmon object for a given mount, if any. */
>>> +static struct xfs_healthmon *
>>> +xfs_healthmon_get(
>>> + struct xfs_mount *mp)
>>> +{
>>> + struct xfs_healthmon *hm;
>>> +
>>> + rcu_read_lock();
>>> + hm = mp->m_healthmon;
>>
>> Nit: Should we do a READ_ONCE(mp->m_healthmon) here to avoid any
>> compiler tricks that can result in an undefined behaviour? I am not sure
>> if I am being paranoid here.
>
> Compiler tricks? We've taken the rcu read lock, which adds an
> optimization barrier so that the mp->m_healthmon access can't be
> reordered before the rcu_read_lock. I'm not sure if that answers your
> question.
>
This answers. So this is my understanding: RCU guarantees that we get a valid
object (actual data of m_healthmon) but does not guarantee the compiler will not reread
the pointer between checking if hm is !NULL and accessing the pointer as we are doing it
lockless.
So just a barrier() call in rcu_read_lock is enough to make sure this doesn't happen and probably
adding a READ_ONCE() is not needed?
--
Pankaj
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 02/11] xfs: start creating infrastructure for health monitoring
2026-02-06 18:54 ` Pankaj Raghav
@ 2026-02-06 20:41 ` Darrick J. Wong
2026-02-09 6:34 ` Christoph Hellwig
0 siblings, 1 reply; 11+ messages in thread
From: Darrick J. Wong @ 2026-02-06 20:41 UTC (permalink / raw)
To: Pankaj Raghav; +Cc: cem, hch, linux-xfs, linux-fsdevel, p.raghav
On Fri, Feb 06, 2026 at 07:54:51PM +0100, Pankaj Raghav wrote:
>
>
> On 2/6/26 18:47, Darrick J. Wong wrote:
> > On Fri, Feb 06, 2026 at 02:07:56PM +0100, Pankaj Raghav (Samsung) wrote:
> >>> +static DEFINE_SPINLOCK(xfs_healthmon_lock);
> >>> +
> >>> +/* Grab a reference to the healthmon object for a given mount, if any. */
> >>> +static struct xfs_healthmon *
> >>> +xfs_healthmon_get(
> >>> + struct xfs_mount *mp)
> >>> +{
> >>> + struct xfs_healthmon *hm;
> >>> +
> >>> + rcu_read_lock();
> >>> + hm = mp->m_healthmon;
> >>
> >> Nit: Should we do a READ_ONCE(mp->m_healthmon) here to avoid any
> >> compiler tricks that can result in an undefined behaviour? I am not sure
> >> if I am being paranoid here.
> >
> > Compiler tricks? We've taken the rcu read lock, which adds an
> > optimization barrier so that the mp->m_healthmon access can't be
> > reordered before the rcu_read_lock. I'm not sure if that answers your
> > question.
> >
>
> This answers. So this is my understanding: RCU guarantees that we get
> a valid object (actual data of m_healthmon) but does not guarantee the
> compiler will not reread the pointer between checking if hm is !NULL
> and accessing the pointer as we are doing it lockless.
Oh, now I see what you're concerned about. You're worried that the
compiler could turn this:
if (hm && !refcount_inc_not_zero(&hm->ref))
into this:
if (mp->m_healthmon && !refcount_inc_not_zero(&mp->m_healthmon->ref))
which then gives xfs_healthmon_detach the opening it needs to slip in
between the two dereferences of mp and turn m_healthmon into NULL,
leading the "mp->m_healthmon->ref" expression to become a NULL pointer
dereference.
> So just a barrier() call in rcu_read_lock is enough to make sure this
> doesn't happen and probably adding a READ_ONCE() is not needed?
Nope. You're right, we do need READ_ONCE here.
--D
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 02/11] xfs: start creating infrastructure for health monitoring
2026-02-06 20:41 ` Darrick J. Wong
@ 2026-02-09 6:34 ` Christoph Hellwig
2026-02-10 4:57 ` Darrick J. Wong
0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2026-02-09 6:34 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Pankaj Raghav, cem, hch, linux-xfs, linux-fsdevel, p.raghav
On Fri, Feb 06, 2026 at 12:41:35PM -0800, Darrick J. Wong wrote:
> > So just a barrier() call in rcu_read_lock is enough to make sure this
> > doesn't happen and probably adding a READ_ONCE() is not needed?
>
> Nope. You're right, we do need READ_ONCE here.
The right thing is to use rcu_dereference / rcu_assign_pointer and add a
__rcu annotation to m_healthmon.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 02/11] xfs: start creating infrastructure for health monitoring
2026-02-09 6:34 ` Christoph Hellwig
@ 2026-02-10 4:57 ` Darrick J. Wong
0 siblings, 0 replies; 11+ messages in thread
From: Darrick J. Wong @ 2026-02-10 4:57 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Pankaj Raghav, cem, linux-xfs, linux-fsdevel, p.raghav
On Mon, Feb 09, 2026 at 07:34:21AM +0100, Christoph Hellwig wrote:
> On Fri, Feb 06, 2026 at 12:41:35PM -0800, Darrick J. Wong wrote:
> > > So just a barrier() call in rcu_read_lock is enough to make sure this
> > > doesn't happen and probably adding a READ_ONCE() is not needed?
> >
> > Nope. You're right, we do need READ_ONCE here.
>
> The right thing is to use rcu_dereference / rcu_assign_pointer and add a
> __rcu annotation to m_healthmon.
Noted, will change my fixpatch to do that. Thanks for the pointer!
--D
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2026-02-10 4:57 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <176852588473.2137143.1604994842772101197.stgit@frogsfrogsfrogs>
[not found] ` <176852588776.2137143.7103003682733018282.stgit@frogsfrogsfrogs>
[not found] ` <20260120041226.GJ15551@frogsfrogsfrogs>
[not found] ` <20260120071830.GA5686@lst.de>
[not found] ` <20260120180040.GU15551@frogsfrogsfrogs>
2026-01-21 7:05 ` [PATCH v6.1 11/11] xfs: add media verification ioctl Christoph Hellwig
2026-01-21 19:58 ` Darrick J. Wong
2026-02-06 3:01 ` Chris Mason
2026-02-06 4:53 ` Darrick J. Wong
[not found] ` <176852588582.2137143.1283636639551788931.stgit@frogsfrogsfrogs>
2026-02-06 13:07 ` [PATCH 02/11] xfs: start creating infrastructure for health monitoring Pankaj Raghav (Samsung)
2026-02-06 17:47 ` Darrick J. Wong
2026-02-06 18:54 ` Pankaj Raghav
2026-02-06 20:41 ` Darrick J. Wong
2026-02-09 6:34 ` Christoph Hellwig
2026-02-10 4:57 ` Darrick J. Wong
2026-01-21 6:34 [PATCHSET v7 1/3] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-21 6:35 ` [PATCH 02/11] xfs: start creating infrastructure for health monitoring Darrick J. Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox