From: Christoph Hellwig <hch@lst.de>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: cem@kernel.org, linux-xfs@vger.kernel.org,
linux-fsdevel@vger.kernel.org, hch@lst.de
Subject: Re: [PATCH 11/11] xfs: add media verification ioctl
Date: Mon, 19 Jan 2026 16:56:39 +0100 [thread overview]
Message-ID: <20260119155639.GA10822@lst.de> (raw)
In-Reply-To: <176852588776.2137143.7103003682733018282.stgit@frogsfrogsfrogs>
On Thu, Jan 15, 2026 at 09:44:53PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Add a new privileged ioctl so that xfs_scrub can ask the kernel to
> verify the media of the devices backing an xfs filesystem, and have any
> resulting media errors reported to fsnotify and xfs_healer.
I really wish this would explain the approach (reading data into a
kernel buffer, and the choices of the buffer size and I/O pattern)
and their rationale a bit better here.
> +
> +struct xfs_group_data_lost {
> + xfs_agblock_t startblock;
> + xfs_extlen_t blockcount;
> +};
> +
> +/* Report lost file data from rmap records */
> +STATIC int
> +xfs_verify_report_data_lost(
> + struct xfs_btree_cur *cur,
> + const struct xfs_rmap_irec *rec,
> + void *data)
> +{
> + struct xfs_mount *mp = cur->bc_mp;
> + struct xfs_inode *ip;
> + struct xfs_group_data_lost *lost = data;
> + xfs_fileoff_t fileoff = rec->rm_offset;
> + xfs_extlen_t blocks = rec->rm_blockcount;
> + const bool is_attr =
> + (rec->rm_flags & XFS_RMAP_ATTR_FORK);
> + const xfs_agblock_t lost_end =
> + lost->startblock + lost->blockcount;
> + const xfs_agblock_t rmap_end =
> + rec->rm_startblock + rec->rm_blockcount;
> + int error = 0;
> +
> + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
> + return 0;
> +
> + error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, 0, 0, &ip);
> + if (error)
> + return 0;
> +
> + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
> + xfs_bmap_mark_sick(ip, is_attr ? XFS_ATTR_FORK : XFS_DATA_FORK);
> + goto out_rele;
> + }
> +
> + if (is_attr) {
> + xfs_inode_mark_sick(ip, XFS_SICK_INO_XATTR);
> + goto out_rele;
> + }
> +
> + if (lost->startblock > rec->rm_startblock) {
> + fileoff += lost->startblock - rec->rm_startblock;
> + blocks -= lost->startblock - rec->rm_startblock;
> + }
> + if (rmap_end > lost_end)
> + blocks -= rmap_end - lost_end;
> +
> + fserror_report_data_lost(VFS_I(ip), XFS_FSB_TO_B(mp, fileoff),
> + XFS_FSB_TO_B(mp, blocks), GFP_NOFS);
> +
> +out_rele:
> + xfs_irele(ip);
> + return 0;
> +}
> +
> +/* Walk reverse mappings to look for all file data loss */
> +STATIC int
> +xfs_verify_report_losses(
> + struct xfs_mount *mp,
> + enum xfs_group_type type,
> + xfs_daddr_t daddr,
> + u64 bblen)
> +{
> + struct xfs_group *xg = NULL;
> + struct xfs_trans *tp;
> + xfs_fsblock_t start_bno, end_bno;
> + uint32_t start_gno, end_gno;
> + int error;
> +
> + if (type == XG_TYPE_RTG) {
> + start_bno = xfs_daddr_to_rtb(mp, daddr);
> + end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
> + } else {
> + start_bno = XFS_DADDR_TO_FSB(mp, daddr);
> + end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
> + }
> +
> + tp = xfs_trans_alloc_empty(mp);
> + start_gno = xfs_fsb_to_gno(mp, start_bno, type);
> + end_gno = xfs_fsb_to_gno(mp, end_bno, type);
> + while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
> + struct xfs_buf *agf_bp = NULL;
> + struct xfs_rtgroup *rtg = NULL;
> + struct xfs_btree_cur *cur;
> + struct xfs_rmap_irec ri_low = { };
> + struct xfs_rmap_irec ri_high;
> + struct xfs_group_data_lost lost;
> +
> + if (type == XG_TYPE_AG) {
> + struct xfs_perag *pag = to_perag(xg);
> +
> + error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
> + if (error) {
> + xfs_perag_put(pag);
> + break;
> + }
> +
> + cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
> + } else {
> + rtg = to_rtg(xg);
> + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
> + cur = xfs_rtrmapbt_init_cursor(tp, rtg);
> + }
> +
> + /*
> + * Set the rmap range from ri_low to ri_high, which represents
> + * a [start, end] where we looking for the files or metadata.
> + */
> + memset(&ri_high, 0xFF, sizeof(ri_high));
> + if (xg->xg_gno == start_gno)
> + ri_low.rm_startblock =
> + xfs_fsb_to_gbno(mp, start_bno, type);
> + if (xg->xg_gno == end_gno)
> + ri_high.rm_startblock =
> + xfs_fsb_to_gbno(mp, end_bno, type);
> +
> + lost.startblock = ri_low.rm_startblock;
> + lost.blockcount = min(xg->xg_block_count,
> + ri_high.rm_startblock + 1) -
> + ri_low.rm_startblock;
> +
> + error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
> + xfs_verify_report_data_lost, &lost);
> + xfs_btree_del_cursor(cur, error);
> + if (agf_bp)
> + xfs_trans_brelse(tp, agf_bp);
> + if (rtg)
> + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
> + if (error) {
> + xfs_group_put(xg);
> + break;
> + }
> + }
> +
> + xfs_trans_cancel(tp);
> + return 0;
> +}
> +
> +/*
> + * Compute the desired verify IO size.
> + *
> + * To minimize command overhead, we'd like to create bios that are 1MB, though
> + * we allow the user to ask for a smaller size.
> + */
> +STATIC unsigned int
> +xfs_verify_iosize(
> + const struct xfs_verify_media *me,
> + struct xfs_buftarg *btp,
> + uint64_t bbcount)
> +{
> + unsigned int iosize =
> + min_not_zero(SZ_1M, me->me_max_io_size);
> +
> + BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
> + ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev));
> +
> + return clamp(iosize, bdev_logical_block_size(btp->bt_bdev),
> + BBTOB(bbcount));
> +}
> +/* Allocate as much memory as we can get for verification buffer. */
> +STATIC struct folio *
Can we please retired STATIC already?
> +STATIC void
> +xfs_verify_media_error(
> + struct xfs_mount *mp,
> + struct xfs_verify_media *me,
> + struct xfs_buftarg *btp,
> + xfs_daddr_t daddr,
> + unsigned int bio_bbcount,
> + blk_status_t bio_status)
> +{
> + trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
> + bio_bbcount, bio_status);
> +
> + /*
> + * Pass any I/O error up to the caller if we didn't successfully verify
> + * any bytes at all.
> + */
> + if (me->me_start_daddr == daddr)
> + me->me_ioerror = -blk_status_to_errno(bio_status);
> +
> + /*
> + * PI validation failures, medium errors, or general IO errors are
> + * treated as indicators of data loss. Everything else are (hopefully)
> + * transient errors and are not reported.
> + */
But still left in me->me_ioerror. Is that intentional?
> + switch (me->me_dev) {
> + case XFS_DEV_DATA:
> + xfs_verify_report_losses(mp, XG_TYPE_AG, daddr, bio_bbcount);
> + break;
> + case XFS_DEV_RT:
> + xfs_verify_report_losses(mp, XG_TYPE_RTG, daddr, bio_bbcount);
> + break;
> + }
At some point we really need dev_to_group_type and vice versa helpers.
next prev parent reply other threads:[~2026-01-19 15:56 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-16 5:42 [PATCHSET v6] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-16 5:42 ` [PATCH 01/11] docs: discuss autonomous self healing in the xfs online repair design doc Darrick J. Wong
2026-01-16 5:42 ` [PATCH 02/11] xfs: start creating infrastructure for health monitoring Darrick J. Wong
2026-02-06 13:07 ` Pankaj Raghav (Samsung)
2026-02-06 17:47 ` Darrick J. Wong
2026-02-06 18:54 ` Pankaj Raghav
2026-02-06 20:41 ` Darrick J. Wong
2026-02-09 6:34 ` Christoph Hellwig
2026-02-10 4:57 ` Darrick J. Wong
2026-01-16 5:42 ` [PATCH 03/11] xfs: create event queuing, formatting, and discovery infrastructure Darrick J. Wong
2026-01-16 5:43 ` [PATCH 04/11] xfs: convey filesystem unmount events to the health monitor Darrick J. Wong
2026-01-19 15:44 ` Christoph Hellwig
2026-01-16 5:43 ` [PATCH 05/11] xfs: convey metadata health " Darrick J. Wong
2026-01-16 5:43 ` [PATCH 06/11] xfs: convey filesystem shutdown " Darrick J. Wong
2026-01-19 15:44 ` Christoph Hellwig
2026-01-16 5:43 ` [PATCH 07/11] xfs: convey externally discovered fsdax media errors " Darrick J. Wong
2026-01-16 5:44 ` [PATCH 08/11] xfs: convey file I/O " Darrick J. Wong
2026-01-16 5:44 ` [PATCH 09/11] xfs: allow toggling verbose logging on the health monitoring file Darrick J. Wong
2026-01-16 5:44 ` [PATCH 10/11] xfs: check if an open file is on the health monitored fs Darrick J. Wong
2026-01-16 5:44 ` [PATCH 11/11] xfs: add media verification ioctl Darrick J. Wong
2026-01-19 15:56 ` Christoph Hellwig [this message]
2026-01-19 17:35 ` Darrick J. Wong
2026-01-20 4:12 ` [PATCH v6.1 " Darrick J. Wong
2026-01-20 7:18 ` Christoph Hellwig
2026-01-20 18:00 ` Darrick J. Wong
2026-01-21 7:05 ` Christoph Hellwig
2026-01-21 19:58 ` Darrick J. Wong
2026-02-06 3:01 ` Chris Mason
2026-02-06 4:53 ` Darrick J. Wong
-- strict thread matches above, loose matches on Subject: below --
2026-01-21 6:34 [PATCHSET v7 1/3] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-21 6:37 ` [PATCH 11/11] xfs: add media verification ioctl Darrick J. Wong
2026-01-13 0:32 [PATCHSET v5] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-13 0:35 ` [PATCH 11/11] xfs: add media verification ioctl Darrick J. Wong
2026-01-13 15:57 ` Christoph Hellwig
2026-01-13 23:21 ` Darrick J. Wong
2026-01-14 5:40 ` Darrick J. Wong
2026-01-14 6:02 ` Christoph Hellwig
2026-01-14 6:07 ` Darrick J. Wong
2026-01-14 6:15 ` Christoph Hellwig
2026-01-14 6:19 ` Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260119155639.GA10822@lst.de \
--to=hch@lst.de \
--cc=cem@kernel.org \
--cc=djwong@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox