public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: cem@kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, hch@lst.de
Subject: Re: [PATCH 11/11] xfs: add media verification ioctl
Date: Mon, 19 Jan 2026 16:56:39 +0100	[thread overview]
Message-ID: <20260119155639.GA10822@lst.de> (raw)
In-Reply-To: <176852588776.2137143.7103003682733018282.stgit@frogsfrogsfrogs>

On Thu, Jan 15, 2026 at 09:44:53PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Add a new privileged ioctl so that xfs_scrub can ask the kernel to
> verify the media of the devices backing an xfs filesystem, and have any
> resulting media errors reported to fsnotify and xfs_healer.

I really wish this would explain the approach (reading data into a
kernel buffer, and the choices of the buffer size and I/O pattern)
and their rationale a bit better here.

> +
> +struct xfs_group_data_lost {
> +	xfs_agblock_t		startblock;
> +	xfs_extlen_t		blockcount;
> +};
> +
> +/* Report lost file data from rmap records */
> +STATIC int
> +xfs_verify_report_data_lost(
> +	struct xfs_btree_cur		*cur,
> +	const struct xfs_rmap_irec	*rec,
> +	void				*data)
> +{
> +	struct xfs_mount		*mp = cur->bc_mp;
> +	struct xfs_inode		*ip;
> +	struct xfs_group_data_lost	*lost = data;
> +	xfs_fileoff_t			fileoff = rec->rm_offset;
> +	xfs_extlen_t			blocks = rec->rm_blockcount;
> +	const bool			is_attr =
> +			(rec->rm_flags & XFS_RMAP_ATTR_FORK);
> +	const xfs_agblock_t		lost_end =
> +			lost->startblock + lost->blockcount;
> +	const xfs_agblock_t		rmap_end =
> +			rec->rm_startblock + rec->rm_blockcount;
> +	int				error = 0;
> +
> +	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
> +	       return 0;
> +
> +	error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, 0, 0, &ip);
> +	if (error)
> +		return 0;
> +
> +	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
> +		xfs_bmap_mark_sick(ip, is_attr ? XFS_ATTR_FORK : XFS_DATA_FORK);
> +		goto out_rele;
> +	}
> +
> +	if (is_attr) {
> +		xfs_inode_mark_sick(ip, XFS_SICK_INO_XATTR);
> +		goto out_rele;
> +	}
> +
> +	if (lost->startblock > rec->rm_startblock) {
> +		fileoff += lost->startblock - rec->rm_startblock;
> +		blocks -= lost->startblock - rec->rm_startblock;
> +	}
> +	if (rmap_end > lost_end)
> +		blocks -= rmap_end - lost_end;
> +
> +	fserror_report_data_lost(VFS_I(ip), XFS_FSB_TO_B(mp, fileoff),
> +			XFS_FSB_TO_B(mp, blocks), GFP_NOFS);
> +
> +out_rele:
> +	xfs_irele(ip);
> +	return 0;
> +}
> +
> +/* Walk reverse mappings to look for all file data loss */
> +STATIC int
> +xfs_verify_report_losses(
> +	struct xfs_mount	*mp,
> +	enum xfs_group_type	type,
> +	xfs_daddr_t		daddr,
> +	u64			bblen)
> +{
> +	struct xfs_group	*xg = NULL;
> +	struct xfs_trans	*tp;
> +	xfs_fsblock_t		start_bno, end_bno;
> +	uint32_t		start_gno, end_gno;
> +	int			error;
> +
> +	if (type == XG_TYPE_RTG) {
> +		start_bno = xfs_daddr_to_rtb(mp, daddr);
> +		end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
> +	} else {
> +		start_bno = XFS_DADDR_TO_FSB(mp, daddr);
> +		end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
> +	}
> +
> +	tp = xfs_trans_alloc_empty(mp);
> +	start_gno = xfs_fsb_to_gno(mp, start_bno, type);
> +	end_gno = xfs_fsb_to_gno(mp, end_bno, type);
> +	while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
> +		struct xfs_buf		*agf_bp = NULL;
> +		struct xfs_rtgroup	*rtg = NULL;
> +		struct xfs_btree_cur	*cur;
> +		struct xfs_rmap_irec	ri_low = { };
> +		struct xfs_rmap_irec	ri_high;
> +		struct xfs_group_data_lost lost;
> +
> +		if (type == XG_TYPE_AG) {
> +			struct xfs_perag	*pag = to_perag(xg);
> +
> +			error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
> +			if (error) {
> +				xfs_perag_put(pag);
> +				break;
> +			}
> +
> +			cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
> +		} else {
> +			rtg = to_rtg(xg);
> +			xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
> +			cur = xfs_rtrmapbt_init_cursor(tp, rtg);
> +		}
> +
> +		/*
> +		 * Set the rmap range from ri_low to ri_high, which represents
> +		 * a [start, end] where we looking for the files or metadata.
> +		 */
> +		memset(&ri_high, 0xFF, sizeof(ri_high));
> +		if (xg->xg_gno == start_gno)
> +			ri_low.rm_startblock =
> +				xfs_fsb_to_gbno(mp, start_bno, type);
> +		if (xg->xg_gno == end_gno)
> +			ri_high.rm_startblock =
> +				xfs_fsb_to_gbno(mp, end_bno, type);
> +
> +		lost.startblock = ri_low.rm_startblock;
> +		lost.blockcount = min(xg->xg_block_count,
> +				      ri_high.rm_startblock + 1) -
> +							ri_low.rm_startblock;
> +
> +		error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
> +				xfs_verify_report_data_lost, &lost);
> +		xfs_btree_del_cursor(cur, error);
> +		if (agf_bp)
> +			xfs_trans_brelse(tp, agf_bp);
> +		if (rtg)
> +			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
> +		if (error) {
> +			xfs_group_put(xg);
> +			break;
> +		}
> +	}
> +
> +	xfs_trans_cancel(tp);
> +	return 0;
> +}
> +
> +/*
> + * Compute the desired verify IO size.
> + *
> + * To minimize command overhead, we'd like to create bios that are 1MB, though
> + * we allow the user to ask for a smaller size.
> + */
> +STATIC unsigned int
> +xfs_verify_iosize(
> +	const struct xfs_verify_media	*me,
> +	struct xfs_buftarg		*btp,
> +	uint64_t			bbcount)
> +{
> +	unsigned int			iosize =
> +			min_not_zero(SZ_1M, me->me_max_io_size);
> +
> +	BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
> +	ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev));
> +
> +	return clamp(iosize, bdev_logical_block_size(btp->bt_bdev),
> +			BBTOB(bbcount));
> +}

> +/* Allocate as much memory as we can get for verification buffer. */
> +STATIC struct folio *

Can we please retired STATIC already?

> +STATIC void
> +xfs_verify_media_error(
> +	struct xfs_mount	*mp,
> +	struct xfs_verify_media	*me,
> +	struct xfs_buftarg	*btp,
> +	xfs_daddr_t		daddr,
> +	unsigned int		bio_bbcount,
> +	blk_status_t		bio_status)
> +{
> +	trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
> +			bio_bbcount, bio_status);
> +
> +	/*
> +	 * Pass any I/O error up to the caller if we didn't successfully verify
> +	 * any bytes at all.
> +	 */
> +	if (me->me_start_daddr == daddr)
> +		me->me_ioerror = -blk_status_to_errno(bio_status);
> +
> +	/*
> +	 * PI validation failures, medium errors, or general IO errors are
> +	 * treated as indicators of data loss.  Everything else are (hopefully)
> +	 * transient errors and are not reported.
> +	 */

But still left in me->me_ioerror.  Is that intentional?

> +	switch (me->me_dev) {
> +	case XFS_DEV_DATA:
> +		xfs_verify_report_losses(mp, XG_TYPE_AG, daddr, bio_bbcount);
> +		break;
> +	case XFS_DEV_RT:
> +		xfs_verify_report_losses(mp, XG_TYPE_RTG, daddr, bio_bbcount);
> +		break;
> +	}

At some point we really need dev_to_group_type and vice versa helpers.


  reply	other threads:[~2026-01-19 15:56 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-16  5:42 [PATCHSET v6] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-16  5:42 ` [PATCH 01/11] docs: discuss autonomous self healing in the xfs online repair design doc Darrick J. Wong
2026-01-16  5:42 ` [PATCH 02/11] xfs: start creating infrastructure for health monitoring Darrick J. Wong
2026-02-06 13:07   ` Pankaj Raghav (Samsung)
2026-02-06 17:47     ` Darrick J. Wong
2026-02-06 18:54       ` Pankaj Raghav
2026-02-06 20:41         ` Darrick J. Wong
2026-02-09  6:34           ` Christoph Hellwig
2026-02-10  4:57             ` Darrick J. Wong
2026-01-16  5:42 ` [PATCH 03/11] xfs: create event queuing, formatting, and discovery infrastructure Darrick J. Wong
2026-01-16  5:43 ` [PATCH 04/11] xfs: convey filesystem unmount events to the health monitor Darrick J. Wong
2026-01-19 15:44   ` Christoph Hellwig
2026-01-16  5:43 ` [PATCH 05/11] xfs: convey metadata health " Darrick J. Wong
2026-01-16  5:43 ` [PATCH 06/11] xfs: convey filesystem shutdown " Darrick J. Wong
2026-01-19 15:44   ` Christoph Hellwig
2026-01-16  5:43 ` [PATCH 07/11] xfs: convey externally discovered fsdax media errors " Darrick J. Wong
2026-01-16  5:44 ` [PATCH 08/11] xfs: convey file I/O " Darrick J. Wong
2026-01-16  5:44 ` [PATCH 09/11] xfs: allow toggling verbose logging on the health monitoring file Darrick J. Wong
2026-01-16  5:44 ` [PATCH 10/11] xfs: check if an open file is on the health monitored fs Darrick J. Wong
2026-01-16  5:44 ` [PATCH 11/11] xfs: add media verification ioctl Darrick J. Wong
2026-01-19 15:56   ` Christoph Hellwig [this message]
2026-01-19 17:35     ` Darrick J. Wong
2026-01-20  4:12   ` [PATCH v6.1 " Darrick J. Wong
2026-01-20  7:18     ` Christoph Hellwig
2026-01-20 18:00       ` Darrick J. Wong
2026-01-21  7:05         ` Christoph Hellwig
2026-01-21 19:58           ` Darrick J. Wong
2026-02-06  3:01     ` Chris Mason
2026-02-06  4:53       ` Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2026-01-21  6:34 [PATCHSET v7 1/3] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-21  6:37 ` [PATCH 11/11] xfs: add media verification ioctl Darrick J. Wong
2026-01-13  0:32 [PATCHSET v5] xfs: autonomous self healing of filesystems Darrick J. Wong
2026-01-13  0:35 ` [PATCH 11/11] xfs: add media verification ioctl Darrick J. Wong
2026-01-13 15:57   ` Christoph Hellwig
2026-01-13 23:21     ` Darrick J. Wong
2026-01-14  5:40       ` Darrick J. Wong
2026-01-14  6:02       ` Christoph Hellwig
2026-01-14  6:07         ` Darrick J. Wong
2026-01-14  6:15           ` Christoph Hellwig
2026-01-14  6:19             ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260119155639.GA10822@lst.de \
    --to=hch@lst.de \
    --cc=cem@kernel.org \
    --cc=djwong@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox