linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-15  8:24 [PATCH 0/9] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
@ 2014-04-15  8:24 ` Dave Chinner
  2014-04-15 19:40   ` Brian Foster
  2014-04-21  7:11   ` Christoph Hellwig
  0 siblings, 2 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-15  8:24 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

repair doesn't currently detect verifier errors in AG header
blocks - apart from the primary superblock they are not detected.
They are, fortunately, corrected in the important cases (AGF, AGI
and AGFL) because these structures are rebuilt in phase 5, but if
you run xfs_repair in checking mode it won't report them as bad.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/scan.c | 66 ++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/repair/scan.c b/repair/scan.c
index 1744c32..6c43474 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -1207,28 +1207,31 @@ scan_ag(
 	void		*arg)
 {
 	struct aghdr_cnts *agcnts = arg;
-	xfs_agf_t	*agf;
-	xfs_buf_t	*agfbuf;
+	struct xfs_agf	*agf;
+	struct xfs_buf	*agfbuf = NULL;
 	int		agf_dirty = 0;
-	xfs_agi_t	*agi;
-	xfs_buf_t	*agibuf;
+	struct xfs_agi	*agi;
+	struct xfs_buf	*agibuf = NULL;
 	int		agi_dirty = 0;
-	xfs_sb_t	*sb;
-	xfs_buf_t	*sbbuf;
+	struct xfs_sb	*sb = NULL;
+	struct xfs_buf	*sbbuf = NULL;
 	int		sb_dirty = 0;
 	int		status;
+	char		*objname = NULL;
 
 	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
 				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
 	if (!sbbuf)  {
-		do_error(_("can't get root superblock for ag %d\n"), agno);
-		return;
+		objname = _("root superblock");
+		goto out_free;
 	}
+	if (sbbuf->b_error == EFSBADCRC || sbbuf->b_error == EFSCORRUPTED)
+		sb_dirty = 1;
+
 	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
 	if (!sb) {
 		do_error(_("can't allocate memory for superblock\n"));
-		libxfs_putbuf(sbbuf);
-		return;
+		goto out_free;
 	}
 	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
 
@@ -1236,23 +1239,22 @@ scan_ag(
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
 	if (!agfbuf)  {
-		do_error(_("can't read agf block for ag %d\n"), agno);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-		return;
+		objname = _("agf block");
+		goto out_free;
 	}
+	if (agfbuf->b_error == EFSBADCRC || agfbuf->b_error == EFSCORRUPTED)
+		agf_dirty = 1;
 	agf = XFS_BUF_TO_AGF(agfbuf);
 
 	agibuf = libxfs_readbuf(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
 	if (!agibuf)  {
-		do_error(_("can't read agi block for ag %d\n"), agno);
-		libxfs_putbuf(agfbuf);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-		return;
+		objname = _("agi block");
+		goto out_free;
 	}
+	if (agibuf->b_error == EFSBADCRC || agibuf->b_error == EFSCORRUPTED)
+		agi_dirty = 1;
 	agi = XFS_BUF_TO_AGI(agibuf);
 
 	/* fix up bad ag headers */
@@ -1277,7 +1279,7 @@ scan_ag(
 			do_warn(_("would reset bad sb for ag %d\n"), agno);
 		}
 	}
-	if (status & XR_AG_AGF)  {
+	if (agf_dirty || status & XR_AG_AGF)  {
 		if (!no_modify) {
 			do_warn(_("reset bad agf for ag %d\n"), agno);
 			agf_dirty = 1;
@@ -1285,7 +1287,7 @@ scan_ag(
 			do_warn(_("would reset bad agf for ag %d\n"), agno);
 		}
 	}
-	if (status & XR_AG_AGI)  {
+	if (agi_dirty || status & XR_AG_AGI)  {
 		if (!no_modify) {
 			do_warn(_("reset bad agi for ag %d\n"), agno);
 			agi_dirty = 1;
@@ -1295,15 +1297,9 @@ scan_ag(
 	}
 
 	if (status && no_modify)  {
-		libxfs_putbuf(agibuf);
-		libxfs_putbuf(agfbuf);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-
 		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
 			agno);
-
-		return;
+		goto out_free;
 	}
 
 	scan_freelist(agf, agcnts);
@@ -1341,6 +1337,20 @@ scan_ag(
 	print_inode_list(i);
 #endif
 	return;
+
+out_free:
+	if (sb)
+		free(sb);
+	if (agibuf)
+		libxfs_putbuf(agibuf);
+	if (agfbuf)
+		libxfs_putbuf(agfbuf);
+	if (sbbuf)
+		libxfs_putbuf(sbbuf);
+	if (objname)
+		do_error(_("can't get %s for ag %d\n"), objname, agno);
+	return;
+
 }
 
 #define SCAN_THREADS 32
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-15  8:24 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
@ 2014-04-15 19:40   ` Brian Foster
  2014-04-15 21:52     ` Dave Chinner
  2014-04-21  7:11   ` Christoph Hellwig
  1 sibling, 1 reply; 28+ messages in thread
From: Brian Foster @ 2014-04-15 19:40 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Apr 15, 2014 at 06:24:57PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> repair doesn't currently detect verifier errors in AG header
> blocks - apart from the primary superblock they are not detected.
> They are, fortunately, corrected in the important cases (AGF, AGI
> and AGFL) because these structures are rebuilt in phase 5, but if
> you run xfs_repair in checking mode it won't report them as bad.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  repair/scan.c | 66 ++++++++++++++++++++++++++++++++++-------------------------
>  1 file changed, 38 insertions(+), 28 deletions(-)
> 
> diff --git a/repair/scan.c b/repair/scan.c
> index 1744c32..6c43474 100644
> --- a/repair/scan.c
> +++ b/repair/scan.c
> @@ -1207,28 +1207,31 @@ scan_ag(
>  	void		*arg)
>  {
>  	struct aghdr_cnts *agcnts = arg;
> -	xfs_agf_t	*agf;
> -	xfs_buf_t	*agfbuf;
> +	struct xfs_agf	*agf;
> +	struct xfs_buf	*agfbuf = NULL;
>  	int		agf_dirty = 0;
> -	xfs_agi_t	*agi;
> -	xfs_buf_t	*agibuf;
> +	struct xfs_agi	*agi;
> +	struct xfs_buf	*agibuf = NULL;
>  	int		agi_dirty = 0;
> -	xfs_sb_t	*sb;
> -	xfs_buf_t	*sbbuf;
> +	struct xfs_sb	*sb = NULL;
> +	struct xfs_buf	*sbbuf = NULL;
>  	int		sb_dirty = 0;
>  	int		status;
> +	char		*objname = NULL;
>  
>  	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
>  				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
>  	if (!sbbuf)  {
> -		do_error(_("can't get root superblock for ag %d\n"), agno);
> -		return;
> +		objname = _("root superblock");
> +		goto out_free;
>  	}
> +	if (sbbuf->b_error == EFSBADCRC || sbbuf->b_error == EFSCORRUPTED)
> +		sb_dirty = 1;
> +
>  	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
>  	if (!sb) {
>  		do_error(_("can't allocate memory for superblock\n"));
> -		libxfs_putbuf(sbbuf);
> -		return;
> +		goto out_free;
>  	}
>  	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
>  
> @@ -1236,23 +1239,22 @@ scan_ag(
>  			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
>  			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
>  	if (!agfbuf)  {
> -		do_error(_("can't read agf block for ag %d\n"), agno);
> -		libxfs_putbuf(sbbuf);
> -		free(sb);
> -		return;
> +		objname = _("agf block");
> +		goto out_free;
>  	}
> +	if (agfbuf->b_error == EFSBADCRC || agfbuf->b_error == EFSCORRUPTED)
> +		agf_dirty = 1;
>  	agf = XFS_BUF_TO_AGF(agfbuf);
>  
>  	agibuf = libxfs_readbuf(mp->m_dev,
>  			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
>  			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
>  	if (!agibuf)  {
> -		do_error(_("can't read agi block for ag %d\n"), agno);
> -		libxfs_putbuf(agfbuf);
> -		libxfs_putbuf(sbbuf);
> -		free(sb);
> -		return;
> +		objname = _("agi block");
> +		goto out_free;
>  	}
> +	if (agibuf->b_error == EFSBADCRC || agibuf->b_error == EFSCORRUPTED)
> +		agi_dirty = 1;
>  	agi = XFS_BUF_TO_AGI(agibuf);
>  
>  	/* fix up bad ag headers */
> @@ -1277,7 +1279,7 @@ scan_ag(
>  			do_warn(_("would reset bad sb for ag %d\n"), agno);
>  		}
>  	}
> -	if (status & XR_AG_AGF)  {
> +	if (agf_dirty || status & XR_AG_AGF)  {
>  		if (!no_modify) {
>  			do_warn(_("reset bad agf for ag %d\n"), agno);
>  			agf_dirty = 1;
> @@ -1285,7 +1287,7 @@ scan_ag(
>  			do_warn(_("would reset bad agf for ag %d\n"), agno);
>  		}
>  	}
> -	if (status & XR_AG_AGI)  {
> +	if (agi_dirty || status & XR_AG_AGI)  {
>  		if (!no_modify) {
>  			do_warn(_("reset bad agi for ag %d\n"), agno);
>  			agi_dirty = 1;

There are a few asserts a bit further down this function that assume
*_dirty is set only when in !no_modify mode. E.g.:

	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));

You'll probably want to remove those. Or...

> @@ -1295,15 +1297,9 @@ scan_ag(
>  	}
>  
>  	if (status && no_modify)  {
> -		libxfs_putbuf(agibuf);
> -		libxfs_putbuf(agfbuf);
> -		libxfs_putbuf(sbbuf);
> -		free(sb);
> -
>  		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
>  			agno);
> -
> -		return;
> +		goto out_free;
>  	}

Would we want to skip the ag, as such, on a CRC error in no_modify mode?
If so, perhaps we could set the status variable on crc errors and
bitwise or the value returned from verify_set_agheader().

Brian

>  
>  	scan_freelist(agf, agcnts);
> @@ -1341,6 +1337,20 @@ scan_ag(
>  	print_inode_list(i);
>  #endif
>  	return;
> +
> +out_free:
> +	if (sb)
> +		free(sb);
> +	if (agibuf)
> +		libxfs_putbuf(agibuf);
> +	if (agfbuf)
> +		libxfs_putbuf(agfbuf);
> +	if (sbbuf)
> +		libxfs_putbuf(sbbuf);
> +	if (objname)
> +		do_error(_("can't get %s for ag %d\n"), objname, agno);
> +	return;
> +
>  }
>  
>  #define SCAN_THREADS 32
> -- 
> 1.9.0
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-15 19:40   ` Brian Foster
@ 2014-04-15 21:52     ` Dave Chinner
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-15 21:52 UTC (permalink / raw)
  To: Brian Foster; +Cc: xfs

On Tue, Apr 15, 2014 at 03:40:29PM -0400, Brian Foster wrote:
> On Tue, Apr 15, 2014 at 06:24:57PM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > repair doesn't currently detect verifier errors in AG header
> > blocks - apart from the primary superblock they are not detected.
> > They are, fortunately, corrected in the important cases (AGF, AGI
> > and AGFL) because these structures are rebuilt in phase 5, but if
> > you run xfs_repair in checking mode it won't report them as bad.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
....
> > @@ -1285,7 +1287,7 @@ scan_ag(
> >  			do_warn(_("would reset bad agf for ag %d\n"), agno);
> >  		}
> >  	}
> > -	if (status & XR_AG_AGI)  {
> > +	if (agi_dirty || status & XR_AG_AGI)  {
> >  		if (!no_modify) {
> >  			do_warn(_("reset bad agi for ag %d\n"), agno);
> >  			agi_dirty = 1;
> 
> There are a few asserts a bit further down this function that assume
> *_dirty is set only when in !no_modify mode. E.g.:
> 
> 	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
> 
> You'll probably want to remove those. Or...

I thought I caught all of those...

> 
> > @@ -1295,15 +1297,9 @@ scan_ag(
> >  	}
> >  
> >  	if (status && no_modify)  {
> > -		libxfs_putbuf(agibuf);
> > -		libxfs_putbuf(agfbuf);
> > -		libxfs_putbuf(sbbuf);
> > -		free(sb);
> > -
> >  		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
> >  			agno);
> > -
> > -		return;
> > +		goto out_free;
> >  	}
> 
> Would we want to skip the ag, as such, on a CRC error in no_modify mode?
> If so, perhaps we could set the status variable on crc errors and
> bitwise or the value returned from verify_set_agheader().

I figured that for CRC errors we really should try to validate
everything if we can. If nothing else fails the validation, then it
might be a bit flip in an unused portion of the sector and in that
case we can actually continue onwards. IOWs, a CRC error is not
necessarily fatal....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-15  8:24 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
  2014-04-15 19:40   ` Brian Foster
@ 2014-04-21  7:11   ` Christoph Hellwig
  2014-04-21 23:35     ` Dave Chinner
  1 sibling, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-21  7:11 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Apr 15, 2014 at 06:24:57PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> repair doesn't currently detect verifier errors in AG header
> blocks - apart from the primary superblock they are not detected.
> They are, fortunately, corrected in the important cases (AGF, AGI
> and AGFL) because these structures are rebuilt in phase 5, but if
> you run xfs_repair in checking mode it won't report them as bad.

Shouldn't we apply the same scheme as for directories here, that is if
it fails with a verifier error re-read without the verifier and then
still do the full check as well?

Btw, it might make sense to have special read_buf variants in libxfs
that always return a valid buffer even if the verifier fails, just
telling us about it without having to re-read.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-21  7:11   ` Christoph Hellwig
@ 2014-04-21 23:35     ` Dave Chinner
  2014-04-22  6:47       ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-21 23:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Apr 21, 2014 at 12:11:06AM -0700, Christoph Hellwig wrote:
> On Tue, Apr 15, 2014 at 06:24:57PM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > repair doesn't currently detect verifier errors in AG header
> > blocks - apart from the primary superblock they are not detected.
> > They are, fortunately, corrected in the important cases (AGF, AGI
> > and AGFL) because these structures are rebuilt in phase 5, but if
> > you run xfs_repair in checking mode it won't report them as bad.
> 
> Shouldn't we apply the same scheme as for directories here, that is if
> it fails with a verifier error re-read without the verifier and then
> still do the full check as well?

The directory code is the special case - it uses xfs_trans_read_buf*
interfaces, which return either a good buffer with no error or an
error with no buffer. Hence for the directory code, we have to
re-read the buffer without the verifier to grab the unchecked buffer
from the cache when the verifier detects an error.

> Btw, it might make sense to have special read_buf variants in libxfs
> that always return a valid buffer even if the verifier fails, just
> telling us about it without having to re-read.

That's exactly what they do now - you get the xfs_buf, the data in
bp->b_addr and the verifier error in bp->b_error all in one call.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-21 23:35     ` Dave Chinner
@ 2014-04-22  6:47       ` Christoph Hellwig
  2014-04-22  9:10         ` Dave Chinner
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-22  6:47 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Tue, Apr 22, 2014 at 09:35:12AM +1000, Dave Chinner wrote:
> > Shouldn't we apply the same scheme as for directories here, that is if
> > it fails with a verifier error re-read without the verifier and then
> > still do the full check as well?
> 
> The directory code is the special case - it uses xfs_trans_read_buf*
> interfaces, which return either a good buffer with no error or an
> error with no buffer. Hence for the directory code, we have to
> re-read the buffer without the verifier to grab the unchecked buffer
> from the cache when the verifier detects an error.

How about just having a variant of xfs_da_read_buf that doesn't use
xfs_trans_read_buf *?  xfs_da_read_buf is pretty simple, especially
when removing the magic test that has been superceeded by the verifiers.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-22  6:47       ` Christoph Hellwig
@ 2014-04-22  9:10         ` Dave Chinner
  2014-04-22  9:41           ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-22  9:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Apr 21, 2014 at 11:47:37PM -0700, Christoph Hellwig wrote:
> On Tue, Apr 22, 2014 at 09:35:12AM +1000, Dave Chinner wrote:
> > > Shouldn't we apply the same scheme as for directories here, that is if
> > > it fails with a verifier error re-read without the verifier and then
> > > still do the full check as well?
> > 
> > The directory code is the special case - it uses xfs_trans_read_buf*
> > interfaces, which return either a good buffer with no error or an
> > error with no buffer. Hence for the directory code, we have to
> > re-read the buffer without the verifier to grab the unchecked buffer
> > from the cache when the verifier detects an error.
> 
> How about just having a variant of xfs_da_read_buf that doesn't use
> xfs_trans_read_buf *?  xfs_da_read_buf is pretty simple, especially
> when removing the magic test that has been superceeded by the verifiers.

Right now I ijust want to keep the change as small as possible and
get a release out.

Yes, I agree there's much more cleanup that is needed in this code,
but at this point is seems unnecessary and doesn't matter for the
purpose of providing this functionality initially.  We have to
provide the same functionality for the kernel code for it to be able
to handle CRC errors sanely, and so we're going to need to
restructure xfs_trans_read_buf() to handle it. in doing this, we
solve the libxfs problem, too, because what we do to the kernel code
will flow on to userspace...

So really, I'd prefer to leave the userspace code doing this retry
for this one piece of infrastructure and do all the major
restructing of the directory buffer read code in the kernel code
first. That way we can pick up all the changes for userspace when
the libxfs code is updated.

Keep in mind that right now we've got a *lot* of updates from the
kernel to the libxfs code pending that are stalled waiting for a
release to be done...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-22  9:10         ` Dave Chinner
@ 2014-04-22  9:41           ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-22  9:41 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Tue, Apr 22, 2014 at 07:10:43PM +1000, Dave Chinner wrote:
> Right now I ijust want to keep the change as small as possible and
> get a release out.

Oh well.  I'll probably ACK it next time it comes around.  I think the
series needs enough changes for a resend anyway.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection
@ 2014-04-24  5:01 Dave Chinner
  2014-04-24  5:01 ` [PATCH 1/9] db: don't claim unchecked CRCs are correct Dave Chinner
                   ` (8 more replies)
  0 siblings, 9 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

Hi folks,

This is version 2 of the patchset first posted here:

http://oss.sgi.com/archives/xfs/2014-04/msg00374.html

The version corrects all the issues mentioned in the first review.
It doesn't try to rework the directory buffer read issue that
Christoph was concerned about - API changes are necessary so we'll
do that through the kernel first in a separate patchset.

Version 2:
- move LIBXFS_B_UNCHECKED to the correct patch (patch 1)
- set_cur_iotype assumes a valid type (patch 2)
- comments added to explain the way LIBXFS_B_UNCHECKED and dirty
  buffers are supposed to interact (patch 3)
- fixed comment flow and added separate "crc_error" return variable
  to dir_read_buf() (patch 4)
- reworked CRC error handling of AG headers (patch 5)
- readded missing dirty/no_modify assert (patch 6)
- fixed typos (patch 8)
- fixed "repair" variable initialisation flow (patch 9)
- fixed dirty buffer accounting on the cursor to dirty the correct
  buffer on CRC errors, added some clarifying comments (patch 9)

Comments and testing welcome!

-Dave.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/9] db: don't claim unchecked CRCs are correct
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
@ 2014-04-24  5:01 ` Dave Chinner
  2014-04-24  5:01 ` [PATCH 2/9] db: verify buffer on type change Dave Chinner
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Currently xfs_db will claim the CRC on a structure is correct if the
buffer is not marked with an error. However, buffers may have been
read without a verifier, and hence have not had their CRCs
validated. in this case, we shoul dreport "unchecked" rather than
"correct". For example:

xfs_db> fsb 0x6003f
xfs_db> type dir3
xfs_db> p
dhdr.hdr.magic = 0x58444433
dhdr.hdr.crc = 0x2d0f9c9d (unchecked)
....

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 db/fprint.c      | 15 ++++++++++++++-
 db/io.c          |  2 ++
 db/io.h          | 12 +++++++++---
 include/libxfs.h |  1 +
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/db/fprint.c b/db/fprint.c
index 435d984..52782e2 100644
--- a/db/fprint.c
+++ b/db/fprint.c
@@ -206,7 +206,20 @@ fp_crc(
 	__int64_t	val;
 	char		*ok;
 
-	ok = iocur_crc_valid() ? "correct" : "bad";
+	switch (iocur_crc_valid()) {
+	case -1:
+		ok = "unchecked";
+		break;
+	case 0:
+		ok = "bad";
+		break;
+	case 1:
+		ok = "correct";
+		break;
+	default:
+		ok = "unknown state";
+		break;
+	}
 
 	for (i = 0, bitpos = bit;
 	     i < count && !seenint();
diff --git a/db/io.c b/db/io.c
index 5eb61d9..387f171 100644
--- a/db/io.c
+++ b/db/io.c
@@ -531,6 +531,8 @@ set_cur(
 		return;
 	iocur_top->buf = bp->b_addr;
 	iocur_top->bp = bp;
+	if (!ops)
+		bp->b_flags |= LIBXFS_B_UNCHECKED;
 
 	iocur_top->bb = d;
 	iocur_top->blen = c;
diff --git a/db/io.h b/db/io.h
index ad39bee..7875119 100644
--- a/db/io.h
+++ b/db/io.h
@@ -63,10 +63,16 @@ extern void	set_cur(const struct typ *t, __int64_t d, int c, int ring_add,
 			bbmap_t *bbmap);
 extern void     ring_add(void);
 
-static inline bool
+/*
+ * returns -1 for unchecked, 0 for bad and 1 for good
+ */
+static inline int
 iocur_crc_valid()
 {
-	return (iocur_top->bp &&
-		iocur_top->bp->b_error != EFSBADCRC &&
+	if (!iocur_top->bp)
+		return -1;
+	if (iocur_top->bp->b_flags & LIBXFS_B_UNCHECKED)
+		return -1;
+	return (iocur_top->bp->b_error != EFSBADCRC &&
 		(!iocur_top->ino_buf || iocur_top->ino_crc_ok));
 }
diff --git a/include/libxfs.h b/include/libxfs.h
index 6bc6c94..6b1e276 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -333,6 +333,7 @@ enum xfs_buf_flags_t {	/* b_flags bits */
 	LIBXFS_B_STALE		= 0x0004,	/* buffer marked as invalid */
 	LIBXFS_B_UPTODATE	= 0x0008,	/* buffer is sync'd to disk */
 	LIBXFS_B_DISCONTIG	= 0x0010,	/* discontiguous buffer */
+	LIBXFS_B_UNCHECKED	= 0x0020,	/* needs verification */
 };
 
 #define XFS_BUF_DADDR_NULL		((xfs_daddr_t) (-1LL))
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/9] db: verify buffer on type change
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
  2014-04-24  5:01 ` [PATCH 1/9] db: don't claim unchecked CRCs are correct Dave Chinner
@ 2014-04-24  5:01 ` Dave Chinner
  2014-04-25  5:41   ` Christoph Hellwig
  2014-04-24  5:01 ` [PATCH 3/9] repair: ensure prefetched buffers have CRCs validated Dave Chinner
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Currently when the type command is run, we simply change the type
associated with the buffer, but don't verify it. This results in
unchecked CRCs being displayed. Hence when changing the type, run
the verifier associated with the type to determine if the buffer
contents is valid or not.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 db/io.c   | 24 ++++++++++++++++++++++++
 db/io.h   |  1 +
 db/type.c |  9 +++++----
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/db/io.c b/db/io.c
index 387f171..7f1b76a 100644
--- a/db/io.c
+++ b/db/io.c
@@ -552,6 +552,30 @@ set_cur(
 		ring_add();
 }
 
+void
+set_iocur_type(
+	const typ_t	*t)
+{
+	struct xfs_buf	*bp = iocur_top->bp;
+
+	iocur_top->typ = t;
+
+	/* verify the buffer if the type has one. */
+	if (!bp)
+		return;
+	if (!t->bops) {
+		bp->b_ops = NULL;
+		bp->b_flags |= LIBXFS_B_UNCHECKED;
+		return;
+	}
+	if (!(bp->b_flags & LIBXFS_B_UPTODATE))
+		return;
+	bp->b_error = 0;
+	bp->b_ops = t->bops;
+	bp->b_ops->verify_read(bp);
+	bp->b_flags &= ~LIBXFS_B_UNCHECKED;
+}
+
 static void
 stack_help(void)
 {
diff --git a/db/io.h b/db/io.h
index 7875119..71082e6 100644
--- a/db/io.h
+++ b/db/io.h
@@ -62,6 +62,7 @@ extern void     write_cur(void);
 extern void	set_cur(const struct typ *t, __int64_t d, int c, int ring_add,
 			bbmap_t *bbmap);
 extern void     ring_add(void);
+extern void	set_iocur_type(const struct typ *t);
 
 /*
  * returns -1 for unchecked, 0 for bad and 1 for good
diff --git a/db/type.c b/db/type.c
index 04d0d56..b29f2a4 100644
--- a/db/type.c
+++ b/db/type.c
@@ -162,10 +162,11 @@ type_f(
 		if (tt == NULL) {
 			dbprintf(_("no such type %s\n"), argv[1]);
 		} else {
-			if (iocur_top->typ == NULL) {
-			    dbprintf(_("no current object\n"));
-			} else {
-			    iocur_top->typ = cur_typ = tt;
+			if (iocur_top->typ == NULL)
+				dbprintf(_("no current object\n"));
+			else {
+				cur_typ = tt;
+				set_iocur_type(tt);
 			}
 		}
 	}
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/9] repair: ensure prefetched buffers have CRCs validated
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
  2014-04-24  5:01 ` [PATCH 1/9] db: don't claim unchecked CRCs are correct Dave Chinner
  2014-04-24  5:01 ` [PATCH 2/9] db: verify buffer on type change Dave Chinner
@ 2014-04-24  5:01 ` Dave Chinner
  2014-04-25  5:47   ` Christoph Hellwig
  2014-04-24  5:01 ` [PATCH 4/9] repair: detect and correct CRC errors in directory blocks Dave Chinner
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Prefetch currently does not do CRC validation when the IO completes
due to the optimisation it performs and the fact that it does not
know what the type of metadata into the buffer is supposed to be.
Hence, mark all prefetched buffers as "suspect" so that when the
end user tries to read it with a supplied validation function the
validation is run even though the buffer was already in the cache.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 libxfs/rdwr.c     | 43 ++++++++++++++++++++++++++++++++++++++-----
 repair/prefetch.c |  3 +++
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 7208a2f..92b1182 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -718,12 +718,32 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
 	bp = libxfs_getbuf(btp, blkno, len);
 	if (!bp)
 		return NULL;
-	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
+
+	/*
+	 * if the buffer was prefetched, it is likely that it was not validated.
+	 * Hence if we are supplied an ops function and the buffer is marked as
+	 * unchecked, we need to validate it now.
+	 *
+	 * We do this verification even if the buffer is dirty - the
+	 * verification is almost certainly going to fail the CRC check in this
+	 * case as a dirty buffer has not had the CRC recalculated. However, we
+	 * should not be dirtying unchecked buffers and therefore failing it
+	 * here because it's dirty and unchecked indicates we've screwed up
+	 * somewhere else.
+	 */
+	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
+		if (ops && (bp->b_flags & LIBXFS_B_UNCHECKED)) {
+			bp->b_error = 0;
+			bp->b_ops = ops;
+			bp->b_ops->verify_read(bp);
+			bp->b_flags &= ~LIBXFS_B_UNCHECKED;
+		}
 		return bp;
+	}
 
 	/*
-	 * only set the ops on a cache miss (i.e. first physical read) as the
-	 * verifier may change the ops to match the typ eof buffer it contains.
+	 * Set the ops on a cache miss (i.e. first physical read) as the
+	 * verifier may change the ops to match the type of buffer it contains.
 	 * A cache hit might reset the verifier to the original type if we set
 	 * it again, but it won't get called again and set to match the buffer
 	 * contents. *cough* xfs_da_node_buf_ops *cough*.
@@ -733,8 +753,10 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
 	error = libxfs_readbufr(btp, blkno, bp, len, flags);
 	if (error)
 		bp->b_error = error;
-	else if (bp->b_ops)
+	else if (bp->b_ops) {
 		bp->b_ops->verify_read(bp);
+		bp->b_flags &= ~LIBXFS_B_UNCHECKED;
+	}
 	return bp;
 }
 
@@ -786,6 +808,14 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
 		return NULL;
 
 	bp->b_error = 0;
+	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
+		if (ops && (bp->b_flags & LIBXFS_B_UNCHECKED)) {
+			bp->b_ops = ops;
+			bp->b_ops->verify_read(bp);
+			bp->b_flags &= ~LIBXFS_B_UNCHECKED;
+		}
+		return bp;
+	}
 	bp->b_ops = ops;
 	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
 		return bp;
@@ -793,8 +823,10 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
 	error = libxfs_readbufr_map(btp, bp, flags);
 	if (!error) {
 		bp->b_flags |= LIBXFS_B_UPTODATE;
-		if (bp->b_ops)
+		if (bp->b_ops) {
 			bp->b_ops->verify_read(bp);
+			bp->b_flags &= ~LIBXFS_B_UNCHECKED;
+		}
 	}
 #ifdef IO_DEBUG
 	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
@@ -889,6 +921,7 @@ libxfs_writebufr(xfs_buf_t *bp)
 	if (!error) {
 		bp->b_flags |= LIBXFS_B_UPTODATE;
 		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
+		bp->b_flags &= ~LIBXFS_B_UNCHECKED;
 	}
 	return error;
 }
diff --git a/repair/prefetch.c b/repair/prefetch.c
index 6d6d344..d794ba3 100644
--- a/repair/prefetch.c
+++ b/repair/prefetch.c
@@ -389,6 +389,7 @@ pf_read_inode_dirs(
 
 	bp->b_ops = &xfs_inode_buf_ops;
 	bp->b_ops->verify_read(bp);
+	bp->b_flags &= ~LIBXFS_B_UNCHECKED;
 	if (bp->b_error)
 		return;
 
@@ -460,6 +461,7 @@ pf_read_discontig(
 
 	pthread_mutex_unlock(&args->lock);
 	libxfs_readbufr_map(mp->m_ddev_targp, bp, 0);
+	bp->b_flags |= LIBXFS_B_UNCHECKED;
 	libxfs_putbuf(bp);
 	pthread_mutex_lock(&args->lock);
 }
@@ -583,6 +585,7 @@ pf_batch_read(
 					break;
 				memcpy(XFS_BUF_PTR(bplist[i]), pbuf, size);
 				bplist[i]->b_flags |= LIBXFS_B_UPTODATE;
+				bplist[i]->b_flags |= LIBXFS_B_UNCHECKED;
 				len -= size;
 				if (B_IS_INODE(XFS_BUF_PRIORITY(bplist[i])))
 					pf_read_inode_dirs(args, bplist[i]);
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 4/9] repair: detect and correct CRC errors in directory blocks
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
                   ` (2 preceding siblings ...)
  2014-04-24  5:01 ` [PATCH 3/9] repair: ensure prefetched buffers have CRCs validated Dave Chinner
@ 2014-04-24  5:01 ` Dave Chinner
  2014-04-25  5:47   ` Christoph Hellwig
  2014-04-24  5:01 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

repair doesn't currently verifier errors in directory blocks - they
cause repair to ignore blocks and hence fail because it can't read
critical blocks from the directory.

Fix this by having the directory buffer read code detect a verifier
error and retry the read without the verifier if the verifier has
detected an error. Then pass the verifer error with the successfully
read buffer back to the caller, so the caller can handle the error
appropriately. In most cases, this is simply marking the directory
as needing a rebuild, so once the directory entries have been
checked and repaired, it will rewrite all the directory buffers
(including the clean ones) and in the process recalculate all the
the CRC on the directory blocks.

Hence pure CRC errors in directory blocks are now handled correctly
by xfs_repair.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 libxfs/rdwr.c   |  3 +-
 repair/phase6.c | 94 +++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 92b1182..f0bc040 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -731,9 +731,9 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
 	 * here because it's dirty and unchecked indicates we've screwed up
 	 * somewhere else.
 	 */
+	bp->b_error = 0;
 	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
 		if (ops && (bp->b_flags & LIBXFS_B_UNCHECKED)) {
-			bp->b_error = 0;
 			bp->b_ops = ops;
 			bp->b_ops->verify_read(bp);
 			bp->b_flags &= ~LIBXFS_B_UNCHECKED;
@@ -748,7 +748,6 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
 	 * it again, but it won't get called again and set to match the buffer
 	 * contents. *cough* xfs_da_node_buf_ops *cough*.
 	 */
-	bp->b_error = 0;
 	bp->b_ops = ops;
 	error = libxfs_readbufr(btp, blkno, bp, len, flags);
 	if (error)
diff --git a/repair/phase6.c b/repair/phase6.c
index 0c35e1c..5ae6a3d 100644
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -125,6 +125,45 @@ typedef struct freetab {
 #define	DIR_HASH_CK_TOTAL	6
 
 /*
+ * Need to handle CRC and validation errors specially here. If there is a
+ * validator error, re-read without the verifier so that we get a buffer we can
+ * check and repair. Re-attach the ops to the buffer after the read so that when
+ * it is rewritten the CRC is recalculated.
+ *
+ * If the buffer was not read, we return an error. If the buffer was read but
+ * had a CRC or corruption error, we reread it without the verifier and if it is
+ * read successfully we increment *crc_error and return 0. Otherwise we
+ * return the read error.
+ */
+static int
+dir_read_buf(
+	struct xfs_inode	*ip,
+	xfs_dablk_t		bno,
+	xfs_daddr_t		mappedbno,
+	struct xfs_buf		**bpp,
+	const struct xfs_buf_ops *ops,
+	int			*crc_error)
+{
+	int error;
+	int error2;
+
+	error = libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
+				   XFS_DATA_FORK, ops);
+
+	if (error != EFSBADCRC && error != EFSCORRUPTED)
+		return error;
+
+	error2 = libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
+				   XFS_DATA_FORK, NULL);
+	if (error2)
+		return error2;
+
+	(*crc_error)++;
+	(*bpp)->b_ops = ops;
+	return 0;
+}
+
+/*
  * Returns 0 if the name already exists (ie. a duplicate)
  */
 static int
@@ -1906,15 +1945,19 @@ longform_dir2_check_leaf(
 	int			seeval;
 	struct xfs_dir2_leaf_entry *ents;
 	struct xfs_dir3_icleaf_hdr leafhdr;
+	int			error;
+	int			fixit = 0;
 
 	da_bno = mp->m_dirleafblk;
-	if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK,
-				&xfs_dir3_leaf1_buf_ops)) {
+	error = dir_read_buf(ip, da_bno, -1, &bp, &xfs_dir3_leaf1_buf_ops,
+			     &fixit);
+	if (error) {
 		do_error(
-	_("can't read block %u for directory inode %" PRIu64 "\n"),
-			da_bno, ip->i_ino);
+	_("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
+			da_bno, ip->i_ino, error);
 		/* NOTREACHED */
 	}
+
 	leaf = bp->b_addr;
 	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
 	ents = xfs_dir3_leaf_ents_p(leaf);
@@ -1951,7 +1994,7 @@ longform_dir2_check_leaf(
 		return 1;
 	}
 	libxfs_putbuf(bp);
-	return 0;
+	return fixit;
 }
 
 /*
@@ -1978,6 +2021,8 @@ longform_dir2_check_node(
 	struct xfs_dir3_icleaf_hdr leafhdr;
 	struct xfs_dir3_icfree_hdr freehdr;
 	__be16			*bests;
+	int			error;
+	int			fixit = 0;
 
 	for (da_bno = mp->m_dirleafblk, next_da_bno = 0;
 			next_da_bno != NULLFILEOFF && da_bno < mp->m_dirfreeblk;
@@ -1993,11 +2038,12 @@ longform_dir2_check_node(
 		 * a node block, then we'll skip it below based on a magic
 		 * number check.
 		 */
-		if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp,
-				XFS_DATA_FORK, &xfs_da3_node_buf_ops)) {
+		error = dir_read_buf(ip, da_bno, -1, &bp,
+				     &xfs_da3_node_buf_ops, &fixit);
+		if (error) {
 			do_warn(
-	_("can't read leaf block %u for directory inode %" PRIu64 "\n"),
-				da_bno, ip->i_ino);
+	_("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
+				da_bno, ip->i_ino, error);
 			return 1;
 		}
 		leaf = bp->b_addr;
@@ -2016,6 +2062,12 @@ longform_dir2_check_node(
 			libxfs_putbuf(bp);
 			return 1;
 		}
+
+		/*
+		 * If there's a validator error, we need to ensure that we got
+		 * the right ops on the buffer for when we write it back out.
+		 */
+		bp->b_ops = &xfs_dir3_leafn_buf_ops;
 		if (leafhdr.count > xfs_dir3_max_leaf_ents(mp, leaf) ||
 		    leafhdr.count < leafhdr.stale) {
 			do_warn(
@@ -2039,11 +2091,13 @@ longform_dir2_check_node(
 		next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
 		if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
 			break;
-		if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp,
-				XFS_DATA_FORK, &xfs_dir3_free_buf_ops)) {
+
+		error = dir_read_buf(ip, da_bno, -1, &bp,
+				     &xfs_dir3_free_buf_ops, &fixit);
+		if (error) {
 			do_warn(
-	_("can't read freespace block %u for directory inode %" PRIu64 "\n"),
-				da_bno, ip->i_ino);
+	_("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
+				da_bno, ip->i_ino, error);
 			return 1;
 		}
 		free = bp->b_addr;
@@ -2093,7 +2147,7 @@ longform_dir2_check_node(
 			return 1;
 		}
 	}
-	return 0;
+	return fixit;
 }
 
 /*
@@ -2148,6 +2202,7 @@ longform_dir2_entry_check(xfs_mount_t	*mp,
 	     next_da_bno != NULLFILEOFF && da_bno < mp->m_dirleafblk;
 	     da_bno = (xfs_dablk_t)next_da_bno) {
 		const struct xfs_buf_ops *ops;
+		int			 error;
 
 		next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
 		if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
@@ -2167,11 +2222,12 @@ longform_dir2_entry_check(xfs_mount_t	*mp,
 			ops = &xfs_dir3_block_buf_ops;
 		else
 			ops = &xfs_dir3_data_buf_ops;
-		if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bplist[db],
-				       XFS_DATA_FORK, ops)) {
+
+		error = dir_read_buf(ip, da_bno, -1, &bplist[db], ops, &fixit);
+		if (error) {
 			do_warn(
-	_("can't read data block %u for directory inode %" PRIu64 "\n"),
-				da_bno, ino);
+	_("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
+				da_bno, ino, error);
 			*num_illegal += 1;
 
 			/*
@@ -2189,7 +2245,7 @@ longform_dir2_entry_check(xfs_mount_t	*mp,
 				irec, ino_offset, &bplist[db], hashtab,
 				&freetab, da_bno, isblock);
 	}
-	fixit = (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
+	fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
 
 	if (!dotdot_update) {
 		/* check btree and freespace */
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
                   ` (3 preceding siblings ...)
  2014-04-24  5:01 ` [PATCH 4/9] repair: detect and correct CRC errors in directory blocks Dave Chinner
@ 2014-04-24  5:01 ` Dave Chinner
  2014-04-25  5:55   ` Christoph Hellwig
  2014-04-24  5:01 ` [PATCH 6/9] repair: report AG btree verifier errors Dave Chinner
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

repair doesn't currently detect verifier errors in AG header
blocks - apart from the primary superblock they are not detected.
They are, fortunately, corrected in the important cases (AGF, AGI
and AGFL) because these structures are rebuilt in phase 5, but if
you run xfs_repair in checking mode it won't report them as bad.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/scan.c | 81 ++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 32 deletions(-)

diff --git a/repair/scan.c b/repair/scan.c
index 1744c32..d022723 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -1207,28 +1207,29 @@ scan_ag(
 	void		*arg)
 {
 	struct aghdr_cnts *agcnts = arg;
-	xfs_agf_t	*agf;
-	xfs_buf_t	*agfbuf;
+	struct xfs_agf	*agf;
+	struct xfs_buf	*agfbuf = NULL;
 	int		agf_dirty = 0;
-	xfs_agi_t	*agi;
-	xfs_buf_t	*agibuf;
+	struct xfs_agi	*agi;
+	struct xfs_buf	*agibuf = NULL;
 	int		agi_dirty = 0;
-	xfs_sb_t	*sb;
-	xfs_buf_t	*sbbuf;
+	struct xfs_sb	*sb = NULL;
+	struct xfs_buf	*sbbuf = NULL;
 	int		sb_dirty = 0;
 	int		status;
+	char		*objname = NULL;
 
 	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
 				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
 	if (!sbbuf)  {
-		do_error(_("can't get root superblock for ag %d\n"), agno);
-		return;
+		objname = _("root superblock");
+		goto out_free;
 	}
+
 	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
 	if (!sb) {
 		do_error(_("can't allocate memory for superblock\n"));
-		libxfs_putbuf(sbbuf);
-		return;
+		goto out_free;
 	}
 	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
 
@@ -1236,10 +1237,8 @@ scan_ag(
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
 	if (!agfbuf)  {
-		do_error(_("can't read agf block for ag %d\n"), agno);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-		return;
+		objname = _("agf block");
+		goto out_free;
 	}
 	agf = XFS_BUF_TO_AGF(agfbuf);
 
@@ -1247,11 +1246,8 @@ scan_ag(
 			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
 	if (!agibuf)  {
-		do_error(_("can't read agi block for ag %d\n"), agno);
-		libxfs_putbuf(agfbuf);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-		return;
+		objname = _("agi block");
+		goto out_free;
 	}
 	agi = XFS_BUF_TO_AGI(agibuf);
 
@@ -1277,7 +1273,7 @@ scan_ag(
 			do_warn(_("would reset bad sb for ag %d\n"), agno);
 		}
 	}
-	if (status & XR_AG_AGF)  {
+	if (agf_dirty || status & XR_AG_AGF)  {
 		if (!no_modify) {
 			do_warn(_("reset bad agf for ag %d\n"), agno);
 			agf_dirty = 1;
@@ -1285,7 +1281,7 @@ scan_ag(
 			do_warn(_("would reset bad agf for ag %d\n"), agno);
 		}
 	}
-	if (status & XR_AG_AGI)  {
+	if (agi_dirty || status & XR_AG_AGI)  {
 		if (!no_modify) {
 			do_warn(_("reset bad agi for ag %d\n"), agno);
 			agi_dirty = 1;
@@ -1295,15 +1291,9 @@ scan_ag(
 	}
 
 	if (status && no_modify)  {
-		libxfs_putbuf(agibuf);
-		libxfs_putbuf(agfbuf);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-
 		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
 			agno);
-
-		return;
+		goto out_free;
 	}
 
 	scan_freelist(agf, agcnts);
@@ -1312,21 +1302,34 @@ scan_ag(
 	validate_agi(agi, agno, agcnts);
 
 	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
+	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
+	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
+
+	/*
+	 * Only pay attention to CRC/verifier errors if we can correct them.
+	 * While there, ensure that we corrected a corruption error if the
+	 * verifier detected one.
+	 */
+	if (!no_modify) {
+		ASSERT(agi_dirty || agibuf->b_error != EFSCORRUPTED);
+		ASSERT(agf_dirty || agfbuf->b_error != EFSCORRUPTED);
+		ASSERT(sb_dirty || sbbuf->b_error != EFSCORRUPTED);
+
+		agi_dirty += (agibuf->b_error == EFSBADCRC);
+		agf_dirty += (agfbuf->b_error == EFSBADCRC);
+		sb_dirty += (sbbuf->b_error == EFSBADCRC);
+	}
 
 	if (agi_dirty && !no_modify)
 		libxfs_writebuf(agibuf, 0);
 	else
 		libxfs_putbuf(agibuf);
 
-	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
-
 	if (agf_dirty && !no_modify)
 		libxfs_writebuf(agfbuf, 0);
 	else
 		libxfs_putbuf(agfbuf);
 
-	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
-
 	if (sb_dirty && !no_modify) {
 		if (agno == 0)
 			memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
@@ -1341,6 +1344,20 @@ scan_ag(
 	print_inode_list(i);
 #endif
 	return;
+
+out_free:
+	if (sb)
+		free(sb);
+	if (agibuf)
+		libxfs_putbuf(agibuf);
+	if (agfbuf)
+		libxfs_putbuf(agfbuf);
+	if (sbbuf)
+		libxfs_putbuf(sbbuf);
+	if (objname)
+		do_error(_("can't get %s for ag %d\n"), objname, agno);
+	return;
+
 }
 
 #define SCAN_THREADS 32
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 6/9] repair: report AG btree verifier errors
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
                   ` (4 preceding siblings ...)
  2014-04-24  5:01 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
@ 2014-04-24  5:01 ` Dave Chinner
  2014-04-25  5:55   ` Christoph Hellwig
  2014-04-24  5:02 ` [PATCH 7/9] repair: remove more dirv1 leftovers Dave Chinner
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:01 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

When we scan the filesystem freespace and inode maps in phase 2, we
don't report CRC errors that are found. We don't really care from a
repair perspective, because the trees are completely rebuilt from
the ground up in phase 5, but froma checking perspective we need to
inform the user that we found inconsistencies.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/scan.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/repair/scan.c b/repair/scan.c
index d022723..9cd0e7d 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -82,6 +82,12 @@ scan_sbtree(
 		do_error(_("can't read btree block %d/%d\n"), agno, root);
 		return;
 	}
+	if (bp->b_error == EFSBADCRC || bp->b_error == EFSCORRUPTED) {
+		do_warn(_("btree block %d/%d is suspect, error %d\n"),
+			agno, root, bp->b_error);
+		suspect = 1;
+	}
+
 	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
 							isroot, magic, priv);
 	libxfs_putbuf(bp);
@@ -123,6 +129,7 @@ scan_lbtree(
 	xfs_buf_t	*bp;
 	int		err;
 	int		dirty = 0;
+	bool		badcrc = false;
 
 	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
 		      XFS_FSB_TO_BB(mp, 1), 0, ops);
@@ -132,6 +139,19 @@ scan_lbtree(
 			XFS_FSB_TO_AGBNO(mp, root));
 		return(1);
 	}
+
+	/*
+	 * only check for bad CRC here - caller will determine if there
+	 * is a corruption or not and whether it got corrected and so needs
+	 * writing back. CRC errors always imply we need to write the block.
+	 */
+	if (bp->b_error == EFSBADCRC) {
+		do_warn(_("btree block %d/%d is suspect, error %d\n"),
+			XFS_FSB_TO_AGNO(mp, root),
+			XFS_FSB_TO_AGBNO(mp, root), bp->b_error);
+		badcrc = true;
+	}
+
 	err = (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1,
 			type, whichfork, root, ino, tot, nex, blkmapp,
 			bm_cursor, isroot, check_dups, &dirty,
@@ -139,7 +159,7 @@ scan_lbtree(
 
 	ASSERT(dirty == 0 || (dirty && !no_modify));
 
-	if (dirty && !no_modify)
+	if ((dirty || badcrc) && !no_modify)
 		libxfs_writebuf(bp, 0);
 	else
 		libxfs_putbuf(bp);
@@ -1066,6 +1086,9 @@ scan_freelist(
 		do_abort(_("can't read agfl block for ag %d\n"), agno);
 		return;
 	}
+	if (agflbuf->b_error == EFSBADCRC)
+		do_warn(_("agfl has bad CRC for ag %d\n"), agno);
+
 	freelist = XFS_BUF_TO_AGFL_BNO(mp, agflbuf);
 	i = be32_to_cpu(agf->agf_flfirst);
 
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 7/9] repair: remove more dirv1 leftovers
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
                   ` (5 preceding siblings ...)
  2014-04-24  5:01 ` [PATCH 6/9] repair: report AG btree verifier errors Dave Chinner
@ 2014-04-24  5:02 ` Dave Chinner
  2014-04-24  5:02 ` [PATCH 8/9] repair: handle remote symlink CRC errors Dave Chinner
  2014-04-24  5:02 ` [PATCH 9/9] repair: detect and handle attribute tree " Dave Chinner
  8 siblings, 0 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:02 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

get_bmapi() and it's children were only called by dirv1 code. There
are no current callers, so remove them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 repair/dinode.c | 239 --------------------------------------------------------
 repair/dinode.h |   6 --
 2 files changed, 245 deletions(-)

diff --git a/repair/dinode.c b/repair/dinode.c
index 48f17ac..b086bec 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -870,245 +870,6 @@ get_agino_buf(xfs_mount_t	 *mp,
 }
 
 /*
- * these next routines return the filesystem blockno of the
- * block containing the block "bno" in the file whose bmap
- * tree (or extent list) is rooted by "rootblock".
- *
- * the next routines are utility routines for the third
- * routine, get_bmapi().
- *
- * NOTE: getfunc_extlist only used by dirv1 checking code
- */
-static xfs_dfsbno_t
-getfunc_extlist(xfs_mount_t		*mp,
-		xfs_ino_t		ino,
-		xfs_dinode_t		*dip,
-		xfs_dfiloff_t		bno,
-		int			whichfork)
-{
-	xfs_bmbt_irec_t		irec;
-	xfs_dfsbno_t		final_fsbno = NULLDFSBNO;
-	xfs_bmbt_rec_t		*rootblock = (xfs_bmbt_rec_t *)
-						XFS_DFORK_PTR(dip, whichfork);
-	xfs_extnum_t		nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
-	int			i;
-
-	for (i = 0; i < nextents; i++)  {
-		libxfs_bmbt_disk_get_all(rootblock + i, &irec);
-		if (irec.br_startoff <= bno &&
-				bno < irec.br_startoff + irec.br_blockcount) {
-			final_fsbno = bno - irec.br_startoff + irec.br_startblock;
-			break;
-		}
-	}
-
-	return(final_fsbno);
-}
-
-/*
- * NOTE: getfunc_btree only used by dirv1 checking code... 
- */
-static xfs_dfsbno_t
-getfunc_btree(xfs_mount_t		*mp,
-		xfs_ino_t		ino,
-		xfs_dinode_t		*dip,
-		xfs_dfiloff_t		bno,
-		int			whichfork)
-{
-	int			i;
-#ifdef DEBUG
-	int			prev_level;
-#endif
-	int			found;
-	int			numrecs;
-	xfs_bmbt_rec_t		*rec;
-	xfs_bmbt_irec_t		irec;
-	xfs_bmbt_ptr_t		*pp;
-	xfs_bmbt_key_t		*key;
-	xfs_bmdr_key_t		*rkey;
-	xfs_bmdr_ptr_t		*rp;
-	xfs_dfsbno_t		fsbno;
-	xfs_buf_t		*bp;
-	xfs_dfsbno_t		final_fsbno = NULLDFSBNO;
-	struct xfs_btree_block	*block;
-	xfs_bmdr_block_t	*rootblock = (xfs_bmdr_block_t *)
-						XFS_DFORK_PTR(dip, whichfork);
-
-	ASSERT(rootblock->bb_level != 0);
-	/*
-	 * deal with root block, it's got a slightly different
-	 * header structure than interior nodes.  We know that
-	 * a btree should have at least 2 levels otherwise it
-	 * would be an extent list.
-	 */
-	rkey = XFS_BMDR_KEY_ADDR(rootblock, 1);
-	rp = XFS_BMDR_PTR_ADDR(rootblock, 1,
-		xfs_bmdr_maxrecs(mp, XFS_DFORK_SIZE(dip, mp, whichfork), 1));
-	found = -1;
-	for (i = 0; i < be16_to_cpu(rootblock->bb_numrecs) - 1; i++) {
-		if (be64_to_cpu(rkey[i].br_startoff) <= bno && 
-				bno < be64_to_cpu(rkey[i + 1].br_startoff)) {
-			found = i;
-			break;
-		}
-	}
-	if (i == be16_to_cpu(rootblock->bb_numrecs) - 1 && 
-				bno >= be64_to_cpu(rkey[i].br_startoff))
-		found = i;
-
-	ASSERT(found != -1);
-
-	fsbno = be64_to_cpu(rp[found]);
-
-	ASSERT(verify_dfsbno(mp, fsbno));
-
-	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
-				XFS_FSB_TO_BB(mp, 1), 0, NULL);
-	if (!bp) {
-		do_error(_("cannot read bmap block %" PRIu64 "\n"), fsbno);
-		return(NULLDFSBNO);
-	}
-	block = XFS_BUF_TO_BLOCK(bp);
-	numrecs = be16_to_cpu(block->bb_numrecs);
-
-	/*
-	 * ok, now traverse any interior btree nodes
-	 */
-#ifdef DEBUG
-	prev_level = be16_to_cpu(block->bb_level);
-#endif
-
-	while (be16_to_cpu(block->bb_level) > 0)  {
-#ifdef DEBUG
-		ASSERT(be16_to_cpu(block->bb_level) < prev_level);
-
-		prev_level = be16_to_cpu(block->bb_level);
-#endif
-		if (numrecs > mp->m_bmap_dmxr[1]) {
-			do_warn(
-_("# of bmap records in inode %" PRIu64 " exceeds max (%u, max - %u)\n"),
-				ino, numrecs,
-				mp->m_bmap_dmxr[1]);
-			libxfs_putbuf(bp);
-			return(NULLDFSBNO);
-		}
-		if (verbose && numrecs < mp->m_bmap_dmnr[1]) {
-			do_warn(
-_("- # of bmap records in inode %" PRIu64 " less than minimum (%u, min - %u), proceeding ...\n"),
-				ino, numrecs, mp->m_bmap_dmnr[1]);
-		}
-		key = XFS_BMBT_KEY_ADDR(mp, block, 1);
-		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
-		for (found = -1, i = 0; i < numrecs - 1; i++) {
-			if (be64_to_cpu(key[i].br_startoff) <= bno && bno < 
-					be64_to_cpu(key[i + 1].br_startoff)) {
-				found = i;
-				break;
-			}
-		}
-		if (i == numrecs - 1 && bno >= be64_to_cpu(key[i].br_startoff))
-			found = i;
-
-		ASSERT(found != -1);
-		fsbno = be64_to_cpu(pp[found]);
-
-		ASSERT(verify_dfsbno(mp, fsbno));
-
-		/*
-		 * release current btree block and read in the
-		 * next btree block to be traversed
-		 */
-		libxfs_putbuf(bp);
-		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
-					XFS_FSB_TO_BB(mp, 1), 0, NULL);
-		if (!bp) {
-			do_error(_("cannot read bmap block %" PRIu64 "\n"),
-				fsbno);
-			return(NULLDFSBNO);
-		}
-		block = XFS_BUF_TO_BLOCK(bp);
-		numrecs = be16_to_cpu(block->bb_numrecs);
-	}
-
-	/*
-	 * current block must be a leaf block
-	 */
-	ASSERT(be16_to_cpu(block->bb_level) == 0);
-	if (numrecs > mp->m_bmap_dmxr[0]) {
-		do_warn(
-_("# of bmap records in inode %" PRIu64 " greater than maximum (%u, max - %u)\n"),
-			ino, numrecs, mp->m_bmap_dmxr[0]);
-		libxfs_putbuf(bp);
-		return(NULLDFSBNO);
-	}
-	if (verbose && numrecs < mp->m_bmap_dmnr[0])
-		do_warn(
-_("- # of bmap records in inode %" PRIu64 " less than minimum (%u, min - %u), continuing...\n"),
-			ino, numrecs, mp->m_bmap_dmnr[0]);
-
-	rec = XFS_BMBT_REC_ADDR(mp, block, 1);
-	for (i = 0; i < numrecs; i++)  {
-		libxfs_bmbt_disk_get_all(rec + i, &irec);
-		if (irec.br_startoff <= bno &&
-				bno < irec.br_startoff + irec.br_blockcount) {
-			final_fsbno = bno - irec.br_startoff +
-							irec.br_startblock;
-			break;
-		}
-	}
-	libxfs_putbuf(bp);
-
-	if (final_fsbno == NULLDFSBNO)
-		do_warn(_("could not map block %" PRIu64 "\n"), bno);
-
-	return(final_fsbno);
-}
-
-/*
- * this could be smarter.  maybe we should have an open inode
- * routine that would get the inode buffer and return back
- * an inode handle.  I'm betting for the moment that this
- * is used only by the directory and attribute checking code
- * and that the avl tree find and buffer cache search are
- * relatively cheap.  If they're too expensive, we'll just
- * have to fix this and add an inode handle to the da btree
- * cursor.
- *
- * caller is responsible for checking doubly referenced blocks
- * and references to holes
- *
- * NOTE: get_bmapi only used by dirv1 checking code
- */
-xfs_dfsbno_t
-get_bmapi(xfs_mount_t *mp, xfs_dinode_t *dino_p,
-		xfs_ino_t ino_num, xfs_dfiloff_t bno, int whichfork)
-{
-	xfs_dfsbno_t		fsbno;
-
-	switch (XFS_DFORK_FORMAT(dino_p, whichfork)) {
-	case XFS_DINODE_FMT_EXTENTS:
-		fsbno = getfunc_extlist(mp, ino_num, dino_p, bno, whichfork);
-		break;
-	case XFS_DINODE_FMT_BTREE:
-		fsbno = getfunc_btree(mp, ino_num, dino_p, bno, whichfork);
-		break;
-	case XFS_DINODE_FMT_LOCAL:
-		do_error(_("get_bmapi() called for local inode %" PRIu64 "\n"),
-			ino_num);
-		fsbno = NULLDFSBNO;
-		break;
-	default:
-		/*
-		 * shouldn't happen
-		 */
-		do_error(_("bad inode format for inode %" PRIu64 "\n"), ino_num);
-		fsbno = NULLDFSBNO;
-	}
-
-	return(fsbno);
-}
-
-/*
  * higher level inode processing stuff starts here:
  * first, one utility routine for each type of inode
  */
diff --git a/repair/dinode.h b/repair/dinode.h
index 5ee51ca..80f3e4e 100644
--- a/repair/dinode.h
+++ b/repair/dinode.h
@@ -119,12 +119,6 @@ get_agino_buf(xfs_mount_t	*mp,
 		xfs_agino_t	agino,
 		xfs_dinode_t	**dipp);
 
-xfs_dfsbno_t
-get_bmapi(xfs_mount_t		*mp,
-		xfs_dinode_t	*dip,
-		xfs_ino_t	ino_num,
-		xfs_dfiloff_t	bno,
-		int             whichfork );
 
 void dinode_bmbt_translation_init(void);
 char * get_forkname(int whichfork);
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 8/9] repair: handle remote symlink CRC errors
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
                   ` (6 preceding siblings ...)
  2014-04-24  5:02 ` [PATCH 7/9] repair: remove more dirv1 leftovers Dave Chinner
@ 2014-04-24  5:02 ` Dave Chinner
  2014-04-25  6:01   ` Christoph Hellwig
  2014-04-24  5:02 ` [PATCH 9/9] repair: detect and handle attribute tree " Dave Chinner
  8 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:02 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

We can't really repair broken symlink buffer contents, but we can at
least warn about it and correct the CRC error so the symlink is
again readable.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/dinode.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/repair/dinode.c b/repair/dinode.c
index b086bec..6360dba 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -1254,6 +1254,7 @@ process_symlink_remote(
 	while (pathlen > 0) {
 		int	blk_cnt = 1;
 		int	byte_cnt;
+		int	dirty = 0;
 
 		fsbno = blkmap_get(blkmap, i);
 		if (fsbno == NULLDFSBNO) {
@@ -1284,6 +1285,12 @@ _("cannot read inode %" PRIu64 ", file block %d, disk block %" PRIu64 "\n"),
 				lino, i, fsbno);
 			return 1;
 		}
+		if (bp->b_error == EFSBADCRC) {
+			do_warn(
+_("Bad symlink buffer CRC, block %" PRIu64 ", inode %" PRIu64 ".\n"
+  "Correcting CRC, but symlink may be bad.\n"), fsbno, lino);
+			dirty = 1;
+		}
 
 		byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
 		byte_cnt = MIN(pathlen, byte_cnt);
@@ -1307,7 +1314,10 @@ _("bad symlink header ino %" PRIu64 ", file block %d, disk block %" PRIu64 "\n")
 		offset += byte_cnt;
 		i++;
 
-		libxfs_putbuf(bp);
+		if (dirty)
+			libxfs_writebuf(bp, 0);
+		else
+			libxfs_putbuf(bp);
 	}
 	return 0;
 }
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 9/9] repair: detect and handle attribute tree CRC errors
  2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
                   ` (7 preceding siblings ...)
  2014-04-24  5:02 ` [PATCH 8/9] repair: handle remote symlink CRC errors Dave Chinner
@ 2014-04-24  5:02 ` Dave Chinner
  8 siblings, 0 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-24  5:02 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Currently the attribute code will not detect and correct errors in
the attribute tree. It also fails to validate the CRCs and headers
on remote attribute blocks. Ensure that all the attribute blocks are
CRC checked and that the processing functions understand the correct
block formats for decoding.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/attr_repair.c | 45 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/repair/attr_repair.c b/repair/attr_repair.c
index ba85ac2..9b57960 100644
--- a/repair/attr_repair.c
+++ b/repair/attr_repair.c
@@ -604,6 +604,7 @@ verify_da_path(xfs_mount_t	*mp,
 			libxfs_putbuf(bp);
 			return(1);
 		}
+
 		/*
 		 * update cursor, write out the *current* level if
 		 * required.  don't write out the descendant level
@@ -615,6 +616,8 @@ verify_da_path(xfs_mount_t	*mp,
 			libxfs_writebuf(cursor->level[this_level].bp, 0);
 		else
 			libxfs_putbuf(cursor->level[this_level].bp);
+
+		/* switch cursor to point at the new buffer we just read */
 		cursor->level[this_level].bp = bp;
 		cursor->level[this_level].dirty = 0;
 		cursor->level[this_level].bno = dabno;
@@ -624,6 +627,14 @@ verify_da_path(xfs_mount_t	*mp,
 		cursor->level[this_level].n = newnode;
 #endif
 		entry = cursor->level[this_level].index = 0;
+
+		/*
+		 * We want to rewrite the buffer a CRC error seeing as it
+		 * contains what appears to be a valid node block, but only if
+		 * we are fixing errors.
+		 */
+		if (bp->b_error == EFSBADCRC && !no_modify)
+			cursor->level[this_level].dirty++;
 	}
 	/*
 	 * ditto for block numbers
@@ -974,6 +985,10 @@ rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap,
 	xfs_dfsbno_t	bno;
 	xfs_buf_t	*bp;
 	int		clearit = 0, i = 0, length = 0, amountdone = 0;
+	int		hdrsize = 0;
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		hdrsize = sizeof(struct xfs_attr3_rmt_hdr);
 
 	/* ASSUMPTION: valuelen is a valid number, so use it for looping */
 	/* Note that valuelen is not a multiple of blocksize */
@@ -986,16 +1001,26 @@ rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap,
 			break;
 		}
 		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
-				XFS_FSB_TO_BB(mp, 1), 0, NULL);
+				    XFS_FSB_TO_BB(mp, 1), 0,
+				    &xfs_attr3_rmt_buf_ops);
 		if (!bp) {
 			do_warn(
 	_("can't read remote block for attributes of inode %" PRIu64 "\n"), ino);
 			clearit = 1;
 			break;
 		}
+
+		if (bp->b_error == EFSBADCRC || bp->b_error == EFSCORRUPTED) {
+			do_warn(
+	_("Corrupt remote block for attributes of inode %" PRIu64 "\n"), ino);
+			clearit = 1;
+			break;
+		}
+
 		ASSERT(mp->m_sb.sb_blocksize == XFS_BUF_COUNT(bp));
-		length = MIN(XFS_BUF_COUNT(bp), valuelen - amountdone);
-		memmove(value, XFS_BUF_PTR(bp), length);
+
+		length = MIN(XFS_BUF_COUNT(bp) - hdrsize, valuelen - amountdone);
+		memmove(value, XFS_BUF_PTR(bp) + hdrsize, length);
 		amountdone += length;
 		value += length;
 		i++;
@@ -1143,7 +1168,6 @@ process_leaf_attr_block(
 
 	xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
 	clearit = usedbs = 0;
-	*repair = 0;
 	firstb = mp->m_sb.sb_blocksize;
 	stop = xfs_attr3_leaf_hdr_size(leaf);
 
@@ -1320,13 +1344,16 @@ process_leaf_attr_level(xfs_mount_t	*mp,
 		}
 
 		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno),
-					XFS_FSB_TO_BB(mp, 1), 0, NULL);
+				    XFS_FSB_TO_BB(mp, 1), 0,
+				    &xfs_attr3_leaf_buf_ops);
 		if (!bp) {
 			do_warn(
 	_("can't read file block %u (fsbno %" PRIu64 ") for attribute fork of inode %" PRIu64 "\n"),
 				da_bno, dev_bno, ino);
 			goto error_out;
 		}
+		if (bp->b_error == EFSBADCRC)
+			repair++;
 
 		leaf = bp->b_addr;
 		xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
@@ -1382,9 +1409,9 @@ process_leaf_attr_level(xfs_mount_t	*mp,
 
 		current_hashval = greatest_hashval;
 
-		if (repair && !no_modify) 
+		if (repair && !no_modify)
 			libxfs_writebuf(bp, 0);
-		else 
+		else
 			libxfs_putbuf(bp);
 	} while (da_bno != 0);
 
@@ -1512,6 +1539,8 @@ process_longform_attr(
 			ino);
 		return(1);
 	}
+	if (bp->b_error == EFSBADCRC)
+		(*repair)++;
 
 	/* verify leaf block */
 	leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
@@ -1555,7 +1584,7 @@ process_longform_attr(
 	case XFS_DA_NODE_MAGIC:		/* btree-form attribute */
 	case XFS_DA3_NODE_MAGIC:
 		/* must do this now, to release block 0 before the traversal */
-		if (repairlinks) {
+		if (*repair || repairlinks) {
 			*repair = 1;
 			libxfs_writebuf(bp, 0);
 		} else
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/9] db: verify buffer on type change
  2014-04-24  5:01 ` [PATCH 2/9] db: verify buffer on type change Dave Chinner
@ 2014-04-25  5:41   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-25  5:41 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/9] repair: ensure prefetched buffers have CRCs validated
  2014-04-24  5:01 ` [PATCH 3/9] repair: ensure prefetched buffers have CRCs validated Dave Chinner
@ 2014-04-25  5:47   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-25  5:47 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Thu, Apr 24, 2014 at 03:01:56PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Prefetch currently does not do CRC validation when the IO completes
> due to the optimisation it performs and the fact that it does not
> know what the type of metadata into the buffer is supposed to be.
> Hence, mark all prefetched buffers as "suspect" so that when the
> end user tries to read it with a supplied validation function the
> validation is run even though the buffer was already in the cache.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Looks good, but a few minor nitpicks below:

> +		if (ops && (bp->b_flags & LIBXFS_B_UNCHECKED)) {
> +			bp->b_error = 0;
> +			bp->b_ops = ops;
> +			bp->b_ops->verify_read(bp);
> +			bp->b_flags &= ~LIBXFS_B_UNCHECKED;
> +		}

There's three copies of code in the previous and this patch, it probably
should go into a helper function.

> +	else if (bp->b_ops) {
>  		bp->b_ops->verify_read(bp);
> +		bp->b_flags &= ~LIBXFS_B_UNCHECKED;
> +	}

Same with this.

>  		bp->b_flags |= LIBXFS_B_UPTODATE;
>  		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
> +		bp->b_flags &= ~LIBXFS_B_UNCHECKED;

Any reason not to clear all three flags in a single line?

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/9] repair: detect and correct CRC errors in directory blocks
  2014-04-24  5:01 ` [PATCH 4/9] repair: detect and correct CRC errors in directory blocks Dave Chinner
@ 2014-04-25  5:47   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-25  5:47 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-24  5:01 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
@ 2014-04-25  5:55   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-25  5:55 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

>  	sb = (xfs_sb_t *)calloc(BBSIZE, 1);

If you already do various cosmetic changes I'd recommend removing the
useles case here as well.

> -	if (status & XR_AG_AGF)  {
> +	if (agf_dirty || status & XR_AG_AGF)  {

> -	if (status & XR_AG_AGI)  {
> +	if (agi_dirty || status & XR_AG_AGI)  {

I can't see how agf_dirty and agi_dirty would ever be set at this point.

> +out_free:
> +	if (sb)
> +		free(sb);
> +	if (agibuf)
> +		libxfs_putbuf(agibuf);
> +	if (agfbuf)
> +		libxfs_putbuf(agfbuf);
> +	if (sbbuf)
> +		libxfs_putbuf(sbbuf);
> +	if (objname)
> +		do_error(_("can't get %s for ag %d\n"), objname, agno);
> +	return;

No need for a return statement at the end of a void returning function.

Also any reason for not using one goto for each unwind step like we do
elsewhere instead of the if (!NULL) checks?

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] repair: report AG btree verifier errors
  2014-04-24  5:01 ` [PATCH 6/9] repair: report AG btree verifier errors Dave Chinner
@ 2014-04-25  5:55   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-25  5:55 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 8/9] repair: handle remote symlink CRC errors
  2014-04-24  5:02 ` [PATCH 8/9] repair: handle remote symlink CRC errors Dave Chinner
@ 2014-04-25  6:01   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-25  6:01 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Thu, Apr 24, 2014 at 03:02:01PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> We can't really repair broken symlink buffer contents, but we can at
> least warn about it and correct the CRC error so the symlink is
> again readable.

> +			do_warn(
> +_("Bad symlink buffer CRC, block %" PRIu64 ", inode %" PRIu64 ".\n"
> +  "Correcting CRC, but symlink may be bad.\n"), fsbno, lino);
> +			dirty = 1;

Can you use the badcrc variable name here like in a few other places?

> +		if (dirty)
> +			libxfs_writebuf(bp, 0);
> +		else
> +			libxfs_putbuf(bp);

This needs a no_modify check.

Hmm, given how often we have this pattern and how easy it is do get
wrong, maybe libxfs_writebuf should do the no_modify check for us can we
can get rid of all this code?  Or maybe at least in a repair_writebuf
wrapper?  Not saying it should go into this series of course.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-28 21:04 [PATCH 0/9 v3] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
@ 2014-04-28 21:04 ` Dave Chinner
  2014-04-29 14:06   ` Brian Foster
  2014-04-29 18:16   ` Christoph Hellwig
  0 siblings, 2 replies; 28+ messages in thread
From: Dave Chinner @ 2014-04-28 21:04 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

repair doesn't currently detect verifier errors in AG header
blocks - apart from the primary superblock they are not detected.
They are, fortunately, corrected in the important cases (AGF, AGI
and AGFL) because these structures are rebuilt in phase 5, but if
you run xfs_repair in checking mode it won't report them as bad.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/scan.c | 83 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 34 deletions(-)

diff --git a/repair/scan.c b/repair/scan.c
index 1744c32..dec84ed 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -1207,39 +1207,38 @@ scan_ag(
 	void		*arg)
 {
 	struct aghdr_cnts *agcnts = arg;
-	xfs_agf_t	*agf;
-	xfs_buf_t	*agfbuf;
+	struct xfs_agf	*agf;
+	struct xfs_buf	*agfbuf = NULL;
 	int		agf_dirty = 0;
-	xfs_agi_t	*agi;
-	xfs_buf_t	*agibuf;
+	struct xfs_agi	*agi;
+	struct xfs_buf	*agibuf = NULL;
 	int		agi_dirty = 0;
-	xfs_sb_t	*sb;
-	xfs_buf_t	*sbbuf;
+	struct xfs_sb	*sb = NULL;
+	struct xfs_buf	*sbbuf = NULL;
 	int		sb_dirty = 0;
 	int		status;
+	char		*objname = NULL;
 
-	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
-				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
-	if (!sbbuf)  {
-		do_error(_("can't get root superblock for ag %d\n"), agno);
-		return;
-	}
-	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
+	sb = (struct xfs_sb *)calloc(BBSIZE, 1);
 	if (!sb) {
 		do_error(_("can't allocate memory for superblock\n"));
-		libxfs_putbuf(sbbuf);
 		return;
 	}
+
+	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
+				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
+	if (!sbbuf)  {
+		objname = _("root superblock");
+		goto out_free_sb;
+	}
 	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
 
 	agfbuf = libxfs_readbuf(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
 	if (!agfbuf)  {
-		do_error(_("can't read agf block for ag %d\n"), agno);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-		return;
+		objname = _("agf block");
+		goto out_free_sbbuf;
 	}
 	agf = XFS_BUF_TO_AGF(agfbuf);
 
@@ -1247,11 +1246,8 @@ scan_ag(
 			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
 	if (!agibuf)  {
-		do_error(_("can't read agi block for ag %d\n"), agno);
-		libxfs_putbuf(agfbuf);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-		return;
+		objname = _("agi block");
+		goto out_free_agfbuf;
 	}
 	agi = XFS_BUF_TO_AGI(agibuf);
 
@@ -1295,15 +1291,9 @@ scan_ag(
 	}
 
 	if (status && no_modify)  {
-		libxfs_putbuf(agibuf);
-		libxfs_putbuf(agfbuf);
-		libxfs_putbuf(sbbuf);
-		free(sb);
-
 		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
 			agno);
-
-		return;
+		goto out_free_agibuf;
 	}
 
 	scan_freelist(agf, agcnts);
@@ -1312,21 +1302,34 @@ scan_ag(
 	validate_agi(agi, agno, agcnts);
 
 	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
+	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
+	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
+
+	/*
+	 * Only pay attention to CRC/verifier errors if we can correct them.
+	 * While there, ensure that we corrected a corruption error if the
+	 * verifier detected one.
+	 */
+	if (!no_modify) {
+		ASSERT(agi_dirty || agibuf->b_error != EFSCORRUPTED);
+		ASSERT(agf_dirty || agfbuf->b_error != EFSCORRUPTED);
+		ASSERT(sb_dirty || sbbuf->b_error != EFSCORRUPTED);
+
+		agi_dirty += (agibuf->b_error == EFSBADCRC);
+		agf_dirty += (agfbuf->b_error == EFSBADCRC);
+		sb_dirty += (sbbuf->b_error == EFSBADCRC);
+	}
 
 	if (agi_dirty && !no_modify)
 		libxfs_writebuf(agibuf, 0);
 	else
 		libxfs_putbuf(agibuf);
 
-	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
-
 	if (agf_dirty && !no_modify)
 		libxfs_writebuf(agfbuf, 0);
 	else
 		libxfs_putbuf(agfbuf);
 
-	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
-
 	if (sb_dirty && !no_modify) {
 		if (agno == 0)
 			memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
@@ -1341,6 +1344,18 @@ scan_ag(
 	print_inode_list(i);
 #endif
 	return;
+
+out_free_agibuf:
+	libxfs_putbuf(agibuf);
+out_free_agfbuf:
+	libxfs_putbuf(agfbuf);
+out_free_sbbuf:
+	libxfs_putbuf(sbbuf);
+out_free_sb:
+	free(sb);
+
+	if (objname)
+		do_error(_("can't get %s for ag %d\n"), objname, agno);
 }
 
 #define SCAN_THREADS 32
-- 
1.9.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-28 21:04 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
@ 2014-04-29 14:06   ` Brian Foster
  2014-05-01 23:27     ` Dave Chinner
  2014-04-29 18:16   ` Christoph Hellwig
  1 sibling, 1 reply; 28+ messages in thread
From: Brian Foster @ 2014-04-29 14:06 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Apr 29, 2014 at 07:04:55AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> repair doesn't currently detect verifier errors in AG header
> blocks - apart from the primary superblock they are not detected.
> They are, fortunately, corrected in the important cases (AGF, AGI
> and AGFL) because these structures are rebuilt in phase 5, but if
> you run xfs_repair in checking mode it won't report them as bad.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  repair/scan.c | 83 +++++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 49 insertions(+), 34 deletions(-)
> 
> diff --git a/repair/scan.c b/repair/scan.c
> index 1744c32..dec84ed 100644
> --- a/repair/scan.c
> +++ b/repair/scan.c
> @@ -1207,39 +1207,38 @@ scan_ag(
>  	void		*arg)
>  {
>  	struct aghdr_cnts *agcnts = arg;
> -	xfs_agf_t	*agf;
> -	xfs_buf_t	*agfbuf;
> +	struct xfs_agf	*agf;
> +	struct xfs_buf	*agfbuf = NULL;
>  	int		agf_dirty = 0;
> -	xfs_agi_t	*agi;
> -	xfs_buf_t	*agibuf;
> +	struct xfs_agi	*agi;
> +	struct xfs_buf	*agibuf = NULL;
>  	int		agi_dirty = 0;
> -	xfs_sb_t	*sb;
> -	xfs_buf_t	*sbbuf;
> +	struct xfs_sb	*sb = NULL;
> +	struct xfs_buf	*sbbuf = NULL;
>  	int		sb_dirty = 0;
>  	int		status;
> +	char		*objname = NULL;
>  
> -	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
> -				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
> -	if (!sbbuf)  {
> -		do_error(_("can't get root superblock for ag %d\n"), agno);
> -		return;
> -	}
> -	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
> +	sb = (struct xfs_sb *)calloc(BBSIZE, 1);
>  	if (!sb) {
>  		do_error(_("can't allocate memory for superblock\n"));
> -		libxfs_putbuf(sbbuf);
>  		return;
>  	}
> +
> +	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
> +				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
> +	if (!sbbuf)  {
> +		objname = _("root superblock");
> +		goto out_free_sb;
> +	}
>  	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
>  
>  	agfbuf = libxfs_readbuf(mp->m_dev,
>  			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
>  			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
>  	if (!agfbuf)  {
> -		do_error(_("can't read agf block for ag %d\n"), agno);
> -		libxfs_putbuf(sbbuf);
> -		free(sb);
> -		return;
> +		objname = _("agf block");
> +		goto out_free_sbbuf;
>  	}
>  	agf = XFS_BUF_TO_AGF(agfbuf);
>  
> @@ -1247,11 +1246,8 @@ scan_ag(
>  			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
>  			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
>  	if (!agibuf)  {
> -		do_error(_("can't read agi block for ag %d\n"), agno);
> -		libxfs_putbuf(agfbuf);
> -		libxfs_putbuf(sbbuf);
> -		free(sb);
> -		return;
> +		objname = _("agi block");
> +		goto out_free_agfbuf;
>  	}
>  	agi = XFS_BUF_TO_AGI(agibuf);
>  
> @@ -1295,15 +1291,9 @@ scan_ag(
>  	}
>  
>  	if (status && no_modify)  {
> -		libxfs_putbuf(agibuf);
> -		libxfs_putbuf(agfbuf);
> -		libxfs_putbuf(sbbuf);
> -		free(sb);
> -
>  		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
>  			agno);
> -
> -		return;
> +		goto out_free_agibuf;
>  	}
>  
>  	scan_freelist(agf, agcnts);
> @@ -1312,21 +1302,34 @@ scan_ag(
>  	validate_agi(agi, agno, agcnts);
>  
>  	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
> +	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
> +	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
> +
> +	/*
> +	 * Only pay attention to CRC/verifier errors if we can correct them.
> +	 * While there, ensure that we corrected a corruption error if the
> +	 * verifier detected one.
> +	 */
> +	if (!no_modify) {
> +		ASSERT(agi_dirty || agibuf->b_error != EFSCORRUPTED);
> +		ASSERT(agf_dirty || agfbuf->b_error != EFSCORRUPTED);
> +		ASSERT(sb_dirty || sbbuf->b_error != EFSCORRUPTED);
> +
> +		agi_dirty += (agibuf->b_error == EFSBADCRC);
> +		agf_dirty += (agfbuf->b_error == EFSBADCRC);
> +		sb_dirty += (sbbuf->b_error == EFSBADCRC);
> +	}

So we'll detect and correct the CRC error in normal mode, but no longer
issue the preceding warnings ("would reset bad ...") for CRC errors in
no_modify mode. Is that desired?

I ask because it looks like a departure from previous versions.
Otherwise, the code looks fine to me.

Brian

>  
>  	if (agi_dirty && !no_modify)
>  		libxfs_writebuf(agibuf, 0);
>  	else
>  		libxfs_putbuf(agibuf);
>  
> -	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
> -
>  	if (agf_dirty && !no_modify)
>  		libxfs_writebuf(agfbuf, 0);
>  	else
>  		libxfs_putbuf(agfbuf);
>  
> -	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
> -
>  	if (sb_dirty && !no_modify) {
>  		if (agno == 0)
>  			memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
> @@ -1341,6 +1344,18 @@ scan_ag(
>  	print_inode_list(i);
>  #endif
>  	return;
> +
> +out_free_agibuf:
> +	libxfs_putbuf(agibuf);
> +out_free_agfbuf:
> +	libxfs_putbuf(agfbuf);
> +out_free_sbbuf:
> +	libxfs_putbuf(sbbuf);
> +out_free_sb:
> +	free(sb);
> +
> +	if (objname)
> +		do_error(_("can't get %s for ag %d\n"), objname, agno);
>  }
>  
>  #define SCAN_THREADS 32
> -- 
> 1.9.0
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-28 21:04 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
  2014-04-29 14:06   ` Brian Foster
@ 2014-04-29 18:16   ` Christoph Hellwig
  1 sibling, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2014-04-29 18:16 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Apr 29, 2014 at 07:04:55AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> repair doesn't currently detect verifier errors in AG header
> blocks - apart from the primary superblock they are not detected.
> They are, fortunately, corrected in the important cases (AGF, AGI
> and AGFL) because these structures are rebuilt in phase 5, but if
> you run xfs_repair in checking mode it won't report them as bad.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] repair: detect CRC errors in AG headers
  2014-04-29 14:06   ` Brian Foster
@ 2014-05-01 23:27     ` Dave Chinner
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Chinner @ 2014-05-01 23:27 UTC (permalink / raw)
  To: Brian Foster; +Cc: xfs

On Tue, Apr 29, 2014 at 10:06:19AM -0400, Brian Foster wrote:
> On Tue, Apr 29, 2014 at 07:04:55AM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > repair doesn't currently detect verifier errors in AG header
> > blocks - apart from the primary superblock they are not detected.
> > They are, fortunately, corrected in the important cases (AGF, AGI
> > and AGFL) because these structures are rebuilt in phase 5, but if
> > you run xfs_repair in checking mode it won't report them as bad.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
.....
> > @@ -1312,21 +1302,34 @@ scan_ag(
> >  	validate_agi(agi, agno, agcnts);
> >  
> >  	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
> > +	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
> > +	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
> > +
> > +	/*
> > +	 * Only pay attention to CRC/verifier errors if we can correct them.
> > +	 * While there, ensure that we corrected a corruption error if the
> > +	 * verifier detected one.
> > +	 */
> > +	if (!no_modify) {
> > +		ASSERT(agi_dirty || agibuf->b_error != EFSCORRUPTED);
> > +		ASSERT(agf_dirty || agfbuf->b_error != EFSCORRUPTED);
> > +		ASSERT(sb_dirty || sbbuf->b_error != EFSCORRUPTED);
> > +
> > +		agi_dirty += (agibuf->b_error == EFSBADCRC);
> > +		agf_dirty += (agfbuf->b_error == EFSBADCRC);
> > +		sb_dirty += (sbbuf->b_error == EFSBADCRC);
> > +	}
> 
> So we'll detect and correct the CRC error in normal mode, but no longer
> issue the preceding warnings ("would reset bad ...") for CRC errors in
> no_modify mode. Is that desired?

It will still throw a bad CRC error, so the user is still told that
there is a problem.

> I ask because it looks like a departure from previous versions.
> Otherwise, the code looks fine to me.

Slightly, but I don't think it makes much difference...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2014-05-01 23:27 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-04-24  5:01 [PATCH 0/9 V2] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
2014-04-24  5:01 ` [PATCH 1/9] db: don't claim unchecked CRCs are correct Dave Chinner
2014-04-24  5:01 ` [PATCH 2/9] db: verify buffer on type change Dave Chinner
2014-04-25  5:41   ` Christoph Hellwig
2014-04-24  5:01 ` [PATCH 3/9] repair: ensure prefetched buffers have CRCs validated Dave Chinner
2014-04-25  5:47   ` Christoph Hellwig
2014-04-24  5:01 ` [PATCH 4/9] repair: detect and correct CRC errors in directory blocks Dave Chinner
2014-04-25  5:47   ` Christoph Hellwig
2014-04-24  5:01 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
2014-04-25  5:55   ` Christoph Hellwig
2014-04-24  5:01 ` [PATCH 6/9] repair: report AG btree verifier errors Dave Chinner
2014-04-25  5:55   ` Christoph Hellwig
2014-04-24  5:02 ` [PATCH 7/9] repair: remove more dirv1 leftovers Dave Chinner
2014-04-24  5:02 ` [PATCH 8/9] repair: handle remote symlink CRC errors Dave Chinner
2014-04-25  6:01   ` Christoph Hellwig
2014-04-24  5:02 ` [PATCH 9/9] repair: detect and handle attribute tree " Dave Chinner
  -- strict thread matches above, loose matches on Subject: below --
2014-04-28 21:04 [PATCH 0/9 v3] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
2014-04-28 21:04 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
2014-04-29 14:06   ` Brian Foster
2014-05-01 23:27     ` Dave Chinner
2014-04-29 18:16   ` Christoph Hellwig
2014-04-15  8:24 [PATCH 0/9] xfs_db, xfs_repair: improve CRC error detection Dave Chinner
2014-04-15  8:24 ` [PATCH 5/9] repair: detect CRC errors in AG headers Dave Chinner
2014-04-15 19:40   ` Brian Foster
2014-04-15 21:52     ` Dave Chinner
2014-04-21  7:11   ` Christoph Hellwig
2014-04-21 23:35     ` Dave Chinner
2014-04-22  6:47       ` Christoph Hellwig
2014-04-22  9:10         ` Dave Chinner
2014-04-22  9:41           ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).