REVIEW: zero existing secondary superblocks with mkfs.xfs -f

public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed

* REVIEW: zero existing secondary superblocks with mkfs.xfs -f
@ 2007-07-11  9:07 Barry Naujok
  2007-07-11 18:06 ` Eric Sandeen
  0 siblings, 1 reply; 4+ messages in thread
From: Barry Naujok @ 2007-07-11  9:07 UTC (permalink / raw)
  To: xfs@oss.sgi.com, xfs-dev

[-- Attachment #1: Type: text/plain, Size: 884 bytes --]

The attached patch zeros all the secondary superblocks if
overwriting an existing XFS filesystem.

This allows xfs_repair to detect the correct secondary block
if the primary is destroyed (like XFS QA test 030).

To demonstrate the bad behaviour, try the following:

# mkfs.xfs -f /dev/xxx
... [note agcount reported]
# dd if=/dev/zero of=/dev/xxx oflag=direct bs=512 count=1
# xfs_repair /dev/xxx
...
(it should recover and a second repair will succeed.)
# mkfs.xfs -f -d agcount=<smaller than above> /dev/xxx
...
# dd if=/dev/zero of=/dev/xxx oflag=direct bs=512 count=1
# xfs_repair /dev/xxx

It will either die or succeed (in my test, xfs_repair
actually crashed because it doesn't handle bad extents yet!).

If it succeeds, try to mount it and run:
# xfs_info /dev/xxx
(It should be the geometry of the first mkfs run.)

Try the patch and repeat - success.

[-- Attachment #2: mkfs_zero_old_ags.patch --]
[-- Type: application/octet-stream, Size: 7924 bytes --]

--- a/xfsprogs/mkfs/xfs_mkfs.c	2007-07-11 18:52:55.000000000 +1000
+++ b/xfsprogs/mkfs/xfs_mkfs.c	2007-07-11 18:51:41.484668802 +1000
@@ -412,7 +412,7 @@ calc_default_ag_geometry(
 	 * based on the prefered AG size, not vice-versa - the
 	 * count can be increased by growfs, so prefer to use
 	 * smaller counts at mkfs time.
-	 * 
+	 *
 	 * This scales us up smoothly between min/max AG sizes.
 	 */
 	if (dblocks > GIGABYTES(512, blocklog))
@@ -477,7 +477,7 @@ validate_ag_geometry(
 	_("too few allocation groups for size = %lld\n"), (long long)agsize);
 		fprintf(stderr,
 	_("need at least %lld allocation groups\n"),
-		(long long)(dblocks / XFS_AG_MAX_BLOCKS(blocklog) + 
+		(long long)(dblocks / XFS_AG_MAX_BLOCKS(blocklog) +
 			(dblocks % XFS_AG_MAX_BLOCKS(blocklog) != 0)));
 		usage();
 	}
@@ -506,6 +506,70 @@ validate_ag_geometry(
 	}
 }
 
+static void
+zero_old_xfs_structures(
+	libxfs_init_t		*xi,
+	int			sectsize)
+{
+	void 			*buf;
+	xfs_sb_t 		sb;
+	__uint32_t		bsize;
+	int			i;
+	xfs_off_t		off;
+
+	/*
+	 * read in existing filesystem superblock, use its geometry
+	 * settings and zero the existing secondary superblocks.
+	 */
+	buf = memalign(libxfs_device_alignment(), sectsize);
+	if (!buf) {
+		fprintf(stderr,
+	_("error reading existing superblock -- failed to memalign buffer\n"));
+		return;
+	}
+	bzero(buf, sectsize);
+
+	if (pread(xi->dfd, buf, sectsize, 0) != sectsize) {
+		fprintf(stderr, _("existing superblock read failed: %s\n"),
+			strerror(errno));
+		free(buf);
+		return;
+	}
+	libxfs_xlate_sb(buf, &sb, 1, XFS_SB_ALL_BITS);
+
+	/*
+	 * perform same basic superblock validation to make sure we
+	 * actually zero secondary blocks
+	 */
+	if (sb.sb_magicnum != XFS_SB_MAGIC || sb.sb_blocksize == 0)
+		goto done;
+
+	for (bsize = 1, i = 0; bsize < sb.sb_blocksize &&
+			i < sizeof(sb.sb_blocksize) * NBBY; i++)
+		bsize <<= 1;
+
+	if (i < XFS_MIN_BLOCKSIZE_LOG || i > XFS_MAX_BLOCKSIZE_LOG ||
+			i != sb.sb_blocklog)
+		goto done;
+
+	if (sb.sb_dblocks > ((__uint64_t)sb.sb_agcount * sb.sb_agblocks) ||
+			sb.sb_dblocks < ((__uint64_t)(sb.sb_agcount - 1) *
+					 sb.sb_agblocks + XFS_MIN_AG_BLOCKS))
+		goto done;
+
+	/*
+	 * block size and basic geometry seem alright, zero the secondaries.
+	 */
+	bzero(buf, sectsize);
+	for (i = 1; i < sb.sb_agcount; i++)  {
+		off = (xfs_off_t)i * sb.sb_agblocks << sb.sb_blocklog;
+		if (pwrite(xi->dfd, buf, sectsize, off) == -1)
+			break;
+	}
+done:
+	free(buf);
+}
+
 int
 main(
 	int			argc,
@@ -1479,7 +1543,7 @@ main(
 			if (XFS_MIN_RTEXTSIZE <= rtextbytes &&
 			    (rtextbytes <= XFS_MAX_RTEXTSIZE)) {
 				rtextblocks = rswidth;
-			} 
+			}
 		}
 		if (!rtextblocks) {
 			rtextblocks = (blocksize < XFS_MIN_RTEXTSIZE) ?
@@ -1587,7 +1651,7 @@ main(
 	else if (!dsize) {
 		fprintf(stderr, _("can't get size of data subvolume\n"));
 		usage();
-	} 
+	}
 	if (dblocks < XFS_MIN_DATA_BLOCKS) {
 		fprintf(stderr,
 	_("size %lld of data subvolume is too small, minimum %d blocks\n"),
@@ -1661,7 +1725,7 @@ _("size %s specified for log subvolume i
 		logblocks = MAX(logblocks,
 				MAX(XFS_DFL_LOG_SIZE,
 					max_tr_res * XFS_DFL_LOG_FACTOR));
-		logblocks = MIN(logblocks, XFS_MAX_LOG_BLOCKS); 
+		logblocks = MIN(logblocks, XFS_MAX_LOG_BLOCKS);
 		if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) {
 			logblocks = XFS_MAX_LOG_BYTES >> blocklog;
 		}
@@ -1743,20 +1807,20 @@ _("size %s specified for log subvolume i
 	 * If dsunit is a multiple of fs blocksize, then check that is a
 	 * multiple of the agsize too
 	 */
-	if (dsunit && !(BBTOB(dsunit) % blocksize) && 
+	if (dsunit && !(BBTOB(dsunit) % blocksize) &&
 	    dswidth && !(BBTOB(dswidth) % blocksize)) {
 
 		/* convert from 512 byte blocks to fs blocksize */
 		dsunit = DTOBT(dsunit);
 		dswidth = DTOBT(dswidth);
 
-		/* 
+		/*
 		 * agsize is not a multiple of dsunit
 		 */
 		if ((agsize % dsunit) != 0) {
 			/*
-			 * Round up to stripe unit boundary. Also make sure 
-			 * that agsize is still larger than 
+			 * Round up to stripe unit boundary. Also make sure
+			 * that agsize is still larger than
 			 * XFS_AG_MIN_BLOCKS(blocklog)
 		 	 */
 			tmp_agsize = ((agsize + (dsunit - 1))/ dsunit) * dsunit;
@@ -1770,7 +1834,7 @@ _("size %s specified for log subvolume i
 			    (tmp_agsize <= XFS_AG_MAX_BLOCKS(blocklog)) &&
 			    !daflag) {
 				agsize = tmp_agsize;
-				agcount = dblocks/agsize + 
+				agcount = dblocks/agsize +
 						(dblocks % agsize != 0);
 				if (dasize || daflag)
 					fprintf(stderr,
@@ -1779,7 +1843,7 @@ _("size %s specified for log subvolume i
 			} else {
 				if (nodsflag) {
 					dsunit = dswidth = 0;
-				} else { 
+				} else {
 					fprintf(stderr,
 _("Allocation group size (%lld) is not a multiple of the stripe unit (%d)\n"),
 						(long long)agsize, dsunit);
@@ -1789,7 +1853,7 @@ _("Allocation group size (%lld) is not a
 		}
 		if (dswidth && ((agsize % dswidth) == 0) && (agcount > 1)) {
 			/* This is a non-optimal configuration because all AGs
-			 * start on the same disk in the stripe.  Changing 
+			 * start on the same disk in the stripe.  Changing
 			 * the AG size by one sunit will guarantee that this
 			 * does not happen.
 			 */
@@ -1826,12 +1890,12 @@ an AG size that is one stripe unit small
 	} else {
 		if (nodsflag)
 			dsunit = dswidth = 0;
-		else { 
+		else {
 			fprintf(stderr,
 				_("%s: Stripe unit(%d) or stripe width(%d) is "
 				"not a multiple of the block size(%d)\n"),
-				progname, BBTOB(dsunit), BBTOB(dswidth), 
-				blocksize); 	
+				progname, BBTOB(dsunit), BBTOB(dswidth),
+				blocksize);
 			exit(1);
 		}
 	}
@@ -2005,6 +2069,9 @@ an AG size that is one stripe unit small
 			(sectorsize != BBSIZE || lsectorsize != BBSIZE),
 			sbp->sb_features2 != 0);
 
+	if (force_overwrite)
+		zero_old_xfs_structures(&xi, sectorsize);
+
 	/*
 	 * Zero out the beginning of the device, to obliterate any old
 	 * filesystem signatures out there.  This should take care of
@@ -2039,7 +2106,7 @@ an AG size that is one stripe unit small
  	 * (MD sb is ~64k from the end, take out a wider swath to be sure)
 	 */
 	if (!xi.disfile) {
-		buf = libxfs_getbuf(xi.ddev, (xi.dsize - BTOBB(WHACK_SIZE)), 
+		buf = libxfs_getbuf(xi.ddev, (xi.dsize - BTOBB(WHACK_SIZE)),
 				    BTOBB(WHACK_SIZE));
 		bzero(XFS_BUF_PTR(buf), WHACK_SIZE);
 		libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
@@ -2101,7 +2168,7 @@ an AG size that is one stripe unit small
 		INT_SET(agf->agf_longest, ARCH_CONVERT, nbmblocks);
 		if (loginternal && agno == logagno) {
 			INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -logblocks);
-			INT_SET(agf->agf_longest, ARCH_CONVERT, agsize - 
+			INT_SET(agf->agf_longest, ARCH_CONVERT, agsize -
 				XFS_FSB_TO_AGBNO(mp, logstart) - logblocks);
 		}
 		if (XFS_MIN_FREELIST(agf, mp) > worst_freelist)
@@ -2153,7 +2220,7 @@ an AG size that is one stripe unit small
 				 * Have to insert two records
 				 * Insert pad record for stripe align of log
 				 */
-				INT_SET(arec->ar_blockcount, ARCH_CONVERT, 
+				INT_SET(arec->ar_blockcount, ARCH_CONVERT,
 					(xfs_extlen_t)(XFS_FSB_TO_AGBNO(
 						mp, logstart)
 				  	- (INT_GET(arec->ar_startblock,
@@ -2169,12 +2236,12 @@ an AG size that is one stripe unit small
 						ARCH_CONVERT));
 				arec = nrec;
 				INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1);
-			} 
+			}
 			/*
 			 * Change record start to after the internal log
 			 */
 			INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks);
-		} 
+		}
 		INT_SET(arec->ar_blockcount, ARCH_CONVERT,
 			(xfs_extlen_t)(agsize -
 				INT_GET(arec->ar_startblock, ARCH_CONVERT)));
@@ -2212,7 +2279,7 @@ an AG size that is one stripe unit small
 				INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1);
 			}
 			INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks);
-		}	
+		}
 		INT_SET(arec->ar_blockcount, ARCH_CONVERT, (xfs_extlen_t)
 			(agsize - INT_GET(arec->ar_startblock, ARCH_CONVERT)));
 		libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: REVIEW: zero existing secondary superblocks with mkfs.xfs -f
  2007-07-11  9:07 REVIEW: zero existing secondary superblocks with mkfs.xfs -f Barry Naujok
@ 2007-07-11 18:06 ` Eric Sandeen
  2007-07-12  0:19   ` Barry Naujok
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Sandeen @ 2007-07-11 18:06 UTC (permalink / raw)
  To: Barry Naujok; +Cc: xfs@oss.sgi.com, xfs-dev

Barry Naujok wrote:
> The attached patch zeros all the secondary superblocks if
> overwriting an existing XFS filesystem.
> 
> This allows xfs_repair to detect the correct secondary block
> if the primary is destroyed (like XFS QA test 030).

Seems good to me (after filtering out all of the quilt-induced
whitespace changes ;-) )

+	/*
+	 * perform same basic superblock validation to make sure we
+	 * actually zero secondary blocks
+	 */
+	if (sb.sb_magicnum != XFS_SB_MAGIC || sb.sb_blocksize == 0)
+		goto done;

Is there any chance we'd be here if the first test weren't already true?
 *shrug* harmless though I guess.

If something goes wrong and the old found SB is full of junk, this is
non-fatal, right.

Out of curiosity, why not just call verify_sb for the sanity checks
instead of recreating a subset of them in zero_old_xfs_structures?

Thanks,

-Eric

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: REVIEW: zero existing secondary superblocks with mkfs.xfs -f
  2007-07-11 18:06 ` Eric Sandeen
@ 2007-07-12  0:19   ` Barry Naujok
  2007-07-12  2:35     ` Eric Sandeen
  0 siblings, 1 reply; 4+ messages in thread
From: Barry Naujok @ 2007-07-12  0:19 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: xfs@oss.sgi.com, xfs-dev

On Thu, 12 Jul 2007 04:06:02 +1000, Eric Sandeen <sandeen@sandeen.net>  
wrote:

> Barry Naujok wrote:
>> The attached patch zeros all the secondary superblocks if
>> overwriting an existing XFS filesystem.
>>
>> This allows xfs_repair to detect the correct secondary block
>> if the primary is destroyed (like XFS QA test 030).
>
> Seems good to me (after filtering out all of the quilt-induced
> whitespace changes ;-) )

Doh, forgot to prune that.

> +	/*
> +	 * perform same basic superblock validation to make sure we
> +	 * actually zero secondary blocks
> +	 */
> +	if (sb.sb_magicnum != XFS_SB_MAGIC || sb.sb_blocksize == 0)
> +		goto done;
>
> Is there any chance we'd be here if the first test weren't already true?
>  *shrug* harmless though I guess.

It's quite easy to get there if you run mkfs.xfs -f on non-XFS
filesystem.

> If something goes wrong and the old found SB is full of junk, this is
> non-fatal, right.

Yep. Worst that will happen is the pwrite loop fails, and if so, it
stops and exits. Maybe one subtle enhancement is to make sure the
pwrite loop doesn't extend beyond the new filesystem size.

> Out of curiosity, why not just call verify_sb for the sanity checks
> instead of recreating a subset of them in zero_old_xfs_structures?

Because that code is in xfs_repair and not mkfs. With mkfs.xfs, we
don't really care if anything else in the SB is bad.

Barry.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: REVIEW: zero existing secondary superblocks with mkfs.xfs -f
  2007-07-12  0:19   ` Barry Naujok
@ 2007-07-12  2:35     ` Eric Sandeen
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Sandeen @ 2007-07-12  2:35 UTC (permalink / raw)
  To: Barry Naujok; +Cc: xfs@oss.sgi.com, xfs-dev

Barry Naujok wrote:
> On Thu, 12 Jul 2007 04:06:02 +1000, Eric Sandeen <sandeen@sandeen.net>  
> wrote:

>> +	if (sb.sb_magicnum != XFS_SB_MAGIC || sb.sb_blocksize == 0)
>> +		goto done;
>>
>> Is there any chance we'd be here if the first test weren't already true?
>>  *shrug* harmless though I guess.
> 
> It's quite easy to get there if you run mkfs.xfs -f on non-XFS
> filesystem.

Oh, duh.  Of course.

>> If something goes wrong and the old found SB is full of junk, this is
>> non-fatal, right.
> 
> Yep. Worst that will happen is the pwrite loop fails, and if so, it
> stops and exits. Maybe one subtle enhancement is to make sure the
> pwrite loop doesn't extend beyond the new filesystem size.
> 
>> Out of curiosity, why not just call verify_sb for the sanity checks
>> instead of recreating a subset of them in zero_old_xfs_structures?
> 
> Because that code is in xfs_repair and not mkfs. With mkfs.xfs, we
> don't really care if anything else in the SB is bad.

Maybe a libxfs candidate?  But, ok.  Just a thought.  :)

-Eric

> Barry.
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-07-12  2:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-11  9:07 REVIEW: zero existing secondary superblocks with mkfs.xfs -f Barry Naujok
2007-07-11 18:06 ` Eric Sandeen
2007-07-12  0:19   ` Barry Naujok
2007-07-12  2:35     ` Eric Sandeen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox