public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: xfs@oss.sgi.com
Subject: [PATCH 03/28] xfs: support min/max agbno args in block allocator
Date: Tue,  2 Jun 2015 14:41:36 -0400	[thread overview]
Message-ID: <1433270521-62026-4-git-send-email-bfoster@redhat.com> (raw)
In-Reply-To: <1433270521-62026-1-git-send-email-bfoster@redhat.com>

The block allocator supports various arguments to tweak block allocation
behavior and set allocation requirements. The sparse inode chunk feature
introduces a new requirement not supported by the current arguments.
Sparse inode allocations must convert or merge into an inode record that
describes a fixed length chunk (64 inodes x inodesize). Full inode chunk
allocations by definition always result in valid inode records. Sparse
chunk allocations are smaller and the associated records can refer to
blocks not owned by the inode chunk. This model can result in invalid
inode records in certain cases.

For example, if a sparse allocation occurs near the start of an AG, the
aligned inode record for that chunk might refer to agbno 0. If an
allocation occurs towards the end of the AG and the AG size is not
aligned, the inode record could refer to blocks beyond the end of the
AG. While neither of these scenarios directly result in corruption, they
both insert invalid inode records and at minimum cause repair to
complain, are unlikely to merge into full chunks over time and set land
mines for other areas of code.

To guarantee sparse inode chunk allocation creates valid inode records,
support the ability to specify an agbno range limit for
XFS_ALLOCTYPE_NEAR_BNO block allocations. The min/max agbno's are
specified in the allocation arguments and limit the block allocation
algorithms to that range. The starting 'agbno' hint is clamped to the
range if the specified agbno is out of range. If no sufficient extent is
available within the range, the allocation fails. For backwards
compatibility, the min/max fields can be initialized to 0 to disable
range limiting (e.g., equivalent to min=0,max=agsize).

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 libxfs/xfs_alloc.c | 42 +++++++++++++++++++++++++++++++++++++-----
 libxfs/xfs_alloc.h |  2 ++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index 23e3c53..5d4f094 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -145,13 +145,27 @@ xfs_alloc_compute_aligned(
 {
 	xfs_agblock_t	bno;
 	xfs_extlen_t	len;
+	xfs_extlen_t	diff;
 
 	/* Trim busy sections out of found extent */
 	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 
+	/*
+	 * If we have a largish extent that happens to start before min_agbno,
+	 * see if we can shift it into range...
+	 */
+	if (bno < args->min_agbno && bno + len > args->min_agbno) {
+		diff = args->min_agbno - bno;
+		if (len > diff) {
+			bno += diff;
+			len -= diff;
+		}
+	}
+
 	if (args->alignment > 1 && len >= args->minlen) {
 		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
-		xfs_extlen_t	diff = aligned_bno - bno;
+
+		diff = aligned_bno - bno;
 
 		*resbno = aligned_bno;
 		*reslen = diff >= len ? 0 : len - diff;
@@ -791,9 +805,13 @@ xfs_alloc_find_best_extent(
 		 * The good extent is closer than this one.
 		 */
 		if (!dir) {
+			if (*sbnoa > args->max_agbno)
+				goto out_use_good;
 			if (*sbnoa >= args->agbno + gdiff)
 				goto out_use_good;
 		} else {
+			if (*sbnoa < args->min_agbno)
+				goto out_use_good;
 			if (*sbnoa <= args->agbno - gdiff)
 				goto out_use_good;
 		}
@@ -880,6 +898,17 @@ xfs_alloc_ag_vextent_near(
 	dofirst = prandom_u32() & 1;
 #endif
 
+	/* handle unitialized agbno range so caller doesn't have to */
+	if (!args->min_agbno && !args->max_agbno)
+		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
+	ASSERT(args->min_agbno <= args->max_agbno);
+
+	/* clamp agbno to the range if it's outside */
+	if (args->agbno < args->min_agbno)
+		args->agbno = args->min_agbno;
+	if (args->agbno > args->max_agbno)
+		args->agbno = args->max_agbno;
+
 restart:
 	bno_cur_lt = NULL;
 	bno_cur_gt = NULL;
@@ -972,6 +1001,8 @@ restart:
 						  &ltbnoa, &ltlena);
 			if (ltlena < args->minlen)
 				continue;
+			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
+				continue;
 			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 			xfs_alloc_fix_len(args);
 			ASSERT(args->len >= args->minlen);
@@ -1092,11 +1123,11 @@ restart:
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			xfs_alloc_compute_aligned(args, ltbno, ltlen,
 						  &ltbnoa, &ltlena);
-			if (ltlena >= args->minlen)
+			if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
 				break;
 			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
 				goto error0;
-			if (!i) {
+			if (!i || ltbnoa < args->min_agbno) {
 				xfs_btree_del_cursor(bno_cur_lt,
 						     XFS_BTREE_NOERROR);
 				bno_cur_lt = NULL;
@@ -1108,11 +1139,11 @@ restart:
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			xfs_alloc_compute_aligned(args, gtbno, gtlen,
 						  &gtbnoa, &gtlena);
-			if (gtlena >= args->minlen)
+			if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
 				break;
 			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
 				goto error0;
-			if (!i) {
+			if (!i || gtbnoa > args->max_agbno) {
 				xfs_btree_del_cursor(bno_cur_gt,
 						     XFS_BTREE_NOERROR);
 				bno_cur_gt = NULL;
@@ -1212,6 +1243,7 @@ restart:
 	ASSERT(ltnew >= ltbno);
 	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
 	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+	ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
 	args->agbno = ltnew;
 
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index db5da4a..fbe383f 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg {
 	xfs_extlen_t	total;		/* total blocks needed in xaction */
 	xfs_extlen_t	alignment;	/* align answer to multiple of this */
 	xfs_extlen_t	minalignslop;	/* slop for minlen+alignment calcs */
+	xfs_agblock_t	min_agbno;	/* set an agbno range for NEAR allocs */
+	xfs_agblock_t	max_agbno;	/* ... */
 	xfs_extlen_t	len;		/* output: actual size of extent */
 	xfs_alloctype_t	type;		/* allocation type XFS_ALLOCTYPE_... */
 	xfs_alloctype_t	otype;		/* original allocation type */
-- 
1.9.3

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2015-06-02 18:42 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-02 18:41 [PATCH 00/28] xfsprogs: sparse inode chunks Brian Foster
2015-06-02 18:41 ` [PATCH 01/28] xfs: create individual inode alloc. helper Brian Foster
2015-06-02 18:41 ` [PATCH 02/28] xfs: update free inode record logic to support sparse inode records Brian Foster
2015-06-02 18:41 ` Brian Foster [this message]
2015-06-02 18:41 ` [PATCH 04/28] xfs: add sparse inode chunk alignment superblock field Brian Foster
2015-06-02 18:41 ` [PATCH 05/28] xfs: use sparse chunk alignment for min. inode allocation requirement Brian Foster
2015-06-02 18:41 ` [PATCH 06/28] xfs: sparse inode chunks feature helpers and mount requirements Brian Foster
2015-06-02 18:41 ` [PATCH 07/28] xfs: add fs geometry bit for sparse inode chunks Brian Foster
2015-06-02 18:41 ` [PATCH 08/28] xfs: introduce inode record hole mask " Brian Foster
2015-06-02 18:41 ` [PATCH 09/28] xfs: pass inode count through ordered icreate log item Brian Foster
2015-06-02 18:41 ` [PATCH 10/28] xfs: enable sparse inode chunks for v5 superblocks Brian Foster
2015-06-02 18:41 ` [PATCH 11/28] mkfs: sparse inode chunk support Brian Foster
2015-06-02 18:41 ` [PATCH 12/28] db: support sparse inode chunk inobt record and sb fields Brian Foster
2015-06-02 18:41 ` [PATCH 13/28] db: show sparse inodes feature state in version command output Brian Foster
2015-06-02 18:41 ` [PATCH 14/28] growfs: display sparse inode status from xfs_info Brian Foster
2015-06-02 18:41 ` [PATCH 15/28] repair: handle sparse format inobt record freecount correctly Brian Foster
2015-06-05  0:53   ` Dave Chinner
2015-06-02 18:41 ` [PATCH 16/28] repair: remove duplicate field from aghdr_cnts Brian Foster
2015-06-02 18:41 ` [PATCH 17/28] repair: use ir_count for filesystems with sparse inode support Brian Foster
2015-06-02 18:41 ` [PATCH 18/28] repair: scan and track sparse inode chunks correctly Brian Foster
2015-06-05  0:56   ` Dave Chinner
2015-06-02 18:41 ` [PATCH 19/28] repair: scan sparse finobt records correctly Brian Foster
2015-06-05  1:03   ` Dave Chinner
2015-06-05 16:52     ` Brian Foster
2015-06-02 18:41 ` [PATCH 20/28] repair: validate ir_count field for sparse format records Brian Foster
2015-06-02 18:41 ` [PATCH 21/28] repair: process sparse inode records correctly Brian Foster
2015-06-05  1:12   ` Dave Chinner
2015-06-02 18:41 ` [PATCH 22/28] repair: factor out sparse inodes from finobt reconstruction Brian Foster
2015-06-02 18:41 ` [PATCH 23/28] repair: do not account sparse inodes in phase 5 cursor init Brian Foster
2015-06-02 18:41 ` [PATCH 24/28] repair: reconstruct sparse inode records correctly on disk Brian Foster
2015-06-02 18:41 ` [PATCH 25/28] repair: do not prefetch holes in sparse inode chunks Brian Foster
2015-06-02 18:41 ` [PATCH 26/28] repair: handle sparse inode alignment Brian Foster
2015-06-02 18:42 ` [PATCH 27/28] metadump: reorder inode record sanity checks and inode buffer read Brian Foster
2015-06-02 18:42 ` [PATCH 28/28] metadump: support sparse inode records Brian Foster
2015-06-16  0:33 ` [PATCH 00/28] xfsprogs: sparse inode chunks Dave Chinner
2015-06-16  0:39   ` Dave Chinner
2015-06-16 10:55     ` Brian Foster
2015-06-16 20:26       ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1433270521-62026-4-git-send-email-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox