Re: Some 64-bit tests - Aneesh Kumar K.V

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: Theodore Tso <tytso@mit.edu>
Cc: Nick Dokos <nicholas.dokos@hp.com>,
	Valerie Aurora <vaurora@redhat.com>,
	linux-ext4@vger.kernel.org
Subject: Re: Some 64-bit tests
Date: Thu, 11 Jun 2009 11:20:06 +0530	[thread overview]
Message-ID: <20090611055006.GA8073@skywalker> (raw)
In-Reply-To: <20090610181320.GA6953@mit.edu>

On Wed, Jun 10, 2009 at 02:13:20PM -0400, Theodore Tso wrote:
> On Mon, Jun 08, 2009 at 11:13:48PM -0400, Nick Dokos wrote:
> > 
> > I tried this on top of 2.6.30-rc8 and I hit a couple of BUGs, one in pdflush
> > and the other in the Lustre test program (liverfs):
> > 
> > Jun  8 22:49:13 shifter kernel: ------------[ cut here ]------------
> > Jun  8 22:49:13 shifter kernel: kernel BUG at fs/ext4/mballoc.c:3245!
> > Jun  8 22:49:13 shifter kernel: invalid opcode: 0000 [#1] SMP 
> 
> Hmmm, that would be the BUG_ON check:
> 
>      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
> 

I already have an RFC patch, which Nick has actually tested. It is giving 32MB
extents, which is expected because the max order in the buddy cache is
blocksize_bits + 1. I have a Fixme in there regarding scaling the start
block, which I was hoping to fix soon.

Attaching the patch below.

commit f1fbc2ac43fefb6bac227fc995fe2b79c67ccfad
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date:   Tue Jun 9 01:38:53 2009 +0530

    ext4: Use different normalization method for allocation size.
    
    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e..9745b84 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -633,7 +633,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 
 	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
 
-	border = 2 << sb->s_blocksize_bits;
+	border = 1 << (sb->s_blocksize_bits + 1);
 
 	while (len > 0) {
 		/* find how many blocks can be covered since this position */
@@ -3063,8 +3063,10 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 				struct ext4_allocation_request *ar)
 {
-	int bsbits, max;
+	loff_t max;
 	ext4_lblk_t end;
+	int bsbits, chunk_blks;
+	unsigned int s_mb_stream_request;
 	loff_t size, orig_size, start_off;
 	ext4_lblk_t start, orig_start;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
@@ -3090,54 +3092,61 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 
 	bsbits = ac->ac_sb->s_blocksize_bits;
+	s_mb_stream_request = EXT4_SB(ac->ac_sb)->s_mb_stream_request;
+	/* make sure this is power of 2 */
+	s_mb_stream_request =
+		roundup_pow_of_two((unsigned long)s_mb_stream_request);
 
 	/* first, let's learn actual file size
 	 * given current request is allocated */
 	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
-	size = size << bsbits;
-	if (size < i_size_read(ac->ac_inode))
-		size = i_size_read(ac->ac_inode);
-
-	/* max size of free chunks */
-	max = 2 << bsbits;
+	if (size < (i_size_read(ac->ac_inode) >> bsbits))
+		size = i_size_read(ac->ac_inode) >> bsbits;
+	/*
+	 * max free chunk blocks.
+	 * (max buddy cache order is bsbits + 1).
+	 */
+	max = 1 << (bsbits + 1);
 
-#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
-		(req <= (size) || max <= (chunk_size))
+	/*
+	 * If buddy cache says it can have more than
+	 * blocks per group then limit to blocks per group.
+	 */
+	if (max > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+		max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
 
 	/* first, try to predict filesize */
 	/* XXX: should this table be tunable? */
-	start_off = 0;
-	if (size <= 16 * 1024) {
-		size = 16 * 1024;
-	} else if (size <= 32 * 1024) {
-		size = 32 * 1024;
-	} else if (size <= 64 * 1024) {
-		size = 64 * 1024;
-	} else if (size <= 128 * 1024) {
-		size = 128 * 1024;
-	} else if (size <= 256 * 1024) {
-		size = 256 * 1024;
-	} else if (size <= 512 * 1024) {
-		size = 512 * 1024;
-	} else if (size <= 1024 * 1024) {
-		size = 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
-		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-						(21 - bsbits)) << 21;
-		size = 2 * 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
-		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-							(22 - bsbits)) << 22;
-		size = 4 * 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
-					(8<<20)>>bsbits, max, 8 * 1024)) {
-		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-							(23 - bsbits)) << 23;
-		size = 8 * 1024 * 1024;
-	} else {
-		start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
-		size	  = ac->ac_o_ex.fe_len << bsbits;
+	/*
+	 * less than s_mb_stream_request is using
+	 * locality group preallocation
+	 */
+	if (size <= s_mb_stream_request) {
+		size = s_mb_stream_request << bsbits;
+		goto found_size;
+	}
+	chunk_blks = s_mb_stream_request << 1;
+	while (1) {
+		if (size <= chunk_blks) {
+			if (max <= chunk_blks)
+				size = max << bsbits;
+			else
+				size = chunk_blks << bsbits;
+			break;
+		}
+		chunk_blks = chunk_blks << 1;
 	}
+
+found_size:
+#if 0
+	/* Will I end up requesting less than what I asked for? */
+	start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits) & ~(size - 1);
+	start_off = start_off * size;
+#else
+	start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits);
+#endif
+
+	/* convert into blocks */
 	orig_size = size = size >> bsbits;
 	orig_start = start = start_off >> bsbits;
 
@@ -3216,6 +3225,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
+
+	if (size <= 0 ||  size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+		printk(KERN_ALERT "size is %ld orig size is %ld\n", (long)size, (long)orig_size);
+
 	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */

next prev parent reply	other threads:[~2009-06-11  5:50 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-06-08 13:57 Some 64-bit tests Nick Dokos
2009-06-08 19:00 ` Valerie Aurora
2009-06-08 20:10 ` Aneesh Kumar K.V
2009-06-09  3:13   ` Nick Dokos
2009-06-10 18:13     ` Theodore Tso
2009-06-11  5:50       ` Aneesh Kumar K.V [this message]
2009-06-13  4:24         ` Theodore Tso
2009-06-18 21:11         ` Theodore Tso
2009-06-19 11:34           ` Aneesh Kumar K.V

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:ed8482e dfblob:9745b84 )
 OR (
bs:"Re: Some 64-bit tests" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090611055006.GA8073@skywalker \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=nicholas.dokos@hp.com \
    --cc=tytso@mit.edu \
    --cc=vaurora@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).