From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: Theodore Tso <tytso@mit.edu>
Cc: Nick Dokos <nicholas.dokos@hp.com>,
Valerie Aurora <vaurora@redhat.com>,
linux-ext4@vger.kernel.org
Subject: Re: Some 64-bit tests
Date: Thu, 11 Jun 2009 11:20:06 +0530 [thread overview]
Message-ID: <20090611055006.GA8073@skywalker> (raw)
In-Reply-To: <20090610181320.GA6953@mit.edu>
On Wed, Jun 10, 2009 at 02:13:20PM -0400, Theodore Tso wrote:
> On Mon, Jun 08, 2009 at 11:13:48PM -0400, Nick Dokos wrote:
> >
> > I tried this on top of 2.6.30-rc8 and I hit a couple of BUGs, one in pdflush
> > and the other in the Lustre test program (liverfs):
> >
> > Jun 8 22:49:13 shifter kernel: ------------[ cut here ]------------
> > Jun 8 22:49:13 shifter kernel: kernel BUG at fs/ext4/mballoc.c:3245!
> > Jun 8 22:49:13 shifter kernel: invalid opcode: 0000 [#1] SMP
>
> Hmmm, that would be the BUG_ON check:
>
> BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
>
I already have an RFC patch which Nick actually tested. It is giving 32MB
extents, which is expected because the max order in buddy cache is
blocksize_bits + 1. I have a FIXME in there regarding scaling the start
block, which I was hoping to fix soon.
Attaching the patch below.
commit f1fbc2ac43fefb6bac227fc995fe2b79c67ccfad
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue Jun 9 01:38:53 2009 +0530
ext4: Use different normalization method for allocation size.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e..9745b84 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -633,7 +633,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
- border = 2 << sb->s_blocksize_bits;
+ border = 1 << (sb->s_blocksize_bits + 1);
while (len > 0) {
/* find how many blocks can be covered since this position */
@@ -3063,8 +3063,10 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
- int bsbits, max;
+ loff_t max;
ext4_lblk_t end;
+ int bsbits, chunk_blks;
+ unsigned int s_mb_stream_request;
loff_t size, orig_size, start_off;
ext4_lblk_t start, orig_start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
@@ -3090,54 +3092,61 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
bsbits = ac->ac_sb->s_blocksize_bits;
+ s_mb_stream_request = EXT4_SB(ac->ac_sb)->s_mb_stream_request;
+ /* make sure this is power of 2 */
+ s_mb_stream_request =
+ roundup_pow_of_two((unsigned long)s_mb_stream_request);
/* first, let's learn actual file size
* given current request is allocated */
size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
- size = size << bsbits;
- if (size < i_size_read(ac->ac_inode))
- size = i_size_read(ac->ac_inode);
-
- /* max size of free chunks */
- max = 2 << bsbits;
+ if (size < (i_size_read(ac->ac_inode) >> bsbits))
+ size = i_size_read(ac->ac_inode) >> bsbits;
+ /*
+ * max free chunk blocks.
+ * (max buddy cache order is (bsbits + 1).
+ */
+ max = 1 << (bsbits + 1);
-#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
- (req <= (size) || max <= (chunk_size))
+ /*
+ * If buddy cache says it can have more than
+ * blocks per group then limit to blocks per group.
+ */
+ if (max > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+ max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
/* first, try to predict filesize */
/* XXX: should this table be tunable? */
- start_off = 0;
- if (size <= 16 * 1024) {
- size = 16 * 1024;
- } else if (size <= 32 * 1024) {
- size = 32 * 1024;
- } else if (size <= 64 * 1024) {
- size = 64 * 1024;
- } else if (size <= 128 * 1024) {
- size = 128 * 1024;
- } else if (size <= 256 * 1024) {
- size = 256 * 1024;
- } else if (size <= 512 * 1024) {
- size = 512 * 1024;
- } else if (size <= 1024 * 1024) {
- size = 1024 * 1024;
- } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
- (21 - bsbits)) << 21;
- size = 2 * 1024 * 1024;
- } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
- (22 - bsbits)) << 22;
- size = 4 * 1024 * 1024;
- } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
- (8<<20)>>bsbits, max, 8 * 1024)) {
- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
- (23 - bsbits)) << 23;
- size = 8 * 1024 * 1024;
- } else {
- start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
- size = ac->ac_o_ex.fe_len << bsbits;
+ /*
+ * less than s_mb_stream_request is using
+ * locality group preallocation
+ */
+ if (size <= s_mb_stream_request) {
+ size = s_mb_stream_request << bsbits;
+ goto found_size;
+ }
+ chunk_blks = s_mb_stream_request << 1;
+ while (1) {
+ if (size <= chunk_blks) {
+ if (max <= chunk_blks)
+ size = max << bsbits;
+ else
+ size = chunk_blks << bsbits;
+ break;
+ }
+ chunk_blks = chunk_blks << 1;
}
+
+found_size:
+#if 0
+ /* Will i end up requesting for less that what i asked for ? */
+ start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits) & ~(size - 1);
+ start_off = start_off * size;
+#else
+ start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits);
+#endif
+
+ /* convert into blocks */
orig_size = size = size >> bsbits;
orig_start = start = start_off >> bsbits;
@@ -3216,6 +3225,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
+
+ if (size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+ printk(KERN_ALERT "size is %ld orig size is %ld\n", (long)size, (long)orig_size);
+
BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
next prev parent reply other threads:[~2009-06-11 5:50 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-08 13:57 Some 64-bit tests Nick Dokos
2009-06-08 19:00 ` Valerie Aurora
2009-06-08 20:10 ` Aneesh Kumar K.V
2009-06-09 3:13 ` Nick Dokos
2009-06-10 18:13 ` Theodore Tso
2009-06-11 5:50 ` Aneesh Kumar K.V [this message]
2009-06-13 4:24 ` Theodore Tso
2009-06-18 21:11 ` Theodore Tso
2009-06-19 11:34 ` Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090611055006.GA8073@skywalker \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=linux-ext4@vger.kernel.org \
--cc=nicholas.dokos@hp.com \
--cc=tytso@mit.edu \
--cc=vaurora@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).