linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Boaz Harrosh <bharrosh@panasas.com>
To: linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	Jens Axboe <jaxboe@fusionio.com>, open-osd <osd-dev@open-osd.org>
Cc: Chris Mason <chris.mason@oracle.com>,
	Marc Dionne <marc.c.dionne@gmail.com>,
	Stable Tree <stable@kernel.org>
Subject: [PATCH] exofs: Fix read ahead BUG, caused by moving inodes to sb->s_bdi
Date: Thu, 03 Feb 2011 13:22:06 +0200	[thread overview]
Message-ID: <4D4A8FDE.2030408@panasas.com> (raw)
In-Reply-To: <4D498ADC.4060306@panasas.com>


commit: [115e19c] exofs: Set i_mapping->backing_dev_info anyway

Has caused a regression in read-ahead because bdi_init() does
not set bdi->ra_pages to anything and it was left as zero.
So when moving all inodes to point to the super-block's
bdi, they all had ra_pages disabled.

Should bdi_init() be patched to some sane default value?

I fix that by calculating a read_ahead that is:
- preferable 2 stripes long (I'll add a mount option to next Kernel)
- Minimum 128K aligned up to stripe-size
- Caped to maximum-IO-sizes round down to stripe_size.
  (Max sizes are governed by max bio size that fits a page times
   # of devices)

This is a regression since the 2.6.37 Kernel.
2.6.36 was fine

CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/exofs.h |    2 ++
 fs/exofs/inode.c |   17 +++++++++++++----
 fs/exofs/super.c |   18 ++++++++++++++++++
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 2dc925f..99fcb91 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
 }
 
 /* inode.c               */
+unsigned exofs_max_io_pages(struct exofs_layout *layout,
+			    unsigned expected_pages);
 int exofs_setattr(struct dentry *, struct iattr *);
 int exofs_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 4268542..deede10 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
 		PAGE_SIZE / sizeof(struct page *),
 };
 
+unsigned exofs_max_io_pages(struct exofs_layout *layout,
+			    unsigned expected_pages)
+{
+	unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
+
+	/* TODO: easily support bio chaining */
+	pages =  min_t(unsigned, pages,
+		       layout->group_width * BIO_MAX_PAGES_KMALLOC);
+	return pages;
+}
+
 struct page_collect {
 	struct exofs_sb_info *sbi;
 	struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
 
 static int pcol_try_alloc(struct page_collect *pcol)
 {
-	unsigned pages = min_t(unsigned, pcol->expected_pages,
-			  MAX_PAGES_KMALLOC);
+	unsigned pages;
 
 	if (!pcol->ios) { /* First time allocate io_state */
 		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
 	}
 
 	/* TODO: easily support bio chaining */
-	pages =  min_t(unsigned, pages,
-		       pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
+	pages =  exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
 
 	for (; pages; pages >>= 1) {
 		pcol->pages = kmalloc(pages * sizeof(struct page *),
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6..75d42ac 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -383,6 +383,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
 	return 0;
 }
 
+static unsigned __ra_pages(struct exofs_layout *layout)
+{
+	const unsigned _MIN_RA = 32; /* min 128K read-ahead */
+	unsigned ra_pages = layout->group_width * layout->stripe_unit /
+				PAGE_SIZE;
+	unsigned max_io_pages = exofs_max_io_pages(layout, ~0);
+
+	ra_pages *= 2; /* two stripes */
+	if (ra_pages < _MIN_RA)
+		ra_pages = roundup(_MIN_RA, ra_pages / 2);
+
+	if (ra_pages > max_io_pages)
+		ra_pages = max_io_pages;
+
+	return ra_pages;
+}
+
 /* @odi is valid only as long as @fscb_dev is valid */
 static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
 			     struct osd_dev_info *odi)
@@ -616,6 +633,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* set up operation vectors */
+	sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
 	sb->s_bdi = &sbi->bdi;
 	sb->s_fs_info = sbi;
 	sb->s_op = &exofs_sops;
-- 
1.7.2.3



  parent reply	other threads:[~2011-02-03 11:22 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-02 16:48 Please Help: 2.6.37 a_ops->readpages is never called, 2.6.36 was fine Boaz Harrosh
2011-02-02 18:46 ` Problem found, but why??? Boaz Harrosh
2011-02-02 21:51   ` Chris Mason
2011-02-03 11:22 ` Boaz Harrosh [this message]
2011-02-03 12:12   ` [PATCH] exofs: Fix read ahead BUG, caused by moving inodes to sb->s_bdi Boaz Harrosh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D4A8FDE.2030408@panasas.com \
    --to=bharrosh@panasas.com \
    --cc=chris.mason@oracle.com \
    --cc=jaxboe@fusionio.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=marc.c.dionne@gmail.com \
    --cc=osd-dev@open-osd.org \
    --cc=stable@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).