From: "Jayson R. King" <dev@jaysonking.com>
To: LKML <linux-kernel@vger.kernel.org>,
Stable team <stable@kernel.org>,
Greg Kroah-Hartman <gregkh@suse.de>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
Mingming Cao <cmm@us.ibm.com>, Theodore Ts'o <tytso@mit.edu>,
linux-ext4@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
Jayson King <dev@jaysonking.com>
Subject: [10/11] ext4: Fix file fragmentation during large file write.
Date: Sat, 27 Feb 2010 00:33:22 -0600 [thread overview]
Message-ID: <4B88BCB2.7020702@jaysonking.com> (raw)
In-Reply-To: <4B88BA1B.4050500@jaysonking.com>
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu Oct 16 10:10:36 2008 -0400
Subject: ext4: Fix file fragmentation during large file write.
commit 22208dedbd7626e5fc4339c417f8d24cc21f79d7 upstream.
The range_cyclic writeback mode uses the address_space writeback_index
as the start index for writeback. With delayed allocation we were
updating writeback_index wrongly resulting in highly fragmented file.
This patch reduces the number of extents reduced from 4000 to 27 for a
3GB file.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
[dev@jaysonking.com: Some changed lines from the original version of this patch were dropped, since they were rolled up with another cherry-picked patch applied to 2.6.27.y earlier.]
Signed-off-by: Jayson R. King <dev@jaysonking.com>
---
fs/ext4/inode.c | 88 ++++++++++++++++++++++++++++------------------
1 file changed, 55 insertions(+), 33 deletions(-)
diff -urNp linux-2.6.27.orig/fs/ext4/inode.c linux-2.6.27/fs/ext4/inode.c
--- linux-2.6.27.orig/fs/ext4/inode.c 2010-02-26 14:25:37.157252759 -0600
+++ linux-2.6.27/fs/ext4/inode.c 2010-02-26 14:27:31.342252417 -0600
@@ -1721,7 +1721,11 @@ static int mpage_da_submit_io(struct mpa
pages_skipped = mpd->wbc->pages_skipped;
err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err)
+ if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+ /*
+ * have successfully written the page
+ * without skipping the same
+ */
mpd->pages_written++;
/*
* In error case, we have to continue because
@@ -2175,7 +2179,6 @@ static int mpage_da_writepages(struct ad
struct writeback_control *wbc,
struct mpage_da_data *mpd)
{
- long to_write;
int ret;
if (!mpd->get_block)
@@ -2190,19 +2193,18 @@ static int mpage_da_writepages(struct ad
mpd->pages_written = 0;
mpd->retval = 0;
- to_write = wbc->nr_to_write;
-
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-
/*
* Handle last extent of pages
*/
if (!mpd->io_done && mpd->next_page != mpd->first_page) {
if (mpage_da_map_blocks(mpd) == 0)
mpage_da_submit_io(mpd);
- }
- wbc->nr_to_write = to_write - mpd->pages_written;
+ mpd->io_done = 1;
+ ret = MPAGE_DA_EXTENT_TAIL;
+ }
+ wbc->nr_to_write -= mpd->pages_written;
return ret;
}
@@ -2447,11 +2449,14 @@ static int ext4_da_writepages_trans_bloc
static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ pgoff_t index;
+ int range_whole = 0;
handle_t *handle = NULL;
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
+ int no_nrwrite_index_update;
+ long pages_written = 0, pages_skipped;
int needed_blocks, ret = 0, nr_to_writebump = 0;
- long to_write, pages_skipped = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
/*
@@ -2485,16 +2490,26 @@ static int ext4_da_writepages(struct add
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
wbc->nr_to_write = sbi->s_mb_stream_request;
}
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
-
- pages_skipped = wbc->pages_skipped;
+ if (wbc->range_cyclic)
+ index = mapping->writeback_index;
+ else
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
mpd.wbc = wbc;
mpd.inode = mapping->host;
-restart_loop:
- to_write = wbc->nr_to_write;
- while (!ret && to_write > 0) {
+ /*
+ * we don't want write_cache_pages to update
+ * nr_to_write and writeback_index
+ */
+ no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+ wbc->no_nrwrite_index_update = 1;
+ pages_skipped = wbc->pages_skipped;
+
+ while (!ret && wbc->nr_to_write > 0) {
/*
* we insert one extent at a time. So we need
@@ -2527,46 +2542,53 @@ restart_loop:
goto out_writepages;
}
}
- to_write -= wbc->nr_to_write;
-
mpd.get_block = ext4_da_get_block_write;
ret = mpage_da_writepages(mapping, wbc, &mpd);
ext4_journal_stop(handle);
- if (mpd.retval == -ENOSPC)
+ if (mpd.retval == -ENOSPC) {
+ /* commit the transaction which would
+ * free blocks released in the transaction
+ * and try again
+ */
jbd2_journal_force_commit_nested(sbi->s_journal);
-
- /* reset the retry count */
- if (ret == MPAGE_DA_EXTENT_TAIL) {
+ wbc->pages_skipped = pages_skipped;
+ ret = 0;
+ } else if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
* got one extent now try with
* rest of the pages
*/
- to_write += wbc->nr_to_write;
+ pages_written += mpd.pages_written;
+ wbc->pages_skipped = pages_skipped;
ret = 0;
- } else if (wbc->nr_to_write) {
+ } else if (wbc->nr_to_write)
/*
* There is no more writeout needed
* or we requested for a noblocking writeout
* and we found the device congested
*/
- to_write += wbc->nr_to_write;
break;
- }
- wbc->nr_to_write = to_write;
- }
-
- if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
- /* We skipped pages in this loop */
- wbc->nr_to_write = to_write +
- wbc->pages_skipped - pages_skipped;
- wbc->pages_skipped = pages_skipped;
- goto restart_loop;
}
+ if (pages_skipped != wbc->pages_skipped)
+ printk(KERN_EMERG "This should not happen leaving %s "
+ "with nr_to_write = %ld ret = %d\n",
+ __func__, wbc->nr_to_write, ret);
+
+ /* Update index */
+ index += pages_written;
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ /*
+ * set the writeback_index so that range_cyclic
+ * mode will write it back later
+ */
+ mapping->writeback_index = index;
out_writepages:
- wbc->nr_to_write = to_write - nr_to_writebump;
+ if (!no_nrwrite_index_update)
+ wbc->no_nrwrite_index_update = 0;
+ wbc->nr_to_write -= nr_to_writebump;
return ret;
}
next prev parent reply other threads:[~2010-02-27 7:23 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-27 6:22 [00/11] fix ext4 deadlock on 2.6.27.y Jayson R. King
2010-02-27 6:32 ` [01/11] ext4: invalidate pages if delalloc block allocation fails Jayson R. King
2010-02-27 6:32 ` [02/11] percpu counter: clean up percpu_counter_sum_and_set() Jayson R. King
2010-02-27 6:32 ` [03/11] ext4: Make sure all the block allocation paths reserve blocks Jayson R. King
2010-02-27 6:32 ` Jayson R. King
2010-02-27 6:32 ` [04/11] ext4: Add percpu dirty block accounting Jayson R. King
2010-02-27 6:32 ` [05/11] ext4: Retry block reservation Jayson R. King
2010-02-27 6:32 ` [06/11] ext4: Retry block allocation if we have free blocks left Jayson R. King
2010-02-27 6:32 ` Jayson R. King
2010-02-27 6:33 ` [07/11] ext4: Use tag dirty lookup during mpage_da_submit_io Jayson R. King
2010-02-27 6:33 ` [08/11] vfs: Remove the range_cont writeback mode Jayson R. King
2010-02-27 6:33 ` [09/11] vfs: Add no_nrwrite_index_update writeback control flag Jayson R. King
2010-02-27 6:33 ` Jayson R. King [this message]
2010-02-27 6:33 ` [11/11] ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages Jayson R. King
2010-02-28 3:16 ` [00/11] fix ext4 deadlock on 2.6.27.y Greg KH
2010-03-15 20:09 ` [stable] " Greg KH
2010-03-15 23:17 ` tytso
2010-03-15 20:09 ` Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B88BCB2.7020702@jaysonking.com \
--to=dev@jaysonking.com \
--cc=akpm@linux-foundation.org \
--cc=aneesh.kumar@linux.vnet.ibm.com \
--cc=cmm@us.ibm.com \
--cc=gregkh@suse.de \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.