From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: cmm@us.ibm.com, tytso@mit.edu, sandeen@redhat.com
Cc: linux-ext4@vger.kernel.org,
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
Jan Kara <jack@suse.cz>
Subject: [PATCH] ext4: Fix delalloc sync hang with journal lock inversion
Date: Fri, 6 Jun 2008 23:54:52 +0530 [thread overview]
Message-ID: <1212776693-435-7-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1212776693-435-6-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ext4/inode.c | 107 ++++++++++++++++++++++++++++++++++++------------------
fs/mpage.c | 12 +++----
2 files changed, 76 insertions(+), 43 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0f8d071..b5bc627 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1480,50 +1480,74 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
up_write(&EXT4_I(inode)->i_data_sem);
if (EXT4_I(inode)->i_disksize == disksize) {
- if (handle == NULL)
- handle = ext4_journal_start(inode, 1);
- if (!IS_ERR(handle))
- ext4_mark_inode_dirty(handle, inode);
+ ret = ext4_mark_inode_dirty(handle, inode);
+ return ret;
}
}
-
ret = 0;
}
-
return ret;
}
+
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+ return !buffer_mapped(bh) || buffer_delay(bh);
+}
+
/* FIXME!! only support data=writeback mode */
-static int __ext4_da_writepage(struct page *page,
+/*
+ * Gets called via ext4_da_writepages after taking the page lock.
+ * We may end up doing block allocation here in case
+ * mpage_da_map_blocks failed to allocate blocks.
+ */
+static int ext4_da_writepage(struct page *page,
struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
- handle_t *handle = NULL;
int ret = 0;
+ loff_t size;
+ unsigned long len;
+ handle_t *handle = NULL;
+ struct buffer_head *page_bufs;
+ struct inode *inode = page->mapping->host;
handle = ext4_journal_current_handle();
+ if (!handle) {
+ /*
+ * This can happen when we aren't called via
+ * ext4_da_writepages() but directly (shrink_page_list).
+ * We cannot easily start a transaction here so we just skip
+ * writing the page if doing so would require block allocation.
+ */
+ size = i_size_read(inode);
- if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
- ret = nobh_writepage(page, ext4_get_block, wbc);
- else
- ret = block_write_full_page(page, ext4_get_block, wbc);
+ page_bufs = page_buffers(page);
+ if (page->index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
- if (!ret && inode->i_size > EXT4_I(inode)->i_disksize) {
- EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
+ if (walk_page_buffers(NULL, page_bufs, 0,
+ len, NULL, ext4_bh_unmapped_or_delay)) {
+ /*
+ * We can't do block allocation under
+ * page lock without a handle. So redirty
+ * the page and return.
+ */
+ BUG_ON(wbc->sync_mode != WB_SYNC_NONE);
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
}
+ if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+ ret = nobh_writepage(page, ext4_da_get_block_write, wbc);
+ else
+ ret = block_write_full_page(page, ext4_da_get_block_write, wbc);
+
return ret;
}
-static int ext4_da_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- if (!ext4_journal_current_handle())
- return __ext4_da_writepage(page, wbc);
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
/*
* For now just follow the DIO way to estimate the max credits
@@ -1545,8 +1569,8 @@ static int ext4_da_writepages(struct address_space *mapping,
handle_t *handle = NULL;
int needed_blocks;
int ret = 0;
- unsigned range_cyclic;
long to_write;
+ loff_t range_start = 0;
/*
* No pages to write? This is mainly a kludge to avoid starting
@@ -1563,8 +1587,14 @@ static int ext4_da_writepages(struct address_space *mapping,
needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
to_write = wbc->nr_to_write;
- range_cyclic = wbc->range_cyclic;
- wbc->range_cyclic = 1;
+ if (!wbc->range_cyclic) {
+ /*
+ * If range_cyclic is not set, force range_cont
+ * and save the old range_start so it can be restored
+ */
+ wbc->range_cont = 1;
+ range_start = wbc->range_start;
+ }
while (!ret && to_write) {
/* start a new transaction*/
@@ -1579,17 +1609,27 @@ static int ext4_da_writepages(struct address_space *mapping,
*/
if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
- to_write -= wbc->nr_to_write;
+ to_write -= wbc->nr_to_write;
ret = mpage_da_writepages(mapping, wbc,
ext4_da_get_block_write);
ext4_journal_stop(handle);
- to_write += wbc->nr_to_write;
+ if (wbc->nr_to_write) {
+ /*
+ * Either no more writeout is needed,
+ * or we requested a nonblocking writeout
+ * and found the device congested.
+ */
+ to_write += wbc->nr_to_write;
+ break;
+ }
+ wbc->nr_to_write = to_write;
}
out_writepages:
wbc->nr_to_write = to_write;
- wbc->range_cyclic = range_cyclic;
+ if (range_start)
+ wbc->range_start = range_start;
return ret;
}
@@ -1720,11 +1760,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
return 0;
}
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
-{
- return !buffer_mapped(bh) || buffer_delay(bh);
-}
-
/*
* Note that we don't need to start a transaction unless we're journaling data
* because we should have holes filled from ext4_page_mkwrite(). We even don't
diff --git a/fs/mpage.c b/fs/mpage.c
index cde7f11..c4376ec 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -849,13 +849,11 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
do {
if (cur_logical >= logical + blocks)
break;
-
if (buffer_delay(bh)) {
bh->b_blocknr = pblock;
clear_buffer_delay(bh);
- } else if (buffer_mapped(bh)) {
+ } else if (buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock);
- }
cur_logical++;
pblock++;
@@ -930,10 +928,10 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
if (buffer_delay(lbh))
mpage_put_bnr_to_bhs(mpd, next, &new);
- /* go for the remaining blocks */
- next += new.b_size >> mpd->inode->i_blkbits;
- remain -= new.b_size;
- }
+ /* go for the remaining blocks */
+ next += new.b_size >> mpd->inode->i_blkbits;
+ remain -= new.b_size;
+ }
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
--
1.5.5.1.357.g1af8b.dirty
next prev parent reply other threads:[~2008-06-06 18:25 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-06-06 18:24 Patches for the patchqueue Aneesh Kumar K.V
2008-06-06 18:24 ` [PATCH] ext4: cleanup blockallocator Aneesh Kumar K.V
2008-06-06 18:24 ` [PATCH] ext2: Use page_mkwrite vma_operations to get mmap write notification Aneesh Kumar K.V
2008-06-06 18:24 ` [PATCH] ext3: " Aneesh Kumar K.V
2008-06-06 18:24 ` [PATCH] vfs: Don't flush delay buffer to disk Aneesh Kumar K.V
2008-06-06 18:24 ` [PATCH] mm: Add range_cont mode for writeback Aneesh Kumar K.V
2008-06-06 18:24 ` Aneesh Kumar K.V [this message]
2008-06-06 18:24 ` [PATCH] ext4: delalloc block reservation fix Aneesh Kumar K.V
2008-06-07 1:12 ` [PATCH] ext4: cleanup blockallocator Mingming Cao
-- strict thread matches above, loose matches on Subject: below --
2008-05-30 13:39 [PATCH -v2] delalloc and journal locking order inversion fixes Aneesh Kumar K.V
2008-05-30 13:39 ` [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Aneesh Kumar K.V
2008-05-30 13:39 ` [PATCH] ext4: Inverse locking order of page_lock and transaction start Aneesh Kumar K.V
2008-05-30 13:39 ` [PATCH] vfs: Move mark_inode_dirty() from under page lock in generic_write_end() Aneesh Kumar K.V
2008-05-30 13:39 ` [PATCH] ext4: Add validation to jbd lock inversion patch and split and writepage Aneesh Kumar K.V
2008-05-30 13:39 ` [PATCH] ext4: inverse locking ordering of page_lock and transaction start in delalloc Aneesh Kumar K.V
2008-05-30 13:39 ` [PATCH] ext4: Fix delalloc sync hang with journal lock inversion Aneesh Kumar K.V
2008-06-02 9:35 ` Jan Kara
2008-06-02 9:59 ` Aneesh Kumar K.V
2008-06-02 10:27 ` Jan Kara
2008-06-05 13:54 ` Aneesh Kumar K.V
2008-06-05 16:22 ` Jan Kara
2008-06-05 19:19 ` Aneesh Kumar K.V
2008-06-11 12:41 ` Jan Kara
2008-06-11 13:56 ` Aneesh Kumar K.V
2008-06-11 17:48 ` Jan Kara
2008-06-12 23:10 ` Mingming Cao
2008-05-21 17:44 delalloc and journal locking order inversion fixes Aneesh Kumar K.V
2008-05-21 17:44 ` [PATCH] ext4: Add validation to jbd lock inversion patch and split and writepage Aneesh Kumar K.V
2008-05-21 17:44 ` [PATCH] ext4: inverse locking ordering of page_lock and transaction start in delalloc Aneesh Kumar K.V
2008-05-21 17:44 ` [PATCH] ext4: Fix delalloc sync hang with journal lock inversion Aneesh Kumar K.V
2008-05-22 10:25 ` Aneesh Kumar K.V
2008-05-22 17:58 ` Mingming
2008-05-22 18:23 ` Aneesh Kumar K.V
2008-05-22 19:45 ` Mingming
2008-05-22 18:10 ` Mingming
2008-05-22 18:26 ` Aneesh Kumar K.V
2008-05-22 19:26 ` Mingming
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1212776693-435-7-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=cmm@us.ibm.com \
--cc=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
--cc=sandeen@redhat.com \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.