From: Jan Kara <jack@suse.cz>
To: Ted Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org, Jan Kara <jack@suse.cz>
Subject: [PATCH 01/29] ext4: Make ext4_bio_write_page() use BH_Async_Write flags instead page pointers from ext4_io_end
Date: Mon, 8 Apr 2013 23:32:06 +0200 [thread overview]
Message-ID: <1365456754-29373-2-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1365456754-29373-1-git-send-email-jack@suse.cz>
So far ext4_bio_write_page() attached all the pages to ext4_io_end structure.
This makes that structure pretty heavy (1 KB for pointers + 16 bytes per
page attached to the bio). Also later we would like to share ext4_io_end
structure among several bios in case IO to a single extent needs to be split
among several bios and pointing to pages from ext4_io_end makes this complex.
We remove page pointers from ext4_io_end and use pointers from bio itself
instead. This isn't as easy when blocksize < pagesize because then we can have
several bios in flight for a single page and we have to be careful when to call
end_page_writeback(). However this is a known problem already solved by
block_write_full_page() / end_buffer_async_write() so we mimic its behavior
here. We mark buffers going to disk with BH_Async_Write flag and in
ext4_bio_end_io() we check whether there are any buffers with BH_Async_Write
flag left. If there are not, we can call end_page_writeback().
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ext4/ext4.h | 14 -----
fs/ext4/page-io.c | 163 +++++++++++++++++++++++++----------------------------
2 files changed, 77 insertions(+), 100 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4a01ba3..3c70547 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -196,19 +196,8 @@ struct mpage_da_data {
#define EXT4_IO_END_ERROR 0x0002
#define EXT4_IO_END_DIRECT 0x0004
-struct ext4_io_page {
- struct page *p_page;
- atomic_t p_count;
-};
-
-#define MAX_IO_PAGES 128
-
/*
* For converting uninitialized extents on a work queue.
- *
- * 'page' is only used from the writepage() path; 'pages' is only used for
- * buffered writes; they are used to keep page references until conversion
- * takes place. For AIO/DIO, neither field is filled in.
*/
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
@@ -218,15 +207,12 @@ typedef struct ext4_io_end {
ssize_t size; /* size of the extent */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
- int num_io_pages; /* for writepages() */
- struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
} ext4_io_end_t;
struct ext4_io_submit {
int io_op;
struct bio *io_bio;
ext4_io_end_t *io_end;
- struct ext4_io_page *io_page;
sector_t io_next_block;
};
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b310..73bc011 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -29,25 +29,19 @@
#include "xattr.h"
#include "acl.h"
-static struct kmem_cache *io_page_cachep, *io_end_cachep;
+static struct kmem_cache *io_end_cachep;
int __init ext4_init_pageio(void)
{
- io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
- if (io_page_cachep == NULL)
- return -ENOMEM;
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
- if (io_end_cachep == NULL) {
- kmem_cache_destroy(io_page_cachep);
+ if (io_end_cachep == NULL)
return -ENOMEM;
- }
return 0;
}
void ext4_exit_pageio(void)
{
kmem_cache_destroy(io_end_cachep);
- kmem_cache_destroy(io_page_cachep);
}
void ext4_ioend_wait(struct inode *inode)
@@ -57,15 +51,6 @@ void ext4_ioend_wait(struct inode *inode)
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
}
-static void put_io_page(struct ext4_io_page *io_page)
-{
- if (atomic_dec_and_test(&io_page->p_count)) {
- end_page_writeback(io_page->p_page);
- put_page(io_page->p_page);
- kmem_cache_free(io_page_cachep, io_page);
- }
-}
-
void ext4_free_io_end(ext4_io_end_t *io)
{
int i;
@@ -74,9 +59,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
BUG_ON(!list_empty(&io->list));
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
- for (i = 0; i < io->num_io_pages; i++)
- put_io_page(io->pages[i]);
- io->num_io_pages = 0;
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
wake_up_all(ext4_ioend_wq(io->inode));
kmem_cache_free(io_end_cachep, io);
@@ -233,45 +215,56 @@ static void ext4_end_bio(struct bio *bio, int error)
ext4_io_end_t *io_end = bio->bi_private;
struct inode *inode;
int i;
+ int blocksize;
sector_t bi_sector = bio->bi_sector;
BUG_ON(!io_end);
+ inode = io_end->inode;
+ blocksize = 1 << inode->i_blkbits;
bio->bi_private = NULL;
bio->bi_end_io = NULL;
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
error = 0;
- bio_put(bio);
-
- for (i = 0; i < io_end->num_io_pages; i++) {
- struct page *page = io_end->pages[i]->p_page;
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ struct bio_vec *bvec = &bio->bi_io_vec[i];
+ struct page *page = bvec->bv_page;
struct buffer_head *bh, *head;
- loff_t offset;
- loff_t io_end_offset;
+ unsigned bio_start = bvec->bv_offset;
+ unsigned bio_end = bio_start + bvec->bv_len;
+ unsigned under_io = 0;
+ unsigned long flags;
+
+ if (!page)
+ continue;
if (error) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
- head = page_buffers(page);
- BUG_ON(!head);
-
- io_end_offset = io_end->offset + io_end->size;
-
- offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
- bh = head;
- do {
- if ((offset >= io_end->offset) &&
- (offset+bh->b_size <= io_end_offset))
- buffer_io_error(bh);
-
- offset += bh->b_size;
- bh = bh->b_this_page;
- } while (bh != head);
}
-
- put_io_page(io_end->pages[i]);
+ bh = head = page_buffers(page);
+ /*
+ * We check all buffers in the page under BH_Uptodate_Lock
+ * to avoid races with other end io clearing async_write flags
+ */
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+ do {
+ if (bh_offset(bh) < bio_start ||
+ bh_offset(bh) + blocksize > bio_end) {
+ if (buffer_async_write(bh))
+ under_io++;
+ continue;
+ }
+ clear_buffer_async_write(bh);
+ if (error)
+ buffer_io_error(bh);
+ } while ((bh = bh->b_this_page) != head);
+ bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+ local_irq_restore(flags);
+ if (!under_io)
+ end_page_writeback(page);
}
- io_end->num_io_pages = 0;
- inode = io_end->inode;
+ bio_put(bio);
if (error) {
io_end->flag |= EXT4_IO_END_ERROR;
@@ -335,7 +328,6 @@ static int io_submit_init(struct ext4_io_submit *io,
}
static int io_submit_add_bh(struct ext4_io_submit *io,
- struct ext4_io_page *io_page,
struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh)
@@ -343,11 +335,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
ext4_io_end_t *io_end;
int ret;
- if (buffer_new(bh)) {
- clear_buffer_new(bh);
- unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
- }
-
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
ext4_io_submit(io);
@@ -358,9 +345,6 @@ submit_and_retry:
return ret;
}
io_end = io->io_end;
- if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
- (io_end->pages[io_end->num_io_pages-1] != io_page))
- goto submit_and_retry;
if (buffer_uninit(bh))
ext4_set_io_unwritten_flag(inode, io_end);
io->io_end->size += bh->b_size;
@@ -368,11 +352,6 @@ submit_and_retry:
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- if ((io_end->num_io_pages == 0) ||
- (io_end->pages[io_end->num_io_pages-1] != io_page)) {
- io_end->pages[io_end->num_io_pages++] = io_page;
- atomic_inc(&io_page->p_count);
- }
return 0;
}
@@ -382,33 +361,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
- unsigned block_start, block_end, blocksize;
- struct ext4_io_page *io_page;
+ unsigned block_start, blocksize;
struct buffer_head *bh, *head;
int ret = 0;
+ int nr_submitted = 0;
blocksize = 1 << inode->i_blkbits;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
- if (!io_page) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return -ENOMEM;
- }
- io_page->p_page = page;
- atomic_set(&io_page->p_count, 1);
- get_page(page);
set_page_writeback(page);
ClearPageError(page);
- for (bh = head = page_buffers(page), block_start = 0;
- bh != head || !block_start;
- block_start = block_end, bh = bh->b_this_page) {
-
- block_end = block_start + blocksize;
+ /*
+ * In the first loop we prepare and mark buffers to submit. We have to
+ * mark all buffers in the page before submitting so that
+ * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
+ * on the first buffer finishes and we are still working on submitting
+ * the second buffer.
+ */
+ bh = head = page_buffers(page);
+ do {
+ block_start = bh_offset(bh);
if (block_start >= len) {
/*
* Comments copied from block_write_full_page_endio:
@@ -421,7 +396,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
* mapped, and writes to that region are not written
* out to the file."
*/
- zero_user_segment(page, block_start, block_end);
+ zero_user_segment(page, block_start,
+ block_start + blocksize);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
@@ -435,7 +411,19 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ext4_io_submit(io);
continue;
}
- ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ }
+ set_buffer_async_write(bh);
+ } while ((bh = bh->b_this_page) != head);
+
+ /* Now submit buffers to write */
+ bh = head = page_buffers(page);
+ do {
+ if (!buffer_async_write(bh))
+ continue;
+ ret = io_submit_add_bh(io, inode, wbc, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
@@ -445,17 +433,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
redirty_page_for_writepage(wbc, page);
break;
}
+ nr_submitted++;
clear_buffer_dirty(bh);
+ } while ((bh = bh->b_this_page) != head);
+
+ /* Error stopped previous loop? Clean up buffers... */
+ if (ret) {
+ do {
+ clear_buffer_async_write(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
}
unlock_page(page);
- /*
- * If the page was truncated before we could do the writeback,
- * or we had a memory allocation error while trying to write
- * the first buffer head, we won't have submitted any pages for
- * I/O. In that case we need to make sure we've cleared the
- * PageWriteback bit from the page to prevent the system from
- * wedging later on.
- */
- put_io_page(io_page);
+ /* Nothing submitted - we have to end page writeback */
+ if (!nr_submitted)
+ end_page_writeback(page);
return ret;
}
--
1.7.1
next prev parent reply other threads:[~2013-04-08 21:32 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-08 21:32 [PATCH 00/22 v1] Fixes and improvements in ext4 writeback path Jan Kara
2013-04-08 21:32 ` Jan Kara [this message]
2013-04-10 18:05 ` [PATCH 01/29] ext4: Make ext4_bio_write_page() use BH_Async_Write flags instead page pointers from ext4_io_end Dmitry Monakhov
2013-04-11 13:38 ` Zheng Liu
2013-04-12 3:50 ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 02/29] ext4: Use io_end for multiple bios Jan Kara
2013-04-11 5:10 ` Dmitry Monakhov
2013-04-11 14:04 ` Zheng Liu
2013-04-12 3:55 ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 03/29] ext4: Clear buffer_uninit flag when submitting IO Jan Kara
2013-04-11 14:08 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 04/29] jbd2: Reduce journal_head size Jan Kara
2013-04-11 14:10 ` Zheng Liu
2013-04-12 4:04 ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 05/29] jbd2: Don't create journal_head for temporary journal buffers Jan Kara
2013-04-12 8:01 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 06/29] jbd2: Remove journal_head from descriptor buffers Jan Kara
2013-04-12 8:10 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 07/29] jbd2: Refine waiting for shadow buffers Jan Kara
2013-05-03 14:16 ` Zheng Liu
2013-05-03 20:44 ` Jan Kara
2013-04-08 21:32 ` [PATCH 08/29] jbd2: Remove outdated comment Jan Kara
2013-05-03 14:20 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 09/29] jbd2: Cleanup needed free block estimates when starting a transaction Jan Kara
2013-05-05 8:17 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 10/29] jbd2: Fix race in t_outstanding_credits update in jbd2_journal_extend() Jan Kara
2013-05-05 8:37 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 11/29] jbd2: Remove unused waitqueues Jan Kara
2013-05-05 8:41 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 12/29] jbd2: Transaction reservation support Jan Kara
2013-05-05 9:39 ` Zheng Liu
2013-05-06 12:49 ` Jan Kara
2013-05-07 5:22 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 13/29] ext4: Provide wrappers for transaction reservation calls Jan Kara
2013-05-05 11:51 ` Zheng Liu
2013-05-05 11:58 ` Zheng Liu
2013-05-06 12:51 ` Jan Kara
2013-04-08 21:32 ` [PATCH 14/29] ext4: Stop messing with nr_to_write in ext4_da_writepages() Jan Kara
2013-05-05 12:40 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 15/29] ext4: Deprecate max_writeback_mb_bump sysfs attribute Jan Kara
2013-05-05 12:47 ` Zheng Liu
2013-05-06 12:55 ` Jan Kara
2013-04-08 21:32 ` [PATCH 16/29] ext4: Improve writepage credit estimate for files with indirect blocks Jan Kara
2013-05-07 5:39 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 17/29] ext4: Better estimate credits needed for ext4_da_writepages() Jan Kara
2013-05-07 6:33 ` Zheng Liu
2013-05-07 14:17 ` Jan Kara
2013-04-08 21:32 ` [PATCH 18/29] ext4: Restructure writeback path Jan Kara
2013-05-08 3:48 ` Zheng Liu
2013-05-08 11:20 ` Jan Kara
2013-04-08 21:32 ` [PATCH 19/29] ext4: Remove buffer_uninit handling Jan Kara
2013-05-08 6:56 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 20/29] ext4: Use transaction reservation for extent conversion in ext4_end_io Jan Kara
2013-05-08 6:57 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 21/29] ext4: Split extent conversion lists to reserved & unreserved parts Jan Kara
2013-05-08 7:03 ` Zheng Liu
2013-05-08 11:23 ` Jan Kara
2013-05-08 11:49 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 22/29] ext4: Defer clearing of PageWriteback after extent conversion Jan Kara
2013-05-08 7:08 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 23/29] ext4: Protect extent conversion after DIO with i_dio_count Jan Kara
2013-05-08 7:08 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 24/29] ext4: Remove wait for unwritten extent conversion from ext4_ext_truncate() Jan Kara
2013-05-08 7:35 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 25/29] ext4: Use generic_file_fsync() in ext4_file_fsync() in nojournal mode Jan Kara
2013-05-08 7:37 ` Zheng Liu
2013-05-08 11:29 ` Jan Kara
2013-04-08 21:32 ` [PATCH 26/29] ext4: Remove i_mutex from ext4_file_sync() Jan Kara
2013-05-08 7:41 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 27/29] ext4: Remove wait for unwritten extents in ext4_ind_direct_IO() Jan Kara
2013-05-08 7:55 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 28/29] ext4: Don't wait for extent conversion in ext4_ext_punch_hole() Jan Kara
2013-05-08 7:56 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 29/29] ext4: Remove ext4_ioend_wait() Jan Kara
2013-05-08 7:57 ` Zheng Liu
2013-05-08 11:32 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1365456754-29373-2-git-send-email-jack@suse.cz \
--to=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).