From: Dmitry Monakhov <dmonakhov@openvz.org>
To: Jan Kara <jack@suse.cz>, Ted Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org, Jan Kara <jack@suse.cz>
Subject: Re: [PATCH 01/29] ext4: Make ext4_bio_write_page() use BH_Async_Write flags instead page pointers from ext4_io_end
Date: Wed, 10 Apr 2013 22:05:08 +0400 [thread overview]
Message-ID: <87wqsagt2j.fsf@openvz.org> (raw)
In-Reply-To: <1365456754-29373-2-git-send-email-jack@suse.cz>
On Mon, 8 Apr 2013 23:32:06 +0200, Jan Kara <jack@suse.cz> wrote:
> So far ext4_bio_write_page() attached all the pages to ext4_io_end structure.
> This makes that structure pretty heavy (1 KB for pointers + 16 bytes per
> page attached to the bio). Also later we would like to share ext4_io_end
> structure among several bios in case IO to a single extent needs to be split
> among several bios and pointing to pages from ext4_io_end makes this complex.
>
> We remove page pointers from ext4_io_end and use pointers from bio itself
> instead. This isn't as easy when blocksize < pagesize because then we can have
> several bios in flight for a single page and we have to be careful when to call
> end_page_writeback(). However this is a known problem already solved by
> block_write_full_page() / end_buffer_async_write() so we mimic its behavior
> here. We mark buffers going to disk with BH_Async_Write flag and in
> ext4_bio_end_io() we check whether there are any buffers with BH_Async_Write
> flag left. If there are not, we can call end_page_writeback().
Definitely looks better than my semi-fix.
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
> fs/ext4/ext4.h | 14 -----
> fs/ext4/page-io.c | 163 +++++++++++++++++++++++++----------------------------
> 2 files changed, 77 insertions(+), 100 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 4a01ba3..3c70547 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -196,19 +196,8 @@ struct mpage_da_data {
> #define EXT4_IO_END_ERROR 0x0002
> #define EXT4_IO_END_DIRECT 0x0004
>
> -struct ext4_io_page {
> - struct page *p_page;
> - atomic_t p_count;
> -};
> -
> -#define MAX_IO_PAGES 128
> -
> /*
> * For converting uninitialized extents on a work queue.
> - *
> - * 'page' is only used from the writepage() path; 'pages' is only used for
> - * buffered writes; they are used to keep page references until conversion
> - * takes place. For AIO/DIO, neither field is filled in.
> */
> typedef struct ext4_io_end {
> struct list_head list; /* per-file finished IO list */
> @@ -218,15 +207,12 @@ typedef struct ext4_io_end {
> ssize_t size; /* size of the extent */
> struct kiocb *iocb; /* iocb struct for AIO */
> int result; /* error value for AIO */
> - int num_io_pages; /* for writepages() */
> - struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
> } ext4_io_end_t;
>
> struct ext4_io_submit {
> int io_op;
> struct bio *io_bio;
> ext4_io_end_t *io_end;
> - struct ext4_io_page *io_page;
> sector_t io_next_block;
> };
>
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index 809b310..73bc011 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -29,25 +29,19 @@
> #include "xattr.h"
> #include "acl.h"
>
> -static struct kmem_cache *io_page_cachep, *io_end_cachep;
> +static struct kmem_cache *io_end_cachep;
>
> int __init ext4_init_pageio(void)
> {
> - io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
> - if (io_page_cachep == NULL)
> - return -ENOMEM;
> io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
> - if (io_end_cachep == NULL) {
> - kmem_cache_destroy(io_page_cachep);
> + if (io_end_cachep == NULL)
> return -ENOMEM;
> - }
> return 0;
> }
>
> void ext4_exit_pageio(void)
> {
> kmem_cache_destroy(io_end_cachep);
> - kmem_cache_destroy(io_page_cachep);
> }
>
> void ext4_ioend_wait(struct inode *inode)
> @@ -57,15 +51,6 @@ void ext4_ioend_wait(struct inode *inode)
> wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
> }
>
> -static void put_io_page(struct ext4_io_page *io_page)
> -{
> - if (atomic_dec_and_test(&io_page->p_count)) {
> - end_page_writeback(io_page->p_page);
> - put_page(io_page->p_page);
> - kmem_cache_free(io_page_cachep, io_page);
> - }
> -}
> -
> void ext4_free_io_end(ext4_io_end_t *io)
> {
> int i;
> @@ -74,9 +59,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
> BUG_ON(!list_empty(&io->list));
> BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
>
> - for (i = 0; i < io->num_io_pages; i++)
> - put_io_page(io->pages[i]);
> - io->num_io_pages = 0;
> if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
> wake_up_all(ext4_ioend_wq(io->inode));
> kmem_cache_free(io_end_cachep, io);
> @@ -233,45 +215,56 @@ static void ext4_end_bio(struct bio *bio, int error)
> ext4_io_end_t *io_end = bio->bi_private;
> struct inode *inode;
> int i;
> + int blocksize;
> sector_t bi_sector = bio->bi_sector;
>
> BUG_ON(!io_end);
> + inode = io_end->inode;
> + blocksize = 1 << inode->i_blkbits;
> bio->bi_private = NULL;
> bio->bi_end_io = NULL;
> if (test_bit(BIO_UPTODATE, &bio->bi_flags))
> error = 0;
> - bio_put(bio);
> -
> - for (i = 0; i < io_end->num_io_pages; i++) {
> - struct page *page = io_end->pages[i]->p_page;
> + for (i = 0; i < bio->bi_vcnt; i++) {
> + struct bio_vec *bvec = &bio->bi_io_vec[i];
> + struct page *page = bvec->bv_page;
> struct buffer_head *bh, *head;
> - loff_t offset;
> - loff_t io_end_offset;
> + unsigned bio_start = bvec->bv_offset;
> + unsigned bio_end = bio_start + bvec->bv_len;
> + unsigned under_io = 0;
> + unsigned long flags;
> +
> + if (!page)
> + continue;
>
> if (error) {
> SetPageError(page);
> set_bit(AS_EIO, &page->mapping->flags);
> - head = page_buffers(page);
> - BUG_ON(!head);
> -
> - io_end_offset = io_end->offset + io_end->size;
> -
> - offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
> - bh = head;
> - do {
> - if ((offset >= io_end->offset) &&
> - (offset+bh->b_size <= io_end_offset))
> - buffer_io_error(bh);
> -
> - offset += bh->b_size;
> - bh = bh->b_this_page;
> - } while (bh != head);
> }
> -
> - put_io_page(io_end->pages[i]);
> + bh = head = page_buffers(page);
> + /*
> + * We check all buffers in the page under BH_Uptodate_Lock
> + * to avoid races with other end io clearing async_write flags
> + */
> + local_irq_save(flags);
> + bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
> + do {
> + if (bh_offset(bh) < bio_start ||
> + bh_offset(bh) + blocksize > bio_end) {
> + if (buffer_async_write(bh))
> + under_io++;
> + continue;
> + }
> + clear_buffer_async_write(bh);
> + if (error)
> + buffer_io_error(bh);
> + } while ((bh = bh->b_this_page) != head);
> + bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
> + local_irq_restore(flags);
> + if (!under_io)
> + end_page_writeback(page);
> }
> - io_end->num_io_pages = 0;
> - inode = io_end->inode;
> + bio_put(bio);
>
> if (error) {
> io_end->flag |= EXT4_IO_END_ERROR;
> @@ -335,7 +328,6 @@ static int io_submit_init(struct ext4_io_submit *io,
> }
>
> static int io_submit_add_bh(struct ext4_io_submit *io,
> - struct ext4_io_page *io_page,
> struct inode *inode,
> struct writeback_control *wbc,
> struct buffer_head *bh)
> @@ -343,11 +335,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
> ext4_io_end_t *io_end;
> int ret;
>
> - if (buffer_new(bh)) {
> - clear_buffer_new(bh);
> - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
> - }
> -
> if (io->io_bio && bh->b_blocknr != io->io_next_block) {
> submit_and_retry:
> ext4_io_submit(io);
> @@ -358,9 +345,6 @@ submit_and_retry:
> return ret;
> }
> io_end = io->io_end;
> - if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
> - (io_end->pages[io_end->num_io_pages-1] != io_page))
> - goto submit_and_retry;
> if (buffer_uninit(bh))
> ext4_set_io_unwritten_flag(inode, io_end);
> io->io_end->size += bh->b_size;
> @@ -368,11 +352,6 @@ submit_and_retry:
> ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
> if (ret != bh->b_size)
> goto submit_and_retry;
> - if ((io_end->num_io_pages == 0) ||
> - (io_end->pages[io_end->num_io_pages-1] != io_page)) {
> - io_end->pages[io_end->num_io_pages++] = io_page;
> - atomic_inc(&io_page->p_count);
> - }
> return 0;
> }
>
> @@ -382,33 +361,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> struct writeback_control *wbc)
> {
> struct inode *inode = page->mapping->host;
> - unsigned block_start, block_end, blocksize;
> - struct ext4_io_page *io_page;
> + unsigned block_start, blocksize;
> struct buffer_head *bh, *head;
> int ret = 0;
> + int nr_submitted = 0;
>
> blocksize = 1 << inode->i_blkbits;
>
> BUG_ON(!PageLocked(page));
> BUG_ON(PageWriteback(page));
>
> - io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
> - if (!io_page) {
> - redirty_page_for_writepage(wbc, page);
> - unlock_page(page);
> - return -ENOMEM;
> - }
> - io_page->p_page = page;
> - atomic_set(&io_page->p_count, 1);
> - get_page(page);
> set_page_writeback(page);
> ClearPageError(page);
>
> - for (bh = head = page_buffers(page), block_start = 0;
> - bh != head || !block_start;
> - block_start = block_end, bh = bh->b_this_page) {
> -
> - block_end = block_start + blocksize;
> + /*
> + * In the first loop we prepare and mark buffers to submit. We have to
> + * mark all buffers in the page before submitting so that
> + * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
> + * on the first buffer finishes and we are still working on submitting
> + * the second buffer.
> + */
> + bh = head = page_buffers(page);
> + do {
> + block_start = bh_offset(bh);
> if (block_start >= len) {
> /*
> * Comments copied from block_write_full_page_endio:
> @@ -421,7 +396,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> * mapped, and writes to that region are not written
> * out to the file."
> */
> - zero_user_segment(page, block_start, block_end);
> + zero_user_segment(page, block_start,
> + block_start + blocksize);
> clear_buffer_dirty(bh);
> set_buffer_uptodate(bh);
> continue;
> @@ -435,7 +411,19 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> ext4_io_submit(io);
> continue;
> }
> - ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
> + if (buffer_new(bh)) {
> + clear_buffer_new(bh);
> + unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
> + }
> + set_buffer_async_write(bh);
> + } while ((bh = bh->b_this_page) != head);
> +
> + /* Now submit buffers to write */
> + bh = head = page_buffers(page);
> + do {
> + if (!buffer_async_write(bh))
> + continue;
> + ret = io_submit_add_bh(io, inode, wbc, bh);
> if (ret) {
> /*
> * We only get here on ENOMEM. Not much else
> @@ -445,17 +433,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> redirty_page_for_writepage(wbc, page);
> break;
> }
> + nr_submitted++;
> clear_buffer_dirty(bh);
> + } while ((bh = bh->b_this_page) != head);
> +
> + /* Error stopped previous loop? Clean up buffers... */
> + if (ret) {
> + do {
> + clear_buffer_async_write(bh);
> + bh = bh->b_this_page;
> + } while (bh != head);
> }
> unlock_page(page);
> - /*
> - * If the page was truncated before we could do the writeback,
> - * or we had a memory allocation error while trying to write
> - * the first buffer head, we won't have submitted any pages for
> - * I/O. In that case we need to make sure we've cleared the
> - * PageWriteback bit from the page to prevent the system from
> - * wedging later on.
> - */
> - put_io_page(io_page);
> + /* Nothing submitted - we have to end page writeback */
> + if (!nr_submitted)
> + end_page_writeback(page);
> return ret;
> }
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2013-04-10 18:05 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-08 21:32 [PATCH 00/22 v1] Fixes and improvements in ext4 writeback path Jan Kara
2013-04-08 21:32 ` [PATCH 01/29] ext4: Make ext4_bio_write_page() use BH_Async_Write flags instead page pointers from ext4_io_end Jan Kara
2013-04-10 18:05 ` Dmitry Monakhov [this message]
2013-04-11 13:38 ` Zheng Liu
2013-04-12 3:50 ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 02/29] ext4: Use io_end for multiple bios Jan Kara
2013-04-11 5:10 ` Dmitry Monakhov
2013-04-11 14:04 ` Zheng Liu
2013-04-12 3:55 ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 03/29] ext4: Clear buffer_uninit flag when submitting IO Jan Kara
2013-04-11 14:08 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 04/29] jbd2: Reduce journal_head size Jan Kara
2013-04-11 14:10 ` Zheng Liu
2013-04-12 4:04 ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 05/29] jbd2: Don't create journal_head for temporary journal buffers Jan Kara
2013-04-12 8:01 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 06/29] jbd2: Remove journal_head from descriptor buffers Jan Kara
2013-04-12 8:10 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 07/29] jbd2: Refine waiting for shadow buffers Jan Kara
2013-05-03 14:16 ` Zheng Liu
2013-05-03 20:44 ` Jan Kara
2013-04-08 21:32 ` [PATCH 08/29] jbd2: Remove outdated comment Jan Kara
2013-05-03 14:20 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 09/29] jbd2: Cleanup needed free block estimates when starting a transaction Jan Kara
2013-05-05 8:17 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 10/29] jbd2: Fix race in t_outstanding_credits update in jbd2_journal_extend() Jan Kara
2013-05-05 8:37 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 11/29] jbd2: Remove unused waitqueues Jan Kara
2013-05-05 8:41 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 12/29] jbd2: Transaction reservation support Jan Kara
2013-05-05 9:39 ` Zheng Liu
2013-05-06 12:49 ` Jan Kara
2013-05-07 5:22 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 13/29] ext4: Provide wrappers for transaction reservation calls Jan Kara
2013-05-05 11:51 ` Zheng Liu
2013-05-05 11:58 ` Zheng Liu
2013-05-06 12:51 ` Jan Kara
2013-04-08 21:32 ` [PATCH 14/29] ext4: Stop messing with nr_to_write in ext4_da_writepages() Jan Kara
2013-05-05 12:40 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 15/29] ext4: Deprecate max_writeback_mb_bump sysfs attribute Jan Kara
2013-05-05 12:47 ` Zheng Liu
2013-05-06 12:55 ` Jan Kara
2013-04-08 21:32 ` [PATCH 16/29] ext4: Improve writepage credit estimate for files with indirect blocks Jan Kara
2013-05-07 5:39 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 17/29] ext4: Better estimate credits needed for ext4_da_writepages() Jan Kara
2013-05-07 6:33 ` Zheng Liu
2013-05-07 14:17 ` Jan Kara
2013-04-08 21:32 ` [PATCH 18/29] ext4: Restructure writeback path Jan Kara
2013-05-08 3:48 ` Zheng Liu
2013-05-08 11:20 ` Jan Kara
2013-04-08 21:32 ` [PATCH 19/29] ext4: Remove buffer_uninit handling Jan Kara
2013-05-08 6:56 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 20/29] ext4: Use transaction reservation for extent conversion in ext4_end_io Jan Kara
2013-05-08 6:57 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 21/29] ext4: Split extent conversion lists to reserved & unreserved parts Jan Kara
2013-05-08 7:03 ` Zheng Liu
2013-05-08 11:23 ` Jan Kara
2013-05-08 11:49 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 22/29] ext4: Defer clearing of PageWriteback after extent conversion Jan Kara
2013-05-08 7:08 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 23/29] ext4: Protect extent conversion after DIO with i_dio_count Jan Kara
2013-05-08 7:08 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 24/29] ext4: Remove wait for unwritten extent conversion from ext4_ext_truncate() Jan Kara
2013-05-08 7:35 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 25/29] ext4: Use generic_file_fsync() in ext4_file_fsync() in nojournal mode Jan Kara
2013-05-08 7:37 ` Zheng Liu
2013-05-08 11:29 ` Jan Kara
2013-04-08 21:32 ` [PATCH 26/29] ext4: Remove i_mutex from ext4_file_sync() Jan Kara
2013-05-08 7:41 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 27/29] ext4: Remove wait for unwritten extents in ext4_ind_direct_IO() Jan Kara
2013-05-08 7:55 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 28/29] ext4: Don't wait for extent conversion in ext4_ext_punch_hole() Jan Kara
2013-05-08 7:56 ` Zheng Liu
2013-04-08 21:32 ` [PATCH 29/29] ext4: Remove ext4_ioend_wait() Jan Kara
2013-05-08 7:57 ` Zheng Liu
2013-05-08 11:32 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87wqsagt2j.fsf@openvz.org \
--to=dmonakhov@openvz.org \
--cc=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.