From: Andrew Morton <akpm@linux-foundation.org>
To: npiggin@suse.de
Cc: linux-fsdevel@vger.kernel.org
Subject: Re: [patch 1/7] mm: write_cache_pages writepage error fix
Date: Tue, 21 Oct 2008 14:55:18 -0700 [thread overview]
Message-ID: <20081021145518.c5516cfe.akpm@linux-foundation.org> (raw)
In-Reply-To: <20081021081138.768065000@nick.local0.net>
On Tue, 21 Oct 2008 19:09:48 +1100
npiggin@suse.de wrote:
> In write_cache_pages, if ret signals a real error, but we still have some pages
> left in the pagevec, done would be set to 1, but the remaining pages would
> continue to be processed and ret will be overwritten in the process. It could
> easily be overwritten with success, and thus success will be returned even if
> there is an error. Thus the caller is told all writes succeeded, wheras in
> reality some did not.
>
> Fix this by bailing immediately if there is an error, and retaining the first
> error code.
>
> This is a data interity bug.
>
> Signed-off-by: Nick Piggin <npiggin@suse.de>
> ---
> Index: linux-2.6/mm/page-writeback.c
> ===================================================================
> --- linux-2.6.orig/mm/page-writeback.c
> +++ linux-2.6/mm/page-writeback.c
> @@ -931,12 +931,16 @@ retry:
> }
>
> ret = (*writepage)(page, wbc, data);
> -
> - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
> - unlock_page(page);
> - ret = 0;
> - }
> - if (ret || (--nr_to_write <= 0))
> + if (unlikely(ret)) {
> + if (ret == AOP_WRITEPAGE_ACTIVATE) {
> + unlock_page(page);
> + ret = 0;
> + } else {
> + done = 1;
> + break;
> + }
> + }
> + if (--nr_to_write <= 0)
> done = 1;
> if (wbc->nonblocking && bdi_write_congested(bdi)) {
> wbc->encountered_congestion = 1;
>
> --
I don't think I like the implementation much.
In all cases in that function where we set done=1, we want to bale out
right now at this page, rather than processing the remaining pages in
the pagevec.
So it would be better to implement a bit of code which releases the
pagevec pages and then breaks out of the loop. Then this bug
automatically gets fixed.
Like this:
--- a/mm/page-writeback.c~mm-write_cache_pages-writepage-error-fix
+++ a/mm/page-writeback.c
@@ -865,7 +865,6 @@ int write_cache_pages(struct address_spa
{
struct backing_dev_info *bdi = mapping->backing_dev_info;
int ret = 0;
- int done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
@@ -891,10 +890,10 @@ int write_cache_pages(struct address_spa
scanned = 1;
}
retry:
- while (!done && (index <= end) &&
+ while ((index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
unsigned i;
scanned = 1;
@@ -903,9 +902,9 @@ retry:
/*
* At this point we hold neither mapping->tree_lock nor
- * lock on the page itself: the page may be truncated or
- * invalidated (changing page->mapping to NULL), or even
- * swizzled back from swapper_space to tmpfs file
+ * lock on the page itself: the page may be truncated
+ * or invalidated (changing page->mapping to NULL), or
+ * even swizzled back from swapper_space to tmpfs file
* mapping
*/
lock_page(page);
@@ -916,8 +915,8 @@ retry:
}
if (!wbc->range_cyclic && page->index > end) {
- done = 1;
unlock_page(page);
+ goto bale;
continue;
}
@@ -937,16 +936,16 @@ retry:
ret = 0;
}
if (ret || (--nr_to_write <= 0))
- done = 1;
+ goto bale;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
- done = 1;
+ goto bale;
}
}
pagevec_release(&pvec);
cond_resched();
}
- if (!scanned && !done) {
+ if (!scanned) {
/*
* We hit the last page and there is more work to be done: wrap
* back to the start of the file
@@ -961,7 +960,11 @@ retry:
wbc->nr_to_write = nr_to_write;
}
+out:
return ret;
+bale:
+ pagevec_release(&pvec);
+ goto out;
}
EXPORT_SYMBOL(write_cache_pages);
_
Which yields this:
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data)
{
struct backing_dev_info *bdi = mapping->backing_dev_info;
int ret = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
int range_whole = 0;
long nr_to_write = wbc->nr_to_write;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
return 0;
}
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
scanned = 1;
}
retry:
while ((index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
unsigned i;
scanned = 1;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
/*
* At this point we hold neither mapping->tree_lock nor
* lock on the page itself: the page may be truncated
* or invalidated (changing page->mapping to NULL), or
* even swizzled back from swapper_space to tmpfs file
* mapping
*/
lock_page(page);
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
continue;
}
if (!wbc->range_cyclic && page->index > end) {
unlock_page(page);
goto bale;
continue;
}
if (wbc->sync_mode != WB_SYNC_NONE)
wait_on_page_writeback(page);
if (PageWriteback(page) ||
!clear_page_dirty_for_io(page)) {
unlock_page(page);
continue;
}
ret = (*writepage)(page, wbc, data);
if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
unlock_page(page);
ret = 0;
}
if (ret || (--nr_to_write <= 0))
goto bale;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
goto bale;
}
}
pagevec_release(&pvec);
cond_resched();
}
if (!scanned) {
/*
* We hit the last page and there is more work to be done: wrap
* back to the start of the file
*/
scanned = 1;
index = 0;
goto retry;
}
if (!wbc->no_nrwrite_index_update) {
if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
mapping->writeback_index = index;
wbc->nr_to_write = nr_to_write;
}
out:
return ret;
bale:
pagevec_release(&pvec);
goto out;
}
next prev parent reply other threads:[~2008-10-21 21:55 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-21 8:09 [patch 0/7] writeback data integrity and other fixes npiggin
2008-10-21 8:09 ` [patch 1/7] mm: write_cache_pages writepage error fix npiggin
2008-10-21 21:55 ` Andrew Morton [this message]
2008-10-21 22:01 ` Matthew Wilcox
2008-10-21 22:05 ` Andrew Morton
2008-10-22 9:36 ` Nick Piggin
2008-10-22 16:37 ` Andrew Morton
2008-10-28 13:18 ` Nick Piggin
2008-10-21 8:09 ` [patch 2/7] mm: write_cache_pages integrity fix npiggin
2008-10-21 8:09 ` [patch 3/7] mm: do_sync_mapping_range " npiggin
2008-10-21 8:09 ` [patch 4/7] mm: write_cache_pages cyclic fix npiggin
2008-10-21 8:09 ` [patch 5/7] mm: write_cache_pages cleanups npiggin
2008-10-21 8:09 ` [patch 6/7] mm: write_cache_pages optimise page cleaning npiggin
2008-10-21 8:09 ` [patch 7/7] mm: write_cache_pages terminate quickly npiggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081021145518.c5516cfe.akpm@linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=npiggin@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).