All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@linux-foundation.org>
To: npiggin@suse.de
Cc: linux-fsdevel@vger.kernel.org
Subject: Re: [patch 1/7] mm: write_cache_pages writepage error fix
Date: Tue, 21 Oct 2008 14:55:18 -0700	[thread overview]
Message-ID: <20081021145518.c5516cfe.akpm@linux-foundation.org> (raw)
In-Reply-To: <20081021081138.768065000@nick.local0.net>

On Tue, 21 Oct 2008 19:09:48 +1100
npiggin@suse.de wrote:

> In write_cache_pages, if ret signals a real error, but we still have some pages
> left in the pagevec, done would be set to 1, but the remaining pages would
> continue to be processed and ret will be overwritten in the process. It could
> easily be overwritten with success, and thus success will be returned even if
> there is an error. Thus the caller is told all writes succeeded, wheras in
> reality some did not.
> 
> Fix this by bailing immediately if there is an error, and retaining the first
> error code.
> 
> This is a data interity bug.
> 
> Signed-off-by: Nick Piggin <npiggin@suse.de>
> ---
> Index: linux-2.6/mm/page-writeback.c
> ===================================================================
> --- linux-2.6.orig/mm/page-writeback.c
> +++ linux-2.6/mm/page-writeback.c
> @@ -931,12 +931,16 @@ retry:
>  			}
>  
>  			ret = (*writepage)(page, wbc, data);
> -
> -			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
> -				unlock_page(page);
> -				ret = 0;
> -			}
> -			if (ret || (--nr_to_write <= 0))
> +			if (unlikely(ret)) {
> +				if (ret == AOP_WRITEPAGE_ACTIVATE) {
> +					unlock_page(page);
> +					ret = 0;
> +				} else {
> +					done = 1;
> +					break;
> +				}
> + 			}
> +			if (--nr_to_write <= 0)
>  				done = 1;
>  			if (wbc->nonblocking && bdi_write_congested(bdi)) {
>  				wbc->encountered_congestion = 1;
> 
> -- 

I don't think I like the implementation much.

In all cases in that function where we set done=1, we want to bale out
right now at this page, rather than processing the remaining pages in
the pagevec.

So it would be better to implement a bit of code which releases the
pagevec pages and then breaks out of the loop.  Then this bug
automatically gets fixed.

Like this:

--- a/mm/page-writeback.c~mm-write_cache_pages-writepage-error-fix
+++ a/mm/page-writeback.c
@@ -865,7 +865,6 @@ int write_cache_pages(struct address_spa
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	int ret = 0;
-	int done = 0;
 	struct pagevec pvec;
 	int nr_pages;
 	pgoff_t index;
@@ -891,10 +890,10 @@ int write_cache_pages(struct address_spa
 		scanned = 1;
 	}
 retry:
-	while (!done && (index <= end) &&
+	while ((index <= end) &&
 	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-					      PAGECACHE_TAG_DIRTY,
-					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+				PAGECACHE_TAG_DIRTY,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
 		unsigned i;
 
 		scanned = 1;
@@ -903,9 +902,9 @@ retry:
 
 			/*
 			 * At this point we hold neither mapping->tree_lock nor
-			 * lock on the page itself: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or even
-			 * swizzled back from swapper_space to tmpfs file
+			 * lock on the page itself: the page may be truncated
+			 * or invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
 			 * mapping
 			 */
 			lock_page(page);
@@ -916,8 +915,8 @@ retry:
 			}
 
 			if (!wbc->range_cyclic && page->index > end) {
-				done = 1;
 				unlock_page(page);
+				goto bale;
 				continue;
 			}
 
@@ -937,16 +936,16 @@ retry:
 				ret = 0;
 			}
 			if (ret || (--nr_to_write <= 0))
-				done = 1;
+				goto bale;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
 				wbc->encountered_congestion = 1;
-				done = 1;
+				goto bale;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
-	if (!scanned && !done) {
+	if (!scanned) {
 		/*
 		 * We hit the last page and there is more work to be done: wrap
 		 * back to the start of the file
@@ -961,7 +960,11 @@ retry:
 		wbc->nr_to_write = nr_to_write;
 	}
 
+out:
 	return ret;
+bale:
+	pagevec_release(&pvec);
+	goto out;
 }
 EXPORT_SYMBOL(write_cache_pages);
 
_


Which yields this:

int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	int scanned = 0;
	int range_whole = 0;
	long nr_to_write = wbc->nr_to_write;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		scanned = 1;
	}
retry:
	while ((index <= end) &&
	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
		unsigned i;

		scanned = 1;
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated
			 * or invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping
			 */
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			if (!wbc->range_cyclic && page->index > end) {
				unlock_page(page);
				goto bale;
				continue;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
			    !clear_page_dirty_for_io(page)) {
				unlock_page(page);
				continue;
			}

			ret = (*writepage)(page, wbc, data);

			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
				unlock_page(page);
				ret = 0;
			}
			if (ret || (--nr_to_write <= 0))
				goto bale;
			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				goto bale;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!scanned) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}
	if (!wbc->no_nrwrite_index_update) {
		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
			mapping->writeback_index = index;
		wbc->nr_to_write = nr_to_write;
	}

out:
	return ret;
bale:
	pagevec_release(&pvec);
	goto out;
}


  reply	other threads:[~2008-10-21 21:55 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-21  8:09 [patch 0/7] writeback data integrity and other fixes npiggin
2008-10-21  8:09 ` [patch 1/7] mm: write_cache_pages writepage error fix npiggin
2008-10-21 21:55   ` Andrew Morton [this message]
2008-10-21 22:01     ` Matthew Wilcox
2008-10-21 22:05       ` Andrew Morton
2008-10-22  9:36     ` Nick Piggin
2008-10-22 16:37       ` Andrew Morton
2008-10-28 13:18         ` Nick Piggin
2008-10-21  8:09 ` [patch 2/7] mm: write_cache_pages integrity fix npiggin
2008-10-21  8:09 ` [patch 3/7] mm: do_sync_mapping_range " npiggin
2008-10-21  8:09 ` [patch 4/7] mm: write_cache_pages cyclic fix npiggin
2008-10-21  8:09 ` [patch 5/7] mm: write_cache_pages cleanups npiggin
2008-10-21  8:09 ` [patch 6/7] mm: write_cache_pages optimise page cleaning npiggin
2008-10-21  8:09 ` [patch 7/7] mm: write_cache_pages terminate quickly npiggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081021145518.c5516cfe.akpm@linux-foundation.org \
    --to=akpm@linux-foundation.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=npiggin@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.