Re: [PATCH] fix memory corruption with small buffer reads

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Eric Sandeen <sandeen@sandeen.net>
To: Christoph Hellwig <hch@lst.de>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH] fix memory corruption with small buffer reads
Date: Thu, 15 May 2008 11:18:05 -0500	[thread overview]
Message-ID: <482C623D.7070208@sandeen.net> (raw)
In-Reply-To: <20080515142306.GA29842@lst.de>

Christoph Hellwig wrote:
> When we have multiple buffers in a single page for a blocksize == pagesize
> filesystem we might overwrite the page contents if two callers hit it
> shortly after each other.  To prevent that we need to keep the page
> locked until I/O is completed and the page marked uptodate.
> 
> Thanks to Eric Sandeen for triaging this bug and finding a reproducible
> testcase and Dave Chinner for additional advice.
> 
> This should fix kernel.org bz #10421.

Oh, this should go to -stable too, when everyone is happy with it...

Thanks,

-Eric

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c
> ===================================================================
> --- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.c	2008-05-15 11:45:10.000000000 +0200
> +++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c	2008-05-15 15:26:09.000000000 +0200
> @@ -386,6 +386,8 @@ _xfs_buf_lookup_pages(
>  		if (unlikely(page == NULL)) {
>  			if (flags & XBF_READ_AHEAD) {
>  				bp->b_page_count = i;
> +				for (i = 0; i < bp->b_page_count; i++)
> +					unlock_page(bp->b_pages[i]);
>  				return -ENOMEM;
>  			}
>  
> @@ -415,17 +417,24 @@ _xfs_buf_lookup_pages(
>  		ASSERT(!PagePrivate(page));
>  		if (!PageUptodate(page)) {
>  			page_count--;
> -			if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
> +			if (blocksize >= PAGE_CACHE_SIZE) {
> +				if (flags & XBF_READ)
> +					bp->b_flags |= _XBF_PAGE_LOCKED;
> +			} else if (!PagePrivate(page)) {
>  				if (test_page_region(page, offset, nbytes))
>  					page_count++;
>  			}
>  		}
>  
> -		unlock_page(page);
>  		bp->b_pages[i] = page;
>  		offset = 0;
>  	}
>  
> +	if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
> +		for (i = 0; i < bp->b_page_count; i++)
> +			unlock_page(bp->b_pages[i]);
> +	}
> +
>  	if (page_count == bp->b_page_count)
>  		bp->b_flags |= XBF_DONE;
>  
> @@ -746,6 +755,7 @@ xfs_buf_associate_memory(
>  	bp->b_count_desired = len;
>  	bp->b_buffer_length = buflen;
>  	bp->b_flags |= XBF_MAPPED;
> +	bp->b_flags &= ~_XBF_PAGE_LOCKED;
>  
>  	return 0;
>  }
> @@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
>  	xfs_buf_t		*bp,
>  	int			schedule)
>  {
> -	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
> +	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
> +		bp->b_flags &= ~_XBF_PAGE_LOCKED;
>  		xfs_buf_ioend(bp, schedule);
> +	}
>  }
>  
>  STATIC void
> @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
>  
>  		if (--bvec >= bio->bi_io_vec)
>  			prefetchw(&bvec->bv_page->flags);
> +
> +		if (bp->b_flags & _XBF_PAGE_LOCKED)
> +			unlock_page(page);
>  	} while (bvec >= bio->bi_io_vec);
>  
>  	_xfs_buf_ioend(bp, 1);
> @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
>  	 * filesystem block size is not smaller than the page size.
>  	 */
>  	if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
> -	    (bp->b_flags & XBF_READ) &&
> +	    ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
> +	      (XBF_READ|_XBF_PAGE_LOCKED)) &&
>  	    (blocksize >= PAGE_CACHE_SIZE)) {
>  		bio = bio_alloc(GFP_NOIO, 1);
>  
> Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h
> ===================================================================
> --- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.h	2008-05-15 11:45:10.000000000 +0200
> +++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h	2008-05-15 15:26:09.000000000 +0200
> @@ -66,6 +66,25 @@ typedef enum {
>  	_XBF_PAGES = (1 << 18),	    /* backed by refcounted pages	   */
>  	_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
>  	_XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
> +
> +	/*
> +	 * Special flag for supporting metadata blocks smaller than a FSB.
> +	 *
> +	 * In this case we can have multiple xfs_buf_t on a single page and
> +	 * need to lock out concurrent xfs_buf_t readers as they only
> +	 * serialise access to the buffer.
> +	 *
> +	 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
> +	 * between reads of the page. Hence we can have one thread read the
> +	 * page and modify it, but then race with another thread that thinks
> +	 * the page is not up-to-date and hence reads it again.
> +	 *
> +	 * The result is that the first modifcation to the page is lost.
> +	 * This sort of AGF/AGI reading race can happen when unlinking inodes
> +	 * that require truncation and results in the AGI unlinked list
> +	 * modifications being lost.
> +	 */
> +	_XBF_PAGE_LOCKED = (1 << 22),
>  } xfs_buf_flags_t;
>  
>  typedef enum {
> 
>

     prev parent reply	other threads:[~2008-05-15 16:17 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-15 14:23 [PATCH] fix memory corruption with small buffer reads Christoph Hellwig
2008-05-15 16:17 ` Eric Sandeen
2008-05-15 16:18 ` Eric Sandeen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=482C623D.7070208@sandeen.net \
    --to=sandeen@sandeen.net \
    --cc=hch@lst.de \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.