* [PATCH] fix memory corruption with small buffer reads
@ 2008-05-15 14:23 Christoph Hellwig
2008-05-15 16:17 ` Eric Sandeen
2008-05-15 16:18 ` Eric Sandeen
0 siblings, 2 replies; 3+ messages in thread
From: Christoph Hellwig @ 2008-05-15 14:23 UTC (permalink / raw)
To: xfs
When we have multiple buffers in a single page for a blocksize == pagesize
filesystem we might overwrite the page contents if two callers hit it
shortly after each other. To prevent that we need to keep the page
locked until I/O is completed and the page marked uptodate.
Thanks to Eric Sandeen for triaging this bug and finding a reproducible
testcase and Dave Chinner for additional advice.
This should fix kernel.org bz #10421.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.c 2008-05-15 11:45:10.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c 2008-05-15 15:26:09.000000000 +0200
@@ -386,6 +386,8 @@ _xfs_buf_lookup_pages(
if (unlikely(page == NULL)) {
if (flags & XBF_READ_AHEAD) {
bp->b_page_count = i;
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
return -ENOMEM;
}
@@ -415,17 +417,24 @@ _xfs_buf_lookup_pages(
ASSERT(!PagePrivate(page));
if (!PageUptodate(page)) {
page_count--;
- if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
+ if (blocksize >= PAGE_CACHE_SIZE) {
+ if (flags & XBF_READ)
+ bp->b_flags |= _XBF_PAGE_LOCKED;
+ } else if (!PagePrivate(page)) {
if (test_page_region(page, offset, nbytes))
page_count++;
}
}
- unlock_page(page);
bp->b_pages[i] = page;
offset = 0;
}
+ if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
+ }
+
if (page_count == bp->b_page_count)
bp->b_flags |= XBF_DONE;
@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
bp->b_count_desired = len;
bp->b_buffer_length = buflen;
bp->b_flags |= XBF_MAPPED;
+ bp->b_flags &= ~_XBF_PAGE_LOCKED;
return 0;
}
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
xfs_buf_t *bp,
int schedule)
{
- if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+ bp->b_flags &= ~_XBF_PAGE_LOCKED;
xfs_buf_ioend(bp, schedule);
+ }
}
STATIC void
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
+
+ if (bp->b_flags & _XBF_PAGE_LOCKED)
+ unlock_page(page);
} while (bvec >= bio->bi_io_vec);
_xfs_buf_ioend(bp, 1);
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
* filesystem block size is not smaller than the page size.
*/
if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
- (bp->b_flags & XBF_READ) &&
+ ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
+ (XBF_READ|_XBF_PAGE_LOCKED)) &&
(blocksize >= PAGE_CACHE_SIZE)) {
bio = bio_alloc(GFP_NOIO, 1);
Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.h 2008-05-15 11:45:10.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h 2008-05-15 15:26:09.000000000 +0200
@@ -66,6 +66,25 @@ typedef enum {
_XBF_PAGES = (1 << 18), /* backed by refcounted pages */
_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
_XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
+
+ /*
+ * Special flag for supporting metadata blocks smaller than a FSB.
+ *
+ * In this case we can have multiple xfs_buf_t on a single page and
+ * need to lock out concurrent xfs_buf_t readers as they only
+ * serialise access to the buffer.
+ *
+ * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+ * between reads of the page. Hence we can have one thread read the
+ * page and modify it, but then race with another thread that thinks
+ * the page is not up-to-date and hence reads it again.
+ *
+ * The result is that the first modification to the page is lost.
+ * This sort of AGF/AGI reading race can happen when unlinking inodes
+ * that require truncation and results in the AGI unlinked list
+ * modifications being lost.
+ */
+ _XBF_PAGE_LOCKED = (1 << 22),
} xfs_buf_flags_t;
typedef enum {
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] fix memory corruption with small buffer reads
2008-05-15 14:23 [PATCH] fix memory corruption with small buffer reads Christoph Hellwig
@ 2008-05-15 16:17 ` Eric Sandeen
2008-05-15 16:18 ` Eric Sandeen
1 sibling, 0 replies; 3+ messages in thread
From: Eric Sandeen @ 2008-05-15 16:17 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
Christoph Hellwig wrote:
> When we have multiple buffers in a single page for a blocksize == pagesize
> filesystem we might overwrite the page contents if two callers hit it
> shortly after each other. To prevent that we need to keep the page
> locked until I/O is completed and the page marked uptodate.
>
> Thanks to Eric Sandeen for triaging this bug and finding a reproducible
> testcase and Dave Chinner for additional advice.
>
> This should fix kernel.org bz #10421.
>
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Thanks for the fix, Christoph.
This has passed many, many iterations of my original testcase, which
almost always failed first-time on stock 2.6.25.
Tested-by: Eric Sandeen <sandeen@sandeen.net>
> Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c
> ===================================================================
> --- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.c 2008-05-15 11:45:10.000000000 +0200
> +++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c 2008-05-15 15:26:09.000000000 +0200
> @@ -386,6 +386,8 @@ _xfs_buf_lookup_pages(
> if (unlikely(page == NULL)) {
> if (flags & XBF_READ_AHEAD) {
> bp->b_page_count = i;
> + for (i = 0; i < bp->b_page_count; i++)
> + unlock_page(bp->b_pages[i]);
> return -ENOMEM;
> }
>
> @@ -415,17 +417,24 @@ _xfs_buf_lookup_pages(
> ASSERT(!PagePrivate(page));
> if (!PageUptodate(page)) {
> page_count--;
> - if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
> + if (blocksize >= PAGE_CACHE_SIZE) {
> + if (flags & XBF_READ)
> + bp->b_flags |= _XBF_PAGE_LOCKED;
> + } else if (!PagePrivate(page)) {
> if (test_page_region(page, offset, nbytes))
> page_count++;
> }
> }
>
> - unlock_page(page);
> bp->b_pages[i] = page;
> offset = 0;
> }
>
> + if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
> + for (i = 0; i < bp->b_page_count; i++)
> + unlock_page(bp->b_pages[i]);
> + }
> +
> if (page_count == bp->b_page_count)
> bp->b_flags |= XBF_DONE;
>
> @@ -746,6 +755,7 @@ xfs_buf_associate_memory(
> bp->b_count_desired = len;
> bp->b_buffer_length = buflen;
> bp->b_flags |= XBF_MAPPED;
> + bp->b_flags &= ~_XBF_PAGE_LOCKED;
>
> return 0;
> }
> @@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
> xfs_buf_t *bp,
> int schedule)
> {
> - if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
> + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
> + bp->b_flags &= ~_XBF_PAGE_LOCKED;
> xfs_buf_ioend(bp, schedule);
> + }
> }
>
> STATIC void
> @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
>
> if (--bvec >= bio->bi_io_vec)
> prefetchw(&bvec->bv_page->flags);
> +
> + if (bp->b_flags & _XBF_PAGE_LOCKED)
> + unlock_page(page);
> } while (bvec >= bio->bi_io_vec);
>
> _xfs_buf_ioend(bp, 1);
> @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
> * filesystem block size is not smaller than the page size.
> */
> if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
> - (bp->b_flags & XBF_READ) &&
> + ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
> + (XBF_READ|_XBF_PAGE_LOCKED)) &&
> (blocksize >= PAGE_CACHE_SIZE)) {
> bio = bio_alloc(GFP_NOIO, 1);
>
> Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h
> ===================================================================
> --- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.h 2008-05-15 11:45:10.000000000 +0200
> +++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h 2008-05-15 15:26:09.000000000 +0200
> @@ -66,6 +66,25 @@ typedef enum {
> _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
> _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
> _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
> +
> + /*
> + * Special flag for supporting metadata blocks smaller than a FSB.
> + *
> + * In this case we can have multiple xfs_buf_t on a single page and
> + * need to lock out concurrent xfs_buf_t readers as they only
> + * serialise access to the buffer.
> + *
> + * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
> + * between reads of the page. Hence we can have one thread read the
> + * page and modify it, but then race with another thread that thinks
> + * the page is not up-to-date and hence reads it again.
> + *
> + * The result is that the first modification to the page is lost.
> + * This sort of AGF/AGI reading race can happen when unlinking inodes
> + * that require truncation and results in the AGI unlinked list
> + * modifications being lost.
> + */
> + _XBF_PAGE_LOCKED = (1 << 22),
> } xfs_buf_flags_t;
>
> typedef enum {
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] fix memory corruption with small buffer reads
2008-05-15 14:23 [PATCH] fix memory corruption with small buffer reads Christoph Hellwig
2008-05-15 16:17 ` Eric Sandeen
@ 2008-05-15 16:18 ` Eric Sandeen
1 sibling, 0 replies; 3+ messages in thread
From: Eric Sandeen @ 2008-05-15 16:18 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
Christoph Hellwig wrote:
> When we have multiple buffers in a single page for a blocksize == pagesize
> filesystem we might overwrite the page contents if two callers hit it
> shortly after each other. To prevent that we need to keep the page
> locked until I/O is completed and the page marked uptodate.
>
> Thanks to Eric Sandeen for triaging this bug and finding a reproducible
> testcase and Dave Chinner for additional advice.
>
> This should fix kernel.org bz #10421.
Oh, this should go to -stable too, when everyone is happy with it...
Thanks,
-Eric
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c
> ===================================================================
> --- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.c 2008-05-15 11:45:10.000000000 +0200
> +++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.c 2008-05-15 15:26:09.000000000 +0200
> @@ -386,6 +386,8 @@ _xfs_buf_lookup_pages(
> if (unlikely(page == NULL)) {
> if (flags & XBF_READ_AHEAD) {
> bp->b_page_count = i;
> + for (i = 0; i < bp->b_page_count; i++)
> + unlock_page(bp->b_pages[i]);
> return -ENOMEM;
> }
>
> @@ -415,17 +417,24 @@ _xfs_buf_lookup_pages(
> ASSERT(!PagePrivate(page));
> if (!PageUptodate(page)) {
> page_count--;
> - if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
> + if (blocksize >= PAGE_CACHE_SIZE) {
> + if (flags & XBF_READ)
> + bp->b_flags |= _XBF_PAGE_LOCKED;
> + } else if (!PagePrivate(page)) {
> if (test_page_region(page, offset, nbytes))
> page_count++;
> }
> }
>
> - unlock_page(page);
> bp->b_pages[i] = page;
> offset = 0;
> }
>
> + if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
> + for (i = 0; i < bp->b_page_count; i++)
> + unlock_page(bp->b_pages[i]);
> + }
> +
> if (page_count == bp->b_page_count)
> bp->b_flags |= XBF_DONE;
>
> @@ -746,6 +755,7 @@ xfs_buf_associate_memory(
> bp->b_count_desired = len;
> bp->b_buffer_length = buflen;
> bp->b_flags |= XBF_MAPPED;
> + bp->b_flags &= ~_XBF_PAGE_LOCKED;
>
> return 0;
> }
> @@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
> xfs_buf_t *bp,
> int schedule)
> {
> - if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
> + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
> + bp->b_flags &= ~_XBF_PAGE_LOCKED;
> xfs_buf_ioend(bp, schedule);
> + }
> }
>
> STATIC void
> @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
>
> if (--bvec >= bio->bi_io_vec)
> prefetchw(&bvec->bv_page->flags);
> +
> + if (bp->b_flags & _XBF_PAGE_LOCKED)
> + unlock_page(page);
> } while (bvec >= bio->bi_io_vec);
>
> _xfs_buf_ioend(bp, 1);
> @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
> * filesystem block size is not smaller than the page size.
> */
> if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
> - (bp->b_flags & XBF_READ) &&
> + ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
> + (XBF_READ|_XBF_PAGE_LOCKED)) &&
> (blocksize >= PAGE_CACHE_SIZE)) {
> bio = bio_alloc(GFP_NOIO, 1);
>
> Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h
> ===================================================================
> --- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_buf.h 2008-05-15 11:45:10.000000000 +0200
> +++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_buf.h 2008-05-15 15:26:09.000000000 +0200
> @@ -66,6 +66,25 @@ typedef enum {
> _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
> _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
> _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
> +
> + /*
> + * Special flag for supporting metadata blocks smaller than a FSB.
> + *
> + * In this case we can have multiple xfs_buf_t on a single page and
> + * need to lock out concurrent xfs_buf_t readers as they only
> + * serialise access to the buffer.
> + *
> + * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
> + * between reads of the page. Hence we can have one thread read the
> + * page and modify it, but then race with another thread that thinks
> + * the page is not up-to-date and hence reads it again.
> + *
> + * The result is that the first modification to the page is lost.
> + * This sort of AGF/AGI reading race can happen when unlinking inodes
> + * that require truncation and results in the AGI unlinked list
> + * modifications being lost.
> + */
> + _XBF_PAGE_LOCKED = (1 << 22),
> } xfs_buf_flags_t;
>
> typedef enum {
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2008-05-15 16:17 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2008-05-15 14:23 [PATCH] fix memory corruption with small buffer reads Christoph Hellwig
2008-05-15 16:17 ` Eric Sandeen
2008-05-15 16:18 ` Eric Sandeen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox