public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Btrfs: correct mistakes in direct I/O read found by fsx
@ 2010-01-25 14:01 jim owens
  0 siblings, 0 replies; only message in thread
From: jim owens @ 2010-01-25 14:01 UTC (permalink / raw)
  To: linux-btrfs, Chris Mason, Josef Bacik


Thanks to Josef Bacik for breaking the code with fsx and
figuring out with Chris what I was doing wrong:

1) change inline extent processing because an inline can
   be at the beginning of a large file when a hole follows
   and any access within the first block points at the
   inline even when the starting fpos is in that hole.
2) change compressed extent expansion because the end of
   the extent can be another implied hole that is not
   part of the compressed data stream.
3) reorder EOF test to be safer with concurrent write.
4) add better error reporting for inline extents.

Signed-off-by: jim owens <jowens@hp.com>
---
 fs/btrfs/dio.c |  101 ++++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/dio.c b/fs/btrfs/dio.c
index 2c0579a..3315cc9 100644
--- a/fs/btrfs/dio.c
+++ b/fs/btrfs/dio.c
@@ -203,7 +203,7 @@ static void btrfs_dio_submit_bio(struct btrfs_dio_extcb *extcb, int dvn);
 static int btrfs_dio_add_user_pages(u64 *dev_left, struct btrfs_dio_extcb *extcb, int dvn);
 static int btrfs_dio_add_temp_pages(u64 *dev_left, struct btrfs_dio_extcb *extcb, int dvn);
 static int btrfs_dio_hole_read(struct btrfs_diocb *diocb, u64 hole_len);
-static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len);
+static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 *data_len);
 static int btrfs_dio_read_csum(struct btrfs_dio_extcb *extcb);
 static void btrfs_dio_free_retry(struct btrfs_dio_extcb *extcb);
 static int btrfs_dio_retry_block(struct btrfs_dio_extcb *extcb);
@@ -433,9 +433,21 @@ static void btrfs_dio_read(struct btrfs_diocb *diocb)
 
 	/* expand lock region to include what we read to validate checksum */ 
 	diocb->lockstart = diocb->start & ~(diocb->blocksize-1);
+	diocb->lockend = ALIGN(diocb->terminate, diocb->blocksize) - 1;
 
 getlock:
 	mutex_lock(&diocb->inode->i_mutex);
+		
+	/* ensure writeout and btree update on everything
+	 * we might read for checksum or compressed extents
+	 */
+	data_len = diocb->lockend + 1 - diocb->lockstart;
+	err = btrfs_wait_ordered_range(diocb->inode, diocb->lockstart, data_len);
+	if (err) {
+		diocb->error = err;
+		mutex_unlock(&diocb->inode->i_mutex);
+		return;
+	}
 	data_len = i_size_read(diocb->inode);
 	if (data_len < end)
 		end = data_len;
@@ -448,17 +460,7 @@ getlock:
 		diocb->terminate = end;
 		diocb->lockend = ALIGN(diocb->terminate, diocb->blocksize) - 1;
 	}
-		
-	/* ensure writeout and btree update on everything
-	 * we might read for checksum or compressed extents
-	 */
-	data_len = diocb->lockend + 1 - diocb->lockstart;
-	err = btrfs_wait_ordered_range(diocb->inode, diocb->lockstart, data_len);
-	if (err) {
-		diocb->error = err;
-		mutex_unlock(&diocb->inode->i_mutex);
-		return;
-	}
+
 	lock_extent(io_tree, diocb->lockstart, diocb->lockend, GFP_NOFS);
 	mutex_unlock(&diocb->inode->i_mutex);
 
@@ -483,7 +485,18 @@ getlock:
 		}
 		
 		if (em->block_start == EXTENT_MAP_INLINE) {
-			err = btrfs_dio_inline_read(diocb, len);
+			/* ugly stuff because inline can exist in a large file
+			 * with other extents if a hole immediately follows.
+			 * the inline might end short of the btrfs block with
+			 * an implied hole that we need to zero here.
+			 */
+			u64 expected = min(diocb->start + len, em->start + em->len);
+			err = btrfs_dio_inline_read(diocb, &len);
+			if (!err && expected > diocb->start) {
+				data_len -= len;
+				len = expected - diocb->start;
+				err = btrfs_dio_hole_read(diocb, len);
+			}
 		} else {
 			len = min(len, em->len - (diocb->start - em->start));
 			if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -1183,9 +1196,24 @@ static void btrfs_dio_decompress(struct btrfs_dio_extcb *extcb)
 	u32 len = extcb->icb.out_len;
 
 	extcb->error = btrfs_zlib_inflate(&extcb->icb);
-	if (extcb->icb.out_len != len && !extcb->error)
-		extcb->error = -EIO;
 
+	/* ugly again - compressed extents can end with an implied hole */
+	if (!extcb->error && extcb->icb.out_len != len) {
+		while (extcb->umc.todo) {
+			struct bio_vec uv;
+			char *out;
+
+			extcb->error = btrfs_dio_get_user_bvec(&uv, &extcb->umc);
+			if (extcb->error)
+				goto fail;
+			out = kmap_atomic(uv.bv_page, KM_USER0);
+			memset(out + uv.bv_offset, 0, uv.bv_len);
+			kunmap_atomic(out, KM_USER0);
+
+			btrfs_dio_done_with_out(&uv, NULL);
+		}
+	}
+fail:
 	btrfs_dio_release_bios(extcb, 0);
 }
 
@@ -1432,7 +1460,7 @@ fail:
 	return err;
 }
 
-static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len)
+static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 *data_len)
 {
 	int err;
 	size_t size;
@@ -1452,8 +1480,11 @@ static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len)
 		if (err < 0)
 			goto notfound;
 		err= -EDOM;
-		if (path->slots[0] == 0)
+		if (path->slots[0] == 0) {
+		printk(KERN_ERR "btrfs directIO inline extent leaf not found ino %lu\n",
+				diocb->inode->i_ino);
 			goto fail;
+		}
 		path->slots[0]--;
 	}
 
@@ -1473,16 +1504,26 @@ static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len)
 	extent_start = found_key.offset;
 	/* uncompressed size */
 	size = btrfs_file_extent_inline_len(leaf, item);
-	if (diocb->start < extent_start || diocb->start >= extent_start + size) {
-		printk(KERN_ERR "btrfs directIO inline extent leaf mismatch ino %lu\n",
-				diocb->inode->i_ino);
+	if (diocb->start < extent_start) {
+		printk(KERN_ERR "btrfs directIO inline extent range mismatch ino %lu"
+			" fpos %lld found start %lld size %ld\n",
+			diocb->inode->i_ino,diocb->start,extent_start,size);
 		err= -EDOM;
 		goto fail;
 	}
 
+	/* we can end here when we start in an implied hole on a larger file */
+	if (diocb->start >= extent_start + size) {
+		*data_len = 0;
+		err = 0;
+		goto fail;
+	}
+
 	extent_offset = diocb->start - extent_start;
+	size = min_t(u64, *data_len, size - extent_offset);
 
-	size = min_t(u64, data_len, size);
+	size = min_t(u64, *data_len, size);
+	*data_len = size;
 
 	if (btrfs_file_extent_compression(leaf, item) ==
 						BTRFS_COMPRESS_ZLIB) {
@@ -1523,11 +1564,11 @@ static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len)
 		if (!err)
 			diocb->start += size;
 
-		/* needed if we ever allowed extents after inline
-		 * diocb->umc.work_iov = extcb->umc.work_iov;
-		 * diocb->umc.user_iov = extcb->umc.user_iov;
-		 * diocb->umc.remaining = extcb->umc.remaining;
-		 */
+		/* we allow extents after inline if a hole follows */
+		diocb->umc.work_iov = extcb->umc.work_iov;
+		diocb->umc.user_iov = extcb->umc.user_iov;
+		diocb->umc.remaining = extcb->umc.remaining;
+
 		kfree(extcb);
 	} else {
 		unsigned long inline_start;
@@ -1556,9 +1597,11 @@ fail:
 	btrfs_release_path(root, path);
 notfound:
 	btrfs_free_path(path);
-	unlock_extent(&BTRFS_I(diocb->inode)->io_tree, diocb->lockstart,
-			diocb->lockstart + data_len - 1, GFP_NOFS);
-	diocb->lockstart += data_len;
+	if (!err && *data_len) {
+		unlock_extent(&BTRFS_I(diocb->inode)->io_tree, diocb->lockstart,
+				diocb->lockstart + *data_len - 1, GFP_NOFS);
+		diocb->lockstart += *data_len;
+	}
 	return err;
 }
 
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2010-01-25 14:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-25 14:01 [PATCH] Btrfs: correct mistakes in direct I/O read found by fsx jim owens

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox