[PATCH 1/2] Btrfs: correctly flush compressed data before/after direct IO

linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] Btrfs: correctly flush compressed data before/after direct IO
@ 2014-10-09 20:18 Filipe Manana
  2014-10-09 20:18 ` [PATCH 2/2] Btrfs: add helper btrfs_fdatawrite_range Filipe Manana
  0 siblings, 1 reply; 3+ messages in thread
From: Filipe Manana @ 2014-10-09 20:18 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Filipe Manana

For compressed writes, after doing the first filemap_fdatawrite_range() we
don't get the pages tagged for writeback immediately. Instead we create
a workqueue task, which is run by other kthread, and keep the pages locked.
That other kthread compresses data, creates the respective ordered extent/s,
tags the pages for writeback and unlocks them. Therefore we need a second
call to filemap_fdatawrite_range() if we have compressed writes, as this
second call will wait for the pages to become unlocked, then see they became
tagged for writeback and finally wait for the writeback to finish.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/file.c  | 12 +++++++++++-
 fs/btrfs/inode.c | 16 +++++++++++++---
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 29b147d..82c7229 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1692,8 +1692,18 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 		err = written_buffered;
 		goto out;
 	}
+	/*
+	 * Ensure all data is persisted. We want the next direct IO read to be
+	 * able to read what was just written.
+	 */
 	endbyte = pos + written_buffered - 1;
-	err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
+	err = filemap_fdatawrite_range(file->f_mapping, pos, endbyte);
+	if (!err && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+			     &BTRFS_I(file_inode(file))->runtime_flags))
+		err = filemap_fdatawrite_range(file->f_mapping, pos, endbyte);
+	if (err)
+		goto out;
+	err = filemap_fdatawait_range(file->f_mapping, pos, endbyte);
 	if (err)
 		goto out;
 	written += written_buffered;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index aef0fa3..752ff18 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7052,9 +7052,19 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			btrfs_put_ordered_extent(ordered);
 		} else {
 			/* Screw you mmap */
-			ret = filemap_write_and_wait_range(inode->i_mapping,
-							   lockstart,
-							   lockend);
+			ret = filemap_fdatawrite_range(inode->i_mapping,
+						       lockstart,
+						       lockend);
+			if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+					     &BTRFS_I(inode)->runtime_flags))
+				ret = filemap_fdatawrite_range(inode->i_mapping,
+							       lockstart,
+							       lockend);
+			if (ret)
+				break;
+			ret = filemap_fdatawait_range(inode->i_mapping,
+						      lockstart,
+						      lockend);
 			if (ret)
 				break;
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] Btrfs: add helper btrfs_fdatawrite_range
  2014-10-09 20:18 [PATCH 1/2] Btrfs: correctly flush compressed data before/after direct IO Filipe Manana
@ 2014-10-09 20:18 ` Filipe Manana
  2014-10-10  8:43   ` [PATCH 2/2 v2] " Filipe Manana
  0 siblings, 1 reply; 3+ messages in thread
From: Filipe Manana @ 2014-10-09 20:18 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Filipe Manana

To avoid duplicating this double filemap_fdatawrite_range() call for
inodes with async extents (compressed writes) so often.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/ctree.h        |  1 +
 fs/btrfs/file.c         | 36 ++++++++++++++++++++++++++++--------
 fs/btrfs/inode.c        |  9 +--------
 fs/btrfs/ordered-data.c | 24 ++----------------------
 4 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 089f6da..4e0ad8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3896,6 +3896,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
 		      struct page **pages, size_t num_pages,
 		      loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
+int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82c7229..2df1dce 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1697,10 +1697,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	 * able to read what was just written.
 	 */
 	endbyte = pos + written_buffered - 1;
-	err = filemap_fdatawrite_range(file->f_mapping, pos, endbyte);
-	if (!err && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-			     &BTRFS_I(file_inode(file))->runtime_flags))
-		err = filemap_fdatawrite_range(file->f_mapping, pos, endbyte);
+	err = btrfs_fdatawrite_range(file->f_mapping, pos, endbyte);
 	if (err)
 		goto out;
 	err = filemap_fdatawait_range(file->f_mapping, pos, endbyte);
@@ -1864,10 +1861,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
 	int ret;
 
 	atomic_inc(&BTRFS_I(inode)->sync_writers);
-	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
-	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-			     &BTRFS_I(inode)->runtime_flags))
-		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+	ret = btrfs_fdatawrite_range(inode->i_mapping, start, end);
 	atomic_dec(&BTRFS_I(inode)->sync_writers);
 
 	return ret;
@@ -2820,3 +2814,29 @@ int btrfs_auto_defrag_init(void)
 
 	return 0;
 }
+
+int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
+{
+	int ret;
+
+	/*
+	 * So with compression we will find and lock a dirty page and clear the
+	 * first one as dirty, setup an async extent, and immediately return
+	 * with the entire range locked but with nobody actually marked with
+	 * writeback.  So we can't just filemap_write_and_wait_range() and
+	 * expect it to work since it will just kick off a thread to do the
+	 * actual work.  So we need to call filemap_fdatawrite_range _again_
+	 * since it will wait on the page lock, which won't be unlocked until
+	 * after the pages have been marked as writeback and so we're good to go
+	 * from there.  We have to do this otherwise we'll miss the ordered
+	 * extents and that results in badness.  Please Josef, do not think you
+	 * know better and pull this out at some point in the future, it is
+	 * right and you are wrong.
+	 */
+	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+			     &BTRFS_I(inode)->runtime_flags))
+		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+
+	return ret;
+}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 752ff18..be955481 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7052,14 +7052,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			btrfs_put_ordered_extent(ordered);
 		} else {
 			/* Screw you mmap */
-			ret = filemap_fdatawrite_range(inode->i_mapping,
-						       lockstart,
-						       lockend);
-			if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-					     &BTRFS_I(inode)->runtime_flags))
-				ret = filemap_fdatawrite_range(inode->i_mapping,
-							       lockstart,
-							       lockend);
+			ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
 			if (ret)
 				break;
 			ret = filemap_fdatawait_range(inode->i_mapping,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ac734ec..1401b1a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -725,30 +725,10 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 	/* start IO across the range first to instantiate any delalloc
 	 * extents
 	 */
-	ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+	ret = btrfs_fdatawrite_range(inode, start, orig_end);
 	if (ret)
 		return ret;
-	/*
-	 * So with compression we will find and lock a dirty page and clear the
-	 * first one as dirty, setup an async extent, and immediately return
-	 * with the entire range locked but with nobody actually marked with
-	 * writeback.  So we can't just filemap_write_and_wait_range() and
-	 * expect it to work since it will just kick off a thread to do the
-	 * actual work.  So we need to call filemap_fdatawrite_range _again_
-	 * since it will wait on the page lock, which won't be unlocked until
-	 * after the pages have been marked as writeback and so we're good to go
-	 * from there.  We have to do this otherwise we'll miss the ordered
-	 * extents and that results in badness.  Please Josef, do not think you
-	 * know better and pull this out at some point in the future, it is
-	 * right and you are wrong.
-	 */
-	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-		     &BTRFS_I(inode)->runtime_flags)) {
-		ret = filemap_fdatawrite_range(inode->i_mapping, start,
-					       orig_end);
-		if (ret)
-			return ret;
-	}
+
 	ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 	if (ret)
 		return ret;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2 v2] Btrfs: add helper btrfs_fdatawrite_range
  2014-10-09 20:18 ` [PATCH 2/2] Btrfs: add helper btrfs_fdatawrite_range Filipe Manana
@ 2014-10-10  8:43   ` Filipe Manana
  0 siblings, 0 replies; 3+ messages in thread
From: Filipe Manana @ 2014-10-10  8:43 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Filipe Manana

To avoid duplicating this double filemap_fdatawrite_range() call for
inodes with async extents (compressed writes) so often.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---

V2: Pass right arguments to the new helper. Missed unstaged changes.

 fs/btrfs/ctree.h        |  1 +
 fs/btrfs/file.c         | 39 ++++++++++++++++++++++++++++++---------
 fs/btrfs/inode.c        |  9 +--------
 fs/btrfs/ordered-data.c | 24 ++----------------------
 4 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 089f6da..4e0ad8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3896,6 +3896,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
 		      struct page **pages, size_t num_pages,
 		      loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
+int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82c7229..bbd474b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1676,6 +1676,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 				    loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
 	ssize_t written;
 	ssize_t written_buffered;
 	loff_t endbyte;
@@ -1697,13 +1698,10 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	 * able to read what was just written.
 	 */
 	endbyte = pos + written_buffered - 1;
-	err = filemap_fdatawrite_range(file->f_mapping, pos, endbyte);
-	if (!err && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-			     &BTRFS_I(file_inode(file))->runtime_flags))
-		err = filemap_fdatawrite_range(file->f_mapping, pos, endbyte);
+	err = btrfs_fdatawrite_range(inode, pos, endbyte);
 	if (err)
 		goto out;
-	err = filemap_fdatawait_range(file->f_mapping, pos, endbyte);
+	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
 	if (err)
 		goto out;
 	written += written_buffered;
@@ -1864,10 +1862,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
 	int ret;
 
 	atomic_inc(&BTRFS_I(inode)->sync_writers);
-	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
-	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-			     &BTRFS_I(inode)->runtime_flags))
-		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+	ret = btrfs_fdatawrite_range(inode, start, end);
 	atomic_dec(&BTRFS_I(inode)->sync_writers);
 
 	return ret;
@@ -2820,3 +2815,29 @@ int btrfs_auto_defrag_init(void)
 
 	return 0;
 }
+
+int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
+{
+	int ret;
+
+	/*
+	 * So with compression we will find and lock a dirty page and clear the
+	 * first one as dirty, setup an async extent, and immediately return
+	 * with the entire range locked but with nobody actually marked with
+	 * writeback.  So we can't just filemap_write_and_wait_range() and
+	 * expect it to work since it will just kick off a thread to do the
+	 * actual work.  So we need to call filemap_fdatawrite_range _again_
+	 * since it will wait on the page lock, which won't be unlocked until
+	 * after the pages have been marked as writeback and so we're good to go
+	 * from there.  We have to do this otherwise we'll miss the ordered
+	 * extents and that results in badness.  Please Josef, do not think you
+	 * know better and pull this out at some point in the future, it is
+	 * right and you are wrong.
+	 */
+	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+			     &BTRFS_I(inode)->runtime_flags))
+		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+
+	return ret;
+}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 752ff18..be955481 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7052,14 +7052,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			btrfs_put_ordered_extent(ordered);
 		} else {
 			/* Screw you mmap */
-			ret = filemap_fdatawrite_range(inode->i_mapping,
-						       lockstart,
-						       lockend);
-			if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-					     &BTRFS_I(inode)->runtime_flags))
-				ret = filemap_fdatawrite_range(inode->i_mapping,
-							       lockstart,
-							       lockend);
+			ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
 			if (ret)
 				break;
 			ret = filemap_fdatawait_range(inode->i_mapping,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ac734ec..1401b1a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -725,30 +725,10 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 	/* start IO across the range first to instantiate any delalloc
 	 * extents
 	 */
-	ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+	ret = btrfs_fdatawrite_range(inode, start, orig_end);
 	if (ret)
 		return ret;
-	/*
-	 * So with compression we will find and lock a dirty page and clear the
-	 * first one as dirty, setup an async extent, and immediately return
-	 * with the entire range locked but with nobody actually marked with
-	 * writeback.  So we can't just filemap_write_and_wait_range() and
-	 * expect it to work since it will just kick off a thread to do the
-	 * actual work.  So we need to call filemap_fdatawrite_range _again_
-	 * since it will wait on the page lock, which won't be unlocked until
-	 * after the pages have been marked as writeback and so we're good to go
-	 * from there.  We have to do this otherwise we'll miss the ordered
-	 * extents and that results in badness.  Please Josef, do not think you
-	 * know better and pull this out at some point in the future, it is
-	 * right and you are wrong.
-	 */
-	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-		     &BTRFS_I(inode)->runtime_flags)) {
-		ret = filemap_fdatawrite_range(inode->i_mapping, start,
-					       orig_end);
-		if (ret)
-			return ret;
-	}
+
 	ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 	if (ret)
 		return ret;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-10-10  7:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-10-09 20:18 [PATCH 1/2] Btrfs: correctly flush compressed data before/after direct IO Filipe Manana
2014-10-09 20:18 ` [PATCH 2/2] Btrfs: add helper btrfs_fdatawrite_range Filipe Manana
2014-10-10  8:43   ` [PATCH 2/2 v2] " Filipe Manana

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).