linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RESEND 1/3] Btrfs: flush all dirty inodes if writeback can not start
@ 2012-12-20 11:19 Miao Xie
  2012-12-24  9:57 ` [PATCH V2] " Miao Xie
  0 siblings, 1 reply; 4+ messages in thread
From: Miao Xie @ 2012-12-20 11:19 UTC (permalink / raw)
  To: Linux Btrfs

From: Miao Xie <miaox@cn.fujitsu.com>

We may try to flush some dirty pages when there is not enough space to reserve.
But it is possible that this operation fails. In that case, in order to get
enough space to reserve successfully, we sync all the delalloc files. This
operation is safe: we needn't worry about the case where the filesystem goes
from r/w to r/o, because the filesystem should guarantee that all the dirty
pages have been written to the disk after it becomes readonly, so the sync
operation will do nothing if the filesystem is already readonly. Though it may
waste lots of time, this is a corner case, so we needn't care.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
 fs/btrfs/extent-tree.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d133edf..44a0b58 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3689,12 +3689,15 @@ static int can_overcommit(struct btrfs_root *root,
 	return 0;
 }
 
-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
-					       unsigned long nr_pages,
-					       enum wb_reason reason)
+static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+						      unsigned long nr_pages,
+						      enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
+	/* the flusher is dealing with the dirty inodes now. */
+	if (writeback_in_progress(sb->s_bdi))
+		return 1;
+
+	if (down_read_trylock(&sb->s_umount)) {
 		writeback_inodes_sb_nr(sb, nr_pages, reason);
 		up_read(&sb->s_umount);
 		return 1;
@@ -3703,6 +3706,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
 	return 0;
 }
 
+void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+				  unsigned long nr_pages)
+{
+	struct super_block *sb = root->fs_info->sb;
+	int started;
+
+	/* If we can not start writeback, just sync all the delalloc file. */
+	started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
+						      WB_REASON_FS_FREE_SPACE);
+	if (!started) {
+		/*
+		 * We needn't worry the filesystem going from r/w to r/o though
+		 * we don't acquire ->s_umount mutex, because the filesystem
+		 * should guarantee the delalloc inodes list be empty after
+		 * the filesystem is readonly(all dirty pages are written to
+		 * the disk).
+		 */
+		btrfs_start_delalloc_inodes(root, 0);
+		btrfs_wait_ordered_extents(root, 0);
+	}
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3735,10 +3760,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-						    nr_pages,
-						    WB_REASON_FS_FREE_SPACE);
-
+		btrfs_writeback_inodes_sb_nr(root, nr_pages);
 		/*
 		 * We need to wait for the async pages to actually start before
 		 * we do anything.
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH V2] Btrfs: flush all dirty inodes if writeback can not start
  2012-12-20 11:19 [PATCH RESEND 1/3] Btrfs: flush all dirty inodes if writeback can not start Miao Xie
@ 2012-12-24  9:57 ` Miao Xie
  2013-01-08 13:02   ` Josef Bacik
  0 siblings, 1 reply; 4+ messages in thread
From: Miao Xie @ 2012-12-24  9:57 UTC (permalink / raw)
  To: Linux Btrfs; +Cc: Josef Bacik

We may try to flush some dirty pages when there is not enough space to reserve.
But it is possible that this operation fails. In that case, in order to get
enough space to reserve successfully, we sync all the delalloc files. This
operation is safe: we needn't worry about the case where the filesystem goes
from r/w to r/o, because the filesystem should guarantee that all the dirty
pages have been written to the disk after it becomes readonly, so the sync
operation will do nothing if the filesystem is already readonly. Though it may
waste lots of time, this is a corner case, so we needn't care.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- make the function static
---
 fs/btrfs/extent-tree.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b6ed965..2d9fe27 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3695,12 +3695,15 @@ static int can_overcommit(struct btrfs_root *root,
 	return 0;
 }
 
-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
-					       unsigned long nr_pages,
-					       enum wb_reason reason)
+static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+						      unsigned long nr_pages,
+						      enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
+	/* the flusher is dealing with the dirty inodes now. */
+	if (writeback_in_progress(sb->s_bdi))
+		return 1;
+
+	if (down_read_trylock(&sb->s_umount)) {
 		writeback_inodes_sb_nr(sb, nr_pages, reason);
 		up_read(&sb->s_umount);
 		return 1;
@@ -3709,6 +3712,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
 	return 0;
 }
 
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+					 unsigned long nr_pages)
+{
+	struct super_block *sb = root->fs_info->sb;
+	int started;
+
+	/* If we can not start writeback, just sync all the delalloc file. */
+	started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
+						      WB_REASON_FS_FREE_SPACE);
+	if (!started) {
+		/*
+		 * We needn't worry the filesystem going from r/w to r/o though
+		 * we don't acquire ->s_umount mutex, because the filesystem
+		 * should guarantee the delalloc inodes list be empty after
+		 * the filesystem is readonly(all dirty pages are written to
+		 * the disk).
+		 */
+		btrfs_start_delalloc_inodes(root, 0);
+		btrfs_wait_ordered_extents(root, 0);
+	}
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3741,10 +3766,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-						    nr_pages,
-						    WB_REASON_FS_FREE_SPACE);
-
+		btrfs_writeback_inodes_sb_nr(root, nr_pages);
 		/*
 		 * We need to wait for the async pages to actually start before
 		 * we do anything.
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V2] Btrfs: flush all dirty inodes if writeback can not start
  2012-12-24  9:57 ` [PATCH V2] " Miao Xie
@ 2013-01-08 13:02   ` Josef Bacik
  2013-01-10  3:30     ` [PATCH V3] " Miao Xie
  0 siblings, 1 reply; 4+ messages in thread
From: Josef Bacik @ 2013-01-08 13:02 UTC (permalink / raw)
  To: Miao Xie; +Cc: Linux Btrfs, Josef Bacik

On Mon, Dec 24, 2012 at 02:57:19AM -0700, Miao Xie wrote:
> We may try to flush some dirty pages when there is no enough space to reserve.
> But it is possible that this operation fails, in order to get enough space to
> reserve successfully, we will sync all the delalloc file. This operation is
> safe, we needn't worry about the case that the filesystem goes from r/w to r/o.
> because the filesystem should guarantee all the dirty pages have been written
> into the disk after it becomes readonly, so the sync operation will do nothing
> if the filesystem is already readonly. Though it may waste lots of time,
> as a corner case, we needn't care.
> 
> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
> ---
> Changelog v1 -> v2:
> - make the function static
> ---
>  fs/btrfs/extent-tree.c | 40 +++++++++++++++++++++++++++++++---------
>  1 file changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index b6ed965..2d9fe27 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -3695,12 +3695,15 @@ static int can_overcommit(struct btrfs_root *root,
>  	return 0;
>  }
>  
> -static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
> -					       unsigned long nr_pages,
> -					       enum wb_reason reason)
> +static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
> +						      unsigned long nr_pages,
> +						      enum wb_reason reason)
>  {
> -	if (!writeback_in_progress(sb->s_bdi) &&
> -	    down_read_trylock(&sb->s_umount)) {
> +	/* the flusher is dealing with the dirty inodes now. */
> +	if (writeback_in_progress(sb->s_bdi))
> +		return 1;
> +
> +	if (down_read_trylock(&sb->s_umount)) {
>  		writeback_inodes_sb_nr(sb, nr_pages, reason);
>  		up_read(&sb->s_umount);
>  		return 1;
> @@ -3709,6 +3712,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
>  	return 0;
>  }
>  
> +static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
> +					 unsigned long nr_pages)
> +{
> +	struct super_block *sb = root->fs_info->sb;
> +	int started;
> +
> +	/* If we can not start writeback, just sync all the delalloc file. */
> +	started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
> +						      WB_REASON_FS_FREE_SPACE);
> +	if (!started) {
> +		/*
> +		 * We needn't worry the filesystem going from r/w to r/o though
> +		 * we don't acquire ->s_umount mutex, because the filesystem
> +		 * should guarantee the delalloc inodes list be empty after
> +		 * the filesystem is readonly(all dirty pages are written to
> +		 * the disk).
> +		 */
> +		btrfs_start_delalloc_inodes(root, 0);
> +		btrfs_wait_ordered_extents(root, 0);

We can't just call wait_ordered_extents, we may have an open trans handle which
could make us deadlock if a transaction commit starts.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH V3] Btrfs: flush all dirty inodes if writeback can not start
  2013-01-08 13:02   ` Josef Bacik
@ 2013-01-10  3:30     ` Miao Xie
  0 siblings, 0 replies; 4+ messages in thread
From: Miao Xie @ 2013-01-10  3:30 UTC (permalink / raw)
  To: Josef Bacik; +Cc: Linux Btrfs

We may try to flush some dirty pages when there is not enough space to reserve.
But it is possible that this operation fails. In that case, in order to get
enough space to reserve successfully, we sync all the delalloc files. This
operation is safe: we needn't worry about the case where the filesystem goes
from r/w to r/o, because the filesystem should guarantee that all the dirty
pages have been written to the disk after it becomes readonly, so the sync
operation will do nothing if the filesystem is already readonly. Though it may
waste lots of time, this is a corner case, so we needn't care.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v2 -> v3:
- remove unnecessary btrfs_wait_ordered_extents()

Changelog v1 -> v2:
- make the function static
---
 fs/btrfs/extent-tree.c | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b6ed965..93a2bfc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3695,12 +3695,15 @@ static int can_overcommit(struct btrfs_root *root,
 	return 0;
 }
 
-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
-					       unsigned long nr_pages,
-					       enum wb_reason reason)
+static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+						      unsigned long nr_pages,
+						      enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
+	/* the flusher is dealing with the dirty inodes now. */
+	if (writeback_in_progress(sb->s_bdi))
+		return 1;
+
+	if (down_read_trylock(&sb->s_umount)) {
 		writeback_inodes_sb_nr(sb, nr_pages, reason);
 		up_read(&sb->s_umount);
 		return 1;
@@ -3709,6 +3712,27 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
 	return 0;
 }
 
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+					 unsigned long nr_pages)
+{
+	struct super_block *sb = root->fs_info->sb;
+	int started;
+
+	/* If we can not start writeback, just sync all the delalloc file. */
+	started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
+						      WB_REASON_FS_FREE_SPACE);
+	if (!started) {
+		/*
+		 * We needn't worry the filesystem going from r/w to r/o though
+		 * we don't acquire ->s_umount mutex, because the filesystem
+		 * should guarantee the delalloc inodes list be empty after
+		 * the filesystem is readonly(all dirty pages are written to
+		 * the disk).
+		 */
+		btrfs_start_delalloc_inodes(root, 0);
+	}
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3738,13 +3762,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 		return;
 	}
 
+	flush = trans ? BTRFS_RESERVE_NO_FLUSH : BTRFS_RESERVE_FLUSH_ALL;
+
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-						    nr_pages,
-						    WB_REASON_FS_FREE_SPACE);
-
+		btrfs_writeback_inodes_sb_nr(root, nr_pages);
 		/*
 		 * We need to wait for the async pages to actually start before
 		 * we do anything.
@@ -3752,10 +3775,6 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 		wait_event(root->fs_info->async_submit_wait,
 			   !atomic_read(&root->fs_info->async_delalloc_pages));
 
-		if (!trans)
-			flush = BTRFS_RESERVE_FLUSH_ALL;
-		else
-			flush = BTRFS_RESERVE_NO_FLUSH;
 		spin_lock(&space_info->lock);
 		if (can_overcommit(root, space_info, orig, flush)) {
 			spin_unlock(&space_info->lock);
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-01-10  3:29 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-20 11:19 [PATCH RESEND 1/3] Btrfs: flush all dirty inodes if writeback can not start Miao Xie
2012-12-24  9:57 ` [PATCH V2] " Miao Xie
2013-01-08 13:02   ` Josef Bacik
2013-01-10  3:30     ` [PATCH V3] " Miao Xie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).