Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!

linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Josef Bacik <josef@redhat.com>
To: Mitch Harder <mitch.harder@sabayonlinux.org>
Cc: Josef Bacik <josef@redhat.com>, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!
Date: Wed, 12 Oct 2011 13:50:47 -0400	[thread overview]
Message-ID: <20111012175047.GA14280@localhost.localdomain> (raw)
In-Reply-To: <CAKcLGm-y5on_dWjaNdSARd5_K5nri0G9zuMZAsV19ecuYmXH9g@mail.gmail.com>

On Tue, Oct 11, 2011 at 03:45:45PM -0500, Mitch Harder wrote:
> On Tue, Oct 11, 2011 at 3:01 PM, Josef Bacik <josef@redhat.com> wrote:
> > On Tue, Oct 11, 2011 at 02:44:09PM -0500, Mitch Harder wrote:
> >> On Tue, Oct 11, 2011 at 2:00 PM, Josef Bacik <josef@redhat.com> wrote:
> >> > On Tue, Oct 11, 2011 at 12:33:48PM -0500, Mitch Harder wrote:
> >> >> On Mon, Sep 26, 2011 at 4:22 PM, Josef Bacik <josef@redhat.com> wrote:
> >> >> >
> >> >> > go from taking around 45 minutes to 10 seconds on my freshly formatted 3 TiB
> >> >> > file system.  This doesn't seem to break my other enospc tests, but could really
> >> >> > use some more testing as this is a super scary change.  Thanks,
> >> >> >
> >> >>
> >> >> I've been testing Josef's git.kernel.org testing tree, and I've
> >> >> bisected an error down to this commit.
> >> >>
> >> >> I'm triggering the error using a removedirs benchmark in filebench
> >> >> with the following profile:
> >> >> load removedirs
> >> >> set $dir=/mnt/benchmark/filebench
> >> >> set $ndirs=400000
> >> >> run
> >> >>
> >> >
> >> > Ok try this one, it will write out more and harder, see if that helps.  Thanks,
> >> >
> >>
> >> Still running into BUG at fs/btrfs/inode.c:2176!
> >
> > How about this one?
> >
>
> Sorry, still getting the same bug.
>
> [  175.956273] kernel BUG at fs/btrfs/inode.c:2176!

Ok I think I see what's happening, this patch replaces the previous one, let me
know how it goes.  Thanks,

Josef

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fc0de68..e595372 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3334,7 +3334,7 @@ out:
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim, int sync)
+			   struct btrfs_root *root, u64 to_reclaim, int retries)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_space_info *space_info;
@@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_h=
andle *trans,
 	}
=20
 	max_reclaim =3D min(reserved, to_reclaim);
+	if (max_reclaim > (2 * 1024 * 1024))
+		nr_pages =3D max_reclaim >> PAGE_CACHE_SHIFT;
=20
 	while (loops < 1024) {
-		/* have the flusher threads jump in and do some IO */
-		smp_mb();
-		nr_pages =3D min_t(unsigned long, nr_pages,
-		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
=20
 		spin_lock(&space_info->lock);
@@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_h=
andle *trans,
 		if (reserved =3D=3D 0 || reclaimed >=3D max_reclaim)
 			break;
=20
-		if (trans && trans->transaction->blocked)
+		if (trans)
 			return -EAGAIN;
=20
-		time_left =3D schedule_timeout_interruptible(1);
+		if (!retries) {
+			time_left =3D schedule_timeout_interruptible(1);
=20
-		/* We were interrupted, exit */
-		if (time_left)
-			break;
+			/* We were interrupted, exit */
+			if (time_left)
+				break;
+		} else {
+			/*
+			 * We've already done this song and dance once, let's
+			 * really wait for some work to get done.
+			 */
+			btrfs_wait_ordered_extents(root, 0, 0);
+		}
=20
 		/* we've kicked the IO a few times, if anything has been freed,
 		 * exit.  There is no sense in looping here for a long time
@@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_h=
andle *trans,
 		 * just too many writers without enough free space
 		 */
=20
-		if (loops > 3) {
+		if (!retries && loops > 3) {
 			smp_mb();
 			if (progress !=3D space_info->reservation_progress)
 				break;
 		}
=20
 	}
-	if (reclaimed < to_reclaim && !trans)
-		btrfs_wait_ordered_extents(root, 0, 0);
 	return reclaimed >=3D to_reclaim;
 }
=20
@@ -3552,7 +3556,7 @@ again:
 	 * We do synchronous shrinking since we don't actually unreserve
 	 * metadata until after the IO is completed.
 	 */
-	ret =3D shrink_delalloc(trans, root, num_bytes, 1);
+	ret =3D shrink_delalloc(trans, root, num_bytes, retries);
 	if (ret < 0)
 		goto out;
=20
@@ -3568,17 +3572,6 @@ again:
 		goto again;
 	}
=20
-	/*
-	 * Not enough space to be reclaimed, don't bother committing the
-	 * transaction.
-	 */
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < orig_bytes)
-		ret =3D -ENOSPC;
-	spin_unlock(&space_info->lock);
-	if (ret)
-		goto out;
-
 	ret =3D -EAGAIN;
 	if (trans)
 		goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1153731..1785307 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2758,7 +2758,16 @@ static struct btrfs_trans_handle *__unlink_start=
_trans(struct inode *dir,
 	u64 ino =3D btrfs_ino(inode);
 	u64 dir_ino =3D btrfs_ino(dir);
=20
-	trans =3D btrfs_start_transaction(root, 10);
+	/*
+	 * 1 for the possible orphan item
+	 * 1 for the dir item
+	 * 1 for the dir index
+	 * 1 for the inode ref
+	 * 1 for the inode ref in the tree log
+	 * 2 for the dir entries in the log
+	 * 1 for the inode
+	 */
+	trans =3D btrfs_start_transaction(root, 8);
 	if (!IS_ERR(trans) || PTR_ERR(trans) !=3D -ENOSPC)
 		return trans;
=20
@@ -2781,7 +2790,8 @@ static struct btrfs_trans_handle *__unlink_start_=
trans(struct inode *dir,
 		return ERR_PTR(-ENOMEM);
 	}
=20
-	trans =3D btrfs_start_transaction(root, 0);
+	/* 1 for the orphan item */
+	trans =3D btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		btrfs_free_path(path);
 		root->fs_info->enospc_unlink =3D 0;
@@ -2892,6 +2902,11 @@ out:
 		return ERR_PTR(err);
 	}
=20
+	ret =3D btrfs_block_rsv_migrate(trans->block_rsv,
+				      &root->fs_info->global_block_rsv,
+				      btrfs_calc_trans_metadata_size(root, 1));
+	BUG_ON(ret);
+
 	trans->block_rsv =3D &root->fs_info->global_block_rsv;
 	return trans;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

next prev parent reply	other threads:[~2011-10-12 17:50 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-26 21:22 [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!! Josef Bacik
2011-10-11 17:33 ` Mitch Harder
2011-10-11 17:43   ` Josef Bacik
2011-10-11 18:27   ` Josef Bacik
2011-10-11 19:00   ` Josef Bacik
2011-10-11 19:44     ` Mitch Harder
2011-10-11 20:01       ` Josef Bacik
2011-10-11 20:45         ` Mitch Harder
2011-10-12 17:50           ` Josef Bacik [this message]
2011-10-12 20:45             ` Mitch Harder
2011-10-13 12:57               ` Josef Bacik
2011-10-13 15:03                 ` Christian Brunner

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:fc0de68 dfblob:e595372 dfblob:1153731 dfblob:1785307 )
 OR (
bs:"Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111012175047.GA14280@localhost.localdomain \
    --to=josef@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=mitch.harder@sabayonlinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).