From: Josef Bacik <josef@redhat.com>
To: Mitch Harder <mitch.harder@sabayonlinux.org>
Cc: Josef Bacik <josef@redhat.com>, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!
Date: Thu, 13 Oct 2011 08:57:47 -0400 [thread overview]
Message-ID: <20111013125746.GA2310@localhost.localdomain> (raw)
In-Reply-To: <CAKcLGm_MMzv0rK169pZJyHu4r2=6xXBE0OjxcoLZVzLMkf3evg@mail.gmail.com>
On Wed, Oct 12, 2011 at 03:45:04PM -0500, Mitch Harder wrote:
> On Wed, Oct 12, 2011 at 12:50 PM, Josef Bacik <josef@redhat.com> wrote:
> > On Tue, Oct 11, 2011 at 03:45:45PM -0500, Mitch Harder wrote:
> >> On Tue, Oct 11, 2011 at 3:01 PM, Josef Bacik <josef@redhat.com> wrote:
> >> > On Tue, Oct 11, 2011 at 02:44:09PM -0500, Mitch Harder wrote:
> >> >> On Tue, Oct 11, 2011 at 2:00 PM, Josef Bacik <josef@redhat.com> wrote:
> >> >> > On Tue, Oct 11, 2011 at 12:33:48PM -0500, Mitch Harder wrote:
> >> >> >> On Mon, Sep 26, 2011 at 4:22 PM, Josef Bacik <josef@redhat.com> wrote:
> >> >> >> >
> >> >> >> > go from taking around 45 minutes to 10 seconds on my freshly formatted 3 TiB
> >> >> >> > file system. This doesn't seem to break my other enospc tests, but could really
> >> >> >> > use some more testing as this is a super scary change. Thanks,
> >> >> >> >
> >> >> >>
> >> >> >> I've been testing Josef's git.kernel.org testing tree, and I've
> >> >> >> bisected an error down to this commit.
> >> >> >>
> >> >> >> I'm triggering the error using a removedirs benchmark in filebench
> >> >> >> with the following profile:
> >> >> >> load removedirs
> >> >> >> set $dir=/mnt/benchmark/filebench
> >> >> >> set $ndirs=400000
> >> >> >> run
> >> >> >>
> >> >> >
> >> >> > Ok try this one, it will write out more and harder, see if that helps. Thanks,
> >> >> >
> >> >>
> >> >> Still running into BUG at fs/btrfs/inode.c:2176!
> >> >
> >> > How about this one?
> >> >
> >>
> >> Sorry, still getting the same bug.
> >>
> >> [  175.956273] kernel BUG at fs/btrfs/inode.c:2176!
> >
> > Ok I think I see what's happening, this patch replaces the previous one, let me
> > know how it goes. Thanks,
> >
>
> Getting a slightly different BUG this time:
>
Ok looks like I've fixed the original problem and now we're hitting a problem
with the free space cache. This patch will replace the last one, it's all the
fixes up to now and a new set of BUG_ON()'s to figure out which free space cache
inode is screwing us up. Thanks,
Josef
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fc0de68..e595372 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3334,7 +3334,7 @@ out:
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim, int sync)
+ struct btrfs_root *root, u64 to_reclaim, int retries)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
@@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_h=
andle *trans,
}
=20
max_reclaim =3D min(reserved, to_reclaim);
+ if (max_reclaim > (2 * 1024 * 1024))
+ nr_pages =3D max_reclaim >> PAGE_CACHE_SHIFT;
=20
while (loops < 1024) {
- /* have the flusher threads jump in and do some IO */
- smp_mb();
- nr_pages =3D min_t(unsigned long, nr_pages,
- root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
=20
spin_lock(&space_info->lock);
@@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_h=
andle *trans,
if (reserved =3D=3D 0 || reclaimed >=3D max_reclaim)
break;
=20
- if (trans && trans->transaction->blocked)
+ if (trans)
return -EAGAIN;
=20
- time_left =3D schedule_timeout_interruptible(1);
+ if (!retries) {
+ time_left =3D schedule_timeout_interruptible(1);
=20
- /* We were interrupted, exit */
- if (time_left)
- break;
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+ } else {
+ /*
+ * We've already done this song and dance once, let's
+ * really wait for some work to get done.
+ */
+ btrfs_wait_ordered_extents(root, 0, 0);
+ }
=20
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
@@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_h=
andle *trans,
* just too many writers without enough free space
*/
=20
- if (loops > 3) {
+ if (!retries && loops > 3) {
smp_mb();
if (progress !=3D space_info->reservation_progress)
break;
}
=20
}
- if (reclaimed < to_reclaim && !trans)
- btrfs_wait_ordered_extents(root, 0, 0);
return reclaimed >=3D to_reclaim;
}
=20
@@ -3552,7 +3556,7 @@ again:
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
- ret =3D shrink_delalloc(trans, root, num_bytes, 1);
+ ret =3D shrink_delalloc(trans, root, num_bytes, retries);
if (ret < 0)
goto out;
=20
@@ -3568,17 +3572,6 @@ again:
goto again;
}
=20
- /*
- * Not enough space to be reclaimed, don't bother committing the
- * transaction.
- */
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < orig_bytes)
- ret =3D -ENOSPC;
- spin_unlock(&space_info->lock);
- if (ret)
- goto out;
-
ret =3D -EAGAIN;
if (trans)
goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d6ba353..cb63904 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -782,7 +782,8 @@ static noinline int cow_file_range(struct inode *in=
ode,
struct extent_map_tree *em_tree =3D &BTRFS_I(inode)->extent_tree;
int ret =3D 0;
=20
- BUG_ON(btrfs_is_free_space_inode(root, inode));
+ BUG_ON(root =3D=3D root->fs_info->tree_root);
+ BUG_ON(BTRFS_I(inode)->location.objectid =3D=3D BTRFS_FREE_INO_OBJECT=
ID);
trans =3D btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv =3D &root->fs_info->delalloc_block_rsv;
@@ -2790,7 +2791,8 @@ static struct btrfs_trans_handle *__unlink_start_=
trans(struct inode *dir,
return ERR_PTR(-ENOMEM);
}
=20
- trans =3D btrfs_start_transaction(root, 0);
+ /* 1 for the orphan item */
+ trans =3D btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_free_path(path);
root->fs_info->enospc_unlink =3D 0;
@@ -2901,6 +2903,11 @@ out:
return ERR_PTR(err);
}
=20
+ ret =3D btrfs_block_rsv_migrate(trans->block_rsv,
+ &root->fs_info->global_block_rsv,
+ btrfs_calc_trans_metadata_size(root, 1));
+ BUG_ON(ret);
+
trans->block_rsv =3D &root->fs_info->global_block_rsv;
return trans;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2011-10-13 12:57 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-09-26 21:22 [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!! Josef Bacik
2011-10-11 17:33 ` Mitch Harder
2011-10-11 17:43 ` Josef Bacik
2011-10-11 18:27 ` Josef Bacik
2011-10-11 19:00 ` Josef Bacik
2011-10-11 19:44 ` Mitch Harder
2011-10-11 20:01 ` Josef Bacik
2011-10-11 20:45 ` Mitch Harder
2011-10-12 17:50 ` Josef Bacik
2011-10-12 20:45 ` Mitch Harder
2011-10-13 12:57 ` Josef Bacik [this message]
2011-10-13 15:03 ` Christian Brunner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111013125746.GA2310@localhost.localdomain \
--to=josef@redhat.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=mitch.harder@sabayonlinux.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.