* [PATCH] btrfs: change how we decide to commit transactions during flushing
@ 2017-08-18 20:09 josef
2017-08-19 9:03 ` Nikolay Borisov
0 siblings, 1 reply; 2+ messages in thread
From: josef @ 2017-08-18 20:09 UTC (permalink / raw)
To: linux-btrfs, kernel-team; +Cc: Josef Bacik
From: Josef Bacik <jbacik@fb.com>
Nikolay reported that generic/273 was failing currently with ENOSPC.
Turns out this is because we get to the point where the outstanding
reservations are greater than the pinned space on the fs. This is a
mistake, previously we used the current reservation amount in
may_commit_transaction, not the entire outstanding reservation amount.
Fix this to find the minimum byte size needed to make progress in
flushing, and pass that into may_commit_transaction. From there we can
make a smarter decision on whether to commit the transaction or not.
This fixes the failure in generic/273.
Reported-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
fs/btrfs/extent-tree.c | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fac42b8..4509e47 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4872,8 +4872,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
return -ENOSPC;
spin_lock(&delayed_rsv->lock);
+ if (delayed_rsv->size > bytes)
+ bytes = 0;
+ else
+ bytes -= delayed_rsv->size;
if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes - delayed_rsv->size) < 0) {
+ bytes) < 0) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
@@ -4901,7 +4905,7 @@ struct reserve_ticket {
*/
static void flush_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 num_bytes,
- int state)
+ u64 min_bytes, int state)
{
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_trans_handle *trans;
@@ -4944,7 +4948,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case COMMIT_TRANS:
ret = may_commit_transaction(fs_info, space_info,
- num_bytes, 0);
+ min_bytes, 0);
break;
default:
ret = -ENOSPC;
@@ -4959,17 +4963,23 @@ static void flush_space(struct btrfs_fs_info *fs_info,
static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
- bool system_chunk)
+ u64 *min_reclaim, bool system_chunk)
{
struct reserve_ticket *ticket;
u64 used;
u64 expected;
u64 to_reclaim = 0;
- list_for_each_entry(ticket, &space_info->tickets, list)
+ list_for_each_entry(ticket, &space_info->tickets, list) {
+ if (min_reclaim && *min_reclaim == 0)
+ *min_reclaim = ticket->bytes;
to_reclaim += ticket->bytes;
- list_for_each_entry(ticket, &space_info->priority_tickets, list)
+ }
+ list_for_each_entry(ticket, &space_info->priority_tickets, list) {
+ if (min_reclaim && *min_reclaim == 0)
+ *min_reclaim = ticket->bytes;
to_reclaim += ticket->bytes;
+ }
if (to_reclaim)
return to_reclaim;
@@ -4992,6 +5002,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
to_reclaim = 0;
to_reclaim = min(to_reclaim, space_info->bytes_may_use +
space_info->bytes_reserved);
+ if (min_reclaim)
+ *min_reclaim = to_reclaim;
return to_reclaim;
}
@@ -5005,7 +5017,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
- if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, NULL,
system_chunk))
return 0;
@@ -5038,13 +5050,14 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
int flush_state;
int commit_cycles = 0;
u64 last_tickets_id;
+ u64 min_reclaim = 0;
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- false);
+ &min_reclaim, false);
if (!to_reclaim) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
@@ -5055,15 +5068,18 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
- flush_space(fs_info, space_info, to_reclaim, flush_state);
+ flush_space(fs_info, space_info, to_reclaim, min_reclaim,
+ flush_state);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
return;
}
+ min_reclaim = 0;
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
space_info,
+ &min_reclaim,
false);
if (last_tickets_id == space_info->tickets_id) {
flush_state++;
@@ -5097,11 +5113,12 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket)
{
u64 to_reclaim;
+ u64 min_reclaim = 0;
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- false);
+ &min_reclaim, false);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
return;
@@ -5109,7 +5126,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
spin_unlock(&space_info->lock);
do {
- flush_space(fs_info, space_info, to_reclaim, flush_state);
+ flush_space(fs_info, space_info, to_reclaim, min_reclaim,
+ flush_state);
flush_state++;
spin_lock(&space_info->lock);
if (ticket->bytes == 0) {
--
2.7.4
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] btrfs: change how we decide to commit transactions during flushing
2017-08-18 20:09 [PATCH] btrfs: change how we decide to commit transactions during flushing josef
@ 2017-08-19 9:03 ` Nikolay Borisov
0 siblings, 0 replies; 2+ messages in thread
From: Nikolay Borisov @ 2017-08-19 9:03 UTC (permalink / raw)
To: josef, linux-btrfs, kernel-team; +Cc: Josef Bacik
On 18.08.2017 23:09, josef@toxicpanda.com wrote:
> From: Josef Bacik <jbacik@fb.com>
>
> Nikolay reported that generic/273 was failing currently with ENOSPC.
> Turns out this is because we get to the point where the outstanding
> reservations are greater than the pinned space on the fs. This is a
> mistake, previously we used the current reservation amount in
> may_commit_transaction, not the entire outstanding reservation amount.
> Fix this to find the minimum byte size needed to make progress in
> flushing, and pass that into may_commit_transaction. From there we can
> make a smarter decision on whether to commit the transaction or not.
> This fixes the failure in generic/273.
>
How about achieving the same end-goal with this diff. E.g. localising
the changes in may_commit transaction?
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6ca75dc62a3c..49629266f824 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4837,6 +4837,13 @@ static void shrink_delalloc(struct btrfs_fs_info
*fs_info, u64 to_reclaim,
}
}
+struct reserve_ticket {
+ u64 bytes;
+ int error;
+ struct list_head list;
+ wait_queue_head_t wait;
+};
+
/**
* maybe_commit_transaction - possibly commit the transaction if its ok to
* @root - the root we're allocating for
@@ -4873,9 +4880,24 @@ static int may_commit_transaction(struct
btrfs_fs_info *fs_info,
if (space_info != delayed_rsv->space_info)
return -ENOSPC;
+ spin_lock(&space_info->lock);
+ if (!list_empty(&space_info->tickets))
+ bytes = list_first_entry(&space_info->tickets,
+ struct reserve_ticket,
+ list)->bytes;
+ else if (!list_empty(&space_info->priority_tickets))
+ bytes = list_first_entry(&space_info->priority_tickets,
+ struct reserve_ticket,
+ list)->bytes;
+ spin_unlock(&space_info->lock);
+
spin_lock(&delayed_rsv->lock);
+ if (delayed_rsv->size > bytes)
+ bytes = 0;
+ else
+ bytes -= delayed_rsv->size;
if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes - delayed_rsv->size) < 0) {
+ bytes) < 0) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
@@ -4889,13 +4911,6 @@ static int may_commit_transaction(struct
btrfs_fs_info *fs_info,
return btrfs_commit_transaction(trans);
}
-struct reserve_ticket {
- u64 bytes;
- int error;
- struct list_head list;
- wait_queue_head_t wait;
-};
-
/*
* Try to flush some data based on policy set by @state. This is only
advisory
* and may fail for various reasons. The caller is supposed to examine the
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-08-19 9:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-08-18 20:09 [PATCH] btrfs: change how we decide to commit transactions during flushing josef
2017-08-19 9:03 ` Nikolay Borisov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox