From: Liu Bo <bo.li.liu@oracle.com>
To: Josef Bacik <jbacik@fb.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 07/14] Btrfs: introduce ticketed enospc infrastructure
Date: Mon, 9 May 2016 14:29:14 -0700
Message-ID: <20160509212914.GB4954@localhost.localdomain>
In-Reply-To: <1458926760-17563-8-git-send-email-jbacik@fb.com>
On Fri, Mar 25, 2016 at 01:25:53PM -0400, Josef Bacik wrote:
> Our enospc flushing sucks. It was born of a time when we were hitting early
> enospc constantly because multiple threads would race in for the same
> reservation and randomly starve each other out. So I came up with this solution
> to block any other reservations from happening while one guy tried to flush
> stuff to satisfy his reservation. This gives us pretty good correctness, but
> completely crap latency.
>
> The solution I've come up with is ticketed reservations. Basically we try to
> make our reservation, and if we can't we put a ticket on a list in order and
> kick off an async flusher thread. This async flusher thread does the same old
> flushing we always did, just asynchronously. As space is freed and added back
> to the space_info, it checks whether we have any tickets that need satisfying,
> adds space to those tickets, and wakes up anything we've satisfied.
>
> Once the flusher thread stops making progress it wakes up all the current
> tickets and tells them to take a hike.
>
> There is a priority list for things that can't wait on the async flusher,
> since the async flusher could do anything and we need to avoid deadlocks. These
> guys get priority for having their reservations made, and will still do manual
> flushing themselves in case the async flusher isn't running.
>
> This patch gives us significantly better latencies. Thanks,
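
The design pages in easily once you model it: at bottom it is a small FIFO
handout queue. Here is a toy userspace sketch I wrote while reviewing
(pthreads instead of waitqueues, no flusher, no priority list, all names
mine). It shows only the queue/handout mechanics, not the kernel code:

#include <pthread.h>
#include <stdint.h>

/* One queued reservation: "bytes" still missing, FIFO-linked. */
struct ticket {
	uint64_t bytes;
	int error;
	struct ticket *next;
	pthread_cond_t cond;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct ticket *head;
static struct ticket **tail = &head;
static uint64_t free_space;

/* Reserver: take space if it is there and nobody is queued ahead of us,
 * otherwise append a ticket and sleep on it. */
static int reserve(uint64_t bytes)
{
	struct ticket t = { .bytes = bytes };
	int err;

	pthread_cond_init(&t.cond, NULL);
	pthread_mutex_lock(&lock);
	if (!head && free_space >= bytes) {
		free_space -= bytes;
		pthread_mutex_unlock(&lock);
		return 0;
	}
	*tail = &t;		/* FIFO: tickets are served in order */
	tail = &t.next;
	/* the real code kicks the async flusher here */
	while (t.bytes && !t.error)
		pthread_cond_wait(&t.cond, &lock);
	err = t.error;
	pthread_mutex_unlock(&lock);
	return err;
}

/* Space-return path: freed space goes to queued tickets first, and only
 * the leftover lands back in the general pool. */
static void add_space(uint64_t bytes)
{
	pthread_mutex_lock(&lock);
	while (head && bytes) {
		struct ticket *t = head;
		uint64_t grant = bytes < t->bytes ? bytes : t->bytes;

		t->bytes -= grant;
		bytes -= grant;
		if (!t->bytes) {	/* fully satisfied: dequeue and wake */
			head = t->next;
			if (!head)
				tail = &head;
			pthread_cond_signal(&t->cond);
		}
	}
	free_space += bytes;
	pthread_mutex_unlock(&lock);
}

The async flusher and the priority list bolt onto this skeleton: reserve()
kicks the worker after queueing, the worker marks every remaining ticket
with -ENOSPC and wakes it once flushing stops making progress, and priority
tickets sit on a second queue that add_space() drains first.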
>
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
> fs/btrfs/ctree.h | 2 +
> fs/btrfs/extent-tree.c | 524 +++++++++++++++++++++++++++++++++++--------------
> 2 files changed, 375 insertions(+), 151 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index b675066..7437c8a 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1229,6 +1229,8 @@ struct btrfs_space_info {
> struct list_head list;
> /* Protected by the spinlock 'lock'. */
> struct list_head ro_bgs;
> + struct list_head priority_tickets;
> + struct list_head tickets;
>
> struct rw_semaphore groups_sem;
> /* for block groups in our same type */
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 0db4319..1673365 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
> u64 num_bytes);
> int btrfs_pin_extent(struct btrfs_root *root,
> u64 bytenr, u64 num_bytes, int reserved);
> +static int __reserve_metadata_bytes(struct btrfs_root *root,
> + struct btrfs_space_info *space_info,
> + u64 orig_bytes,
> + enum btrfs_reserve_flush_enum flush);
> +static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info,
> + u64 num_bytes);
> +static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info,
> + u64 num_bytes);
>
> static noinline int
> block_group_cache_done(struct btrfs_block_group_cache *cache)
> @@ -3867,6 +3877,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
> found->bytes_readonly += bytes_readonly;
> if (total_bytes > 0)
> found->full = 0;
> + space_info_add_new_bytes(info, found, total_bytes -
> + bytes_used - bytes_readonly);
> spin_unlock(&found->lock);
> *space_info = found;
> return 0;
> @@ -3901,6 +3913,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
> found->flush = 0;
> init_waitqueue_head(&found->wait);
> INIT_LIST_HEAD(&found->ro_bgs);
> + INIT_LIST_HEAD(&found->tickets);
> + INIT_LIST_HEAD(&found->priority_tickets);
>
> ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
> info->space_info_kobj, "%s",
> @@ -4514,12 +4528,19 @@ static int can_overcommit(struct btrfs_root *root,
> struct btrfs_space_info *space_info, u64 bytes,
> enum btrfs_reserve_flush_enum flush)
> {
> - struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
> - u64 profile = btrfs_get_alloc_profile(root, 0);
> + struct btrfs_block_rsv *global_rsv;
> + u64 profile;
> u64 space_size;
> u64 avail;
> u64 used;
>
> + /* Don't overcommit when in mixed mode. */
> + if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
> + return 0;
> +
> + BUG_ON(root->fs_info == NULL);
> + global_rsv = &root->fs_info->global_block_rsv;
> + profile = btrfs_get_alloc_profile(root, 0);
> used = space_info->bytes_used + space_info->bytes_reserved +
> space_info->bytes_pinned + space_info->bytes_readonly;
>
> @@ -4669,6 +4690,11 @@ skip_async:
> spin_unlock(&space_info->lock);
> break;
> }
> + if (list_empty(&space_info->tickets) &&
> + list_empty(&space_info->priority_tickets)) {
> + spin_unlock(&space_info->lock);
> + break;
> + }
> spin_unlock(&space_info->lock);
>
> loops++;
> @@ -4745,6 +4771,13 @@ enum flush_state {
> COMMIT_TRANS = 6,
> };
>
> +struct reserve_ticket {
> + u64 bytes;
> + int error;
> + struct list_head list;
> + wait_queue_head_t wait;
> +};
> +
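
To spell out how those fields get used later in the patch (my annotations,
not part of the patch itself):

struct reserve_ticket {
	u64 bytes;		/* bytes still owed; counted down toward 0
				 * as space trickles back in */
	int error;		/* 0, or -ENOSPC once wake_all_tickets()
				 * decides flushing has given up */
	struct list_head list;	/* entry on space_info->tickets or
				 * ->priority_tickets, served in FIFO order */
	wait_queue_head_t wait;	/* the reserver sleeps here until bytes
				 * hits 0 or error is set */
};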
> static int flush_space(struct btrfs_root *root,
> struct btrfs_space_info *space_info, u64 num_bytes,
> u64 orig_bytes, int state)
> @@ -4802,17 +4835,22 @@ static inline u64
> btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
> struct btrfs_space_info *space_info)
> {
> + struct reserve_ticket *ticket;
> u64 used;
> u64 expected;
> - u64 to_reclaim;
> + u64 to_reclaim = 0;
>
> to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
> - spin_lock(&space_info->lock);
> if (can_overcommit(root, space_info, to_reclaim,
> - BTRFS_RESERVE_FLUSH_ALL)) {
> - to_reclaim = 0;
> - goto out;
> - }
> + BTRFS_RESERVE_FLUSH_ALL))
> + return 0;
> +
> + list_for_each_entry(ticket, &space_info->tickets, list)
> + to_reclaim += ticket->bytes;
> + list_for_each_entry(ticket, &space_info->priority_tickets, list)
> + to_reclaim += ticket->bytes;
> + if (to_reclaim)
> + return to_reclaim;
>
> used = space_info->bytes_used + space_info->bytes_reserved +
> space_info->bytes_pinned + space_info->bytes_readonly +
> @@ -4828,9 +4866,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
> to_reclaim = 0;
> to_reclaim = min(to_reclaim, space_info->bytes_may_use +
> space_info->bytes_reserved);
> -out:
> - spin_unlock(&space_info->lock);
> -
> return to_reclaim;
> }
>
> @@ -4847,69 +4882,169 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
> !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
> }
>
> -static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
> - struct btrfs_fs_info *fs_info,
> - int flush_state)
> +static void wake_all_tickets(struct list_head *head)
> {
> - u64 used;
> + struct reserve_ticket *ticket;
>
> - spin_lock(&space_info->lock);
> - /*
> - * We run out of space and have not got any free space via flush_space,
> - * so don't bother doing async reclaim.
> - */
> - if (flush_state > COMMIT_TRANS && space_info->full) {
> - spin_unlock(&space_info->lock);
> - return 0;
> + while (!list_empty(head)) {
> + ticket = list_first_entry(head, struct reserve_ticket, list);
> + list_del_init(&ticket->list);
> + ticket->error = -ENOSPC;
> + wake_up(&ticket->wait);
> }
> -
> - used = space_info->bytes_used + space_info->bytes_reserved +
> - space_info->bytes_pinned + space_info->bytes_readonly +
> - space_info->bytes_may_use;
> - if (need_do_async_reclaim(space_info, fs_info, used)) {
> - spin_unlock(&space_info->lock);
> - return 1;
> - }
> - spin_unlock(&space_info->lock);
> -
> - return 0;
> }
>
> +/*
> + * This is for normal flushers, we can wait all goddamned day if we want to. We
> + * will loop and continuously try to flush as long as we are making progress.
> + * We count progress as clearing off tickets each time we have to loop.
> + */
> static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
> {
> + struct reserve_ticket *last_ticket = NULL;
> struct btrfs_fs_info *fs_info;
> struct btrfs_space_info *space_info;
> u64 to_reclaim;
> int flush_state;
> + int commit_cycles = 0;
>
> fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
> space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
>
> + spin_lock(&space_info->lock);
> to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
> space_info);
> - if (!to_reclaim)
> + if (!to_reclaim) {
> + space_info->flush = 0;
> + spin_unlock(&space_info->lock);
> return;
> + }
> + last_ticket = list_first_entry(&space_info->tickets,
> + struct reserve_ticket, list);
> + spin_unlock(&space_info->lock);
>
> flush_state = FLUSH_DELAYED_ITEMS_NR;
> do {
> + struct reserve_ticket *ticket;
> + int ret;
> +
> + ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
> + to_reclaim, flush_state);
> + spin_lock(&space_info->lock);
> + if (list_empty(&space_info->tickets)) {
> + space_info->flush = 0;
> + spin_unlock(&space_info->lock);
> + return;
> + }
> + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
> + space_info);
> + ticket = list_first_entry(&space_info->tickets,
> + struct reserve_ticket, list);
> + if (last_ticket == ticket) {
> + flush_state++;
> + } else {
> + last_ticket = ticket;
> + flush_state = FLUSH_DELAYED_ITEMS_NR;
> + if (commit_cycles)
> + commit_cycles--;
> + }
> +
> + if (flush_state > COMMIT_TRANS) {
> + commit_cycles++;
> + if (commit_cycles > 2) {
> + wake_all_tickets(&space_info->tickets);
> + space_info->flush = 0;
> + } else {
> + flush_state = FLUSH_DELAYED_ITEMS_NR;
> + }
> + }
> + spin_unlock(&space_info->lock);
> + } while (flush_state <= COMMIT_TRANS);
> +}
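
The escalation logic in that loop took me a minute to untangle, so to spell
it out (a standalone sketch of just the heuristic, assuming I'm reading the
loop right):

#include <stdbool.h>

enum {
	FIRST_STATE = 1,	/* FLUSH_DELAYED_ITEMS_NR */
	LAST_STATE = 6,		/* COMMIT_TRANS */
};

/*
 * Returns the next flush state, or 0 when the worker should give up,
 * i.e. wake all remaining tickets with -ENOSPC and clear ->flush.
 * "progress" means the head of the ticket list changed since the last
 * iteration, so at least one reservation was satisfied.
 */
static int next_flush_state(int state, int *commit_cycles, bool progress)
{
	if (progress) {
		if (*commit_cycles)
			(*commit_cycles)--;
		return FIRST_STATE;
	}
	if (++state > LAST_STATE) {
		if (++(*commit_cycles) > 2)
			return 0;
		return FIRST_STATE;
	}
	return state;
}

So a full pass through every state without satisfying a single ticket counts
as one commit cycle, and the third fruitless cycle in a row is the give-up
condition.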
> +
> +void btrfs_init_async_reclaim_work(struct work_struct *work)
> +{
> + INIT_WORK(work, btrfs_async_reclaim_metadata_space);
> +}
> +
> +static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info,
> + struct reserve_ticket *ticket)
> +{
> + u64 to_reclaim;
> + int flush_state = FLUSH_DELAYED_ITEMS_NR;
> +
> + spin_lock(&space_info->lock);
> + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
> + space_info);
> + if (!to_reclaim) {
> + spin_unlock(&space_info->lock);
> + return;
> + }
> + spin_unlock(&space_info->lock);
> +
> + do {
> flush_space(fs_info->fs_root, space_info, to_reclaim,
> to_reclaim, flush_state);
> flush_state++;
> - if (!btrfs_need_do_async_reclaim(space_info, fs_info,
> - flush_state))
> + spin_lock(&space_info->lock);
> + if (ticket->bytes == 0) {
> + spin_unlock(&space_info->lock);
> return;
> + }
> + spin_unlock(&space_info->lock);
> +
> + /*
> + * Priority flushers can't wait on delalloc without
> + * deadlocking.
> + */
> + if (flush_state == FLUSH_DELALLOC ||
> + flush_state == FLUSH_DELALLOC_WAIT)
> + flush_state = ALLOC_CHUNK;
> } while (flush_state < COMMIT_TRANS);
> }
>
> -void btrfs_init_async_reclaim_work(struct work_struct *work)
> +static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info,
> + struct reserve_ticket *ticket, u64 orig_bytes)
> +
> {
> - INIT_WORK(work, btrfs_async_reclaim_metadata_space);
> + DEFINE_WAIT(wait);
> + int ret = 0;
> +
> + spin_lock(&space_info->lock);
> + while (ticket->bytes > 0 && ticket->error == 0) {
> + ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
> + if (ret) {
> + ret = -EINTR;
> + break;
> + }
> + spin_unlock(&space_info->lock);
> +
> + schedule();
> +
> + finish_wait(&ticket->wait, &wait);
> + spin_lock(&space_info->lock);
> + }
> + if (!ret)
> + ret = ticket->error;
> + if (!list_empty(&ticket->list))
> + list_del_init(&ticket->list);
> + if (ticket->bytes && ticket->bytes < orig_bytes) {
> + u64 num_bytes = orig_bytes - ticket->bytes;
> + space_info->bytes_may_use -= num_bytes;
> + trace_btrfs_space_reservation(fs_info, "space_info",
> + space_info->flags, num_bytes, 0);
> + }
> + spin_unlock(&space_info->lock);
> +
> + return ret;
> }
>
> /**
> * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
> * @root - the root we're allocating for
> - * @block_rsv - the block_rsv we're allocating for
> + * @space_info - the space info we want to allocate from
> * @orig_bytes - the number of bytes we want
> * @flush - whether or not we can flush to make our reservation
> *
> @@ -4920,81 +5055,34 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
> * regain reservations will be made and this will fail if there is not enough
> * space already.
> */
> -static int reserve_metadata_bytes(struct btrfs_root *root,
> - struct btrfs_block_rsv *block_rsv,
> - u64 orig_bytes,
> - enum btrfs_reserve_flush_enum flush)
> +static int __reserve_metadata_bytes(struct btrfs_root *root,
> + struct btrfs_space_info *space_info,
> + u64 orig_bytes,
> + enum btrfs_reserve_flush_enum flush)
> {
> - struct btrfs_space_info *space_info = block_rsv->space_info;
> + struct reserve_ticket ticket;
> u64 used;
> - u64 num_bytes = orig_bytes;
> - int flush_state = FLUSH_DELAYED_ITEMS_NR;
> int ret = 0;
> - bool flushing = false;
>
> -again:
> - ret = 0;
> + ASSERT(orig_bytes);
> spin_lock(&space_info->lock);
> - /*
> - * We only want to wait if somebody other than us is flushing and we
> - * are actually allowed to flush all things.
> - */
> - while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
> - space_info->flush) {
> - spin_unlock(&space_info->lock);
> - /*
> - * If we have a trans handle we can't wait because the flusher
> - * may have to commit the transaction, which would mean we would
> - * deadlock since we are waiting for the flusher to finish, but
> - * hold the current transaction open.
> - */
> - if (current->journal_info)
> - return -EAGAIN;
> - ret = wait_event_killable(space_info->wait, !space_info->flush);
> - /* Must have been killed, return */
> - if (ret)
> - return -EINTR;
> -
> - spin_lock(&space_info->lock);
> - }
> -
> ret = -ENOSPC;
> used = space_info->bytes_used + space_info->bytes_reserved +
> space_info->bytes_pinned + space_info->bytes_readonly +
> space_info->bytes_may_use;
>
> /*
> - * The idea here is that we've not already over-reserved the block group
> - * then we can go ahead and save our reservation first and then start
> - * flushing if we need to. Otherwise if we've already overcommitted
> - * lets start flushing stuff first and then come back and try to make
> - * our reservation.
> + * If we have enough space then hooray, make our reservation and carry
> + * on. If not see if we can overcommit, and if we can, hooray carry on.
> + * If not things get more complicated.
> */
> - if (used <= space_info->total_bytes) {
> - if (used + orig_bytes <= space_info->total_bytes) {
> - space_info->bytes_may_use += orig_bytes;
> - trace_btrfs_space_reservation(root->fs_info,
> - "space_info", space_info->flags, orig_bytes, 1);
> - ret = 0;
> - } else {
> - /*
> - * Ok set num_bytes to orig_bytes since we aren't
> - * overocmmitted, this way we only try and reclaim what
> - * we need.
> - */
> - num_bytes = orig_bytes;
> - }
> - } else {
> - /*
> - * Ok we're over committed, set num_bytes to the overcommitted
> - * amount plus the amount of bytes that we need for this
> - * reservation.
> - */
> - num_bytes = used - space_info->total_bytes +
> - (orig_bytes * 2);
> - }
> -
> - if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
> + if (used + orig_bytes <= space_info->total_bytes) {
> + space_info->bytes_may_use += orig_bytes;
> + trace_btrfs_space_reservation(root->fs_info, "space_info",
> + space_info->flags, orig_bytes,
> + 1);
> + ret = 0;
> + } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
> space_info->bytes_may_use += orig_bytes;
> trace_btrfs_space_reservation(root->fs_info, "space_info",
> space_info->flags, orig_bytes,
> @@ -5003,16 +5091,27 @@ again:
> }
>
> /*
> - * Couldn't make our reservation, save our place so while we're trying
> - * to reclaim space we can actually use it instead of somebody else
> - * stealing it from us.
> + * If we couldn't make a reservation then setup our reservation ticket
> + * and kick the async worker if it's not already running.
> *
> - * We make the other tasks wait for the flush only when we can flush
> - * all things.
> + * If we are a priority flusher then we just need to add our ticket to
> + * the list and we will do our own flushing further down.
> */
> if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
> - flushing = true;
> - space_info->flush = 1;
> + ticket.bytes = orig_bytes;
> + ticket.error = 0;
> + init_waitqueue_head(&ticket.wait);
> + if (flush == BTRFS_RESERVE_FLUSH_ALL) {
> + list_add_tail(&ticket.list, &space_info->tickets);
> + if (!space_info->flush) {
> + space_info->flush = 1;
> + queue_work(system_unbound_wq,
> + &root->fs_info->async_reclaim_work);
> + }
> + } else {
> + list_add_tail(&ticket.list,
> + &space_info->priority_tickets);
> + }
> } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
> used += orig_bytes;
> /*
> @@ -5027,33 +5126,56 @@ again:
> &root->fs_info->async_reclaim_work);
> }
> spin_unlock(&space_info->lock);
> -
> if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
> - goto out;
> + return ret;
>
> - ret = flush_space(root, space_info, num_bytes, orig_bytes,
> - flush_state);
> - flush_state++;
> + if (flush == BTRFS_RESERVE_FLUSH_ALL)
> + return wait_reserve_ticket(root->fs_info, space_info, &ticket,
> + orig_bytes);
>
> - /*
> - * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
> - * would happen. So skip delalloc flush.
> - */
> - if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
> - (flush_state == FLUSH_DELALLOC ||
> - flush_state == FLUSH_DELALLOC_WAIT))
> - flush_state = ALLOC_CHUNK;
> + ret = 0;
> + priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
> + spin_lock(&space_info->lock);
> + if (ticket.bytes) {
> + if (ticket.bytes < orig_bytes) {
> + u64 num_bytes = orig_bytes - ticket.bytes;
> + space_info->bytes_may_use -= num_bytes;
> + trace_btrfs_space_reservation(root->fs_info,
> + "space_info", space_info->flags,
> + num_bytes, 0);
>
> - if (!ret)
> - goto again;
> - else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
> - flush_state < COMMIT_TRANS)
> - goto again;
> - else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
> - flush_state <= COMMIT_TRANS)
> - goto again;
> + }
> + list_del_init(&ticket.list);
> + ret = -ENOSPC;
> + }
> + spin_unlock(&space_info->lock);
> + ASSERT(list_empty(&ticket.list));
> + return ret;
> +}
>
> -out:
> +/**
> + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
> + * @root - the root we're allocating for
> + * @block_rsv - the block_rsv we're allocating for
> + * @orig_bytes - the number of bytes we want
> + * @flush - whether or not we can flush to make our reservation
> + *
> + * This will reserve orig_bytes number of bytes from the space info associated
> + * with the block_rsv. If there is not enough space it will make an attempt to
> + * flush out space to make room. It will do this by flushing delalloc if
> + * possible or committing the transaction. If flush is 0 then no attempts to
> + * regain reservations will be made and this will fail if there is not enough
> + * space already.
> + */
> +static int reserve_metadata_bytes(struct btrfs_root *root,
> + struct btrfs_block_rsv *block_rsv,
> + u64 orig_bytes,
> + enum btrfs_reserve_flush_enum flush)
> +{
> + int ret;
> +
> + ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
> + flush);
> if (ret == -ENOSPC &&
> unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
> struct btrfs_block_rsv *global_rsv =
> @@ -5066,13 +5188,8 @@ out:
> if (ret == -ENOSPC)
> trace_btrfs_space_reservation(root->fs_info,
> "space_info:enospc",
> - space_info->flags, orig_bytes, 1);
> - if (flushing) {
> - spin_lock(&space_info->lock);
> - space_info->flush = 0;
> - wake_up_all(&space_info->wait);
> - spin_unlock(&space_info->lock);
> - }
> + block_rsv->space_info->flags,
> + orig_bytes, 1);
> return ret;
> }
>
> @@ -5148,6 +5265,103 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
> return 0;
> }
>
> +/*
> + * This is for space we already have accounted in space_info->bytes_may_use, so
> + * basically when we're returning space from block_rsv's.
> + */
> +static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info,
> + u64 num_bytes)
> +{
> + struct reserve_ticket *ticket;
> + struct list_head *head;
> + u64 used;
> + enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
> + bool check_overcommit = false;
> +
> + spin_lock(&space_info->lock);
> + head = &space_info->priority_tickets;
> +
> + /*
> + * First we want to see if we're over our limit, because if we are then
> + * we need to make sure we are still ok overcommitting before we satisfy
> + * another reservation.
> + */
> + used = space_info->bytes_used + space_info->bytes_reserved +
> + space_info->bytes_pinned + space_info->bytes_readonly;
> + if (used - num_bytes >= space_info->total_bytes)
> + check_overcommit = true;
Since 'used' here doesn't include bytes_may_use, it should always be less
than ->total_bytes, so this check can hardly ever be true.
Did you mean to check if (used + num_bytes >= space_info->total_bytes)?
The rest looks sane to me.
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Thanks,
-liubo
> +again:
> + while (!list_empty(head) && num_bytes) {
> + ticket = list_first_entry(head, struct reserve_ticket,
> + list);
> + if (check_overcommit &&
> + !can_overcommit(fs_info->extent_root, space_info,
> + ticket->bytes, flush))
> + break;
> + if (num_bytes >= ticket->bytes) {
> + list_del_init(&ticket->list);
> + num_bytes -= ticket->bytes;
> + ticket->bytes = 0;
> + wake_up(&ticket->wait);
> + } else {
> + ticket->bytes -= num_bytes;
> + num_bytes = 0;
> + }
> + }
> +
> + if (num_bytes && head == &space_info->priority_tickets) {
> + head = &space_info->tickets;
> + flush = BTRFS_RESERVE_FLUSH_ALL;
> + goto again;
> + }
> + space_info->bytes_may_use -= num_bytes;
> + trace_btrfs_space_reservation(fs_info, "space_info",
> + space_info->flags, num_bytes, 0);
> + spin_unlock(&space_info->lock);
> +}
> +
> +/*
> + * This is for newly allocated space that isn't accounted in
> + * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
> + * we use this helper.
> + */
> +static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info,
> + u64 num_bytes)
> +{
> + struct reserve_ticket *ticket;
> + struct list_head *head = &space_info->priority_tickets;
> +
> +again:
> + while (!list_empty(head) && num_bytes) {
> + ticket = list_first_entry(head, struct reserve_ticket,
> + list);
> + if (num_bytes >= ticket->bytes) {
> + trace_btrfs_space_reservation(fs_info, "space_info",
> + space_info->flags,
> + ticket->bytes, 1);
> + list_del_init(&ticket->list);
> + num_bytes -= ticket->bytes;
> + space_info->bytes_may_use += ticket->bytes;
> + ticket->bytes = 0;
> + wake_up(&ticket->wait);
> + } else {
> + trace_btrfs_space_reservation(fs_info, "space_info",
> + space_info->flags,
> + num_bytes, 1);
> + space_info->bytes_may_use += num_bytes;
> + ticket->bytes -= num_bytes;
> + num_bytes = 0;
> + }
> + }
> +
> + if (num_bytes && head == &space_info->priority_tickets) {
> + head = &space_info->tickets;
> + goto again;
> + }
> +}
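
These two helpers share the same distribution loop and differ only in which
direction they adjust bytes_may_use. Condensed, the common part looks like
this (my illustration only, with the overcommit check and tracepoints
dropped; the patch doesn't need to refactor this way):

/*
 * Hand num_bytes to tickets in FIFO order; caller holds space_info->lock.
 * Returns whatever is left once every ticket is satisfied or the space
 * runs out.
 */
static u64 distribute_to_tickets(struct list_head *head, u64 num_bytes)
{
	struct reserve_ticket *ticket, *tmp;

	list_for_each_entry_safe(ticket, tmp, head, list) {
		if (!num_bytes)
			break;
		if (num_bytes >= ticket->bytes) {
			num_bytes -= ticket->bytes;
			ticket->bytes = 0;
			list_del_init(&ticket->list);
			wake_up(&ticket->wait);
		} else {
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}
	return num_bytes;
}

In both callers the priority list is walked before the normal list.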
> +
> static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
> struct btrfs_block_rsv *block_rsv,
> struct btrfs_block_rsv *dest, u64 num_bytes)
> @@ -5182,13 +5396,9 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
> }
> spin_unlock(&dest->lock);
> }
> - if (num_bytes) {
> - spin_lock(&space_info->lock);
> - space_info->bytes_may_use -= num_bytes;
> - trace_btrfs_space_reservation(fs_info, "space_info",
> - space_info->flags, num_bytes, 0);
> - spin_unlock(&space_info->lock);
> - }
> + if (num_bytes)
> + space_info_add_old_bytes(fs_info, space_info,
> + num_bytes);
> }
> }
>
> @@ -6346,17 +6556,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
> readonly = true;
> }
> spin_unlock(&cache->lock);
> - if (!readonly && global_rsv->space_info == space_info) {
> + if (!readonly && return_free_space &&
> + global_rsv->space_info == space_info) {
> + u64 to_add = len;
> + WARN_ON(!return_free_space);
> spin_lock(&global_rsv->lock);
> if (!global_rsv->full) {
> - len = min(len, global_rsv->size -
> - global_rsv->reserved);
> - global_rsv->reserved += len;
> - space_info->bytes_may_use += len;
> + to_add = min(len, global_rsv->size -
> + global_rsv->reserved);
> + global_rsv->reserved += to_add;
> + space_info->bytes_may_use += to_add;
> if (global_rsv->reserved >= global_rsv->size)
> global_rsv->full = 1;
> + trace_btrfs_space_reservation(fs_info,
> + "space_info",
> + space_info->flags,
> + to_add, 1);
> + len -= to_add;
> }
> spin_unlock(&global_rsv->lock);
> + /* Add to any tickets we may have */
> + if (len)
> + space_info_add_new_bytes(fs_info, space_info,
> + len);
> }
> spin_unlock(&space_info->lock);
> }
> --
> 2.5.0
>