All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Mahoney <jeffm@suse.com>
To: linux-btrfs <linux-btrfs@vger.kernel.org>
Subject: [PATCH v2] btrfs: add missing discards when unpinning extents with -o discard
Date: Tue, 05 May 2015 14:19:19 -0400	[thread overview]
Message-ID: <554909A7.6030100@suse.com> (raw)

When we clear the dirty bits in btrfs_delete_unused_bgs for extents
in the empty block group, it results in btrfs_finish_extent_commit being
unable to discard the freed extents.

The block group removal patch added an alternate path to forget extents
other than btrfs_finish_extent_commit. As a result, any extents that would
be freed when the block group is removed aren't discarded. In my
test run, with a large copy of mixed sized files followed by removal, it
left nearly 2/3 of extents undiscarded.

To clean up the block groups, we add the removed block group onto a list
that will be discarded after transaction commit.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 fs/btrfs/ctree.h            |  2 ++
 fs/btrfs/extent-tree.c      | 58 +++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/free-space-cache.c | 45 +++++++++++++++++++----------------
 fs/btrfs/super.c            |  2 +-
 fs/btrfs/transaction.c      |  2 ++
 fs/btrfs/transaction.h      |  2 ++
 6 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f364e1..3448a54 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3438,6 +3438,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start,
 			     struct extent_map *em);
 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
+void btrfs_cleanup_block_group_mapping(struct btrfs_block_group_cache *cache);
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
 				       struct btrfs_root *root);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@@ -4068,6 +4069,7 @@ __printf(5, 6)
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 		     unsigned int line, int errno, const char *fmt, ...);
 
+const char *btrfs_decode_error(int errno);
 
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root, const char *function,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6d1d74d..521a294 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6011,6 +6011,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_block_group_cache *block_group, *tmp;
+	struct list_head *deleted_bgs;
 	struct extent_io_tree *unpin;
 	u64 start;
 	u64 end;
@@ -6043,6 +6045,29 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 		cond_resched();
 	}
 
+	/* Transaction is finished. We don't need the lock anymore. */
+	deleted_bgs = &trans->transaction->deleted_bgs;
+	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
+		u64 start, end, trimmed = 0;
+		int ret;
+		list_del_init(&block_group->bg_list);
+
+		start = block_group->key.objectid;
+		end = start + block_group->key.offset - 1;
+
+		ret = btrfs_discard_extent(root, start, end, &trimmed);
+
+		btrfs_cleanup_block_group_mapping(block_group);
+		btrfs_put_block_group(block_group);
+
+		if (ret) {
+			const char *errstr = btrfs_decode_error(ret);
+			btrfs_warn(fs_info,
+				   "Discard failed while removing blockgroup: errno=%d %s\n",
+				   ret, errstr);
+		}
+	}
+
 	return 0;
 }
 
@@ -9802,6 +9827,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	 * currently running transaction might finish and a new one start,
 	 * allowing for new block groups to be created that can reuse the same
 	 * physical device locations unless we take this special care.
+	 *
+	 * There may also be an implicit trim operation if the file system
+	 * is mounted with -odiscard. The same protections must remain
+	 * in place until the extents have been discarded completely when
+	 * the transaction commit has completed.
 	 */
 	remove_em = (atomic_read(&block_group->trimming) == 0);
 	/*
@@ -9876,6 +9906,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 	spin_lock(&fs_info->unused_bgs_lock);
 	while (!list_empty(&fs_info->unused_bgs)) {
 		u64 start, end;
+		int trimming;
 
 		block_group = list_first_entry(&fs_info->unused_bgs,
 					       struct btrfs_block_group_cache,
@@ -9973,12 +10004,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		spin_unlock(&block_group->lock);
 		spin_unlock(&space_info->lock);
 
+		/* DISCARD can flip during remount */
+		trimming = btrfs_test_opt(root, DISCARD);
+
+		/* Implicit trim during transaction commit. */
+		if (trimming)
+			atomic_inc(&block_group->trimming);
+
 		/*
 		 * Btrfs_remove_chunk will abort the transaction if things go
 		 * horribly wrong.
 		 */
 		ret = btrfs_remove_chunk(trans, root,
 					 block_group->key.objectid);
+
+		if (ret) {
+			if (trimming)
+				atomic_dec(&block_group->trimming);
+			goto end_trans;
+		}
+
+		/*
+		 * If we're not mounted with -odiscard, we can just forget
+		 * about this block group. Otherwise we'll need to wait
+		 * until transaction commit to do the actual discard.
+		 */
+		if (trimming) {
+			WARN_ON(!list_empty(&block_group->bg_list));
+			spin_lock(&trans->transaction->deleted_bgs_lock);
+			list_add(&block_group->bg_list,
+				 &trans->transaction->deleted_bgs);
+			spin_unlock(&trans->transaction->deleted_bgs_lock);
+			btrfs_get_block_group(block_group);
+		}
 end_trans:
 		btrfs_end_transaction(trans, root);
 next:
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 41c510b..d33b146 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3274,6 +3274,30 @@ next:
 	return ret;
 }
 
+void btrfs_cleanup_block_group_mapping(struct btrfs_block_group_cache *cache)
+{
+	struct extent_map_tree *em_tree;
+	struct extent_map *em;
+
+	lock_chunks(cache->fs_info->chunk_root);
+	em_tree = &cache->fs_info->mapping_tree.map_tree;
+	write_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, cache->key.objectid, 1);
+	BUG_ON(!em); /* logic error, can't happen */
+
+	/*
+	 * remove_extent_mapping() will delete us from the pinned_chunks
+	 * list, which is protected by the chunk mutex.
+	 */
+	remove_extent_mapping(em_tree, em);
+	write_unlock(&em_tree->lock);
+	unlock_chunks(cache->fs_info->chunk_root);
+
+	/* once for us and once for the tree */
+	free_extent_map(em);
+	free_extent_map(em);
+}
+
 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
 			   u64 *trimmed, u64 start, u64 end, u64 minlen)
 {
@@ -3298,28 +3322,9 @@ out:
 	spin_lock(&block_group->lock);
 	if (atomic_dec_and_test(&block_group->trimming) &&
 	    block_group->removed) {
-		struct extent_map_tree *em_tree;
-		struct extent_map *em;
-
 		spin_unlock(&block_group->lock);
 
-		lock_chunks(block_group->fs_info->chunk_root);
-		em_tree = &block_group->fs_info->mapping_tree.map_tree;
-		write_lock(&em_tree->lock);
-		em = lookup_extent_mapping(em_tree, block_group->key.objectid,
-					   1);
-		BUG_ON(!em); /* logic error, can't happen */
-		/*
-		 * remove_extent_mapping() will delete us from the pinned_chunks
-		 * list, which is protected by the chunk mutex.
-		 */
-		remove_extent_mapping(em_tree, em);
-		write_unlock(&em_tree->lock);
-		unlock_chunks(block_group->fs_info->chunk_root);
-
-		/* once for us and once for the tree */
-		free_extent_map(em);
-		free_extent_map(em);
+		btrfs_cleanup_block_group_mapping(block_group);
 
 		/*
 		 * We've left one free space entry and other tasks trimming
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9e66f5e..016e65a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
-static const char *btrfs_decode_error(int errno)
+const char *btrfs_decode_error(int errno)
 {
 	char *errstr = "unknown";
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5628e25..2005262 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -256,6 +256,8 @@ loop:
 	mutex_init(&cur_trans->cache_write_mutex);
 	cur_trans->num_dirty_bgs = 0;
 	spin_lock_init(&cur_trans->dirty_bgs_lock);
+	INIT_LIST_HEAD(&cur_trans->deleted_bgs);
+	spin_lock_init(&cur_trans->deleted_bgs_lock);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(&cur_trans->dirty_pages,
 			     fs_info->btree_inode->i_mapping);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 0b24755..14325f2 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -74,6 +74,8 @@ struct btrfs_transaction {
 	 */
 	struct mutex cache_write_mutex;
 	spinlock_t dirty_bgs_lock;
+	struct list_head deleted_bgs;
+	spinlock_t deleted_bgs_lock;
 	struct btrfs_delayed_ref_root delayed_refs;
 	int aborted;
 	int dirty_bg_run;
-- 
1.8.5.6


-- 
Jeff Mahoney
SUSE Labs

             reply	other threads:[~2015-05-05 18:19 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-05 18:19 Jeff Mahoney [this message]
2015-05-08 21:10 ` [PATCH v2] btrfs: add missing discards when unpinning extents with -o discard Filipe David Manana
2015-05-14  9:42   ` Filipe David Manana
2015-05-14 13:06     ` Jeff Mahoney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=554909A7.6030100@suse.com \
    --to=jeffm@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.