From: Mark Fasheh <mfasheh@suse.de>
To: linux-btrfs@vger.kernel.org
Cc: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>,
Mark Fasheh <mfasheh@suse.de>
Subject: [PATCH 4/5] btrfs: delete qgroup items in drop_snapshot
Date: Thu, 17 Jul 2014 12:39:03 -0700 [thread overview]
Message-ID: <1405625944-4883-5-git-send-email-mfasheh@suse.de> (raw)
In-Reply-To: <1405625944-4883-1-git-send-email-mfasheh@suse.de>
btrfs_drop_snapshot() leaves subvolume qgroup items on disk after
completion. This wastes space and also can cause problems with snapshot
creation. If a new snapshot tries to claim the deleted subvolume's id,
btrfs will get -EEXIST from add_qgroup_item() and go read-only.
We can partially fix this by catching -EEXIST in add_qgroup_item() and
initializing the existing items. However, this would leave orphaned relation
items (BTRFS_QGROUP_RELATION_KEY) around, which would be confusing to the end
user. It also does nothing to fix the wasted space taken up by orphaned
qgroup items.
So the full fix is to delete all qgroup items related to the deleted
snapshot in btrfs_drop_snapshot. If an item persists (either due to a
previous drop_snapshot without the fix, or some error) we can still continue
with snapshot create instead of throwing the whole filesystem readonly.
In the very small chance that some relation items persist, they will not
affect functioning of our level 0 subvolume qgroup.
Signed-off-by: Mark Fasheh <mfasheh@suse.de>
---
fs/btrfs/extent-tree.c | 6 +++
fs/btrfs/qgroup.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++--
fs/btrfs/qgroup.h | 3 ++
3 files changed, 120 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ed9e13c..2dad701 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8296,6 +8296,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
if (err)
goto out_end_trans;
+ ret = btrfs_del_qgroup_items(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_end_trans;
+ }
+
ret = btrfs_del_root(trans, tree_root, &root->root_key);
if (ret) {
btrfs_abort_transaction(trans, tree_root, ret);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1569338..2ec2432 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -35,7 +35,6 @@
#include "qgroup.h"
/* TODO XXX FIXME
- * - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
* - compressed
* - sync
@@ -99,6 +98,16 @@ struct btrfs_qgroup_list {
struct btrfs_qgroup *member;
};
+/*
+ * used in remove_qgroup_relations() to track qgroup relations that
+ * need deleting
+ *
+ * Records are not allocated on their own: qlist_to_relation_rec()
+ * reinterprets the memory of a struct btrfs_qgroup_list as a
+ * relation_rec (a BUILD_BUG_ON there rejects the build if this struct
+ * is larger than struct btrfs_qgroup_list). Keep that in mind before
+ * adding fields here.
+ */
+struct relation_rec {
+ struct list_head list; /* links the record into the caller's list of pending deletions */
+ u64 src; /* qgroupid of the relation's group (parent) side */
+ u64 dst; /* qgroupid of the relation's member side */
+};
+
#define ptr_to_u64(x) ((u64)(uintptr_t)x)
#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)
@@ -551,9 +560,15 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
key.type = BTRFS_QGROUP_INFO_KEY;
key.offset = qgroupid;
+ /*
+ * Avoid a transaction abort by catching -EEXIST here. In that
+ * case, we proceed by re-initializing the existing structure
+ * on disk.
+ */
+
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*qgroup_info));
- if (ret)
+ if (ret && ret != -EEXIST)
goto out;
leaf = path->nodes[0];
@@ -572,7 +587,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
key.type = BTRFS_QGROUP_LIMIT_KEY;
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*qgroup_limit));
- if (ret)
+ if (ret && ret != -EEXIST)
goto out;
leaf = path->nodes[0];
@@ -2817,3 +2832,96 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
}
+
+/*
+ * Convert @qlist, in place, into a relation_rec and queue it on @all.
+ *
+ * The list entry is unlinked from both the group list and the member
+ * list it sits on, and its memory is then reused to hold the two
+ * qgroup ids of the relation. No allocation takes place; after this
+ * call @qlist must no longer be used as a struct btrfs_qgroup_list.
+ *
+ * Returns the new record (the same address as @qlist).
+ */
+static struct relation_rec *
+qlist_to_relation_rec(struct btrfs_qgroup_list *qlist, struct list_head *all)
+{
+	struct relation_rec *rec = (struct relation_rec *)qlist;
+	u64 group_id;
+	u64 member_id;
+
+	/* The record is stored in qlist's memory, so it has to fit. */
+	BUILD_BUG_ON(sizeof(struct relation_rec) > sizeof(struct btrfs_qgroup_list));
+
+	/* Fetch both ids before the storage behind them is overwritten. */
+	group_id = qlist->group->qgroupid;
+	member_id = qlist->member->qgroupid;
+
+	list_del(&qlist->next_group);
+	list_del(&qlist->next_member);
+
+	rec->src = group_id;
+	rec->dst = member_id;
+	list_add(&rec->list, all);
+
+	return rec;
+}
+
+/*
+ * Remove every relation that involves @qgroupid, both from the
+ * in-memory qgroup and from the quota tree.
+ *
+ * Under fs_info->qgroup_lock, all entries on the qgroup's ->groups and
+ * ->members lists are unlinked and turned into relation_rec records on
+ * a private list. The on-disk items are deleted afterwards, outside
+ * the spinlock. For each relation both key orderings (src/dst and
+ * dst/src) are deleted; -ENOENT from either deletion is not an error.
+ *
+ * Returns 0 on success (including when no qgroup with @qgroupid is
+ * found) or the first real error from del_qgroup_relation_item().
+ * Once an error has been seen no further items are deleted, but all
+ * records are still freed.
+ */
+static int remove_qgroup_relations(struct btrfs_trans_handle *trans,
+				   struct btrfs_fs_info *fs_info, u64 qgroupid)
+{
+	int ret = 0;
+	int err;
+	struct btrfs_root *quota_root = fs_info->quota_root;
+	struct relation_rec *rec, *next;
+	struct btrfs_qgroup_list *qlist;
+	struct btrfs_qgroup *qgroup;
+	LIST_HEAD(relations);
+
+	spin_lock(&fs_info->qgroup_lock);
+	qgroup = find_qgroup_rb(fs_info, qgroupid);
+	if (!qgroup) {
+		/*
+		 * No in-memory qgroup for this id, so there are no
+		 * relations to walk.
+		 * NOTE(review): assumes find_qgroup_rb() returns NULL when
+		 * the id is unknown -- confirm against its definition.
+		 */
+		spin_unlock(&fs_info->qgroup_lock);
+		return 0;
+	}
+
+	while (!list_empty(&qgroup->groups)) {
+		qlist = list_first_entry(&qgroup->groups,
+					 struct btrfs_qgroup_list, next_group);
+		qlist_to_relation_rec(qlist, &relations);
+	}
+
+	while (!list_empty(&qgroup->members)) {
+		qlist = list_first_entry(&qgroup->members,
+					 struct btrfs_qgroup_list, next_member);
+		qlist_to_relation_rec(qlist, &relations);
+	}
+
+	spin_unlock(&fs_info->qgroup_lock);
+
+	list_for_each_entry_safe(rec, next, &relations, list) {
+		if (!ret) {
+			/*
+			 * Judge each deletion on its own so that -ENOENT
+			 * from one direction cannot hide a real error from
+			 * the other.
+			 */
+			ret = del_qgroup_relation_item(trans, quota_root,
+						       rec->src, rec->dst);
+			if (ret == -ENOENT)
+				ret = 0;
+
+			err = del_qgroup_relation_item(trans, quota_root,
+						       rec->dst, rec->src);
+			if (err == -ENOENT)
+				err = 0;
+
+			if (!ret)
+				ret = err;
+		}
+
+		/*
+		 * The record reuses the memory of a btrfs_qgroup_list
+		 * entry that was unlinked from the qgroup above, so this
+		 * function is its only remaining owner.
+		 * NOTE(review): assumes those entries are kmalloc-family
+		 * allocations -- confirm against add_relation_rb().
+		 */
+		list_del(&rec->list);
+		kfree(rec);
+	}
+
+	return ret;
+}
+
+/*
+ * Delete all qgroup state belonging to the subvolume @root: its
+ * relation items, its in-memory qgroup and its on-disk qgroup item.
+ * The qgroup id used is root->root_key.objectid.
+ *
+ * Does nothing and returns 0 when quota is not enabled. A missing
+ * on-disk qgroup item (-ENOENT) is not treated as an error. Any other
+ * failure is returned to the caller.
+ */
+int btrfs_del_qgroup_items(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root)
+{
+	int ret = 0;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *quota_root;
+	u64 qgroupid = root->root_key.objectid;
+
+	if (!fs_info->quota_enabled)
+		return 0;
+
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
+
+	/*
+	 * Sample quota_root only once the ioctl mutex is held, and cope
+	 * with it being gone.
+	 * NOTE(review): presumably quota disable clears
+	 * fs_info->quota_root under this same mutex -- confirm.
+	 */
+	quota_root = fs_info->quota_root;
+	if (!quota_root)
+		goto out_unlock;
+
+	ret = remove_qgroup_relations(trans, fs_info, qgroupid);
+	if (ret)
+		goto out_unlock;
+
+	spin_lock(&fs_info->qgroup_lock);
+	del_qgroup_rb(fs_info, qgroupid);
+	spin_unlock(&fs_info->qgroup_lock);
+
+	ret = del_qgroup_item(trans, quota_root, qgroupid);
+	if (ret == -ENOENT)
+		ret = 0;
+
+out_unlock:
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
+	return ret;
+}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 18cc68c..b22a2ce 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -98,6 +98,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
+int btrfs_del_qgroup_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
--
1.8.4.5
next prev parent reply other threads:[~2014-07-17 19:40 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-07-17 19:38 [PATCH 0/5] btrfs: qgroup fixes for btrfs_drop_snapshot V5 Mark Fasheh
2014-07-17 19:39 ` [PATCH 1/5] btrfs: add trace for qgroup accounting Mark Fasheh
2014-07-17 19:39 ` [PATCH 2/5] btrfs: qgroup: account shared subtrees during snapshot delete Mark Fasheh
2014-08-12 18:22 ` Chris Mason
2014-08-12 18:32 ` Mark Fasheh
2014-08-12 18:36 ` Chris Mason
2014-08-12 19:01 ` Mark Fasheh
2014-08-12 19:08 ` Chris Mason
2014-07-17 19:39 ` [PATCH 3/5] Btrfs: __btrfs_mod_ref should always use no_quota Mark Fasheh
2014-07-17 19:39 ` Mark Fasheh [this message]
2014-07-17 19:39 ` [PATCH 5/5] btrfs: correctly handle return from ulist_add Mark Fasheh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1405625944-4883-5-git-send-email-mfasheh@suse.de \
--to=mfasheh@suse.de \
--cc=clm@fb.com \
--cc=jbacik@fb.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).