* [PATCH v4 1/2] btrfs: slightly loose the requirement for qgroup removal
2024-05-21 3:03 [PATCH v4 0/2] btrfs: qgroup: stale qgroups related impromvents Qu Wenruo
@ 2024-05-21 3:03 ` Qu Wenruo
2024-05-21 3:03 ` [PATCH v4 2/2] btrfs: automatically remove the subvolume qgroup Qu Wenruo
1 sibling, 0 replies; 3+ messages in thread
From: Qu Wenruo @ 2024-05-21 3:03 UTC (permalink / raw)
To: linux-btrfs; +Cc: Boris Burkov
[BUG]
Currently if one is utilizing "qgroups/drop_subtree_threshold" sysfs,
and a snapshot with level higher than that value is dropped, btrfs will
not be able to delete the qgroup until next qgroup rescan:
uuid=ffffffff-eeee-dddd-cccc-000000000000
wipefs -fa $dev
mkfs.btrfs -f $dev -O quota -s 4k -n 4k -U $uuid
mount $dev $mnt
btrfs subvolume create $mnt/subv1/
for (( i = 0; i < 1024; i++ )); do
xfs_io -f -c "pwrite 0 2k" $mnt/subv1/file_$i > /dev/null
done
sync
btrfs subv snapshot $mnt/subv1 $mnt/snapshot
btrfs quota enable $mnt
btrfs quota rescan -w $mnt
sync
echo 1 > /sys/fs/btrfs/$uuid/qgroups/drop_subtree_threshold
btrfs subvolume delete $mnt/snapshot
btrfs subv sync $mnt
btrfs qgroup show -prce --sync $mnt
btrfs qgroup destroy 0/257 $mnt
umount $mnt
The final qgroup removal would fail with the following error:
ERROR: unable to destroy quota group: Device or resource busy
[CAUSE]
The above script would generate a subvolume of level 2, then snapshot
it, enable qgroup, set the drop_subtree_threshold, then drop the
snapshot.
Since the subvolume drop would meet the threshold, qgroup would be
marked inconsistent and skip accounting to avoid hanging the system at
transaction commit.
But currently we do not allow a qgroup with any rfer/excl numbers to be
dropped, and this is not really compatible with the new
drop_subtree_threshold behavior.
[FIX]
Only require the strong zero rfer/excl/rfer_cmpr/excl_cmpr for squota
mode.
This is due to the fact that squota can never go inconsistent, and it
can have dropped subvolume but with non-zero qgroup numbers for future
accounting.
For full qgroup mode, we only check if there is a subvolume for it.
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/qgroup.c | 91 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 84 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fc2a7ea26354..9b4de1b43298 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1748,13 +1748,57 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
return ret;
}
-static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+/*
+ * Return 0 if we can not delete the qgroup (not empty or has children etc).
+ * Return >0 if we can delete the qgroup.
+ * Return <0 for other errors during tree search.
+ */
+static int can_delete_qgroup(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup)
{
- return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
- qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
- qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
- qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
- qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ int ret;
+
+ /*
+ * Squota would never be inconsistent, but there can still be
+ * case where a dropped subvolume still has qgroup numbers, and
+ * squota relies on such qgroup for future accounting.
+ *
+ * So for squota, do not allow dropping any non-zero qgroup.
+ */
+ if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE &&
+ (qgroup->rfer || qgroup->excl || qgroup->excl_cmpr ||
+ qgroup->rfer_cmpr))
+ return 0;
+
+ /* For higher level qgroup, we can only delete it if it has no child. */
+ if (btrfs_qgroup_level(qgroup->qgroupid)) {
+ if (!list_empty(&qgroup->members))
+ return 0;
+ return 1;
+ }
+
+ /*
+ * For level-0 qgroups, we can only delete it if it has no subvolume
+ * for it.
+ * This means even a subvolume is unlinked but not yet fully dropped,
+ * we can not delete the qgroup.
+ */
+ key.objectid = qgroup->qgroupid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = -1ULL;
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL);
+ btrfs_free_path(path);
+ /*
+ * The @ret from btrfs_find_root() exactly matches our definition for
+ * the return value, thus can be returned directly.
+ */
+ return ret;
}
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
@@ -1776,7 +1820,10 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out;
}
- if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+ ret = can_delete_qgroup(fs_info, qgroup);
+ if (ret < 0)
+ goto out;
+ if (ret == 0) {
ret = -EBUSY;
goto out;
}
@@ -1801,6 +1848,36 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
}
spin_lock(&fs_info->qgroup_lock);
+ /*
+ * Warn on reserved space. The subvolume should has no child nor
+ * corresponding subvolume.
+ * Thus its reserved space should all be zero, no matter if qgroup
+ * is consistent or the mode.
+ */
+ WARN_ON(qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]);
+ /*
+ * The same for rfer/excl numbers, but that's only if our qgroup is
+ * consistent and if it's in regular qgroup mode.
+ * For simple mode it's not as accurate thus we can hit non-zero values
+ * very frequently.
+ */
+ if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL &&
+ !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) {
+ if (WARN_ON(qgroup->rfer || qgroup->excl ||
+ qgroup->rfer_cmpr || qgroup->excl_cmpr)) {
+ btrfs_warn_rl(fs_info,
+"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
+ btrfs_qgroup_level(qgroup->qgroupid),
+ btrfs_qgroup_subvolid(qgroup->qgroupid),
+ qgroup->rfer,
+ qgroup->rfer_cmpr,
+ qgroup->excl,
+ qgroup->excl_cmpr);
+ qgroup_mark_inconsistent(fs_info);
+ }
+ }
del_qgroup_rb(fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
--
2.45.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH v4 2/2] btrfs: automatically remove the subvolume qgroup
2024-05-21 3:03 [PATCH v4 0/2] btrfs: qgroup: stale qgroups related impromvents Qu Wenruo
2024-05-21 3:03 ` [PATCH v4 1/2] btrfs: slightly loose the requirement for qgroup removal Qu Wenruo
@ 2024-05-21 3:03 ` Qu Wenruo
1 sibling, 0 replies; 3+ messages in thread
From: Qu Wenruo @ 2024-05-21 3:03 UTC (permalink / raw)
To: linux-btrfs; +Cc: Boris Burkov
Currently if we fully removed a subvolume (not only unlinked, but fully
dropped its root item), its qgroup would not be removed.
Thus we have "btrfs qgroup clear-stale" to handle such 0 level qgroups.
This patch changes the behavior by automatically removing the qgroup of
a fully dropped subvolume when possible:
- Full qgroup but still consistent
We can and should remove the qgroup.
The qgroup numbers should be 0, without any rsv.
- Full qgroup but inconsistent
Can happen with drop_subtree_threshold feature (skip accounting
and mark qgroup inconsistent).
We can and should remove the qgroup.
Higher level qgroup numbers will be incorrect, but since qgroup
is already inconsistent, it should not be a problem.
- Squota mode
This is the special case, we can only drop the qgroup if its numbers
are all 0.
This would be handled by can_delete_qgroup(), so we only need to check
the return value and ignore the -EBUSY error.
Reviewed-by: Boris Burkov <boris@bur.io>
Link: https://bugzilla.suse.com/show_bug.cgi?id=1222847
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent-tree.c | 8 ++++++++
fs/btrfs/qgroup.c | 38 ++++++++++++++++++++++++++++++++++++++
fs/btrfs/qgroup.h | 2 ++
3 files changed, 48 insertions(+)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3774c191e36d..32f03c0a7b9e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5833,6 +5833,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
struct btrfs_root_item *root_item = &root->root_item;
struct walk_control *wc;
struct btrfs_key key;
+ const u64 rootid = btrfs_root_id(root);
int err = 0;
int ret;
int level;
@@ -6063,6 +6064,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
kfree(wc);
btrfs_free_path(path);
out:
+ if (!err && root_dropped) {
+ ret = btrfs_qgroup_cleanup_dropped_subvolume(fs_info, rootid);
+ if (ret < 0)
+ btrfs_warn_rl(fs_info,
+ "failed to cleanup qgroup 0/%llu: %d",
+ rootid, ret);
+ }
/*
* We were an unfinished drop root, check to see if there are any
* pending, and if not clear and wake up any waiters.
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9b4de1b43298..826d972a7c75 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1893,6 +1893,44 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
return ret;
}
+int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info,
+ u64 subvolid)
+{
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ if (!is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) ||
+ !fs_info->quota_root)
+ return 0;
+
+ /*
+ * Commit current transaction to make sure all the rfer/excl numbers
+ * get updated.
+ */
+ trans = btrfs_start_transaction(fs_info->quota_root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret < 0)
+ return ret;
+
+ /* Start new trans to delete the qgroup info and limit items. */
+ trans = btrfs_start_transaction(fs_info->quota_root, 2);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ ret = btrfs_remove_qgroup(trans, subvolid);
+ btrfs_end_transaction(trans);
+ /*
+ * It's squota and the subvolume still has numbers needed
+ * for future accounting, in this case we can not delete.
+ * Just skip it.
+ */
+ if (ret == -EBUSY)
+ ret = 0;
+ return ret;
+}
+
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit)
{
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 706640be0ec2..3f93856a02e1 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -327,6 +327,8 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
+int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info,
+ u64 subvolid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
--
2.45.1
^ permalink raw reply related [flat|nested] 3+ messages in thread