From: Kevin Wolf <kwolf@redhat.com>
To: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
Cc: Fiona Ebner <f.ebner@proxmox.com>,
qemu-block@nongnu.org, qemu-devel@nongnu.org, den@virtuozzo.com,
hreitz@redhat.com, stefanha@redhat.com, eblake@redhat.com,
jsnow@redhat.com, vsementsov@yandex-team.ru,
xiechanglong.d@gmail.com, wencongyang2@huawei.com,
berto@igalia.com, fam@euphon.net, ari@tuxera.com
Subject: Re: [PATCH v2 03/24] block/snapshot: move drain outside of read-locked bdrv_snapshot_delete()
Date: Mon, 26 May 2025 11:10:37 +0200 [thread overview]
Message-ID: <aDQwDbJLqPYVxgCN@redhat.com> (raw)
In-Reply-To: <4fdff680-5e77-40f2-812b-70697ad8ae64@virtuozzo.com>
Am 23.05.2025 um 20:12 hat Andrey Drobyshev geschrieben:
> On 5/20/25 1:29 PM, Fiona Ebner wrote:
> > This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED.
> >
> > More granular draining is not trivially possible, because
> > bdrv_snapshot_delete() can recursively call itself.
> >
> > The return value of bdrv_all_delete_snapshot() changes from -1 to
> > -errno propagated from failed sub-calls. This is fine for the existing
> > callers of bdrv_all_delete_snapshot().
> >
> > Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> > ---
> >
> > Changes in v2:
> > * Use 'must be drained' instead of 'needs to be drained'.
> > * Use goto for cleanup/error handling in bdrv_all_delete_snapshot().
> > * Don't use atomics to access bs->quiesce_counter.
> >
> > block/snapshot.c | 26 +++++++++++++++-----------
> > blockdev.c | 25 +++++++++++++++++--------
> > qemu-img.c | 2 ++
> > 3 files changed, 34 insertions(+), 19 deletions(-)
> >
> > diff --git a/block/snapshot.c b/block/snapshot.c
> > index 22567f1fb9..9f300a78bd 100644
> > --- a/block/snapshot.c
> > +++ b/block/snapshot.c
> > @@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
> >
> > /**
> > * Delete an internal snapshot by @snapshot_id and @name.
> > - * @bs: block device used in the operation
> > + * @bs: block device used in the operation, must be drained
> > * @snapshot_id: unique snapshot ID, or NULL
> > * @name: snapshot name, or NULL
> > * @errp: location to store error
> > @@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> >
> > GLOBAL_STATE_CODE();
> >
> > + assert(bs->quiesce_counter > 0);
> > +
> > if (!drv) {
> > error_setg(errp, "Device '%s' has no medium",
> > bdrv_get_device_name(bs));
> > @@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> > return -EINVAL;
> > }
> >
> > - /* drain all pending i/o before deleting snapshot */
> > - bdrv_drained_begin(bs);
> > -
> > if (drv->bdrv_snapshot_delete) {
> > ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
> > } else if (fallback_bs) {
> > @@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> > ret = -ENOTSUP;
> > }
> >
> > - bdrv_drained_end(bs);
> > return ret;
> > }
> >
> > @@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name,
> > ERRP_GUARD();
> > g_autoptr(GList) bdrvs = NULL;
> > GList *iterbdrvs;
> > + int ret = 0;
> >
> > GLOBAL_STATE_CODE();
> > - GRAPH_RDLOCK_GUARD_MAINLOOP();
> >
> > - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
> > - return -1;
> > + bdrv_drain_all_begin();
> > + bdrv_graph_rdlock_main_loop();
> > +
> > + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp);
> > + if (ret < 0) {
> > + goto out;
> > }
> >
> > iterbdrvs = bdrvs;
> > while (iterbdrvs) {
> > BlockDriverState *bs = iterbdrvs->data;
> > QEMUSnapshotInfo sn1, *snapshot = &sn1;
> > - int ret = 0;
> >
> > if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
> > bdrv_snapshot_find(bs, snapshot, name) >= 0)
> > @@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name,
> > if (ret < 0) {
> > error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
> > name, bdrv_get_device_or_node_name(bs));
> > - return -1;
> > + goto out;
> > }
> >
> > iterbdrvs = iterbdrvs->next;
> > }
> >
> > - return 0;
> > +out:
> > + bdrv_graph_rdunlock_main_loop();
> > + bdrv_drain_all_end();
> > + return ret;
> > }
> >
> >
> > diff --git a/blockdev.c b/blockdev.c
> > index 21443b4514..3982f9776b 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -1132,39 +1132,41 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
> > int ret;
> >
> > GLOBAL_STATE_CODE();
> > - GRAPH_RDLOCK_GUARD_MAINLOOP();
> > +
> > + bdrv_drain_all_begin();
> > + bdrv_graph_rdlock_main_loop();
> >
> > bs = qmp_get_root_bs(device, errp);
> > if (!bs) {
> > - return NULL;
> > + goto error;
> > }
> >
> > if (!id && !name) {
> > error_setg(errp, "Name or id must be provided");
> > - return NULL;
> > + goto error;
> > }
> >
> > if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
> > - return NULL;
> > + goto error;
> > }
> >
> > ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
> > if (local_err) {
> > error_propagate(errp, local_err);
> > - return NULL;
> > + goto error;
> > }
> > if (!ret) {
> > error_setg(errp,
> > "Snapshot with id '%s' and name '%s' does not exist on "
> > "device '%s'",
> > STR_OR_NULL(id), STR_OR_NULL(name), device);
> > - return NULL;
> > + goto error;
> > }
> >
> > bdrv_snapshot_delete(bs, id, name, &local_err);
> > if (local_err) {
> > error_propagate(errp, local_err);
> > - return NULL;
> > + goto error;
> > }
> >
> > info = g_new0(SnapshotInfo, 1);
> > @@ -1180,6 +1182,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
> > info->has_icount = true;
> > }
> >
> > +error:
> > + bdrv_graph_rdunlock_main_loop();
> > + bdrv_drain_all_end();
> > return info;
> > }
> >
> > @@ -1295,12 +1300,14 @@ static void internal_snapshot_abort(void *opaque)
> > Error *local_error = NULL;
> >
> > GLOBAL_STATE_CODE();
> > - GRAPH_RDLOCK_GUARD_MAINLOOP();
> >
> > if (!state->created) {
> > return;
> > }
> >
> > + bdrv_drain_all_begin();
> > + bdrv_graph_rdlock_main_loop();
> > +
> > if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
> > error_reportf_err(local_error,
> > "Failed to delete snapshot with id '%s' and "
> > @@ -1308,6 +1315,8 @@ static void internal_snapshot_abort(void *opaque)
> > sn->id_str, sn->name,
> > bdrv_get_device_name(bs));
> > }
> > + bdrv_graph_rdunlock_main_loop();
> > + bdrv_drain_all_end();
> > }
> >
>
> Okay, I've got a very simple and naive question to ask. We've got this
> pattern recurring throughout the series:
>
> > GLOBAL_STATE_CODE();
> > bdrv_drain_all_begin();
> > bdrv_graph_rdlock_main_loop();
> >
> > ...
> >
> > bdrv_graph_rdunlock_main_loop();
> > bdrv_drain_all_end();
>
> bdrv_graph_rdlock_main_loop() doesn't actually take any locks, it
> asserts that we're running in the main thread and not in a coroutine.
> bdrv_graph_rdunlock_main_loop() does the same.
> GRAPH_RDLOCK_GUARD_MAINLOOP() does both those calls, in the beginning of
> a function and when leaving its scope, so essentially it also just does
> assert(qemu_in_main_thread() && !qemu_in_coroutine()).
>
> Therefore:
>
> 1. Is there any real benefit from using those
> {rdlock/rdunlock}_main_loop() constructions, or they're here due to
> historical reasons only?
It's the price we pay for the compiler to verify our locking rules.
> 2. Would it hurt if we only leave GRAPH_RDLOCK_GUARD_MAINLOOP() in all
> such occurrences? At least when it's obvious we can't get out of the
> main thread. That would simply deliver us from performing same checks
> several times, similar to what's done in commit 22/24 ("block/io: remove
> duplicate GLOBAL_STATE_CODE() in bdrv_do_drained_end()").
Once bdrv_drain_all_begin() is marked GRAPH_UNLOCKED, calling it after
GRAPH_RDLOCK_GUARD_MAINLOOP() would be wrong according to TSA rules
(which don't know anything about this being only a fake lock) and the
build would fail.
Kevin
next prev parent reply other threads:[~2025-05-26 9:11 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-20 10:29 [PATCH v2 00/24] block: do not drain while holding the graph lock Fiona Ebner
2025-05-20 10:29 ` [PATCH v2 01/24] block: remove outdated comments about AioContext locking Fiona Ebner
2025-05-21 9:41 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 02/24] block: move drain outside of read-locked bdrv_reopen_queue_child() Fiona Ebner
2025-05-21 9:45 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 03/24] block/snapshot: move drain outside of read-locked bdrv_snapshot_delete() Fiona Ebner
2025-05-21 9:53 ` Kevin Wolf
2025-05-23 18:12 ` Andrey Drobyshev
2025-05-26 9:10 ` Kevin Wolf [this message]
2025-05-26 10:33 ` Fiona Ebner
2025-05-26 11:42 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 04/24] block: move drain outside of read-locked bdrv_inactivate_recurse() Fiona Ebner
2025-05-21 9:56 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 05/24] block: mark bdrv_parent_change_aio_context() GRAPH_RDLOCK Fiona Ebner
2025-05-21 10:04 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 06/24] block: mark change_aio_ctx() callback and instances as GRAPH_RDLOCK(_PTR) Fiona Ebner
2025-05-20 15:47 ` Eric Blake
2025-05-21 15:23 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 07/24] block: mark bdrv_child_change_aio_context() GRAPH_RDLOCK Fiona Ebner
2025-05-20 15:48 ` Eric Blake
2025-05-21 15:28 ` Kevin Wolf
2025-05-23 17:20 ` Andrey Drobyshev
2025-05-26 9:05 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 08/24] block: move drain outside of bdrv_change_aio_context() and mark GRAPH_RDLOCK Fiona Ebner
2025-05-21 16:05 ` Kevin Wolf
2025-05-22 9:09 ` Fiona Ebner
2025-05-22 12:05 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 09/24] block: move drain outside of bdrv_try_change_aio_context() Fiona Ebner
2025-05-21 16:36 ` Kevin Wolf
2025-05-22 9:56 ` Fiona Ebner
2025-05-22 12:36 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 10/24] block: move drain outside of bdrv_attach_child_common(_abort)() Fiona Ebner
2025-05-20 10:29 ` [PATCH v2 11/24] block: move drain outside of bdrv_set_backing_hd_drained() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 12/24] block: move drain outside of bdrv_root_attach_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 13/24] block: move drain outside of bdrv_attach_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 14/24] block: move drain outside of quorum_add_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 15/24] block: move drain outside of bdrv_root_unref_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 16/24] block: move drain outside of quorum_del_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 17/24] blockdev: drain while unlocked in internal_snapshot_action() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 18/24] blockdev: drain while unlocked in external_snapshot_action() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 19/24] block: mark bdrv_drained_begin() as GRAPH_UNLOCKED Fiona Ebner
2025-05-23 17:20 ` Andrey Drobyshev
2025-05-20 10:30 ` [PATCH v2 20/24] iotests/graph-changes-while-io: remove image file after test Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 21/24] iotests/graph-changes-while-io: add test case with removal of lower snapshot Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 22/24] block/io: remove duplicate GLOBAL_STATE_CODE() in bdrv_do_drained_end() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 23/24] block: never use atomics to access bs->quiesce_counter Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 24/24] block: add bdrv_graph_wrlock_drained() convenience wrapper Fiona Ebner
2025-05-20 15:54 ` Eric Blake
2025-05-20 15:44 ` [PATCH v2 00/24] block: do not drain while holding the graph lock Eric Blake
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aDQwDbJLqPYVxgCN@redhat.com \
--to=kwolf@redhat.com \
--cc=andrey.drobyshev@virtuozzo.com \
--cc=ari@tuxera.com \
--cc=berto@igalia.com \
--cc=den@virtuozzo.com \
--cc=eblake@redhat.com \
--cc=f.ebner@proxmox.com \
--cc=fam@euphon.net \
--cc=hreitz@redhat.com \
--cc=jsnow@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=vsementsov@yandex-team.ru \
--cc=wencongyang2@huawei.com \
--cc=xiechanglong.d@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).