From: Kevin Wolf <kwolf@redhat.com>
To: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
Cc: Fiona Ebner <f.ebner@proxmox.com>,
qemu-block@nongnu.org, qemu-devel@nongnu.org, den@virtuozzo.com,
hreitz@redhat.com, stefanha@redhat.com, eblake@redhat.com,
jsnow@redhat.com, vsementsov@yandex-team.ru,
xiechanglong.d@gmail.com, wencongyang2@huawei.com,
berto@igalia.com, fam@euphon.net, ari@tuxera.com
Subject: Re: [PATCH v2 03/24] block/snapshot: move drain outside of read-locked bdrv_snapshot_delete()
Date: Mon, 26 May 2025 11:10:37 +0200 [thread overview]
Message-ID: <aDQwDbJLqPYVxgCN@redhat.com> (raw)
In-Reply-To: <4fdff680-5e77-40f2-812b-70697ad8ae64@virtuozzo.com>
Am 23.05.2025 um 20:12 hat Andrey Drobyshev geschrieben:
> On 5/20/25 1:29 PM, Fiona Ebner wrote:
> > This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED.
> >
> > More granular draining is not trivially possible, because
> > bdrv_snapshot_delete() can recursively call itself.
> >
> > The return value of bdrv_all_delete_snapshot() changes from -1 to
> > -errno propagated from failed sub-calls. This is fine for the existing
> > callers of bdrv_all_delete_snapshot().
> >
> > Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> > ---
> >
> > Changes in v2:
> > * Use 'must be drained' instead of 'needs to be drained'.
> > * Use goto for cleanup/error handling in bdrv_all_delete_snapshot().
> > * Don't use atomics to access bs->quiesce_counter.
> >
> > block/snapshot.c | 26 +++++++++++++++-----------
> > blockdev.c | 25 +++++++++++++++++--------
> > qemu-img.c | 2 ++
> > 3 files changed, 34 insertions(+), 19 deletions(-)
> >
> > diff --git a/block/snapshot.c b/block/snapshot.c
> > index 22567f1fb9..9f300a78bd 100644
> > --- a/block/snapshot.c
> > +++ b/block/snapshot.c
> > @@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
> >
> > /**
> > * Delete an internal snapshot by @snapshot_id and @name.
> > - * @bs: block device used in the operation
> > + * @bs: block device used in the operation, must be drained
> > * @snapshot_id: unique snapshot ID, or NULL
> > * @name: snapshot name, or NULL
> > * @errp: location to store error
> > @@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> >
> > GLOBAL_STATE_CODE();
> >
> > + assert(bs->quiesce_counter > 0);
> > +
> > if (!drv) {
> > error_setg(errp, "Device '%s' has no medium",
> > bdrv_get_device_name(bs));
> > @@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> > return -EINVAL;
> > }
> >
> > - /* drain all pending i/o before deleting snapshot */
> > - bdrv_drained_begin(bs);
> > -
> > if (drv->bdrv_snapshot_delete) {
> > ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
> > } else if (fallback_bs) {
> > @@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> > ret = -ENOTSUP;
> > }
> >
> > - bdrv_drained_end(bs);
> > return ret;
> > }
> >
> > @@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name,
> > ERRP_GUARD();
> > g_autoptr(GList) bdrvs = NULL;
> > GList *iterbdrvs;
> > + int ret = 0;
> >
> > GLOBAL_STATE_CODE();
> > - GRAPH_RDLOCK_GUARD_MAINLOOP();
> >
> > - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
> > - return -1;
> > + bdrv_drain_all_begin();
> > + bdrv_graph_rdlock_main_loop();
> > +
> > + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp);
> > + if (ret < 0) {
> > + goto out;
> > }
> >
> > iterbdrvs = bdrvs;
> > while (iterbdrvs) {
> > BlockDriverState *bs = iterbdrvs->data;
> > QEMUSnapshotInfo sn1, *snapshot = &sn1;
> > - int ret = 0;
> >
> > if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
> > bdrv_snapshot_find(bs, snapshot, name) >= 0)
> > @@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name,
> > if (ret < 0) {
> > error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
> > name, bdrv_get_device_or_node_name(bs));
> > - return -1;
> > + goto out;
> > }
> >
> > iterbdrvs = iterbdrvs->next;
> > }
> >
> > - return 0;
> > +out:
> > + bdrv_graph_rdunlock_main_loop();
> > + bdrv_drain_all_end();
> > + return ret;
> > }
> >
> >
> > diff --git a/blockdev.c b/blockdev.c
> > index 21443b4514..3982f9776b 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -1132,39 +1132,41 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
> > int ret;
> >
> > GLOBAL_STATE_CODE();
> > - GRAPH_RDLOCK_GUARD_MAINLOOP();
> > +
> > + bdrv_drain_all_begin();
> > + bdrv_graph_rdlock_main_loop();
> >
> > bs = qmp_get_root_bs(device, errp);
> > if (!bs) {
> > - return NULL;
> > + goto error;
> > }
> >
> > if (!id && !name) {
> > error_setg(errp, "Name or id must be provided");
> > - return NULL;
> > + goto error;
> > }
> >
> > if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
> > - return NULL;
> > + goto error;
> > }
> >
> > ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
> > if (local_err) {
> > error_propagate(errp, local_err);
> > - return NULL;
> > + goto error;
> > }
> > if (!ret) {
> > error_setg(errp,
> > "Snapshot with id '%s' and name '%s' does not exist on "
> > "device '%s'",
> > STR_OR_NULL(id), STR_OR_NULL(name), device);
> > - return NULL;
> > + goto error;
> > }
> >
> > bdrv_snapshot_delete(bs, id, name, &local_err);
> > if (local_err) {
> > error_propagate(errp, local_err);
> > - return NULL;
> > + goto error;
> > }
> >
> > info = g_new0(SnapshotInfo, 1);
> > @@ -1180,6 +1182,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
> > info->has_icount = true;
> > }
> >
> > +error:
> > + bdrv_graph_rdunlock_main_loop();
> > + bdrv_drain_all_end();
> > return info;
> > }
> >
> > @@ -1295,12 +1300,14 @@ static void internal_snapshot_abort(void *opaque)
> > Error *local_error = NULL;
> >
> > GLOBAL_STATE_CODE();
> > - GRAPH_RDLOCK_GUARD_MAINLOOP();
> >
> > if (!state->created) {
> > return;
> > }
> >
> > + bdrv_drain_all_begin();
> > + bdrv_graph_rdlock_main_loop();
> > +
> > if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
> > error_reportf_err(local_error,
> > "Failed to delete snapshot with id '%s' and "
> > @@ -1308,6 +1315,8 @@ static void internal_snapshot_abort(void *opaque)
> > sn->id_str, sn->name,
> > bdrv_get_device_name(bs));
> > }
> > + bdrv_graph_rdunlock_main_loop();
> > + bdrv_drain_all_end();
> > }
> >
>
> Okay, I've got a very simple and naive question to ask. We've got this
> pattern recurring throughout the series:
>
> > GLOBAL_STATE_CODE();
> > bdrv_drain_all_begin();
> > bdrv_graph_rdlock_main_loop();
> >
> > ...
> >
> > bdrv_graph_rdunlock_main_loop();
> > bdrv_drain_all_end();
>
> bdrv_graph_rdlock_main_loop() doesn't actually take any locks, it
> asserts that we're running in the main thread and not in a coroutine.
> bdrv_graph_rdunlock_main_loop() does the same.
> GRAPH_RDLOCK_GUARD_MAINLOOP() does both those calls, in the beginning of
> a function and when leaving its scope, so essentially it also just does
> assert(qemu_in_main_thread() && !qemu_in_coroutine()).
>
> Therefore:
>
> 1. Is there any real benefit from using those
> {rdlock/rdunlock}_main_loop() constructions, or they're here due to
> historical reasons only?
It's the price we pay for the compiler to verify our locking rules.
> 2. Would it hurt if we only leave GRAPH_RDLOCK_GUARD_MAINLOOP() in all
> such occurrences? At least when it's obvious we can't get out of the
> main thread. That would simply deliver us from performing same checks
> several times, similar to what's done in commit 22/24 ("block/io: remove
> duplicate GLOBAL_STATE_CODE() in bdrv_do_drained_end()").
Once bdrv_drain_all_begin() is marked GRAPH_UNLOCKED, calling it after
GRAPH_RDLOCK_GUARD_MAINLOOP() would be wrong according to TSA rules
(which don't know anything about this being only a fake lock) and the
build would fail.
Kevin
next prev parent reply other threads:[~2025-05-26 9:11 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-20 10:29 [PATCH v2 00/24] block: do not drain while holding the graph lock Fiona Ebner
2025-05-20 10:29 ` [PATCH v2 01/24] block: remove outdated comments about AioContext locking Fiona Ebner
2025-05-21 9:41 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 02/24] block: move drain outside of read-locked bdrv_reopen_queue_child() Fiona Ebner
2025-05-21 9:45 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 03/24] block/snapshot: move drain outside of read-locked bdrv_snapshot_delete() Fiona Ebner
2025-05-21 9:53 ` Kevin Wolf
2025-05-23 18:12 ` Andrey Drobyshev
2025-05-26 9:10 ` Kevin Wolf [this message]
2025-05-26 10:33 ` Fiona Ebner
2025-05-26 11:42 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 04/24] block: move drain outside of read-locked bdrv_inactivate_recurse() Fiona Ebner
2025-05-21 9:56 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 05/24] block: mark bdrv_parent_change_aio_context() GRAPH_RDLOCK Fiona Ebner
2025-05-21 10:04 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 06/24] block: mark change_aio_ctx() callback and instances as GRAPH_RDLOCK(_PTR) Fiona Ebner
2025-05-20 15:47 ` Eric Blake
2025-05-21 15:23 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 07/24] block: mark bdrv_child_change_aio_context() GRAPH_RDLOCK Fiona Ebner
2025-05-20 15:48 ` Eric Blake
2025-05-21 15:28 ` Kevin Wolf
2025-05-23 17:20 ` Andrey Drobyshev
2025-05-26 9:05 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 08/24] block: move drain outside of bdrv_change_aio_context() and mark GRAPH_RDLOCK Fiona Ebner
2025-05-21 16:05 ` Kevin Wolf
2025-05-22 9:09 ` Fiona Ebner
2025-05-22 12:05 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 09/24] block: move drain outside of bdrv_try_change_aio_context() Fiona Ebner
2025-05-21 16:36 ` Kevin Wolf
2025-05-22 9:56 ` Fiona Ebner
2025-05-22 12:36 ` Kevin Wolf
2025-05-20 10:29 ` [PATCH v2 10/24] block: move drain outside of bdrv_attach_child_common(_abort)() Fiona Ebner
2025-05-20 10:29 ` [PATCH v2 11/24] block: move drain outside of bdrv_set_backing_hd_drained() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 12/24] block: move drain outside of bdrv_root_attach_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 13/24] block: move drain outside of bdrv_attach_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 14/24] block: move drain outside of quorum_add_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 15/24] block: move drain outside of bdrv_root_unref_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 16/24] block: move drain outside of quorum_del_child() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 17/24] blockdev: drain while unlocked in internal_snapshot_action() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 18/24] blockdev: drain while unlocked in external_snapshot_action() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 19/24] block: mark bdrv_drained_begin() as GRAPH_UNLOCKED Fiona Ebner
2025-05-23 17:20 ` Andrey Drobyshev
2025-05-20 10:30 ` [PATCH v2 20/24] iotests/graph-changes-while-io: remove image file after test Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 21/24] iotests/graph-changes-while-io: add test case with removal of lower snapshot Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 22/24] block/io: remove duplicate GLOBAL_STATE_CODE() in bdrv_do_drained_end() Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 23/24] block: never use atomics to access bs->quiesce_counter Fiona Ebner
2025-05-20 10:30 ` [PATCH v2 24/24] block: add bdrv_graph_wrlock_drained() convenience wrapper Fiona Ebner
2025-05-20 15:54 ` Eric Blake
2025-05-20 15:44 ` [PATCH v2 00/24] block: do not drain while holding the graph lock Eric Blake
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aDQwDbJLqPYVxgCN@redhat.com \
--to=kwolf@redhat.com \
--cc=andrey.drobyshev@virtuozzo.com \
--cc=ari@tuxera.com \
--cc=berto@igalia.com \
--cc=den@virtuozzo.com \
--cc=eblake@redhat.com \
--cc=f.ebner@proxmox.com \
--cc=fam@euphon.net \
--cc=hreitz@redhat.com \
--cc=jsnow@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=vsementsov@yandex-team.ru \
--cc=wencongyang2@huawei.com \
--cc=xiechanglong.d@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.