All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, stefanha@redhat.com, eblake@redhat.com,
	eesposit@redhat.com, pbonzini@redhat.com,
	vsementsov@yandex-team.ru, qemu-devel@nongnu.org
Subject: [PATCH v2 03/21] preallocate: Don't poll during permission updates
Date: Mon, 11 Sep 2023 11:46:02 +0200	[thread overview]
Message-ID: <20230911094620.45040-4-kwolf@redhat.com> (raw)
In-Reply-To: <20230911094620.45040-1-kwolf@redhat.com>

When the permission related BlockDriver callbacks are called, we are in
the middle of an operation traversing the block graph. Polling in such a
place is a very bad idea because the graph could change in unexpected
ways. In the future, callers will also hold the graph lock, which is
likely to turn polling into a deadlock.

So we need to get rid of calls to functions like bdrv_getlength() or
bdrv_truncate() there as these functions poll internally. They are
currently used so that when no parent has write/resize permissions on
the image any more, the preallocate filter drops the extra preallocated
area in the image file and gives up write/resize permissions itself.

In order to achieve this without polling in .bdrv_check_perm, don't
immediately truncate the image, but only schedule a BH to do so. The
filter keeps the write/resize permissions a bit longer now until the BH
has executed.

There is one case in which delaying doesn't work: Reopening the image
read-only. In this case, bs->file will likely be reopened read-only,
too, so keeping write permissions a bit longer on it doesn't work. But
we can already cover this case in preallocate_reopen_prepare() and not
rely on the permission updates for it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/preallocate.c | 89 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 20 deletions(-)

diff --git a/block/preallocate.c b/block/preallocate.c
index 3173d80534..bfb638d8b1 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -75,8 +75,14 @@ typedef struct BDRVPreallocateState {
      * be invalid (< 0) when we don't have both exclusive BLK_PERM_RESIZE and
      * BLK_PERM_WRITE permissions on file child.
      */
+
+    /* Gives up the resize permission on children when parents don't need it */
+    QEMUBH *drop_resize_bh;
 } BDRVPreallocateState;
 
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp);
+static void preallocate_drop_resize_bh(void *opaque);
+
 #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
 #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
 static QemuOptsList runtime_opts = {
@@ -142,6 +148,7 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
      * For this to work, mark them invalid.
      */
     s->file_end = s->zero_start = s->data_end = -EINVAL;
+    s->drop_resize_bh = qemu_bh_new(preallocate_drop_resize_bh, bs);
 
     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
     if (ret < 0) {
@@ -193,6 +200,9 @@ static void preallocate_close(BlockDriverState *bs)
 {
     BDRVPreallocateState *s = bs->opaque;
 
+    qemu_bh_cancel(s->drop_resize_bh);
+    qemu_bh_delete(s->drop_resize_bh);
+
     if (s->data_end >= 0) {
         preallocate_truncate_to_real_size(bs, NULL);
     }
@@ -211,6 +221,7 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
                                       BlockReopenQueue *queue, Error **errp)
 {
     PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
+    int ret;
 
     if (!preallocate_absorb_opts(opts, reopen_state->options,
                                  reopen_state->bs->file->bs, errp)) {
@@ -218,6 +229,19 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
         return -EINVAL;
     }
 
+    /*
+     * Drop the preallocation already here if reopening read-only. The child
+     * might also be reopened read-only and then scheduling a BH during the
+     * permission update is too late.
+     */
+    if ((reopen_state->flags & BDRV_O_RDWR) == 0) {
+        ret = preallocate_drop_resize(reopen_state->bs, errp);
+        if (ret < 0) {
+            g_free(opts);
+            return ret;
+        }
+    }
+
     reopen_state->opaque = opts;
 
     return 0;
@@ -475,41 +499,61 @@ preallocate_co_getlength(BlockDriverState *bs)
     return ret;
 }
 
-static int preallocate_check_perm(BlockDriverState *bs,
-                                  uint64_t perm, uint64_t shared, Error **errp)
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp)
 {
     BDRVPreallocateState *s = bs->opaque;
+    int ret;
 
-    if (s->data_end >= 0 && !can_write_resize(perm)) {
-        /*
-         * Lose permissions.
-         * We should truncate in check_perm, as in set_perm bs->file->perm will
-         * be already changed, and we should not violate it.
-         */
-        return preallocate_truncate_to_real_size(bs, errp);
+    if (s->data_end < 0) {
+        return 0;
+    }
+
+    /*
+     * Before switching children to be read-only, truncate them to remove
+     * the preallocation and let them have the real size.
+     */
+    ret = preallocate_truncate_to_real_size(bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    /*
+     * We'll drop our permissions and will allow other users to take write and
+     * resize permissions (see preallocate_child_perm). Anyone will be able to
+     * change the child, so mark all states invalid. We'll regain control if a
+     * parent requests write access again.
+     */
+    s->data_end = s->file_end = s->zero_start = -EINVAL;
+
+    bdrv_graph_rdlock_main_loop();
+    bdrv_child_refresh_perms(bs, bs->file, NULL);
+    bdrv_graph_rdunlock_main_loop();
+
     return 0;
 }
 
+static void preallocate_drop_resize_bh(void *opaque)
+{
+    /*
+     * In case of errors, we'll simply keep the exclusive lock on the image
+     * indefinitely.
+     */
+    preallocate_drop_resize(opaque, NULL);
+}
+
 static void preallocate_set_perm(BlockDriverState *bs,
                                  uint64_t perm, uint64_t shared)
 {
     BDRVPreallocateState *s = bs->opaque;
 
     if (can_write_resize(perm)) {
+        qemu_bh_cancel(s->drop_resize_bh);
         if (s->data_end < 0) {
             s->data_end = s->file_end = s->zero_start =
-                bdrv_getlength(bs->file->bs);
+                bs->file->bs->total_sectors * BDRV_SECTOR_SIZE;
         }
     } else {
-        /*
-         * We drop our permissions, as well as allow shared
-         * permissions (see preallocate_child_perm), anyone will be able to
-         * change the child, so mark all states invalid. We'll regain control if
-         * get good permissions back.
-         */
-        s->data_end = s->file_end = s->zero_start = -EINVAL;
+        qemu_bh_schedule(s->drop_resize_bh);
     }
 }
 
@@ -517,10 +561,16 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
     BdrvChildRole role, BlockReopenQueue *reopen_queue,
     uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
 {
+    BDRVPreallocateState *s = bs->opaque;
+
     bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
 
-    if (can_write_resize(perm)) {
-        /* This should come by default, but let's enforce: */
+    /*
+     * We need exclusive write and resize permissions on the child not only when
+     * the parent can write to it, but also after the parent gave up write
+     * permissions until preallocate_drop_resize() has completed.
+     */
+    if (can_write_resize(perm) || s->data_end != -EINVAL) {
         *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
 
         /*
@@ -550,7 +600,6 @@ static BlockDriver bdrv_preallocate_filter = {
     .bdrv_co_flush = preallocate_co_flush,
     .bdrv_co_truncate = preallocate_co_truncate,
 
-    .bdrv_check_perm = preallocate_check_perm,
     .bdrv_set_perm = preallocate_set_perm,
     .bdrv_child_perm = preallocate_child_perm,
 
-- 
2.41.0



  parent reply	other threads:[~2023-09-11  9:47 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-11  9:45 [PATCH v2 00/21] Graph locking part 4 (node management) Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 01/21] block: Remove unused BlockReopenQueueEntry.perms_checked Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 02/21] preallocate: Factor out preallocate_truncate_to_real_size() Kevin Wolf
2023-09-11  9:46 ` Kevin Wolf [this message]
2023-10-05 19:55   ` [PATCH v2 03/21] preallocate: Don't poll during permission updates Vladimir Sementsov-Ogievskiy
2023-10-06  8:56     ` Kevin Wolf
2023-10-06 18:10       ` Vladimir Sementsov-Ogievskiy
2023-10-09  9:28         ` Denis V. Lunev
2023-09-11  9:46 ` [PATCH v2 04/21] block: Take AioContext lock for bdrv_append() more consistently Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 05/21] block: Introduce bdrv_schedule_unref() Kevin Wolf
2023-09-12 16:49   ` Stefan Hajnoczi
2023-09-11  9:46 ` [PATCH v2 06/21] block-coroutine-wrapper: Add no_co_wrapper_bdrv_wrlock functions Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 07/21] block-coroutine-wrapper: Allow arbitrary parameter names Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 08/21] block: Mark bdrv_replace_child_noperm() GRAPH_WRLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 09/21] block: Mark bdrv_replace_child_tran() GRAPH_WRLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 10/21] block: Mark bdrv_attach_child_common() GRAPH_WRLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 11/21] block: Call transaction callbacks with lock held Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 12/21] block: Mark bdrv_attach_child() GRAPH_WRLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 13/21] block: Mark bdrv_parent_perms_conflict() and callers GRAPH_RDLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 14/21] block: Mark bdrv_get_cumulative_perm() " Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 15/21] block: Mark bdrv_child_perm() GRAPH_RDLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 16/21] block: Mark bdrv_parent_cb_change_media() GRAPH_RDLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 17/21] block: Take graph rdlock in bdrv_drop_intermediate() Kevin Wolf
2023-09-12 16:49   ` Stefan Hajnoczi
2023-09-11  9:46 ` [PATCH v2 18/21] block: Take graph rdlock in bdrv_change_aio_context() Kevin Wolf
2023-09-12 16:49   ` Stefan Hajnoczi
2023-09-11  9:46 ` [PATCH v2 19/21] block: Mark bdrv_root_unref_child() GRAPH_WRLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 20/21] block: Mark bdrv_unref_child() GRAPH_WRLOCK Kevin Wolf
2023-09-11  9:46 ` [PATCH v2 21/21] block: Mark bdrv_add/del_child() and caller GRAPH_WRLOCK Kevin Wolf
2023-09-12 16:49 ` [PATCH v2 00/21] Graph locking part 4 (node management) Stefan Hajnoczi
2023-09-14 13:12   ` Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230911094620.45040-4-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=eblake@redhat.com \
    --cc=eesposit@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=vsementsov@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.