[Qemu-devel] [PATCH 03/10] vdi: make it thread-safe

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: stefanha@redhat.com, qemu-block@nongnu.org, kwolf@redhat.com
Subject: [Qemu-devel] [PATCH 03/10] vdi: make it thread-safe
Date: Tue, 27 Jun 2017 17:22:41 +0200	[thread overview]
Message-ID: <20170627152248.10446-4-pbonzini@redhat.com> (raw)
In-Reply-To: <20170627152248.10446-1-pbonzini@redhat.com>

The VirtualBox driver is using a mutex to order all allocating writes,
but it is not protecting accesses to the bitmap because they implicitly
happen under the AioContext mutex.  Change this to use a CoRwlock
explicitly.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/vdi.c | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index 79af47763b..080e1562e3 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -172,7 +172,7 @@ typedef struct {
     /* VDI header (converted to host endianness). */
     VdiHeader header;
 
-    CoMutex write_lock;
+    CoRwlock bmap_lock;
 
     Error *migration_blocker;
 } BDRVVdiState;
@@ -485,7 +485,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail_free_bmap;
     }
 
-    qemu_co_mutex_init(&s->write_lock);
+    qemu_co_rwlock_init(&s->bmap_lock);
 
     return 0;
 
@@ -557,7 +557,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                n_bytes, offset);
 
         /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
+        qemu_co_rwlock_unlock(&s->bmap_lock);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Block not allocated, return zeros, no need to wait. */
             qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
@@ -595,6 +597,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     uint32_t block_index;
     uint32_t offset_in_block;
     uint32_t n_bytes;
+    uint64_t data_offset;
     uint32_t bmap_first = VDI_UNALLOCATED;
     uint32_t bmap_last = VDI_UNALLOCATED;
     uint8_t *block = NULL;
@@ -614,10 +617,19 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                n_bytes, offset);
 
         /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Allocate new block and write to it. */
             uint64_t data_offset;
+            qemu_co_rwlock_upgrade(&s->bmap_lock);
+            bmap_entry = le32_to_cpu(s->bmap[block_index]);
+            if (VDI_IS_ALLOCATED(bmap_entry)) {
+                /* A concurrent allocation did the work for us.  */
+                qemu_co_rwlock_downgrade(&s->bmap_lock);
+                goto nonallocating_write;
+            }
+
             bmap_entry = s->header.blocks_allocated;
             s->bmap[block_index] = cpu_to_le32(bmap_entry);
             s->header.blocks_allocated++;
@@ -635,30 +647,18 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
             memset(block + offset_in_block + n_bytes, 0,
                    s->block_size - n_bytes - offset_in_block);
 
-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
+            /* Write the new block under CoRwLock write-side protection,
+             * so this full-cluster write does not overlap a partial write
+             * of the same cluster, issued from the "else" branch.
+             */
             ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
-            qemu_co_mutex_unlock(&s->write_lock);
+            qemu_co_rwlock_unlock(&s->bmap_lock);
         } else {
-            uint64_t data_offset = s->header.offset_data +
-                                   (uint64_t)bmap_entry * s->block_size +
-                                   offset_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
+nonallocating_write:
+            data_offset = s->header.offset_data +
+                           (uint64_t)bmap_entry * s->block_size +
+                           offset_in_block;
+            qemu_co_rwlock_unlock(&s->bmap_lock);
 
             qemu_iovec_reset(&local_qiov);
             qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
-- 
2.13.0

next prev parent reply	other threads:[~2017-06-27 16:55 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-27 15:22 [Qemu-devel] [PATCH 00/10] Block layer thread-safety, part 2 Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 01/10] qcow2: call CoQueue APIs under CoMutex Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 02/10] coroutine-lock: add qemu_co_rwlock_downgrade and qemu_co_rwlock_upgrade Paolo Bonzini
2017-06-27 15:22 ` Paolo Bonzini [this message]
2017-06-27 15:22 ` [Qemu-devel] [PATCH 04/10] vpc: make it thread-safe Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 05/10] vvfat: " Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 06/10] qed: move tail of qed_aio_write_main to qed_aio_write_{cow, alloc} Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 07/10] block: invoke .bdrv_drain callback in coroutine context and from AioContext Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 08/10] qed: protect table cache with CoMutex Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 09/10] sheepdog: add queue_lock Paolo Bonzini
2017-06-27 15:22 ` [Qemu-devel] [PATCH 10/10] ssh: support I/O from any AioContext Paolo Bonzini
2017-06-27 18:21 ` [Qemu-devel] [PATCH 00/10] Block layer thread-safety, part 2 Paolo Bonzini
2017-06-28  6:08 ` no-reply

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:79af47763 dfblob:080e1562e )
 OR (
bs:"[Qemu-devel] [PATCH 03/10] vdi: make it thread-safe" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170627152248.10446-4-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).