From: "Denis V. Lunev" <den@openvz.org>
Cc: Kevin Wolf <kwolf@redhat.com>, "Denis V. Lunev" <den@openvz.org>,
	qemu-devel@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>,
	qemu-stable@nongnu.org
Subject: [Qemu-devel] [PATCH 06/10] io: guard aio_poll with aio_context_acquire
Date: Tue,  3 Nov 2015 17:12:09 +0300	[thread overview]
Message-ID: <1446559933-28965-7-git-send-email-den@openvz.org> (raw)
In-Reply-To: <1446559933-28965-1-git-send-email-den@openvz.org>

There is no problem if this code is called from the iothread, where the
AioContext is properly acquired. Unfortunately, it is also called from the
HMP thread, and this leads to a disaster:

        HMP thread                     IO thread (in aio_poll)
            |                                    |
    qemu_coroutine_enter                         |
    while (rwco.ret == NOT_DONE)                 |
        aio_poll                                 |
            aio_context_acquire                  |
            |                         ret from qemu_poll_ns
            |                         aio_context_acquire (nested = 2)
            |                         process bdrv_rw_co_entry, set rwco.ret
            |                         aio_context_release (nested = 1)
            |                         reenters aio_poll, clears events
            |                         aio_context_release
            aio_context_release
            qemu_poll_ns

In this case the HMP thread will never be woken up. Alas.
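
The trace above corresponds to the pre-patch synchronous wait pattern. Here
is a paraphrased sketch (not part of this patch; the names rwco, NOT_DONE
and bdrv_rw_co_entry come from the block/io.c hunk below) of the code
before the fix. Nothing prevents the iothread from dispatching the
completion between the coroutine entry and our first aio_poll:

    /* pre-patch pattern (paraphrased): no lock is held here */
    AioContext *aio_context = bdrv_get_aio_context(bs);
    Coroutine *co = qemu_coroutine_create(bdrv_rw_co_entry);

    qemu_coroutine_enter(co, &rwco);    /* request is submitted unlocked */
    while (rwco.ret == NOT_DONE) {
        aio_poll(aio_context, true);    /* can sleep forever if the iothread
                                         * has already consumed the event */
    }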

This means that all such patterns MUST be guarded with aio_context_is_owner
checks. This is not terrible: once we find all such places, we can fix them
with ease.
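
A hypothetical form of such a check, built on the aio_context_is_owner
helper introduced later in this series (the exact signature is an
assumption here):

    /* hypothetical guard; aio_context_is_owner is added by patch 09/10 */
    assert(aio_context_is_owner(bdrv_get_aio_context(bs)));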

Another approach would be to take the lock at the very top (at the beginning
of the operation), but that is much more difficult and would spread
aio_context_acquire calls into a lot of unrelated code.
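
A minimal sketch of the guarded pattern that the hunks below apply
(paraphrased from the block/io.c change): taking the lock before entering
the coroutine serializes this thread against the iothread, so the
completion cannot be consumed behind our back before we start polling
for it.

    AioContext *aio_context = bdrv_get_aio_context(bs);
    Coroutine *co = qemu_coroutine_create(bdrv_rw_co_entry);

    aio_context_acquire(aio_context);
    qemu_coroutine_enter(co, &rwco);
    while (rwco.ret == NOT_DONE) {
        aio_poll(aio_context, true);
    }
    aio_context_release(aio_context);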

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
---
 block.c           |  5 ++++-
 block/curl.c      |  3 +++
 block/io.c        | 11 +++++++++++
 block/iscsi.c     |  2 ++
 block/nfs.c       |  5 +++++
 block/qed-table.c | 20 ++++++++++++++++----
 block/sheepdog.c  |  2 ++
 blockjob.c        |  6 ++++++
 qemu-io-cmds.c    |  6 +++++-
 9 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 98b0b66..cf858a7 100644
--- a/block.c
+++ b/block.c
@@ -359,11 +359,14 @@ int bdrv_create(BlockDriver *drv, const char* filename,
         /* Fast-path if already in coroutine context */
         bdrv_create_co_entry(&cco);
     } else {
+        AioContext *ctx = qemu_get_aio_context();
         co = qemu_coroutine_create(bdrv_create_co_entry);
+        aio_context_acquire(ctx);
         qemu_coroutine_enter(co, &cco);
         while (cco.ret == NOT_DONE) {
-            aio_poll(qemu_get_aio_context(), true);
+            aio_poll(ctx, true);
         }
+        aio_context_release(ctx);
     }
 
     ret = cco.ret;
diff --git a/block/curl.c b/block/curl.c
index 8994182..33c024d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -378,6 +378,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
     CURLState *state = NULL;
     int i, j;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
     do {
         for (i=0; i<CURL_NUM_STATES; i++) {
@@ -392,7 +393,9 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
             break;
         }
         if (!state) {
+            aio_context_acquire(ctx);
             aio_poll(bdrv_get_aio_context(bs), true);
+            aio_context_release(ctx);
         }
     } while(!state);
 
diff --git a/block/io.c b/block/io.c
index 8dcad3b..05aa32e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -560,11 +560,13 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
     } else {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
+        aio_context_acquire(aio_context);
         co = qemu_coroutine_create(bdrv_rw_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
             aio_poll(aio_context, true);
         }
+        aio_context_release(aio_context);
     }
     return rwco.ret;
 }
@@ -1606,12 +1608,15 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
         bdrv_get_block_status_above_co_entry(&data);
     } else {
         AioContext *aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
 
         co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
         qemu_coroutine_enter(co, &data);
+
         while (!data.done) {
             aio_poll(aio_context, true);
         }
+        aio_context_release(aio_context);
     }
     return data.ret;
 }
@@ -2391,12 +2396,15 @@ int bdrv_flush(BlockDriverState *bs)
         bdrv_flush_co_entry(&rwco);
     } else {
         AioContext *aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
 
         co = qemu_coroutine_create(bdrv_flush_co_entry);
         qemu_coroutine_enter(co, &rwco);
+
         while (rwco.ret == NOT_DONE) {
             aio_poll(aio_context, true);
         }
+        aio_context_release(aio_context);
     }
 
     return rwco.ret;
@@ -2504,12 +2512,15 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
         bdrv_discard_co_entry(&rwco);
     } else {
         AioContext *aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
 
         co = qemu_coroutine_create(bdrv_discard_co_entry);
         qemu_coroutine_enter(co, &rwco);
+
         while (rwco.ret == NOT_DONE) {
             aio_poll(aio_context, true);
         }
+        aio_context_release(aio_context);
     }
 
     return rwco.ret;
diff --git a/block/iscsi.c b/block/iscsi.c
index 9a628b7..1d6200d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -829,11 +829,13 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         break;
     case SG_IO:
         status = -EINPROGRESS;
+        aio_context_acquire(iscsilun->aio_context);
         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
 
         while (status == -EINPROGRESS) {
             aio_poll(iscsilun->aio_context, true);
         }
+        aio_context_release(iscsilun->aio_context);
 
         return 0;
     default:
diff --git a/block/nfs.c b/block/nfs.c
index fd79f89..36ec1e1 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -462,6 +462,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
     NFSClient *client = bs->opaque;
     NFSRPC task = {0};
     struct stat st;
+    AioContext *ctx;
 
     if (bdrv_is_read_only(bs) &&
         !(bs->open_flags & BDRV_O_NOCACHE)) {
@@ -469,8 +470,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
     }
 
     task.st = &st;
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
                         &task) != 0) {
+        aio_context_release(ctx);
         return -ENOMEM;
     }
 
@@ -478,6 +482,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
         nfs_set_events(client);
         aio_poll(client->aio_context, true);
     }
+    aio_context_release(ctx);
 
     return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }
diff --git a/block/qed-table.c b/block/qed-table.c
index f4219b8..fa13aba 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -169,12 +169,15 @@ static void qed_sync_cb(void *opaque, int ret)
 int qed_read_l1_table_sync(BDRVQEDState *s)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
@@ -191,11 +194,14 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                             unsigned int n)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
@@ -264,11 +270,14 @@ void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
@@ -286,11 +295,14 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                             unsigned int index, unsigned int n, bool flush)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
diff --git a/block/sheepdog.c b/block/sheepdog.c
index d80e4ed..038a385 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -715,11 +715,13 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
     if (qemu_in_coroutine()) {
         do_co_req(&srco);
     } else {
+        aio_context_acquire(aio_context);
         co = qemu_coroutine_create(do_co_req);
         qemu_coroutine_enter(co, &srco);
         while (!srco.finished) {
             aio_poll(aio_context, true);
         }
+        aio_context_release(aio_context);
     }
 
     return srco.ret;
diff --git a/blockjob.c b/blockjob.c
index c02fe59..9ddb958 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -194,6 +194,7 @@ static int block_job_finish_sync(BlockJob *job,
     struct BlockFinishData data;
     BlockDriverState *bs = job->bs;
     Error *local_err = NULL;
+    AioContext *ctx;
 
     assert(bs->job == job);
 
@@ -206,14 +207,19 @@ static int block_job_finish_sync(BlockJob *job,
     data.ret = -EINPROGRESS;
     job->cb = block_job_finish_cb;
     job->opaque = &data;
+
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     finish(job, &local_err);
     if (local_err) {
+        aio_context_release(ctx);
         error_propagate(errp, local_err);
         return -EBUSY;
     }
     while (data.ret == -EINPROGRESS) {
         aio_poll(bdrv_get_aio_context(bs), true);
     }
+    aio_context_release(ctx);
     return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }
 
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 6e5d1e4..45299cd 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -474,12 +474,16 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int count,
         .total  = total,
         .done   = false,
     };
+    AioContext *ctx = blk_get_aio_context(blk);
+    aio_context_acquire(ctx);
 
     co = qemu_coroutine_create(co_write_zeroes_entry);
     qemu_coroutine_enter(co, &data);
     while (!data.done) {
-        aio_poll(blk_get_aio_context(blk), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
+
     if (data.ret < 0) {
         return data.ret;
     } else {
-- 
2.5.0
