qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Hanna Czenczek <hreitz@redhat.com>
To: Kevin Wolf <kwolf@redhat.com>
Cc: qemu-block@nongnu.org, qemu-devel@nongnu.org,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Richard W . M . Jones" <rjones@redhat.com>,
	"Ilya Dryomov" <idryomov@gmail.com>,
	"Peter Lieven" <pl@dlhnet.de>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Fam Zheng" <fam@euphon.net>,
	"Ronnie Sahlberg" <ronniesahlberg@gmail.com>
Subject: Re: [PATCH 05/16] curl: Fix coroutine waking
Date: Fri, 31 Oct 2025 10:15:23 +0100	[thread overview]
Message-ID: <102c3064-6615-4958-b3dc-00fd2ba99b10@redhat.com> (raw)
In-Reply-To: <aQJHebvyff9rKAsz@redhat.com>

On 29.10.25 17:57, Kevin Wolf wrote:
> Am 28.10.2025 um 17:33 hat Hanna Czenczek geschrieben:
>> If we wake a coroutine from a different context, we must ensure that it
>> will yield exactly once (now or later), awaiting that wake.
>>
>> curl’s current .ret == -EINPROGRESS loop may lead to the coroutine not
>> yielding if the request finishes before the loop gets run.  To fix it,
>> drop the loop and just yield exactly once, unless the request is served
>> from the cache or failed before it was submitted – that last part makes
>> it a bit complicated, as the result of curl_find_buf() now needs to be a
>> tristate.
>>
>> (Can be reproduced with multiqueue by adding a usleep(100000) before the
>> `while (acb.ret == -EINPROGRESS)` loop.)
>>
>> Also, add a comment why aio_co_wake() is safe regardless of whether the
>> coroutine and curl_multi_check_completion() run in the same context.
>>
>> Cc: qemu-stable@nongnu.org
>> Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
>> ---
>>   block/curl.c | 55 +++++++++++++++++++++++++++++++++++-----------------
>>   1 file changed, 37 insertions(+), 18 deletions(-)
>>
>> diff --git a/block/curl.c b/block/curl.c
>> index 68cf83ce55..65996a8866 100644
>> --- a/block/curl.c
>> +++ b/block/curl.c
>> @@ -124,6 +124,16 @@ typedef struct BDRVCURLState {
>>       char *proxypassword;
>>   } BDRVCURLState;
>>   
>> +/** Possible result states of curl_find_buf() */
>> +typedef enum {
>> +    /* No buffer found, need to create new request */
>> +    CURL_NO_BUF_FOUND,
>> +    /* Buffer found, request filled and done */
>> +    CURL_REQUEST_FILLED,
>> +    /* Ongoing request found, need to yield */
>> +    CURL_REQUEST_ONGOING,
>> +} CURLFindBufResult;
>> +
>>   static void curl_clean_state(CURLState *s);
>>   static void curl_multi_do(void *arg);
>>   
>> @@ -258,8 +268,8 @@ read_end:
>>   }
>>   
>>   /* Called with s->mutex held.  */
>> -static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
>> -                          CURLAIOCB *acb)
>> +static CURLFindBufResult curl_find_buf(BDRVCURLState *s, uint64_t start,
>> +                                       uint64_t len, CURLAIOCB *acb)
>>   {
>>       int i;
>>       uint64_t end = start + len;
>> @@ -289,7 +299,7 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
>>                   qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
>>               }
>>               acb->ret = 0;
>> -            return true;
>> +            return CURL_REQUEST_FILLED;
>>           }
>>   
>>           // Wait for unfinished chunks
>> @@ -307,13 +317,13 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
>>               for (j=0; j<CURL_NUM_ACB; j++) {
>>                   if (!state->acb[j]) {
>>                       state->acb[j] = acb;
>> -                    return true;
>> +                    return CURL_REQUEST_ONGOING;
>>                   }
>>               }
>>           }
>>       }
>>   
>> -    return false;
>> +    return CURL_NO_BUF_FOUND;
>>   }
>>   
>>   /* Called with s->mutex held.  */
>> @@ -378,6 +388,16 @@ static void curl_multi_check_completion(BDRVCURLState *s)
>>                   acb->ret = error ? -EIO : 0;
>>                   state->acb[i] = NULL;
>>                   qemu_mutex_unlock(&s->mutex);
>> +                /*
>> +                 * Current AioContext is the BDS context, which may or may not
>> +                 * be the request (coroutine) context.
>> +                 * - If it is, the coroutine must have yielded or the FD handler
>> +                 *   (curl_multi_do()/curl_multi_timeout_do()) could not have
>> +                 *   been called and we would not be here
>> +                 * - If it is not, it doesn't matter whether it has already
>> +                 *   yielded or not; it will be scheduled once it does yield
>> +                 * So aio_co_wake() is safe to call.
>> +                 */
>>                   aio_co_wake(acb->co);
>>                   qemu_mutex_lock(&s->mutex);
>>               }
>> @@ -868,7 +888,8 @@ out_noclean:
>>       return -EINVAL;
>>   }
>>   
>> -static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
>> +/* Return whether a request was submitted that requires yielding */
>> +static bool coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
>>   {
>>       CURLState *state;
>>       int running;
>> @@ -877,13 +898,15 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
>>   
>>       uint64_t start = acb->offset;
>>       uint64_t end;
>> +    CURLFindBufResult find_buf_res;
>>   
>> -    qemu_mutex_lock(&s->mutex);
>> +    QEMU_LOCK_GUARD(&s->mutex);
>>   
>>       // In case we have the requested data already (e.g. read-ahead),
>>       // we can just call the callback and be done.
>> -    if (curl_find_buf(s, start, acb->bytes, acb)) {
>> -        goto out;
>> +    find_buf_res = curl_find_buf(s, start, acb->bytes, acb);
>> +    if (find_buf_res != CURL_NO_BUF_FOUND) {
>> +        return find_buf_res == CURL_REQUEST_ONGOING;
>>       }
>>   
>>       // No cache found, so let's start a new request
>> @@ -898,7 +921,7 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
>>       if (curl_init_state(s, state) < 0) {
>>           curl_clean_state(state);
>>           acb->ret = -EIO;
>> -        goto out;
>> +        return false;
>>       }
>>   
>>       acb->start = 0;
>> @@ -913,7 +936,7 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
>>       if (state->buf_len && state->orig_buf == NULL) {
>>           curl_clean_state(state);
>>           acb->ret = -ENOMEM;
>> -        goto out;
>> +        return false;
>>       }
>>       state->acb[0] = acb;
>>   
>> @@ -925,14 +948,12 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
>>           acb->ret = -EIO;
>>   
>>           curl_clean_state(state);
>> -        goto out;
>> +        return false;
>>       }
>>   
>>       /* Tell curl it needs to kick things off */
>>       curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
>> -
>> -out:
>> -    qemu_mutex_unlock(&s->mutex);
>> +    return true;
>>   }
>>   
>>   static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
>> @@ -941,14 +962,12 @@ static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
>>   {
>>       CURLAIOCB acb = {
>>           .co = qemu_coroutine_self(),
>> -        .ret = -EINPROGRESS,
>>           .qiov = qiov,
>>           .offset = offset,
>>           .bytes = bytes
>>       };
> Let's leave -EINPROGRESS here even if no other code checks for this
> value any more. It can be helpful for debugging when you can distinguish
> "completed successfully" from "still running".

Does that mean you want me to keep the `complete` field on rbd and nfs, too?

>>   
>> -    curl_setup_preadv(bs, &acb);
>> -    while (acb.ret == -EINPROGRESS) {
>> +    if (curl_setup_preadv(bs, &acb)) {
>>           qemu_coroutine_yield();
>>       }
>>       return acb.ret;
> That whole pattern of returning true and false or even a new enum
> everywhere to tell if we are waiting for something felt strange to me.
> Took me a while, but I think now I know what I expected instead: Why
> don't these places just yield immediately instead of requiring the outer
> layer to understand what happened in the functions it called?

I was considering the same.  My conclusion was that if they yielded
immediately, we might as well fully inline curl_setup_preadv() into this
function.  I didn’t want to do that at the time, but if you prefer, no problem.

Hanna



  reply	other threads:[~2025-10-31  9:17 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-28 16:33 [PATCH 00/16] block: Some multi-threading fixes Hanna Czenczek
2025-10-28 16:33 ` [PATCH 01/16] block: Note on aio_co_wake use if not yet yielding Hanna Czenczek
2025-10-28 16:33 ` [PATCH 02/16] rbd: Run co BH CB in the coroutine’s AioContext Hanna Czenczek
2025-10-28 16:33 ` [PATCH 03/16] iscsi: " Hanna Czenczek
2025-10-29 14:27   ` Kevin Wolf
2025-10-31  9:07     ` Hanna Czenczek
2025-10-31 13:27       ` Kevin Wolf
2025-10-28 16:33 ` [PATCH 04/16] nfs: " Hanna Czenczek
2025-10-28 16:33 ` [PATCH 05/16] curl: Fix coroutine waking Hanna Czenczek
2025-10-29 16:57   ` Kevin Wolf
2025-10-31  9:15     ` Hanna Czenczek [this message]
2025-10-31 13:17       ` Kevin Wolf
2025-10-28 16:33 ` [PATCH 06/16] gluster: Do not move coroutine into BDS context Hanna Czenczek
2025-10-29 17:10   ` Kevin Wolf
2025-10-31  9:16     ` Hanna Czenczek
2025-10-28 16:33 ` [PATCH 07/16] nvme: Kick and check completions in " Hanna Czenczek
2025-10-29 17:23   ` Kevin Wolf
2025-10-29 17:39     ` Kevin Wolf
2025-10-31  9:18       ` Hanna Czenczek
2025-10-31  9:19     ` Hanna Czenczek
2025-10-28 16:33 ` [PATCH 08/16] nvme: Fix coroutine waking Hanna Czenczek
2025-10-29 17:43   ` Kevin Wolf
2025-10-28 16:33 ` [PATCH 09/16] block/io: Take reqs_lock for tracked_requests Hanna Czenczek
2025-10-28 16:33 ` [PATCH 10/16] qcow2: Fix cache_clean_timer Hanna Czenczek
2025-10-29 20:23   ` Kevin Wolf
2025-10-31  9:29     ` Hanna Czenczek
2025-10-31 13:03       ` Kevin Wolf
2025-11-06 16:08         ` Hanna Czenczek
2025-10-28 16:33 ` [PATCH 11/16] ssh: Run restart_coroutine in current AioContext Hanna Czenczek
2025-10-28 16:33 ` [PATCH 12/16] blkreplay: Run BH in coroutine’s AioContext Hanna Czenczek
2025-10-28 16:33 ` [PATCH 13/16] block: Note in which AioContext AIO CBs are called Hanna Czenczek
2025-10-28 16:33 ` [PATCH 14/16] iscsi: Create AIO BH in original AioContext Hanna Czenczek
2025-10-28 16:33 ` [PATCH 15/16] null-aio: Run CB " Hanna Czenczek
2025-10-28 16:33 ` [PATCH 16/16] win32-aio: Run CB in original context Hanna Czenczek
2025-10-30 14:12   ` Kevin Wolf
2025-10-31  9:31     ` Hanna Czenczek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=102c3064-6615-4958-b3dc-00fd2ba99b10@redhat.com \
    --to=hreitz@redhat.com \
    --cc=alex.bennee@linaro.org \
    --cc=fam@euphon.net \
    --cc=idryomov@gmail.com \
    --cc=kwolf@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=philmd@linaro.org \
    --cc=pl@dlhnet.de \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rjones@redhat.com \
    --cc=ronniesahlberg@gmail.com \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).