From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: Sergio Lopez <slp@redhat.com>
Cc: qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>,
	qemu-block@nongnu.org, Nir Soffer <nsoffer@redhat.com>,
	Kevin Wolf <kwolf@redhat.com>, Eric Blake <eblake@redhat.com>
Subject: Re: [PATCH v2 2/2] nbd/server: Use drained block ops to quiesce the server
Date: Wed, 2 Jun 2021 15:48:51 +0300
Message-ID: <f251a366-fd4b-d39c-704d-96b6edfa743a@virtuozzo.com>
In-Reply-To: <20210602124430.7v6jbeydyie4as26@mhamilton>

02.06.2021 15:44, Sergio Lopez wrote:
> On Wed, Jun 02, 2021 at 03:06:02PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> 02.06.2021 09:05, Sergio Lopez wrote:
>>> Before switching between AioContexts we need to make sure that we're
>>> fully quiesced ("nb_requests == 0" for every client) when entering the
>>> drained section.
>>>
>>> To do this, we set "quiescing = true" for every client on
>>> ".drained_begin" to prevent new coroutines from being created, and
>>> check if "nb_requests == 0" on ".drained_poll". Finally, once we're
>>> exiting the drained section, on ".drained_end" we set "quiescing =
>>> false" and call "nbd_client_receive_next_request()" to resume the
>>> processing of new requests.
>>>
>>> With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be
>>> reverted to be as simple as they were before f148ae7d36.
>>>
>>> RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137
>>> Suggested-by: Kevin Wolf <kwolf@redhat.com>
>>> Signed-off-by: Sergio Lopez <slp@redhat.com>
>>> ---
>>>    nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++--------------
>>>    1 file changed, 61 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/nbd/server.c b/nbd/server.c
>>> index 86a44a9b41..b60ebc3ab6 100644
>>> --- a/nbd/server.c
>>> +++ b/nbd/server.c
>>> @@ -1513,6 +1513,11 @@ static void nbd_request_put(NBDRequestData *req)
>>>        g_free(req);
>>>        client->nb_requests--;
>>> +
>>> +    if (client->quiescing && client->nb_requests == 0) {
>>> +        aio_wait_kick();
>>> +    }
>>> +
>>>        nbd_client_receive_next_request(client);
>>>        nbd_client_put(client);
>>> @@ -1530,49 +1535,68 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
>>>        QTAILQ_FOREACH(client, &exp->clients, next) {
>>>            qio_channel_attach_aio_context(client->ioc, ctx);
>>> +        assert(client->nb_requests == 0);
>>>            assert(client->recv_coroutine == NULL);
>>>            assert(client->send_coroutine == NULL);
>>> -
>>> -        if (client->quiescing) {
>>> -            client->quiescing = false;
>>> -            nbd_client_receive_next_request(client);
>>> -        }
>>>        }
>>>    }
>>> -static void nbd_aio_detach_bh(void *opaque)
>>> +static void blk_aio_detach(void *opaque)
>>>    {
>>>        NBDExport *exp = opaque;
>>>        NBDClient *client;
>>> +    trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
>>> +
>>>        QTAILQ_FOREACH(client, &exp->clients, next) {
>>>            qio_channel_detach_aio_context(client->ioc);
>>> +    }
>>> +
>>> +    exp->common.ctx = NULL;
>>> +}
>>> +
>>> +static void nbd_drained_begin(void *opaque)
>>> +{
>>> +    NBDExport *exp = opaque;
>>> +    NBDClient *client;
>>> +
>>> +    QTAILQ_FOREACH(client, &exp->clients, next) {
>>>            client->quiescing = true;
>>> +    }
>>> +}
>>> -        if (client->recv_coroutine) {
>>> -            if (client->read_yielding) {
>>> -                qemu_aio_coroutine_enter(exp->common.ctx,
>>> -                                         client->recv_coroutine);
>>> -            } else {
>>> -                AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL);
>>> -            }
>>> -        }
>>> +static void nbd_drained_end(void *opaque)
>>> +{
>>> +    NBDExport *exp = opaque;
>>> +    NBDClient *client;
>>> -        if (client->send_coroutine) {
>>> -            AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL);
>>> -        }
>>> +    QTAILQ_FOREACH(client, &exp->clients, next) {
>>> +        client->quiescing = false;
>>> +        nbd_client_receive_next_request(client);
>>>        }
>>>    }
>>> -static void blk_aio_detach(void *opaque)
>>> +static bool nbd_drained_poll(void *opaque)
>>>    {
>>>        NBDExport *exp = opaque;
>>> +    NBDClient *client;
>>> -    trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
>>> +    QTAILQ_FOREACH(client, &exp->clients, next) {
>>> +        if (client->nb_requests != 0) {
>>> +            /*
>>> +             * If there's a coroutine waiting for a request on nbd_read_eof()
>>> +             * enter it here so we don't depend on the client to wake it up.
>>> +             */
>>> +            if (client->recv_coroutine != NULL && client->read_yielding) {
>>> +                qemu_aio_coroutine_enter(exp->common.ctx,
>>> +                                         client->recv_coroutine);
>>> +            }
>>> -    aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp);
>>> +            return true;
>>> +        }
>>> +    }
>>> -    exp->common.ctx = NULL;
>>> +    return false;
>>>    }
>>>    static void nbd_eject_notifier(Notifier *n, void *data)
>>> @@ -1594,6 +1618,12 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
>>>        blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
>>>    }
>>> +static const BlockDevOps nbd_block_ops = {
>>> +    .drained_begin = nbd_drained_begin,
>>> +    .drained_end = nbd_drained_end,
>>> +    .drained_poll = nbd_drained_poll,
>>> +};
>>> +
>>>    static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
>>>                                 Error **errp)
>>>    {
>>> @@ -1715,8 +1745,17 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
>>>        exp->allocation_depth = arg->allocation_depth;
>>> +    /*
>>> +     * We need to inhibit request queuing in the block layer to ensure we can
>>> +     * be properly quiesced when entering a drained section, as our coroutines
>>> +     * servicing pending requests might enter blk_pread().
>>> +     */
>>
>> This is not very clear to me :(. What's bad about queuing requests at the blk layer during a drained section?
> 
> We need to make sure that all coroutines in the NBD server have
> finished (client->nb_requests == 0) before detaching from the
> AioContext. If we don't inhibit request queuing, some coroutines may
> get stuck in blk_pread()->...->blk_wait_while_drained(), causing
> nbd_drained_poll() to always return that we're busy.

Ah, OK. Thanks for the explanation.
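
To make this concrete, below is a minimal standalone model (hypothetical
names, not the real QEMU API) of the livelock that
blk_set_disable_request_queuing(blk, true) avoids: with queuing enabled,
an in-flight coroutine is parked by the block layer instead of
completing, so nb_requests never reaches zero and the drained poll can
never succeed.

/*
 * Standalone sketch, not QEMU code: models why the patch calls
 * blk_set_disable_request_queuing(blk, true). With queuing enabled,
 * a request coroutine that reaches blk_pread() during a drained
 * section is parked in blk_wait_while_drained(), so nb_requests can
 * never drop to zero and the drain poll below never terminates.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct Client {
    int nb_requests;     /* in-flight NBD requests */
    bool quiescing;      /* set by .drained_begin */
} Client;

static bool queuing_disabled; /* models blk_set_disable_request_queuing() */

/* A request coroutine trying to finish while the section is drained. */
static bool request_completes(Client *c)
{
    if (!queuing_disabled) {
        return false;    /* parked in blk_wait_while_drained() */
    }
    c->nb_requests--;    /* request runs to completion */
    return true;
}

/* Models nbd_drained_poll(): busy while any request is in flight. */
static bool drained_poll(Client *c)
{
    return c->nb_requests != 0;
}

int main(void)
{
    Client c = { .nb_requests = 1, .quiescing = true };

    queuing_disabled = true; /* flip to false to see the livelock */
    while (drained_poll(&c)) {
        if (!request_completes(&c)) {
            puts("request parked while drained; poll can never succeed");
            return 1;
        }
    }
    printf("nb_requests == %d, safe to detach the AioContext\n",
           c.nb_requests);
    return 0;
}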

> 
>>> +    blk_set_disable_request_queuing(blk, true);
>>> +
>>>        blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
>>> +    blk_set_dev_ops(blk, &nbd_block_ops, exp);
>>> +
>>>        QTAILQ_INSERT_TAIL(&exports, exp, next);
>>>        return 0;
>>> @@ -1788,6 +1827,7 @@ static void nbd_export_delete(BlockExport *blk_exp)
>>>            }
>>>            blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
>>>                                            blk_aio_detach, exp);
>>> +        blk_set_disable_request_queuing(exp->common.blk, false);
>>>        }
>>>        for (i = 0; i < exp->nr_export_bitmaps; i++) {
>>>
>>
>> I don't follow the whole logic of client quiescing, but overall it looks good to me. As I understand it, prior to this patch we had a kind of drained_begin implementation in the blk_aio_detach handler, and after it we instead rely on the generic drained-section code, with the help of the appropriate dev-ops handlers.
> 
> Doing that in blk_aio_detach() was a bit too late, since we were
> already in the drained section, and as stated above, we need the
> AioContext to stay attached so that all of the NBD server coroutines
> can finish properly.
> 
> Also, .drained_poll allows us to align quiescing the NBD server with
> the block layer.
> 
> Thanks,
> Sergio.
> 
>> weak:
>>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>
>>
>> -- 
>> Best regards,
>> Vladimir
>>
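
As an aside for readers following the thread: below is a minimal
standalone model (hypothetical names, not the QEMU API) of the
drained-section sequence these BlockDevOps handlers plug into. It shows
how .drained_begin, .drained_poll and .drained_end cooperate to reach
nb_requests == 0 before the AioContext switch.

/*
 * Standalone sketch, not the QEMU API: models the drained-section
 * sequence the patch hooks into via BlockDevOps. drained_begin stops
 * new work, the drainer polls drained_poll until the export is idle,
 * and drained_end resumes request processing.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct Export {
    int nb_requests;
    bool quiescing;
} Export;

static void drained_begin(Export *e)
{
    e->quiescing = true;          /* cf. nbd_drained_begin() */
}

static bool drained_poll(Export *e)
{
    return e->nb_requests != 0;   /* cf. nbd_drained_poll() */
}

static void drained_end(Export *e)
{
    e->quiescing = false;         /* cf. nbd_drained_end() */
    /* here nbd_client_receive_next_request() would restart reads */
}

int main(void)
{
    Export e = { .nb_requests = 2 };

    drained_begin(&e);
    while (drained_poll(&e)) {    /* stands in for AIO_WAIT_WHILE() */
        e.nb_requests--;          /* in-flight requests complete */
    }
    /* fully quiesced here: safe to switch AioContexts */
    drained_end(&e);
    printf("resumed with nb_requests == %d\n", e.nb_requests);
    return 0;
}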


-- 
Best regards,
Vladimir

