* [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread
2023-11-23 19:49 [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
@ 2023-11-23 19:49 ` Stefan Hajnoczi
2023-11-27 15:14 ` Eric Blake
2023-12-01 16:03 ` Kevin Wolf
2023-11-23 19:49 ` [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier() Stefan Hajnoczi
` (3 subsequent siblings)
4 siblings, 2 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-11-23 19:49 UTC (permalink / raw)
To: qemu-devel
Cc: Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng,
Stefan Hajnoczi
Stop depending on the AioContext lock and instead access
SCSIDevice->requests from only one thread at a time:
- When the VM is running only the BlockBackend's AioContext may access
the requests list.
- When the VM is stopped only the main loop may access the requests
list.
These constraints protect the requests list without the need for locking
in the I/O code path.
Note that multiple IOThreads are not supported yet because the code
assumes all SCSIRequests are executed from a single AioContext. Leave
that as future work.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
include/hw/scsi/scsi.h | 7 +-
hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++++++++-------------
2 files changed, 124 insertions(+), 57 deletions(-)
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 3692ca82f3..10c4e8288d 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -69,14 +69,19 @@ struct SCSIDevice
{
DeviceState qdev;
VMChangeStateEntry *vmsentry;
- QEMUBH *bh;
uint32_t id;
BlockConf conf;
SCSISense unit_attention;
bool sense_is_ua;
uint8_t sense[SCSI_SENSE_BUF_SIZE];
uint32_t sense_len;
+
+ /*
+ * The requests list is only accessed from the AioContext that executes
+ * requests or from the main loop when IOThread processing is stopped.
+ */
QTAILQ_HEAD(, SCSIRequest) requests;
+
uint32_t channel;
uint32_t lun;
int blocksize;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index fc4b77fdb0..b8bfde9565 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -85,6 +85,82 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
return d;
}
+/*
+ * Invoke @fn() for each enqueued request in device @s. Must be called from the
+ * main loop thread while the guest is stopped. This is only suitable for
+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
+ */
+static void scsi_device_for_each_req_sync(SCSIDevice *s,
+ void (*fn)(SCSIRequest *, void *),
+ void *opaque)
+{
+ SCSIRequest *req;
+ SCSIRequest *next_req;
+
+ assert(!runstate_is_running());
+ assert(qemu_in_main_thread());
+
+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+ fn(req, opaque);
+ }
+}
+
+typedef struct {
+ SCSIDevice *s;
+ void (*fn)(SCSIRequest *, void *);
+ void *fn_opaque;
+} SCSIDeviceForEachReqAsyncData;
+
+static void scsi_device_for_each_req_async_bh(void *opaque)
+{
+ g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
+ SCSIDevice *s = data->s;
+ SCSIRequest *req;
+ SCSIRequest *next;
+
+ /*
+ * It is unlikely that the AioContext will change before this BH is called,
+ * but if it happens then ->requests must not be accessed from this
+ * AioContext.
+ */
+ if (blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context()) {
+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+ data->fn(req, data->fn_opaque);
+ }
+ }
+
+ /* Drop the reference taken by scsi_device_for_each_req_async() */
+ object_unref(OBJECT(s));
+}
+
+/*
+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
+ * runs in the AioContext that is executing the request.
+ */
+static void scsi_device_for_each_req_async(SCSIDevice *s,
+ void (*fn)(SCSIRequest *, void *),
+ void *opaque)
+{
+ assert(qemu_in_main_thread());
+
+ SCSIDeviceForEachReqAsyncData *data =
+ g_new(SCSIDeviceForEachReqAsyncData, 1);
+
+ data->s = s;
+ data->fn = fn;
+ data->fn_opaque = opaque;
+
+ /*
+ * Hold a reference to the SCSIDevice until
+ * scsi_device_for_each_req_async_bh() finishes.
+ */
+ object_ref(OBJECT(s));
+
+ aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
+ scsi_device_for_each_req_async_bh,
+ data);
+}
+
static void scsi_device_realize(SCSIDevice *s, Error **errp)
{
SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
@@ -144,20 +220,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host,
qbus_set_bus_hotplug_handler(BUS(bus));
}
-static void scsi_dma_restart_bh(void *opaque)
+void scsi_req_retry(SCSIRequest *req)
{
- SCSIDevice *s = opaque;
- SCSIRequest *req, *next;
+ req->retry = true;
+}
- qemu_bh_delete(s->bh);
- s->bh = NULL;
-
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
- scsi_req_ref(req);
- if (req->retry) {
- req->retry = false;
- switch (req->cmd.mode) {
+/* Called in the AioContext that is executing the request */
+static void scsi_dma_restart_req(SCSIRequest *req, void *opaque)
+{
+ scsi_req_ref(req);
+ if (req->retry) {
+ req->retry = false;
+ switch (req->cmd.mode) {
case SCSI_XFER_FROM_DEV:
case SCSI_XFER_TO_DEV:
scsi_req_continue(req);
@@ -166,37 +240,22 @@ static void scsi_dma_restart_bh(void *opaque)
scsi_req_dequeue(req);
scsi_req_enqueue(req);
break;
- }
}
- scsi_req_unref(req);
}
- aio_context_release(blk_get_aio_context(s->conf.blk));
- /* Drop the reference that was acquired in scsi_dma_restart_cb */
- object_unref(OBJECT(s));
-}
-
-void scsi_req_retry(SCSIRequest *req)
-{
- /* No need to save a reference, because scsi_dma_restart_bh just
- * looks at the request list. */
- req->retry = true;
+ scsi_req_unref(req);
}
static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
{
SCSIDevice *s = opaque;
+ assert(qemu_in_main_thread());
+
if (!running) {
return;
}
- if (!s->bh) {
- AioContext *ctx = blk_get_aio_context(s->conf.blk);
- /* The reference is dropped in scsi_dma_restart_bh.*/
- object_ref(OBJECT(s));
- s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
- &DEVICE(s)->mem_reentrancy_guard);
- qemu_bh_schedule(s->bh);
- }
+
+ scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL);
}
static bool scsi_bus_is_address_free(SCSIBus *bus,
@@ -1657,15 +1716,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense)
}
}
+static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque)
+{
+ scsi_req_cancel_async(req, NULL);
+}
+
void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
{
- SCSIRequest *req;
+ scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
aio_context_acquire(blk_get_aio_context(sdev->conf.blk));
- while (!QTAILQ_EMPTY(&sdev->requests)) {
- req = QTAILQ_FIRST(&sdev->requests);
- scsi_req_cancel_async(req, NULL);
- }
blk_drain(sdev->conf.blk);
aio_context_release(blk_get_aio_context(sdev->conf.blk));
scsi_device_set_ua(sdev, sense);
@@ -1737,31 +1797,33 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)
/* SCSI request list. For simplicity, pv points to the whole device */
+static void put_scsi_req(SCSIRequest *req, void *opaque)
+{
+ QEMUFile *f = opaque;
+
+ assert(!req->io_canceled);
+ assert(req->status == -1 && req->host_status == -1);
+ assert(req->enqueued);
+
+ qemu_put_sbyte(f, req->retry ? 1 : 2);
+ qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
+ qemu_put_be32s(f, &req->tag);
+ qemu_put_be32s(f, &req->lun);
+ if (req->bus->info->save_request) {
+ req->bus->info->save_request(f, req);
+ }
+ if (req->ops->save_request) {
+ req->ops->save_request(f, req);
+ }
+}
+
static int put_scsi_requests(QEMUFile *f, void *pv, size_t size,
const VMStateField *field, JSONWriter *vmdesc)
{
SCSIDevice *s = pv;
- SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus);
- SCSIRequest *req;
- QTAILQ_FOREACH(req, &s->requests, next) {
- assert(!req->io_canceled);
- assert(req->status == -1 && req->host_status == -1);
- assert(req->enqueued);
-
- qemu_put_sbyte(f, req->retry ? 1 : 2);
- qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
- qemu_put_be32s(f, &req->tag);
- qemu_put_be32s(f, &req->lun);
- if (bus->info->save_request) {
- bus->info->save_request(f, req);
- }
- if (req->ops->save_request) {
- req->ops->save_request(f, req);
- }
- }
+ scsi_device_for_each_req_sync(s, put_scsi_req, f);
qemu_put_sbyte(f, 0);
-
return 0;
}
--
2.42.0
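The change above boils down to one ownership rule (the requests list belongs to the AioContext that executes requests, or to the main loop while the VM is stopped) plus a deferral idiom: the main loop pins the device with a reference, schedules a one-shot BH in the BlockBackend's AioContext, and the BH walks ->requests there before dropping the reference. A distilled sketch follows; the names are made up for illustration and this is not the exact patch code.

/*
 * Simplified restatement of the pattern introduced by the diff above.
 * DeferredWork, deferred_work_bh() and defer_to_request_context() are
 * illustrative names, not part of the patch. The real patch additionally
 * checks that the BlockBackend's AioContext has not changed before the BH
 * runs; see the review discussion below.
 */
typedef struct {
    SCSIDevice *s;
    void (*fn)(SCSIRequest *, void *);
    void *fn_opaque;
} DeferredWork;

static void deferred_work_bh(void *opaque)
{
    g_autofree DeferredWork *work = opaque;
    SCSIRequest *req;
    SCSIRequest *next_req;

    /* Runs in the AioContext that executes requests, so no lock is needed */
    QTAILQ_FOREACH_SAFE(req, &work->s->requests, next, next_req) {
        work->fn(req, work->fn_opaque);
    }

    object_unref(OBJECT(work->s)); /* paired with object_ref() below */
}

static void defer_to_request_context(SCSIDevice *s,
                                     void (*fn)(SCSIRequest *, void *),
                                     void *opaque)
{
    DeferredWork *work = g_new(DeferredWork, 1);

    work->s = s;
    work->fn = fn;
    work->fn_opaque = opaque;

    object_ref(OBJECT(s)); /* keep the device alive until the BH has run */
    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
                            deferred_work_bh, work);
}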
* Re: [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread
2023-11-23 19:49 ` [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread Stefan Hajnoczi
@ 2023-11-27 15:14 ` Eric Blake
2023-12-01 16:03 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Eric Blake @ 2023-11-27 15:14 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On Thu, Nov 23, 2023 at 02:49:28PM -0500, Stefan Hajnoczi wrote:
> Stop depending on the AioContext lock and instead access
> SCSIDevice->requests from only one thread at a time:
> - When the VM is running only the BlockBackend's AioContext may access
> the requests list.
> - When the VM is stopped only the main loop may access the requests
> list.
>
> These constraints protect the requests list without the need for locking
> in the I/O code path.
>
> Note that multiple IOThreads are not supported yet because the code
> assumes all SCSIRequests are executed from a single AioContext. Leave
> that as future work.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> include/hw/scsi/scsi.h | 7 +-
> hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++++++++-------------
> 2 files changed, 124 insertions(+), 57 deletions(-)
>
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
* Re: [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread
2023-11-23 19:49 ` [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread Stefan Hajnoczi
2023-11-27 15:14 ` Eric Blake
@ 2023-12-01 16:03 ` Kevin Wolf
2023-12-04 16:30 ` Stefan Hajnoczi
1 sibling, 1 reply; 18+ messages in thread
From: Kevin Wolf @ 2023-12-01 16:03 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> Stop depending on the AioContext lock and instead access
> SCSIDevice->requests from only one thread at a time:
> - When the VM is running only the BlockBackend's AioContext may access
> the requests list.
> - When the VM is stopped only the main loop may access the requests
> list.
>
> These constraints protect the requests list without the need for locking
> in the I/O code path.
>
> Note that multiple IOThreads are not supported yet because the code
> assumes all SCSIRequests are executed from a single AioContext. Leave
> that as future work.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> include/hw/scsi/scsi.h | 7 +-
> hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++++++++-------------
> 2 files changed, 124 insertions(+), 57 deletions(-)
>
> diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
> index 3692ca82f3..10c4e8288d 100644
> --- a/include/hw/scsi/scsi.h
> +++ b/include/hw/scsi/scsi.h
> @@ -69,14 +69,19 @@ struct SCSIDevice
> {
> DeviceState qdev;
> VMChangeStateEntry *vmsentry;
> - QEMUBH *bh;
> uint32_t id;
> BlockConf conf;
> SCSISense unit_attention;
> bool sense_is_ua;
> uint8_t sense[SCSI_SENSE_BUF_SIZE];
> uint32_t sense_len;
> +
> + /*
> + * The requests list is only accessed from the AioContext that executes
> + * requests or from the main loop when IOThread processing is stopped.
> + */
> QTAILQ_HEAD(, SCSIRequest) requests;
> +
> uint32_t channel;
> uint32_t lun;
> int blocksize;
> diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> index fc4b77fdb0..b8bfde9565 100644
> --- a/hw/scsi/scsi-bus.c
> +++ b/hw/scsi/scsi-bus.c
> @@ -85,6 +85,82 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
> return d;
> }
>
> +/*
> + * Invoke @fn() for each enqueued request in device @s. Must be called from the
> + * main loop thread while the guest is stopped. This is only suitable for
> + * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
> + */
> +static void scsi_device_for_each_req_sync(SCSIDevice *s,
> + void (*fn)(SCSIRequest *, void *),
> + void *opaque)
> +{
> + SCSIRequest *req;
> + SCSIRequest *next_req;
> +
> + assert(!runstate_is_running());
> + assert(qemu_in_main_thread());
> +
> + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
> + fn(req, opaque);
> + }
> +}
> +
> +typedef struct {
> + SCSIDevice *s;
> + void (*fn)(SCSIRequest *, void *);
> + void *fn_opaque;
> +} SCSIDeviceForEachReqAsyncData;
> +
> +static void scsi_device_for_each_req_async_bh(void *opaque)
> +{
> + g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
> + SCSIDevice *s = data->s;
> + SCSIRequest *req;
> + SCSIRequest *next;
> +
> + /*
> + * It is unlikely that the AioContext will change before this BH is called,
> + * but if it happens then ->requests must not be accessed from this
> + * AioContext.
> + */
What is the scenario where this happens? I would have expected that
switching the AioContext of a node involves draining the node first,
which would execute this BH before the context changes.
The other option I see is an empty BlockBackend, which can change its
AioContext without polling BHs, but in that case there is no connection
to other users, so the only change could come from virtio-scsi itself.
If there is such a case, it would probably be helpful to be specific in
the comment.
> + if (blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context()) {
> + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
> + data->fn(req, data->fn_opaque);
> + }
> + }
Of course, if the situation does happen, the question is why just doing
nothing is correct. Wouldn't that mean that the guest still sees stuck
requests?
Would rescheduling the BH in the new context be better?
> +
> + /* Drop the reference taken by scsi_device_for_each_req_async() */
> + object_unref(OBJECT(s));
> +}
> +
> +/*
> + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
> + * runs in the AioContext that is executing the request.
> + */
> +static void scsi_device_for_each_req_async(SCSIDevice *s,
> + void (*fn)(SCSIRequest *, void *),
> + void *opaque)
If we keep the behaviour above (doesn't do anything if the AioContext
changes), then I think it needs to be documented for this function and
callers should be explicit about why it's okay.
Kevin
* Re: [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread
2023-12-01 16:03 ` Kevin Wolf
@ 2023-12-04 16:30 ` Stefan Hajnoczi
2023-12-05 10:00 ` Kevin Wolf
0 siblings, 1 reply; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-12-04 16:30 UTC (permalink / raw)
To: Kevin Wolf
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On Fri, Dec 01, 2023 at 05:03:13PM +0100, Kevin Wolf wrote:
> On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> > Stop depending on the AioContext lock and instead access
> > SCSIDevice->requests from only one thread at a time:
> > - When the VM is running only the BlockBackend's AioContext may access
> > the requests list.
> > - When the VM is stopped only the main loop may access the requests
> > list.
> >
> > These constraints protect the requests list without the need for locking
> > in the I/O code path.
> >
> > Note that multiple IOThreads are not supported yet because the code
> > assumes all SCSIRequests are executed from a single AioContext. Leave
> > that as future work.
> >
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > include/hw/scsi/scsi.h | 7 +-
> > hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++++++++-------------
> > 2 files changed, 124 insertions(+), 57 deletions(-)
> >
> > diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
> > index 3692ca82f3..10c4e8288d 100644
> > --- a/include/hw/scsi/scsi.h
> > +++ b/include/hw/scsi/scsi.h
> > @@ -69,14 +69,19 @@ struct SCSIDevice
> > {
> > DeviceState qdev;
> > VMChangeStateEntry *vmsentry;
> > - QEMUBH *bh;
> > uint32_t id;
> > BlockConf conf;
> > SCSISense unit_attention;
> > bool sense_is_ua;
> > uint8_t sense[SCSI_SENSE_BUF_SIZE];
> > uint32_t sense_len;
> > +
> > + /*
> > + * The requests list is only accessed from the AioContext that executes
> > + * requests or from the main loop when IOThread processing is stopped.
> > + */
> > QTAILQ_HEAD(, SCSIRequest) requests;
> > +
> > uint32_t channel;
> > uint32_t lun;
> > int blocksize;
> > diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> > index fc4b77fdb0..b8bfde9565 100644
> > --- a/hw/scsi/scsi-bus.c
> > +++ b/hw/scsi/scsi-bus.c
> > @@ -85,6 +85,82 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
> > return d;
> > }
> >
> > +/*
> > + * Invoke @fn() for each enqueued request in device @s. Must be called from the
> > + * main loop thread while the guest is stopped. This is only suitable for
> > + * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
> > + */
> > +static void scsi_device_for_each_req_sync(SCSIDevice *s,
> > + void (*fn)(SCSIRequest *, void *),
> > + void *opaque)
> > +{
> > + SCSIRequest *req;
> > + SCSIRequest *next_req;
> > +
> > + assert(!runstate_is_running());
> > + assert(qemu_in_main_thread());
> > +
> > + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
> > + fn(req, opaque);
> > + }
> > +}
> > +
> > +typedef struct {
> > + SCSIDevice *s;
> > + void (*fn)(SCSIRequest *, void *);
> > + void *fn_opaque;
> > +} SCSIDeviceForEachReqAsyncData;
> > +
> > +static void scsi_device_for_each_req_async_bh(void *opaque)
> > +{
> > + g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
> > + SCSIDevice *s = data->s;
> > + SCSIRequest *req;
> > + SCSIRequest *next;
> > +
> > + /*
> > + * It is unlikely that the AioContext will change before this BH is called,
> > + * but if it happens then ->requests must not be accessed from this
> > + * AioContext.
> > + */
>
> What is the scenario where this happens? I would have expected that
> switching the AioContext of a node involves draining the node first,
> which would execute this BH before the context changes.
I don't think aio_poll() is invoked by bdrv_drained_begin() when there
are no requests in flight. In that case the BH could remain pending
across bdrv_drained_begin()/bdrv_drained_end().
> The other option I see is an empty BlockBackend, which can change its
> AioContext without polling BHs, but in that case there is no connection
> to other users, so the only change could come from virtio-scsi itself.
> If there is such a case, it would probably be helpful to be specific in
> the comment.
>
> > + if (blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context()) {
> > + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
> > + data->fn(req, data->fn_opaque);
> > + }
> > + }
>
> Of course, if the situation does happen, the question is why just doing
> nothing is correct. Wouldn't that mean that the guest still sees stuck
> requests?
>
> Would rescheduling the BH in the new context be better?
In the case where there are no requests it is correct to do nothing, but
it's not a general solution.
> > +
> > + /* Drop the reference taken by scsi_device_for_each_req_async() */
> > + object_unref(OBJECT(s));
> > +}
> > +
> > +/*
> > + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
> > + * runs in the AioContext that is executing the request.
> > + */
> > +static void scsi_device_for_each_req_async(SCSIDevice *s,
> > + void (*fn)(SCSIRequest *, void *),
> > + void *opaque)
>
> If we keep the behaviour above (doesn't do anything if the AioContext
> changes), then I think it needs to be documented for this function and
> callers should be explicit about why it's okay.
I think your suggestion to reschedule in the new AioContext is best.
Stefan
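To make the agreed-on direction concrete, here is a rough sketch of what rescheduling in the new AioContext could look like. This is illustrative only, not code from any posted revision of the series:

static void scsi_device_for_each_req_async_bh(void *opaque)
{
    g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
    SCSIDevice *s = data->s;
    AioContext *ctx = blk_get_aio_context(s->conf.blk);
    SCSIRequest *req;
    SCSIRequest *next;

    if (ctx != qemu_get_current_aio_context()) {
        /*
         * The AioContext changed before this BH ran. Instead of silently
         * skipping the requests, bounce the BH to the new context. The
         * SCSIDevice reference travels along with the BH.
         */
        aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh,
                                g_steal_pointer(&data));
        return;
    }

    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
        data->fn(req, data->fn_opaque);
    }

    /* Drop the reference taken by scsi_device_for_each_req_async() */
    object_unref(OBJECT(s));
}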
* Re: [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread
2023-12-04 16:30 ` Stefan Hajnoczi
@ 2023-12-05 10:00 ` Kevin Wolf
2023-12-06 16:25 ` Stefan Hajnoczi
0 siblings, 1 reply; 18+ messages in thread
From: Kevin Wolf @ 2023-12-05 10:00 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On 04.12.2023 at 17:30, Stefan Hajnoczi wrote:
> On Fri, Dec 01, 2023 at 05:03:13PM +0100, Kevin Wolf wrote:
> > On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> > > Stop depending on the AioContext lock and instead access
> > > SCSIDevice->requests from only one thread at a time:
> > > - When the VM is running only the BlockBackend's AioContext may access
> > > the requests list.
> > > - When the VM is stopped only the main loop may access the requests
> > > list.
> > >
> > > These constraints protect the requests list without the need for locking
> > > in the I/O code path.
> > >
> > > Note that multiple IOThreads are not supported yet because the code
> > > assumes all SCSIRequests are executed from a single AioContext. Leave
> > > that as future work.
> > >
> > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > ---
> > > include/hw/scsi/scsi.h | 7 +-
> > > hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++++++++-------------
> > > 2 files changed, 124 insertions(+), 57 deletions(-)
> > >
> > > diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
> > > index 3692ca82f3..10c4e8288d 100644
> > > --- a/include/hw/scsi/scsi.h
> > > +++ b/include/hw/scsi/scsi.h
> > > @@ -69,14 +69,19 @@ struct SCSIDevice
> > > {
> > > DeviceState qdev;
> > > VMChangeStateEntry *vmsentry;
> > > - QEMUBH *bh;
> > > uint32_t id;
> > > BlockConf conf;
> > > SCSISense unit_attention;
> > > bool sense_is_ua;
> > > uint8_t sense[SCSI_SENSE_BUF_SIZE];
> > > uint32_t sense_len;
> > > +
> > > + /*
> > > + * The requests list is only accessed from the AioContext that executes
> > > + * requests or from the main loop when IOThread processing is stopped.
> > > + */
> > > QTAILQ_HEAD(, SCSIRequest) requests;
> > > +
> > > uint32_t channel;
> > > uint32_t lun;
> > > int blocksize;
> > > diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> > > index fc4b77fdb0..b8bfde9565 100644
> > > --- a/hw/scsi/scsi-bus.c
> > > +++ b/hw/scsi/scsi-bus.c
> > > @@ -85,6 +85,82 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
> > > return d;
> > > }
> > >
> > > +/*
> > > + * Invoke @fn() for each enqueued request in device @s. Must be called from the
> > > + * main loop thread while the guest is stopped. This is only suitable for
> > > + * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
> > > + */
> > > +static void scsi_device_for_each_req_sync(SCSIDevice *s,
> > > + void (*fn)(SCSIRequest *, void *),
> > > + void *opaque)
> > > +{
> > > + SCSIRequest *req;
> > > + SCSIRequest *next_req;
> > > +
> > > + assert(!runstate_is_running());
> > > + assert(qemu_in_main_thread());
> > > +
> > > + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
> > > + fn(req, opaque);
> > > + }
> > > +}
> > > +
> > > +typedef struct {
> > > + SCSIDevice *s;
> > > + void (*fn)(SCSIRequest *, void *);
> > > + void *fn_opaque;
> > > +} SCSIDeviceForEachReqAsyncData;
> > > +
> > > +static void scsi_device_for_each_req_async_bh(void *opaque)
> > > +{
> > > + g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
> > > + SCSIDevice *s = data->s;
> > > + SCSIRequest *req;
> > > + SCSIRequest *next;
> > > +
> > > + /*
> > > + * It is unlikely that the AioContext will change before this BH is called,
> > > + * but if it happens then ->requests must not be accessed from this
> > > + * AioContext.
> > > + */
> >
> > What is the scenario where this happens? I would have expected that
> > switching the AioContext of a node involves draining the node first,
> > which would execute this BH before the context changes.
>
> I don't think aio_poll() is invoked by bdrv_drained_begin() when there
> are no requests in flight. In that case the BH could remain pending
> across bdrv_drained_begin()/bdrv_drained_end().
Hm, I wonder if that is actually a bug. Without executing pending BHs,
you can't be sure that nothing touches the node any more.
Before commit 5e8ac217, we always polled at least once, though I think
that was an unintentional side effect.
> > The other option I see is an empty BlockBackend, which can change its
> > AioContext without polling BHs, but in that case there is no connection
> > to other users, so the only change could come from virtio-scsi itself.
> > If there is such a case, it would probably be helpful to be specific in
> > the comment.
> >
> > > + if (blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context()) {
> > > + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
> > > + data->fn(req, data->fn_opaque);
> > > + }
> > > + }
> >
> > Of course, if the situation does happen, the question is why just doing
> > nothing is correct. Wouldn't that mean that the guest still sees stuck
> > requests?
> >
> > Would rescheduling the BH in the new context be better?
>
> In the case where there are no requests it is correct to do nothing,
> but it's not a general solution.
Why is it correct when there are no requests? I can see this for
scsi_device_purge_requests() because it only cancels in-flight requests,
but scsi_dma_restart_cb() is about requests queued at the device level
that are not in flight in the block layer. Not restarting them if there
aren't any other requests in flight seems wrong.
> > > +
> > > + /* Drop the reference taken by scsi_device_for_each_req_async() */
> > > + object_unref(OBJECT(s));
> > > +}
> > > +
> > > +/*
> > > + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
> > > + * runs in the AioContext that is executing the request.
> > > + */
> > > +static void scsi_device_for_each_req_async(SCSIDevice *s,
> > > + void (*fn)(SCSIRequest *, void *),
> > > + void *opaque)
> >
> > If we keep the behaviour above (doesn't do anything if the AioContext
> > changes), then I think it needs to be documented for this function and
> > callers should be explicit about why it's okay.
>
> I think your suggestion to reschedule in the new AioContext is best.
Ok, then the answer for the above is less important.
Kevin
* Re: [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread
2023-12-05 10:00 ` Kevin Wolf
@ 2023-12-06 16:25 ` Stefan Hajnoczi
0 siblings, 0 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-12-06 16:25 UTC (permalink / raw)
To: Kevin Wolf
Cc: Stefan Hajnoczi, qemu-devel, Michael S. Tsirkin,
David Hildenbrand, Philippe Mathieu-Daudé, Peter Xu,
Paolo Bonzini, Fam Zheng
On Tue, 5 Dec 2023 at 05:01, Kevin Wolf <kwolf@redhat.com> wrote:
>
> On 04.12.2023 at 17:30, Stefan Hajnoczi wrote:
> > On Fri, Dec 01, 2023 at 05:03:13PM +0100, Kevin Wolf wrote:
> > > On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> > > > Stop depending on the AioContext lock and instead access
> > > > SCSIDevice->requests from only one thread at a time:
> > > > - When the VM is running only the BlockBackend's AioContext may access
> > > > the requests list.
> > > > - When the VM is stopped only the main loop may access the requests
> > > > list.
> > > >
> > > > These constraints protect the requests list without the need for locking
> > > > in the I/O code path.
> > > >
> > > > Note that multiple IOThreads are not supported yet because the code
> > > > assumes all SCSIRequests are executed from a single AioContext. Leave
> > > > that as future work.
> > > >
> > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > ---
> > > > include/hw/scsi/scsi.h | 7 +-
> > > > hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++++++++-------------
> > > > 2 files changed, 124 insertions(+), 57 deletions(-)
> > > >
> > > > diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
> > > > index 3692ca82f3..10c4e8288d 100644
> > > > --- a/include/hw/scsi/scsi.h
> > > > +++ b/include/hw/scsi/scsi.h
> > > > @@ -69,14 +69,19 @@ struct SCSIDevice
> > > > {
> > > > DeviceState qdev;
> > > > VMChangeStateEntry *vmsentry;
> > > > - QEMUBH *bh;
> > > > uint32_t id;
> > > > BlockConf conf;
> > > > SCSISense unit_attention;
> > > > bool sense_is_ua;
> > > > uint8_t sense[SCSI_SENSE_BUF_SIZE];
> > > > uint32_t sense_len;
> > > > +
> > > > + /*
> > > > + * The requests list is only accessed from the AioContext that executes
> > > > + * requests or from the main loop when IOThread processing is stopped.
> > > > + */
> > > > QTAILQ_HEAD(, SCSIRequest) requests;
> > > > +
> > > > uint32_t channel;
> > > > uint32_t lun;
> > > > int blocksize;
> > > > diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> > > > index fc4b77fdb0..b8bfde9565 100644
> > > > --- a/hw/scsi/scsi-bus.c
> > > > +++ b/hw/scsi/scsi-bus.c
> > > > @@ -85,6 +85,82 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
> > > > return d;
> > > > }
> > > >
> > > > +/*
> > > > + * Invoke @fn() for each enqueued request in device @s. Must be called from the
> > > > + * main loop thread while the guest is stopped. This is only suitable for
> > > > + * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
> > > > + */
> > > > +static void scsi_device_for_each_req_sync(SCSIDevice *s,
> > > > + void (*fn)(SCSIRequest *, void *),
> > > > + void *opaque)
> > > > +{
> > > > + SCSIRequest *req;
> > > > + SCSIRequest *next_req;
> > > > +
> > > > + assert(!runstate_is_running());
> > > > + assert(qemu_in_main_thread());
> > > > +
> > > > + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
> > > > + fn(req, opaque);
> > > > + }
> > > > +}
> > > > +
> > > > +typedef struct {
> > > > + SCSIDevice *s;
> > > > + void (*fn)(SCSIRequest *, void *);
> > > > + void *fn_opaque;
> > > > +} SCSIDeviceForEachReqAsyncData;
> > > > +
> > > > +static void scsi_device_for_each_req_async_bh(void *opaque)
> > > > +{
> > > > + g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
> > > > + SCSIDevice *s = data->s;
> > > > + SCSIRequest *req;
> > > > + SCSIRequest *next;
> > > > +
> > > > + /*
> > > > + * It is unlikely that the AioContext will change before this BH is called,
> > > > + * but if it happens then ->requests must not be accessed from this
> > > > + * AioContext.
> > > > + */
> > >
> > > What is the scenario where this happens? I would have expected that
> > > switching the AioContext of a node involves draining the node first,
> > > which would execute this BH before the context changes.
> >
> > I don't think aio_poll() is invoked by bdrv_drained_begin() when there
> > are no requests in flight. In that case the BH could remain pending
> > across bdrv_drained_begin()/bdrv_drained_end().
>
> Hm, I wonder if that is actually a bug. Without executing pending BHs,
> you can't be sure that nothing touches the node any more.
>
> Before commit 5e8ac217, we always polled at least once, though I think
> that was an unintentional side effect.
It makes the programming model easier and safer if aio_bh_poll() is
guaranteed to be called by bdrv_drained_begin().
Then I could convert this conditional into an assertion and assume it
never happens.
> > > The other option I see is an empty BlockBackend, which can change its
> > > AioContext without polling BHs, but in that case there is no connection
> > > to other users, so the only change could come from virtio-scsi itself.
> > > If there is such a case, it would probably be helpful to be specific in
> > > the comment.
> > >
> > > > + if (blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context()) {
> > > > + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
> > > > + data->fn(req, data->fn_opaque);
> > > > + }
> > > > + }
> > >
> > > Of course, if the situation does happen, the question is why just doing
> > > nothing is correct. Wouldn't that mean that the guest still sees stuck
> > > requests?
> > >
> > > Would rescheduling the BH in the new context be better?
> >
> > In the case where there are no requests it is correct to do nothing,
> > but it's not a general solution.
>
> Why is it correct when there are no requests? I can see this for
> scsi_device_purge_requests() because it only cancels in-flight requests,
> but scsi_dma_restart_cb() is about requests queued at the device level
> that are not in flight in the block layer. Not restarting them if there
> aren't any other requests in flight seems wrong.
You're right!
> > > > +
> > > > + /* Drop the reference taken by scsi_device_for_each_req_async() */
> > > > + object_unref(OBJECT(s));
> > > > +}
> > > > +
> > > > +/*
> > > > + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
> > > > + * runs in the AioContext that is executing the request.
> > > > + */
> > > > +static void scsi_device_for_each_req_async(SCSIDevice *s,
> > > > + void (*fn)(SCSIRequest *, void *),
> > > > + void *opaque)
> > >
> > > If we keep the behaviour above (doesn't do anything if the AioContext
> > > changes), then I think it needs to be documented for this function and
> > > callers should be explicit about why it's okay.
> >
> > I think your suggestion to reschedule in the new AioContext is best.
>
> Ok, then the answer for the above is less important.
I already sent v2 but will send a v3 if you think bdrv_drained_begin()
should always call aio_poll()?
Stefan
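For reference, the assertion variant mentioned above would be a one-line change in the BH. This is a hypothetical sketch and is only valid if drained sections are guaranteed to run pending BHs before an AioContext switch:

    /* Hypothetical: requires bdrv_drained_begin() to always run pending BHs */
    assert(blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context());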
* [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier()
2023-11-23 19:49 [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
2023-11-23 19:49 ` [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread Stefan Hajnoczi
@ 2023-11-23 19:49 ` Stefan Hajnoczi
2023-11-27 15:21 ` Eric Blake
2023-12-01 16:11 ` Kevin Wolf
2023-11-23 19:49 ` [PATCH 3/4] scsi: don't lock AioContext in I/O code path Stefan Hajnoczi
` (2 subsequent siblings)
4 siblings, 2 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-11-23 19:49 UTC (permalink / raw)
To: qemu-devel
Cc: Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng,
Stefan Hajnoczi
virtio_queue_aio_attach_host_notifier() does not require the AioContext
lock. Stop taking the lock and remember add an explicit smp_wmb()
because we were relying on the implicit barrier in the AioContext lock
before.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/scsi/virtio-scsi-dataplane.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 1e684beebe..135e23fe54 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
memory_region_transaction_commit();
- /*
- * These fields are visible to the IOThread so we rely on implicit barriers
- * in aio_context_acquire() on the write side and aio_notify_accept() on
- * the read side.
- */
s->dataplane_starting = false;
s->dataplane_started = true;
+ smp_wmb(); /* paired with aio_notify_accept() */
if (s->bus.drain_count == 0) {
- aio_context_acquire(s->ctx);
virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
for (i = 0; i < vs->conf.num_queues; i++) {
virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
}
- aio_context_release(s->ctx);
}
return 0;
--
2.42.0
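For context, the ordering that the commit message relies on is the usual publish/consume pairing: the writer makes its stores visible before setting a flag, and the reader issues the pairing barrier before consuming the data (in the patch the read side relies on the barrier implied by aio_notify_accept()). A minimal generic sketch, with made-up variable and function names that are not part of virtio-scsi:

#include "qemu/osdep.h"
#include "qemu/atomic.h"   /* smp_wmb()/smp_rmb() */

static int payload;
static bool ready;

/* Writer, e.g. the main loop thread starting dataplane */
static void publish(void)
{
    payload = 42;
    smp_wmb();      /* order the payload store before the flag store */
    ready = true;
}

/* Reader, e.g. the IOThread after it has been notified */
static void consume(void)
{
    if (ready) {
        smp_rmb();  /* pairs with smp_wmb() in publish() */
        assert(payload == 42);
    }
}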
* Re: [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier()
2023-11-23 19:49 ` [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier() Stefan Hajnoczi
@ 2023-11-27 15:21 ` Eric Blake
2023-12-04 15:37 ` Stefan Hajnoczi
2023-12-01 16:11 ` Kevin Wolf
1 sibling, 1 reply; 18+ messages in thread
From: Eric Blake @ 2023-11-27 15:21 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On Thu, Nov 23, 2023 at 02:49:29PM -0500, Stefan Hajnoczi wrote:
> virtio_queue_aio_attach_host_notifier() does not require the AioContext
> lock. Stop taking the lock and remember add an explicit smp_wmb()
s/remember// ?
> because we were relying on the implicit barrier in the AioContext lock
> before.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> hw/scsi/virtio-scsi-dataplane.c | 8 +-------
> 1 file changed, 1 insertion(+), 7 deletions(-)
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
* Re: [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier()
2023-11-27 15:21 ` Eric Blake
@ 2023-12-04 15:37 ` Stefan Hajnoczi
0 siblings, 0 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-12-04 15:37 UTC (permalink / raw)
To: Eric Blake
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On Mon, Nov 27, 2023 at 09:21:08AM -0600, Eric Blake wrote:
> On Thu, Nov 23, 2023 at 02:49:29PM -0500, Stefan Hajnoczi wrote:
> > virtio_queue_aio_attach_host_notifier() does not require the AioContext
> > lock. Stop taking the lock and remember add an explicit smp_wmb()
>
> s/remember// ?
Will fix, thanks!
Stefan
* Re: [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier()
2023-11-23 19:49 ` [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier() Stefan Hajnoczi
2023-11-27 15:21 ` Eric Blake
@ 2023-12-01 16:11 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Kevin Wolf @ 2023-12-01 16:11 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> virtio_queue_aio_attach_host_notifier() does not require the AioContext
> lock. Stop taking the lock and remember add an explicit smp_wmb()
> because we were relying on the implicit barrier in the AioContext lock
> before.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
* [PATCH 3/4] scsi: don't lock AioContext in I/O code path
2023-11-23 19:49 [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
2023-11-23 19:49 ` [PATCH 1/4] scsi: only access SCSIDevice->requests from one thread Stefan Hajnoczi
2023-11-23 19:49 ` [PATCH 2/4] virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier() Stefan Hajnoczi
@ 2023-11-23 19:49 ` Stefan Hajnoczi
2023-11-27 15:58 ` Eric Blake
2023-12-01 16:38 ` Kevin Wolf
2023-11-23 19:49 ` [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb() Stefan Hajnoczi
2023-11-23 19:57 ` [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
4 siblings, 2 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-11-23 19:49 UTC (permalink / raw)
To: qemu-devel
Cc: Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng,
Stefan Hajnoczi
blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's
internal state also does not anymore.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/scsi/scsi-disk.c | 23 -----------------------
hw/scsi/scsi-generic.c | 20 +++-----------------
2 files changed, 3 insertions(+), 40 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 6691f5edb8..2c1bbb3530 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret)
scsi_req_complete(&r->req, GOOD);
done:
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
scsi_req_unref(&r->req);
}
@@ -394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret)
trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
}
scsi_read_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
/* Actually issue a read to the block device. */
@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret)
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_do_read(opaque, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
/* Read more data from scsi device into buffer. */
@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret)
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_write_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_write_data(SCSIRequest *req)
@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret)
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret)
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
scsi_unmap_complete_noio(data, ret);
}
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret)
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret)
data->sector << BDRV_SECTOR_BITS,
&data->qiov, 0,
scsi_write_same_complete, data);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
return;
}
@@ -1857,7 +1839,6 @@ done:
scsi_req_unref(&r->req);
qemu_vfree(data->iov.iov_base);
g_free(data);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
{
SCSIBlockReq *req = (SCSIBlockReq *)opaque;
SCSIDiskReq *r = &req->req;
- SCSIDevice *s = r->req.dev;
sg_io_hdr_t *io_hdr = &req->io_header;
if (ret == 0) {
@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
}
if (ret > 0) {
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
if (scsi_handle_rw_error(r, ret, true)) {
- aio_context_release(blk_get_aio_context(s->conf.blk));
scsi_req_unref(&r->req);
return;
}
- aio_context_release(blk_get_aio_context(s->conf.blk));
/* Ignore error. */
ret = 0;
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 2417f0ad84..b7b04e1d63 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -109,15 +109,11 @@ done:
static void scsi_command_complete(void *opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
- SCSIDevice *s = r->req.dev;
-
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
scsi_command_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->conf.blk));
}
static int execute_command(BlockBackend *blk,
@@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret)
SCSIDevice *s = r->req.dev;
int len;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
- goto done;
+ return;
}
len = r->io_header.dxfer_len - r->io_header.resid;
@@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret)
r->io_header.status != GOOD ||
len == 0) {
scsi_command_complete_noio(r, 0);
- goto done;
+ return;
}
/* Snoop READ CAPACITY output to set the blocksize. */
@@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret)
req_complete:
scsi_req_data(&r->req, len);
scsi_req_unref(&r->req);
-
-done:
- aio_context_release(blk_get_aio_context(s->conf.blk));
}
/* Read more data from scsi device into buffer. */
@@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret)
trace_scsi_generic_write_complete(ret);
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
- goto done;
+ return;
}
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
@@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret)
}
scsi_command_complete_noio(r, ret);
-
-done:
- aio_context_release(blk_get_aio_context(s->conf.blk));
}
/* Write data to a scsi device. Returns nonzero on failure.
--
2.42.0
* Re: [PATCH 3/4] scsi: don't lock AioContext in I/O code path
2023-11-23 19:49 ` [PATCH 3/4] scsi: don't lock AioContext in I/O code path Stefan Hajnoczi
@ 2023-11-27 15:58 ` Eric Blake
2023-12-01 16:38 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Eric Blake @ 2023-11-27 15:58 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On Thu, Nov 23, 2023 at 02:49:30PM -0500, Stefan Hajnoczi wrote:
> blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's
> internal state also does not anymore.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> hw/scsi/scsi-disk.c | 23 -----------------------
> hw/scsi/scsi-generic.c | 20 +++-----------------
> 2 files changed, 3 insertions(+), 40 deletions(-)
>
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
* Re: [PATCH 3/4] scsi: don't lock AioContext in I/O code path
2023-11-23 19:49 ` [PATCH 3/4] scsi: don't lock AioContext in I/O code path Stefan Hajnoczi
2023-11-27 15:58 ` Eric Blake
@ 2023-12-01 16:38 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Kevin Wolf @ 2023-12-01 16:38 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's
> internal state also does not anymore.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
With this one, I'm not really confident enough to give an R-b. I couldn't
find a problem, but I suppose it's more a case of "looks plausible
enough, we'll see what breaks". And while scsi is manageable enough that
I could in theory invest a few more hours, we'll get more changes of the
same type anyway when merging the full removal of the AioContext lock,
and reviewing all of that in detail will be impossible.
Acked-by: Kevin Wolf <kwolf@redhat.com>
* [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb()
2023-11-23 19:49 [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
` (2 preceding siblings ...)
2023-11-23 19:49 ` [PATCH 3/4] scsi: don't lock AioContext in I/O code path Stefan Hajnoczi
@ 2023-11-23 19:49 ` Stefan Hajnoczi
2023-11-27 18:46 ` Eric Blake
2023-12-01 16:48 ` Kevin Wolf
2023-11-23 19:57 ` [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
4 siblings, 2 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-11-23 19:49 UTC (permalink / raw)
To: qemu-devel
Cc: Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng,
Stefan Hajnoczi
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
to avoid a race with scsi_device_purge_requests() running in the main
loop thread.
The SCSI code no longer calls dma_aio_cancel() from the main loop thread
while I/O is running in the IOThread AioContext. Therefore it is no
longer necessary to take this lock to protect DMAAIOCB fields. The
->cb() function also does not require the lock because blk_aio_*() and
friends do not need the AioContext lock.
Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
rely on it taking the AioContext lock, so this change is safe.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
system/dma-helpers.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index 36211acc7e..528117f256 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
trace_dma_blk_cb(dbs, ret);
- aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
- goto out;
+ return;
}
dma_blk_unmap(dbs);
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
trace_dma_map_wait(dbs);
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
- goto out;
+ return;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
assert(dbs->acb);
-out:
- aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
--
2.42.0
* Re: [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb()
2023-11-23 19:49 ` [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb() Stefan Hajnoczi
@ 2023-11-27 18:46 ` Eric Blake
2023-12-01 16:48 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Eric Blake @ 2023-11-27 18:46 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On Thu, Nov 23, 2023 at 02:49:31PM -0500, Stefan Hajnoczi wrote:
> Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
> dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
> to avoid a race with scsi_device_purge_requests() running in the main
> loop thread.
>
> The SCSI code no longer calls dma_aio_cancel() from the main loop thread
> while I/O is running in the IOThread AioContext. Therefore it is no
> longer necessary to take this lock to protect DMAAIOCB fields. The
> ->cb() function also does not require the lock because blk_aio_*() and
> friends do not need the AioContext lock.
>
> Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
> rely on it taking the AioContext lock, so this change is safe.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> system/dma-helpers.c | 7 ++-----
> 1 file changed, 2 insertions(+), 5 deletions(-)
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
* Re: [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb()
2023-11-23 19:49 ` [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb() Stefan Hajnoczi
2023-11-27 18:46 ` Eric Blake
@ 2023-12-01 16:48 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Kevin Wolf @ 2023-12-01 16:48 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: qemu-devel, Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng
On 23.11.2023 at 20:49, Stefan Hajnoczi wrote:
> Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
> dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
> to avoid a race with scsi_device_purge_requests() running in the main
> loop thread.
>
> The SCSI code no longer calls dma_aio_cancel() from the main loop thread
> while I/O is running in the IOThread AioContext. Therefore it is no
> longer necessary to take this lock to protect DMAAIOCB fields. The
> ->cb() function also does not require the lock because blk_aio_*() and
> friends do not need the AioContext lock.
>
> Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
> rely on it taking the AioContext lock, so this change is safe.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
The commit message neglects to talk about dbs->io_func, which is what
took the AioContext lock even before commit abfcd2760b3e. I think the
reason is the same as for the previous patch.
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
* Re: [PATCH 0/4] scsi: eliminate AioContext lock
2023-11-23 19:49 [PATCH 0/4] scsi: eliminate AioContext lock Stefan Hajnoczi
` (3 preceding siblings ...)
2023-11-23 19:49 ` [PATCH 4/4] dma-helpers: don't lock AioContext in dma_blk_cb() Stefan Hajnoczi
@ 2023-11-23 19:57 ` Stefan Hajnoczi
4 siblings, 0 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2023-11-23 19:57 UTC (permalink / raw)
To: qemu-devel
Cc: Michael S. Tsirkin, David Hildenbrand,
Philippe Mathieu-Daudé, Peter Xu, Paolo Bonzini, Fam Zheng,
kwolf, qemu-block
On Thu, Nov 23, 2023 at 02:49:27PM -0500, Stefan Hajnoczi wrote:
> The SCSI subsystem uses the AioContext lock to protect internal state. This is
> necessary because the main loop and the IOThread can access SCSI state in
> parallel. This inter-thread access happens during scsi_device_purge_requests()
> and scsi_dma_restart_cb().
>
> This patch series modifies the code so SCSI state is only accessed from the
> IOThread that is executing requests. Once this has been achieved the AioContext
> lock is no longer necessary.
>
> Note that a few aio_context_acquire()/aio_context_release() calls still remain
> after this series. They surround API calls that invoke AIO_WAIT_WHILE() and
> therefore still rely on the AioContext lock for now.
>
> Stefan Hajnoczi (4):
> scsi: only access SCSIDevice->requests from one thread
> virtio-scsi: don't lock AioContext around
> virtio_queue_aio_attach_host_notifier()
> scsi: don't lock AioContext in I/O code path
> dma-helpers: don't lock AioContext in dma_blk_cb()
>
> include/hw/scsi/scsi.h | 7 +-
> hw/scsi/scsi-bus.c | 174 ++++++++++++++++++++++----------
> hw/scsi/scsi-disk.c | 23 -----
> hw/scsi/scsi-generic.c | 20 +---
> hw/scsi/virtio-scsi-dataplane.c | 8 +-
> system/dma-helpers.c | 7 +-
> 6 files changed, 130 insertions(+), 109 deletions(-)
CCing Kevin and qemu-block
>
> --
> 2.42.0
>