* [PULL v2 01/17] util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar()
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 02/17] block/nvme: Map doorbells pages write-only Stefan Hajnoczi
` (16 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Stefan Hajnoczi, Cleber Rosa, Philippe Mathieu-Daudé
From: Philippe Mathieu-Daudé <philmd@redhat.com>
Pages are currently mapped READ/WRITE. To be able to use different
protections, add a new argument to qemu_vfio_pci_map_bar().
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200922083821.578519-2-philmd@redhat.com>
---
include/qemu/vfio-helpers.h | 2 +-
block/nvme.c | 3 ++-
util/vfio-helpers.c | 4 ++--
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
index 1f057c2b9e..4491c8e1a6 100644
--- a/include/qemu/vfio-helpers.h
+++ b/include/qemu/vfio-helpers.h
@@ -22,7 +22,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
- uint64_t offset, uint64_t size,
+ uint64_t offset, uint64_t size, int prot,
Error **errp);
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
uint64_t offset, uint64_t size);
diff --git a/block/nvme.c b/block/nvme.c
index f4f27b6da7..5a4dc6a722 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -712,7 +712,8 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
goto out;
}
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp);
+ s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE,
+ PROT_READ | PROT_WRITE, errp);
if (!s->regs) {
ret = -EINVAL;
goto out;
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 583bdfb36f..9ac307e3d4 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -146,13 +146,13 @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
* Map a PCI bar area.
*/
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
- uint64_t offset, uint64_t size,
+ uint64_t offset, uint64_t size, int prot,
Error **errp)
{
void *p;
assert_bar_index_valid(s, index);
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
- PROT_READ | PROT_WRITE, MAP_SHARED,
+ prot, MAP_SHARED,
s->device, s->bar_region_info[index].offset + offset);
if (p == MAP_FAILED) {
error_setg_errno(errp, errno, "Failed to map BAR region");
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 02/17] block/nvme: Map doorbells pages write-only
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 01/17] util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar() Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 03/17] block/nvme: Reduce I/O registers scope Stefan Hajnoczi
` (15 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Stefan Hajnoczi, Cleber Rosa, Philippe Mathieu-Daudé
From: Philippe Mathieu-Daudé <philmd@redhat.com>
Per the datasheet sections 3.1.13/3.1.14:
"The host should not read the doorbell registers."
As we don't need read access, map the doorbells with write-only
permission. We keep a reference to this mapped address in the
BDRVNVMeState structure.
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200922083821.578519-3-philmd@redhat.com>
---
block/nvme.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/block/nvme.c b/block/nvme.c
index 5a4dc6a722..3c834da8fe 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -31,7 +31,7 @@
#define NVME_SQ_ENTRY_BYTES 64
#define NVME_CQ_ENTRY_BYTES 16
#define NVME_QUEUE_SIZE 128
-#define NVME_BAR_SIZE 8192
+#define NVME_DOORBELL_SIZE 4096
/*
* We have to leave one slot empty as that is the full queue case where
@@ -84,10 +84,6 @@ typedef struct {
/* Memory mapped registers */
typedef volatile struct {
NvmeBar ctrl;
- struct {
- uint32_t sq_tail;
- uint32_t cq_head;
- } doorbells[];
} NVMeRegs;
#define INDEX_ADMIN 0
@@ -103,6 +99,11 @@ struct BDRVNVMeState {
AioContext *aio_context;
QEMUVFIOState *vfio;
NVMeRegs *regs;
+ /* Memory mapped registers */
+ volatile struct {
+ uint32_t sq_tail;
+ uint32_t cq_head;
+ } *doorbells;
/* The submission/completion queue pairs.
* [0]: admin queue.
* [1..]: io queues.
@@ -247,14 +248,14 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
error_propagate(errp, local_err);
goto fail;
}
- q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail;
+ q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail;
nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto fail;
}
- q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head;
+ q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head;
return q;
fail:
@@ -712,13 +713,12 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
goto out;
}
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE,
+ s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
PROT_READ | PROT_WRITE, errp);
if (!s->regs) {
ret = -EINVAL;
goto out;
}
-
/* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */
@@ -748,6 +748,13 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
}
+ s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
+ NVME_DOORBELL_SIZE, PROT_WRITE, errp);
+ if (!s->doorbells) {
+ ret = -EINVAL;
+ goto out;
+ }
+
/* Set up admin queue. */
s->queues = g_new(NVMeQueuePair *, 1);
s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
@@ -873,7 +880,9 @@ static void nvme_close(BlockDriverState *bs)
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL);
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
+ sizeof(NvmeBar), NVME_DOORBELL_SIZE);
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, sizeof(NvmeBar));
qemu_vfio_close(s->vfio);
g_free(s->device);
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 03/17] block/nvme: Reduce I/O registers scope
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 01/17] util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar() Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 02/17] block/nvme: Map doorbells pages write-only Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 04/17] block/nvme: Drop NVMeRegs structure, directly use NvmeBar Stefan Hajnoczi
` (14 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Stefan Hajnoczi, Cleber Rosa, Philippe Mathieu-Daudé
From: Philippe Mathieu-Daudé <philmd@redhat.com>
We only access the I/O register in nvme_init().
Remove the reference in BDRVNVMeState and reduce its scope.
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200922083821.578519-4-philmd@redhat.com>
---
block/nvme.c | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/block/nvme.c b/block/nvme.c
index 3c834da8fe..e517c7539f 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -98,7 +98,6 @@ enum {
struct BDRVNVMeState {
AioContext *aio_context;
QEMUVFIOState *vfio;
- NVMeRegs *regs;
/* Memory mapped registers */
volatile struct {
uint32_t sq_tail;
@@ -695,6 +694,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
uint64_t timeout_ms;
uint64_t deadline, now;
Error *local_err = NULL;
+ NVMeRegs *regs;
qemu_co_mutex_init(&s->dma_map_lock);
qemu_co_queue_init(&s->dma_flush_queue);
@@ -713,16 +713,16 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
goto out;
}
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
- PROT_READ | PROT_WRITE, errp);
- if (!s->regs) {
+ regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
+ PROT_READ | PROT_WRITE, errp);
+ if (!regs) {
ret = -EINVAL;
goto out;
}
/* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */
- cap = le64_to_cpu(s->regs->ctrl.cap);
+ cap = le64_to_cpu(regs->ctrl.cap);
if (!(cap & (1ULL << 37))) {
error_setg(errp, "Device doesn't support NVMe command set");
ret = -EINVAL;
@@ -735,10 +735,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
/* Reset device to get a clean state. */
- s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
+ regs->ctrl.cc = cpu_to_le32(le32_to_cpu(regs->ctrl.cc) & 0xFE);
/* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
- while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
+ while (le32_to_cpu(regs->ctrl.csts) & 0x1) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)",
@@ -766,18 +766,18 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
s->nr_queues = 1;
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
- s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
- s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
- s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
+ regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+ regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+ regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
/* After setting up all control registers we can enable device now. */
- s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
+ regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
(ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
0x1);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = now + timeout_ms * 1000000;
- while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
+ while (!(le32_to_cpu(regs->ctrl.csts) & 0x1)) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)",
@@ -808,6 +808,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
ret = -EIO;
}
out:
+ if (regs) {
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar));
+ }
+
/* Cleaning up is done in nvme_file_open() upon error. */
return ret;
}
@@ -882,7 +886,6 @@ static void nvme_close(BlockDriverState *bs)
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
sizeof(NvmeBar), NVME_DOORBELL_SIZE);
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, sizeof(NvmeBar));
qemu_vfio_close(s->vfio);
g_free(s->device);
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 04/17] block/nvme: Drop NVMeRegs structure, directly use NvmeBar
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (2 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 03/17] block/nvme: Reduce I/O registers scope Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 05/17] block/nvme: Use register definitions from 'block/nvme.h' Stefan Hajnoczi
` (13 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Stefan Hajnoczi, Cleber Rosa, Philippe Mathieu-Daudé
From: Philippe Mathieu-Daudé <philmd@redhat.com>
NVMeRegs only contains NvmeBar. Simplify the code by using NvmeBar
directly.
This triggers a checkpatch.pl error:
ERROR: Use of volatile is usually wrong, please add a comment
#30: FILE: block/nvme.c:691:
+ volatile NvmeBar *regs;
This is a false positive as in our case we are using I/O registers,
so the 'volatile' use is justified.
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200922083821.578519-5-philmd@redhat.com>
---
block/nvme.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/block/nvme.c b/block/nvme.c
index e517c7539f..bd82990b66 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -81,11 +81,6 @@ typedef struct {
QEMUBH *completion_bh;
} NVMeQueuePair;
-/* Memory mapped registers */
-typedef volatile struct {
- NvmeBar ctrl;
-} NVMeRegs;
-
#define INDEX_ADMIN 0
#define INDEX_IO(n) (1 + n)
@@ -694,7 +689,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
uint64_t timeout_ms;
uint64_t deadline, now;
Error *local_err = NULL;
- NVMeRegs *regs;
+ volatile NvmeBar *regs = NULL;
qemu_co_mutex_init(&s->dma_map_lock);
qemu_co_queue_init(&s->dma_flush_queue);
@@ -722,7 +717,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
/* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */
- cap = le64_to_cpu(regs->ctrl.cap);
+ cap = le64_to_cpu(regs->cap);
if (!(cap & (1ULL << 37))) {
error_setg(errp, "Device doesn't support NVMe command set");
ret = -EINVAL;
@@ -735,10 +730,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
/* Reset device to get a clean state. */
- regs->ctrl.cc = cpu_to_le32(le32_to_cpu(regs->ctrl.cc) & 0xFE);
+ regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
/* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
- while (le32_to_cpu(regs->ctrl.csts) & 0x1) {
+ while (le32_to_cpu(regs->csts) & 0x1) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)",
@@ -766,18 +761,18 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
s->nr_queues = 1;
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
- regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
- regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
- regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
+ regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+ regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+ regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
/* After setting up all control registers we can enable device now. */
- regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
+ regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
(ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
0x1);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = now + timeout_ms * 1000000;
- while (!(le32_to_cpu(regs->ctrl.csts) & 0x1)) {
+ while (!(le32_to_cpu(regs->csts) & 0x1)) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)",
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 05/17] block/nvme: Use register definitions from 'block/nvme.h'
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (3 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 04/17] block/nvme: Drop NVMeRegs structure, directly use NvmeBar Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 06/17] block/nvme: Replace magic value by SCALE_MS definition Stefan Hajnoczi
` (12 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Stefan Hajnoczi, Cleber Rosa, Philippe Mathieu-Daudé
From: Philippe Mathieu-Daudé <philmd@redhat.com>
Use the NVMe register definitions from "block/nvme.h" which
ease a bit reviewing the code while matching the datasheet.
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200922083821.578519-6-philmd@redhat.com>
---
block/nvme.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/block/nvme.c b/block/nvme.c
index bd82990b66..959569d262 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -718,22 +718,22 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
* Initialization". */
cap = le64_to_cpu(regs->cap);
- if (!(cap & (1ULL << 37))) {
+ if (!NVME_CAP_CSS(cap)) {
error_setg(errp, "Device doesn't support NVMe command set");
ret = -EINVAL;
goto out;
}
- s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF)));
- s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t);
+ s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
+ s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
bs->bl.opt_mem_alignment = s->page_size;
- timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
+ timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
/* Reset device to get a clean state. */
regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
/* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
- while (le32_to_cpu(regs->csts) & 0x1) {
+ while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)",
@@ -761,18 +761,19 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
s->nr_queues = 1;
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
- regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+ regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
+ (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
/* After setting up all control registers we can enable device now. */
- regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
- (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
- 0x1);
+ regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
+ (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
+ CC_EN_MASK);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = now + timeout_ms * 1000000;
- while (!(le32_to_cpu(regs->csts) & 0x1)) {
+ while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)",
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 06/17] block/nvme: Replace magic value by SCALE_MS definition
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (4 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 05/17] block/nvme: Use register definitions from 'block/nvme.h' Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 07/17] block: return error-code from bdrv_invalidate_cache Stefan Hajnoczi
` (11 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Stefan Hajnoczi, Cleber Rosa, Philippe Mathieu-Daudé
From: Philippe Mathieu-Daudé <philmd@redhat.com>
Use self-explicit SCALE_MS definition instead of magic value
(missed in similar commit e4f310fe7f5).
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200922083821.578519-7-philmd@redhat.com>
---
block/nvme.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/nvme.c b/block/nvme.c
index 959569d262..b48f6f2588 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -772,7 +772,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
CC_EN_MASK);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- deadline = now + timeout_ms * 1000000;
+ deadline = now + timeout_ms * SCALE_MS;
while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 07/17] block: return error-code from bdrv_invalidate_cache
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (5 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 06/17] block/nvme: Replace magic value by SCALE_MS definition Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 08/17] block/io: refactor coroutine wrappers Stefan Hajnoczi
` (10 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Philippe Mathieu-Daudé,
Max Reitz, Stefan Hajnoczi, Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
This is the only coroutine wrapper from block.c and block/io.c which
doesn't return a value, so let's convert it to the common behavior, to
simplify moving to generated coroutine wrappers in a further commit.
Also, bdrv_invalidate_cache is a void function, returning error only
through **errp parameter, which is considered to be bad practice, as
it forces callers to define and propagate local_err variable, so
conversion is good anyway.
This patch leaves the conversion of .bdrv_co_invalidate_cache() driver
callbacks and bdrv_invalidate_cache_all() for another day.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200924185414.28642-2-vsementsov@virtuozzo.com>
---
include/block/block.h | 2 +-
block.c | 32 ++++++++++++++++++--------------
2 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/include/block/block.h b/include/block/block.h
index 981ab5b314..81d591dd4c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -460,7 +460,7 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
/* Invalidate any cached metadata used by image formats */
-void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
void bdrv_invalidate_cache_all(Error **errp);
int bdrv_inactivate_all(void);
diff --git a/block.c b/block.c
index f4b6dd5d3d..8de14de49c 100644
--- a/block.c
+++ b/block.c
@@ -5781,8 +5781,8 @@ void bdrv_init_with_whitelist(void)
bdrv_init();
}
-static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
- Error **errp)
+static int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
{
BdrvChild *child, *parent;
uint64_t perm, shared_perm;
@@ -5791,14 +5791,14 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
BdrvDirtyBitmap *bm;
if (!bs->drv) {
- return;
+ return -ENOMEDIUM;
}
QLIST_FOREACH(child, &bs->children, next) {
bdrv_co_invalidate_cache(child->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return;
+ return -EINVAL;
}
}
@@ -5821,7 +5821,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp);
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
- return;
+ return ret;
}
bdrv_set_perm(bs, perm, shared_perm);
@@ -5830,7 +5830,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
if (local_err) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
- return;
+ return -EINVAL;
}
}
@@ -5842,7 +5842,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
error_setg_errno(errp, -ret, "Could not refresh total sector count");
- return;
+ return ret;
}
}
@@ -5852,27 +5852,30 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
if (local_err) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
- return;
+ return -EINVAL;
}
}
}
+
+ return 0;
}
typedef struct InvalidateCacheCo {
BlockDriverState *bs;
Error **errp;
bool done;
+ int ret;
} InvalidateCacheCo;
static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
{
InvalidateCacheCo *ico = opaque;
- bdrv_co_invalidate_cache(ico->bs, ico->errp);
+ ico->ret = bdrv_co_invalidate_cache(ico->bs, ico->errp);
ico->done = true;
aio_wait_kick();
}
-void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
+int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
Coroutine *co;
InvalidateCacheCo ico = {
@@ -5889,22 +5892,23 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, !ico.done);
}
+
+ return ico.ret;
}
void bdrv_invalidate_cache_all(Error **errp)
{
BlockDriverState *bs;
- Error *local_err = NULL;
BdrvNextIterator it;
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
+ int ret;
aio_context_acquire(aio_context);
- bdrv_invalidate_cache(bs, &local_err);
+ ret = bdrv_invalidate_cache(bs, errp);
aio_context_release(aio_context);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (ret < 0) {
bdrv_next_cleanup(&it);
return;
}
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 08/17] block/io: refactor coroutine wrappers
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (6 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 07/17] block: return error-code from bdrv_invalidate_cache Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 09/17] block: declare some coroutine functions in block/coroutines.h Stefan Hajnoczi
` (9 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Philippe Mathieu-Daudé,
Max Reitz, Stefan Hajnoczi, Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Most of our coroutine wrappers already follow this convention:
We have 'coroutine_fn bdrv_co_<something>(<normal argument list>)' as
the core function, and a wrapper 'bdrv_<something>(<same argument
list>)' which does parameter packing and calls bdrv_run_co().
The only outsiders are the bdrv_prwv_co and
bdrv_common_block_status_above wrappers. Let's refactor them to behave
as the others, it simplifies further conversion of coroutine wrappers.
This patch adds an indirection layer, but it will be compensated by
a further commit, which will drop bdrv_co_prwv together with the
is_write logic, to keep the read and write paths separate.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200924185414.28642-3-vsementsov@virtuozzo.com>
---
block/io.c | 60 +++++++++++++++++++++++++++++-------------------------
1 file changed, 32 insertions(+), 28 deletions(-)
diff --git a/block/io.c b/block/io.c
index 11df1889f1..b4f6ab0ab1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -933,27 +933,31 @@ typedef struct RwCo {
BdrvRequestFlags flags;
} RwCo;
+static int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
+{
+ if (is_write) {
+ return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
+ } else {
+ return bdrv_co_preadv(child, offset, qiov->size, qiov, flags);
+ }
+}
+
static int coroutine_fn bdrv_rw_co_entry(void *opaque)
{
RwCo *rwco = opaque;
- if (!rwco->is_write) {
- return bdrv_co_preadv(rwco->child, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
- } else {
- return bdrv_co_pwritev(rwco->child, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
- }
+ return bdrv_co_prwv(rwco->child, rwco->offset, rwco->qiov,
+ rwco->is_write, rwco->flags);
}
/*
* Process a vectored synchronous request using coroutines
*/
-static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
+static int bdrv_prwv(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
RwCo rwco = {
.child = child,
@@ -971,8 +975,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
- return bdrv_prwv_co(child, offset, &qiov, true,
- BDRV_REQ_ZERO_WRITE | flags);
+ return bdrv_prwv(child, offset, &qiov, true, BDRV_REQ_ZERO_WRITE | flags);
}
/*
@@ -1021,7 +1024,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
int ret;
- ret = bdrv_prwv_co(child, offset, qiov, false, 0);
+ ret = bdrv_prwv(child, offset, qiov, false, 0);
if (ret < 0) {
return ret;
}
@@ -1045,7 +1048,7 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
int ret;
- ret = bdrv_prwv_co(child, offset, qiov, true, 0);
+ ret = bdrv_prwv(child, offset, qiov, true, 0);
if (ret < 0) {
return ret;
}
@@ -2449,14 +2452,15 @@ early_out:
return ret;
}
-static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
- BlockDriverState *base,
- bool want_zero,
- int64_t offset,
- int64_t bytes,
- int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn
+bdrv_co_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
BlockDriverState *p;
int ret = 0;
@@ -2494,10 +2498,10 @@ static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
{
BdrvCoBlockStatusData *data = opaque;
- return bdrv_co_block_status_above(data->bs, data->base,
- data->want_zero,
- data->offset, data->bytes,
- data->pnum, data->map, data->file);
+ return bdrv_co_common_block_status_above(data->bs, data->base,
+ data->want_zero,
+ data->offset, data->bytes,
+ data->pnum, data->map, data->file);
}
/*
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 09/17] block: declare some coroutine functions in block/coroutines.h
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (7 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 08/17] block/io: refactor coroutine wrappers Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 10/17] scripts: add block-coroutine-wrapper.py Stefan Hajnoczi
` (8 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Philippe Mathieu-Daudé,
Max Reitz, Stefan Hajnoczi, Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
We are going to keep coroutine-wrappers code (structure-packing
parameters, BDRV_POLL wrapper functions) in separate auto-generated
files. So, we'll need a header with declaration of original _co_
functions, for those which are static now. As well, we'll need
declarations for wrapper functions. Do these declarations now, as a
preparation step.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200924185414.28642-4-vsementsov@virtuozzo.com>
---
block/coroutines.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++
block.c | 8 +++---
block/io.c | 34 +++++++++++------------
3 files changed, 88 insertions(+), 21 deletions(-)
create mode 100644 block/coroutines.h
diff --git a/block/coroutines.h b/block/coroutines.h
new file mode 100644
index 0000000000..9ce1730a09
--- /dev/null
+++ b/block/coroutines.h
@@ -0,0 +1,67 @@
+/*
+ * Block layer I/O functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_COROUTINES_INT_H
+#define BLOCK_COROUTINES_INT_H
+
+#include "block/block_int.h"
+
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
+ BdrvCheckResult *res, BdrvCheckMode fix);
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
+
+int coroutine_fn
+bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
+ bool is_write, BdrvRequestFlags flags);
+int
+bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
+ bool is_write, BdrvRequestFlags flags);
+
+int coroutine_fn
+bdrv_co_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
+int
+bdrv_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
+
+int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read);
+int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read);
+
+#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block.c b/block.c
index 8de14de49c..324714351c 100644
--- a/block.c
+++ b/block.c
@@ -48,6 +48,7 @@
#include "qemu/timer.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
+#include "block/coroutines.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
@@ -4676,8 +4677,8 @@ static void bdrv_delete(BlockDriverState *bs)
* free of errors) or -errno when an internal error occurred. The results of the
* check are stored in res.
*/
-static int coroutine_fn bdrv_co_check(BlockDriverState *bs,
- BdrvCheckResult *res, BdrvCheckMode fix)
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
+ BdrvCheckResult *res, BdrvCheckMode fix)
{
if (bs->drv == NULL) {
return -ENOMEDIUM;
@@ -5781,8 +5782,7 @@ void bdrv_init_with_whitelist(void)
bdrv_init();
}
-static int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
- Error **errp)
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
BdrvChild *child, *parent;
uint64_t perm, shared_perm;
diff --git a/block/io.c b/block/io.c
index b4f6ab0ab1..55b3b7692c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -29,6 +29,7 @@
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
+#include "block/coroutines.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -933,9 +934,9 @@ typedef struct RwCo {
BdrvRequestFlags flags;
} RwCo;
-static int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
+int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
if (is_write) {
return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
@@ -955,9 +956,9 @@ static int coroutine_fn bdrv_rw_co_entry(void *opaque)
/*
* Process a vectored synchronous request using coroutines
*/
-static int bdrv_prwv(BdrvChild *child, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
+int bdrv_prwv(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
RwCo rwco = {
.child = child,
@@ -2452,7 +2453,7 @@ early_out:
return ret;
}
-static int coroutine_fn
+int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
@@ -2509,12 +2510,12 @@ static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
*
* See bdrv_co_block_status_above() for details.
*/
-static int bdrv_common_block_status_above(BlockDriverState *bs,
- BlockDriverState *base,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
+int bdrv_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
BdrvCoBlockStatusData data = {
.bs = bs,
@@ -2630,7 +2631,7 @@ typedef struct BdrvVmstateCo {
bool is_read;
} BdrvVmstateCo;
-static int coroutine_fn
+int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
{
@@ -2663,9 +2664,8 @@ static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
}
-static inline int
-bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read)
+int bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
{
BdrvVmstateCo data = {
.bs = bs,
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 10/17] scripts: add block-coroutine-wrapper.py
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (8 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 09/17] block: declare some coroutine functions in block/coroutines.h Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 11/17] block: generate coroutine-wrapper code Stefan Hajnoczi
` (7 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Max Reitz, Stefan Hajnoczi,
Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
We have a very frequent pattern of creating a coroutine from a function
with several arguments:
- create a structure to pack parameters
- create _entry function to call original function taking parameters
from struct
- do different magic to handle completion: set ret to NOT_DONE or
EINPROGRESS or use separate bool field
- fill the struct and create coroutine from _entry function with this
struct as a parameter
- do coroutine enter and BDRV_POLL_WHILE loop
Let's reduce code duplication by generating coroutine wrappers.
This patch adds scripts/block-coroutine-wrapper.py together with some
friends, which will generate functions with declared prototypes marked
by the 'generated_co_wrapper' specifier.
The usage of new code generation is as follows:
1. define the coroutine function somewhere
int coroutine_fn bdrv_co_NAME(...) {...}
2. declare in some header file
int generated_co_wrapper bdrv_NAME(...);
with same list of parameters (generated_co_wrapper is
defined in "include/block/block.h").
3. Make sure the block_gen_c declaration in block/meson.build
mentions the file with your marker function.
Still, no function is now marked, this work is for the following
commit.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200924185414.28642-5-vsementsov@virtuozzo.com>
[Added encoding='utf-8' to open() calls as requested by Vladimir. Fixed
typo and grammar issues pointed out by Eric Blake. Removed clang-format
dependency that caused build test issues.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/block-gen.h | 49 ++++++++
include/block/block.h | 10 ++
block/meson.build | 8 ++
docs/devel/block-coroutine-wrapper.rst | 54 ++++++++
docs/devel/index.rst | 1 +
scripts/block-coroutine-wrapper.py | 167 +++++++++++++++++++++++++
6 files changed, 289 insertions(+)
create mode 100644 block/block-gen.h
create mode 100644 docs/devel/block-coroutine-wrapper.rst
create mode 100644 scripts/block-coroutine-wrapper.py
diff --git a/block/block-gen.h b/block/block-gen.h
new file mode 100644
index 0000000000..f80cf4897d
--- /dev/null
+++ b/block/block-gen.h
@@ -0,0 +1,49 @@
+/*
+ * Block coroutine wrapping core, used by auto-generated block/block-gen.c
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2020 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_BLOCK_GEN_H
+#define BLOCK_BLOCK_GEN_H
+
+#include "block/block_int.h"
+
+/* Base structure for argument packing structures */
+typedef struct BdrvPollCo {
+ BlockDriverState *bs;
+ bool in_progress;
+ int ret;
+ Coroutine *co; /* Keep pointer here for debugging */
+} BdrvPollCo;
+
+static inline int bdrv_poll_co(BdrvPollCo *s)
+{
+ assert(!qemu_in_coroutine());
+
+ bdrv_coroutine_enter(s->bs, s->co);
+ BDRV_POLL_WHILE(s->bs, s->in_progress);
+
+ return s->ret;
+}
+
+#endif /* BLOCK_BLOCK_GEN_H */
diff --git a/include/block/block.h b/include/block/block.h
index 81d591dd4c..0f0ddc51b4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -10,6 +10,16 @@
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
+/*
+ * generated_co_wrapper
+ *
+ * Function specifier, which does nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Read more in docs/devel/block-coroutine-wrapper.rst
+ */
+#define generated_co_wrapper
+
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BdrvChild BdrvChild;
diff --git a/block/meson.build b/block/meson.build
index 0b38dc36f7..78e8b25232 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -107,6 +107,14 @@ module_block_h = custom_target('module_block.h',
command: [module_block_py, '@OUTPUT0@', modsrc])
block_ss.add(module_block_h)
+wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
+block_gen_c = custom_target('block-gen.c',
+ output: 'block-gen.c',
+ input: files('../include/block/block.h',
+ 'coroutines.h'),
+ command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
+block_ss.add(block_gen_c)
+
block_ss.add(files('stream.c'))
softmmu_ss.add(files('qapi-sysemu.c'))
diff --git a/docs/devel/block-coroutine-wrapper.rst b/docs/devel/block-coroutine-wrapper.rst
new file mode 100644
index 0000000000..412851986b
--- /dev/null
+++ b/docs/devel/block-coroutine-wrapper.rst
@@ -0,0 +1,54 @@
+=======================
+block-coroutine-wrapper
+=======================
+
+A lot of functions in QEMU block layer (see ``block/*``) can only be
+called in coroutine context. Such functions are normally marked by the
+coroutine_fn specifier. Still, sometimes we need to call them from
+non-coroutine context; for this we need to start a coroutine, run the
+needed function from it and wait for the coroutine to finish in a
+BDRV_POLL_WHILE() loop. To run a coroutine we need a function with one
+void* argument. So for each coroutine_fn function which needs a
+non-coroutine interface, we should define a structure to pack the
+parameters, define a separate function to unpack the parameters and
+call the original function and finally define a new interface function
+with same list of arguments as original one, which will pack the
+parameters into a struct, create a coroutine, run it and wait in
+BDRV_POLL_WHILE() loop. It's boring to create such wrappers by hand,
+so we have a script to generate them.
+
+Usage
+=====
+
+Assume we have defined the ``coroutine_fn`` function
+``bdrv_co_foo(<some args>)`` and need a non-coroutine interface for it,
+called ``bdrv_foo(<same args>)``. In this case the script can help. To
+trigger the generation:
+
+1. You need ``bdrv_foo`` declaration somewhere (for example, in
+ ``block/coroutines.h``) with the ``generated_co_wrapper`` mark,
+ like this:
+
+.. code-block:: c
+
+ int generated_co_wrapper bdrv_foo(<some args>);
+
+2. You need to feed this declaration to block-coroutine-wrapper script.
+ For this, add the .h (or .c) file with the declaration to the
+ ``input: files(...)`` list of ``block_gen_c`` target declaration in
+ ``block/meson.build``
+
+You are done. During the build, coroutine wrappers will be generated in
+``<BUILD_DIR>/block/block-gen.c``.
+
+Links
+=====
+
+1. The script location is ``scripts/block-coroutine-wrapper.py``.
+
+2. Generic place for private ``generated_co_wrapper`` declarations is
+ ``block/coroutines.h``, for public declarations:
+ ``include/block/block.h``
+
+3. The core API of generated coroutine wrappers is placed in
+ (not generated) ``block/block-gen.h``
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index c34b43ec28..5fda2d3509 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -32,3 +32,4 @@ Contents:
s390-dasd-ipl
clocks
qom
+ block-coroutine-wrapper
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
new file mode 100644
index 0000000000..0461fd1c45
--- /dev/null
+++ b/scripts/block-coroutine-wrapper.py
@@ -0,0 +1,167 @@
+#! /usr/bin/env python3
+"""Generate coroutine wrappers for block subsystem.
+
+The program parses one or several concatenated c files from stdin,
+searches for functions with the 'generated_co_wrapper' specifier
+and generates corresponding wrappers on stdout.
+
+Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
+
+Copyright (c) 2020 Virtuozzo International GmbH.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import sys
+import re
+from typing import Iterator
+
+
+def gen_header():
+ copyright = re.sub('^.*Copyright', 'Copyright', __doc__, flags=re.DOTALL)
+ copyright = re.sub('^(?=.)', ' * ', copyright.strip(), flags=re.MULTILINE)
+ copyright = re.sub('^$', ' *', copyright, flags=re.MULTILINE)
+ return f"""\
+/*
+ * File is generated by scripts/block-coroutine-wrapper.py
+ *
+{copyright}
+ */
+
+#include "qemu/osdep.h"
+#include "block/coroutines.h"
+#include "block/block-gen.h"
+#include "block/block_int.h"\
+"""
+
+
+class ParamDecl:
+ param_re = re.compile(r'(?P<decl>'
+ r'(?P<type>.*[ *])'
+ r'(?P<name>[a-z][a-z0-9_]*)'
+ r')')
+
+ def __init__(self, param_decl: str) -> None:
+ m = self.param_re.match(param_decl.strip())
+ if m is None:
+ raise ValueError(f'Wrong parameter declaration: "{param_decl}"')
+ self.decl = m.group('decl')
+ self.type = m.group('type')
+ self.name = m.group('name')
+
+
+class FuncDecl:
+ def __init__(self, return_type: str, name: str, args: str) -> None:
+ self.return_type = return_type.strip()
+ self.name = name.strip()
+ self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
+
+ def gen_list(self, format: str) -> str:
+ return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
+
+ def gen_block(self, format: str) -> str:
+ return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
+
+
+# Match wrappers declared with a generated_co_wrapper mark
+func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*'
+ r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
+ r'\((?P<args>[^)]*)\);$', re.MULTILINE)
+
+
+def func_decl_iter(text: str) -> Iterator:
+ for m in func_decl_re.finditer(text):
+ yield FuncDecl(return_type='int',
+ name=m.group('wrapper_name'),
+ args=m.group('args'))
+
+
+def snake_to_camel(func_name: str) -> str:
+ """
+ Convert underscore names like 'some_function_name' to camel-case like
+ 'SomeFunctionName'
+ """
+ words = func_name.split('_')
+ words = [w[0].upper() + w[1:] for w in words]
+ return ''.join(words)
+
+
+def gen_wrapper(func: FuncDecl) -> str:
+ assert func.name.startswith('bdrv_')
+ assert not func.name.startswith('bdrv_co_')
+ assert func.return_type == 'int'
+ assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *']
+
+ name = 'bdrv_co_' + func.name[5:]
+ bs = 'bs' if func.args[0].type == 'BlockDriverState *' else 'child->bs'
+ struct_name = snake_to_camel(name)
+
+ return f"""\
+/*
+ * Wrappers for {name}
+ */
+
+typedef struct {struct_name} {{
+ BdrvPollCo poll_state;
+{ func.gen_block(' {decl};') }
+}} {struct_name};
+
+static void coroutine_fn {name}_entry(void *opaque)
+{{
+ {struct_name} *s = opaque;
+
+ s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
+ s->poll_state.in_progress = false;
+
+ aio_wait_kick();
+}}
+
+int {func.name}({ func.gen_list('{decl}') })
+{{
+ if (qemu_in_coroutine()) {{
+ return {name}({ func.gen_list('{name}') });
+ }} else {{
+ {struct_name} s = {{
+ .poll_state.bs = {bs},
+ .poll_state.in_progress = true,
+
+{ func.gen_block(' .{name} = {name},') }
+ }};
+
+ s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
+
+ return bdrv_poll_co(&s.poll_state);
+ }}
+}}"""
+
+
+def gen_wrappers(input_code: str) -> str:
+ res = ''
+ for func in func_decl_iter(input_code):
+ res += '\n\n\n'
+ res += gen_wrapper(func)
+
+ return res
+
+
+if __name__ == '__main__':
+ if len(sys.argv) < 3:
+ exit(f'Usage: {sys.argv[0]} OUT_FILE.c IN_FILE.[ch]...')
+
+ with open(sys.argv[1], 'w', encoding='utf-8') as f_out:
+ f_out.write(gen_header())
+ for fname in sys.argv[2:]:
+ with open(fname, encoding='utf-8') as f_in:
+ f_out.write(gen_wrappers(f_in.read()))
+ f_out.write('\n')
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 11/17] block: generate coroutine-wrapper code
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (9 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 10/17] scripts: add block-coroutine-wrapper.py Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 12/17] block: drop bdrv_prwv Stefan Hajnoczi
` (6 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Max Reitz, Stefan Hajnoczi,
Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Use code generation implemented in previous commit to generated
coroutine wrappers in block.c and block/io.c
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200924185414.28642-6-vsementsov@virtuozzo.com>
---
block/coroutines.h | 6 +-
include/block/block.h | 16 ++--
block.c | 73 ---------------
block/io.c | 212 ------------------------------------------
4 files changed, 13 insertions(+), 294 deletions(-)
diff --git a/block/coroutines.h b/block/coroutines.h
index 9ce1730a09..c62b3a2697 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -34,7 +34,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
int coroutine_fn
bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
bool is_write, BdrvRequestFlags flags);
-int
+int generated_co_wrapper
bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
bool is_write, BdrvRequestFlags flags);
@@ -47,7 +47,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
-int
+int generated_co_wrapper
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
@@ -60,7 +60,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read);
-int
+int generated_co_wrapper
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read);
diff --git a/include/block/block.h b/include/block/block.h
index 0f0ddc51b4..f2d85f2cf1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -403,8 +403,9 @@ void bdrv_refresh_filename(BlockDriverState *bs);
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp);
-int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+int generated_co_wrapper
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
@@ -446,7 +447,8 @@ typedef enum {
BDRV_FIX_ERRORS = 2,
} BdrvCheckMode;
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
/* The units of offset and total_work_size may be chosen arbitrarily by the
* block driver; total_work_size may change during the course of the amendment
@@ -470,12 +472,13 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
/* Invalidate any cached metadata used by image formats */
-int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
+ Error **errp);
void bdrv_invalidate_cache_all(Error **errp);
int bdrv_inactivate_all(void);
/* Ensure contents are flushed to disk. */
-int bdrv_flush(BlockDriverState *bs);
+int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
int bdrv_flush_all(void);
void bdrv_close_all(void);
@@ -490,7 +493,8 @@ void bdrv_drain_all(void);
AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
cond); })
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
+ int64_t bytes);
int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
diff --git a/block.c b/block.c
index 324714351c..52b2e2709f 100644
--- a/block.c
+++ b/block.c
@@ -4691,43 +4691,6 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
return bs->drv->bdrv_co_check(bs, res, fix);
}
-typedef struct CheckCo {
- BlockDriverState *bs;
- BdrvCheckResult *res;
- BdrvCheckMode fix;
- int ret;
-} CheckCo;
-
-static void coroutine_fn bdrv_check_co_entry(void *opaque)
-{
- CheckCo *cco = opaque;
- cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
- aio_wait_kick();
-}
-
-int bdrv_check(BlockDriverState *bs,
- BdrvCheckResult *res, BdrvCheckMode fix)
-{
- Coroutine *co;
- CheckCo cco = {
- .bs = bs,
- .res = res,
- .ret = -EINPROGRESS,
- .fix = fix,
- };
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_check_co_entry(&cco);
- } else {
- co = qemu_coroutine_create(bdrv_check_co_entry, &cco);
- bdrv_coroutine_enter(bs, co);
- BDRV_POLL_WHILE(bs, cco.ret == -EINPROGRESS);
- }
-
- return cco.ret;
-}
-
/*
* Return values:
* 0 - success
@@ -5860,42 +5823,6 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
return 0;
}
-typedef struct InvalidateCacheCo {
- BlockDriverState *bs;
- Error **errp;
- bool done;
- int ret;
-} InvalidateCacheCo;
-
-static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
-{
- InvalidateCacheCo *ico = opaque;
- ico->ret = bdrv_co_invalidate_cache(ico->bs, ico->errp);
- ico->done = true;
- aio_wait_kick();
-}
-
-int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
- Coroutine *co;
- InvalidateCacheCo ico = {
- .bs = bs,
- .done = false,
- .errp = errp
- };
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_invalidate_cache_co_entry(&ico);
- } else {
- co = qemu_coroutine_create(bdrv_invalidate_cache_co_entry, &ico);
- bdrv_coroutine_enter(bs, co);
- BDRV_POLL_WHILE(bs, !ico.done);
- }
-
- return ico.ret;
-}
-
void bdrv_invalidate_cache_all(Error **errp)
{
BlockDriverState *bs;
diff --git a/block/io.c b/block/io.c
index 55b3b7692c..2891303a8d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -890,50 +890,6 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
return 0;
}
-typedef int coroutine_fn BdrvRequestEntry(void *opaque);
-typedef struct BdrvRunCo {
- BdrvRequestEntry *entry;
- void *opaque;
- int ret;
- bool done;
- Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */
-} BdrvRunCo;
-
-static void coroutine_fn bdrv_run_co_entry(void *opaque)
-{
- BdrvRunCo *arg = opaque;
-
- arg->ret = arg->entry(arg->opaque);
- arg->done = true;
- aio_wait_kick();
-}
-
-static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry,
- void *opaque)
-{
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- return entry(opaque);
- } else {
- BdrvRunCo s = { .entry = entry, .opaque = opaque };
-
- s.co = qemu_coroutine_create(bdrv_run_co_entry, &s);
- bdrv_coroutine_enter(bs, s.co);
-
- BDRV_POLL_WHILE(bs, !s.done);
-
- return s.ret;
- }
-}
-
-typedef struct RwCo {
- BdrvChild *child;
- int64_t offset;
- QEMUIOVector *qiov;
- bool is_write;
- BdrvRequestFlags flags;
-} RwCo;
-
int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
QEMUIOVector *qiov, bool is_write,
BdrvRequestFlags flags)
@@ -945,32 +901,6 @@ int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
}
}
-static int coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- return bdrv_co_prwv(rwco->child, rwco->offset, rwco->qiov,
- rwco->is_write, rwco->flags);
-}
-
-/*
- * Process a vectored synchronous request using coroutines
- */
-int bdrv_prwv(BdrvChild *child, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
-{
- RwCo rwco = {
- .child = child,
- .offset = offset,
- .qiov = qiov,
- .is_write = is_write,
- .flags = flags,
- };
-
- return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco);
-}
-
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
@@ -2247,18 +2177,6 @@ int bdrv_flush_all(void)
return result;
}
-
-typedef struct BdrvCoBlockStatusData {
- BlockDriverState *bs;
- BlockDriverState *base;
- bool want_zero;
- int64_t offset;
- int64_t bytes;
- int64_t *pnum;
- int64_t *map;
- BlockDriverState **file;
-} BdrvCoBlockStatusData;
-
/*
* Returns the allocation status of the specified sectors.
* Drivers not implementing the functionality are assumed to not support
@@ -2494,43 +2412,6 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
return ret;
}
-/* Coroutine wrapper for bdrv_block_status_above() */
-static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
-{
- BdrvCoBlockStatusData *data = opaque;
-
- return bdrv_co_common_block_status_above(data->bs, data->base,
- data->want_zero,
- data->offset, data->bytes,
- data->pnum, data->map, data->file);
-}
-
-/*
- * Synchronous wrapper around bdrv_co_block_status_above().
- *
- * See bdrv_co_block_status_above() for details.
- */
-int bdrv_common_block_status_above(BlockDriverState *bs,
- BlockDriverState *base,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
-{
- BdrvCoBlockStatusData data = {
- .bs = bs,
- .base = base,
- .want_zero = want_zero,
- .offset = offset,
- .bytes = bytes,
- .pnum = pnum,
- .map = map,
- .file = file,
- };
-
- return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data);
-}
-
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
@@ -2624,13 +2505,6 @@ int bdrv_is_allocated_above(BlockDriverState *top,
return 0;
}
-typedef struct BdrvVmstateCo {
- BlockDriverState *bs;
- QEMUIOVector *qiov;
- int64_t pos;
- bool is_read;
-} BdrvVmstateCo;
-
int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
@@ -2657,26 +2531,6 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
return ret;
}
-static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
-{
- BdrvVmstateCo *co = opaque;
-
- return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
-}
-
-int bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read)
-{
- BdrvVmstateCo data = {
- .bs = bs,
- .qiov = qiov,
- .pos = pos,
- .is_read = is_read,
- };
-
- return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data);
-}
-
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
@@ -2752,11 +2606,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* Coroutine block device emulation */
-static int coroutine_fn bdrv_flush_co_entry(void *opaque)
-{
- return bdrv_co_flush(opaque);
-}
-
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
BdrvChild *primary_child = bdrv_primary_child(bs);
@@ -2880,24 +2729,6 @@ early_exit:
return ret;
}
-int bdrv_flush(BlockDriverState *bs)
-{
- return bdrv_run_co(bs, bdrv_flush_co_entry, bs);
-}
-
-typedef struct DiscardCo {
- BdrvChild *child;
- int64_t offset;
- int64_t bytes;
-} DiscardCo;
-
-static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
-{
- DiscardCo *rwco = opaque;
-
- return bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
-}
-
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes)
{
@@ -3012,17 +2843,6 @@ out:
return ret;
}
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
-{
- DiscardCo rwco = {
- .child = child,
- .offset = offset,
- .bytes = bytes,
- };
-
- return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco);
-}
-
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
@@ -3424,35 +3244,3 @@ out:
return ret;
}
-
-typedef struct TruncateCo {
- BdrvChild *child;
- int64_t offset;
- bool exact;
- PreallocMode prealloc;
- BdrvRequestFlags flags;
- Error **errp;
-} TruncateCo;
-
-static int coroutine_fn bdrv_truncate_co_entry(void *opaque)
-{
- TruncateCo *tco = opaque;
-
- return bdrv_co_truncate(tco->child, tco->offset, tco->exact,
- tco->prealloc, tco->flags, tco->errp);
-}
-
-int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
-{
- TruncateCo tco = {
- .child = child,
- .offset = offset,
- .exact = exact,
- .prealloc = prealloc,
- .flags = flags,
- .errp = errp,
- };
-
- return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco);
-}
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 12/17] block: drop bdrv_prwv
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (10 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 11/17] block: generate coroutine-wrapper code Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 13/17] block/io: refactor save/load vmstate Stefan Hajnoczi
` (5 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Philippe Mathieu-Daudé,
Max Reitz, Stefan Hajnoczi, Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Now that we are not maintaining boilerplate code for coroutine
wrappers, there is no more sense in keeping the extra indirection layer
of bdrv_prwv(). Let's drop it and instead generate pure bdrv_preadv()
and bdrv_pwritev().
Currently, bdrv_pwritev() and bdrv_preadv() are returning bytes on
success, auto generated functions will instead return zero, as their
_co_ prototype. Still, it's simple to make the conversion safe: the
only external user of bdrv_pwritev() is test-bdrv-drain, and it is
comfortable enough with bdrv_co_pwritev() instead. So prototypes are
moved to local block/coroutines.h. Next, the only internal use is
bdrv_pread() and bdrv_pwrite(), which are modified to return bytes on
success.
Of course, it would be great to convert bdrv_pread() and bdrv_pwrite()
to return 0 on success. But this requires audit (and probably
conversion) of all their users, let's leave it for another day
refactoring.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200924185414.28642-7-vsementsov@virtuozzo.com>
---
block/coroutines.h | 10 ++++-----
include/block/block.h | 2 --
block/io.c | 49 ++++++++---------------------------------
tests/test-bdrv-drain.c | 2 +-
4 files changed, 15 insertions(+), 48 deletions(-)
diff --git a/block/coroutines.h b/block/coroutines.h
index c62b3a2697..6c63a819c9 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -31,12 +31,12 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
-int coroutine_fn
-bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
- bool is_write, BdrvRequestFlags flags);
int generated_co_wrapper
-bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
- bool is_write, BdrvRequestFlags flags);
+bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+int generated_co_wrapper
+bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
diff --git a/include/block/block.h b/include/block/block.h
index f2d85f2cf1..eef4cceaf0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -383,9 +383,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int count);
/*
diff --git a/block/io.c b/block/io.c
index 2891303a8d..c3dc1db036 100644
--- a/block/io.c
+++ b/block/io.c
@@ -890,23 +890,11 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
return 0;
}
-int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
-{
- if (is_write) {
- return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
- } else {
- return bdrv_co_preadv(child, offset, qiov->size, qiov, flags);
- }
-}
-
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
-
- return bdrv_prwv(child, offset, &qiov, true, BDRV_REQ_ZERO_WRITE | flags);
+ return bdrv_pwritev(child, offset, bytes, NULL,
+ BDRV_REQ_ZERO_WRITE | flags);
}
/*
@@ -950,41 +938,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
}
}
-/* return < 0 if error. See bdrv_pwrite() for the return codes */
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
-{
- int ret;
-
- ret = bdrv_prwv(child, offset, qiov, false, 0);
- if (ret < 0) {
- return ret;
- }
-
- return qiov->size;
-}
-
/* See bdrv_pwrite() for the return codes */
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
+ int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
- return bdrv_preadv(child, offset, &qiov);
-}
+ ret = bdrv_preadv(child, offset, bytes, &qiov, 0);
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
-{
- int ret;
-
- ret = bdrv_prwv(child, offset, qiov, true, 0);
- if (ret < 0) {
- return ret;
- }
-
- return qiov->size;
+ return ret < 0 ? ret : bytes;
}
/* Return no. of bytes on success or < 0 on error. Important errors are:
@@ -995,13 +961,16 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
*/
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
+ int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
- return bdrv_pwritev(child, offset, &qiov);
+ ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
+
+ return ret < 0 ? ret : bytes;
}
/*
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 1107271840..1595bbc92e 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -1872,7 +1872,7 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
}
s->io_co = NULL;
- ret = bdrv_preadv(bs->backing, offset, qiov);
+ ret = bdrv_co_preadv(bs->backing, offset, bytes, qiov, 0);
s->has_read = true;
/* Wake up drain_co if it runs */
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 13/17] block/io: refactor save/load vmstate
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (11 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 12/17] block: drop bdrv_prwv Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 14/17] include/block/block.h: drop non-ascii quotation mark Stefan Hajnoczi
` (4 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Max Reitz, Stefan Hajnoczi,
Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Like for read/write in a previous commit, drop extra indirection layer,
generate directly bdrv_readv_vmstate() and bdrv_writev_vmstate().
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200924185414.28642-8-vsementsov@virtuozzo.com>
---
block/coroutines.h | 10 +++----
include/block/block.h | 6 ++--
block/io.c | 70 ++++++++++++++++++++++---------------------
3 files changed, 44 insertions(+), 42 deletions(-)
diff --git a/block/coroutines.h b/block/coroutines.h
index 6c63a819c9..f69179f5ef 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -57,11 +57,9 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file);
-int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read);
-int generated_co_wrapper
-bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read);
+int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/include/block/block.h b/include/block/block.h
index eef4cceaf0..8b87df69a1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -572,8 +572,10 @@ int path_has_protocol(const char *path);
int path_is_absolute(const char *path);
char *path_combine(const char *base_path, const char *filename);
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size);
diff --git a/block/io.c b/block/io.c
index c3dc1db036..54f0968aee 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2475,28 +2475,50 @@ int bdrv_is_allocated_above(BlockDriverState *top,
}
int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read)
+bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret = -ENOTSUP;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+
bdrv_inc_in_flight(bs);
+ if (drv->bdrv_load_vmstate) {
+ ret = drv->bdrv_load_vmstate(bs, qiov, pos);
+ } else if (child_bs) {
+ ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
+ }
+
+ bdrv_dec_in_flight(bs);
+
+ return ret;
+}
+
+int coroutine_fn
+bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+ BlockDriver *drv = bs->drv;
+ BlockDriverState *child_bs = bdrv_primary_bs(bs);
+ int ret = -ENOTSUP;
+
if (!drv) {
- ret = -ENOMEDIUM;
- } else if (drv->bdrv_load_vmstate) {
- if (is_read) {
- ret = drv->bdrv_load_vmstate(bs, qiov, pos);
- } else {
- ret = drv->bdrv_save_vmstate(bs, qiov, pos);
- }
+ return -ENOMEDIUM;
+ }
+
+ bdrv_inc_in_flight(bs);
+
+ if (drv->bdrv_save_vmstate) {
+ ret = drv->bdrv_save_vmstate(bs, qiov, pos);
} else if (child_bs) {
- ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
+ ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
}
bdrv_dec_in_flight(bs);
+
return ret;
}
@@ -2504,38 +2526,18 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
- int ret;
+ int ret = bdrv_writev_vmstate(bs, &qiov, pos);
- ret = bdrv_writev_vmstate(bs, &qiov, pos);
- if (ret < 0) {
- return ret;
- }
-
- return size;
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
- return bdrv_rw_vmstate(bs, qiov, pos, false);
+ return ret < 0 ? ret : size;
}
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
- int ret;
+ int ret = bdrv_readv_vmstate(bs, &qiov, pos);
- ret = bdrv_readv_vmstate(bs, &qiov, pos);
- if (ret < 0) {
- return ret;
- }
-
- return size;
-}
-
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
- return bdrv_rw_vmstate(bs, qiov, pos, true);
+ return ret < 0 ? ret : size;
}
/**************************************************************/
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 14/17] include/block/block.h: drop non-ascii quotation mark
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (12 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 13/17] block/io: refactor save/load vmstate Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 15/17] docs: add 'io_uring' option to 'aio' param in qemu-options.hx Stefan Hajnoczi
` (3 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Vladimir Sementsov-Ogievskiy,
Eduardo Habkost, qemu-block, Max Reitz, Stefan Hajnoczi,
Cleber Rosa
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
This is the only non-ascii character in the file and it doesn't really
needed here. Let's use normal "'" symbol for consistency with the rest
11 occurrences of "'" in the file.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
include/block/block.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/block/block.h b/include/block/block.h
index 8b87df69a1..ce2ac39299 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -304,7 +304,7 @@ enum BdrvChildRoleBits {
BDRV_CHILD_FILTERED = (1 << 2),
/*
- * Child from which to read all data that isn’t allocated in the
+ * Child from which to read all data that isn't allocated in the
* parent (i.e., the backing child); such data is copied to the
* parent through COW (and optionally COR).
* This field is mutually exclusive with DATA, METADATA, and
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 15/17] docs: add 'io_uring' option to 'aio' param in qemu-options.hx
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (13 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 14/17] include/block/block.h: drop non-ascii quotation mark Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 16/17] util/vfio-helpers: Collect IOVA reserved regions Stefan Hajnoczi
` (2 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Pankaj Gupta,
Julia Suvorova, Max Reitz, Stefan Hajnoczi, Cleber Rosa,
Stefano Garzarella
From: Stefano Garzarella <sgarzare@redhat.com>
When we added io_uring AIO engine, we forgot to update qemu-options.hx,
so qemu(1) man page and qemu help were outdated.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Julia Suvorova <jusual@redhat.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Message-Id: <20200924151511.131471-1-sgarzare@redhat.com>
---
qemu-options.hx | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/qemu-options.hx b/qemu-options.hx
index 3564c2303f..1da52a269c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1053,7 +1053,8 @@ SRST
The path to the image file in the local filesystem
``aio``
- Specifies the AIO backend (threads/native, default: threads)
+ Specifies the AIO backend (threads/native/io_uring,
+ default: threads)
``locking``
Specifies whether the image file is protected with Linux OFD
@@ -1175,7 +1176,8 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
" [,snapshot=on|off][,rerror=ignore|stop|report]\n"
- " [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
+ " [,werror=ignore|stop|report|enospc][,id=name]\n"
+ " [,aio=threads|native|io_uring]\n"
" [,readonly=on|off][,copy-on-read=on|off]\n"
" [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
" [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
@@ -1247,8 +1249,8 @@ SRST
The default mode is ``cache=writeback``.
``aio=aio``
- aio is "threads", or "native" and selects between pthread based
- disk I/O and native Linux AIO.
+ aio is "threads", "native", or "io_uring" and selects between pthread
+ based disk I/O, native Linux AIO, or Linux io_uring API.
``format=format``
Specify which disk format will be used rather than detecting the
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 16/17] util/vfio-helpers: Collect IOVA reserved regions
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (14 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 15/17] docs: add 'io_uring' option to 'aio' param in qemu-options.hx Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 17/17] util/vfio-helpers: Rework the IOVA allocator to avoid " Stefan Hajnoczi
2020-10-06 12:18 ` [PULL v2 00/17] Block patches Peter Maydell
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Eric Auger, Stefan Hajnoczi, Cleber Rosa
From: Eric Auger <eric.auger@redhat.com>
The IOVA allocator currently ignores host reserved regions.
As a result some chosen IOVAs may collide with some of them,
resulting in VFIO MAP_DMA errors later on. This happens on ARM
where the MSI reserved window quickly is encountered:
[0x8000000, 0x8100000]. since 5.4 kernel, VFIO returns the usable
IOVA regions. So let's enumerate them in the prospect to avoid
them, later on.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20200929085550.30926-2-eric.auger@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/vfio-helpers.c | 72 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 2 deletions(-)
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 9ac307e3d4..fe9ca9ce38 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -40,6 +40,11 @@ typedef struct {
uint64_t iova;
} IOVAMapping;
+struct IOVARange {
+ uint64_t start;
+ uint64_t end;
+};
+
struct QEMUVFIOState {
QemuMutex lock;
@@ -49,6 +54,8 @@ struct QEMUVFIOState {
int device;
RAMBlockNotifier ram_notifier;
struct vfio_region_info config_region_info, bar_region_info[6];
+ struct IOVARange *usable_iova_ranges;
+ uint8_t nb_iova_ranges;
/* These fields are protected by @lock */
/* VFIO's IO virtual address space is managed by splitting into a few
@@ -236,6 +243,35 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
return ret == size ? 0 : -errno;
}
+static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
+{
+ struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
+ struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
+ struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
+ int i;
+
+ while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
+ if (!cap->next) {
+ return;
+ }
+ cap = (struct vfio_info_cap_header *)(buf + cap->next);
+ }
+
+ cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
+
+ s->nb_iova_ranges = cap_iova_range->nr_iovas;
+ if (s->nb_iova_ranges > 1) {
+ s->usable_iova_ranges =
+ g_realloc(s->usable_iova_ranges,
+ s->nb_iova_ranges * sizeof(struct IOVARange));
+ }
+
+ for (i = 0; i < s->nb_iova_ranges; i++) {
+ s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
+ s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
+ }
+}
+
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
Error **errp)
{
@@ -243,10 +279,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
int i;
uint16_t pci_cmd;
struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
- struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+ struct vfio_iommu_type1_info *iommu_info = NULL;
+ size_t iommu_info_size = sizeof(*iommu_info);
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
char *group_file = NULL;
+ s->usable_iova_ranges = NULL;
+
/* Create a new container */
s->container = open("/dev/vfio/vfio", O_RDWR);
@@ -310,13 +349,35 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
goto fail;
}
+ iommu_info = g_malloc0(iommu_info_size);
+ iommu_info->argsz = iommu_info_size;
+
/* Get additional IOMMU info */
- if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
+ if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
error_setg_errno(errp, errno, "Failed to get IOMMU info");
ret = -errno;
goto fail;
}
+ /*
+ * if the kernel does not report usable IOVA regions, choose
+ * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
+ */
+ s->nb_iova_ranges = 1;
+ s->usable_iova_ranges = g_new0(struct IOVARange, 1);
+ s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
+ s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
+
+ if (iommu_info->argsz > iommu_info_size) {
+ iommu_info_size = iommu_info->argsz;
+ iommu_info = g_realloc(iommu_info, iommu_info_size);
+ if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
+ ret = -errno;
+ goto fail;
+ }
+ collect_usable_iova_ranges(s, iommu_info);
+ }
+
s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
if (s->device < 0) {
@@ -365,8 +426,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
if (ret) {
goto fail;
}
+ g_free(iommu_info);
return 0;
fail:
+ g_free(s->usable_iova_ranges);
+ s->usable_iova_ranges = NULL;
+ s->nb_iova_ranges = 0;
+ g_free(iommu_info);
close(s->group);
fail_container:
close(s->container);
@@ -716,6 +782,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
}
ram_block_notifier_remove(&s->ram_notifier);
+ g_free(s->usable_iova_ranges);
+ s->nb_iova_ranges = 0;
qemu_vfio_reset(s);
close(s->device);
close(s->group);
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PULL v2 17/17] util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (15 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 16/17] util/vfio-helpers: Collect IOVA reserved regions Stefan Hajnoczi
@ 2020-10-05 15:43 ` Stefan Hajnoczi
2020-10-06 12:18 ` [PULL v2 00/17] Block patches Peter Maydell
17 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2020-10-05 15:43 UTC (permalink / raw)
To: qemu-devel, Peter Maydell
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, qemu-block, Max Reitz,
Eric Auger, Stefan Hajnoczi, Cleber Rosa
From: Eric Auger <eric.auger@redhat.com>
Introduce the qemu_vfio_find_fixed/temp_iova helpers which
respectively allocate IOVAs from the bottom/top parts of the
usable IOVA range, without picking within host IOVA reserved
windows. The allocation remains basic: if the size is too big
for the remaining of the current usable IOVA range, we jump
to the next one, leaving a hole in the address map.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20200929085550.30926-3-eric.auger@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/vfio-helpers.c | 57 +++++++++++++++++++++++++++++++++++++++++----
1 file changed, 53 insertions(+), 4 deletions(-)
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index fe9ca9ce38..c469beb061 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -667,6 +667,50 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
return true;
}
+static int
+qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+{
+ int i;
+
+ for (i = 0; i < s->nb_iova_ranges; i++) {
+ if (s->usable_iova_ranges[i].end < s->low_water_mark) {
+ continue;
+ }
+ s->low_water_mark =
+ MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
+
+ if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
+ s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
+ *iova = s->low_water_mark;
+ s->low_water_mark += size;
+ return 0;
+ }
+ }
+ return -ENOMEM;
+}
+
+static int
+qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+{
+ int i;
+
+ for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
+ if (s->usable_iova_ranges[i].start > s->high_water_mark) {
+ continue;
+ }
+ s->high_water_mark =
+ MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);
+
+ if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
+ s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
+ *iova = s->high_water_mark - size;
+ s->high_water_mark = *iova;
+ return 0;
+ }
+ }
+ return -ENOMEM;
+}
+
/* Map [host, host + size) area into a contiguous IOVA address space, and store
* the result in @iova if not NULL. The caller need to make sure the area is
* aligned to page size, and mustn't overlap with existing mapping areas (split
@@ -693,7 +737,11 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
goto out;
}
if (!temporary) {
- iova0 = s->low_water_mark;
+ if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
if (!mapping) {
ret = -ENOMEM;
@@ -705,15 +753,16 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
qemu_vfio_undo_mapping(s, mapping, NULL);
goto out;
}
- s->low_water_mark += size;
qemu_vfio_dump_mappings(s);
} else {
- iova0 = s->high_water_mark - size;
+ if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = qemu_vfio_do_mapping(s, host, size, iova0);
if (ret) {
goto out;
}
- s->high_water_mark -= size;
}
}
if (iova) {
--
2.26.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PULL v2 00/17] Block patches
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
` (16 preceding siblings ...)
2020-10-05 15:43 ` [PULL v2 17/17] util/vfio-helpers: Rework the IOVA allocator to avoid " Stefan Hajnoczi
@ 2020-10-06 12:18 ` Peter Maydell
17 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2020-10-06 12:18 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: Kevin Wolf, Fam Zheng, Eduardo Habkost, Qemu-block,
QEMU Developers, Max Reitz, Cleber Rosa
On Mon, 5 Oct 2020 at 16:43, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> The following changes since commit 469e72ab7dbbd7ff4ee601e5ea7c29545d46593b:
>
> Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2020-10-02 16:19:42 +0100)
>
> are available in the Git repository at:
>
> https://gitlab.com/stefanha/qemu.git tags/block-pull-request
>
> for you to fetch changes up to 9ab5741164b1727d22f69fe7001382baf0d56977:
>
> util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions (2020-10-05 10:59:42 +0100)
>
> ----------------------------------------------------------------
> Pull request
>
> v2:
> * Removed clang-format call from scripts/block-coroutine-wrapper.py. This
> avoids the issue with clang version incompatibility. It could be added back
> in the future but the code is readable without reformatting and it also
> makes the build less dependent on the environment.
>
> ----------------------------------------------------------------
Applied, thanks.
Please update the changelog at https://wiki.qemu.org/ChangeLog/5.2
for any user-visible changes.
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread