* [Qemu-devel] [PATCH 2/6] block: Check if block drivers can do copy offloading
2018-06-08 6:04 [Qemu-devel] [PATCH 0/6] mirror: Use copy offloading Fam Zheng
2018-06-08 6:04 ` [Qemu-devel] [PATCH 1/6] file-posix: Fix EINTR handling Fam Zheng
@ 2018-06-08 6:04 ` Fam Zheng
2018-06-15 15:00 ` Stefan Hajnoczi
2018-06-08 6:04 ` [Qemu-devel] [PATCH 3/6] block-backend: Refactor AIO emulation Fam Zheng
` (3 subsequent siblings)
5 siblings, 1 reply; 13+ messages in thread
From: Fam Zheng @ 2018-06-08 6:04 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-block, Fam Zheng, Kevin Wolf, Max Reitz, Jeff Cody,
Stefan Hajnoczi
This avoids wasteful work before an unsupported copy range call is even
attempted; for example, qcow2 would otherwise allocate clusters only to
see the request fail with -ENOTSUP.
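A minimal sketch (not part of this patch) of the intended calling pattern:
probe with the new hook before issuing the request, so a doomed attempt
does not trigger metadata updates first. The bounce_buffer_copy() helper
is hypothetical, and bdrv_co_copy_range() is assumed to take the same
argument shape as the _from/_to variants declared in block_int.h below:

static int coroutine_fn copy_cluster(BdrvChild *src, BdrvChild *dst,
                                     uint64_t offset, uint64_t bytes)
{
    if (bdrv_can_copy_range(src, dst)) {
        /* Both drivers advertise support; offloading is worth trying. */
        return bdrv_co_copy_range(src, offset, dst, offset, bytes, 0);
    }
    /* Hypothetical fallback: read into a bounce buffer, then write out. */
    return bounce_buffer_copy(src, dst, offset, bytes);
}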
Signed-off-by: Fam Zheng <famz@redhat.com>
---
block.c | 12 ++++++++++++
block/file-posix.c | 9 +++++++++
block/io.c | 3 +++
block/iscsi.c | 8 ++++++++
block/qcow2.c | 11 +++++++++++
block/raw-format.c | 6 ++++++
include/block/block_int.h | 4 ++++
7 files changed, 53 insertions(+)
diff --git a/block.c b/block.c
index 501b64c819..28aa8d8a65 100644
--- a/block.c
+++ b/block.c
@@ -5320,3 +5320,15 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
}
+
+bool bdrv_can_copy_range(BdrvChild *src, BdrvChild *dst)
+{
+ BlockDriverState *bs;
+
+ if (!src || !src->bs) {
+ return false;
+ }
+ bs = src->bs;
+ return bs && bs->drv && bs->drv->bdrv_can_copy_range &&
+ bs->drv->bdrv_can_copy_range(bs, dst);
+}
diff --git a/block/file-posix.c b/block/file-posix.c
index c6dae38f94..41c491c65b 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2539,6 +2539,13 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
NULL, bytes, QEMU_AIO_COPY_RANGE);
}
+static bool raw_can_copy_range(BlockDriverState *bs,
+ BdrvChild *dst)
+{
+ return dst->bs && dst->bs->drv &&
+ dst->bs->drv->bdrv_can_copy_range == raw_can_copy_range;
+}
+
BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
@@ -2564,6 +2571,7 @@ BlockDriver bdrv_file = {
.bdrv_aio_pdiscard = raw_aio_pdiscard,
.bdrv_co_copy_range_from = raw_co_copy_range_from,
.bdrv_co_copy_range_to = raw_co_copy_range_to,
+ .bdrv_can_copy_range = raw_can_copy_range,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
@@ -3044,6 +3052,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_pdiscard = hdev_aio_pdiscard,
.bdrv_co_copy_range_from = raw_co_copy_range_from,
.bdrv_co_copy_range_to = raw_co_copy_range_to,
+ .bdrv_can_copy_range = raw_can_copy_range,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
diff --git a/block/io.c b/block/io.c
index b7beaeeb9f..d8039793c2 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2913,6 +2913,9 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
BlockDriverState *dst_bs = dst->bs;
int ret;
+ if (!bdrv_can_copy_range(src, dst)) {
+ return -ENOTSUP;
+ }
bdrv_inc_in_flight(src_bs);
bdrv_inc_in_flight(dst_bs);
tracked_request_begin(&src_req, src_bs, src_offset,
diff --git a/block/iscsi.c b/block/iscsi.c
index c2fbd8a8aa..6c465ebd46 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2420,6 +2420,12 @@ out_unlock:
return r;
}
+static bool iscsi_can_copy_range(BlockDriverState *bs, BdrvChild *dst)
+{
+ return dst->bs && dst->bs->drv &&
+ dst->bs->drv->bdrv_can_copy_range == iscsi_can_copy_range;
+}
+
static QemuOptsList iscsi_create_opts = {
.name = "iscsi-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -2456,6 +2462,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_copy_range_from = iscsi_co_copy_range_from,
.bdrv_co_copy_range_to = iscsi_co_copy_range_to,
+ .bdrv_can_copy_range = iscsi_can_copy_range,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
@@ -2493,6 +2500,7 @@ static BlockDriver bdrv_iser = {
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_copy_range_from = iscsi_co_copy_range_from,
.bdrv_co_copy_range_to = iscsi_co_copy_range_to,
+ .bdrv_can_copy_range = iscsi_can_copy_range,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
diff --git a/block/qcow2.c b/block/qcow2.c
index 549fee9b69..1326410d1c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3440,6 +3440,16 @@ fail:
return ret;
}
+static bool qcow2_can_copy_range(BlockDriverState *bs, BdrvChild *dst)
+{
+ bool r = bdrv_can_copy_range(bs->file, dst);
+
+ if (bs->backing) {
+ r = r && bdrv_can_copy_range(bs->backing, dst);
+ }
+ return r;
+}
+
static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
@@ -4690,6 +4700,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_pdiscard = qcow2_co_pdiscard,
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
+ .bdrv_can_copy_range = qcow2_can_copy_range,
.bdrv_truncate = qcow2_truncate,
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
.bdrv_make_empty = qcow2_make_empty,
diff --git a/block/raw-format.c b/block/raw-format.c
index f2e468df6f..707b25fc77 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -527,6 +527,11 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
flags);
}
+static bool raw_can_copy_range(BlockDriverState *bs, BdrvChild *dst)
+{
+ return bdrv_can_copy_range(bs->file, dst);
+}
+
BlockDriver bdrv_raw = {
.format_name = "raw",
.instance_size = sizeof(BDRVRawState),
@@ -545,6 +550,7 @@ BlockDriver bdrv_raw = {
.bdrv_co_block_status = &raw_co_block_status,
.bdrv_co_copy_range_from = &raw_co_copy_range_from,
.bdrv_co_copy_range_to = &raw_co_copy_range_to,
+ .bdrv_can_copy_range = &raw_can_copy_range,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 888b7f7bff..2c51cd420f 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -235,6 +235,9 @@ struct BlockDriver {
uint64_t bytes,
BdrvRequestFlags flags);
+ bool (*bdrv_can_copy_range)(BlockDriverState *bs,
+ BdrvChild *dst);
+
/*
* Building block for bdrv_block_status[_above] and
* bdrv_is_allocated[_above]. The driver should answer only
@@ -1139,5 +1142,6 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
BdrvChild *dst, uint64_t dst_offset,
uint64_t bytes, BdrvRequestFlags flags);
+bool bdrv_can_copy_range(BdrvChild *src, BdrvChild *dst);
#endif /* BLOCK_INT_H */
--
2.17.0
* [Qemu-devel] [PATCH 3/6] block-backend: Refactor AIO emulation
2018-06-08 6:04 [Qemu-devel] [PATCH 0/6] mirror: Use copy offloading Fam Zheng
2018-06-08 6:04 ` [Qemu-devel] [PATCH 1/6] file-posix: Fix EINTR handling Fam Zheng
2018-06-08 6:04 ` [Qemu-devel] [PATCH 2/6] block: Check if block drivers can do copy offloading Fam Zheng
@ 2018-06-08 6:04 ` Fam Zheng
2018-06-15 15:21 ` Stefan Hajnoczi
2018-06-08 6:04 ` [Qemu-devel] [PATCH 4/6] block-backend: Add blk_aio_copy_range Fam Zheng
` (2 subsequent siblings)
5 siblings, 1 reply; 13+ messages in thread
From: Fam Zheng @ 2018-06-08 6:04 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-block, Fam Zheng, Kevin Wolf, Max Reitz, Jeff Cody,
Stefan Hajnoczi
The BlkRwCo fields are multi-purposed: @offset is sometimes used to pass
the 'req' number for blk_ioctl() and blk_aio_ioctl(), and @iobuf is
sometimes the pointer to a QEMUIOVector (@qiov) and sometimes the ioctl
@buf. This is not as clean as it could be. Since the coming copy range
emulation wants to add more differentiation in the parameters, refactor
a bit.
Move the per-request fields into a union and create one struct for each
request type. While at it, also move the @bytes parameter from
BlkAioEmAIOCB to BlkRwCo.
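For illustration only, the change to a byte-wise read boils down to this
before/after (taken from the blk_pread() hunk below):

/* before: blk_prw() builds a temporary qiov from buf/count internally
 * and stashes its address in the multi-purpose @iobuf field */
ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);

/* after: the request is fully described by the .prw member of one BlkRwCo */
BlkRwCo rwco = (BlkRwCo) {
    .prw.offset = offset,
    .prw.bytes  = count,
    .prw.buf    = buf,
};
ret = blk_prw(blk, &rwco, blk_read_entry);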
Signed-off-by: Fam Zheng <famz@redhat.com>
---
block/block-backend.c | 211 +++++++++++++++++++++++++++---------------
1 file changed, 134 insertions(+), 77 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index d55c328736..e20a204bee 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1192,62 +1192,79 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
typedef struct BlkRwCo {
BlockBackend *blk;
- int64_t offset;
- void *iobuf;
int ret;
- BdrvRequestFlags flags;
+
+ union {
+ struct {
+ int64_t offset;
+ int bytes;
+ QEMUIOVector *qiov;
+ BdrvRequestFlags flags;
+ } prwv;
+
+ struct {
+ int64_t offset;
+ int bytes;
+ void *buf;
+ BdrvRequestFlags flags;
+ } prw;
+
+ struct {
+ unsigned long int req;
+ void *buf;
+ } ioctl;
+ };
+
} BlkRwCo;
static void blk_read_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
+ QEMUIOVector qiov;
+ struct iovec iov;
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
+ iov = (struct iovec) {
+ .iov_base = rwco->prw.buf,
+ .iov_len = rwco->prw.bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->prw.offset, rwco->prw.bytes,
+ &qiov, rwco->prw.flags);
}
static void blk_write_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
-
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
-}
-
-static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
- int64_t bytes, CoroutineEntry co_entry,
- BdrvRequestFlags flags)
-{
QEMUIOVector qiov;
struct iovec iov;
- BlkRwCo rwco;
iov = (struct iovec) {
- .iov_base = buf,
- .iov_len = bytes,
+ .iov_base = rwco->prw.buf,
+ .iov_len = rwco->prw.bytes,
};
qemu_iovec_init_external(&qiov, &iov, 1);
- rwco = (BlkRwCo) {
- .blk = blk,
- .offset = offset,
- .iobuf = &qiov,
- .flags = flags,
- .ret = NOT_DONE,
- };
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->prw.offset, rwco->prw.bytes,
+ &qiov, rwco->prw.flags);
+}
+static int blk_prw(BlockBackend *blk, BlkRwCo *rwco,
+ CoroutineEntry co_entry)
+{
+
+ rwco->blk = blk;
+ rwco->ret = NOT_DONE;
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
- co_entry(&rwco);
+ co_entry(rwco);
} else {
- Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+ Coroutine *co = qemu_coroutine_create(co_entry, rwco);
bdrv_coroutine_enter(blk_bs(blk), co);
- BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+ BDRV_POLL_WHILE(blk_bs(blk), rwco->ret == NOT_DONE);
}
- return rwco.ret;
+ return rwco->ret;
}
int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
@@ -1269,8 +1286,12 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
- return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
- flags | BDRV_REQ_ZERO_WRITE);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prwv.offset = offset,
+ .prwv.bytes = bytes,
+ .prwv.flags = flags | BDRV_REQ_ZERO_WRITE,
+ };
+ return blk_prw(blk, &rwco, blk_write_entry);
}
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
@@ -1316,7 +1337,6 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
typedef struct BlkAioEmAIOCB {
BlockAIOCB common;
BlkRwCo rwco;
- int bytes;
bool has_returned;
} BlkAioEmAIOCB;
@@ -1340,9 +1360,8 @@ static void blk_aio_complete_bh(void *opaque)
blk_aio_complete(acb);
}
-static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
- void *iobuf, CoroutineEntry co_entry,
- BdrvRequestFlags flags,
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, const BlkRwCo *rwco,
+ CoroutineEntry co_entry,
BlockCompletionFunc *cb, void *opaque)
{
BlkAioEmAIOCB *acb;
@@ -1350,14 +1369,9 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
- acb->rwco = (BlkRwCo) {
- .blk = blk,
- .offset = offset,
- .iobuf = iobuf,
- .flags = flags,
- .ret = NOT_DONE,
- };
- acb->bytes = bytes;
+ acb->rwco = *rwco;
+ acb->rwco.blk = blk;
+ acb->rwco.ret = NOT_DONE;
acb->has_returned = false;
co = qemu_coroutine_create(co_entry, acb);
@@ -1376,11 +1390,11 @@ static void blk_aio_read_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- QEMUIOVector *qiov = rwco->iobuf;
+ QEMUIOVector *qiov = rwco->prwv.qiov;
- assert(qiov->size == acb->bytes);
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ assert(qiov->size == rwco->prwv.bytes);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->prwv.offset, rwco->prwv.bytes,
+ qiov, rwco->prwv.flags);
blk_aio_complete(acb);
}
@@ -1388,11 +1402,11 @@ static void blk_aio_write_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- QEMUIOVector *qiov = rwco->iobuf;
+ QEMUIOVector *qiov = rwco->prwv.qiov;
- assert(!qiov || qiov->size == acb->bytes);
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ assert(!qiov || qiov->size == rwco->prwv.bytes);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->prwv.offset, rwco->prwv.bytes,
+ qiov, rwco->prwv.flags);
blk_aio_complete(acb);
}
@@ -1400,13 +1414,22 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int count, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
- flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prwv.offset = offset,
+ .prwv.bytes = count,
+ .prwv.flags = flags | BDRV_REQ_ZERO_WRITE,
+ };
+ return blk_aio_prwv(blk, &rwco, blk_aio_write_entry, cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
- int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prw.offset = offset,
+ .prw.bytes = count,
+ .prw.buf = buf,
+ };
+ int ret = blk_prw(blk, &rwco, blk_read_entry);
if (ret < 0) {
return ret;
}
@@ -1416,8 +1439,13 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
BdrvRequestFlags flags)
{
- int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
- flags);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prw.offset = offset,
+ .prw.bytes = count,
+ .prw.buf = (void *)buf,
+ .prw.flags = flags,
+ };
+ int ret = blk_prw(blk, &rwco, blk_write_entry);
if (ret < 0) {
return ret;
}
@@ -1455,16 +1483,26 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, offset, qiov->size, qiov,
- blk_aio_read_entry, flags, cb, opaque);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prwv.offset = offset,
+ .prwv.bytes = qiov->size,
+ .prwv.flags = flags,
+ .prwv.qiov = qiov,
+ };
+ return blk_aio_prwv(blk, &rwco, blk_aio_read_entry, cb, opaque);
}
BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, offset, qiov->size, qiov,
- blk_aio_write_entry, flags, cb, opaque);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prwv.offset = offset,
+ .prwv.bytes = qiov->size,
+ .prwv.flags = flags,
+ .prwv.qiov = qiov,
+ };
+ return blk_aio_prwv(blk, &rwco, blk_aio_write_entry, cb, opaque);
}
static void blk_aio_flush_entry(void *opaque)
@@ -1479,7 +1517,8 @@ static void blk_aio_flush_entry(void *opaque)
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
+ BlkRwCo rwco = { };
+ return blk_aio_prwv(blk, &rwco, blk_aio_flush_entry, cb, opaque);
}
static void blk_aio_pdiscard_entry(void *opaque)
@@ -1487,7 +1526,7 @@ static void blk_aio_pdiscard_entry(void *opaque)
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->prwv.offset, rwco->prwv.bytes);
blk_aio_complete(acb);
}
@@ -1495,8 +1534,11 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
- cb, opaque);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prwv.offset = offset,
+ .prwv.bytes = bytes,
+ };
+ return blk_aio_prwv(blk, &rwco, blk_aio_pdiscard_entry, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -1521,15 +1563,17 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
static void blk_ioctl_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- qiov->iov[0].iov_base);
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->ioctl.req, rwco->ioctl.buf);
}
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
- return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
+ BlkRwCo rwco = (BlkRwCo) {
+ .ioctl.req = req,
+ .ioctl.buf = buf,
+ };
+ return blk_prw(blk, &rwco, blk_ioctl_entry);
}
static void blk_aio_ioctl_entry(void *opaque)
@@ -1537,7 +1581,7 @@ static void blk_aio_ioctl_entry(void *opaque)
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->ioctl.req, rwco->ioctl.buf);
blk_aio_complete(acb);
}
@@ -1545,7 +1589,11 @@ static void blk_aio_ioctl_entry(void *opaque)
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
+ BlkRwCo rwco = (BlkRwCo) {
+ .ioctl.req = req,
+ .ioctl.buf = buf,
+ };
+ return blk_aio_prwv(blk, &rwco, blk_aio_ioctl_entry, cb, opaque);
}
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
@@ -1575,7 +1623,8 @@ static void blk_flush_entry(void *opaque)
int blk_flush(BlockBackend *blk)
{
- return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
+ BlkRwCo rwco = { };
+ return blk_prw(blk, &rwco, blk_flush_entry);
}
void blk_drain(BlockBackend *blk)
@@ -1985,8 +2034,13 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int count)
{
- return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
- BDRV_REQ_WRITE_COMPRESSED);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prw.offset = offset,
+ .prw.buf = (void *)buf,
+ .prw.bytes = count,
+ .prw.flags = BDRV_REQ_WRITE_COMPRESSED,
+ };
+ return blk_prw(blk, &rwco, blk_write_entry);
}
int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
@@ -2003,14 +2057,17 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
static void blk_pdiscard_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->prw.offset, rwco->prw.bytes);
}
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+ BlkRwCo rwco = (BlkRwCo) {
+ .prw.offset = offset,
+ .prw.bytes = bytes,
+ };
+ return blk_prw(blk, &rwco, blk_pdiscard_entry);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
--
2.17.0
* [Qemu-devel] [PATCH 6/6] mirror: Use copy offloading
2018-06-08 6:04 [Qemu-devel] [PATCH 0/6] mirror: Use copy offloading Fam Zheng
` (4 preceding siblings ...)
2018-06-08 6:04 ` [Qemu-devel] [PATCH 5/6] block: Add backing passthrough implementations for copy_range Fam Zheng
@ 2018-06-08 6:04 ` Fam Zheng
2018-06-15 16:23 ` Stefan Hajnoczi
5 siblings, 1 reply; 13+ messages in thread
From: Fam Zheng @ 2018-06-08 6:04 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-block, Fam Zheng, Kevin Wolf, Max Reitz, Jeff Cody,
Stefan Hajnoczi
This makes the mirror job try offloaded copy first. If it fails, no error
action is taken yet; instead, the failed cluster and all subsequent ones
fall back to the bounce buffer path.
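Condensed sketch of that fallback path (the real callback,
mirror_copy_range_complete() in the diff below, additionally takes the
AioContext lock and emits a trace point; the function name here is
illustrative only):

static void on_copy_range_done(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;

    if (ret == 0) {
        mirror_iteration_done(op, ret);     /* offloaded copy worked */
        return;
    }
    /* First failure disables offloading for the rest of the job... */
    s->use_copy_range = false;
    s->in_flight--;
    s->bytes_in_flight -= op->bytes;
    /* ...and this cluster is redone through the bounce buffer path. */
    mirror_do_read(s, op->offset, op->bytes);
    g_free(op);
}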
Signed-off-by: Fam Zheng <famz@redhat.com>
---
block/mirror.c | 71 +++++++++++++++++++++++++++++++++++++++++++++-
block/trace-events | 1 +
2 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/block/mirror.c b/block/mirror.c
index 435268bbbf..a90b64550c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -71,6 +71,8 @@ typedef struct MirrorBlockJob {
int target_cluster_size;
int max_iov;
bool initial_zeroing_ongoing;
+
+ bool use_copy_range;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -293,6 +295,69 @@ static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
return ret;
}
+static void mirror_copy_range_complete(void *opaque, int ret)
+{
+ MirrorOp *op = opaque;
+ MirrorBlockJob *s = op->s;
+
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
+ trace_mirror_copy_range_complete(s, ret, op->offset, op->bytes);
+ if (!ret) {
+ mirror_iteration_done(op, ret);
+ } else {
+ uint64_t bytes;
+
+ s->use_copy_range = false;
+ s->in_flight--;
+ s->bytes_in_flight -= op->bytes;
+ bytes = mirror_do_read(s, op->offset, op->bytes);
+ /* No alignment adjusting in mirror_do_read since we've already done
+ * that in mirror_do_copy(). */
+ assert(bytes == op->bytes);
+ g_free(op);
+ }
+ aio_context_release(blk_get_aio_context(s->common.blk));
+}
+
+static uint64_t mirror_do_copy(MirrorBlockJob *s, int64_t offset,
+ uint64_t bytes)
+{
+ uint64_t ret;
+ MirrorOp *op;
+
+ if (!s->use_copy_range || offset < BLOCK_PROBE_BUF_SIZE) {
+ return mirror_do_read(s, offset, bytes);
+ }
+
+ assert(bytes);
+ assert(bytes < BDRV_REQUEST_MAX_BYTES);
+ ret = bytes;
+
+ if (s->cow_bitmap) {
+ ret += mirror_cow_align(s, &offset, &bytes);
+ }
+ /* The offset is granularity-aligned because:
+ * 1) Caller passes in aligned values;
+ * 2) mirror_cow_align is used only when target cluster is larger. */
+ assert(QEMU_IS_ALIGNED(offset, s->granularity));
+ /* The range is sector-aligned, since bdrv_getlength() rounds up. */
+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
+
+ op = g_new0(MirrorOp, 1);
+ op->s = s;
+ op->offset = offset;
+ op->bytes = bytes;
+
+ /* Copy the dirty cluster. */
+ s->in_flight++;
+ s->bytes_in_flight += bytes;
+ trace_mirror_one_iteration(s, offset, bytes);
+
+ blk_aio_copy_range(s->common.blk, offset, s->target, offset,
+ bytes, 0, mirror_copy_range_complete, op);
+ return ret;
+}
+
static void mirror_do_zero_or_discard(MirrorBlockJob *s,
int64_t offset,
uint64_t bytes,
@@ -429,7 +494,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
io_bytes = mirror_clip_bytes(s, offset, io_bytes);
switch (mirror_method) {
case MIRROR_METHOD_COPY:
- io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
+ io_bytes = io_bytes_acct = mirror_do_copy(s, offset, io_bytes);
break;
case MIRROR_METHOD_ZERO:
case MIRROR_METHOD_DISCARD:
@@ -1090,6 +1155,8 @@ static BlockDriver bdrv_mirror_top = {
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush,
.bdrv_co_block_status = bdrv_co_block_status_from_backing,
+ .bdrv_co_copy_range_from = bdrv_co_copy_range_from_backing,
+ .bdrv_co_copy_range_to = bdrv_co_copy_range_to_backing,
.bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
.bdrv_close = bdrv_mirror_top_close,
.bdrv_child_perm = bdrv_mirror_top_child_perm,
@@ -1177,6 +1244,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
s->source = bs;
s->mirror_top_bs = mirror_top_bs;
+ s->use_copy_range = true;
+
/* No resize for the target either; while the mirror is still running, a
* consistent read isn't necessarily possible. We could possibly allow
* writes and graph modifications, though it would likely defeat the
diff --git a/block/trace-events b/block/trace-events
index 2d59b53fd3..2958003e33 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -34,6 +34,7 @@ mirror_one_iteration(void *s, int64_t offset, uint64_t bytes) "s %p offset %" PR
mirror_iteration_done(void *s, int64_t offset, uint64_t bytes, int ret) "s %p offset %" PRId64 " bytes %" PRIu64 " ret %d"
mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
mirror_yield_in_flight(void *s, int64_t offset, int in_flight) "s %p offset %" PRId64 " in_flight %d"
+mirror_copy_range_complete(void *s, int ret, int64_t offset, uint64_t bytes) "s %p ret %d offset %" PRId64 " bytes %" PRIu64
# block/backup.c
backup_do_cow_enter(void *job, int64_t start, int64_t offset, uint64_t bytes) "job %p start %" PRId64 " offset %" PRId64 " bytes %" PRIu64
--
2.17.0